From 1cb9165e95db75354b5681b7749fcdf9133afd5a Mon Sep 17 00:00:00 2001
From: =?utf8?q?Rapha=C3=ABl=20Gertz?= <git@rapsys.eu>
Date: Tue, 8 Dec 2020 01:20:43 +0100
Subject: [PATCH] Add bin/console airlibre:weather command

---
 Command/WeatherCommand.php | 642 +++++++++++++++++++++++++++++++++++++
 1 file changed, 642 insertions(+)
 create mode 100644 Command/WeatherCommand.php

diff --git a/Command/WeatherCommand.php b/Command/WeatherCommand.php
new file mode 100644
index 0000000..00f922c
--- /dev/null
+++ b/Command/WeatherCommand.php
@@ -0,0 +1,642 @@
+<?php
+
+namespace Rapsys\AirBundle\Command;
+
+use Doctrine\Bundle\DoctrineBundle\Command\DoctrineCommand;
+use Symfony\Component\Console\Input\InputInterface;
+use Symfony\Component\Console\Output\OutputInterface;
+use Symfony\Component\Filesystem\Exception\IOExceptionInterface;
+use Symfony\Component\Filesystem\Filesystem;
+use Rapsys\AirBundle\Entity\Session;
+
+class WeatherCommand extends DoctrineCommand {
+	///Exit status used by every exit(self::FAILURE) error path
+	const FAILURE = 1;
+
+	///Exit status returned by execute() when everything went fine
+	const SUCCESS = 0;
+
+	///Tidy options passed to tidy::parseString() in execute()
+	private $config = [
+		//Mostly useless in fact
+		'indent' => true,
+		//Required to simplify simplexml transition (tidy result is loaded in SimpleXMLElement)
+		'output-xml' => true,
+		//Required to avoid xml errors
+		'quote-nbsp' => false,
+		//Required to fix code
+		'clean' => true
+	];
+
+	///Accuweather forecast uris, indexed by forecast type then by zipcode
+	private $accuweather = [
+		//Hourly uris, execute() appends the day number right after day=
+		'hourly' => [
+			75001 => 'https://www.accuweather.com/en/fr/paris-01-louvre/75001/hourly-weather-forecast/179142_pc?day=',
+			75005 => 'https://www.accuweather.com/en/fr/paris-05-pantheon/75005/hourly-weather-forecast/179146_pc?day=',
+			75007 => 'https://www.accuweather.com/en/fr/paris-07-palais-bourbon/75007/hourly-weather-forecast/179148_pc?day=',
+			75009 => 'https://www.accuweather.com/en/fr/paris-09-opera/75009/hourly-weather-forecast/179150_pc?day=',
+			75015 => 'https://www.accuweather.com/en/fr/paris-15-vaugirard/75015/hourly-weather-forecast/179156_pc?day=',
+			75019 => 'https://www.accuweather.com/en/fr/paris-19-buttes-chaumont/75019/hourly-weather-forecast/179160_pc?day=',
+			75116 => 'https://www.accuweather.com/en/fr/paris-16-passy/75116/hourly-weather-forecast/179246_pc?day='
+		],
+		//Daily uris, execute() fetches them as is
+		'daily' => [
+			75001 => 'https://www.accuweather.com/en/fr/paris-01-louvre/75001/daily-weather-forecast/179142_pc',
+			75005 => 'https://www.accuweather.com/en/fr/paris-05-pantheon/75005/daily-weather-forecast/179146_pc',
+			75007 => 'https://www.accuweather.com/en/fr/paris-07-palais-bourbon/75007/daily-weather-forecast/179148_pc',
+			75009 => 'https://www.accuweather.com/en/fr/paris-09-opera/75009/daily-weather-forecast/179150_pc',
+			75015 => 'https://www.accuweather.com/en/fr/paris-15-vaugirard/75015/daily-weather-forecast/179156_pc',
+			75019 => 'https://www.accuweather.com/en/fr/paris-19-buttes-chaumont/75019/daily-weather-forecast/179160_pc',
+			75116 => 'https://www.accuweather.com/en/fr/paris-16-passy/75116/daily-weather-forecast/179246_pc'
+		]
+	];
+
+	///Curl handler, null until curl_init() runs, then used by curl_get() and curl_close()
+	private $ch = null;
+
+	///Configure weather command
+	///XXX: execute() reads the invoked name to run the hourly part, the daily part or both
+	protected function configure() {
+		//Register the primary command name
+		$this->setName('rapsysair:weather');
+		//Set summary shown with bin/console list
+		$this->setDescription('Updates session rain and temperature fields');
+		//Set help shown with bin/console help rapsysair:weather
+		$this->setHelp('This command updates session rain and temperature fields in next three days');
+		//Register daily and hourly aliases
+		$this->setAliases(['rapsysair:weather:daily', 'rapsysair:weather:hourly']);
+	}
+
+	///Fetch accuweather forecasts and update weather fields of pending sessions
+	protected function execute(InputInterface $input, OutputInterface $output) {
+		//Fetch doctrine
+		$doctrine = $this->getDoctrine();
+
+		//Get manager
+		$manager = $doctrine->getManager();
+
+		//Tidy object
+		$tidy = new \tidy();
+
+		//Init zipcodes array, holds session ids per zipcode and day bucket
+		$zipcodes = [];
+
+		//Init types, holds pending sessions per forecast type
+		$types = [];
+
+		//Process hourly accuweather
+		if (($command = $input->getFirstArgument()) === 'rapsysair:weather:hourly' || $command === 'rapsysair:weather') {
+			//Fetch sessions pending hourly weather
+			$types['hourly'] = $doctrine->getRepository(Session::class)->findAllPendingHourlyWeather();
+
+			//Iterate on each session
+			foreach($types['hourly'] as $sessionId => $session) {
+				//Get zipcode
+				$zipcode = $session->getLocation()->getZipcode();
+
+				//Get start
+				$start = $session->getStart();
+
+				//Set start day, use total days count as d is only the day part of the interval
+				$day = $start->diff((new \DateTime('now'))->setTime(0, 0, 0))->days + 1;
+
+				//Check if zipcode is set
+				if (!isset($zipcodes[$zipcode])) {
+					$zipcodes[$zipcode] = [];
+				}
+
+				//Check if zipcode date is set
+				if (!isset($zipcodes[$zipcode][$day])) {
+					$zipcodes[$zipcode][$day] = [ $sessionId => $sessionId ];
+				} else {
+					$zipcodes[$zipcode][$day][$sessionId] = $sessionId;
+				}
+
+				//Get stop
+				$stop = $session->getStop();
+
+				//Set stop day, a session may end on another day than it starts
+				$day = $stop->diff((new \DateTime('now'))->setTime(0, 0, 0))->days + 1;
+
+				//Check if zipcode date is set
+				if (!isset($zipcodes[$zipcode][$day])) {
+					$zipcodes[$zipcode][$day] = [ $sessionId => $sessionId ];
+				} else {
+					$zipcodes[$zipcode][$day][$sessionId] = $sessionId;
+				}
+			}
+		}
+
+		//Process daily accuweather
+		if ($command === 'rapsysair:weather:daily' || $command === 'rapsysair:weather') {
+			//Fetch sessions pending daily weather
+			$types['daily'] = $doctrine->getRepository(Session::class)->findAllPendingDailyWeather();
+
+			//Iterate on each session
+			foreach($types['daily'] as $sessionId => $session) {
+				//Get zipcode
+				$zipcode = $session->getLocation()->getZipcode();
+
+				//Daily page is parsed for every date it lists
+				//XXX: a single 'daily' bucket per zipcode is used instead of a day number
+
+				//Set day bucket
+				$day = 'daily';
+
+				//Check if zipcode is set
+				if (!isset($zipcodes[$zipcode])) {
+					$zipcodes[$zipcode] = [];
+				}
+
+				//Check if zipcode date is set
+				if (!isset($zipcodes[$zipcode][$day])) {
+					$zipcodes[$zipcode][$day] = [ $sessionId => $sessionId ];
+				} else {
+					$zipcodes[$zipcode][$day][$sessionId] = $sessionId;
+				}
+			}
+		}
+
+		//Get filesystem
+		$filesystem = new Filesystem();
+
+		//Set tmpdir
+		//XXX: worst case scenario we have 3 files per zipcode
+		if (!is_dir($tmpdir = sys_get_temp_dir().'/accuweather')) {
+			try {
+				//Create dir
+				$filesystem->mkdir($tmpdir, 0775);
+			} catch (IOExceptionInterface $exception) {
+				//Display error
+				echo 'Create dir '.$exception->getPath().' failed'."\n";
+
+				//Exit with failure
+				exit(self::FAILURE);
+			}
+		}
+
+		//Init curl
+		$this->curl_init();
+
+		//Init data array
+		$data = [];
+
+		//Iterate on zipcodes
+		foreach($zipcodes as $zipcode => $days) {
+			//Iterate on days
+			foreach($days as $day => $null) {
+				//Try to load content from cache, kept 4 hours for daily and 2 hours for hourly
+				if (!is_file($file = $tmpdir.'/'.$zipcode.'.'.$day.'.html') || stat($file)['ctime'] <= (time() - ($day === 'daily' ? 4 : 2)*3600) || ($content = file_get_contents($file)) === false) {
+					//Prevent timing detection
+					//XXX: from 0.1 to 5 seconds
+					usleep(rand(1,50) * 100000);
+
+					//Get content
+					//TODO: for daily we may load data for requested quarter of the day
+					$content = $this->curl_get($day === 'daily' ? $this->accuweather['daily'][$zipcode] : $this->accuweather['hourly'][$zipcode].$day);
+
+					//Store it
+					if (file_put_contents($file, $content) === false) {
+						//Display error
+						echo 'Write to '.$file.' failed'."\n";
+
+						//Exit with failure
+						exit(self::FAILURE);
+					}
+				}
+
+				//Parse string
+				$tidy->parseString($content, $this->config, 'utf8');
+
+				//Fix error buffer
+				//XXX: don't care about theses errors, tidy is here to fix...
+				#if (!empty($tidy->errorBuffer)) {
+				#	var_dump($tidy->errorBuffer);
+				#	die('Tidy errors');
+				#}
+
+				//Load simplexml
+				//XXX: trash all xmlns= broken tags
+				$sx = new \SimpleXMLElement(str_replace(['xmlns=', 'xlink:href='], ['xns=', 'href='], $tidy));
+
+				//Process daily
+				if ($day === 'daily') {
+					//Iterate on each link containing data
+					foreach($sx->xpath('//a[@class="daily-forecast-card"]') as $node) {
+						//Get date
+						$dsm = trim($node->div[0]->h2[0]->span[1]);
+
+						//Get temperature
+						$temperature = str_replace('°', '', $node->div[0]->div[0]->span[0]);
+
+						//Get rainrisk
+						$rainrisk = str_replace('%', '', trim($node->div[2]))/100;
+
+						//Store data
+						$data[$zipcode][$dsm]['daily'] = [
+							'temperature' => $temperature,
+							'rainrisk' => $rainrisk
+						];
+					}
+				//Process hourly
+				} else {
+					//Iterate on each div containing data
+					#(string)$sx->xpath('//div[@class="hourly-card-nfl"]')[0]->attributes()->value
+					#/html/body/div[1]/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div/h2/span[1]
+					foreach($sx->xpath('//div[@data-shared="false"]') as $node) {
+						//Get hour
+						$hour = trim($node->div[0]->div[0]->h2[0]->span[0]);
+
+						//Get dsm
+						$dsm = trim($node->div[0]->div[0]->h2[0]->span[1]);
+
+						//Get temperature
+						$temperature = str_replace('°', '', $node->div[0]->div[0]->div[0]);
+
+						//Get realfeel
+						$realfeel = str_replace(['RealFeel® ', '°'], '', trim($node->div[0]->div[0]->span[0]));
+
+						//Get rainrisk
+						$rainrisk = str_replace('%', '', trim($node->div[0]->div[0]->div[1]))/100;
+
+						//Label is Rain when we have a rainfall
+						if (trim($node->div[1]->div[0]->div[0]->div[1]->p[1]) === 'Rain') {
+							//Get rainfall
+							$rainfall = str_replace(' mm', '', $node->div[1]->div[0]->div[0]->div[1]->p[1]->span[0]);
+						//Cloud Cover, no rainfall
+						} else {
+							//Set rainfall to 0 (mm)
+							$rainfall = 0;
+						}
+
+						//Store data
+						$data[$zipcode][$dsm][$hour] = [
+							'temperature' => $temperature,
+							'realfeel' => $realfeel,
+							'rainrisk' => $rainrisk,
+							'rainfall' => $rainfall
+						];
+					}
+				}
+
+				//Cleanup
+				unset($sx);
+			}
+		}
+
+		//Iterate on types
+		foreach($types as $type => $sessions) {
+			//Iterate on each type
+			foreach($sessions as $sessionId => $session) {
+				//Get zipcode
+				$zipcode = $session->getLocation()->getZipcode();
+
+				//Get start
+				$start = $session->getStart();
+
+				//Daily type
+				if ($type === 'daily') {
+					//Set period
+					$period = [ $start ];
+				//Hourly type
+				} else {
+					//Get stop
+					$stop = $session->getStop();
+
+					//Compute period
+					$period = new \DatePeriod(
+						//Start from begin
+						$start,
+						//Iterate on each hour
+						new \DateInterval('PT1H'),
+						//End with begin + length
+						$stop
+					);
+				}
+
+				//Set meteo
+				$meteo = [
+					'rainfall' => null,
+					'rainrisk' => null,
+					'realfeel' => [],
+					'realfeelmin' => null,
+					'realfeelmax' => null,
+					'temperature' => [],
+					'temperaturemin' => null,
+					'temperaturemax' => null
+				];
+
+				//Iterate on the period
+				foreach($period as $time) {
+					//Set dsm
+					$dsm = $time->format('d/m');
+
+					//Set hour
+					$hour = $type === 'daily' ? $type : $time->format('H');
+
+					//Check data availability
+					//XXX: scraped page may lack this slot, skip it rather than read an undefined offset
+					if (!isset($data[$zipcode][$dsm][$hour])) {
+						//Skip unavailable data
+						continue;
+					}
+
+					//Set info alias
+					$info = $data[$zipcode][$dsm][$hour];
+
+					//Check if rainrisk is higher
+					if ($meteo['rainrisk'] === null || $info['rainrisk'] > $meteo['rainrisk']) {
+						//Set highest rain risk
+						$meteo['rainrisk'] = floatval($info['rainrisk']);
+					}
+
+					//Check if rainfall is set
+					if (isset($info['rainfall'])) {
+						//Set rainfall sum
+						$meteo['rainfall'] += floatval($info['rainfall']);
+					}
+
+					//Add temperature
+					$meteo['temperature'][$hour] = $info['temperature'];
+
+					//Hourly type
+					if ($type !== 'daily') {
+						//Check min temperature
+						if ($meteo['temperaturemin'] === null || $info['temperature'] < $meteo['temperaturemin']) {
+							$meteo['temperaturemin'] = floatval($info['temperature']);
+						}
+
+						//Check max temperature
+						if ($meteo['temperaturemax'] === null || $info['temperature'] > $meteo['temperaturemax']) {
+							$meteo['temperaturemax'] = floatval($info['temperature']);
+						}
+					}
+
+					//Check if realfeel is set
+					if (isset($info['realfeel'])) {
+						//Add realfeel
+						$meteo['realfeel'][$hour] = $info['realfeel'];
+
+						//Check min realfeel
+						if ($meteo['realfeelmin'] === null || $info['realfeel'] < $meteo['realfeelmin']) {
+							$meteo['realfeelmin'] = floatval($info['realfeel']);
+						}
+
+						//Check max realfeel
+						if ($meteo['realfeelmax'] === null || $info['realfeel'] > $meteo['realfeelmax']) {
+							$meteo['realfeelmax'] = floatval($info['realfeel']);
+						}
+					}
+				}
+
+				//Check if rainfall is set and differ
+				if ($session->getRainfall() !== $meteo['rainfall']) {
+					//Set rainfall
+					$session->setRainfall($meteo['rainfall']);
+				}
+
+				//Check if rainrisk differ
+				if ($session->getRainrisk() !== $meteo['rainrisk']) {
+					//Set rainrisk
+					$session->setRainrisk($meteo['rainrisk']);
+				}
+
+				//Check realfeel array
+				if ($meteo['realfeel'] !== []) {
+					//Compute realfeel
+					$realfeel = floatval(round(array_sum($meteo['realfeel'])/count($meteo['realfeel']),1));
+
+					//Check if realfeel differ
+					if ($session->getRealfeel() !== $realfeel) {
+						//Set average realfeel
+						#$meteo['realfeel'] = array_sum($meteo['realfeel'])/count($meteo['realfeel']);
+						$session->setRealfeel($realfeel);
+					}
+
+					//Check if realfeelmin differ
+					if ($session->getRealfeelmin() !== $meteo['realfeelmin']) {
+						//Set realfeelmin
+						$session->setRealfeelmin($meteo['realfeelmin']);
+					}
+
+					//Check if realfeelmax differ
+					if ($session->getRealfeelmax() !== $meteo['realfeelmax']) {
+						//Set realfeelmax
+						$session->setRealfeelmax($meteo['realfeelmax']);
+					}
+				}
+
+				//Check temperature array
+				if ($meteo['temperature'] !== []) {
+					//Compute temperature
+					$temperature = floatval(round(array_sum($meteo['temperature'])/count($meteo['temperature']),1));
+
+					//Check if temperature differ
+					if ($session->getTemperature() !== $temperature) {
+						//Set average temperature
+						#$meteo['temperature'] = array_sum($meteo['temperature'])/count($meteo['temperature']);
+						$session->setTemperature($temperature);
+					}
+
+					//Check if temperaturemin differ
+					if ($session->getTemperaturemin() !== $meteo['temperaturemin']) {
+						//Set temperaturemin
+						$session->setTemperaturemin($meteo['temperaturemin']);
+					}
+
+					//Check if temperaturemax differ
+					if ($session->getTemperaturemax() !== $meteo['temperaturemax']) {
+						//Set temperaturemax
+						$session->setTemperaturemax($meteo['temperaturemax']);
+					}
+				}
+			}
+		}
+
+		//Flush the updated sessions
+		$manager->flush();
+
+		//Close curl handler
+		$this->curl_close();
+
+		//Return success
+		return self::SUCCESS;
+	}
+
+	/**
+	 * Init curl handler and set the options shared by every request
+	 *
+	 * @return bool Always true, the process exits with FAILURE status when init or setup fails
+	 */
+	function curl_init() {
+		//Init curl
+		if (($this->ch = curl_init()) === false) {
+			//Display error, no handle exists at this point so curl_error can not be called
+			echo 'Curl init failed'."\n";
+			//Exit with failure
+			exit(self::FAILURE);
+		}
+
+		//Set curl options
+		if (
+			curl_setopt_array(
+				$this->ch,
+				[
+					//Force http2
+					CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0,
+					//Set http headers
+					CURLOPT_HTTPHEADER => [
+						//XXX: it seems that akamai protection can be disabled with Pragma: akamai-x-cache-off
+						//XXX: see https://support.globaldots.com/hc/en-us/articles/115003996705-Akamai-Pragma-Headers-overview
+						#'Pragma: akamai-x-cache-off',
+						//XXX: working curl command
+						#curl --http2 --cookie file.jar --cookie-jar file.jar -v -i -k -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' -H 'Accept-Language: en-GB,en;q=0.9' -H 'Cache-Control: no-cache' -H 'Connection: keep-alive' -H 'Host: www.accuweather.com' -H 'Pragma: no-cache' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36' 'https://www.accuweather.com/'
+						//Set accept
+						'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
+						//Set accept language
+						'Accept-Language: en-GB,en;q=0.9',
+						//Disable cache
+						'Cache-Control: no-cache',
+						//Keep connection alive
+						'Connection: keep-alive',
+						//Disable cache
+						'Pragma: no-cache',
+						//Force secure requests
+						'Upgrade-Insecure-Requests: 1',
+						//Set user agent
+						'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
+						//Force akamai cookie
+						//XXX: seems to come from http request
+						'Cookie: AKA_A2=A',
+					],
+					//Enable cookie
+					CURLOPT_COOKIEFILE => '',
+					//Disable location following
+					CURLOPT_FOLLOWLOCATION => false,
+					//Set url
+					#CURLOPT_URL => $url = 'https://www.accuweather.com/',
+					//Return headers too
+					CURLOPT_HEADER => true,
+					//Return content
+					CURLOPT_RETURNTRANSFER => true,
+
+					//Track sent request headers, curl_get dumps them on failure
+					CURLINFO_HEADER_OUT => true
+				]
+			) === false
+		) {
+			//Display error
+			echo 'Curl setopt array failed: '.curl_error($this->ch)."\n";
+			//Exit with failure
+			exit(self::FAILURE);
+		}
+
+		//Return success
+		return true;
+	}
+
+	/**
+	 * Get url content, any failure is displayed then exits with FAILURE status
+	 * @param string $url The url to fetch with the handler set by curl_init
+	 * @return string The response body, headers stripped
+	 */
+	function curl_get($url) {
+		//Set url to fetch
+		if (curl_setopt($this->ch, CURLOPT_URL, $url) === false) {
+			//Display error
+			echo 'Setopt for '.$url.' failed: '.curl_error($this->ch)."\n";
+
+			//Close curl handler
+			curl_close($this->ch);
+
+			//Exit with failure
+			exit(self::FAILURE);
+		}
+
+		//Check return status
+		if (($response = curl_exec($this->ch)) === false) {
+			//Display error
+			echo 'Get for '.$url.' failed: '.curl_error($this->ch)."\n";
+
+			//Display sent headers
+			var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT));
+
+			//Display response
+			var_dump($response);
+
+			//Close curl handler
+			curl_close($this->ch);
+
+			//Exit with failure
+			exit(self::FAILURE);
+		}
+
+		//Get header size, response holds headers then body as CURLOPT_HEADER is enabled
+		if (empty($hs = curl_getinfo($this->ch, CURLINFO_HEADER_SIZE))) {
+			//Display error
+			echo 'Getinfo for '.$url.' failed: '.curl_error($this->ch)."\n";
+
+			//Display sent headers
+			var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT));
+
+			//Display response
+			var_dump($response);
+
+			//Close curl handler
+			curl_close($this->ch);
+
+			//Exit with failure
+			exit(self::FAILURE);
+		}
+
+		//Get header
+		if (empty($header = substr($response, 0, $hs))) {
+			//Display error
+			echo 'Header for '.$url.' empty: '.curl_error($this->ch)."\n";
+
+			//Display sent headers
+			var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT));
+
+			//Display response
+			var_dump($response);
+
+			//Close curl handler
+			curl_close($this->ch);
+
+			//Exit with failure
+			exit(self::FAILURE);
+		}
+
+		//Check request success on the status code, whatever http version was negotiated
+		if (($code = curl_getinfo($this->ch, CURLINFO_HTTP_CODE)) !== 200) {
+			//Display error, curl_error is empty here as the transfer itself succeeded
+			echo 'Status for '.$url.' failed: HTTP '.$code."\n";
+
+			//Display sent headers
+			var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT));
+
+			//Display response
+			var_dump($header);
+
+			//Close curl handler
+			curl_close($this->ch);
+
+			//Exit with failure
+			exit(self::FAILURE);
+		}
+
+		//Return content
+		return substr($response, $hs);
+	}
+
+	/**
+	 * Close curl handler
+	 *
+	 * @return null Forwards the result of curl_close(), which returns no value
+	 */
+	function curl_close() {
+		return curl_close($this->ch);
+	}
+}
-- 
2.41.3