From 894549fc5484a8cd2522f0d5b5677093aeb05e8a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Rapha=C3=ABl=20Gertz?= Date: Tue, 8 Dec 2020 01:20:43 +0100 Subject: [PATCH] Add bin/console airlibre:weather command --- Command/WeatherCommand.php | 642 +++++++++++++++++++++++++++++++++++++ 1 file changed, 642 insertions(+) create mode 100644 Command/WeatherCommand.php diff --git a/Command/WeatherCommand.php b/Command/WeatherCommand.php new file mode 100644 index 0000000..00f922c --- /dev/null +++ b/Command/WeatherCommand.php @@ -0,0 +1,642 @@ + true, + //Required to simplify simplexml transition + 'output-xml' => true, + //Required to avoid xml errors + 'quote-nbsp' => false, + //Required to fix code + 'clean' => true + ]; + + ///Set accuweather uris + private $accuweather = [ + //Hourly uri + 'hourly' => [ + 75001 => 'https://www.accuweather.com/en/fr/paris-01-louvre/75001/hourly-weather-forecast/179142_pc?day=', + 75005 => 'https://www.accuweather.com/en/fr/paris-05-pantheon/75005/hourly-weather-forecast/179146_pc?day=', + 75007 => 'https://www.accuweather.com/en/fr/paris-07-palais-bourbon/75007/hourly-weather-forecast/179148_pc?day=', + 75009 => 'https://www.accuweather.com/en/fr/paris-09-opera/75009/hourly-weather-forecast/179150_pc?day=', + 75015 => 'https://www.accuweather.com/en/fr/paris-15-vaugirard/75015/hourly-weather-forecast/179156_pc?day=', + 75019 => 'https://www.accuweather.com/en/fr/paris-19-buttes-chaumont/75019/hourly-weather-forecast/179160_pc?day=', + 75116 => 'https://www.accuweather.com/en/fr/paris-16-passy/75116/hourly-weather-forecast/179246_pc?day=' + ], + //Daily uri + 'daily' => [ + 75001 => 'https://www.accuweather.com/en/fr/paris-01-louvre/75001/daily-weather-forecast/179142_pc', + 75005 => 'https://www.accuweather.com/en/fr/paris-05-pantheon/75005/daily-weather-forecast/179146_pc', + 75007 => 'https://www.accuweather.com/en/fr/paris-07-palais-bourbon/75007/daily-weather-forecast/179148_pc', + 75009 => 'https://www.accuweather.com/en/fr/paris-09-opera/75009/daily-weather-forecast/179150_pc', + 75015 => 'https://www.accuweather.com/en/fr/paris-15-vaugirard/75015/daily-weather-forecast/179156_pc', + 75019 => 'https://www.accuweather.com/en/fr/paris-19-buttes-chaumont/75019/daily-weather-forecast/179160_pc', + 75116 => 'https://www.accuweather.com/en/fr/paris-16-passy/75116/daily-weather-forecast/179246_pc' + ] + ]; + + ///Set curl handler + private $ch = null; + + ///Configure attribute command + protected function configure() { + //Configure the class + $this + //Set name + ->setName('rapsysair:weather') + //Set description shown with bin/console list + ->setDescription('Updates session rain and temperature fields') + //Set description shown with bin/console --help airlibre:attribute + ->setHelp('This command updates session rain and temperature fields in next three days') + //Add daily and hourly aliases + ->setAliases(['rapsysair:weather:daily', 'rapsysair:weather:hourly']); + } + + ///Process the attribution + protected function execute(InputInterface $input, OutputInterface $output) { + //Fetch doctrine + $doctrine = $this->getDoctrine(); + + //Get manager + $manager = $doctrine->getManager(); + + //Tidy object + $tidy = new \tidy(); + + //Init zipcodes array + $zipcodes = []; + + //Init types + $types = []; + + //Process hourly accuweather + if (($command = $input->getFirstArgument()) == 'rapsysair:weather:hourly' || $command == 'rapsysair:weather') { + //Fetch hourly sessions to attribute + $types['hourly'] = $doctrine->getRepository(Session::class)->findAllPendingHourlyWeather(); + + //Iterate on each session + foreach($types['hourly'] as $sessionId => $session) { + //Get zipcode + $zipcode = $session->getLocation()->getZipcode(); + + //Get start + $start = $session->getStart(); + + //Set start day + $day = $start->diff((new \DateTime('now'))->setTime(0, 0, 0))->d + 1; + + //Check if zipcode is set + if (!isset($zipcodes[$zipcode])) { + $zipcodes[$zipcode] = []; + } + + //Check if zipcode date is set + if (!isset($zipcodes[$zipcode][$day])) { + $zipcodes[$zipcode][$day] = [ $sessionId => $sessionId ]; + } else { + $zipcodes[$zipcode][$day][$sessionId] = $sessionId; + } + + //Get stop + $stop = $session->getStop(); + + //Set stop day + $day = $stop->diff((new \DateTime('now'))->setTime(0, 0, 0))->d + 1; + + //Check if zipcode date is set + if (!isset($zipcodes[$zipcode][$day])) { + $zipcodes[$zipcode][$day] = [ $sessionId => $sessionId ]; + } else { + $zipcodes[$zipcode][$day][$sessionId] = $sessionId; + } + } + } + + //Process daily accuweather + if ($command == 'rapsysair:weather:daily' || $command == 'rapsysair:weather') { + //Fetch daily sessions to attribute + $types['daily'] = $doctrine->getRepository(Session::class)->findAllPendingDailyWeather(); + + //Iterate on each session + foreach($types['daily'] as $sessionId => $session) { + //Get zipcode + $zipcode = $session->getLocation()->getZipcode(); + + //Get start + $start = $session->getStart(); + + //Set start day + $day = 'daily'; + + //Check if zipcode is set + if (!isset($zipcodes[$zipcode])) { + $zipcodes[$zipcode] = []; + } + + //Check if zipcode date is set + if (!isset($zipcodes[$zipcode][$day])) { + $zipcodes[$zipcode][$day] = [ $sessionId => $sessionId ]; + } else { + $zipcodes[$zipcode][$day][$sessionId] = $sessionId; + } + } + } + + //Get filesystem + $filesystem = new Filesystem(); + + //Set tmpdir + //XXX: worst case scenario we have 3 files per zipcode + if (!is_dir($tmpdir = sys_get_temp_dir().'/accuweather')) { + try { + //Create dir + $filesystem->mkdir($tmpdir, 0775); + } catch (IOExceptionInterface $exception) { + //Display error + echo 'Create dir '.$exception->getPath().' failed'."\n"; + + //Exit with failure + exit(self::FAILURE); + } + } + + //Init curl + $this->curl_init(); + + //Init data array + $data = []; + + //Iterate on zipcodes + foreach($zipcodes as $zipcode => $days) { + //Iterate on days + foreach($days as $day => $null) { + //Try to load content from cache + if (!is_file($file = $tmpdir.'/'.$zipcode.'.'.$day.'.html') || stat($file)['ctime'] <= (time() - ($day == 'daily' ? 4 : 2)*3600) || ($content = file_get_contents($file)) === false) { + //Prevent timing detection + //XXX: from 0.1 to 5 seconds + usleep(rand(1,50) * 100000); + + //Get content + //TODO: for daily we may load data for requested quarter of the day + $content = $this->curl_get($day == 'daily' ? $this->accuweather['daily'][$zipcode] : $this->accuweather['hourly'][$zipcode].$day); + + //Store it + if (file_put_contents($tmpdir.'/'.$zipcode.'.'.$day.'.html', $content) === false) { + //Display error + echo 'Write to '.$tmpdir.'/'.$zipcode.'.'.$day.'.html failed'."\n"; + + //Exit with failure + exit(self::FAILURE); + } + } + + //Parse string + $tidy->parseString($content, $this->config, 'utf8'); + + //Fix error buffer + //XXX: don't care about theses errors, tidy is here to fix... + #if (!empty($tidy->errorBuffer)) { + # var_dump($tidy->errorBuffer); + # die('Tidy errors'); + #} + + //Load simplexml + //XXX: trash all xmlns= broken tags + $sx = new \SimpleXMLElement(str_replace(['xmlns=', 'xlink:href='], ['xns=', 'href='], $tidy)); + + //Process daily + if ($day == 'daily') { + //Iterate on each link containing data + foreach($sx->xpath('//a[@class="daily-forecast-card"]') as $node) { + //Get date + $dsm = trim($node->div[0]->h2[0]->span[1]); + + //Get temperature + $temperature = str_replace('°', '', $node->div[0]->div[0]->span[0]); + + //Get rainrisk + $rainrisk = str_replace('%', '', trim($node->div[2]))/100; + + //Store data + $data[$zipcode][$dsm]['daily'] = [ + 'temperature' => $temperature, + 'rainrisk' => $rainrisk + ]; + } + //Process hourly + } else { + //Iterate on each div containing data + #(string)$sx->xpath('//div[@class="hourly-card-nfl"]')[0]->attributes()->value + #/html/body/div[1]/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div/h2/span[1] + foreach($sx->xpath('//div[@data-shared="false"]') as $node) { + //Get hour + $hour = trim($node->div[0]->div[0]->h2[0]->span[0]); + + //Get dsm + $dsm = trim($node->div[0]->div[0]->h2[0]->span[1]); + + //Get temperature + $temperature = str_replace('°', '', $node->div[0]->div[0]->div[0]); + + //Get realfeel + $realfeel = str_replace(['RealFeel® ', '°'], '', trim($node->div[0]->div[0]->span[0])); + + //Get rainrisk + $rainrisk = str_replace('%', '', trim($node->div[0]->div[0]->div[1]))/100; + + //Label is Rain when we have a rainfall + if (($pluviolabel = trim($node->div[1]->div[0]->div[0]->div[1]->p[1])) == 'Rain') { + //Get rainfall + $rainfall = str_replace(' mm', '', $node->div[1]->div[0]->div[0]->div[1]->p[1]->span[0]); + //Cloud Cover, no rainfall + } else { + //Set rainfall to 0 (mm) + $rainfall = 0; + } + + //Store data + $data[$zipcode][$dsm][$hour] = [ + 'temperature' => $temperature, + 'realfeel' => $realfeel, + 'rainrisk' => $rainrisk, + 'rainfall' => $rainfall + ]; + } + } + + //Cleanup + unset($sx); + } + } + + //Iterate on types + foreach($types as $type => $sessions) { + //Iterate on each type + foreach($sessions as $sessionId => $session) { + //Get zipcode + $zipcode = $session->getLocation()->getZipcode(); + + //Get start + $start = $session->getStart(); + + //Daily type + if ($type == 'daily') { + //Set period + $period = [ $start ]; + //Hourly type + } else { + //Get stop + $stop = $session->getStop(); + + //Compute period + $period = new \DatePeriod( + //Start from begin + $start, + //Iterate on each hour + new \DateInterval('PT1H'), + //End with begin + length + $stop + ); + } + + //Set meteo + $meteo = [ + 'rainfall' => null, + 'rainrisk' => null, + 'realfeel' => [], + 'realfeelmin' => null, + 'realfeelmax' => null, + 'temperature' => [], + 'temperaturemin' => null, + 'temperaturemax' => null + ]; + + //Iterate on the period + foreach($period as $time) { + //Set dsm + $dsm = $time->format('d/m'); + + //Set hour + $hour = $type=='daily'?$type:$time->format('H'); + + //Check data availability + //XXX: should never happen + #if (!isset($data[$zipcode][$dsm][$hour])) { + # //Skip unavailable data + # continue; + #} + + //Set info alias + $info = $data[$zipcode][$dsm][$hour]; + + //Check if rainrisk is higher + if ($meteo['rainrisk'] === null || $info['rainrisk'] > $meteo['rainrisk']) { + //Set highest rain risk + $meteo['rainrisk'] = floatval($info['rainrisk']); + } + + //Check if rainfall is set + if (isset($info['rainfall'])) { + //Set rainfall sum + $meteo['rainfall'] += floatval($info['rainfall']); + } + + //Add temperature + $meteo['temperature'][$hour] = $info['temperature']; + + //Hourly type + if ($type != 'daily') { + //Check min temperature + if ($meteo['temperaturemin'] === null || $info['temperature'] < $meteo['temperaturemin']) { + $meteo['temperaturemin'] = floatval($info['temperature']); + } + + //Check max temperature + if ($meteo['temperaturemax'] === null || $info['temperature'] > $meteo['temperaturemax']) { + $meteo['temperaturemax'] = floatval($info['temperature']); + } + } + + //Check if realfeel is set + if (isset($info['realfeel'])) { + //Add realfeel + $meteo['realfeel'][$hour] = $info['realfeel']; + + //Check min realfeel + if ($meteo['realfeelmin'] === null || $info['realfeel'] < $meteo['realfeelmin']) { + $meteo['realfeelmin'] = floatval($info['realfeel']); + } + + //Check max realfeel + if ($meteo['realfeelmax'] === null || $info['realfeel'] > $meteo['realfeelmax']) { + $meteo['realfeelmax'] = floatval($info['realfeel']); + } + } + } + + //Check if rainfall is set and differ + if ($session->getRainfall() !== $meteo['rainfall']) { + //Set rainfall + $session->setRainfall($meteo['rainfall']); + } + + //Check if rainrisk differ + if ($session->getRainrisk() !== $meteo['rainrisk']) { + //Set rainrisk + $session->setRainrisk($meteo['rainrisk']); + } + + //Check realfeel array + if ($meteo['realfeel'] !== []) { + //Compute realfeel + $realfeel = floatval(round(array_sum($meteo['realfeel'])/count($meteo['realfeel']),1)); + + //Check if realfeel differ + if ($session->getRealfeel() !== $realfeel) { + //Set average realfeel + #$meteo['realfeel'] = array_sum($meteo['realfeel'])/count($meteo['realfeel']); + $session->setRealfeel($realfeel); + } + + //Check if realfeelmin differ + if ($session->getRealfeelmin() !== $meteo['realfeelmin']) { + //Set realfeelmin + $session->setRealfeelmin($meteo['realfeelmin']); + } + + //Check if realfeelmax differ + if ($session->getRealfeelmax() !== $meteo['realfeelmax']) { + //Set realfeelmax + $session->setRealfeelmax($meteo['realfeelmax']); + } + } + + //Check temperature array + if ($meteo['temperature'] !== []) { + //Compute temperature + $temperature = floatval(round(array_sum($meteo['temperature'])/count($meteo['temperature']),1)); + + //Check if temperature differ + if ($session->getTemperature() !== $temperature) { + //Set average temperature + #$meteo['temperature'] = array_sum($meteo['temperature'])/count($meteo['temperature']); + $session->setTemperature($temperature); + } + + //Check if temperaturemin differ + if ($session->getTemperaturemin() !== $meteo['temperaturemin']) { + //Set temperaturemin + $session->setTemperaturemin($meteo['temperaturemin']); + } + + //Check if temperaturemax differ + if ($session->getTemperaturemax() !== $meteo['temperaturemax']) { + //Set temperaturemax + $session->setTemperaturemax($meteo['temperaturemax']); + } + } + } + } + + //Flush to get the ids + $manager->flush(); + + //Close curl handler + $this->curl_close(); + + //Return success + return self::SUCCESS; + } + + /** + * Init curl handler + * + * @return bool|void Return success or exit + */ + function curl_init() { + //Init curl + if (($this->ch = curl_init()) === false) { + //Display error + echo 'Curl init failed: '.curl_error($this->ch)."\n"; + //Exit with failure + exit(self::FAILURE); + } + + //Set curl options + if ( + curl_setopt_array( + $this->ch, + [ + //Force http2 + CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0, + //Set http headers + CURLOPT_HTTPHEADER => [ + //XXX: it seems that you can disable akamai fucking protection with Pragma: akamai-x-cache-off + //XXX: see https://support.globaldots.com/hc/en-us/articles/115003996705-Akamai-Pragma-Headers-overview + #'Pragma: akamai-x-cache-off', + //XXX: working curl command + #curl --http2 --cookie file.jar --cookie-jar file.jar -v -i -k -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' -H 'Accept-Language: en-GB,en;q=0.9' -H 'Cache-Control: no-cache' -H 'Connection: keep-alive' -H 'Host: www.accuweather.com' -H 'Pragma: no-cache' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36' 'https://www.accuweather.com/' + //Set accept + 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', + //Set accept language + 'Accept-Language: en-GB,en;q=0.9', + //Disable cache + 'Cache-Control: no-cache', + //Keep connection alive + 'Connection: keep-alive', + //Disable cache + 'Pragma: no-cache', + //Force secure requests + 'Upgrade-Insecure-Requests: 1', + //Set user agent + 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36', + //Force akamai cookie + //XXX: seems to come from http request + 'Cookie: AKA_A2=A', + ], + //Enable cookie + CURLOPT_COOKIEFILE => '', + //Disable location following + CURLOPT_FOLLOWLOCATION => false, + //Set url + #CURLOPT_URL => $url = 'https://www.accuweather.com/', + //Return headers too + CURLOPT_HEADER => true, + //Return content + CURLOPT_RETURNTRANSFER => true, + + //XXX: debug + CURLINFO_HEADER_OUT => true + ] + ) === false + ) { + //Display error + echo 'Curl setopt array failed: '.curl_error($this->ch)."\n"; + //Exit with failure + exit(self::FAILURE); + } + + //Return success + return true; + } + + /** + * Get url + * + * @return string|void Return url content or exit + */ + function curl_get($url) { + //Set url to fetch + if (curl_setopt($this->ch, CURLOPT_URL, $url) === false) { + //Display error + echo 'Setopt for '.$url.' failed: '.curl_error($this->ch)."\n"; + + //Close curl handler + curl_close($this->ch); + + //Exit with failure + exit(self::FAILURE); + } + + //Check return status + if (($response = curl_exec($this->ch)) === false) { + //Display error + echo 'Get for '.$url.' failed: '.curl_error($this->ch)."\n"; + + //Display sent headers + var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT)); + + //Display response + var_dump($response); + + //Close curl handler + curl_close($this->ch); + + //Exit with failure + exit(self::FAILURE); + } + + //Get header size + if (empty($hs = curl_getinfo($this->ch, CURLINFO_HEADER_SIZE))) { + //Display error + echo 'Getinfo for '.$url.' failed: '.curl_error($this->ch)."\n"; + + //Display sent headers + var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT)); + + //Display response + var_dump($response); + + //Close curl handler + curl_close($this->ch); + + //Exit with failure + exit(self::FAILURE); + } + + //Get header + if (empty($header = substr($response, 0, $hs))) { + //Display error + echo 'Header for '.$url.' empty: '.curl_error($this->ch)."\n"; + + //Display sent headers + var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT)); + + //Display response + var_dump($response); + + //Close curl handler + curl_close($this->ch); + + //Exit with failure + exit(self::FAILURE); + } + + //Check request success + if (strlen($header) <= 10 || substr($header, 0, 10) !== 'HTTP/2 200') { + //Display error + echo 'Status for '.$url.' failed: '.curl_error($this->ch)."\n"; + + //Display sent headers + var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT)); + + //Display response + var_dump($header); + + //Close curl handler + curl_close($this->ch); + + //Exit with failure + exit(self::FAILURE); + } + + //Return content + return substr($response, $hs); + } + + /** + * Close curl handler + * + * @return bool Return success or failure + */ + function curl_close() { + return curl_close($this->ch); + } +} -- 2.41.1