--- /dev/null
+<?php
+
+namespace Rapsys\AirBundle\Command;
+
+use Doctrine\Bundle\DoctrineBundle\Command\DoctrineCommand;
+use Symfony\Component\Console\Input\InputInterface;
+use Symfony\Component\Console\Output\OutputInterface;
+use Symfony\Component\Filesystem\Exception\IOExceptionInterface;
+use Symfony\Component\Filesystem\Filesystem;
+use Rapsys\AirBundle\Entity\Session;
+
+class WeatherCommand extends DoctrineCommand {
+ //Set failure constant
+ const FAILURE = 1;
+
+ ///Set success constant
+ const SUCCESS = 0;
+
+ ///Set Tidy config
+ private $config = [
+ //Mostly useless in fact
+ 'indent' => true,
+ //Required to simplify simplexml transition
+ 'output-xml' => true,
+ //Required to avoid xml errors
+ 'quote-nbsp' => false,
+ //Required to fix code
+ 'clean' => true
+ ];
+
+ ///Set accuweather uris
+ private $accuweather = [
+ //Hourly uri
+ 'hourly' => [
+ 75001 => 'https://www.accuweather.com/en/fr/paris-01-louvre/75001/hourly-weather-forecast/179142_pc?day=',
+ 75005 => 'https://www.accuweather.com/en/fr/paris-05-pantheon/75005/hourly-weather-forecast/179146_pc?day=',
+ 75007 => 'https://www.accuweather.com/en/fr/paris-07-palais-bourbon/75007/hourly-weather-forecast/179148_pc?day=',
+ 75009 => 'https://www.accuweather.com/en/fr/paris-09-opera/75009/hourly-weather-forecast/179150_pc?day=',
+ 75015 => 'https://www.accuweather.com/en/fr/paris-15-vaugirard/75015/hourly-weather-forecast/179156_pc?day=',
+ 75019 => 'https://www.accuweather.com/en/fr/paris-19-buttes-chaumont/75019/hourly-weather-forecast/179160_pc?day=',
+ 75116 => 'https://www.accuweather.com/en/fr/paris-16-passy/75116/hourly-weather-forecast/179246_pc?day='
+ ],
+ //Daily uri
+ 'daily' => [
+ 75001 => 'https://www.accuweather.com/en/fr/paris-01-louvre/75001/daily-weather-forecast/179142_pc',
+ 75005 => 'https://www.accuweather.com/en/fr/paris-05-pantheon/75005/daily-weather-forecast/179146_pc',
+ 75007 => 'https://www.accuweather.com/en/fr/paris-07-palais-bourbon/75007/daily-weather-forecast/179148_pc',
+ 75009 => 'https://www.accuweather.com/en/fr/paris-09-opera/75009/daily-weather-forecast/179150_pc',
+ 75015 => 'https://www.accuweather.com/en/fr/paris-15-vaugirard/75015/daily-weather-forecast/179156_pc',
+ 75019 => 'https://www.accuweather.com/en/fr/paris-19-buttes-chaumont/75019/daily-weather-forecast/179160_pc',
+ 75116 => 'https://www.accuweather.com/en/fr/paris-16-passy/75116/daily-weather-forecast/179246_pc'
+ ]
+ ];
+
+ ///Set curl handler
+ private $ch = null;
+
+ ///Configure attribute command
+ protected function configure() {
+ //Configure the class
+ $this
+ //Set name
+ ->setName('rapsysair:weather')
+ //Set description shown with bin/console list
+ ->setDescription('Updates session rain and temperature fields')
+ //Set description shown with bin/console --help airlibre:attribute
+ ->setHelp('This command updates session rain and temperature fields in next three days')
+ //Add daily and hourly aliases
+ ->setAliases(['rapsysair:weather:daily', 'rapsysair:weather:hourly']);
+ }
+
+ ///Process the attribution
+ protected function execute(InputInterface $input, OutputInterface $output) {
+ //Fetch doctrine
+ $doctrine = $this->getDoctrine();
+
+ //Get manager
+ $manager = $doctrine->getManager();
+
+ //Tidy object
+ $tidy = new \tidy();
+
+ //Init zipcodes array
+ $zipcodes = [];
+
+ //Init types
+ $types = [];
+
+ //Process hourly accuweather
+ if (($command = $input->getFirstArgument()) == 'rapsysair:weather:hourly' || $command == 'rapsysair:weather') {
+ //Fetch hourly sessions to attribute
+ $types['hourly'] = $doctrine->getRepository(Session::class)->findAllPendingHourlyWeather();
+
+ //Iterate on each session
+ foreach($types['hourly'] as $sessionId => $session) {
+ //Get zipcode
+ $zipcode = $session->getLocation()->getZipcode();
+
+ //Get start
+ $start = $session->getStart();
+
+ //Set start day
+ $day = $start->diff((new \DateTime('now'))->setTime(0, 0, 0))->d + 1;
+
+ //Check if zipcode is set
+ if (!isset($zipcodes[$zipcode])) {
+ $zipcodes[$zipcode] = [];
+ }
+
+ //Check if zipcode date is set
+ if (!isset($zipcodes[$zipcode][$day])) {
+ $zipcodes[$zipcode][$day] = [ $sessionId => $sessionId ];
+ } else {
+ $zipcodes[$zipcode][$day][$sessionId] = $sessionId;
+ }
+
+ //Get stop
+ $stop = $session->getStop();
+
+ //Set stop day
+ $day = $stop->diff((new \DateTime('now'))->setTime(0, 0, 0))->d + 1;
+
+ //Check if zipcode date is set
+ if (!isset($zipcodes[$zipcode][$day])) {
+ $zipcodes[$zipcode][$day] = [ $sessionId => $sessionId ];
+ } else {
+ $zipcodes[$zipcode][$day][$sessionId] = $sessionId;
+ }
+ }
+ }
+
+ //Process daily accuweather
+ if ($command == 'rapsysair:weather:daily' || $command == 'rapsysair:weather') {
+ //Fetch daily sessions to attribute
+ $types['daily'] = $doctrine->getRepository(Session::class)->findAllPendingDailyWeather();
+
+ //Iterate on each session
+ foreach($types['daily'] as $sessionId => $session) {
+ //Get zipcode
+ $zipcode = $session->getLocation()->getZipcode();
+
+ //Get start
+ $start = $session->getStart();
+
+ //Set start day
+ $day = 'daily';
+
+ //Check if zipcode is set
+ if (!isset($zipcodes[$zipcode])) {
+ $zipcodes[$zipcode] = [];
+ }
+
+ //Check if zipcode date is set
+ if (!isset($zipcodes[$zipcode][$day])) {
+ $zipcodes[$zipcode][$day] = [ $sessionId => $sessionId ];
+ } else {
+ $zipcodes[$zipcode][$day][$sessionId] = $sessionId;
+ }
+ }
+ }
+
+ //Get filesystem
+ $filesystem = new Filesystem();
+
+ //Set tmpdir
+ //XXX: worst case scenario we have 3 files per zipcode
+ if (!is_dir($tmpdir = sys_get_temp_dir().'/accuweather')) {
+ try {
+ //Create dir
+ $filesystem->mkdir($tmpdir, 0775);
+ } catch (IOExceptionInterface $exception) {
+ //Display error
+ echo 'Create dir '.$exception->getPath().' failed'."\n";
+
+ //Exit with failure
+ exit(self::FAILURE);
+ }
+ }
+
+ //Init curl
+ $this->curl_init();
+
+ //Init data array
+ $data = [];
+
+ //Iterate on zipcodes
+ foreach($zipcodes as $zipcode => $days) {
+ //Iterate on days
+ foreach($days as $day => $null) {
+ //Try to load content from cache
+ if (!is_file($file = $tmpdir.'/'.$zipcode.'.'.$day.'.html') || stat($file)['ctime'] <= (time() - ($day == 'daily' ? 4 : 2)*3600) || ($content = file_get_contents($file)) === false) {
+ //Prevent timing detection
+ //XXX: from 0.1 to 5 seconds
+ usleep(rand(1,50) * 100000);
+
+ //Get content
+ //TODO: for daily we may load data for requested quarter of the day
+ $content = $this->curl_get($day == 'daily' ? $this->accuweather['daily'][$zipcode] : $this->accuweather['hourly'][$zipcode].$day);
+
+ //Store it
+ if (file_put_contents($tmpdir.'/'.$zipcode.'.'.$day.'.html', $content) === false) {
+ //Display error
+ echo 'Write to '.$tmpdir.'/'.$zipcode.'.'.$day.'.html failed'."\n";
+
+ //Exit with failure
+ exit(self::FAILURE);
+ }
+ }
+
+ //Parse string
+ $tidy->parseString($content, $this->config, 'utf8');
+
+ //Fix error buffer
+ //XXX: don't care about theses errors, tidy is here to fix...
+ #if (!empty($tidy->errorBuffer)) {
+ # var_dump($tidy->errorBuffer);
+ # die('Tidy errors');
+ #}
+
+ //Load simplexml
+ //XXX: trash all xmlns= broken tags
+ $sx = new \SimpleXMLElement(str_replace(['xmlns=', 'xlink:href='], ['xns=', 'href='], $tidy));
+
+ //Process daily
+ if ($day == 'daily') {
+ //Iterate on each link containing data
+ foreach($sx->xpath('//a[@class="daily-forecast-card"]') as $node) {
+ //Get date
+ $dsm = trim($node->div[0]->h2[0]->span[1]);
+
+ //Get temperature
+ $temperature = str_replace('°', '', $node->div[0]->div[0]->span[0]);
+
+ //Get rainrisk
+ $rainrisk = str_replace('%', '', trim($node->div[2]))/100;
+
+ //Store data
+ $data[$zipcode][$dsm]['daily'] = [
+ 'temperature' => $temperature,
+ 'rainrisk' => $rainrisk
+ ];
+ }
+ //Process hourly
+ } else {
+ //Iterate on each div containing data
+ #(string)$sx->xpath('//div[@class="hourly-card-nfl"]')[0]->attributes()->value
+ #/html/body/div[1]/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div/h2/span[1]
+ foreach($sx->xpath('//div[@data-shared="false"]') as $node) {
+ //Get hour
+ $hour = trim($node->div[0]->div[0]->h2[0]->span[0]);
+
+ //Get dsm
+ $dsm = trim($node->div[0]->div[0]->h2[0]->span[1]);
+
+ //Get temperature
+ $temperature = str_replace('°', '', $node->div[0]->div[0]->div[0]);
+
+ //Get realfeel
+ $realfeel = str_replace(['RealFeel® ', '°'], '', trim($node->div[0]->div[0]->span[0]));
+
+ //Get rainrisk
+ $rainrisk = str_replace('%', '', trim($node->div[0]->div[0]->div[1]))/100;
+
+ //Label is Rain when we have a rainfall
+ if (($pluviolabel = trim($node->div[1]->div[0]->div[0]->div[1]->p[1])) == 'Rain') {
+ //Get rainfall
+ $rainfall = str_replace(' mm', '', $node->div[1]->div[0]->div[0]->div[1]->p[1]->span[0]);
+ //Cloud Cover, no rainfall
+ } else {
+ //Set rainfall to 0 (mm)
+ $rainfall = 0;
+ }
+
+ //Store data
+ $data[$zipcode][$dsm][$hour] = [
+ 'temperature' => $temperature,
+ 'realfeel' => $realfeel,
+ 'rainrisk' => $rainrisk,
+ 'rainfall' => $rainfall
+ ];
+ }
+ }
+
+ //Cleanup
+ unset($sx);
+ }
+ }
+
+ //Iterate on types
+ foreach($types as $type => $sessions) {
+ //Iterate on each type
+ foreach($sessions as $sessionId => $session) {
+ //Get zipcode
+ $zipcode = $session->getLocation()->getZipcode();
+
+ //Get start
+ $start = $session->getStart();
+
+ //Daily type
+ if ($type == 'daily') {
+ //Set period
+ $period = [ $start ];
+ //Hourly type
+ } else {
+ //Get stop
+ $stop = $session->getStop();
+
+ //Compute period
+ $period = new \DatePeriod(
+ //Start from begin
+ $start,
+ //Iterate on each hour
+ new \DateInterval('PT1H'),
+ //End with begin + length
+ $stop
+ );
+ }
+
+ //Set meteo
+ $meteo = [
+ 'rainfall' => null,
+ 'rainrisk' => null,
+ 'realfeel' => [],
+ 'realfeelmin' => null,
+ 'realfeelmax' => null,
+ 'temperature' => [],
+ 'temperaturemin' => null,
+ 'temperaturemax' => null
+ ];
+
+ //Iterate on the period
+ foreach($period as $time) {
+ //Set dsm
+ $dsm = $time->format('d/m');
+
+ //Set hour
+ $hour = $type=='daily'?$type:$time->format('H');
+
+ //Check data availability
+ //XXX: should never happen
+ #if (!isset($data[$zipcode][$dsm][$hour])) {
+ # //Skip unavailable data
+ # continue;
+ #}
+
+ //Set info alias
+ $info = $data[$zipcode][$dsm][$hour];
+
+ //Check if rainrisk is higher
+ if ($meteo['rainrisk'] === null || $info['rainrisk'] > $meteo['rainrisk']) {
+ //Set highest rain risk
+ $meteo['rainrisk'] = floatval($info['rainrisk']);
+ }
+
+ //Check if rainfall is set
+ if (isset($info['rainfall'])) {
+ //Set rainfall sum
+ $meteo['rainfall'] += floatval($info['rainfall']);
+ }
+
+ //Add temperature
+ $meteo['temperature'][$hour] = $info['temperature'];
+
+ //Hourly type
+ if ($type != 'daily') {
+ //Check min temperature
+ if ($meteo['temperaturemin'] === null || $info['temperature'] < $meteo['temperaturemin']) {
+ $meteo['temperaturemin'] = floatval($info['temperature']);
+ }
+
+ //Check max temperature
+ if ($meteo['temperaturemax'] === null || $info['temperature'] > $meteo['temperaturemax']) {
+ $meteo['temperaturemax'] = floatval($info['temperature']);
+ }
+ }
+
+ //Check if realfeel is set
+ if (isset($info['realfeel'])) {
+ //Add realfeel
+ $meteo['realfeel'][$hour] = $info['realfeel'];
+
+ //Check min realfeel
+ if ($meteo['realfeelmin'] === null || $info['realfeel'] < $meteo['realfeelmin']) {
+ $meteo['realfeelmin'] = floatval($info['realfeel']);
+ }
+
+ //Check max realfeel
+ if ($meteo['realfeelmax'] === null || $info['realfeel'] > $meteo['realfeelmax']) {
+ $meteo['realfeelmax'] = floatval($info['realfeel']);
+ }
+ }
+ }
+
+ //Check if rainfall is set and differ
+ if ($session->getRainfall() !== $meteo['rainfall']) {
+ //Set rainfall
+ $session->setRainfall($meteo['rainfall']);
+ }
+
+ //Check if rainrisk differ
+ if ($session->getRainrisk() !== $meteo['rainrisk']) {
+ //Set rainrisk
+ $session->setRainrisk($meteo['rainrisk']);
+ }
+
+ //Check realfeel array
+ if ($meteo['realfeel'] !== []) {
+ //Compute realfeel
+ $realfeel = floatval(round(array_sum($meteo['realfeel'])/count($meteo['realfeel']),1));
+
+ //Check if realfeel differ
+ if ($session->getRealfeel() !== $realfeel) {
+ //Set average realfeel
+ #$meteo['realfeel'] = array_sum($meteo['realfeel'])/count($meteo['realfeel']);
+ $session->setRealfeel($realfeel);
+ }
+
+ //Check if realfeelmin differ
+ if ($session->getRealfeelmin() !== $meteo['realfeelmin']) {
+ //Set realfeelmin
+ $session->setRealfeelmin($meteo['realfeelmin']);
+ }
+
+ //Check if realfeelmax differ
+ if ($session->getRealfeelmax() !== $meteo['realfeelmax']) {
+ //Set realfeelmax
+ $session->setRealfeelmax($meteo['realfeelmax']);
+ }
+ }
+
+ //Check temperature array
+ if ($meteo['temperature'] !== []) {
+ //Compute temperature
+ $temperature = floatval(round(array_sum($meteo['temperature'])/count($meteo['temperature']),1));
+
+ //Check if temperature differ
+ if ($session->getTemperature() !== $temperature) {
+ //Set average temperature
+ #$meteo['temperature'] = array_sum($meteo['temperature'])/count($meteo['temperature']);
+ $session->setTemperature($temperature);
+ }
+
+ //Check if temperaturemin differ
+ if ($session->getTemperaturemin() !== $meteo['temperaturemin']) {
+ //Set temperaturemin
+ $session->setTemperaturemin($meteo['temperaturemin']);
+ }
+
+ //Check if temperaturemax differ
+ if ($session->getTemperaturemax() !== $meteo['temperaturemax']) {
+ //Set temperaturemax
+ $session->setTemperaturemax($meteo['temperaturemax']);
+ }
+ }
+ }
+ }
+
+ //Flush to get the ids
+ $manager->flush();
+
+ //Close curl handler
+ $this->curl_close();
+
+ //Return success
+ return self::SUCCESS;
+ }
+
+ /**
+ * Init curl handler
+ *
+ * @return bool|void Return success or exit
+ */
+ function curl_init() {
+ //Init curl
+ if (($this->ch = curl_init()) === false) {
+ //Display error
+ echo 'Curl init failed: '.curl_error($this->ch)."\n";
+ //Exit with failure
+ exit(self::FAILURE);
+ }
+
+ //Set curl options
+ if (
+ curl_setopt_array(
+ $this->ch,
+ [
+ //Force http2
+ CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0,
+ //Set http headers
+ CURLOPT_HTTPHEADER => [
+ //XXX: it seems that you can disable akamai fucking protection with Pragma: akamai-x-cache-off
+ //XXX: see https://support.globaldots.com/hc/en-us/articles/115003996705-Akamai-Pragma-Headers-overview
+ #'Pragma: akamai-x-cache-off',
+ //XXX: working curl command
+ #curl --http2 --cookie file.jar --cookie-jar file.jar -v -i -k -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' -H 'Accept-Language: en-GB,en;q=0.9' -H 'Cache-Control: no-cache' -H 'Connection: keep-alive' -H 'Host: www.accuweather.com' -H 'Pragma: no-cache' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36' 'https://www.accuweather.com/'
+ //Set accept
+ 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
+ //Set accept language
+ 'Accept-Language: en-GB,en;q=0.9',
+ //Disable cache
+ 'Cache-Control: no-cache',
+ //Keep connection alive
+ 'Connection: keep-alive',
+ //Disable cache
+ 'Pragma: no-cache',
+ //Force secure requests
+ 'Upgrade-Insecure-Requests: 1',
+ //Set user agent
+ 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
+ //Force akamai cookie
+ //XXX: seems to come from http request
+ 'Cookie: AKA_A2=A',
+ ],
+ //Enable cookie
+ CURLOPT_COOKIEFILE => '',
+ //Disable location following
+ CURLOPT_FOLLOWLOCATION => false,
+ //Set url
+ #CURLOPT_URL => $url = 'https://www.accuweather.com/',
+ //Return headers too
+ CURLOPT_HEADER => true,
+ //Return content
+ CURLOPT_RETURNTRANSFER => true,
+
+ //XXX: debug
+ CURLINFO_HEADER_OUT => true
+ ]
+ ) === false
+ ) {
+ //Display error
+ echo 'Curl setopt array failed: '.curl_error($this->ch)."\n";
+ //Exit with failure
+ exit(self::FAILURE);
+ }
+
+ //Return success
+ return true;
+ }
+
+ /**
+ * Get url
+ *
+ * @return string|void Return url content or exit
+ */
+ function curl_get($url) {
+ //Set url to fetch
+ if (curl_setopt($this->ch, CURLOPT_URL, $url) === false) {
+ //Display error
+ echo 'Setopt for '.$url.' failed: '.curl_error($this->ch)."\n";
+
+ //Close curl handler
+ curl_close($this->ch);
+
+ //Exit with failure
+ exit(self::FAILURE);
+ }
+
+ //Check return status
+ if (($response = curl_exec($this->ch)) === false) {
+ //Display error
+ echo 'Get for '.$url.' failed: '.curl_error($this->ch)."\n";
+
+ //Display sent headers
+ var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT));
+
+ //Display response
+ var_dump($response);
+
+ //Close curl handler
+ curl_close($this->ch);
+
+ //Exit with failure
+ exit(self::FAILURE);
+ }
+
+ //Get header size
+ if (empty($hs = curl_getinfo($this->ch, CURLINFO_HEADER_SIZE))) {
+ //Display error
+ echo 'Getinfo for '.$url.' failed: '.curl_error($this->ch)."\n";
+
+ //Display sent headers
+ var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT));
+
+ //Display response
+ var_dump($response);
+
+ //Close curl handler
+ curl_close($this->ch);
+
+ //Exit with failure
+ exit(self::FAILURE);
+ }
+
+ //Get header
+ if (empty($header = substr($response, 0, $hs))) {
+ //Display error
+ echo 'Header for '.$url.' empty: '.curl_error($this->ch)."\n";
+
+ //Display sent headers
+ var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT));
+
+ //Display response
+ var_dump($response);
+
+ //Close curl handler
+ curl_close($this->ch);
+
+ //Exit with failure
+ exit(self::FAILURE);
+ }
+
+ //Check request success
+ if (strlen($header) <= 10 || substr($header, 0, 10) !== 'HTTP/2 200') {
+ //Display error
+ echo 'Status for '.$url.' failed: '.curl_error($this->ch)."\n";
+
+ //Display sent headers
+ var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT));
+
+ //Display response
+ var_dump($header);
+
+ //Close curl handler
+ curl_close($this->ch);
+
+ //Exit with failure
+ exit(self::FAILURE);
+ }
+
+ //Return content
+ return substr($response, $hs);
+ }
+
+ /**
+ * Close curl handler
+ *
+ * @return bool Return success or failure
+ */
+ function curl_close() {
+ return curl_close($this->ch);
+ }
+}