Raphaël G. Git Repositories - airbundle/blob - Command/WeatherCommand.php
Add skip on weather unavailable data when startup timeout makes us miss upstream...
[airbundle] / Command / WeatherCommand.php
1 <?php
2
3 namespace Rapsys\AirBundle\Command;
4
5 use Doctrine\Bundle\DoctrineBundle\Command\DoctrineCommand;
6 use Symfony\Component\Console\Input\InputInterface;
7 use Symfony\Component\Console\Output\OutputInterface;
8 use Symfony\Component\Filesystem\Exception\IOExceptionInterface;
9 use Symfony\Component\Filesystem\Filesystem;
10 use Rapsys\AirBundle\Entity\Session;
11
12 class WeatherCommand extends DoctrineCommand {
13 //Set failure constant
14 const FAILURE = 1;
15
16 ///Set success constant
17 const SUCCESS = 0;
18
19 ///Set Tidy config
20 private $config = [
21 //Mostly useless in fact
22 'indent' => true,
23 //Required to simplify simplexml transition
24 'output-xml' => true,
25 //Required to avoid xml errors
26 'quote-nbsp' => false,
27 //Required to fix code
28 'clean' => true
29 ];
30
31 ///Set accuweather uris
32 private $accuweather = [
33 //Hourly uri
34 'hourly' => [
35 75001 => 'https://www.accuweather.com/en/fr/paris-01-louvre/75001/hourly-weather-forecast/179142_pc?day=',
36 75004 => 'https://www.accuweather.com/en/fr/paris-04-hotel-de-ville/75004/hourly-weather-forecast/179145_pc?day=',
37 75005 => 'https://www.accuweather.com/en/fr/paris-05-pantheon/75005/hourly-weather-forecast/179146_pc?day=',
38 75007 => 'https://www.accuweather.com/en/fr/paris-07-palais-bourbon/75007/hourly-weather-forecast/179148_pc?day=',
39 75009 => 'https://www.accuweather.com/en/fr/paris-09-opera/75009/hourly-weather-forecast/179150_pc?day=',
40 75013 => 'https://www.accuweather.com/en/fr/paris-13-gobelins/75013/hourly-weather-forecast/179154_pc?day=',
41 75015 => 'https://www.accuweather.com/en/fr/paris-15-vaugirard/75015/hourly-weather-forecast/179156_pc?day=',
42 75019 => 'https://www.accuweather.com/en/fr/paris-19-buttes-chaumont/75019/hourly-weather-forecast/179160_pc?day=',
43 75116 => 'https://www.accuweather.com/en/fr/paris-16-passy/75116/hourly-weather-forecast/179246_pc?day='
44 ],
45 //Daily uri
46 'daily' => [
47 75001 => 'https://www.accuweather.com/en/fr/paris-01-louvre/75001/daily-weather-forecast/179142_pc',
48 75004 => 'https://www.accuweather.com/en/fr/paris-04-hotel-de-ville/75004/daily-weather-forecast/179145_pc',
49 75005 => 'https://www.accuweather.com/en/fr/paris-05-pantheon/75005/daily-weather-forecast/179146_pc',
50 75007 => 'https://www.accuweather.com/en/fr/paris-07-palais-bourbon/75007/daily-weather-forecast/179148_pc',
51 75009 => 'https://www.accuweather.com/en/fr/paris-09-opera/75009/daily-weather-forecast/179150_pc',
52 75013 => 'https://www.accuweather.com/en/fr/paris-13-gobelins/75013/daily-weather-forecast/179154_pc',
53 75015 => 'https://www.accuweather.com/en/fr/paris-15-vaugirard/75015/daily-weather-forecast/179156_pc',
54 75019 => 'https://www.accuweather.com/en/fr/paris-19-buttes-chaumont/75019/daily-weather-forecast/179160_pc',
55 75116 => 'https://www.accuweather.com/en/fr/paris-16-passy/75116/daily-weather-forecast/179246_pc'
56 ]
57 ];
58
59 ///Set curl handler
60 private $ch = null;
61
62 ///Configure attribute command
63 protected function configure() {
64 //Configure the class
65 $this
66 //Set name
67 ->setName('rapsysair:weather')
68 //Set description shown with bin/console list
69 ->setDescription('Updates session rain and temperature fields')
70 //Set description shown with bin/console --help airlibre:attribute
71 ->setHelp('This command updates session rain and temperature fields in next three days')
72 //Add daily and hourly aliases
73 ->setAliases(['rapsysair:weather:daily', 'rapsysair:weather:hourly']);
74 }
75
76 ///Process the attribution
77 protected function execute(InputInterface $input, OutputInterface $output) {
78 //Fetch doctrine
79 $doctrine = $this->getDoctrine();
80
81 //Get manager
82 $manager = $doctrine->getManager();
83
84 //Tidy object
85 $tidy = new \tidy();
86
87 //Init zipcodes array
88 $zipcodes = [];
89
90 //Init types
91 $types = [];
92
93 //Process hourly accuweather
94 if (($command = $input->getFirstArgument()) == 'rapsysair:weather:hourly' || $command == 'rapsysair:weather') {
95 //Fetch hourly sessions to attribute
96 $types['hourly'] = $doctrine->getRepository(Session::class)->findAllPendingHourlyWeather();
97
98 //Iterate on each session
99 foreach($types['hourly'] as $sessionId => $session) {
100 //Get zipcode
101 $zipcode = $session->getLocation()->getZipcode();
102
103 //Get start
104 $start = $session->getStart();
105
106 //Set start day
107 $day = $start->diff((new \DateTime('now'))->setTime(0, 0, 0))->d + 1;
108
109 //Check if zipcode is set
110 if (!isset($zipcodes[$zipcode])) {
111 $zipcodes[$zipcode] = [];
112 }
113
114 //Check if zipcode date is set
115 if (!isset($zipcodes[$zipcode][$day])) {
116 $zipcodes[$zipcode][$day] = [ $sessionId => $sessionId ];
117 } else {
118 $zipcodes[$zipcode][$day][$sessionId] = $sessionId;
119 }
120
121 //Get stop
122 $stop = $session->getStop();
123
124 //Set stop day
125 $day = $stop->diff((new \DateTime('now'))->setTime(0, 0, 0))->d + 1;
126
127 //Check if zipcode date is set
128 if (!isset($zipcodes[$zipcode][$day])) {
129 $zipcodes[$zipcode][$day] = [ $sessionId => $sessionId ];
130 } else {
131 $zipcodes[$zipcode][$day][$sessionId] = $sessionId;
132 }
133 }
134 }
135
136 //Process daily accuweather
137 if ($command == 'rapsysair:weather:daily' || $command == 'rapsysair:weather') {
138 //Fetch daily sessions to attribute
139 $types['daily'] = $doctrine->getRepository(Session::class)->findAllPendingDailyWeather();
140
141 //Iterate on each session
142 foreach($types['daily'] as $sessionId => $session) {
143 //Get zipcode
144 $zipcode = $session->getLocation()->getZipcode();
145
146 //Get start
147 $start = $session->getStart();
148
149 //Set start day
150 $day = 'daily';
151
152 //Check if zipcode is set
153 if (!isset($zipcodes[$zipcode])) {
154 $zipcodes[$zipcode] = [];
155 }
156
157 //Check if zipcode date is set
158 if (!isset($zipcodes[$zipcode][$day])) {
159 $zipcodes[$zipcode][$day] = [ $sessionId => $sessionId ];
160 } else {
161 $zipcodes[$zipcode][$day][$sessionId] = $sessionId;
162 }
163 }
164 }
165
166 //Get filesystem
167 $filesystem = new Filesystem();
168
169 //Set tmpdir
170 //XXX: worst case scenario we have 3 files per zipcode
171 if (!is_dir($tmpdir = sys_get_temp_dir().'/accuweather')) {
172 try {
173 //Create dir
174 $filesystem->mkdir($tmpdir, 0775);
175 } catch (IOExceptionInterface $exception) {
176 //Display error
177 echo 'Create dir '.$exception->getPath().' failed'."\n";
178
179 //Exit with failure
180 exit(self::FAILURE);
181 }
182 }
183
184 //Init curl
185 $this->curl_init();
186
187 //Init data array
188 $data = [];
189
190 //Iterate on zipcodes
191 foreach($zipcodes as $zipcode => $days) {
192 //Iterate on days
193 foreach($days as $day => $null) {
194 //Try to load content from cache
195 if (!is_file($file = $tmpdir.'/'.$zipcode.'.'.$day.'.html') || stat($file)['ctime'] <= (time() - ($day == 'daily' ? 4 : 2)*3600) || ($content = file_get_contents($file)) === false) {
196 //Prevent timing detection
197 //XXX: from 0.1 to 5 seconds
198 usleep(rand(1,50) * 100000);
199
200 //Get content
201 //TODO: for daily we may load data for requested quarter of the day
202 $content = $this->curl_get($day == 'daily' ? $this->accuweather['daily'][$zipcode] : $this->accuweather['hourly'][$zipcode].$day);
203
204 //Store it
205 if (file_put_contents($tmpdir.'/'.$zipcode.'.'.$day.'.html', $content) === false) {
206 //Display error
207 echo 'Write to '.$tmpdir.'/'.$zipcode.'.'.$day.'.html failed'."\n";
208
209 //Exit with failure
210 exit(self::FAILURE);
211 }
212 }
213
214 //Parse string
215 $tidy->parseString($content, $this->config, 'utf8');
216
217 //Fix error buffer
218 //XXX: don't care about theses errors, tidy is here to fix...
219 #if (!empty($tidy->errorBuffer)) {
220 # var_dump($tidy->errorBuffer);
221 # die('Tidy errors');
222 #}
223
224 //Load simplexml
225 //XXX: trash all xmlns= broken tags
226 $sx = new \SimpleXMLElement(str_replace(['xmlns=', 'xlink:href='], ['xns=', 'href='], $tidy));
227
228 //Process daily
229 if ($day == 'daily') {
230 //Iterate on each link containing data
231 foreach($sx->xpath('//a[@class="daily-forecast-card"]') as $node) {
232 //Get date
233 $dsm = trim($node->div[0]->h2[0]->span[1]);
234
235 //Get temperature
236 $temperature = str_replace('Ā°', '', $node->div[0]->div[0]->span[0]);
237
238 //Get rainrisk
239 $rainrisk = str_replace('%', '', trim($node->div[2]))/100;
240
241 //Store data
242 $data[$zipcode][$dsm]['daily'] = [
243 'temperature' => $temperature,
244 'rainrisk' => $rainrisk
245 ];
246 }
247 //Process hourly
248 } else {
249 //Iterate on each div containing data
250 #(string)$sx->xpath('//div[@class="hourly-card-nfl"]')[0]->attributes()->value
251 #/html/body/div[1]/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div/h2/span[1]
252 foreach($sx->xpath('//div[@data-shared="false"]') as $node) {
253 //Get hour
254 $hour = trim($node->div[0]->div[0]->h2[0]->span[0]);
255
256 //Get dsm
257 $dsm = trim($node->div[0]->div[0]->h2[0]->span[1]);
258
259 //Get temperature
260 $temperature = str_replace('Ā°', '', $node->div[0]->div[0]->div[0]);
261
262 //Get realfeel
263 $realfeel = str_replace(['RealFeelĀ® ', 'Ā°'], '', trim($node->div[0]->div[0]->span[0]));
264
265 //Get rainrisk
266 $rainrisk = str_replace('%', '', trim($node->div[0]->div[0]->div[1]))/100;
267
268 //Set rainfall to 0 (mm)
269 $rainfall = 0;
270
271 //Iterate on each entry
272 foreach($node->div[1]->div[0]->div[0]->div[1]->p as $p) {
273 //Lookup for rain entry if present
274 if (trim($p) == 'Rain') {
275 //Get rainfall
276 $rainfall = floatval(str_replace(' mm', '', $p->span[0]));
277 }
278 }
279
280 //Store data
281 $data[$zipcode][$dsm][$hour] = [
282 'temperature' => $temperature,
283 'realfeel' => $realfeel,
284 'rainrisk' => $rainrisk,
285 'rainfall' => $rainfall
286 ];
287 }
288 }
289
290 //Cleanup
291 unset($sx);
292 }
293 }
294
295 //Iterate on types
296 foreach($types as $type => $sessions) {
297 //Iterate on each type
298 foreach($sessions as $sessionId => $session) {
299 //Get zipcode
300 $zipcode = $session->getLocation()->getZipcode();
301
302 //Get start
303 $start = $session->getStart();
304
305 //Daily type
306 if ($type == 'daily') {
307 //Set period
308 $period = [ $start ];
309 //Hourly type
310 } else {
311 //Get stop
312 $stop = $session->getStop();
313
314 //Compute period
315 $period = new \DatePeriod(
316 //Start from begin
317 $start,
318 //Iterate on each hour
319 new \DateInterval('PT1H'),
320 //End with begin + length
321 $stop
322 );
323 }
324
325 //Set meteo
326 $meteo = [
327 'rainfall' => null,
328 'rainrisk' => null,
329 'realfeel' => [],
330 'realfeelmin' => null,
331 'realfeelmax' => null,
332 'temperature' => [],
333 'temperaturemin' => null,
334 'temperaturemax' => null
335 ];
336
337 //Iterate on the period
338 foreach($period as $time) {
339 //Set dsm
340 $dsm = $time->format('d/m');
341
342 //Set hour
343 $hour = $type=='daily'?$type:$time->format('H');
344
345 //Check data availability
346 //XXX: sometimes startup delay causes weather data to be unavailable for session first hour
347 if (!isset($data[$zipcode][$dsm][$hour])) {
348 //Skip unavailable data
349 continue;
350 }
351
352 //Set info alias
353 $info = $data[$zipcode][$dsm][$hour];
354
355 //Check if rainrisk is higher
356 if ($meteo['rainrisk'] === null || $info['rainrisk'] > $meteo['rainrisk']) {
357 //Set highest rain risk
358 $meteo['rainrisk'] = floatval($info['rainrisk']);
359 }
360
361 //Check if rainfall is set
362 if (isset($info['rainfall'])) {
363 //Set rainfall sum
364 $meteo['rainfall'] += floatval($info['rainfall']);
365 }
366
367 //Add temperature
368 $meteo['temperature'][$hour] = $info['temperature'];
369
370 //Hourly type
371 if ($type != 'daily') {
372 //Check min temperature
373 if ($meteo['temperaturemin'] === null || $info['temperature'] < $meteo['temperaturemin']) {
374 $meteo['temperaturemin'] = floatval($info['temperature']);
375 }
376
377 //Check max temperature
378 if ($meteo['temperaturemax'] === null || $info['temperature'] > $meteo['temperaturemax']) {
379 $meteo['temperaturemax'] = floatval($info['temperature']);
380 }
381 }
382
383 //Check if realfeel is set
384 if (isset($info['realfeel'])) {
385 //Add realfeel
386 $meteo['realfeel'][$hour] = $info['realfeel'];
387
388 //Check min realfeel
389 if ($meteo['realfeelmin'] === null || $info['realfeel'] < $meteo['realfeelmin']) {
390 $meteo['realfeelmin'] = floatval($info['realfeel']);
391 }
392
393 //Check max realfeel
394 if ($meteo['realfeelmax'] === null || $info['realfeel'] > $meteo['realfeelmax']) {
395 $meteo['realfeelmax'] = floatval($info['realfeel']);
396 }
397 }
398 }
399
400 //Check if rainfall is set and differ
401 if ($session->getRainfall() !== $meteo['rainfall']) {
402 //Set rainfall
403 $session->setRainfall($meteo['rainfall']);
404 }
405
406 //Check if rainrisk differ
407 if ($session->getRainrisk() !== $meteo['rainrisk']) {
408 //Set rainrisk
409 $session->setRainrisk($meteo['rainrisk']);
410 }
411
412 //Check realfeel array
413 if ($meteo['realfeel'] !== []) {
414 //Compute realfeel
415 $realfeel = floatval(round(array_sum($meteo['realfeel'])/count($meteo['realfeel']),1));
416
417 //Check if realfeel differ
418 if ($session->getRealfeel() !== $realfeel) {
419 //Set average realfeel
420 #$meteo['realfeel'] = array_sum($meteo['realfeel'])/count($meteo['realfeel']);
421 $session->setRealfeel($realfeel);
422 }
423
424 //Check if realfeelmin differ
425 if ($session->getRealfeelmin() !== $meteo['realfeelmin']) {
426 //Set realfeelmin
427 $session->setRealfeelmin($meteo['realfeelmin']);
428 }
429
430 //Check if realfeelmax differ
431 if ($session->getRealfeelmax() !== $meteo['realfeelmax']) {
432 //Set realfeelmax
433 $session->setRealfeelmax($meteo['realfeelmax']);
434 }
435 }
436
437 //Check temperature array
438 if ($meteo['temperature'] !== []) {
439 //Compute temperature
440 $temperature = floatval(round(array_sum($meteo['temperature'])/count($meteo['temperature']),1));
441
442 //Check if temperature differ
443 if ($session->getTemperature() !== $temperature) {
444 //Set average temperature
445 #$meteo['temperature'] = array_sum($meteo['temperature'])/count($meteo['temperature']);
446 $session->setTemperature($temperature);
447 }
448
449 //Check if temperaturemin differ
450 if ($session->getTemperaturemin() !== $meteo['temperaturemin']) {
451 //Set temperaturemin
452 $session->setTemperaturemin($meteo['temperaturemin']);
453 }
454
455 //Check if temperaturemax differ
456 if ($session->getTemperaturemax() !== $meteo['temperaturemax']) {
457 //Set temperaturemax
458 $session->setTemperaturemax($meteo['temperaturemax']);
459 }
460 }
461 }
462 }
463
464 //Flush to get the ids
465 $manager->flush();
466
467 //Close curl handler
468 $this->curl_close();
469
470 //Return success
471 return self::SUCCESS;
472 }
473
474 /**
475 * Init curl handler
476 *
477 * @return bool|void Return success or exit
478 */
479 function curl_init() {
480 //Init curl
481 if (($this->ch = curl_init()) === false) {
482 //Display error
483 echo 'Curl init failed: '.curl_error($this->ch)."\n";
484 //Exit with failure
485 exit(self::FAILURE);
486 }
487
488 //Set curl options
489 if (
490 curl_setopt_array(
491 $this->ch,
492 [
493 //Force http2
494 CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0,
495 //Set http headers
496 CURLOPT_HTTPHEADER => [
497 //XXX: it seems that you can disable akamai fucking protection with Pragma: akamai-x-cache-off
498 //XXX: see https://support.globaldots.com/hc/en-us/articles/115003996705-Akamai-Pragma-Headers-overview
499 #'Pragma: akamai-x-cache-off',
500 //XXX: working curl command
501 #curl --http2 --cookie file.jar --cookie-jar file.jar -v -i -k -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' -H 'Accept-Language: en-GB,en;q=0.9' -H 'Cache-Control: no-cache' -H 'Connection: keep-alive' -H 'Host: www.accuweather.com' -H 'Pragma: no-cache' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36' 'https://www.accuweather.com/'
502 //Set accept
503 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
504 //Set accept language
505 'Accept-Language: en-GB,en;q=0.9',
506 //Disable cache
507 'Cache-Control: no-cache',
508 //Keep connection alive
509 'Connection: keep-alive',
510 //Disable cache
511 'Pragma: no-cache',
512 //Force secure requests
513 'Upgrade-Insecure-Requests: 1',
514 //Set user agent
515 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
516 //Force akamai cookie
517 //XXX: seems to come from http request
518 'Cookie: AKA_A2=A',
519 ],
520 //Enable cookie
521 CURLOPT_COOKIEFILE => '',
522 //Disable location following
523 CURLOPT_FOLLOWLOCATION => false,
524 //Set url
525 #CURLOPT_URL => $url = 'https://www.accuweather.com/',
526 //Return headers too
527 CURLOPT_HEADER => true,
528 //Return content
529 CURLOPT_RETURNTRANSFER => true,
530
531 //XXX: debug
532 CURLINFO_HEADER_OUT => true
533 ]
534 ) === false
535 ) {
536 //Display error
537 echo 'Curl setopt array failed: '.curl_error($this->ch)."\n";
538 //Exit with failure
539 exit(self::FAILURE);
540 }
541
542 //Return success
543 return true;
544 }
545
546 /**
547 * Get url
548 *
549 * @return string|void Return url content or exit
550 */
551 function curl_get($url) {
552 //Set url to fetch
553 if (curl_setopt($this->ch, CURLOPT_URL, $url) === false) {
554 //Display error
555 echo 'Setopt for '.$url.' failed: '.curl_error($this->ch)."\n";
556
557 //Close curl handler
558 curl_close($this->ch);
559
560 //Exit with failure
561 exit(self::FAILURE);
562 }
563
564 //Check return status
565 if (($response = curl_exec($this->ch)) === false) {
566 //Display error
567 echo 'Get for '.$url.' failed: '.curl_error($this->ch)."\n";
568
569 //Display sent headers
570 var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT));
571
572 //Display response
573 var_dump($response);
574
575 //Close curl handler
576 curl_close($this->ch);
577
578 //Exit with failure
579 exit(self::FAILURE);
580 }
581
582 //Get header size
583 if (empty($hs = curl_getinfo($this->ch, CURLINFO_HEADER_SIZE))) {
584 //Display error
585 echo 'Getinfo for '.$url.' failed: '.curl_error($this->ch)."\n";
586
587 //Display sent headers
588 var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT));
589
590 //Display response
591 var_dump($response);
592
593 //Close curl handler
594 curl_close($this->ch);
595
596 //Exit with failure
597 exit(self::FAILURE);
598 }
599
600 //Get header
601 if (empty($header = substr($response, 0, $hs))) {
602 //Display error
603 echo 'Header for '.$url.' empty: '.curl_error($this->ch)."\n";
604
605 //Display sent headers
606 var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT));
607
608 //Display response
609 var_dump($response);
610
611 //Close curl handler
612 curl_close($this->ch);
613
614 //Exit with failure
615 exit(self::FAILURE);
616 }
617
618 //Check request success
619 if (strlen($header) <= 10 || substr($header, 0, 10) !== 'HTTP/2 200') {
620 //Display error
621 echo 'Status for '.$url.' failed: '.curl_error($this->ch)."\n";
622
623 //Display sent headers
624 var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT));
625
626 //Display response
627 var_dump($header);
628
629 //Close curl handler
630 curl_close($this->ch);
631
632 //Exit with failure
633 exit(self::FAILURE);
634 }
635
636 //Return content
637 return substr($response, $hs);
638 }
639
640 /**
641 * Close curl handler
642 *
643 * @return bool Return success or failure
644 */
645 function curl_close() {
646 return curl_close($this->ch);
647 }
648 }