Raphaël G. Git Repositories - airbundle/blob - Command/WeatherCommand.php
New layout
[airbundle] / Command / WeatherCommand.php
1 <?php
2
3 namespace Rapsys\AirBundle\Command;
4
5 use Doctrine\Bundle\DoctrineBundle\Command\DoctrineCommand;
6 use Symfony\Component\Console\Input\InputInterface;
7 use Symfony\Component\Console\Output\OutputInterface;
8 use Symfony\Component\Filesystem\Exception\IOExceptionInterface;
9 use Symfony\Component\Filesystem\Filesystem;
10 use Rapsys\AirBundle\Entity\Session;
11
/**
 * Console command updating the weather fields of pending sessions
 *
 * Pending sessions are fetched from the Session repository, the matching
 * accuweather pages are downloaded (with a small on-disk cache), parsed through
 * tidy and simplexml, then aggregated per session and flushed through doctrine.
 *
 * The command answers to three names:
 * - rapsysair:weather processes both hourly and daily forecasts
 * - rapsysair:weather:hourly processes hourly forecasts only
 * - rapsysair:weather:daily processes daily forecasts only
 */
class WeatherCommand extends DoctrineCommand {
    ///Set failure constant
    const FAILURE = 1;

    ///Set success constant
    const SUCCESS = 0;

    ///Set Tidy config
    private $config = [
        //Mostly useless in fact
        'indent' => true,
        //Required to simplify simplexml transition
        'output-xml' => true,
        //Required to avoid xml errors
        'quote-nbsp' => false,
        //Required to fix code
        'clean' => true
    ];

    ///Set accuweather uris
    private $accuweather = [
        //Hourly uri
        //XXX: the requested day offset is appended to the uri
        'hourly' => [
            75001 => 'https://www.accuweather.com/en/fr/paris-01-louvre/75001/hourly-weather-forecast/179142_pc?day=',
            75005 => 'https://www.accuweather.com/en/fr/paris-05-pantheon/75005/hourly-weather-forecast/179146_pc?day=',
            75007 => 'https://www.accuweather.com/en/fr/paris-07-palais-bourbon/75007/hourly-weather-forecast/179148_pc?day=',
            75009 => 'https://www.accuweather.com/en/fr/paris-09-opera/75009/hourly-weather-forecast/179150_pc?day=',
            75015 => 'https://www.accuweather.com/en/fr/paris-15-vaugirard/75015/hourly-weather-forecast/179156_pc?day=',
            75019 => 'https://www.accuweather.com/en/fr/paris-19-buttes-chaumont/75019/hourly-weather-forecast/179160_pc?day=',
            75116 => 'https://www.accuweather.com/en/fr/paris-16-passy/75116/hourly-weather-forecast/179246_pc?day='
        ],
        //Daily uri
        'daily' => [
            75001 => 'https://www.accuweather.com/en/fr/paris-01-louvre/75001/daily-weather-forecast/179142_pc',
            75005 => 'https://www.accuweather.com/en/fr/paris-05-pantheon/75005/daily-weather-forecast/179146_pc',
            75007 => 'https://www.accuweather.com/en/fr/paris-07-palais-bourbon/75007/daily-weather-forecast/179148_pc',
            75009 => 'https://www.accuweather.com/en/fr/paris-09-opera/75009/daily-weather-forecast/179150_pc',
            75015 => 'https://www.accuweather.com/en/fr/paris-15-vaugirard/75015/daily-weather-forecast/179156_pc',
            75019 => 'https://www.accuweather.com/en/fr/paris-19-buttes-chaumont/75019/daily-weather-forecast/179160_pc',
            75116 => 'https://www.accuweather.com/en/fr/paris-16-passy/75116/daily-weather-forecast/179246_pc'
        ]
    ];

    ///Set curl handler
    private $ch = null;

    ///Configure weather command
    protected function configure() {
        //Configure the class
        $this
            //Set name
            ->setName('rapsysair:weather')
            //Set description shown with bin/console list
            ->setDescription('Updates session rain and temperature fields')
            //Set description shown with bin/console --help rapsysair:weather
            ->setHelp('This command updates session rain and temperature fields in next three days')
            //Add daily and hourly aliases
            ->setAliases(['rapsysair:weather:daily', 'rapsysair:weather:hourly']);
    }

    /**
     * Process the weather update
     *
     * @param InputInterface $input The console input, its first argument is the invoked command name
     * @param OutputInterface $output The console output, used to report errors and skipped items
     * @return int self::SUCCESS when sessions were flushed, self::FAILURE when the cache can not be written
     */
    protected function execute(InputInterface $input, OutputInterface $output) {
        //Fetch doctrine
        $doctrine = $this->getDoctrine();

        //Get manager
        $manager = $doctrine->getManager();

        //Get session repository
        $repository = $doctrine->getRepository(Session::class);

        //Get invoked command name
        $command = $input->getFirstArgument();

        //Set today at midnight
        $today = (new \DateTime('now'))->setTime(0, 0, 0);

        //Init zipcodes array
        //XXX: indexed by zipcode, then by day offset (int) or 'daily', then by session id
        $zipcodes = [];

        //Init types
        $types = [];

        //Process hourly accuweather
        if ($command === 'rapsysair:weather:hourly' || $command === 'rapsysair:weather') {
            //Fetch hourly sessions to update
            $types['hourly'] = $repository->findAllPendingHourlyWeather();

            //Iterate on each session
            foreach($types['hourly'] as $sessionId => $session) {
                //Get zipcode
                $zipcode = $session->getLocation()->getZipcode();

                //Register both the start and the stop day
                //XXX: a session may span over midnight and require two pages
                foreach([$session->getStart(), $session->getStop()] as $date) {
                    $zipcodes[$zipcode][$this->dayOffset($date, $today)][$sessionId] = $sessionId;
                }
            }
        }

        //Process daily accuweather
        if ($command === 'rapsysair:weather:daily' || $command === 'rapsysair:weather') {
            //Fetch daily sessions to update
            $types['daily'] = $repository->findAllPendingDailyWeather();

            //Iterate on each session
            foreach($types['daily'] as $sessionId => $session) {
                //Register the single daily page
                $zipcodes[$session->getLocation()->getZipcode()]['daily'][$sessionId] = $sessionId;
            }
        }

        //Get filesystem
        $filesystem = new Filesystem();

        //Set tmpdir
        //XXX: worst case scenario we have 3 files per zipcode
        $tmpdir = sys_get_temp_dir().'/accuweather';

        //Create tmpdir when missing
        if (!is_dir($tmpdir)) {
            try {
                //Create dir
                $filesystem->mkdir($tmpdir, 0775);
            } catch (IOExceptionInterface $exception) {
                //Display error
                $output->writeln('Create dir '.$exception->getPath().' failed');

                //Return failure
                //XXX: returning lets the console application set the exit code instead of killing the process
                return self::FAILURE;
            }
        }

        //Init curl
        $this->curl_init();

        //Tidy object
        $tidy = new \tidy();

        //Init data array
        //XXX: indexed by zipcode, then by d/m date, then by hour or 'daily'
        $data = [];

        //Iterate on zipcodes
        foreach($zipcodes as $zipcode => $days) {
            //Iterate on days
            foreach(array_keys($days) as $day) {
                //Set page type
                //XXX: strict comparison is required as day offsets are integers
                $type = $day === 'daily' ? 'daily' : 'hourly';

                //Skip zipcodes without known uri
                if (!isset($this->accuweather[$type][$zipcode])) {
                    //Display warning
                    $output->writeln('No accuweather '.$type.' uri for zipcode '.$zipcode);

                    //Skip this page
                    continue;
                }

                //Set uri
                $uri = $type === 'daily' ? $this->accuweather['daily'][$zipcode] : $this->accuweather['hourly'][$zipcode].$day;

                //Set cache file
                $file = $tmpdir.'/'.$zipcode.'.'.$day.'.html';

                //Set cache lifetime: 4 hours for daily pages, 2 hours for hourly ones
                $lifetime = ($type === 'daily' ? 4 : 2) * 3600;

                //Init content
                $content = false;

                //Try to load content from a fresh enough cache
                if (is_file($file) && filectime($file) > (time() - $lifetime)) {
                    $content = file_get_contents($file);
                }

                //Without usable cache
                if ($content === false) {
                    //Prevent timing detection
                    //XXX: from 0.1 to 5 seconds
                    usleep(rand(1, 50) * 100000);

                    //Get content
                    //TODO: for daily we may load data for requested quarter of the day
                    $content = $this->curl_get($uri);

                    //Store it
                    if (file_put_contents($file, $content) === false) {
                        //Display error
                        $output->writeln('Write to '.$file.' failed');

                        //Close curl handler
                        $this->curl_close();

                        //Return failure
                        return self::FAILURE;
                    }
                }

                //Parse string
                //XXX: tidy errors are ignored on purpose, tidy is here to fix the markup
                $tidy->parseString($content, $this->config, 'utf8');

                //Load simplexml
                //XXX: trash all xmlns= broken tags
                $sx = new \SimpleXMLElement(str_replace(['xmlns=', 'xlink:href='], ['xns=', 'href='], (string) $tidy));

                //Extract forecasts
                $forecasts = $type === 'daily' ? $this->parseDaily($sx) : $this->parseHourly($sx);

                //Store data
                foreach($forecasts as $dsm => $slots) {
                    foreach($slots as $slot => $values) {
                        $data[$zipcode][$dsm][$slot] = $values;
                    }
                }

                //Cleanup
                unset($sx);
            }
        }

        //Iterate on types
        foreach($types as $type => $sessions) {
            //Iterate on each session
            foreach($sessions as $sessionId => $session) {
                //Compute session meteo
                if (($meteo = $this->computeMeteo($type, $session, $data)) === null) {
                    //Display warning
                    $output->writeln('No '.$type.' forecast available for session '.$sessionId);

                    //Keep session untouched rather than storing made up values
                    continue;
                }

                //Update session
                $this->applyMeteo($session, $meteo);
            }
        }

        //Flush the updated sessions
        $manager->flush();

        //Close curl handler
        $this->curl_close();

        //Return success
        return self::SUCCESS;
    }

    /**
     * Compute the accuweather day offset of a date
     *
     * The total number of days is used: the d member of a DateInterval only
     * holds the day component and wraps on month boundaries.
     *
     * @param \DateTimeInterface $date The date to locate
     * @param \DateTimeInterface $today Today at midnight
     * @return int The offset, 1 being today
     */
    private function dayOffset($date, $today) {
        return $date->diff($today)->days + 1;
    }

    /**
     * Extract the first number found in a text
     *
     * This avoids matching unit literals (degree sign, RealFeel prefix, %, mm)
     * which depend on the encoding of both this file and the fetched page.
     *
     * @param mixed $text The text or simplexml node to read, cast to string
     * @return float The first number found or 0.0 when there is none
     */
    private function parseNumber($text) {
        return preg_match('/-?\d+(?:\.\d+)?/', (string) $text, $match) === 1 ? floatval($match[0]) : 0.0;
    }

    /**
     * Extract daily forecasts from a daily page
     *
     * @param \SimpleXMLElement $sx The tidied page
     * @return array The forecasts indexed by date text then by 'daily'
     */
    private function parseDaily(\SimpleXMLElement $sx) {
        //Init forecasts
        $forecasts = [];

        //Iterate on each link containing data
        foreach($sx->xpath('//a[@class="daily-forecast-card"]') ?: [] as $node) {
            //Get date
            $dsm = trim((string) $node->div[0]->h2[0]->span[1]);

            //Store temperature and rainrisk as a 0 to 1 ratio
            $forecasts[$dsm]['daily'] = [
                'temperature' => $this->parseNumber($node->div[0]->div[0]->span[0]),
                'rainrisk' => $this->parseNumber($node->div[2]) / 100
            ];
        }

        //Return forecasts
        return $forecasts;
    }

    /**
     * Extract hourly forecasts from an hourly page
     *
     * @param \SimpleXMLElement $sx The tidied page
     * @return array The forecasts indexed by date text then by hour text
     */
    private function parseHourly(\SimpleXMLElement $sx) {
        //Init forecasts
        $forecasts = [];

        //Iterate on each div containing data
        foreach($sx->xpath('//div[@data-shared="false"]') ?: [] as $node) {
            //Get card header holding hour, date, temperature, realfeel and rainrisk
            $header = $node->div[0]->div[0];

            //Get hour
            $hour = trim((string) $header->h2[0]->span[0]);

            //Get date
            $dsm = trim((string) $header->h2[0]->span[1]);

            //Get precipitation paragraph
            $pluvio = $node->div[1]->div[0]->div[0]->div[1]->p[1];

            //Store data
            $forecasts[$dsm][$hour] = [
                'temperature' => $this->parseNumber($header->div[0]),
                'realfeel' => $this->parseNumber($header->span[0]),
                'rainrisk' => $this->parseNumber($header->div[1]) / 100,
                //Label is Rain when we have a rainfall (mm), anything else means no rainfall
                'rainfall' => trim((string) $pluvio) === 'Rain' ? $this->parseNumber($pluvio->span[0]) : 0.0
            ];
        }

        //Return forecasts
        return $forecasts;
    }

    /**
     * Aggregate the forecasts covering a session
     *
     * @param string $type The forecast type, either 'daily' or 'hourly'
     * @param Session $session The session to cover
     * @param array $data The forecasts indexed by zipcode, d/m date and hour or 'daily'
     * @return array|null The aggregated meteo or null when no forecast covers the session
     */
    private function computeMeteo($type, $session, array $data) {
        //Get zipcode
        $zipcode = $session->getLocation()->getZipcode();

        //Get start
        $start = $session->getStart();

        //Daily type
        if ($type === 'daily') {
            //Set period
            $period = [ $start ];
        //Hourly type
        } else {
            //Iterate on each hour from start to stop, stop excluded
            $period = new \DatePeriod($start, new \DateInterval('PT1H'), $session->getStop());
        }

        //Set meteo
        $meteo = [
            'rainfall' => null,
            'rainrisk' => null,
            'realfeel' => [],
            'realfeelmin' => null,
            'realfeelmax' => null,
            'temperature' => [],
            'temperaturemin' => null,
            'temperaturemax' => null
        ];

        //Init found flag
        $found = false;

        //Iterate on the period
        foreach($period as $time) {
            //Set dsm
            $dsm = $time->format('d/m');

            //Set hour
            $hour = $type === 'daily' ? 'daily' : $time->format('H');

            //Check data availability
            if (!isset($data[$zipcode][$dsm][$hour])) {
                //Skip unavailable data
                continue;
            }

            //Set info alias
            $info = $data[$zipcode][$dsm][$hour];

            //Set found flag
            $found = true;

            //Keep highest rain risk
            if ($meteo['rainrisk'] === null || $info['rainrisk'] > $meteo['rainrisk']) {
                $meteo['rainrisk'] = floatval($info['rainrisk']);
            }

            //Sum rainfall when set
            if (isset($info['rainfall'])) {
                $meteo['rainfall'] = ($meteo['rainfall'] === null ? 0.0 : $meteo['rainfall']) + floatval($info['rainfall']);
            }

            //Add temperature
            $meteo['temperature'][$hour] = floatval($info['temperature']);

            //Track temperature range for hourly type only
            if ($type !== 'daily') {
                //Check min temperature
                if ($meteo['temperaturemin'] === null || $info['temperature'] < $meteo['temperaturemin']) {
                    $meteo['temperaturemin'] = floatval($info['temperature']);
                }

                //Check max temperature
                if ($meteo['temperaturemax'] === null || $info['temperature'] > $meteo['temperaturemax']) {
                    $meteo['temperaturemax'] = floatval($info['temperature']);
                }
            }

            //Check if realfeel is set
            if (isset($info['realfeel'])) {
                //Add realfeel
                $meteo['realfeel'][$hour] = floatval($info['realfeel']);

                //Check min realfeel
                if ($meteo['realfeelmin'] === null || $info['realfeel'] < $meteo['realfeelmin']) {
                    $meteo['realfeelmin'] = floatval($info['realfeel']);
                }

                //Check max realfeel
                if ($meteo['realfeelmax'] === null || $info['realfeel'] > $meteo['realfeelmax']) {
                    $meteo['realfeelmax'] = floatval($info['realfeel']);
                }
            }
        }

        //Return meteo only when at least one forecast was found
        return $found ? $meteo : null;
    }

    /**
     * Store aggregated meteo on a session
     *
     * Setters are only called when the value differs from the stored one.
     *
     * @param Session $session The session to update
     * @param array $meteo The aggregated meteo as returned by computeMeteo
     * @return void
     */
    private function applyMeteo($session, array $meteo) {
        //Check if rainfall differ
        if ($session->getRainfall() !== $meteo['rainfall']) {
            $session->setRainfall($meteo['rainfall']);
        }

        //Check if rainrisk differ
        if ($session->getRainrisk() !== $meteo['rainrisk']) {
            $session->setRainrisk($meteo['rainrisk']);
        }

        //Check realfeel array
        if ($meteo['realfeel'] !== []) {
            //Compute average realfeel
            $realfeel = floatval(round(array_sum($meteo['realfeel'])/count($meteo['realfeel']), 1));

            //Check if realfeel differ
            if ($session->getRealfeel() !== $realfeel) {
                $session->setRealfeel($realfeel);
            }

            //Check if realfeelmin differ
            if ($session->getRealfeelmin() !== $meteo['realfeelmin']) {
                $session->setRealfeelmin($meteo['realfeelmin']);
            }

            //Check if realfeelmax differ
            if ($session->getRealfeelmax() !== $meteo['realfeelmax']) {
                $session->setRealfeelmax($meteo['realfeelmax']);
            }
        }

        //Check temperature array
        if ($meteo['temperature'] !== []) {
            //Compute average temperature
            $temperature = floatval(round(array_sum($meteo['temperature'])/count($meteo['temperature']), 1));

            //Check if temperature differ
            if ($session->getTemperature() !== $temperature) {
                $session->setTemperature($temperature);
            }

            //Check if temperaturemin differ
            //XXX: min and max stay null for daily type
            if ($session->getTemperaturemin() !== $meteo['temperaturemin']) {
                $session->setTemperaturemin($meteo['temperaturemin']);
            }

            //Check if temperaturemax differ
            if ($session->getTemperaturemax() !== $meteo['temperaturemax']) {
                $session->setTemperaturemax($meteo['temperaturemax']);
            }
        }
    }

    /**
     * Init curl handler
     *
     * @return bool|void Return success or exit
     */
    public function curl_init() {
        //Init curl
        //XXX: the leading backslash targets the curl extension function, not this method
        if (($this->ch = \curl_init()) === false) {
            //Display error
            //XXX: there is no handle to ask curl_error about at this point
            echo 'Curl init failed'."\n";

            //Exit with failure
            exit(self::FAILURE);
        }

        //Set curl options
        $configured = \curl_setopt_array(
            $this->ch,
            [
                //Force http2
                CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0,
                //Set http headers
                CURLOPT_HTTPHEADER => [
                    //XXX: it seems that akamai protection can be disabled with Pragma: akamai-x-cache-off
                    //XXX: see https://support.globaldots.com/hc/en-us/articles/115003996705-Akamai-Pragma-Headers-overview
                    #'Pragma: akamai-x-cache-off',
                    //XXX: working curl command
                    #curl --http2 --cookie file.jar --cookie-jar file.jar -v -i -k -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' -H 'Accept-Language: en-GB,en;q=0.9' -H 'Cache-Control: no-cache' -H 'Connection: keep-alive' -H 'Host: www.accuweather.com' -H 'Pragma: no-cache' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36' 'https://www.accuweather.com/'
                    //Set accept
                    'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                    //Set accept language
                    'Accept-Language: en-GB,en;q=0.9',
                    //Disable cache
                    'Cache-Control: no-cache',
                    //Keep connection alive
                    'Connection: keep-alive',
                    //Disable cache
                    'Pragma: no-cache',
                    //Force secure requests
                    'Upgrade-Insecure-Requests: 1',
                    //Set user agent
                    'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
                    //Force akamai cookie
                    //XXX: seems to come from http request
                    'Cookie: AKA_A2=A',
                ],
                //Enable cookie
                CURLOPT_COOKIEFILE => '',
                //Disable location following
                CURLOPT_FOLLOWLOCATION => false,
                //Return headers too
                CURLOPT_HEADER => true,
                //Return content
                CURLOPT_RETURNTRANSFER => true,
                //Keep sent headers available for failure reports
                CURLINFO_HEADER_OUT => true
            ]
        );

        //Check options
        if ($configured === false) {
            //Display error
            echo 'Curl setopt array failed: '.\curl_error($this->ch)."\n";

            //Exit with failure
            exit(self::FAILURE);
        }

        //Return success
        return true;
    }

    /**
     * Get url
     *
     * @param string $url The url to fetch
     * @return string|void Return url content or exit
     */
    public function curl_get($url) {
        //Set url to fetch
        if (\curl_setopt($this->ch, CURLOPT_URL, $url) === false) {
            $this->curlAbort('Setopt for '.$url.' failed: '.\curl_error($this->ch));
        }

        //Check return status
        if (($response = \curl_exec($this->ch)) === false) {
            $this->curlAbort(
                'Get for '.$url.' failed: '.\curl_error($this->ch),
                [\curl_getinfo($this->ch, CURLINFO_HEADER_OUT), $response]
            );
        }

        //Get header size
        if (empty($hs = \curl_getinfo($this->ch, CURLINFO_HEADER_SIZE))) {
            $this->curlAbort(
                'Getinfo for '.$url.' failed: '.\curl_error($this->ch),
                [\curl_getinfo($this->ch, CURLINFO_HEADER_OUT), $response]
            );
        }

        //Get header
        if (empty($header = substr($response, 0, $hs))) {
            $this->curlAbort(
                'Header for '.$url.' empty: '.\curl_error($this->ch),
                [\curl_getinfo($this->ch, CURLINFO_HEADER_OUT), $response]
            );
        }

        //Check request success
        //XXX: rely on the parsed status code, the status line differs when curl falls back to http/1.1
        if (($code = \curl_getinfo($this->ch, CURLINFO_HTTP_CODE)) !== 200) {
            $this->curlAbort(
                'Status for '.$url.' failed: '.$code,
                [\curl_getinfo($this->ch, CURLINFO_HEADER_OUT), $header]
            );
        }

        //Return content
        return substr($response, $hs);
    }

    /**
     * Close curl handler
     *
     * @return bool Return true when a handler was closed, false when there was none
     */
    public function curl_close() {
        //Without handler
        if ($this->ch === null || $this->ch === false) {
            //Nothing to close
            return false;
        }

        //Close handler
        //XXX: the curl extension function has no return value
        \curl_close($this->ch);

        //Forget handler
        $this->ch = null;

        //Return success
        return true;
    }

    /**
     * Report a curl failure, close the handler and exit
     *
     * @param string $message The error message to display
     * @param array $dumps The debug values to dump after the message
     * @return void
     */
    private function curlAbort($message, array $dumps = []) {
        //Display error
        echo $message."\n";

        //Display debug values
        foreach($dumps as $dump) {
            var_dump($dump);
        }

        //Close curl handler
        $this->curl_close();

        //Exit with failure
        exit(self::FAILURE);
    }
}