Raphaël G. Git Repositories - airbundle/blob - Command/WeatherCommand.php
Add 75004 accuweather uris
[airbundle] / Command / WeatherCommand.php
1 <?php
2
3 namespace Rapsys\AirBundle\Command;
4
5 use Doctrine\Bundle\DoctrineBundle\Command\DoctrineCommand;
6 use Symfony\Component\Console\Input\InputInterface;
7 use Symfony\Component\Console\Output\OutputInterface;
8 use Symfony\Component\Filesystem\Exception\IOExceptionInterface;
9 use Symfony\Component\Filesystem\Filesystem;
10 use Rapsys\AirBundle\Entity\Session;
11
12 class WeatherCommand extends DoctrineCommand {
/**
 * Exit code returned when the command fails
 */
const FAILURE = 1;

/**
 * Exit code returned when the command succeeds
 */
const SUCCESS = 0;

/**
 * Tidy configuration used to turn fetched html into parsable xml
 */
private $config = [
	//Indent output (mostly cosmetic)
	'indent' => true,
	//Emit xml so the result can be loaded by simplexml
	'output-xml' => true,
	//Keep non breaking spaces unquoted to avoid xml errors
	'quote-nbsp' => false,
	//Let tidy clean up the markup
	'clean' => true,
];

/**
 * Accuweather uris indexed by forecast type then by zipcode
 *
 * The hourly uris end with "?day=" and expect a day number to be appended
 */
private $accuweather = [
	//Hourly forecast uris
	'hourly' => [
		75001 => 'https://www.accuweather.com/en/fr/paris-01-louvre/75001/hourly-weather-forecast/179142_pc?day=',
		75004 => 'https://www.accuweather.com/en/fr/paris-04-hotel-de-ville/75004/hourly-weather-forecast/179145_pc?day=',
		75005 => 'https://www.accuweather.com/en/fr/paris-05-pantheon/75005/hourly-weather-forecast/179146_pc?day=',
		75007 => 'https://www.accuweather.com/en/fr/paris-07-palais-bourbon/75007/hourly-weather-forecast/179148_pc?day=',
		75009 => 'https://www.accuweather.com/en/fr/paris-09-opera/75009/hourly-weather-forecast/179150_pc?day=',
		75015 => 'https://www.accuweather.com/en/fr/paris-15-vaugirard/75015/hourly-weather-forecast/179156_pc?day=',
		75019 => 'https://www.accuweather.com/en/fr/paris-19-buttes-chaumont/75019/hourly-weather-forecast/179160_pc?day=',
		75116 => 'https://www.accuweather.com/en/fr/paris-16-passy/75116/hourly-weather-forecast/179246_pc?day=',
	],
	//Daily forecast uris
	'daily' => [
		75001 => 'https://www.accuweather.com/en/fr/paris-01-louvre/75001/daily-weather-forecast/179142_pc',
		75004 => 'https://www.accuweather.com/en/fr/paris-04-hotel-de-ville/75004/daily-weather-forecast/179145_pc',
		75005 => 'https://www.accuweather.com/en/fr/paris-05-pantheon/75005/daily-weather-forecast/179146_pc',
		75007 => 'https://www.accuweather.com/en/fr/paris-07-palais-bourbon/75007/daily-weather-forecast/179148_pc',
		75009 => 'https://www.accuweather.com/en/fr/paris-09-opera/75009/daily-weather-forecast/179150_pc',
		75015 => 'https://www.accuweather.com/en/fr/paris-15-vaugirard/75015/daily-weather-forecast/179156_pc',
		75019 => 'https://www.accuweather.com/en/fr/paris-19-buttes-chaumont/75019/daily-weather-forecast/179160_pc',
		75116 => 'https://www.accuweather.com/en/fr/paris-16-passy/75116/daily-weather-forecast/179246_pc',
	],
];

/**
 * Curl handle shared by the curl_init, curl_get and curl_close methods
 */
private $ch = null;
59
/**
 * Configure the weather command
 *
 * Registers the command name, its description and help texts, and the
 * daily and hourly aliases.
 */
protected function configure() {
	//Name used to invoke the command
	$this->setName('rapsysair:weather');

	//Short description displayed by bin/console list
	$this->setDescription('Updates session rain and temperature fields');

	//Long description displayed by bin/console --help
	$this->setHelp('This command updates session rain and temperature fields in next three days');

	//Aliases restricting the run to daily or hourly forecasts
	$this->setAliases(['rapsysair:weather:daily', 'rapsysair:weather:hourly']);
}
73
/**
 * Fetch accuweather forecasts and update weather fields of pending sessions
 *
 * Depending on the name used to invoke the command (rapsysair:weather,
 * rapsysair:weather:hourly or rapsysair:weather:daily) the hourly and/or daily
 * pending sessions are loaded from the Session repository. The forecast pages of
 * every involved zipcode are fetched through curl, or read from a cache kept in
 * sys_get_temp_dir()/accuweather for 2 hours (hourly) or 4 hours (daily), then
 * parsed with tidy and simplexml. Rainfall, rainrisk, realfeel and temperature
 * values are finally aggregated per session and flushed through doctrine.
 *
 * @param InputInterface $input The console input
 * @param OutputInterface $output The console output
 * @return int Return self::SUCCESS, or self::FAILURE when the cache can not be written
 */
protected function execute(InputInterface $input, OutputInterface $output) {
	//Fetch doctrine
	$doctrine = $this->getDoctrine();

	//Get the name used to invoke the command (canonical name or alias)
	$command = $input->getFirstArgument();

	//Set today at midnight, reference for the day offsets
	$today = (new \DateTime('now'))->setTime(0, 0, 0);

	//Init zipcodes array
	//XXX: zipcode => [ day offset or 'daily' => [ session id => session id ] ]
	$zipcodes = [];

	//Init types
	$types = [];

	//Process hourly accuweather
	if ($command === 'rapsysair:weather:hourly' || $command === 'rapsysair:weather') {
		//Fetch hourly sessions to update
		$types['hourly'] = $doctrine->getRepository(Session::class)->findAllPendingHourlyWeather();

		//Iterate on each session
		foreach($types['hourly'] as $sessionId => $session) {
			//Get zipcode
			$zipcode = $session->getLocation()->getZipcode();

			//Register start and stop days as they may differ
			foreach([$session->getStart(), $session->getStop()] as $date) {
				//Set day offset, today is day 1
				//XXX: use days (total days count), the d member is only the day component and wraps on month boundary
				$day = $date->diff($today)->days + 1;

				//Register session for this zipcode and day
				$zipcodes[$zipcode][$day][$sessionId] = $sessionId;
			}
		}
	}

	//Process daily accuweather
	if ($command === 'rapsysair:weather:daily' || $command === 'rapsysair:weather') {
		//Fetch daily sessions to update
		$types['daily'] = $doctrine->getRepository(Session::class)->findAllPendingDailyWeather();

		//Iterate on each session
		foreach($types['daily'] as $sessionId => $session) {
			//Register session for this zipcode daily forecast
			$zipcodes[$session->getLocation()->getZipcode()]['daily'][$sessionId] = $sessionId;
		}
	}

	//Set tmpdir
	//XXX: one cache file per zipcode and per day offset or daily forecast
	$tmpdir = sys_get_temp_dir().'/accuweather';

	//Create tmpdir when missing
	if (!is_dir($tmpdir)) {
		try {
			//Create dir
			(new Filesystem())->mkdir($tmpdir, 0775);
		} catch (IOExceptionInterface $exception) {
			//Display error
			echo 'Create dir '.$exception->getPath().' failed'."\n";

			//Return failure
			//XXX: return instead of exit to let the console application terminate normally
			return self::FAILURE;
		}
	}

	//Tidy object
	$tidy = new \tidy();

	//Init curl
	$this->curl_init();

	//Init data array
	//XXX: zipcode => [ dsm => [ hour or 'daily' => [ field => value ] ] ]
	$data = [];

	//Iterate on zipcodes
	foreach($zipcodes as $zipcode => $days) {
		//Iterate on days
		foreach(array_keys($days) as $day) {
			//Set forecast kind
			//XXX: strict comparison as day is either an int offset or the 'daily' string
			$kind = $day === 'daily' ? 'daily' : 'hourly';

			//Skip zipcode without configured uri
			if (!isset($this->accuweather[$kind][$zipcode])) {
				//Display error
				echo 'No accuweather '.$kind.' uri for zipcode '.$zipcode."\n";

				//Skip unavailable zipcode
				continue;
			}

			//Set cache file
			$file = $tmpdir.'/'.$zipcode.'.'.$day.'.html';

			//Set cache expiration: 4 hours for daily, 2 hours for hourly
			$expire = time() - ($kind === 'daily' ? 4 : 2)*3600;

			//Try to load content from cache
			if (!is_file($file) || stat($file)['ctime'] <= $expire || ($content = file_get_contents($file)) === false) {
				//Prevent timing detection
				//XXX: from 0.1 to 5 seconds
				usleep(rand(1,50) * 100000);

				//Get content
				//XXX: hourly uri expects the day offset to be appended
				//TODO: for daily we may load data for requested quarter of the day
				$content = $this->curl_get($this->accuweather[$kind][$zipcode].($kind === 'daily' ? '' : $day));

				//Store it
				if (file_put_contents($file, $content) === false) {
					//Display error
					echo 'Write to '.$file.' failed'."\n";

					//Close curl handler
					$this->curl_close();

					//Return failure
					return self::FAILURE;
				}
			}

			//Parse string
			//XXX: tidy error buffer is ignored on purpose, tidy is here to fix the markup
			$tidy->parseString($content, $this->config, 'utf8');

			//Load simplexml
			//XXX: trash all xmlns= broken tags
			$sx = new \SimpleXMLElement(str_replace(['xmlns=', 'xlink:href='], ['xns=', 'href='], $tidy));

			//Process daily
			if ($kind === 'daily') {
				//Iterate on each link containing data
				foreach($sx->xpath('//a[@class="daily-forecast-card"]') as $node) {
					//Get date
					$dsm = trim($node->div[0]->h2[0]->span[1]);

					//Store data
					$data[$zipcode][$dsm]['daily'] = [
						//Temperature without degree sign
						'temperature' => trim(str_replace('°', '', $node->div[0]->div[0]->span[0])),
						//Rain risk as a ratio
						//XXX: floatval avoids an arithmetic error on an empty string
						'rainrisk' => floatval(str_replace('%', '', trim($node->div[2])))/100
					];
				}
			//Process hourly
			} else {
				//Iterate on each div containing data
				foreach($sx->xpath('//div[@data-shared="false"]') as $node) {
					//Get card header
					$card = $node->div[0]->div[0];

					//Get hour
					$hour = trim($card->h2[0]->span[0]);

					//Get dsm
					$dsm = trim($card->h2[0]->span[1]);

					//Get rain paragraph
					$rain = $node->div[1]->div[0]->div[0]->div[1]->p[1];

					//Store data
					$data[$zipcode][$dsm][$hour] = [
						//Temperature without degree sign
						'temperature' => trim(str_replace('°', '', $card->div[0])),
						//Realfeel without label and degree sign
						'realfeel' => str_replace(['RealFeel® ', '°'], '', trim($card->span[0])),
						//Rain risk as a ratio
						'rainrisk' => floatval(str_replace('%', '', trim($card->div[1])))/100,
						//Rainfall in mm, label is Rain when we have a rainfall, 0 otherwise
						'rainfall' => trim($rain) === 'Rain' ? str_replace(' mm', '', $rain->span[0]) : 0
					];
				}
			}

			//Cleanup
			unset($sx);
		}
	}

	//Iterate on types
	foreach($types as $type => $sessions) {
		//Iterate on each session
		foreach($sessions as $sessionId => $session) {
			//Get zipcode
			$zipcode = $session->getLocation()->getZipcode();

			//Get start
			$start = $session->getStart();

			//Daily type
			if ($type === 'daily') {
				//Set period
				$period = [ $start ];
			//Hourly type
			} else {
				//Compute period, one entry per hour from start to stop
				$period = new \DatePeriod($start, new \DateInterval('PT1H'), $session->getStop());
			}

			//Set meteo
			$meteo = [
				'rainfall' => null,
				'rainrisk' => null,
				'realfeel' => [],
				'realfeelmin' => null,
				'realfeelmax' => null,
				'temperature' => [],
				'temperaturemin' => null,
				'temperaturemax' => null
			];

			//Iterate on the period
			foreach($period as $time) {
				//Set dsm
				$dsm = $time->format('d/m');

				//Set hour
				$hour = $type === 'daily' ? $type : $time->format('H');

				//Check data availability
				if (!isset($data[$zipcode][$dsm][$hour])) {
					//Skip unavailable data
					continue;
				}

				//Set info alias
				$info = $data[$zipcode][$dsm][$hour];

				//Check if rainrisk is higher
				if ($meteo['rainrisk'] === null || $info['rainrisk'] > $meteo['rainrisk']) {
					//Set highest rain risk
					$meteo['rainrisk'] = floatval($info['rainrisk']);
				}

				//Check if rainfall is set
				if (isset($info['rainfall'])) {
					//Set rainfall sum
					$meteo['rainfall'] += floatval($info['rainfall']);
				}

				//Add temperature
				$meteo['temperature'][$hour] = $info['temperature'];

				//Hourly type
				if ($type !== 'daily') {
					//Check min temperature
					if ($meteo['temperaturemin'] === null || $info['temperature'] < $meteo['temperaturemin']) {
						$meteo['temperaturemin'] = floatval($info['temperature']);
					}

					//Check max temperature
					if ($meteo['temperaturemax'] === null || $info['temperature'] > $meteo['temperaturemax']) {
						$meteo['temperaturemax'] = floatval($info['temperature']);
					}
				}

				//Check if realfeel is set
				if (isset($info['realfeel'])) {
					//Add realfeel
					$meteo['realfeel'][$hour] = $info['realfeel'];

					//Check min realfeel
					if ($meteo['realfeelmin'] === null || $info['realfeel'] < $meteo['realfeelmin']) {
						$meteo['realfeelmin'] = floatval($info['realfeel']);
					}

					//Check max realfeel
					if ($meteo['realfeelmax'] === null || $info['realfeel'] > $meteo['realfeelmax']) {
						$meteo['realfeelmax'] = floatval($info['realfeel']);
					}
				}
			}

			//Check that at least one forecast entry was found
			//XXX: rainrisk is set as soon as one entry is processed
			if ($meteo['rainrisk'] === null) {
				//Display error
				echo 'No weather data for session '.$sessionId."\n";

				//Leave session untouched instead of storing bogus values
				continue;
			}

			//Check if rainfall differ
			if ($session->getRainfall() !== $meteo['rainfall']) {
				//Set rainfall
				$session->setRainfall($meteo['rainfall']);
			}

			//Check if rainrisk differ
			if ($session->getRainrisk() !== $meteo['rainrisk']) {
				//Set rainrisk
				$session->setRainrisk($meteo['rainrisk']);
			}

			//Check realfeel array
			if ($meteo['realfeel'] !== []) {
				//Compute average realfeel rounded to one decimal
				$realfeel = floatval(round(array_sum($meteo['realfeel'])/count($meteo['realfeel']),1));

				//Check if realfeel differ
				if ($session->getRealfeel() !== $realfeel) {
					//Set average realfeel
					$session->setRealfeel($realfeel);
				}

				//Check if realfeelmin differ
				if ($session->getRealfeelmin() !== $meteo['realfeelmin']) {
					//Set realfeelmin
					$session->setRealfeelmin($meteo['realfeelmin']);
				}

				//Check if realfeelmax differ
				if ($session->getRealfeelmax() !== $meteo['realfeelmax']) {
					//Set realfeelmax
					$session->setRealfeelmax($meteo['realfeelmax']);
				}
			}

			//Check temperature array
			if ($meteo['temperature'] !== []) {
				//Compute average temperature rounded to one decimal
				$temperature = floatval(round(array_sum($meteo['temperature'])/count($meteo['temperature']),1));

				//Check if temperature differ
				if ($session->getTemperature() !== $temperature) {
					//Set average temperature
					$session->setTemperature($temperature);
				}

				//Check if temperaturemin differ
				if ($session->getTemperaturemin() !== $meteo['temperaturemin']) {
					//Set temperaturemin
					$session->setTemperaturemin($meteo['temperaturemin']);
				}

				//Check if temperaturemax differ
				if ($session->getTemperaturemax() !== $meteo['temperaturemax']) {
					//Set temperaturemax
					$session->setTemperaturemax($meteo['temperaturemax']);
				}
			}
		}
	}

	//Flush updated sessions
	$doctrine->getManager()->flush();

	//Close curl handler
	$this->curl_close();

	//Return success
	return self::SUCCESS;
}
469
/**
 * Init curl handler
 *
 * Creates the curl handle stored in $this->ch and configures it with browser
 * like headers, in-memory cookie handling, no redirect following, and with
 * response headers returned along the content.
 *
 * @return bool|void Return success or exit
 */
function curl_init() {
	//Init curl
	//XXX: leading backslash targets the global function, not this method
	if (($this->ch = \curl_init()) === false) {
		//Display error
		//XXX: there is no handle to query here, calling curl_error on false would raise an error
		echo 'Curl init failed'."\n";

		//Exit with failure
		exit(self::FAILURE);
	}

	//Set curl options
	if (
		curl_setopt_array(
			$this->ch,
			[
				//Request http2
				CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0,
				//Set http headers
				CURLOPT_HTTPHEADER => [
					//XXX: it seems that you can disable akamai protection with Pragma: akamai-x-cache-off
					//XXX: see https://support.globaldots.com/hc/en-us/articles/115003996705-Akamai-Pragma-Headers-overview
					//XXX: working curl command
					//XXX: curl --http2 --cookie file.jar --cookie-jar file.jar -v -i -k -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' -H 'Accept-Language: en-GB,en;q=0.9' -H 'Cache-Control: no-cache' -H 'Connection: keep-alive' -H 'Host: www.accuweather.com' -H 'Pragma: no-cache' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36' 'https://www.accuweather.com/'
					//Set accept
					'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
					//Set accept language
					'Accept-Language: en-GB,en;q=0.9',
					//Disable cache
					'Cache-Control: no-cache',
					//Keep connection alive
					'Connection: keep-alive',
					//Disable cache
					'Pragma: no-cache',
					//Force secure requests
					'Upgrade-Insecure-Requests: 1',
					//Set user agent
					'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
					//Force akamai cookie
					//XXX: seems to come from http request
					'Cookie: AKA_A2=A',
				],
				//Enable cookie
				//XXX: empty file name enables cookie handling without loading any file
				CURLOPT_COOKIEFILE => '',
				//Disable location following
				CURLOPT_FOLLOWLOCATION => false,
				//Return headers too
				CURLOPT_HEADER => true,
				//Return content
				CURLOPT_RETURNTRANSFER => true,

				//XXX: debug, keep sent headers available through curl_getinfo
				CURLINFO_HEADER_OUT => true
			]
		) === false
	) {
		//Display error
		echo 'Curl setopt array failed: '.curl_error($this->ch)."\n";

		//Exit with failure
		exit(self::FAILURE);
	}

	//Return success
	return true;
}
541
/**
 * Get url
 *
 * Fetches the url with the handle stored in $this->ch and returns the response
 * body with the headers stripped. Any failure (option setting, transfer, missing
 * headers, http status other than 200) displays debug information, closes the
 * handle and terminates the script with self::FAILURE.
 *
 * @param string $url The url to fetch
 * @return string|void Return url content or exit
 */
function curl_get($url) {
	//Set url to fetch
	if (curl_setopt($this->ch, CURLOPT_URL, $url) === false) {
		//Exit with failure
		$this->curl_fail('Setopt for '.$url.' failed: '.curl_error($this->ch));
	}

	//Check return status
	if (($response = curl_exec($this->ch)) === false) {
		//Exit with failure
		$this->curl_fail('Get for '.$url.' failed: '.curl_error($this->ch), true, $response);
	}

	//Get header size
	if (empty($hs = curl_getinfo($this->ch, CURLINFO_HEADER_SIZE))) {
		//Exit with failure
		$this->curl_fail('Getinfo for '.$url.' failed: '.curl_error($this->ch), true, $response);
	}

	//Get header
	if (empty($header = substr($response, 0, $hs))) {
		//Exit with failure
		$this->curl_fail('Header for '.$url.' empty: '.curl_error($this->ch), true, $response);
	}

	//Check request success
	//XXX: rely on the parsed status code, http2 is only a preference and the status line may be HTTP/1.1 200 OK
	if (($status = curl_getinfo($this->ch, CURLINFO_HTTP_CODE)) !== 200) {
		//Exit with failure
		//XXX: curl_error is empty on http level errors, display the status code instead
		$this->curl_fail('Status for '.$url.' failed: '.$status, true, $header);
	}

	//Return content
	return substr($response, $hs);
}

/**
 * Display a curl error, close curl handler and exit with failure
 *
 * @param string $message The error message to display
 * @param bool $debug Whether to dump sent headers and $dump
 * @param mixed $dump The response or header to dump when $debug is true
 * @return void Never returns, the script is terminated
 */
private function curl_fail($message, $debug = false, $dump = null) {
	//Display error
	echo $message."\n";

	//With debug
	if ($debug) {
		//Display sent headers
		var_dump(curl_getinfo($this->ch, CURLINFO_HEADER_OUT));

		//Display response
		var_dump($dump);
	}

	//Close curl handler
	\curl_close($this->ch);

	//Exit with failure
	exit(self::FAILURE);
}
635
/**
 * Close curl handler
 *
 * @return bool Return true when a handle was closed, false when there was none
 */
function curl_close() {
	//Check that a handle is available
	//XXX: handle is null before init and false after a failed init
	if ($this->ch === null || $this->ch === false) {
		//Nothing to close
		return false;
	}

	//Close curl handler
	//XXX: global curl_close returns nothing, so its result can not be used as status
	\curl_close($this->ch);

	//Reset handle to prevent reuse of a closed handle
	$this->ch = null;

	//Return success
	return true;
}
644 }