Raphaël G. Git Repositories - airbundle/blob - Command/WeatherCommand.php
Switch to new header system
[airbundle] / Command / WeatherCommand.php
1 <?php
2
3 namespace Rapsys\AirBundle\Command;
4
5 use Doctrine\Bundle\DoctrineBundle\Command\DoctrineCommand;
6 use Symfony\Component\Console\Input\InputInterface;
7 use Symfony\Component\Console\Output\OutputInterface;
8 use Symfony\Component\Filesystem\Exception\IOExceptionInterface;
9 use Symfony\Component\Filesystem\Filesystem;
10 use Rapsys\AirBundle\Entity\Session;
11
12 class WeatherCommand extends DoctrineCommand {
/**
 * Exit code returned when the command fails
 */
const FAILURE = 1;

/**
 * Exit code returned when the command succeeds
 */
const SUCCESS = 0;

/**
 * Tidy configuration used to repair every fetched page before xml parsing
 *
 * @var array
 */
private $config = [
	//Indent the repaired markup (cosmetic only)
	'indent' => true,
	//Emit xml so the result can be loaded by simplexml
	'output-xml' => true,
	//Required to avoid xml errors
	'quote-nbsp' => false,
	//Let tidy clean up the markup
	'clean' => true
];

/**
 * Accuweather uris indexed by forecast type then by zipcode
 *
 * Hourly uris end with "?day=": the day number is appended when the uri is requested
 * Daily uris are requested as is
 *
 * @var array
 */
private $accuweather = [
	//Hourly forecast uris
	'hourly' => [
		75001 => 'https://www.accuweather.com/en/fr/paris-01-louvre/75001/hourly-weather-forecast/179142_pc?day=',
		75004 => 'https://www.accuweather.com/en/fr/paris-04-hotel-de-ville/75004/hourly-weather-forecast/179145_pc?day=',
		75005 => 'https://www.accuweather.com/en/fr/paris-05-pantheon/75005/hourly-weather-forecast/179146_pc?day=',
		75007 => 'https://www.accuweather.com/en/fr/paris-07-palais-bourbon/75007/hourly-weather-forecast/179148_pc?day=',
		75009 => 'https://www.accuweather.com/en/fr/paris-09-opera/75009/hourly-weather-forecast/179150_pc?day=',
		75013 => 'https://www.accuweather.com/en/fr/paris-13-gobelins/75013/hourly-weather-forecast/179154_pc?day=',
		75015 => 'https://www.accuweather.com/en/fr/paris-15-vaugirard/75015/hourly-weather-forecast/179156_pc?day=',
		75019 => 'https://www.accuweather.com/en/fr/paris-19-buttes-chaumont/75019/hourly-weather-forecast/179160_pc?day=',
		75116 => 'https://www.accuweather.com/en/fr/paris-16-passy/75116/hourly-weather-forecast/179246_pc?day='
	],
	//Daily forecast uris
	'daily' => [
		75001 => 'https://www.accuweather.com/en/fr/paris-01-louvre/75001/daily-weather-forecast/179142_pc',
		75004 => 'https://www.accuweather.com/en/fr/paris-04-hotel-de-ville/75004/daily-weather-forecast/179145_pc',
		75005 => 'https://www.accuweather.com/en/fr/paris-05-pantheon/75005/daily-weather-forecast/179146_pc',
		75007 => 'https://www.accuweather.com/en/fr/paris-07-palais-bourbon/75007/daily-weather-forecast/179148_pc',
		75009 => 'https://www.accuweather.com/en/fr/paris-09-opera/75009/daily-weather-forecast/179150_pc',
		75013 => 'https://www.accuweather.com/en/fr/paris-13-gobelins/75013/daily-weather-forecast/179154_pc',
		75015 => 'https://www.accuweather.com/en/fr/paris-15-vaugirard/75015/daily-weather-forecast/179156_pc',
		75019 => 'https://www.accuweather.com/en/fr/paris-19-buttes-chaumont/75019/daily-weather-forecast/179160_pc',
		75116 => 'https://www.accuweather.com/en/fr/paris-16-passy/75116/daily-weather-forecast/179246_pc'
	]
];

/**
 * Curl handler shared by curl_init, curl_get and curl_close
 *
 * Stays null until curl_init is called
 */
private $ch = null;
61
/**
 * Configure the weather command
 *
 * Declares the command name, the description, the help text and the
 * daily and hourly aliases under which the command can be invoked
 */
protected function configure() {
	//Name used to invoke the command
	$this->setName('rapsysair:weather');

	//Description shown with bin/console list
	$this->setDescription('Updates session rain and temperature fields');

	//Help shown with bin/console --help rapsysair:weather
	$this->setHelp('This command updates session rain and temperature fields in next three days');

	//Daily and hourly aliases
	$this->setAliases(['rapsysair:weather:daily', 'rapsysair:weather:hourly']);
}
75
/**
 * Update weather fields of pending sessions from accuweather forecasts
 *
 * The first input argument selects the work to do: rapsysair:weather:hourly,
 * rapsysair:weather:daily, or rapsysair:weather for both.
 *
 * Steps:
 * - load the pending sessions and group them by zipcode and accuweather page
 * - fetch each page, through a file cache stored in the system temporary directory
 * - repair the markup with tidy and extract the forecast with simplexml
 * - aggregate the forecast over each session period and update the session
 * - flush the changes through doctrine
 *
 * Hours without forecast are ignored and a session without any forecast is left untouched.
 *
 * XXX: curl_init and curl_get terminate the process themselves on curl failure
 *
 * @param InputInterface $input The console input
 * @param OutputInterface $output The console output, used to report failures
 * @return int self::SUCCESS, or self::FAILURE when the cache directory or a cache file can not be written
 */
protected function execute(InputInterface $input, OutputInterface $output) {
	//Fetch doctrine
	$doctrine = $this->getDoctrine();

	//Get manager
	$manager = $doctrine->getManager();

	//Get session repository
	$repository = $doctrine->getRepository(Session::class);

	//Tidy object
	$tidy = new \tidy();

	//Get the name the command was invoked with
	$command = $input->getFirstArgument();

	//Set today at midnight, reference of the accuweather day number
	$today = (new \DateTime('now'))->setTime(0, 0, 0);

	//Init zipcodes array
	//XXX: zipcode => page (day number or 'daily') => session ids
	$zipcodes = [];

	//Init types
	$types = [];

	//Process hourly accuweather
	if ($command === 'rapsysair:weather:hourly' || $command === 'rapsysair:weather') {
		//Fetch hourly sessions to attribute
		$types['hourly'] = $repository->findAllPendingHourlyWeather();

		//Iterate on each session
		foreach($types['hourly'] as $sessionId => $session) {
			//Get zipcode
			$zipcode = $session->getLocation()->getZipcode();

			//Register the start day then the stop day
			foreach([$session->getStart(), $session->getStop()] as $date) {
				//Set day number, today is 1
				//XXX: days is the total day count, d is only the day component and wraps after one month
				$day = $date->diff($today)->days + 1;

				//Register session for this zipcode and day
				$zipcodes[$zipcode][$day][$sessionId] = $sessionId;
			}
		}
	}

	//Process daily accuweather
	if ($command === 'rapsysair:weather:daily' || $command === 'rapsysair:weather') {
		//Fetch daily sessions to attribute
		$types['daily'] = $repository->findAllPendingDailyWeather();

		//Iterate on each session
		foreach($types['daily'] as $sessionId => $session) {
			//Register session for this zipcode daily page
			$zipcodes[$session->getLocation()->getZipcode()]['daily'][$sessionId] = $sessionId;
		}
	}

	//Set tmpdir
	//XXX: worst case scenario we have 3 files per zipcode
	if (!is_dir($tmpdir = sys_get_temp_dir().'/accuweather')) {
		try {
			//Create dir
			(new Filesystem())->mkdir($tmpdir, 0775);
		} catch (IOExceptionInterface $exception) {
			//Display error
			$output->writeln('Create dir '.$exception->getPath().' failed');

			//Return failure
			return self::FAILURE;
		}
	}

	//Init curl
	$this->curl_init();

	//Init data array
	//XXX: zipcode => day/month => hour or 'daily' => forecast
	$data = [];

	//Iterate on zipcodes
	foreach($zipcodes as $zipcode => $days) {
		//Iterate on days
		foreach(array_keys($days) as $day) {
			//Set daily flag
			//XXX: strict comparison as hourly keys are integers
			$daily = $day === 'daily';

			//Skip zipcode without known uri
			if (!isset($this->accuweather[$daily ? 'daily' : 'hourly'][$zipcode])) {
				//Display error
				$output->writeln('No accuweather uri for zipcode '.$zipcode);

				//Skip this page
				continue;
			}

			//Set cache file
			$file = $tmpdir.'/'.$zipcode.'.'.$day.'.html';

			//Set cache lifetime, 4 hours for daily and 2 hours for hourly
			$lifetime = ($daily ? 4 : 2) * 3600;

			//Try to load content from cache
			if (!is_file($file) || stat($file)['ctime'] <= (time() - $lifetime) || ($content = file_get_contents($file)) === false) {
				//Prevent timing detection
				//XXX: from 0.1 to 5 seconds
				usleep(rand(1, 50) * 100000);

				//Get content
				//TODO: for daily we may load data for requested quarter of the day
				$content = $this->curl_get($daily ? $this->accuweather['daily'][$zipcode] : $this->accuweather['hourly'][$zipcode].$day);

				//Store it
				if (file_put_contents($file, $content) === false) {
					//Display error
					$output->writeln('Write to '.$file.' failed');

					//Close curl handler
					$this->curl_close();

					//Return failure
					return self::FAILURE;
				}
			}

			//Parse string
			//XXX: tidy errors are ignored on purpose, tidy is here to fix the markup
			$tidy->parseString($content, $this->config, 'utf8');

			//Load simplexml
			//XXX: trash all xmlns= broken tags
			$sx = new \SimpleXMLElement(str_replace(['xmlns=', 'xlink:href='], ['xns=', 'href='], (string)$tidy));

			//Process daily
			if ($daily) {
				//Iterate on each link containing data
				foreach(($sx->xpath('//a[@class="daily-forecast-card"]') ?: []) as $node) {
					//Get date
					$dsm = trim($node->div[0]->h2[0]->span[1]);

					//Store data
					$data[$zipcode][$dsm]['daily'] = [
						//Get temperature without the degree sign
						'temperature' => floatval(trim(str_replace('°', '', $node->div[0]->div[0]->span[0]))),
						//Get rainrisk as a ratio
						'rainrisk' => floatval(str_replace('%', '', trim($node->div[2]))) / 100
					];
				}
			//Process hourly
			} else {
				//Iterate on each div containing data
				foreach(($sx->xpath('//div[@data-shared="false"]') ?: []) as $node) {
					//Get card header
					$card = $node->div[0]->div[0];

					//Get hour
					$hour = trim($card->h2[0]->span[0]);

					//Get dsm
					$dsm = trim($card->h2[0]->span[1]);

					//Get pluviometry node
					$pluvio = $node->div[1]->div[0]->div[0]->div[1]->p[1];

					//Store data
					$data[$zipcode][$dsm][$hour] = [
						//Get temperature without the degree sign
						'temperature' => floatval(trim(str_replace('°', '', $card->div[0]))),
						//Get realfeel without label and degree sign
						'realfeel' => floatval(str_replace(['RealFeel® ', '°'], '', trim($card->span[0]))),
						//Get rainrisk as a ratio
						'rainrisk' => floatval(str_replace('%', '', trim($card->div[1]))) / 100,
						//Label is Rain when we have a rainfall (mm), else it is 0
						'rainfall' => trim($pluvio) === 'Rain' ? floatval(str_replace(' mm', '', $pluvio->span[0])) : 0.0
					];
				}
			}

			//Cleanup
			unset($sx);
		}
	}

	//Iterate on types
	foreach($types as $type => $sessions) {
		//Iterate on each session
		foreach($sessions as $sessionId => $session) {
			//Get zipcode
			$zipcode = $session->getLocation()->getZipcode();

			//Get start
			$start = $session->getStart();

			//Daily type
			if ($type === 'daily') {
				//Set period
				$period = [ $start ];
			//Hourly type
			} else {
				//Compute period, one step per hour from start to stop
				$period = new \DatePeriod($start, new \DateInterval('PT1H'), $session->getStop());
			}

			//Set meteo
			$meteo = [
				'rainfall' => null,
				'rainrisk' => null,
				'realfeel' => [],
				'realfeelmin' => null,
				'realfeelmax' => null,
				'temperature' => [],
				'temperaturemin' => null,
				'temperaturemax' => null
			];

			//Iterate on the period
			foreach($period as $time) {
				//Set dsm
				$dsm = $time->format('d/m');

				//Set hour
				$hour = $type === 'daily' ? $type : $time->format('H');

				//Check data availability
				//XXX: skipping prevents undefined values from being aggregated as zeros
				if (!isset($data[$zipcode][$dsm][$hour])) {
					//Skip unavailable data
					continue;
				}

				//Set info alias
				$info = $data[$zipcode][$dsm][$hour];

				//Check if rainrisk is higher
				if ($meteo['rainrisk'] === null || $info['rainrisk'] > $meteo['rainrisk']) {
					//Set highest rain risk
					$meteo['rainrisk'] = floatval($info['rainrisk']);
				}

				//Check if rainfall is set
				if (isset($info['rainfall'])) {
					//Set rainfall sum
					$meteo['rainfall'] += floatval($info['rainfall']);
				}

				//Add temperature
				$meteo['temperature'][$hour] = $info['temperature'];

				//Hourly type
				if ($type !== 'daily') {
					//Check min temperature
					if ($meteo['temperaturemin'] === null || $info['temperature'] < $meteo['temperaturemin']) {
						$meteo['temperaturemin'] = floatval($info['temperature']);
					}

					//Check max temperature
					if ($meteo['temperaturemax'] === null || $info['temperature'] > $meteo['temperaturemax']) {
						$meteo['temperaturemax'] = floatval($info['temperature']);
					}
				}

				//Check if realfeel is set
				if (isset($info['realfeel'])) {
					//Add realfeel
					$meteo['realfeel'][$hour] = $info['realfeel'];

					//Check min realfeel
					if ($meteo['realfeelmin'] === null || $info['realfeel'] < $meteo['realfeelmin']) {
						$meteo['realfeelmin'] = floatval($info['realfeel']);
					}

					//Check max realfeel
					if ($meteo['realfeelmax'] === null || $info['realfeel'] > $meteo['realfeelmax']) {
						$meteo['realfeelmax'] = floatval($info['realfeel']);
					}
				}
			}

			//Check if any forecast was found
			//XXX: temperature is stored for every available forecast
			if ($meteo['temperature'] === []) {
				//Display error
				$output->writeln('No forecast available for session '.$sessionId);

				//Leave session untouched
				continue;
			}

			//Check if rainfall differ
			if ($session->getRainfall() !== $meteo['rainfall']) {
				//Set rainfall
				$session->setRainfall($meteo['rainfall']);
			}

			//Check if rainrisk differ
			if ($session->getRainrisk() !== $meteo['rainrisk']) {
				//Set rainrisk
				$session->setRainrisk($meteo['rainrisk']);
			}

			//Check realfeel array
			if ($meteo['realfeel'] !== []) {
				//Compute average realfeel
				$realfeel = floatval(round(array_sum($meteo['realfeel'])/count($meteo['realfeel']), 1));

				//Check if realfeel differ
				if ($session->getRealfeel() !== $realfeel) {
					//Set average realfeel
					$session->setRealfeel($realfeel);
				}

				//Check if realfeelmin differ
				if ($session->getRealfeelmin() !== $meteo['realfeelmin']) {
					//Set realfeelmin
					$session->setRealfeelmin($meteo['realfeelmin']);
				}

				//Check if realfeelmax differ
				if ($session->getRealfeelmax() !== $meteo['realfeelmax']) {
					//Set realfeelmax
					$session->setRealfeelmax($meteo['realfeelmax']);
				}
			}

			//Compute average temperature
			$temperature = floatval(round(array_sum($meteo['temperature'])/count($meteo['temperature']), 1));

			//Check if temperature differ
			if ($session->getTemperature() !== $temperature) {
				//Set average temperature
				$session->setTemperature($temperature);
			}

			//Check if temperaturemin differ
			if ($session->getTemperaturemin() !== $meteo['temperaturemin']) {
				//Set temperaturemin
				$session->setTemperaturemin($meteo['temperaturemin']);
			}

			//Check if temperaturemax differ
			if ($session->getTemperaturemax() !== $meteo['temperaturemax']) {
				//Set temperaturemax
				$session->setTemperaturemax($meteo['temperaturemax']);
			}
		}
	}

	//Flush the updated sessions
	$manager->flush();

	//Close curl handler
	$this->curl_close();

	//Return success
	return self::SUCCESS;
}
471
/**
 * Init curl handler
 *
 * Creates the curl handler stored in $this->ch and sets the options shared by
 * every request: preferred http version, browser like headers, in memory cookies,
 * no redirect following, response returned as a string with its headers.
 *
 * On failure an error is displayed and the process exits with self::FAILURE.
 *
 * @return bool|void Return success or exit
 */
function curl_init() {
	//Init curl
	//XXX: the leading backslash targets the curl extension function, not this method
	if (($this->ch = \curl_init()) === false) {
		//Display error
		//XXX: there is no handler to query curl_error with when init fails
		echo 'Curl init failed'."\n";

		//Exit with failure
		exit(self::FAILURE);
	}

	//Set curl options
	if (
		curl_setopt_array(
			$this->ch,
			[
				//Force http2
				CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0,
				//Set http headers
				//XXX: mimic a browser request
				//XXX: akamai protection may be disabled with Pragma: akamai-x-cache-off
				//XXX: see https://support.globaldots.com/hc/en-us/articles/115003996705-Akamai-Pragma-Headers-overview
				CURLOPT_HTTPHEADER => [
					//Set accept
					'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
					//Set accept language
					'Accept-Language: en-GB,en;q=0.9',
					//Disable cache
					'Cache-Control: no-cache',
					//Keep connection alive
					'Connection: keep-alive',
					//Disable cache
					'Pragma: no-cache',
					//Force secure requests
					'Upgrade-Insecure-Requests: 1',
					//Set user agent
					'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
					//Force akamai cookie
					//XXX: seems to come from http request
					'Cookie: AKA_A2=A',
				],
				//Enable cookie
				//XXX: an empty file name enables cookie handling without loading any file
				CURLOPT_COOKIEFILE => '',
				//Disable location following
				CURLOPT_FOLLOWLOCATION => false,
				//Return headers too
				//XXX: curl_get splits headers from content using the header size
				CURLOPT_HEADER => true,
				//Return content
				CURLOPT_RETURNTRANSFER => true,
				//Track sent headers
				//XXX: debug, displayed by curl_get on failure
				CURLINFO_HEADER_OUT => true
			]
		) === false
	) {
		//Display error
		echo 'Curl setopt array failed: '.curl_error($this->ch)."\n";

		//Exit with failure
		exit(self::FAILURE);
	}

	//Return success
	return true;
}
543
/**
 * Get url
 *
 * Fetches the url with the shared curl handler and returns the response content
 * without its headers.
 *
 * On any failure (option setting, transfer, missing headers, status other than 200)
 * an error is displayed and the process exits with self::FAILURE.
 *
 * @param string $url The url to fetch
 * @return string|void Return url content or exit
 */
function curl_get($url) {
	//Set url to fetch
	if (curl_setopt($this->ch, CURLOPT_URL, $url) === false) {
		//Exit with failure
		$this->curl_fail('Setopt for '.$url.' failed: '.curl_error($this->ch));
	}

	//Check return status
	if (($response = curl_exec($this->ch)) === false) {
		//Exit with failure
		$this->curl_fail('Get for '.$url.' failed: '.curl_error($this->ch), curl_getinfo($this->ch, CURLINFO_HEADER_OUT), $response);
	}

	//Get header size
	if (empty($hs = curl_getinfo($this->ch, CURLINFO_HEADER_SIZE))) {
		//Exit with failure
		$this->curl_fail('Getinfo for '.$url.' failed: '.curl_error($this->ch), curl_getinfo($this->ch, CURLINFO_HEADER_OUT), $response);
	}

	//Get header
	if (empty($header = substr($response, 0, $hs))) {
		//Exit with failure
		$this->curl_fail('Header for '.$url.' empty: '.curl_error($this->ch), curl_getinfo($this->ch, CURLINFO_HEADER_OUT), $response);
	}

	//Check request success
	//XXX: use the status code parsed by curl, the status line is not HTTP/2 200 when curl falls back to HTTP/1.1
	//XXX: curl_error is empty for http level failures, display the status code instead
	if (($status = curl_getinfo($this->ch, CURLINFO_HTTP_CODE)) !== 200) {
		//Exit with failure
		$this->curl_fail('Status for '.$url.' failed: HTTP '.$status, curl_getinfo($this->ch, CURLINFO_HEADER_OUT), $header);
	}

	//Return content
	return substr($response, $hs);
}

/**
 * Display a curl failure, close the curl handler and exit with self::FAILURE
 *
 * @param string $message The error message to display
 * @param mixed ...$dumps The debug values to dump after the message
 * @return void Never returns
 */
private function curl_fail($message, ...$dumps) {
	//Display error
	echo $message."\n";

	//Display debug values
	foreach($dumps as $dump) {
		var_dump($dump);
	}

	//Close curl handler
	\curl_close($this->ch);

	//Exit with failure
	exit(self::FAILURE);
}
637
/**
 * Close curl handler
 *
 * @return bool Return true when a handler was closed, false when there was none
 */
function curl_close() {
	//Check handler
	//XXX: stays null until curl_init is called
	if ($this->ch === null || $this->ch === false) {
		//Nothing to close
		return false;
	}

	//Close handler
	//XXX: the curl extension function returns no value, so it can not be returned as status
	\curl_close($this->ch);

	//Forget closed handler
	$this->ch = null;

	//Return success
	return true;
}
646 }