]>
Raphaël G. Git Repositories - zipcode/blob - laposte
   5 date_default_timezone_set('UTC'); 
   8 define('MEMPFX', 'zipcode_'); 
  13 //Memcached configuration 
  16         'address' => 'localhost', 
  22         'host' => 'localhost', 
  31         'User-agent: Laposte/0.1' 
  34 //Create mysqli object 
  35 $mysqli = new mysqli($mycfg['host'], $mycfg['username'], $mycfg['passwd'], $mycfg['dbname']); 
  37 //Set to restart on last zipcode in database 
  38 //XXX: used to restart from the last zipcode in case memcache was reset since the last run 
  39 if ($zipcode = $mysqli->query('SELECT MAX(zipcode) AS zipcode FROM zipcode')) { 
  40         if (($row = $zipcode->fetch_row()) && !empty($row[0])) { 
  46 $url = 'https://www.laposte.fr/particulier/outils/trouver-un-code-postal'; 
  50         //mostly useless in fact 
  52         //Required to simplify simplexml transition 
  54         //Required to avoid xml errors 
  55         'quote-nbsp' => false, 
  56         //Required to fix code 
  61 $mem = new Memcached($memcfg['id']); 
  62 $mem->setOption(Memcached
::OPT_LIBKETAMA_COMPATIBLE
, true); 
  63 $mem->addServer($memcfg['address'], $memcfg['port']); 
  68 //Find out if we have ezxform_token in cache 
  69 if (($ezxform_token = $mem->get(MEMPFX
.'ezxform_token')) === false || ($cookies = $mem->get(MEMPFX
.'cookies')) === false) { 
  71         $ctx = stream_context_create( 
  77                                 'header' => $headers+
array( 
  83         //Load the page for fetching VilleCP[ezxform_token] 
  84         if (($data = file_get_contents($url, false, $ctx)) !== false) { 
  86                 $tidy->parseString($data, $config, 'utf8'); 
  89                 //XXX: don't care about these errors, tidy is here to fix... 
  90                 #if (!empty($tidy->errorBuffer)) { 
  91                 #       var_dump($tidy->errorBuffer); 
  96                 $sx = new SimpleXMLElement($tidy); 
  98                 //Set ezxform_token in memcache 
  99                 $mem->set(MEMPFX
.'ezxform_token', $ezxform_token = (string)$sx->xpath('//input[@id="VilleCP_ezxform_token"]')[0]->attributes()->value
, time()+
60); 
 106                 foreach($http_response_header as $header) { 
 108                         if (preg_match('/^Set-Cookie:\s+([^;]+)/', $header, $matches)) { 
 109                                 parse_str($matches[1], $tmp); 
 112                         //Handle cookie reset 
 113                         if (preg_match('/^Set-Cookie:\s+([^;]+).*expires=([^;]+)/', $header, $matches) && strtotime($matches[2]) < 100000) { 
 114                                 parse_str($matches[1], $tmp); 
 115                                 foreach($tmp as $key => $null) { 
 116                                         unset($cookies[$key]); 
 121                 //Set cookies in memcache 
 122                 $mem->set(MEMPFX
.'cookies', $cookies, time()+
60); 
 125                 die($_SERVER['PHP_SELF'].': file_get_contents(ezxform_token) failed: '.$url); 
 129 //Find out if we have last code in memcache 
 130 if (($last = $mem->get(MEMPFX
.'last')) === false) { 
 131         //Set last in memcache 
 132         $mem->set(MEMPFX
.'last', ($last = $first), time()+
3600*24); 
 136 if (!($stmt = $mysqli->prepare('INSERT IGNORE INTO zipcode (zipcode, city) VALUES (?, ?)'))) { 
 137         die('Couldn\'t prepare insert'); 
 140 //Loop over all possible postal codes 
 141 //TODO: see if we additionally need to validate all INSEE codes containing letters 
 142 for($i = $last; $i <= 10000; $i++
) { 
 144         $pdata = http_build_query( 
 146                         'VilleCP[filtre]' => 'ville', 
 147                         'VilleCP[communeCode]' => sprintf('%05d', $i), 
 148                         'VilleCP[save]' => '', 
 149                         'VilleCP[ezxform_token]' => $ezxform_token 
 157         foreach($cookies as $key => $value) { 
 158                 $cookie[] = urlencode($key).'='.urlencode($value); 
 160         $cookie = implode('; ', $cookie); 
 163         $pctx = stream_context_create( 
 167                                 'max_redirects' => 0, 
 168                                 'ignore_errors' => 0, 
 169                                 'header' => $headers+
array( 
 170                                         'Content-type: application/x-www-form-urlencoded', 
 171                                         'Content-Length: '.strlen($pdata), 
 179         //Load the page for fetching VilleCP[ezxform_token] 
 180         if (($data = file_get_contents($url, false, $pctx)) !== false) { 
 182                 $tidy->parseString($data, $config, 'utf8'); 
 185                 //XXX: don't care about these errors, tidy is here to fix... 
 186                 #if (!empty($tidy->errorBuffer)) { 
 187                 #       var_dump($tidy->errorBuffer); 
 188                 #       die('Tidy errors'); 
 192                 $sx = new SimpleXMLElement($tidy); 
 194                 //Extract and store ezxform_token in memcache 
 195                 $mem->set(MEMPFX
.'ezxform_token', $ezxform_token = (string)$sx->xpath('//input[@id="VilleCP_ezxform_token"]')[0]->attributes()->value
, time()+
30); 
 198                 foreach($http_response_header as $header) { 
 199                         //Handle cookie reset 
 200                         if (preg_match('/^Set-Cookie:\s+([^;]+).*expires=([^;]+)/', $header, $matches) && strtotime($matches[2]) < 100000) { 
 201                                 parse_str($matches[1], $tmp); 
 202                                 foreach($tmp as $key => $null) { 
 203                                         unset($cookies[$key]); 
 206                         } elseif (preg_match('/^Set-Cookie:\s+([^;]+)/', $header, $matches)) { 
 207                                 parse_str($matches[1], $tmp); 
 212                 //Set cookies in memcache 
 213                 $mem->set(MEMPFX
.'cookies', $cookies, time()+
60); 
 216                 foreach($sx->xpath('/html/body/div[@id="app"][1]/div[9]/div[3]/div/div/div/div/table/tbody/tr') as $line) { 
 217                         $zipcode = trim($line->td
[0]); 
 218                         $city = trim($line->td
[1]); 
 219                         if (!$stmt->bind_param('ss', $zipcode, $city)) { 
 220                                 die('Couldn\'t bind params'); 
 222                         if (!$stmt->execute()) { 
 223                                 die('Couldn\'t execute'); 
 228                 die($_SERVER['PHP_SELF'].': file_get_contents('.$pdata.') failed: '.$url); 
 233         //Set last in memcache 
 234         $mem->set(MEMPFX
.'last', $i, time()+
3600*24); 
 237 //Close insert request 
 240 //Close mysqli connection