]>
Raphaël G. Git Repositories - zipcode/blob - laposte
5 date_default_timezone_set('UTC');
8 define('MEMPFX', 'zipcode_');
13 //Memcached configuration
16 'address' => 'localhost',
22 'host' => 'localhost',
31 'User-agent: Laposte/0.1'
34 //Create mysqli object
35 $mysqli = new mysqli($mycfg['host'], $mycfg['username'], $mycfg['passwd'], $mycfg['dbname']);
37 //Restart from the last zipcode stored in the database
38 //XXX: used to restart from the last zipcode in case memcache was reset since the last run
39 if ($zipcode = $mysqli->query('SELECT MAX(zipcode) AS zipcode FROM zipcode')) {
40 if (($row = $zipcode->fetch_row()) && !empty($row[0])) {
46 $url = 'https://www.laposte.fr/particulier/outils/trouver-un-code-postal';
50 //mostly useless in fact
52 //Required to simplify simplexml transition
54 //Required to avoid xml errors
55 'quote-nbsp' => false,
56 //Required to fix code
61 $mem = new Memcached($memcfg['id']);
62 $mem->setOption(Memcached
::OPT_LIBKETAMA_COMPATIBLE
, true);
63 $mem->addServer($memcfg['address'], $memcfg['port']);
68 //Find out if we have ezxform_token in cache
69 if (($ezxform_token = $mem->get(MEMPFX
.'ezxform_token')) === false || ($cookies = $mem->get(MEMPFX
.'cookies')) === false) {
71 $ctx = stream_context_create(
77 'header' => $headers+
array(
83 //Load the page for fetching VilleCP[ezxform_token]
84 if (($data = file_get_contents($url, false, $ctx)) !== false) {
86 $tidy->parseString($data, $config, 'utf8');
89 //XXX: don't care about these errors, tidy is here to fix...
90 #if (!empty($tidy->errorBuffer)) {
91 # var_dump($tidy->errorBuffer);
96 $sx = new SimpleXMLElement($tidy);
98 //Set ezxform_token in memcache
99 $mem->set(MEMPFX
.'ezxform_token', $ezxform_token = (string)$sx->xpath('//input[@id="VilleCP_ezxform_token"]')[0]->attributes()->value
, time()+
60);
106 foreach($http_response_header as $header) {
108 if (preg_match('/^Set-Cookie:\s+([^;]+)/', $header, $matches)) {
109 parse_str($matches[1], $tmp);
112 //Handle cookie reset
113 if (preg_match('/^Set-Cookie:\s+([^;]+).*expires=([^;]+)/', $header, $matches) && strtotime($matches[2]) < 100000) {
114 parse_str($matches[1], $tmp);
115 foreach($tmp as $key => $null) {
116 unset($cookies[$key]);
121 //Set cookies in memcache
122 $mem->set(MEMPFX
.'cookies', $cookies, time()+
60);
125 die($_SERVER['PHP_SELF'].': file_get_contents(ezxform_token) failed: '.$url);
129 //Find out if we have last code in memcache
130 if (($last = $mem->get(MEMPFX
.'last')) === false) {
131 //Set last in memcache
132 $mem->set(MEMPFX
.'last', ($last = $first), time()+
3600*24);
136 if (!($stmt = $mysqli->prepare('INSERT IGNORE INTO zipcode (zipcode, city) VALUES (?, ?)'))) {
137 die('Couldn\'t prepare insert');
140 //Loop on all possible postal codes
141 //TODO: see if we also need to validate all INSEE codes containing letters
142 for($i = $last; $i <= 10000; $i++
) {
144 $pdata = http_build_query(
146 'VilleCP[filtre]' => 'ville',
147 'VilleCP[communeCode]' => sprintf('%05d', $i),
148 'VilleCP[save]' => '',
149 'VilleCP[ezxform_token]' => $ezxform_token
157 foreach($cookies as $key => $value) {
158 $cookie[] = urlencode($key).'='.urlencode($value);
160 $cookie = implode('; ', $cookie);
163 $pctx = stream_context_create(
167 'max_redirects' => 0,
168 'ignore_errors' => 0,
169 'header' => $headers+
array(
170 'Content-type: application/x-www-form-urlencoded',
171 'Content-Length: '.strlen($pdata),
179 //Post the search form for this code and load the result page (it also carries a fresh VilleCP[ezxform_token])
180 if (($data = file_get_contents($url, false, $pctx)) !== false) {
182 $tidy->parseString($data, $config, 'utf8');
185 //XXX: don't care about these errors, tidy is here to fix...
186 #if (!empty($tidy->errorBuffer)) {
187 # var_dump($tidy->errorBuffer);
188 # die('Tidy errors');
192 $sx = new SimpleXMLElement($tidy);
194 //Extract and store ezxform_token in memcache
195 $mem->set(MEMPFX
.'ezxform_token', $ezxform_token = (string)$sx->xpath('//input[@id="VilleCP_ezxform_token"]')[0]->attributes()->value
, time()+
30);
198 foreach($http_response_header as $header) {
199 //Handle cookie reset
200 if (preg_match('/^Set-Cookie:\s+([^;]+).*expires=([^;]+)/', $header, $matches) && strtotime($matches[2]) < 100000) {
201 parse_str($matches[1], $tmp);
202 foreach($tmp as $key => $null) {
203 unset($cookies[$key]);
206 } elseif (preg_match('/^Set-Cookie:\s+([^;]+)/', $header, $matches)) {
207 parse_str($matches[1], $tmp);
212 //Set cookies in memcache
213 $mem->set(MEMPFX
.'cookies', $cookies, time()+
60);
216 foreach($sx->xpath('/html/body/div[@id="app"][1]/div[9]/div[3]/div/div/div/div/table/tbody/tr') as $line) {
217 $zipcode = trim($line->td
[0]);
218 $city = trim($line->td
[1]);
219 if (!$stmt->bind_param('ss', $zipcode, $city)) {
220 die('Couldn\'t bind params');
222 if (!$stmt->execute()) {
223 die('Couldn\'t execute');
228 die($_SERVER['PHP_SELF'].': file_get_contents('.$pdata.') failed: '.$url);
233 //Set last in memcache
234 $mem->set(MEMPFX
.'last', $i, time()+
3600*24);
237 //Close insert request
240 //Close mysqli connection