source: subversion/applications/utils/import/bulk_upload_06/bulk_upload.php @ 30254

Last change on this file since 30254 was 15271, checked in by isortega, 10 years ago

Typo: changeset sizes back to 25k nodes and 10k ways.

File size: 10.7 KB
Line 
1<?php
2
3/**
4 * ----------------------------------------------------------------------------
5 * "THE BEER-WARE LICENSE":
6 * <ivan@sanchezortega.es> wrote this file. As long as you retain this notice you
7 * can do whatever you want with this stuff. If we meet some day, and you think
8 * this stuff is worth it, you can buy me a beer in return.
9 * ----------------------------------------------------------------------------
10 */
11
12/// NOTE: this script is NOT guaranteed to work correctly. It is unknown whether there are any bugs in the 0.6 API at this time. The script has NOT undergone strict review or testing. Use at your own risk!
13
14/// TODO: add support for command-line options: username, passwd, server URL, file, batch sizes
15/// TODO: add support for uploading an entire directory instead of a single .osm file
16/// TODO: add support for parsing and uploading relations - right now relations are NOT handled AT ALL
17
18
/// Credentials and API endpoint. The credentials are URL-encoded further down because
/// they are embedded in "http://user:password@host/..." request URLs.
$username = 'ivan@sanchezortega.es';
$passwd   = '12345678';
$server_base_url = 'localhost:3000/api';
// $server_base_url = 'api06.dev.openstreetmap.org/api';
// $server_base_url = 'osmapi06.shaunmcdonald.me.uk/api';

/// TODO: scan for files in the current dir or allow for multiple files to be passed as parameters
// $file = 'osm/b0001c2.osm';
/// The .osm file to upload is the first command-line argument; stop early when it is missing
/// instead of handing an undefined value to the XML loader.
if (!isset($argv[1]))
{
    fwrite(STDERR, "Usage: php bulk_upload.php <file.osm>\n");
    exit(1);
}
$file = $argv[1];

/// Set this to false in order to not save temporary files after every batch is uploaded.
$make_backup = true;



/// TODO: fix this, use mktempfile() or something
$tmpfilename = '/tmp/bulk_upload_temp_' . posix_getpid();

/// Maximum number of objects put into a single changeset upload.
$node_batch_size = 25000;
$way_batch_size  = 10000;



$xml = simplexml_load_file($file);
if ($xml === false)
{
    /// simplexml_load_file() returns false on unreadable or malformed input.
    fwrite(STDERR, "Could not read or parse \"$file\" as XML, aborting.\n");
    exit(1);
}

/// Counter of changesets opened so far; incremented by open_changeset().
$batch = 0;

/// Cast to string so only the attribute text is kept; it is interpolated into the changeset tags.
$generator = (string) $xml['generator'];

/// ID of the currently open changeset, or null when none is open.
$changeset = null;

$updated_node_ids = array();

$username = urlencode($username);
$passwd   = urlencode($passwd);
54
55
56
/**
 * Resets the given payload buffer to the opening tags of an osmChange "create" document.
 *
 * @param string $payload Buffer to overwrite (passed by reference); any previous content is discarded.
 */
function init_payload(&$payload)
{
    // Both opening elements carry the same attribute list.
    $attributes = "version='0.6' generator='php_bulk_uploader'";
    $payload = "<osmChange {$attributes}><create {$attributes}>\n";
}
63
/**
 * Appends the closing tags of the osmChange "create" document to the payload buffer.
 *
 * @param string $payload Buffer to extend (passed by reference).
 */
function close_payload(&$payload)
{
    $closing_tags = "</create></osmChange>";
    $payload = $payload . $closing_tags;
}
69
/**
 * Opens a new changeset on the API and stores its ID in the global $changeset.
 *
 * Increments the global batch counter, writes a changeset-creation document to the
 * temporary file, PUTs it to "<server>/0.6/changeset/create" and aborts the script
 * when the response body is not a numeric changeset ID.
 *
 * @param string $object_type Kind of object about to be uploaded ('node', 'way');
 *                            only used to build the changeset comment.
 */
function open_changeset($object_type)
{
    global $username, $passwd, $server_base_url, $file, $tmpfilename, $batch, $generator, $changeset;

    $batch++;

    // Escape everything interpolated into the single-quoted XML attribute values, so that
    // quotes, '&' or '<' in the file name or generator string cannot break the document.
    $created_by = htmlspecialchars((string) $generator, ENT_QUOTES, 'UTF-8');
    $comment    = htmlspecialchars("Upload of file $file, {$object_type}s, batch $batch", ENT_QUOTES, 'UTF-8');
    $request    = "<osm><changeset><tag k='created_by' v='$created_by'/><tag k='uploaded_by' v='php_bulk_upload'/><tag k='comment' v='$comment'/></changeset></osm>";

    file_put_contents($tmpfilename, $request);

    echo "PUT http://$username:$passwd@$server_base_url/0.6/changeset/create\n";

    // cURL's PUT mode reads the request body from a file handle.
    $fp = fopen($tmpfilename, 'r');
    if ($fp === false)
    {
        trigger_error("Could not open temporary file $tmpfilename for reading, aborting.\n", E_USER_ERROR);
        die();
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, "http://$username:$passwd@$server_base_url/0.6/changeset/create");
    curl_setopt($ch, CURLOPT_PUT, 1);
    curl_setopt($ch, CURLOPT_INFILE, $fp);
    curl_setopt($ch, CURLOPT_INFILESIZE, strlen($request));
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    $response = curl_exec($ch);
    fclose($fp);
    unlink($tmpfilename);

    $info = curl_getinfo($ch);
    echo "API returned HTTP code {$info['http_code']}\n";

    // Tolerate surrounding whitespace in the response; curl_exec() returns false on transport failure.
    $changeset = is_string($response) ? trim($response) : $response;

    if (!is_numeric($changeset))
    {
        // The temporary file is already deleted at this point, so report the in-memory request.
        trigger_error("API didn't provide a new changeset ID, aborting. We sent \n" . $request . "\n", E_USER_ERROR);
        die();
    }

    echo "File \"$file\" batch \"$batch\" is being uploaded to changeset \"$changeset\" \n";
    sleep(1);
}
104       
105       
/**
 * Uploads an osmChange payload to the currently open changeset, applies the returned
 * ID mappings through update_ids(), and then closes the changeset.
 *
 * Aborts the script when the upload does not return HTTP 200. On success the global
 * $changeset is reset to NULL, and the function sleeps for 10 seconds before returning.
 *
 * @param string $payload Complete osmChange document (passed by reference, not modified here).
 */
function send_payload(&$payload)
{
    global $username, $passwd, $server_base_url, $file, $tmpfilename, $batch, $generator, $changeset;

    // The payload is written to the temporary file, but the POST below sends it straight
    // from memory via CURLOPT_POSTFIELDS; the file is removed again after the request.
    file_put_contents($tmpfilename, $payload);

    echo "POST http://$username:$passwd@$server_base_url/0.6/changeset/$changeset/upload\n";

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, "http://$username:$passwd@$server_base_url/0.6/changeset/$changeset/upload");
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    $payload_updates = curl_exec($ch);
    unlink($tmpfilename);

    $info = curl_getinfo($ch);
    echo "API returned HTTP code {$info['http_code']}\n";

    if ($info['http_code'] != 200)
    {
        trigger_error("The API returned: \n$payload_updates\n\nAPI didn't succesfully accept the the last uploaded batch, aborting.\n", E_USER_ERROR);
        die();
    }

    echo "\n\nIDs updated: \"\n$payload_updates\n\"\n\n\n";

    update_ids($payload_updates);

    echo "PUT http://$username:$passwd@$server_base_url/0.6/changeset/$changeset/close\n";

    // Closing is a PUT with an empty body, hence /dev/null with a declared size of 0.
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, "http://$username:$passwd@$server_base_url/0.6/changeset/$changeset/close");
    curl_setopt($ch, CURLOPT_PUT, 1);
    curl_setopt($ch, CURLOPT_INFILE, $fp = fopen('/dev/null', 'r'));
    curl_setopt($ch, CURLOPT_INFILESIZE, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    $close_error = curl_exec($ch);
    fclose($fp);

    $info = curl_getinfo($ch);
    echo "API returned HTTP code {$info['http_code']}\n";
    // Report the body returned by the close request (previously an undefined variable was printed).
    echo "Changeset $changeset closed, error was \"$close_error\"\n\n\n";
    $changeset = NULL;
    sleep(10);
}
159
160
161
162
/**
 * Applies the old-ID -> new-ID mappings returned by a changeset upload to the global
 * $nodes, $ways and $inverse_waynodes structures, and optionally writes a backup file.
 *
 * For every <node> entry in the response, the node is re-keyed under its new ID and every
 * <nd ref> of the ways listed in $inverse_waynodes for the old ID is rewritten. For every
 * <way> entry, the way is re-keyed under its new ID. When the global $make_backup is true,
 * all nodes and ways are then dumped to /tmp/bulk_uploader_backup_<batch>.osm.
 *
 * @param string $payload_updates XML body returned by the upload request; read for
 *                                <node>/<way> children with old_id and new_id attributes.
 */
function update_ids($payload_updates)
{
    global $nodes, $ways, $inverse_waynodes;

    $updates = simplexml_load_string($payload_updates);
    if ($updates === false)
    {
        // An unparsable response means there is nothing to apply; continue with an empty result.
        echo " ERROR: could not parse the API response as XML; no IDs were updated.\n";
        $updates = new SimpleXMLElement('<diffResult/>');
    }

    $updated_node_count    = 0;
    $updated_way_count     = 0;
    $updated_waynode_count = 0;

    foreach ($updates->node as $updated_node)
    {
        $old_id = (int) $updated_node['old_id'];
        $new_id = (int) $updated_node['new_id'];
        if (!isset($nodes[$old_id]))
        {
            echo " ERROR: API returned updated node id for non-existing node $old_id (new ID: $new_id). This should not happen.\n";
        }
        else
        {
            $node = $nodes[$old_id];
            unset($nodes[$old_id]);
            $node['id'] = $new_id;
            // Record the version only when the response supplies one (the 0.6 API documents a
            // new_version attribute); the previous code chained an unfinished assignment here.
            if (isset($updated_node['new_version']))
            {
                $node['version'] = (string) $updated_node['new_version'];
            }
            $nodes[$new_id] = $node;
            /// TODO: add sanity check to make sure that this updated node has been indeed uploaded
            $updated_node_count++;
        }

        if (isset($inverse_waynodes[$old_id]))
        {
            $referencing_ways = $inverse_waynodes[$old_id];
            foreach ($referencing_ways as $way_id => $unused)
            {
                if (!isset($ways[$way_id]))
                {
                    continue;
                }
                // Only change IDs of nodes if they have been just uploaded... positive IDs will remain the same.
                // This will fail if there are negative IDs which have not been *just* uploaded.
                foreach ($ways[$way_id]->nd as $nd)
                {
                    if ((int) $nd['ref'] === $old_id)
                    {
                        $nd['ref'] = $new_id;
                        $updated_waynode_count++;
                    }
                }
            }
            // Remove the old key before storing the new one, so an identical old/new ID is not lost.
            unset($inverse_waynodes[$old_id]);
            $inverse_waynodes[$new_id] = $referencing_ways;
        }
    }

    foreach ($updates->way as $updated_way)
    {
        $old_id = (int) $updated_way['old_id'];
        $new_id = (int) $updated_way['new_id'];
        if (!isset($ways[$old_id]))
        {
            echo " ERROR: API returned updated way id for non-existing way $old_id (new ID: $new_id). This should not happen.\n";
        }
        else
        {
            $way = $ways[$old_id];
            unset($ways[$old_id]);
            $way['id'] = $new_id;
            $ways[$new_id] = $way;
            /// TODO: add sanity check to make sure that this updated way has been indeed uploaded
            $updated_way_count++;
        }
    }
    /// TODO: update relation IDs.

    if ($updated_node_count)    echo "Updated IDs of $updated_node_count nodes.\n";
    if ($updated_way_count)     echo "Updated IDs of $updated_way_count ways.\n";
    if ($updated_waynode_count) echo "Updated IDs of $updated_waynode_count way nodes.\n";

    global $make_backup;
    if ($make_backup)
    {
        global $batch;
        $backup_file = "/tmp/bulk_uploader_backup_$batch.osm";
        echo "Writing data backup after batch $batch to file $backup_file ...\n";
        $fd = fopen($backup_file, 'w');
        if ($fd === false)
        {
            // The backup is best-effort: report the problem and carry on with the upload.
            echo " ERROR: could not open backup file $backup_file for writing; backup skipped.\n";
            return;
        }
        fwrite($fd, "<?xml version='1.0' encoding='UTF-8'?><osm version='0.6' generator='php_bulk_uploader_backup'>");
        foreach ($nodes as $node)
        {
            fwrite($fd, $node->asXML() . "\n");
        }
        foreach ($ways as $way)
        {
            fwrite($fd, $way->asXML() . "\n");
        }
        /// TODO: backup relations!!
        fwrite($fd, "</osm>");
        fclose($fd);
        echo "Backup wrote.\n";
    }
}
262
263
264
265
266
267
268/// Main stuff
269
270
271
272
273/// Build up auxiliary arrays
/// Lookup tables used throughout the upload:
///   $nodes            : node ID => <node> element
///   $ways             : way ID  => <way> element
///   $inverse_waynodes : node ID => set of IDs of the ways referencing that node
$nodes            = array();
$ways             = array();
$inverse_waynodes = array();

echo "Preparing nodes...\n";
if ($xml->node)
{
    foreach ($xml->node as $node)
    {
        $nodes[(int) $node['id']] = $node;
    }
}

echo "Preparing ways...\n";
if ($xml->way)
{
    foreach ($xml->way as $way)
    {
        $way_key = (int) $way['id'];
        $ways[$way_key] = $way;

        // Register this way under every node it references.
        foreach ($way->nd as $member)
        {
            $inverse_waynodes[(int) $member['ref']][$way_key] = true;
        }
    }
}
echo "Data prepared.\n";

/// The document root is no longer needed once the lookup tables are built.
unset($xml);
301
302// print_r($inverse_waynodes);
303
304// die();
305
306
307// open_changeset('node');
/// Upload all nodes in batches of at most $node_batch_size, one changeset per batch.
init_payload($payload);

/// Number of nodes queued in the current, not-yet-sent payload.
$i = 0;

if ($nodes)
{
    foreach ($nodes as $node)
    {
        // Nodes already carrying an "uploaded" attribute (e.g. read back from a backup file) are skipped.
        if ($node['uploaded'])
        {
            echo "Skipping node " . $node['id'] . "\n";
            continue;
        }

        // Flush a full batch. send_payload() closes the changeset and resets $changeset,
        // so the check below opens a fresh one for the next batch.
        if ($i == $node_batch_size)
        {
            close_payload($payload);
            send_payload($payload);
            init_payload($payload);
            $i = 0;
        }

        if (!$changeset)
        {
            open_changeset('node');
        }

        /// HACK to make it work with API 0.6
        // Assigning through array access sets or replaces the attribute; addAttribute()
        // would warn and keep a stale value if the input already had a changeset attribute.
        $node['changeset'] = $changeset;
        $payload .= $node->asXML() . "\n";

        // Marked only after serialising, so the flag is not part of the uploaded XML.
        $node['uploaded'] = true;

        $i++;
    }

    // Only send when something is queued: if every node was skipped, no changeset was opened.
    if ($i > 0)
    {
        close_payload($payload);
        send_payload($payload);
    }
    echo "All nodes from file $file fully uploaded; starting to upload ways.\n";
}
else
{
    echo "No nodes to upload\n";
}
358
359
/// Upload all ways in batches of at most $way_batch_size, one changeset per batch.
init_payload($payload);
$changeset = null;

/// Number of ways queued in the current, not-yet-sent payload.
$i = 0;

if ($ways)
{
    foreach ($ways as $way)
    {
        // Ways already carrying an "uploaded" attribute are skipped.
        if ($way['uploaded'])
        {
            echo "Skipping way " . $way['id'] . "\n";
            continue;
        }

        // Flush a full batch. send_payload() closes the changeset and resets $changeset,
        // so the check below opens a fresh one for the next batch.
        if ($i == $way_batch_size)
        {
            close_payload($payload);
            send_payload($payload);
            init_payload($payload);
            $i = 0;
        }

        if (!$changeset)
        {
            open_changeset('way');
        }

        /// HACK to make it work with API 0.6
        // Assigning through array access sets or replaces the attribute; addAttribute()
        // would warn and keep a stale value if the input already had a changeset attribute.
        $way['changeset'] = $changeset;

        $payload .= $way->asXML() . "\n";

        // Marked only after serialising, so the flag is not part of the uploaded XML.
        $way['uploaded'] = true;

        $i++;
    }

    // Only send when something is queued: if every way was skipped, no changeset was opened.
    if ($i > 0)
    {
        close_payload($payload);
        send_payload($payload);
    }
    echo "All ways from file $file fully uploaded.\n";
}
else
{
    echo "No ways to upload\n";
}

/// TODO: upload relations !!!!




echo "File $file fully uploaded.\n\n\n";
412
413
414
415
Note: See TracBrowser for help on using the repository browser.