source: subversion/applications/utils/import/bulk_upload_06/bulk_upload_sax.py @ 30254

Last change on this file since 30254 was 25055, checked in by mackerski, 9 years ago

Fixed error in cases where the optional -t command line option is not used

File size: 11.9 KB
Line 
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3#
4#
5# This is a python version of
6# the bulk_upload script for the 0.6 API.
7#
8# usage:
9#      -i input.osm
10#      -u username
11#      -p password
12#      -c comment for change set
13#      -t tag for change set, e.g. -t source=myimport or -t about="an awesome import"
14#         can be supplied multiple times for multiple tags, if comment or
15#         created_by are supplied they'll overwrite the default values
16#
17# After each change set is sent to the server the id mappings are saved
18# in inputfile.osm.db
19# Subsequent calls to the script will read in these mappings and skip objects already uploaded.
20#
21# If you change $input.osm between calls to the script (ie different data with the
22# same file name) you should delete $input.osm.db
23#
24# Author: Steve Singer <ssinger_pg@sympatico.ca>
25#
26# COPYRIGHT
27#
28# This program is free software; you can redistribute it and/or
29# modify it under the terms of the GNU General Public License
30# as published by the Free Software Foundation; either version 2
31# of the License, or (at your option) any later version.
32#
33# This program is distributed in the hope that it will be useful,
34# but WITHOUT ANY WARRANTY; without even the implied warranty of
35# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
36# GNU General Public License for more details.
37
38# You should have received a copy of the GNU General Public License
39# along with this program; if not, write to the Free Software
40# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
41
42
43
44import xml.etree.cElementTree as ET
45#import sets
46import optparse
47import httplib2
48import shelve
49import os
50from xml.sax import make_parser, SAXParseException
51from xml.sax.handler import ContentHandler
52from time import sleep
53import sys, traceback
54import socket
55
# Base URL of the OSM API server that all requests are sent to.  The live
# server is kept here commented out; the development server is the active one.
#api_host = "http://api.openstreetmap.org"
api_host = "http://api06.dev.openstreetmap.org"

# HTTP headers sent with every request; the User-Agent value is also used as
# the default `created_by` changeset tag.
headers = {"User-Agent": "bulk_upload_sax.py"}

# Seconds to sleep before each successive retry of a failed HTTP request.
retryDelays = [0, 10, 60, 300]
62
63
64
class ImportProcessor:
    """Collect OSM objects for one changeset and send them to the 0.6 API.

    Creating an instance immediately opens a new changeset on the server.
    Objects are queued with addItem/modifyItem/deleteItem, sent with
    upload(), and the changeset is finished with closeSet().

    NOTE(review): idMap is keyed by the bare object id only, so a node and a
    way that share the same placeholder id would collide.  Left unchanged to
    stay compatible with existing ``*.db`` mapping files.
    """

    def __init__(self, httpObj, comment, idMap):
        """Store the collaborators and open a changeset.

        httpObj -- object with an httplib2-style ``request()`` method
        comment -- changeset comment
        idMap   -- mapping of old object ids to server-assigned ids
        """
        self.comment = comment
        self.idMap = idMap
        self.httpCon = httpObj
        self._resetPending()
        self.createChangeSet()

    def _resetPending(self):
        """Start fresh, empty <create>/<modify>/<delete> containers."""
        self.addElem = ET.Element('create')
        self.modifyElem = ET.Element('modify')
        self.deleteElem = ET.Element('delete')

    def doHttpRequest(self, message, url, method, data=None, headers=None):
        """Perform an HTTP request, retrying on socket errors and HTTP 500.

        Retries follow the module-level retryDelays schedule.  Returns the
        (response, content) pair on HTTP 200.  Any other outcome prints
        `message` plus the error details and terminates the process with
        exit status -1.
        """
        count = 0
        while True:
            can_retry = count < len(retryDelays)
            try:
                resp, content = self.httpCon.request(url, method, data, headers=headers)
            except socket.error as e:
                if not can_retry:
                    print(message + str(e))
                    sys.exit(-1)
                print('%s%s, retrying in %u seconds' % (message, e, retryDelays[count]))
            else:
                if resp.status == 200:
                    return (resp, content)
                # Only a 500 is considered transient; everything else is fatal.
                if resp.status != 500 or not can_retry:
                    print(message + str(resp.status) + " " + str(content))
                    sys.exit(-1)
                print('%sError 500, retrying in %u seconds' % (message, retryDelays[count]))
            sleep(retryDelays[count])
            count += 1

    def createChangeSet(self):
        """Open a changeset on the server and remember its id.

        The changeset gets `created_by` and `comment` tags by default; tags
        given with -t (``options.tags``) are added and may override them.
        """
        createReq = ET.Element('osm', version="0.6")
        change = ET.Element('changeset')

        # Changeset tags
        tags = {
            'created_by': headers['User-Agent'],
            'comment': self.comment,
        }
        # options.tags is None when -t was never given.
        for tag in options.tags or []:
            if "=" not in tag:
                print("Invalid changeset tag (expected key=value): " + tag)
                sys.exit(-1)
            (k, v) = tag.split("=", 1)
            tags[k] = v

        for key, value in tags.items():
            change.append(ET.Element('tag', k=key, v=value))

        createReq.append(change)
        xml = ET.tostring(createReq)
        resp, content = self.doHttpRequest('Error creating changeset:', api_host +
                                           '/api/0.6/changeset/create', 'PUT', xml, headers=headers)
        # The response body is used verbatim as the changeset id.
        self.changesetid = content
        print("Opened changeset:" + self.changesetid)

    def createStructure(self, item):
        """Build the XML element for a parsed node, way or relation.

        `item` is a dict with 'type', 'attrs' and 'tags' keys; ways and
        relations additionally carry 'childs' (node refs, or member dicts
        with 'type', 'ref' and 'role').  Raises ValueError for any other
        type.
        """
        kind = item['type']
        if kind not in ('node', 'way', 'relation'):
            raise ValueError("Unknown OSM object type: %r" % (kind,))

        struct = ET.Element(kind, dict(item['attrs']))
        for tag in item['tags']:
            struct.append(ET.Element('tag', k=tag[0], v=tag[1]))

        if kind == 'way':
            for nd in item['childs']:
                struct.append(ET.Element('nd', ref=nd))
        elif kind == 'relation':
            for member in item['childs']:
                struct.append(ET.Element('member', type=member['type'],
                                         ref=member['ref'], role=member['role']))
        return struct

    def _queueItem(self, container, item):
        """Convert `item` to XML, stamp the changeset id, append to `container`."""
        elem = self.createStructure(item)
        elem.attrib['changeset'] = self.changesetid
        container.append(elem)

    def addItem(self, item):
        """Queue `item` for creation."""
        self._queueItem(self.addElem, item)

    def deleteItem(self, item):
        """Queue `item` for deletion."""
        self._queueItem(self.deleteElem, item)

    def modifyItem(self, item):
        """Queue `item` for modification."""
        self._queueItem(self.modifyElem, item)

    def _buildOsmChange(self):
        """Return an <osmChange> element wrapping the pending containers."""
        doc = ET.Element('osmChange')
        doc.append(self.addElem)
        doc.append(self.modifyElem)
        doc.append(self.deleteElem)
        return doc

    def upload(self):
        """Send all queued objects to the open changeset and record new ids."""
        payload = ET.tostring(self._buildOsmChange())
        resp, content = self.doHttpRequest("Error uploading changeset:", api_host +
                                           '/api/0.6/changeset/' + self.changesetid +
                                           '/upload',
                                           'POST', payload, headers=headers)
        self.processResult(content)
        # Drop what was just sent; otherwise the next upload to this
        # changeset would send the same objects to the server again.
        self._resetPending()

    def closeSet(self):
        """Close the changeset on the server."""
        print("Closing changeset:" + self.changesetid)
        resp, content = self.doHttpRequest("Error closing changeset " + str(self.changesetid) + ":", api_host +
                                           '/api/0.6/changeset/' +
                                           self.changesetid + '/close', 'PUT', headers=headers)

    #
    # Uploading a change set returns a <diffResult> containing elements
    # that map the old id to the new id
    # Process them.
    def processResult(self, content):
        """Record the old-id -> new-id mappings from a <diffResult> document.

        Entries without a `new_id` attribute are mapped to their own old id
        so that they still count as processed.
        """
        diffResult = ET.fromstring(content)
        for child in diffResult:
            # str() keeps the key a plain byte string on Python 2 (same
            # result as encoding to ASCII), which is what shelve requires.
            old_id = str(child.attrib['old_id'])
            self.idMap[old_id] = child.attrib.get('new_id', old_id)

        # Flush a shelve-backed map now so the mappings of data that is
        # already on the server survive an abnormal exit.
        sync = getattr(self.idMap, 'sync', None)
        if sync is not None:
            sync()

    def getChangesetLimit(self):
        """Return the --changeset-limit option as an int."""
        return int(options.changeset_limit)

    def getAPILimit(self):
        """Return the --put-limit option as an int."""
        return int(options.put_limit)
189
# Allow enforcing of required arguments
# code from http://www.python.org/doc/2.3/lib/optparse-extending-examples.html
class OptionParser (optparse.OptionParser):
    """optparse.OptionParser with a check for options that must be given."""

    def check_required (self, opt):
        """Abort through self.error() if option `opt` has no value.

        Must be called after parse_args().  An option counts as missing
        when its parsed value is None, so required options must keep the
        default of None.
        """
        required = self.get_option(opt)
        supplied = getattr(self.values, required.dest)
        if supplied is None:
            self.error("%s option not supplied" % required)
200   
class BulkParser(ContentHandler):
    """SAX handler that feeds the objects of an .osm file to ImportProcessor.

    Each top-level node, way or relation is collected into a dict while its
    element is open and queued on the current importer when it closes.
    Objects whose id is already in the id map are skipped.  Uploads and
    changeset roll-overs are triggered by the put and changeset limits.
    """

    def __init__(self):
        ContentHandler.__init__(self)
        # Per-instance state; a class-level list would be shared by every
        # parser instance.
        self.pathStack = []
        self.path = ''
        self.object = None
        self.ob_cnt = 0
        self.cs_cnt = 0

    def getRef(self, attrs):
        """Return the `ref` attribute, translated through the id map.

        Returns the mapped id when the referenced object was already
        uploaded, the original ref otherwise, and None when the element has
        no `ref` attribute.
        """
        ref = attrs.get('ref', None)
        if ref is None:
            return None
        # str() keeps the key a plain byte string on Python 2, as shelve needs.
        ref = str(ref)
        return self.idMap.get(ref, None) or ref

    def startDocument(self):
        """Set up the HTTP connection, the id map and the first changeset."""
        self.httpObj = httplib2.Http()
        self.httpObj.add_credentials(options.user, options.password)
        self.idMap = shelve.open('%s.db' % options.infile)
        self.importer = ImportProcessor(self.httpObj, options.comment, self.idMap)
        self.pathStack = []
        self.path = ''
        self.object = None
        self.ob_cnt = 0  # objects queued since the last upload
        self.cs_cnt = 0  # objects queued in the current changeset

    def endDocument(self):
        """Upload whatever is still queued, close the changeset and the map."""
        self.importer.upload()
        self.importer.closeSet()
        self.idMap.close()

    def startElement(self, name, attrs):
        """Track the element path and collect object data."""
        self.pathStack.append(name)
        self.path = '/'.join(self.pathStack)

        if self.path in ('osm/node', 'osm/way', 'osm/relation'):
            obj_id = attrs.get('id', None)
            if obj_id is not None and str(obj_id) in self.idMap:
                # Already uploaded by an earlier run; self.object stays
                # None so the children of this element are ignored too.
                return
            self.object = {'type': name, 'attrs': attrs.copy(), 'tags': []}
            if name != 'node':
                self.object['childs'] = []
        elif self.path in ('osm/node/tag', 'osm/way/tag', 'osm/relation/tag'):
            if self.object:
                self.object['tags'].append([attrs['k'], attrs['v']])
        elif self.path == 'osm/way/nd' and self.object:
            self.object['childs'].append(self.getRef(attrs))
        elif self.path == 'osm/relation/member' and self.object:
            member = {'type': attrs['type'], 'role': attrs['role']}
            member['ref'] = self.getRef(attrs)
            self.object['childs'].append(member)

    def endElement(self, name):
        """Queue a completed object and upload / roll over when limits hit."""
        if self.object and self.path in ('osm/node', 'osm/way', 'osm/relation'):
            action = self.object['attrs'].get('action', None)
            if action == 'delete':
                self.importer.deleteItem(self.object)
            elif action == 'modify':
                self.importer.modifyItem(self.object)
            else:
                self.importer.addItem(self.object)
            self.object = None

            # Count the object before checking the limits so a batch never
            # exceeds the configured size.
            self.ob_cnt += 1
            self.cs_cnt += 1

            if self.ob_cnt >= self.importer.getAPILimit():
                print("  Uploading to changeset: %d (uploading %d thingies with %d/%d possible thingies in this changeset so far)" % (int(self.importer.changesetid), self.ob_cnt, self.cs_cnt, self.importer.getChangesetLimit()))
                self.importer.upload()
                self.ob_cnt = 0
            if self.cs_cnt >= self.importer.getChangesetLimit():
                # Send anything still queued before the importer is
                # replaced; otherwise those objects would be dropped.
                if self.ob_cnt > 0:
                    self.importer.upload()
                    self.ob_cnt = 0
                self.importer.closeSet()
                self.importer = ImportProcessor(self.httpObj, options.comment, self.idMap)
                self.cs_cnt = 0

        del self.pathStack[-1]
        self.path = '/'.join(self.pathStack)

    def characters(self, data):
        """Text content is not used."""
        pass
283
# Command line handling and program entry.  `options` is deliberately a
# module-level name: ImportProcessor and BulkParser read it directly.
usage = "usage: %prog -i input.osm -u user -p password -c comment"

parser = OptionParser(usage)
parser.add_option("-i", "--input", dest="infile", help="read data from input.osm")
parser.add_option("-u", "--user", dest="user", help="username")
parser.add_option("-p", "--password", dest="password", help="password")
parser.add_option("-c", "--comment", dest="comment", help="changeset comment")
parser.add_option("-t", "--tag", action="append", dest="tags",
                  help="Changeset tags e.g. `source=landsat', can be supplied multiple times")
parser.add_option("", "--changeset-limit", dest="changeset_limit", default=5000,
                  help="The maximum number of thingies to upload to each changeset")
parser.add_option("", "--put-limit", dest="put_limit", default=500,
                  help="The number of thingies to upload in each PUT request")
(options, args) = parser.parse_args()

for required in ("-i", "-u", "-p", "-c"):
    parser.check_required(required)

# Reject malformed -t values up front, before anything is sent to the server.
# options.tags is None when -t was never given.
for tag in options.tags or []:
    if "=" not in tag:
        parser.error("invalid changeset tag %r, expected key=value" % tag)

xmlParser = make_parser()
xmlParser.setContentHandler(BulkParser())

try:
    feedFile = open(options.infile)
except IOError as e:
    print("An error occured when opening the feed's URL: %s %s" % (options.infile, e))
    # Nothing to parse; stop here instead of failing later on an
    # undefined feedFile.
    sys.exit(1)

try:
    xmlParser.parse(feedFile)
except SAXParseException as e:
    print("An error occured when parsing the feed: %s line %u: %s" % (options.infile, e.getLineNumber(), e.getMessage()))
    sys.exit(1)
finally:
    feedFile.close()
317
Note: See TracBrowser for help on using the repository browser.