source: subversion/applications/utils/import/shp2osm/polyshp2osm.py @ 12937

Revision 12937, 13.2 KB checked in by crschmidt, 5 years ago (diff)

more cleanup.

  • Property svn:keywords set to Id
Line 
1#!/usr/bin/python
2
3"""
4This script is designed to act as assistance in converting shapefiles
5to OpenStreetMap data. This file is optimized and tested with MassGIS
6shapefiles, converted to EPSG:4326 before being passed to the script.
7You can perform this conversion with
8
9   ogr2ogr -t_srs EPSG:4326 new_file.shp old_file.shp
10
11Requires OGR: Tested with 1.3.2 and 1.6.0
12
13It is expected that you will modify the fixed_tags, tag_mapping, and
14boring_tags attributes of this script before running. You should read,
15or at least skim, the code up until it says:
16
17  DO NOT CHANGE AFTER THIS LINE.
18
19to accomodate your own data.
20"""
21
22__author__ = "Christopher Schmidt <crschmidt@crschmidt.net>"
23__version__ = "$Id$"
24
25# These tags are attached to all exterior ways. You can put any key/value pairs
26# in this dictionary.
27
28fixed_tags = {
29  'source': 'MassGIS OpenSpace (http://www.mass.gov/mgis/osp.htm)',
30  'area': 'yes',
31  'created_by': 'polyshp2osm'
32} 
33
34# Here are a number of functions: These functions define tag mappings. The API
35# For these functions is that they are passed the attributes from a feature,
36# and they return a list of two-tuples which match to key/value pairs.
37
38def access(data):
39    """Access restrictions.""" 
40    keys = {
41        'Y': 'yes',
42        'N': 'private',
43        'L': 'restricted'
44    }
45    if 'pub_access' in data:
46        if data['pub_access'] in keys:
47            return [('access', keys[data['pub_access']])]
48    return None       
49
50def protection(data):
51    keys = {
52        'P': 'perpetuity',
53        'T': 'temporary',
54        'L': 'limited',
55    }
56    if 'lev_prot' in data:
57        if data['lev_prot'] in keys:
58            return [('protected', keys[data['lev_prot']])]
59    return None
60
61def owner_type(data):
62    """See wiki:Key:ownership""" 
63    keys = {
64        'F': 'national',
65        'S': 'state',
66        'C': 'county',
67        'M': 'municipal',
68        'N': 'private_nonprofit',
69        'P': 'private',
70        'B': 'public_nonprofit',
71        'L': 'land_trust',
72        'G': 'conservation_rganization',
73        'I': 'inholding',
74    }
75    if 'owner_type' in data:
76        if data['owner_type'] in keys:
77            return [['ownership', keys[data['owner_type']]]]
78
79def purpose(data):
80    """Based on a discussion on IRC"""
81    keys = {
82        'R': [('leisure', 'recreation_ground')],
83        'C': [('leisure', 'nature_reserve'), ('landuse', 'conservation')],
84        'B': [('landuse','conservation'), ('leisure','recreation_ground')],
85        'H': [('historical', 'yes')],
86        'A': [('agricultural', 'yes'), ('landuse','farm')], 
87        'W': [('landuse', 'resevoir')],
88        'S': [('scenic','yes')],
89        'F': [('landuse','land')],
90        'Q': [('landuse','conservation')],
91        'U': [('water','yes')]
92    }
93    if 'prim_purp' in data:
94        if data['prim_purp'] in keys:
95            return keys[data['prim_purp']]
96
97def name_tags(data):
98    """This function returns two things: a 'pretty' name to use, and
99       may return a landuse of either 'cemetery' or 'forest' if the name
100       contains those words; based on evaluation the dataset in question."""
101    tags = [] 
102    name = data.get('site_name', None)
103    if not name: 
104        return
105    name = name.title()
106   
107    if "cemetery" in name.lower():
108        tags.append(['landuse', 'cemetery']) 
109    elif "forest" in name.lower():
110        tags.append(['landuse', 'forest']) 
111
112    tags.append(['name', name])
113    return tags
114
115def cal_date(data):
116    """Return YYYY-MM-DD or YYYY formatted dates, based on
117       (m)m/(d)d/yyyy dates"""
118    date = data.get('cal_date_r', None)
119    if not date: return
120    try:
121        m, d, y = map(int, date.split("/"))
122        if m == 1 and d == 1:
123            return [['start_date', '%4i' % y]]
124        return [['start_date', '%04i-%02i-%02i' % (y, m, d)]] 
125    except:
126        print "Invalid date: %s" % date
127        return None
128
129# The most important part of the code: define a set of key/value pairs
130# to iterate over to generate keys. This is a list of two-tuples: first
131# is a 'key', which is only used if the second value is a string. In
132# that case, it is a map of lowercased fielnames to OSM tag names: so
133# fee_owner maps to 'owner' in the OSM output.
134
135# if the latter is callable (has a __call__; is a function), then that
136# method is called, passing in a dict of feature attributes with
137# lowercased key names. Those functions can then return a list of
138# two-tuples to be used as tags, or nothin' to skip the tags. 
139
140
141tag_mapping = [ 
142    ('fee_owner', 'owner'),
143    ('cal_date', cal_date),
144    ('pub_access', access),
145    ('lev_prot', protection),
146    ('owner_type', owner_type),
147    ('prim_purp', purpose),
148    ('site_name', name_tags),
149]   
150
151# These tags are not exported, even with the source data; this should be
152# used for tags which are usually calculated in a GIS. AREA and LEN are
153# common.
154
155boring_tags = [ 'AREA', 'LEN', 'GIS_ACRES' ]
156
157# Namespace is used to prefix existing data attributes. If 'None', or
158# '--no-source' is set, then source attributes are not exported, only
159# attributes in tag_mapping.
160
161namespace = "massgis"
162#namespace = None
163
164# Uncomment the "DONT_RUN = False" line to get started.
165
166#DONT_RUN = True
167DONT_RUN = False
168
169# =========== DO NOT CHANGE AFTER THIS LINE. ===========================
170# Below here is regular code, part of the file. This is not designed to
171# be modified by users.
172# ======================================================================
173
174import sys
175
176try:
177    try:
178        from osgeo import ogr
179    except ImportError:
180        import ogr
181except ImportError:
182    if DONT_RUN:
183        print __doc__
184        sys.exit(2)
185    print "OGR Python Bindings not installed. Fix that, please."
186    sys.exit(1)
187
188def close_file():
189    """ Internal. Close an open file."""
190    global open_file
191    if not open_file.closed: 
192        open_file.write("</osm>")
193        open_file.close()
194
195def start_new_file():
196    """ Internal. Open a new file, closing existing file if neccesary."""
197    global open_file, file_counter
198    file_counter += 1
199    if open_file:
200        close_file()
201    open_file = open("%s.%s.osm" % (file_name, file_counter), "w")
202    print >>open_file, "<osm version='0.5'>"
203
204def clean_attr(val):
205    """Internal. Hacky way to make attribute XML safe."""
206    val = str(val)
207    val = val.replace("&", "&amp;").replace("'", "&quot;").replace("<", "&lt;").replace(">", "&gt;").strip()
208    return val
209
210def add_ring_nodes(ring):
211    """Internal. Write the first ring nodes."""
212    global open_file, id_counter
213    ids = []
214    if range(ring.GetPointCount() - 1) == 0 or ring.GetPointCount() == 0:
215        print >>sys.stderr, "Degenerate ring." 
216        return   
217    for count in range(ring.GetPointCount()-1):
218        ids.append(id_counter)
219        print >>open_file, "<node id='-%s' lon='%s' lat='%s' />" % (id_counter, ring.GetX(count), ring.GetY(count)) #print count
220        id_counter += 1
221    return ids   
222
223def add_ring_way(ring): 
224    """Internal. write out the 'holes' in a polygon."""
225    global open_file, id_counter
226    ids = []
227    for count in range(ring.GetPointCount()-1):
228        ids.append(id_counter)
229        print >>open_file, "<node id='-%s' lon='%s' lat='%s' />" % (id_counter, ring.GetX(count), ring.GetY(count)) #print count
230        id_counter += 1
231    if len(ids) == 0:
232        return None
233    print >>open_file, "<way id='-%s'>" % id_counter
234    way_id = id_counter
235    id_counter += 1
236    for i in ids:
237        print >>open_file, "<nd ref='-%s' />" % i
238    print >>open_file, "<nd ref='-%s' />" % ids[0]
239    print >>open_file, "</way>"
240
241    return way_id
242
243open_file = None
244
245file_name = None 
246
247id_counter = 1
248
249file_counter = 0
250counter = 0
251
252class AppError(Exception): pass
253
254def run(filename, slice_count=1, obj_count=50000, output_location=None, no_source=False):
255    """Run the converter. Requires open_file, file_name, id_counter,
256    file_counter, counter to be defined in global space; not really a very good
257    singleton."""
258    global id_counter, file_counter, counter, file_name, open_file, namespace
259   
260    if no_source:
261        namespace=None
262
263    if output_location:
264       file_name = output_location
265
266    ds = ogr.Open(filename)
267    if not ds:
268        raise AppError("OGR Could not open the file %s" % filename)
269    l = ds.GetLayer(0)
270   
271    max_objs_per_file = obj_count
272
273    extent = l.GetExtent()
274    if extent[0] < -180 or extent[0] > 180 or extent[2] < -90 or extent[2] > 90:
275        raise AppError("Extent does not look like degrees; are you sure it is? \n(%s, %s, %s, %s)" % (extent[0], extent[2], extent[1], extent[3])) 
276    slice_width = (extent[1] - extent[0]) / slice_count
277
278    seen = {}
279
280    print "Running %s slices with %s base filename against shapefile %s" % (
281            slice_count, file_name, filename)
282
283    for i in range(slice_count): 
284
285        l.ResetReading()
286        l.SetSpatialFilterRect(extent[0] + slice_width * i, extent[2], extent[0] + (slice_width * (i + 1)), extent[3])
287
288        start_new_file()
289        f = l.GetNextFeature()
290       
291        obj_counter = 0
292        last_obj_split = 0
293
294        while f:
295            start_id_counter = id_counter
296            if f.GetFID() in seen:
297                f = l.GetNextFeature()
298                continue
299            seen[f.GetFID()] = True             
300           
301            if (obj_counter - last_obj_split) > max_objs_per_file:
302                print "Splitting file with %s objs" % (obj_counter - last_obj_split)
303                start_new_file()
304                last_obj_split = obj_counter
305
306            ways = []
307       
308            geom = f.GetGeometryRef()
309            ring = geom.GetGeometryRef(0)
310            ids = add_ring_nodes(ring)
311            if not ids or len(ids) == 0: 
312                f = l.GetNextFeature()
313                continue
314            print >>open_file, "<way id='-%s'>" % id_counter
315            ways.append(id_counter) 
316            id_counter += 1
317            for i in ids:
318                print >>open_file, "<nd ref='-%s' />" % i
319            print >>open_file, "<nd ref='-%s' />" % ids[0]
320            field_count = f.GetFieldCount()
321            fields  = {}
322            for field in range(field_count):
323                value = f.GetFieldAsString(field)
324                name = f.GetFieldDefnRef(field).GetName()
325                if namespace and name and value and name not in boring_tags:
326                    print >>open_file, "<tag k='%s:%s' v='%s' />" % (namespace, name, clean_attr(value))
327                fields[name.lower()] = value
328            tags={}
329            for tag_name, map_value in tag_mapping:
330                if hasattr(map_value, '__call__'):
331                    tag_values = map_value(fields)
332                    if tag_values:
333                        for tag in tag_values:
334                            tags[tag[0]] = tag[1]
335
336                else:
337                    if tag_name in fields:
338                        tags[map_value] = fields[tag_name].title()
339            for key, value in tags.items():
340                if key and value:
341                    print >>open_file, "<tag k='%s' v='%s' />" % (key, clean_attr(value))
342                   
343            for name, value in fixed_tags.items():
344                print >>open_file, "<tag k='%s' v='%s' />" % (name, clean_attr(value))
345            print >>open_file, "</way>"   
346            if geom.GetGeometryCount() > 1:
347                for i in range(1, geom.GetGeometryCount()):
348                    ways.append(add_ring_way(geom.GetGeometryRef(i)))
349                print >>open_file, "<relation id='-%s'><tag k='type' v='multipolygon' />" % id_counter
350                id_counter += 1
351                print >>open_file, '<member type="way" ref="-%s" role="outer" />' % ways[0]   
352                for way in ways[1:]:
353                    print >>open_file, '<member type="way" ref="-%s" role="inner" />' % way
354                print >>open_file, "</relation>"   
355               
356            counter += 1
357            f = l.GetNextFeature()
358            obj_counter += (id_counter - start_id_counter)
359       
360        close_file()
361
362if __name__ == "__main__":
363    if DONT_RUN:
364        print __doc__
365        sys.exit(2)
366   
367    from optparse import OptionParser
368   
369    parse = OptionParser(usage="%prog [args] filename.shp", version=__version__)
370    parse.add_option("-s", "--slice-count", dest="slice_count", 
371                     help="Number of horizontal slices of data", default=1, 
372                     action="store", type="int")
373    parse.add_option("-o", "--obj-count", 
374                     dest="obj_count", 
375                     help="Target Maximum number of objects in a single .osm file", 
376                     default=50000, type="int")
377    parse.add_option("-n", "--no-source", dest="no_source", 
378                     help="Do not store source attributes as tags.",
379                     action="store_true", default=False)
380    parse.add_option("-l", "--output-location", 
381                        dest="output_location", help="base filepath for output files.", 
382                        default="poly_output") 
383    (options, args) = parse.parse_args()
384   
385    if not len(args):
386        print "No shapefile name given!"
387        parse.print_help()
388        sys.exit(3)
389
390    kw = {}
391    for key in  ('slice_count', 'obj_count', 'output_location', 'no_source'):
392        kw[key] = getattr(options, key)
393   
394    try:
395        run(args[0], **kw)   
396    except AppError, E:
397        print "An error occurred: \n%s" % E 
Note: See TracBrowser for help on using the repository browser.