source: subversion/applications/utils/export/osm2stuff/process.pl @ 8907

Last change on this file since 8907 was 4689, checked in by ojw, 13 years ago

Allow ignoring specific things (e.g. nodes with natural=coastline)
Format the ignore list, with comments

File size: 5.8 KB
Line 
1#-----------------------------------------------------------------
2# Usage: perl process.pl < data.osm
3#
4# Output:
5#  - creates 2 files:
6#    - nodes.txt (list of interesting nodes)
7#    - ways.txt (list of interesting ways)
8#  - Both files are semicolon-separated list of tags
9#  - 'Interesting' means it's got tags that aren't in the ignore list
10#  - See bottom of file for the ignore list
11#  - special tags:
12#    - 'polyline' is a list of lat,long pairs (comma-separated)
13#    - 'lat' is latitude of a node
14#    - 'lon' is longitude of a node
15#  - limitations
16#    - semicolons in tags are silently converted to commas
17#    - tags in the ignore list can't be used in OSM file (e.g. 'lat')
18#    - there may be blank lines in the output ways file
19#
20# Copying:
21#  Copyright 2007, Oliver White, streetmap@blibbleblobble.co.uk
22#  Licensed under GNU GPL v2 or later
23#  No warranty etc.
24#---------------------------------------------------------------
25use strict;
# Parser state shared by the main loop and the subs below
my %Nodes;                       # Node coordinates, keyed '<id>_lat' and '<id>_lon'
my %Segments;                    # Segment endpoints, keyed '<id>_from_lat', '<id>_to_lon', etc.
my %Tags;                        # Attributes and tags of the object currently being parsed
my @Segments;                    # IDs of the segments in the way currently being parsed
my %IgnoreTags = IgnoreTags();   # Tag keys (and type:key=value combos) to ignore
my $Tagtype = '-';               # Object the parser is in: n(ode), s(egment), w(ay), or '-' for none

# File outputs.
# The bareword handles NODES and WAYS are kept, because writeNode() and
# writeWay() print to them by name.  On failure, report which file could
# not be opened and why.
open(NODES, '>', 'nodes.txt') || die("Can't open nodes.txt for writing: $!\n");
open(WAYS, '>', 'ways.txt') || die("Can't open ways.txt for writing: $!\n");
36
# Main loop: read the OSM XML from STDIN (or the files named on the command
# line) one line at a time.  The parser is line-based: it expects each XML
# element to sit on a line of its own.
while(my $Line = <>){
  if($Line =~ m{<node (.*)}){
    # Beginning of a node
    %Tags = getAttributes($1);
    $Tagtype = 'n';
    # A node written as an empty-element tag (<node ... />) has no closing
    # </node> line, so it has to be recorded here or its position is lost
    finishNode() if($Line =~ m{<node [^<>]*/>});
  }
  elsif($Line =~ m{<tag k="(.*?)" v="(.*?)"\s*/>}){
    # Tag within an object
    my ($Name, $Value) = ($1, $2);
    if($Value ne ''
      && !$IgnoreTags{$Name}                                # Ignored tags
      && !$IgnoreTags{$Tagtype.':'.$Name.'='.$Value}){      # Ignored name=tag combos
      $Tags{$Name} = $Value;
    }
  }
  elsif($Line =~ m{</node}){
    # End of a node
    finishNode();
  }
  elsif($Line =~ m{<segment (.*)}){
    # Beginning of a segment
    %Tags = getAttributes($1);
    $Tagtype = 's';
    # Same as for nodes: an empty-element segment never gets a </segment>
    finishSegment() if($Line =~ m{<segment [^<>]*/>});
  }
  elsif($Line =~ m{<way (.*)}){
    # Beginning of a way
    %Tags = getAttributes($1);
    $Tagtype = 'w';
    @Segments = ();
  }
  elsif($Line =~ m{<seg id="(\d+)"/>}){
    # Segment within a way
    push(@Segments, $1);
  }
  elsif($Line =~ m{</segment}){
    # End of a segment
    finishSegment();
  }
  elsif($Line =~ m{</way}){
    # End of a way
    writeWay();
    $Tagtype = '-';
  }
}

# Buffered write errors (e.g. disk full) only show up when the file is closed
close(NODES) || die("Error writing nodes.txt: $!\n");
close(WAYS) || die("Error writing ways.txt: $!\n");

# Finish the node currently held in %Tags: remember its position so that
# segments can look it up later, then write it out if it is interesting
sub finishNode{
  my $ID = $Tags{id};
  $Nodes{$ID.'_lat'} = $Tags{lat};
  $Nodes{$ID.'_lon'} = $Tags{lon};
  writeNode();
  $Tagtype = '-';
}

# Finish the segment currently held in %Tags: copy the positions of its
# 'from' and 'to' nodes, so that writeWay() can look them up by segment ID
sub finishSegment{
  my $ID = $Tags{id};
  $Segments{$ID.'_from_lat'} = $Nodes{$Tags{from} . '_lat'};
  $Segments{$ID.'_from_lon'} = $Nodes{$Tags{from} . '_lon'};
  $Segments{$ID.'_to_lat'} = $Nodes{$Tags{to} . '_lat'};
  $Segments{$ID.'_to_lon'} = $Nodes{$Tags{to} . '_lon'};
  $Tagtype = '-';
}
92
# Decide if a way is interesting, and write it to disk
#
# Uses the global %Tags (tags of the current way) and @Segments (its segment
# IDs, which are consumed), and prints to the WAYS file handle.
#
# Each run of connected segments is written as one line:
#   <taglist>;polyline=lat,lon,lat,lon,...
# The way is split onto a new line wherever it's discontinuous, i.e. where a
# segment doesn't start at the point where the previous one ended.
# Segments whose coordinates are unknown are left out, and break the line.
sub writeWay{
  my $TagList = tagList();

  # No tags left after filtering: the way is not interesting
  return if(!$TagList);

  # End point of the polyline written so far.  Undefined until a polyline has
  # been started, so that a way which really starts at 0,0 still gets its
  # tag list written in front of it.
  my($LastLat,$LastLon);

  # Test with defined(), so that a segment ID of 0 doesn't end the loop early
  while(defined(my $S = shift(@Segments))){
    my $FromLat = $Segments{$S.'_from_lat'};
    my $FromLon = $Segments{$S.'_from_lon'};
    my $ToLat = $Segments{$S.'_to_lat'};
    my $ToLon = $Segments{$S.'_to_lon'};

    # Segment (or one of its nodes) was never seen in the input: skip it
    # rather than writing a bogus point at 0,0, and force the next segment
    # to start a new polyline
    if(grep { !defined($_) } ($FromLat, $FromLon, $ToLat, $ToLon)){
      ($LastLat,$LastLon) = (undef,undef);
      next;
    }

    # Start a new line (with the tag list repeated) at the start of the way
    # and at every discontinuity
    if(!defined($LastLat) || $FromLat != $LastLat || $FromLon != $LastLon){
      printf WAYS "\n%s;polyline=%f,%f,",
        $TagList,
        $FromLat,
        $FromLon;
    }
    printf WAYS "%f,%f,",
      $ToLat,
      $ToLon;

    $LastLat = $ToLat;
    $LastLon = $ToLon;
  }
  print WAYS "\n";

}
123
# Write the current node to the NODES file, if it is interesting
# (i.e. it still has tags once the ignore list has been applied).
# Reads the node from the global %Tags.
# Output line is: lat=<latitude>;lon=<longitude>;<taglist>
sub writeNode{
  my $Interesting = tagList();
  return unless($Interesting);
  printf NODES "lat=%f;lon=%f;%s\n", @Tags{'lat','lon'}, $Interesting;
}
131
# Get the global tags list, as a semicolon-separated string
#
# Returns 'key=value;key=value;...' for every entry of the global %Tags whose
# key is not in the ignore list, or an empty string if nothing is left.
# Semicolons within keys and values are converted to commas, since the
# semicolon is the field separator.
# Keys are written in sorted order: iterating the hash directly gives an
# unspecified order, which can differ from one run to the next.
sub tagList{
  my @Stuff;
  foreach my $Key (sort(keys(%Tags))){
    next if($IgnoreTags{$Key});
    # Work on copies, so that %Tags itself is left untouched
    my ($k, $v) = ($Key, $Tags{$Key});
    $k =~ s/;/,/g;
    $v =~ s/;/,/g;
    push(@Stuff, "$k=$v");
  }
  return(join(';',@Stuff));
}
144
# Parse an XML attributes string, return hash
#
# Takes the text following an element name, e.g. 'id="1" lat="51.5"/>', and
# returns a hash of attribute name => value.
# Only attributes written as name="value" (double quotes) are picked up.
# If a name is repeated, the last value wins.
sub getAttributes{
  my ($AttributeText) = @_;
  # In list context, a /g match returns every (name, value) capture pair in
  # order, which is exactly the flat list needed to fill the hash
  my %Attributes = ($AttributeText =~ m{(\w+)=\"(.*?)\"}g);
  return(%Attributes);
}
154
# Create a list of tags to ignore
#
# Returns a hash whose keys are the things to ignore (each with value 1):
#  - a plain tag key, e.g. 'created_by', is ignored on all objects
#  - '<type>:<key>=<value>', e.g. 'n:highway=primary', ignores that key/value
#    combination on one type of object only ('n' = node)
sub IgnoreTags{
  # Reserved words: used by this program as attribute or output field names
  my @Reserved = (
    'lat','lon','tagtype','id',  # all objects
    'from','to',                 # segment
    'polyline',                  # way
    );

  # OSM internal metadata
  my @Internal = ('visible','timestamp');

  # GPS metadata ('time' is probably GPS metadata too)
  my @Gps = ('ele','time','hdop','pdop','sat','speed','fix','course');

  # Not relevant for rendering
  my @Irrelevant = (
    'created_by','source','editor','author',
    'converted_by',  # Some program
    'class',         # deprecated
    '',              # Tags without a name
    );

  # Highway types which are not needed for nodes
  my @NodeHighways = map { 'n:highway='.$_ } (
    'primary','secondary','minor','unclassified','residential',
    'trunk','service','cycleway','bridleway','footway',
    );

  # Other key=value combinations to ignore on nodes
  my @NodeOther = (
    'n:natural=coastline',  # coastline nodes
    'n:natural=water',      # coastline nodes
    'n:oneway=yes',         # not relevant for nodes
    'n:oneway=true',        # not relevant for nodes
    );

  my %Ignore = map { $_ => 1 }
    (@Reserved, @Internal, @Gps, @Irrelevant, @NodeHighways, @NodeOther);
  return(%Ignore);
}
Note: See TracBrowser for help on using the repository browser.