source: subversion/applications/utils/tagwatch/process.pl @ 5086

Last change on this file since 5086 was 4854, checked in by ojw, 12 years ago

templates-that-aren't-really-templates :(

File size: 3.4 KB
Line 
1#-----------------------------------------------------------------
2# Parses an OpenStreetMap XML file looking for tags, and counting
3# how often each one is used
4#-----------------------------------------------------------------
5# Usage: perl process.pl < data.osm
6# Will create an ./Output/ directory and fill it with text files
7# describing the tags used in data.osm
8#-----------------------------------------------------------------
9# This file is part of Tagwatch
10# Tagwatch is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# Tagwatch is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with Tagwatch.  If not, see <http://www.gnu.org/licenses/>.
22#---------------------------------------------------------------
use strict;
use warnings;

my %IgnoreTags = IgnoreTags();   # Tag keys that are never counted
my $Tagtype = '-';               # First letter of the enclosing object
                                 # ('n', 's' or 'w'), or '-' outside any object
my %Tags;                        # key => number of uses
my %Values;                      # key => value => number of uses
my %Usage;                       # key => value => object type => number of uses

# The parser is line-based: it relies on every element of the input sitting
# on a line of its own and does not decode XML entities.
while(my $Line = <>){
  if($Line =~ m{<tag k=["'](.*?)["'] v=["'](.*?)["']\s*/>}){
    # Tag within an object
    my ($Name, $Value) = ($1, $2);

    # Empty values and ignored keys are not counted
    next if $Value eq '' or $IgnoreTags{$Name};

    $Tags{$Name}++;
    $Values{$Name}->{$Value}++;
    $Usage{$Name}->{$Value}->{$Tagtype}++;
  }
  elsif($Line =~ m{<(node|segment|way) }){
    # Beginning of an object: remember its type by first letter
    $Tagtype = substr($1,0,1);
  }
  elsif($Line =~ m{</(node|segment|way)}){
    # End of an object
    $Tagtype = '-';
  }
  # Any other line (e.g. a <seg .../> inside a way) needs no handling
}

# Write the results:
#   Output/tags.txt         "<count> <key>" for every key
#   Output/tag_<key>.txt    "<count> <value>" for every value of that key
#   Output/usage_<key>.txt  "<value> <uses on nodes> <uses on ways>"
# Keys and values are written in sorted order so that repeated runs over the
# same input produce identical files.
my $Dir = "Output";
if(!-d $Dir){
  mkdir $Dir or die "Cannot create directory $Dir: $!\n";
}

open(my $TagsFile, '>', "$Dir/tags.txt")
  or die "Cannot write $Dir/tags.txt: $!\n";

foreach my $Tag(sort keys %Tags){
  printf $TagsFile "%d %s\n", $Tags{$Tag}, $Tag;

  # Tag keys come straight from the input and may not form a usable filename
  # (e.g. when they contain '/'). Such a key is reported and its per-key
  # file skipped, rather than aborting the whole run.
  my $TagFile   = OpenOutput("$Dir/tag_$Tag.txt");
  my $UsageFile = OpenOutput("$Dir/usage_$Tag.txt");

  foreach my $Value(sort keys %{$Values{$Tag}}){
    if($TagFile){
      printf $TagFile "%d %s\n", $Values{$Tag}->{$Value}, $Value;
    }
    if($UsageFile){
      # A value seen on only one object type has no entry for the other
      my $Counts = $Usage{$Tag}->{$Value};
      printf $UsageFile "%s %d %d\n",
        $Value, $Counts->{'n'} || 0, $Counts->{'w'} || 0;
    }
  }

  # Buffered write errors only surface when the handle is closed
  if($TagFile){
    close($TagFile) or warn "Error closing $Dir/tag_$Tag.txt: $!\n";
  }
  if($UsageFile){
    close($UsageFile) or warn "Error closing $Dir/usage_$Tag.txt: $!\n";
  }
}
close($TagsFile) or die "Error closing $Dir/tags.txt: $!\n";

# Opens $Path for writing.
# Returns the filehandle, or nothing (after printing a warning) on failure.
sub OpenOutput{
  my ($Path) = @_;
  my $Handle;
  if(!open($Handle, '>', $Path)){
    warn "Cannot write $Path: $!\n";
    return;
  }
  return $Handle;
}
74
# Build the table of tag keys that should be left out of the statistics.
# Returns a hash (flattened to a key/value list) in which every ignored
# key maps to 1.
# TODO: put this on a wiki page?
sub IgnoreTags{
  # Reserved words: all objects, then segment-only, then way-only
  my @Reserved = ('lat', 'lon', 'tagtype', 'id', 'from', 'to', 'polyline');

  # OSM internal metadata
  my @Internal = ('visible', 'timestamp', 'user');

  # Not relevant for rendering
  my @NotRendered = ('created_by', 'source', 'editor', 'author');

  # GPS metadata ('time' is presumed to be GPS metadata as well)
  my @Gps = ('ele', 'time', 'hdop', 'pdop', 'sat', 'speed', 'fix', 'course');

  # 'class' is deprecated, 'converted_by' is written by some program,
  # and the empty string stands for tags without a name
  my @Other = ('class', 'converted_by', '');

  my %Ignore = map { $_ => 1 }
    (@Reserved, @Internal, @NotRendered, @Gps, @Other);
  return(%Ignore);
}
Note: See TracBrowser for help on using the repository browser.