source: subversion/applications/utils/export/osm2csv/osm2csv-segments.pl @ 11279

Last change on this file since 11279 was 8159, checked in by joerg, 12 years ago

use 0 instead of 0{@{}}, since it is deprecated in newer perl

  • Property svn:executable set to *
File size: 12.7 KB
Line 
1#!/usr/bin/perl
2
# Extend @INC at compile time so the project-local modules (Geo::*, Utils::*)
# loaded by the "use" lines further down can be found.
BEGIN {
    # Fallback with the lowest priority of the entries added here: the script
    # lives in applications/utils/export/osm2csv/, the shared modules in
    # applications/utils/perl_lib, i.e. two levels above the script.
    # FindBin resolves the real script directory even when $0 carries no
    # directory part (e.g. "perl osm2csv-segments.pl"), a case the
    # regex-based guess below cannot handle.
    require FindBin;
    unshift(@INC,"$FindBin::RealBin/../../perl_lib");

    # Historic guess: strip the last two path components from $0.
    # Only meaningful when $0 contains at least one "/".
    my $dir = $0;
    if ( $dir =~ s,[^/]+/[^/]+$,, ) {
        unshift(@INC,"$dir/../perl_lib");
    }

    # NOTE(review): "perl_perl_lib" looks like a typo for "perl_lib";
    # kept unchanged for compatibility -- confirm before removing.
    unshift(@INC,"../perl_perl_lib");

    # Standard checkout location below the user's home directory.
    # (A literal "~/..." entry is not listed: perl does not expand "~"
    # in @INC, so only the $ENV{HOME} form can ever match.)
    unshift(@INC,"$ENV{HOME}/svn.openstreetmap.org/applications/utils/perl_lib")
        if defined $ENV{HOME};
}
12
13
14use strict;
15use warnings;
16
17use Getopt::Long;
18use Storable ();
19use IO::File;
20use Pod::Usage;
21use Data::Dumper;
22
23use Geo::Filter::Area;
24use Geo::OSM::Planet;
25use Geo::Geometry;
26use Utils::Debug;
27use Utils::File;
28use Utils::LWP::Utils;
29use Utils::Math;
30use File::Slurp;
31use File::Basename;
32
sub parse_planet($$); # {}

# --- values filled in from the command line -------------------------------
our $man  = 0;                  # show the full manual
our $help = 0;                  # show the short usage
my $areas_todo;                 # comma separated list of area names
my $do_list_areas  = 0;         # only print the known areas
my $do_update_only = 0;         # skip areas whose output is up to date
my $tie_nodes_hash = undef;     # undef means "decide automatically"
my $Filename;                   # input file in OSM format

# --- output side ----------------------------------------------------------
our $SEGMENTS_FILENAME;         # output path without extension
our $EXT = ".csv";
our $FH_OSM_HIGHWAY;            # handle for segments of highway ways
our $FH_OSM_OTHER;              # handle for all other segments

# Option names are case sensitive because both --man and --MAN exist.
Getopt::Long::Configure('no_ignore_case');

my @option_spec = (
    'debug+'         => \$DEBUG,
    'd+'             => \$DEBUG,
    'verbose+'       => \$VERBOSE,
    'MAN'            => \$man,
    'man'            => \$man,
    'h|help|x'       => \$help,

    'tie-nodes-hash' => \$tie_nodes_hash,
    'no-mirror'      => \$Utils::LWP::Utils::NO_MIRROR,
    'proxy=s'        => \$Utils::LWP::Utils::PROXY,
    'osm=s'          => \$Filename,
    'area=s'         => \$areas_todo,
    'list-areas'     => \$do_list_areas,
    'update-only'    => \$do_update_only,
);

# Any unknown or malformed option ends the run with the usage text.
GetOptions(@option_spec)
    or pod2usage(1);
66
# Without --area the whole world is processed; area names are lower case.
$areas_todo ||= 'world';
$areas_todo=lc($areas_todo);

# Pure information requests are answered first, before any memory probing
# or downloading takes place.
pod2usage(1) if $help;
pod2usage(-verbose=>2) if $man;

if ( $do_list_areas ) {
    print Geo::Filter::Area->list_areas()."\n";
    exit;
}

# TODO:
# if the input filename is not planet*osm* we have to change the output filename too.

# The input is --osm=<file>, else the first positional argument, else
# whatever mirror_planet() returns.
$Filename ||= shift();
unless ( $Filename && -s $Filename ) {
    $Filename = mirror_planet();
}
pod2usage(1) unless $Filename;
if ( ! -s $Filename ) {
    die "Cannot read $Filename\n";
}

# See if we'll have to tie the Nodes Hash to a File
# This is at least 10 times slower, but we have less problems with
# running out of memory.
# This has to run after the input file name is known: the decision depends
# on the name and size of that file.
if ( ! defined $tie_nodes_hash ) {
    my $max_ram = mem_info("MemTotal");
    $max_ram = '' unless defined $max_ram;
    $max_ram =~ s/MB//;
    $max_ram ||= 0;             # unknown amount is treated as 0

    # Rough memory need per area; presumably in the same unit (MB) that
    # mem_info() reports -- TODO confirm.
    my %estimated_memory = (
        africa     => 2500,
        france     =>  192,
        europe     => 3000,
        germany    =>  500,
        uk         =>  660,
        world      => 4000,
        world_east => 4000,
        world_west => 4000,
    );

    # only for the original full Planet Files
    if ( $Filename =~ /planet/
         && ( -s $Filename ) > 1000*1000*500 ) {
        for my $area ( split(",",$areas_todo) ) {
            my $needed = $estimated_memory{$area};
            next unless defined $needed;    # no estimate for this area
            $tie_nodes_hash = 1
                if $needed > $max_ram;
        }
    }
}
114
# Handle of the input file currently being read
our $READ_FH = undef;
our $OK_POS  = 0;

# State of the element currently being parsed
our ( %MainAttr, $Type, %Tags, @NodeD );

# Data kept for the whole run over one area
our ( %Nodes, %Stats );
our $AREA_FILTER;

# Bookkeeping for the progress display
our $PARSING_START_TIME   = 0;
our $PARSING_DISPLAY_TIME = 0;
our $PARSING_ELEM_COUNT   = 0;

# All csv output goes below <planet_dir>/csv
my $data_dir = planet_dir() . "/csv";
mkdir_if_needed( $data_dir );

# Name of the input file without directory, compression suffix and ".osm";
# it becomes part of every output file name.
our $IN_BASENAME = '';
$IN_BASENAME = basename($Filename);
for ( $IN_BASENAME ) {
    s/\.(gz|bz|bz2)$//;
    s/\.osm$//;
}
134
# Main loop: for every requested area read the input once and write the
# segments of that area to
#   <data_dir>/osm-segents-<input basename>-<area>.csv        (highway ways)
#   <data_dir>/osm-segents-<input basename>-<area>_other.csv  (everything else)
# NOTE(review): "segents" looks like a misspelling of "segments", but it is
# part of the output file names and therefore left untouched here.
for my $area_name ( split(",",$areas_todo) ) {
    die "No Area Name defined\n"
        unless $area_name;

    if ( $do_update_only ) {
        my $needs_update = file_needs_re_generation(
            $Filename,"$data_dir/osm-segents-$IN_BASENAME-$area_name${EXT}");
        next unless $needs_update;
        print STDERR "Update needed. One of the files is old or non existent\n" if $VERBOSE;
    }

    # -----------------------------------------------------------------------------
    # Temporary data: forget everything collected for the previous area
    %MainAttr = ();
    %Tags     = ();
    @NodeD    = ();
    $Type     = '';
    %Nodes    = ();
    %Stats    = ();

    $PARSING_START_TIME=0;
    # Estimated Number of elements to show progress while reading in percent
    for my $type ( qw(elem tag node segment )) {
        $Stats{"${type} estim"} = estimated_max_count($type);
        $Stats{"${type} seen"}=0;
        $Stats{"${type} read"}=0;
    }

    #----------------------------------------------
    # Processing stage
    #----------------------------------------------

    $SEGMENTS_FILENAME = "$data_dir/osm-segents-$IN_BASENAME-$area_name";
    print STDERR "creating $SEGMENTS_FILENAME${EXT}\n" if $VERBOSE;

    if ( $tie_nodes_hash ) {
        # maybe we should move this file to /tmp
        # and lock it, and delete it in an END {} -Block
        print STDERR "Tie-ing Nodes Hash to '$SEGMENTS_FILENAME-Nodes.db'\n";
        dbmopen(%Nodes,"$SEGMENTS_FILENAME-Nodes.db",0666)
            or die "Could not open DBM File '$SEGMENTS_FILENAME-Nodes.db': $!";
    }
    $Stats{"Tie Nodes_hash"} = $tie_nodes_hash;

    print STDERR "Creating output files\n";

    # Output is written to *.part files; parse_planet() renames them to
    # their final names once reading has finished.
    my $highway_part = "$SEGMENTS_FILENAME.part";
    my $other_part   = "${SEGMENTS_FILENAME}_other.part";

    # File name and mode are passed separately, so characters in the path
    # can never be mistaken for part of the open mode.
    $FH_OSM_HIGHWAY = IO::File->new($highway_part,'>');
    if( ! $FH_OSM_HIGHWAY ) {
        warn "output_osm: Cannot write to $highway_part: $!\n";
        dbmclose(%Nodes) if $tie_nodes_hash;
        # Skip this area. (A "return" is not possible here: this code is
        # not inside a subroutine and perl would abort at run time.)
        next;
    }
    $FH_OSM_HIGHWAY->binmode(":utf8");

    $FH_OSM_OTHER = IO::File->new($other_part,'>');
    if( ! $FH_OSM_OTHER ) {
        warn "output_osm: Cannot write to $other_part: $!\n";
        # Do not leave the first handle open when the area is given up.
        $FH_OSM_HIGHWAY->close();
        $FH_OSM_HIGHWAY = 0;
        dbmclose(%Nodes) if $tie_nodes_hash;
        next;
    }
    $FH_OSM_OTHER->binmode(":utf8");

    parse_planet($Filename,$area_name);

    # Release the DBM binding, so that clearing %Nodes for the next area
    # does not touch the DBM file of this one.
    dbmclose(%Nodes) if $tie_nodes_hash;

    print STDERR "$area_name Done\n";
}
exit;
197
198
# Format $part as a percentage of $full without decimals, e.g. "50%".
# Returns the empty string when $full is false (0, "", undef), so that
# no division by zero can happen.
sub percent_string($$){
    my ($part, $full) = @_;

    return "" unless $full;
    return sprintf("%.0f%%", (100*$part/$full));
}
206
#----------------------------------------------
# Parsing planet.osm File
#----------------------------------------------
# Reads $Filename line by line, hands every line to parse_line() and
# afterwards closes the two output handles ($FH_OSM_HIGHWAY, $FH_OSM_OTHER)
# and renames their non-empty *.part files to the final *$EXT names.
#
# Parameters:
#   $Filename   input file, opened through data_open()
#   $area_name  area handed to Geo::Filter::Area->new()
#
# Sets the globals $AREA_FILTER, $PARSING_START_TIME, $READ_FH and
# $Stats{"time parsing"}. Dies when the input cannot be opened.
sub parse_planet($$){
    my $Filename = shift;
    my $area_name = shift;

    print STDERR "Reading and Parsing XML from $Filename for $area_name\n" if $DEBUG|| $VERBOSE;

    $AREA_FILTER = Geo::Filter::Area->new( area => $area_name );

    $PARSING_START_TIME=time();
    $READ_FH = data_open($Filename);
    # Fail with a readable message instead of a method call on undef.
    die "Cannot open $Filename for reading\n"
        unless $READ_FH;

    # defined(): a final line consisting of just "0" must not end the loop
    while ( defined( my $line = $READ_FH->getline() ) ) {
        parse_line($line);
    }

    if ($FH_OSM_OTHER) {
        $FH_OSM_OTHER->close() || warn "Cannot close Segments other File:$!\n";
        $FH_OSM_OTHER=0;
        my $other_part = "${SEGMENTS_FILENAME}_other.part";
        # Only a non-empty result replaces an existing final file
        if ( -s $other_part ) {
            rename($other_part,"${SEGMENTS_FILENAME}_other${EXT}")
                or warn "Cannot rename $other_part: $!\n";
        }
    }
    if ($FH_OSM_HIGHWAY) {
        $FH_OSM_HIGHWAY->close() || warn "Cannot close Segments highway File:$!\n";
        $FH_OSM_HIGHWAY=0;
        my $highway_part = "$SEGMENTS_FILENAME.part";
        if ( -s $highway_part ) {
            rename($highway_part,"$SEGMENTS_FILENAME${EXT}")
                or warn "Cannot rename $highway_part: $!\n";
        }

        print STDERR "\nwe're done\n";
    }

    $READ_FH->close();
    if ( $VERBOSE || $DEBUG )  {
        print STDERR "\n";
    }
    $Stats{"time parsing"} = time()-$PARSING_START_TIME;
    printf("osm2csv: Parsing Osm-Data in %.0f sec\n",time()-$PARSING_START_TIME )
        if $DEBUG || $VERBOSE;

}
247
# Handle one line of the OSM XML input.
#----------------------------------------------
# The input is not run through an XML parser; every line is classified
# with regular expressions:
#   <node ...>   position is remembered in %Nodes if the node lies inside
#                the current $AREA_FILTER
#   <nd ref=..>  the referenced node id is collected in @NodeD
#   </way>       for every pair of consecutive collected nodes that are both
#                known, one csv line "lat1,lon1,lat2,lon2,angle" is written
#                to $FH_OSM_HIGHWAY when the way carries a highway tag,
#                else to $FH_OSM_OTHER
#   <tag k="highway" ...>  remembered in %Tags
# Everything else is either ignored or reported as "Unknown Line".
#
# Parameter: $line  one line of input text
#
# Declared without a prototype: the sub takes an argument, and the former
# empty "()" prototype contradicted that.
sub parse_line {
    my $line = shift;

    my $element='';

    if ( $line =~ m/node.*id=[\'\"]([\d\.\+\-]+)[\'\"].*lat=[\'\"]([\d\.\+\-]+)[\'\"].*lon=[\'\"]([\d\.\+\-]+)[\'\"]/ ){
        my ($id,$lat,$lon) = ($1,$2,$3);
        my $node={id=>$id,lat=>$lat,lon=>$lon};
        $element="node";
        if ( $AREA_FILTER->inside($node) ) {
            $Nodes{$id} = sprintf("%f,%f",$lat,$lon);
            $Stats{"node read"}++;
            $Stats{"elem read"}++;
        }
    } elsif ( $line =~ m/<nd.*ref=[\'\"]([\d\.\+\-]+)[\'\"]/ ){
        push (@NodeD,$1);
    } elsif ( $line =~ m/<\/way/ ){
        # TODO: if this way is a member of a relation which has a highway tag
        # we would have to classify it as highway too, but this would need to keep
        # every way in Memory. Or read 3 passes (first relation, then way then node).
        $element = "nd_ref";

        # The target file is the same for all segments of one way
        my $out_fh = $Tags{highway} ? $FH_OSM_HIGHWAY : $FH_OSM_OTHER;

        my $from_node=0;
        for my $to_node ( @NodeD ) {
            if ( $from_node &&
                 defined($Nodes{$from_node}) &&
                 defined($Nodes{$to_node})
                ) {
                my ($lat1,$lon1)=split(",",$Nodes{$from_node});
                my ($lat2,$lon2)=split(",",$Nodes{$to_node});
                my $angle = angle_north_relative(
                    { lat => $lat1 , lon => $lon1 },
                    { lat => $lat2 , lon => $lon2 });

                printf {$out_fh} "%s,%s,%f\n",$Nodes{$from_node},$Nodes{$to_node},$angle;

                $Stats{"nd_ref read"}++;
                $Stats{"elem read"}++;
            }
            $from_node = $to_node;
        }
        @NodeD=();
    } elsif ( $line =~ m/<way.*id=/ ) {
        %MainAttr = ();
        %Tags     = ();
        $element = "way";
    } elsif ( $line =~ m/<relation/ ){
        %MainAttr = ();
        %Tags     = ();
    } elsif ( $line =~ m/<\/relation/ ){
    } elsif ( $line =~ m/<memberrelation/ ){
    } elsif ( $line =~ m/<member / ){
    } elsif ( $line =~ m/<\/node/ ){
    } elsif ( $line =~ m/<\/osm/ ){
    } elsif ( $line =~ m/<bound.*box=[\'\"]/ ){
    } elsif ( $line =~ m/<tag k="(highway)" v=\"([^\"]*)\"/ ){
        my $k=$1;
        my $v=$2;
        $Tags{$k}=$v;
    } elsif ( $line =~ m/<tag/ ){
    } elsif ( $line =~ m/<\?xml version/ ){
        # "?" is escaped: unescaped it made the "<" optional instead of
        # matching the literal "<?xml" of the XML declaration.
    } elsif ( $line =~ m/<osm version=[\'\"]0\.5[\'\"]/ ){
    } else {
        print STDERR "Unknown Line: $line";
    }

    # Lines without an element name are counted under the key " seen".
    $Stats{"$element seen"}++;
    $Stats{"elem seen"}++;
    if ( $Stats{"$element seen"} == 1 ) {
        # Remember the memory footprint at the first element of each kind
        $Stats{"memory at 1st $element rss"} = sprintf("%.0f",mem_usage('rss'));
        $Stats{"memory at 1st $element vsz"} = sprintf("%.0f",mem_usage('vsz'));
        if ( $DEBUG >1 || $VERBOSE >1) {
            print STDERR "\n";
        }
    }

    # Progress display: only after more than 1000 lines since the last
    # display and only if the clock has moved on since then.
    $PARSING_ELEM_COUNT++;
    if ( ( $VERBOSE || $DEBUG ) &&
         $PARSING_ELEM_COUNT >1000 &&
         ( time()-$PARSING_DISPLAY_TIME > 0.9)
         )  {
        $PARSING_ELEM_COUNT=0;
        $PARSING_DISPLAY_TIME= time();
        _print_parse_progress();
    }
}

# Write one progress line (counters, memory use, time estimate) to STDERR.
# The line starts and ends with "\r" and has no newline.
sub _print_parse_progress {
    print STDERR "\r";
    print STDERR "Read(".$AREA_FILTER->name()."): ";
    for my $k ( qw(elem node segment ) ) {
        # Short form shows only the first letter of the counter name
        if ( $DEBUG>6 || $VERBOSE>6) {
            print STDERR $k;
        } else {
            print STDERR substr($k,0,1);
        }
        print STDERR ":";
        printf STDERR "%d read",($Stats{"$k read"}||0);
        printf STDERR "=%s",percent_string($Stats{"$k read"},$Stats{"$k seen"});

        printf STDERR "(%d seen",($Stats{"$k seen"}||0);
        printf STDERR "=%s",percent_string($Stats{"$k seen"},$Stats{"$k estim"});
        print STDERR ") ";
    }

    # Track the peak of both memory figures and show the peak only when it
    # is clearly (30%) above the current value. "|| 0" covers the first
    # call, when no peak has been recorded yet.
    for my $kind ( qw(rss vsz) ) {
        my $current = sprintf("%.0f",mem_usage($kind));
        my $key     = "max $kind";
        $Stats{$key} = max($Stats{$key}||0,$current) if $current;
        printf STDERR "max-$kind %d" ,$Stats{$key}
            if ($Stats{$key}||0) > $current*1.3;
    }

    print STDERR mem_usage();
    print STDERR time_estimate($PARSING_START_TIME,
                               ($Stats{"node seen"}||0)+($Stats{"segment seen"}||0),
                               ($Stats{"node estim"}||0)+($Stats{"segment estim"}||0));
    print STDERR "\r";
    return;
}
# Character-data callback; intentionally does nothing.
#----------------------------------------------
# Takes ($Expat, $String) and ignores both. No caller is visible in this
# file, which reads its input line by line in parse_planet().
# Declared without a prototype: the former empty "()" prototype contradicted
# the two arguments unpacked here.
sub DoChar {
    my ($Expat, $String) = @_;
    return;
}
376
377##################################################################
378# Usage/manual
379
380__END__
381
382=head1 NAME
383
384B<osm2csv-segments.pl> Version 0.02
385
386=head1 DESCRIPTION
387
388B<osm2csv-segments.pl> is a program to convert osm-segments from xml format to
389a plain text file in csv form.
390This format is then normally used by osmtrackfilter to compare against osm segments.
391
392=head1 SYNOPSIS
393
394B<Common usages:>
395
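396osm2csv-segments.pl [-d] [-v] [-h] [--no-mirror] [--proxy=<proxy:port>] [--area=<area>[,<area>...]] [--list-areas] [--tie-nodes-hash] [--update-only] [--osm=<planet_filename.osm>] [<planet_filename.osm>]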
397
398=head1 OPTIONS
399
400=over 2
401
402=item B<--man> Complete documentation
403
404Complete documentation
405
406=item B<--proxy=<proxy:port>>
407
408Use proxy Server to get the newest planet.osm File
409
410=item B<--no-mirror>
411
412do not try to get the newest planet.osm first
413
414=item B<--osm=filename>
415
416Source File in OSM Format
417
418=item B<--area=germany> Area Filter
419
420Only read area for processing
421
422=item B<--list-areas>
423
424print all areas possible
425
426=item B<--tie-nodes-hash>
427
428If set, we will tie the Nodes Hash to a file.
429This is at least 10 times slower, but we have fewer problems with
430running out of memory.
431We have an internal list of estimated memory use, and we'll try
432to tie it automatically if you don't have enough memory for a
433specified region.
434
435=item B<planet_filename.osm>
436
437the file to read from
438
439=back
440
441=head1 COPYRIGHT
442
443Copyright 2006, OJW
444
445This program is free software; you can redistribute it and/or
446modify it under the terms of the GNU General Public License
447as published by the Free Software Foundation; either version 2
448of the License, or (at your option) any later version.
449
450This program is distributed in the hope that it will be useful,
451but WITHOUT ANY WARRANTY; without even the implied warranty of
452MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
453GNU General Public License for more details.
454
455You should have received a copy of the GNU General Public License
456along with this program; if not, write to the Free Software
457Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
458
459=head1 AUTHOR
460
461OJW <streetmap@blibbleblobble.co.uk>
462Jörg Ostertag (osm2csv-for-openstreetmap@ostertag.name)
463
464=head1 SEE ALSO
465
466http://www.openstreetmap.org/
467
468=cut
Note: See TracBrowser for help on using the repository browser.