source: subversion/applications/utils/export/osm2csv/osm2csv-segments.pl @ 27747

Last change on this file since 27747 was 17752, checked in by joerg, 10 years ago

Add option for planet-dir

  • Property svn:executable set to *
File size: 13.0 KB
Line 
1#!/usr/bin/perl
2
BEGIN {
    # Make the project's perl_lib directory (Geo::*, Utils::*) loadable.
    # Every entry is unshift()ed, so the LAST one added is searched FIRST.

    # Lowest priority: locate perl_lib relative to the real location of this
    # script (utils/export/osm2csv/ -> utils/perl_lib).  Unlike the $0 based
    # guess below this also works when the script is started without any
    # directory part in $0 (e.g. via PATH or "perl osm2csv-segments.pl").
    require FindBin;
    unshift(@INC,"$FindBin::Bin/../../perl_lib");

    # Historic guess: strip the last two path components from $0.
    my $dir = $0;
    $dir =~s,[^/]+/[^/]+$,,;
    unshift(@INC,"$dir/../perl_lib");

    # NOTE(review): "perl_perl_lib" looks like a typo for "perl_lib";
    # kept unchanged because it cannot be confirmed from this file.
    unshift(@INC,"../perl_perl_lib");

    # Perl never expands "~" in @INC entries, so the former "~/svn..." entry
    # could not match; the $ENV{HOME} form is the working equivalent.
    unshift(@INC,"$ENV{HOME}/svn.openstreetmap.org/applications/utils/perl_lib")
        if defined $ENV{HOME};
}
12
13
14use strict;
15use warnings;
16
17use Getopt::Long;
18use Storable ();
19use IO::File;
20use Pod::Usage;
21use Data::Dumper;
22
23use Geo::Filter::Area;
24use Geo::OSM::Planet;
25use Geo::Geometry;
26use Utils::Debug;
27use Utils::File;
28use Utils::LWP::Utils;
29use Utils::Math;
30use File::Slurp;
31use File::Basename;
32
sub parse_planet($$); # forward declaration, the definition follows the main program

# ----------------------------------------------------------------------------
# Command line state
our $man=0;
our $help=0;
my $areas_todo;
my $do_list_areas=0;
my $do_update_only=0;
my $tie_nodes_hash=undef;   # undef means "decide automatically" (see below)
my $Filename;
my $planet_dir='';

# Shared with parse_planet()/parse_line()
our $SEGMENTS_FILENAME;     # output path without extension
our $EXT=".csv";
our $FH_OSM_HIGHWAY;        # segments of ways carrying a highway tag
our $FH_OSM_OTHER;          # segments of all other ways

Getopt::Long::Configure('no_ignore_case');
GetOptions (
             'debug+'              => \$DEBUG,
             'd+'                  => \$DEBUG,
             'verbose+'            => \$VERBOSE,
             'MAN'                 => \$man,
             'man'                 => \$man,
             'h|help|x'            => \$help,

             'tie-nodes-hash'      => \$tie_nodes_hash,
             'no-mirror'           => \$Utils::LWP::Utils::NO_MIRROR,
             'proxy=s'             => \$Utils::LWP::Utils::PROXY,
             'osm=s'               => \$Filename,
             'area=s'              => \$areas_todo,
             'list-areas'          => \$do_list_areas,
             'update-only'         => \$do_update_only,
             'planet-dir:s'        => \$planet_dir,
             )
    or pod2usage(1);

# Answer pure information requests before doing any real work.
pod2usage(1) if $help;
pod2usage(-verbose=>2) if $man;

Geo::OSM::Planet::planet_dir($planet_dir)
    if $planet_dir;

if ( $do_list_areas ) {
    print Geo::Filter::Area->list_areas()."\n";
    exit;
}

$areas_todo ||= 'world';
$areas_todo=lc($areas_todo);

# TODO:
# if the input filename is not planet*osm* we have to change the output filename too.
# Input file: --osm, else first positional argument, else mirror the planet file.
$Filename ||= shift();
unless ( $Filename && -s $Filename ) {
    $Filename = mirror_planet();
};
pod2usage(1) unless $Filename;
if ( ! -s $Filename ) {
    die "Cannot read $Filename\n";
}

# See if we'll have to tie the Nodes Hash to a File
# This is at least 10 times slower, but we have less problems with
# running out of memory.
# This check has to run AFTER the input file name is resolved, otherwise it
# would test an undefined $Filename whenever --osm was not given.
if ( ! defined $tie_nodes_hash ) {
    my $mem_total = mem_info("MemTotal");
    # keep only the numeric part (value is reported with an "MB" suffix)
    my $max_ram = ( defined $mem_total && $mem_total =~ m/([\d\.]+)/ ) ? $1 : 0;
    # Estimated memory need per area, compared against $max_ram
    my $estimated_memory = {
        africa     => 2500,
        france     =>  192,
        europe     => 3000,
        germany    =>  500,
        uk         =>  660,
        world      => 4000,
        world_east => 4000,
        world_west => 4000,
    };
    if ( $Filename =~ /planet/
         && ( (-s $Filename) > 1000*1000*4000) ) { # Default for large *.osm (Planet) Files
        for my $area ( split(",",$areas_todo )){
            my $needed = $estimated_memory->{$area};
            next unless defined $needed;    # no estimate for this area
            $tie_nodes_hash=1
                if $needed > $max_ram;
        }
    }
}

our $READ_FH=undef;
our $OK_POS=0;


our (%MainAttr,$Type,%Tags,@NodeD);
# Stored data
our (%Nodes, %Stats);
our $AREA_FILTER;
our $PARSING_START_TIME=0;
our $PARSING_DISPLAY_TIME=0;
our $PARSING_ELEM_COUNT=0;

my $data_dir=planet_dir()."/csv";
mkdir_if_needed( $data_dir );

# Basename of the input without .osm and compression suffix; part of the output name
our $IN_BASENAME='';
$IN_BASENAME=basename($Filename);
$IN_BASENAME =~ s/\.(gz|bz|bz2)$//;
$IN_BASENAME =~ s/\.osm$//;

AREA:
for my $area_name ( split(",",$areas_todo) ) {
    die "No Area Name defined\n"
        unless $area_name;

    if ( $do_update_only ) {
        next AREA
            unless file_needs_re_generation($Filename,"$data_dir/osm-segments-$IN_BASENAME-$area_name${EXT}");
        print STDERR "Update needed. One of the files is old or non existent\n" if $VERBOSE;
    }
    # -----------------------------------------------------------------------------
    # Temporary data

    %MainAttr = ();
    %Tags     = ();
    $Type='';
    %Nodes    = ();
    %Stats    = ();

    # Currently active Area Filter
    $PARSING_START_TIME=0;
    # Estimated Number of elements to show progress while reading in percent
    for my $type ( qw(elem tag node segment )) {
        $Stats{"${type} estim"} = estimated_max_count($type);
        $Stats{"${type} seen"}=0;
        $Stats{"${type} read"}=0;
    }

    #----------------------------------------------
    # Processing stage
    #----------------------------------------------

    $SEGMENTS_FILENAME = "$data_dir/osm-segments-$IN_BASENAME-$area_name";
    print STDERR "creating $SEGMENTS_FILENAME${EXT}\n" if $VERBOSE;

    # Output is written to *.part files; parse_planet() renames them when done.
    # A failure here skips this area; "return" is not allowed outside a sub.
    print STDERR "Creating output files\n";

    $FH_OSM_HIGHWAY = IO::File->new("$SEGMENTS_FILENAME.part", ">");
    if( ! $FH_OSM_HIGHWAY ) {
        warn "output_osm: Cannot write to $SEGMENTS_FILENAME.part: $!\n";
        next AREA;
    }
    $FH_OSM_HIGHWAY->binmode(":utf8");

    $FH_OSM_OTHER = IO::File->new("${SEGMENTS_FILENAME}_other.part", ">");
    if( ! $FH_OSM_OTHER ) {
        warn "output_osm: Cannot write to ${SEGMENTS_FILENAME}_other.part: $!\n";
        $FH_OSM_HIGHWAY->close();
        $FH_OSM_HIGHWAY = undef;
        next AREA;
    }
    $FH_OSM_OTHER->binmode(":utf8");

    if ( $tie_nodes_hash ) {
        # maybe we should move this file to /tmp
        # and lock it, and delete it in an END {} -Block
        print STDERR "Tie-ing Nodes Hash to '$SEGMENTS_FILENAME-Nodes.db'\n";
        dbmopen(%Nodes,"$SEGMENTS_FILENAME-Nodes.db",0666)
            or die "Could not open DBM File '$SEGMENTS_FILENAME-Nodes.db': $!";
        # Drop nodes left over in the DBM file by an earlier run
        %Nodes = ();
    }
    $Stats{"Tie Nodes_hash"} = $tie_nodes_hash;

    parse_planet($Filename,$area_name);

    # Untie again so the next area starts with a plain (or freshly tied) hash
    dbmclose(%Nodes) if $tie_nodes_hash;

    print STDERR "$area_name Done\n";
}
exit;
202
203
# percent_string($part, $full)
# Returns $part as a whole-number percentage of $full, formatted like "42%".
# Returns the empty string when $full is false (0, '' or undef), so no
# division by zero can happen.
sub percent_string($$){
    my ($numerator, $denominator) = @_;

    return "" unless $denominator;

    my $ratio = 100 * $numerator / $denominator;
    return sprintf("%.0f%%", $ratio);
}
211
#----------------------------------------------
# Parsing planet.osm File
#----------------------------------------------
# parse_planet($Filename, $area_name)
#   $Filename  - OSM input file, opened through data_open()
#   $area_name - area handed to Geo::Filter::Area->new( area => ... )
#
# Feeds every input line to parse_line(), then closes the two output handles
# ($FH_OSM_OTHER, $FH_OSM_HIGHWAY) and renames each non-empty *.part file to
# its final name.  Records the elapsed time in $Stats{"time parsing"}.
# Dies if the input file cannot be opened.
sub parse_planet($$){
    my ($Filename, $area_name) = @_;

    print STDERR "Reading and Parsing XML from $Filename for $area_name\n" if $DEBUG|| $VERBOSE;

    $AREA_FILTER = Geo::Filter::Area->new( area => $area_name );

    $PARSING_START_TIME=time();
    $READ_FH = data_open($Filename)
        or die "parse_planet: Cannot open $Filename\n";

    # defined() keeps a trailing line that is just "0" from ending the loop early
    while ( defined( my $line = $READ_FH->getline() ) ) {
        parse_line($line);
    };

    if ($FH_OSM_OTHER) {
        _finish_output($FH_OSM_OTHER,
                       "${SEGMENTS_FILENAME}_other.part",
                       "${SEGMENTS_FILENAME}_other${EXT}",
                       "other");
        $FH_OSM_OTHER=0;
    }
    if ($FH_OSM_HIGHWAY) {
        _finish_output($FH_OSM_HIGHWAY,
                       "$SEGMENTS_FILENAME.part",
                       "$SEGMENTS_FILENAME${EXT}",
                       "highway");
        $FH_OSM_HIGHWAY=0;

        print STDERR "\nwe're done\n";
    }

    $READ_FH->close();
    if ( $VERBOSE || $DEBUG )  {
        print STDERR "\n";
    }
    $Stats{"time parsing"} = time()-$PARSING_START_TIME;
    printf("osm2csv: Parsing Osm-Data in %.0f sec\n",time()-$PARSING_START_TIME )
        if $DEBUG || $VERBOSE;

    return;
}

# Close one output handle and move its non-empty *.part file to the final name.
# Problems are reported with warn() only, so the other output is still handled.
sub _finish_output {
    my ($fh, $part_name, $final_name, $label) = @_;

    $fh->close() || warn "Cannot close Segments $label File:$!\n";
    if ( -s $part_name ) {
        rename($part_name, $final_name)
            or warn "Cannot rename $part_name to $final_name: $!\n";
    }
    return;
}
252
# parse_line($line)
# Handles ONE line of the OSM XML input; the file is matched line by line
# with regular expressions, no XML parser is involved.
#   <node ...>  : remember "lat,lon" in %Nodes if the node is inside $AREA_FILTER
#   <nd ref=..> : collect the referenced node id in @NodeD
#   <tag k="highway" ...> : remember the value in %Tags
#   </way>      : write one CSV row "lat1,lon1,lat2,lon2,angle" per pair of
#                 consecutive known nodes to the highway or the other file
# Updates the counters in %Stats and prints a progress line when
# $VERBOSE or $DEBUG is set.
#----------------------------------------------
sub parse_line {
    my $line = shift;

    my $element='';

    # \bid= keeps an attribute like uid='..' from being taken for the node id
    if ( $line =~ m/<node\b.*\bid=[\'\"]([\d\.\+\-]+)[\'\"].*\blat=[\'\"]([\d\.\+\-]+)[\'\"].*\blon=[\'\"]([\d\.\+\-]+)[\'\"]/ ){
        my $node={id=>$1,lat=>$2,lon=>$3};
        $element="node";
        if ( $AREA_FILTER->inside($node) ) {
            $Nodes{ $node->{id} } = sprintf("%f,%f",$node->{lat}, $node->{lon});
            $Stats{"node read"}++;
            $Stats{"elem read"}++;
        }
    } elsif ( $line =~ m/<nd\b.*ref=[\'\"]([\d\.\+\-]+)[\'\"]/ ){
        push (@NodeD,$1);
    } elsif ( $line =~ m/<\/way/ ){
        # TODO: if this way is a member of a relation which has a highway tag
        # we would have to classify it as highway too, but this would need to keep
        # every way in Memory. Or read 3 passes (first relation, then way then node).
        $element = "nd_ref";

        # The target file is the same for all segments of this way
        my $out_fh = $Tags{highway} ? $FH_OSM_HIGHWAY : $FH_OSM_OTHER;

        my $from_node=0;
        for my $to_node ( @NodeD ) {
            # Only segments with both end points stored (inside the area) are written
            if ( $from_node &&
                 defined($Nodes{$from_node}) &&
                 defined($Nodes{$to_node})
                ) {
                my ($lat1,$lon1)=split(",",$Nodes{$from_node});
                my ($lat2,$lon2)=split(",",$Nodes{$to_node});
                my $angle = angle_north_relative(
                    { lat => $lat1 , lon => $lon1 },
                    { lat => $lat2 , lon => $lon2 });

                printf {$out_fh} "%s,%s,%f\n",$Nodes{$from_node},$Nodes{$to_node},$angle;
                $Stats{"nd_ref read"}++;
                $Stats{"elem read"}++;
            }
            $from_node = $to_node;
        }
        @NodeD=();
    } elsif ( $line =~ m/<way.*id=/ ) {
        %MainAttr = ();
        %Tags     = ();
        $element = "way";
    } elsif ( $line =~ m/<relation/ ){
        %MainAttr = ();
        %Tags     = ();
    } elsif ( $line =~ m/<\/relation/ ){
    } elsif ( $line =~ m/<memberrelation/ ){
    } elsif ( $line =~ m/<member / ){
    } elsif ( $line =~ m/<\/node/ ){
    } elsif ( $line =~ m/<\/osm/ ){
    } elsif ( $line =~ m/<bound.*box=[\'\"]/ ){
    } elsif ( $line =~ m/<tag\s+k=([\'\"])highway\1\s+v=([\'\"])(.*?)\2/ ){
        # accept single or double quotes, like all other patterns here
        $Tags{highway}=$3;
    } elsif ( $line =~ m/<tag/ ){
    } elsif ( $line =~ m/<\?xml version/ ){
    } elsif ( $line =~ m/<osm version=[\'\"]0\.5[\'\"]/ ){
    } else {
        print STDERR "Unknown Line: $line";
    }

    $Stats{"elem seen"}++;
    # Lines without an element type only count as "elem"
    if ( $element ne '' ) {
        $Stats{"$element seen"}++;
        if ( $Stats{"$element seen"}== 1 ) {
            $Stats{"memory at 1st $element rss"} = sprintf("%.0f",mem_usage('rss')||0);
            $Stats{"memory at 1st $element vsz"} = sprintf("%.0f",mem_usage('vsz')||0);
            if ( $DEBUG >1 || $VERBOSE >1) {
                print STDERR "\n";
            }
        }
    }

    # Progress output: at most about once per second and only every >1000 lines
    $PARSING_ELEM_COUNT++;
    if ( ( $VERBOSE || $DEBUG ) &&
         $PARSING_ELEM_COUNT >1000 &&
         ( time()-$PARSING_DISPLAY_TIME > 0.9)
         )  {
        $PARSING_ELEM_COUNT=0;
        $PARSING_DISPLAY_TIME= time();
        _show_parse_progress();
    }
    return;
}

# Print one progress line (counters, memory usage, time estimate) to STDERR.
# The line starts and ends with "\r" so the next report overwrites it.
sub _show_parse_progress {
    print STDERR "\r";
    print STDERR "Read(".$AREA_FILTER->name()."): ";
    for my $k ( qw(elem node segment ) ) {
        if ( $DEBUG>6 || $VERBOSE>6) {
            print STDERR $k;
        } else {
            print STDERR substr($k,0,1);
        }
        print STDERR ":";
        printf STDERR "%d read",($Stats{"$k read"}||0);
        printf STDERR "=%s",percent_string($Stats{"$k read"}||0,$Stats{"$k seen"});

        printf STDERR "(%d seen",($Stats{"$k seen"}||0);
        printf STDERR "=%s",percent_string($Stats{"$k seen"}||0,$Stats{"$k estim"});
        print STDERR ") ";
    }

    # Track the peak values; only show them when clearly above the current ones
    for my $kind ( qw(rss vsz) ) {
        my $current = sprintf("%.0f",mem_usage($kind)||0);
        my $peak    = $Stats{"max $kind"} || 0;
        $peak = max($peak,$current) if $current;
        $Stats{"max $kind"} = $peak;
        printf STDERR "max-$kind %d" ,$peak if $peak > $current*1.3;
    }

    print STDERR mem_usage();
    print STDERR time_estimate($PARSING_START_TIME,
                               ($Stats{"node seen"}||0)+($Stats{"segment seen"}||0),
                               ($Stats{"node estim"}||0)+($Stats{"segment estim"}||0));
    print STDERR "\r";
    return;
}
# DoChar($Expat, $String)
# Character-data callback with the ($Expat, $String) argument pair.
# NOTE(review): nothing in this file calls it (input is parsed line by line
# in parse_line()); presumably a leftover from an XML parser based version.
# Intentionally does nothing and returns nothing.
#----------------------------------------------
sub DoChar {
    my ($Expat, $String) = @_;
    return;
}
381
382##################################################################
383# Usage/manual
384
385__END__
386
387=head1 NAME
388
389B<osm2csv-segments.pl> Version 0.02
390
391=head1 DESCRIPTION
392
393B<osm2csv-segments.pl> is a program to convert osm-segments from xml format to
394a plain text file in csv form.
395This format then is normally used by osmtrackfilter to compare against osm segments
396
397=head1 SYNOPSIS
398
399B<Common usages:>
400
osm2csv-segments.pl [-d] [-v] [-h] [--no-mirror] [--proxy=<proxy:port>] [--list-areas] <planet_filename.osm>
402
403=head1 OPTIONS
404
405=over 2
406
407=item B<--man> Complete documentation
408
409Complete documentation
410
411=item B<--proxy=<proxy:port>>
412
413Use proxy Server to get the newest planet.osm File
414
415=item B<--no-mirror>
416
417do not try to get the newest planet.osm first
418
419=item B<--osm=filename>
420
421Source File in OSM Format
422
423=item B<--area=germany> Area Filter
424
425Only read area for processing
426
427=item B<--list-areas>
428
429print all areas possible
430
431=item B<--tie-nodes-hash>
432
433if set we will tie the Nodes Hash to a File
434This is at least 10 times slower, but we have less problems with
435running out of memory.
436We have an internal list of estimated memory use and we'll try
automatically to tie it if you don't have enough memory for a
438specified region.
439
440=item B<--planet-dir=[path-to-planet-files]>
441
442The directory to put and check the planet Files.
443Default is ~/osm/planet/
444
445
446=item B<planet_filename.osm>
447
448the file to read from
449
450=back
451
452=head1 COPYRIGHT
453
454Copyright 2006, OJW
455
456This program is free software; you can redistribute it and/or
457modify it under the terms of the GNU General Public License
458as published by the Free Software Foundation; either version 2
459of the License, or (at your option) any later version.
460
461This program is distributed in the hope that it will be useful,
462but WITHOUT ANY WARRANTY; without even the implied warranty of
463MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
464GNU General Public License for more details.
465
466You should have received a copy of the GNU General Public License
467along with this program; if not, write to the Free Software
468Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
469
470=head1 AUTHOR
471
472OJW <streetmap@blibbleblobble.co.uk>
473Jörg Ostertag (osm2csv-for-openstreetmap@ostertag.name)
474
475=head1 SEE ALSO
476
477http://www.openstreetmap.org/
478
479=cut
Note: See TracBrowser for help on using the repository browser.