source: subversion/applications/rendering/mapnik/utils/analyze_postgis_log.pl @ 23827

Last change on this file since 23827 was 20441, checked in by jochen, 10 years ago

preliminary support for 900913 proj

File size: 7.1 KB
Line 
1#!/usr/bin/perl
2#
3# analyze_postgis_log.pl [MAPFILE] <postgres.log
4#
5# MAPFILE ist the name of the Mapnik stylefile, default 'osm.xml'
6#
7# A script that reads a PostGIS log file AND an osm.xml style file and tries
8# to match the select queries from both, delivering indications about which
9# layers (at which zoom levels) require the longest execution times.
10#
11# prerequisites:
12# * xmlstarlet installed
13# * Postgres log file on stdin, with lines that look like this:
14#   2009-11-22 06:30:50 CET osm LOG:  duration: 26266.851 ms  execute <unnamed>: SELECT AsBinary("way",'NDR') AS geom,"disused","name","waterway" from (select way,waterway,disused,name,case when tunnel in ('yes','true','1') then 'yes'::text else tunnel end as tunnel from planet_osm_line where waterway IS NOT NULL order by z_order) as water_lines WHERE "way" && SetSRID('BOX3D(-1.428222656250023 53.31774904749086,-1.032714843749977 53.55336278552809)'::box3d,4326)
15#
16# You will have to enable duration logging in Postgres, and you may have to change
17# the regular expressions in this script if your log looks different.
18#
19# This script uses the bounding box from the PostGIS query to find out which
20# zoom level was requested. You may have to change the "guessZoomLevel" function
21# further down depending on the projection your database is in.
22
23# Written by Frederik Ramm <frederik@remote.org>, PD.
24
25use strict;
26#use warnings;
27
28use Math::Trig;
29
30my $input;
31my $statement;
32my $duration;
33my $timings = {};
34
35my $mapfile = shift || 'osm.xml';
36
37while (1)
38{
39    $input = <>;
40    chomp $input;
41
42    if (!defined($input) || $input =~ /^\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d (.*)/)
43    {
44        my $rest = $1;
45
46        if ($rest =~ /^.*LOG:\s+duration: (\d+)(\.\d+)? ms\s*$/)
47        {
48            # duration given on extra line
49            $duration = $1;
50        }
51        process_statement($statement, $duration) if (defined $statement);
52        last unless defined($input);
53        undef $statement;
54        if ($rest =~ /^.*LOG:\s+(duration: (\d+)(\.\d+)? ms\s+)?execute <[^>]*>: (SELECT .*)/)
55        {
56            if (defined($1))
57            {
58                $duration = $2;
59            }
60            else
61            {
62                undef $duration;
63            }
64            $statement = $4;
65        }
66    }
67    else
68    {
69        $statement .= $input if (defined $statement);
70    }
71}
72
73my $potential_originating_layers = findStatements();
74
75foreach my $ds(sort { $timings->{$b}->{-1} <=> $timings->{$a}->{-1} } keys %$timings)
76{
77    print "$ds\n";
78    printf "* probable originating layers: ";
79    my $comma = "";
80    foreach my $pol(@{$potential_originating_layers->{$ds}})
81    {
82        print $comma.$pol->{layer};
83        $comma = ", ";
84    }
85    print "none found" if ($comma eq "");
86    print "\n";
87    print "  zoom  count  min time  avg time  max time\n";
88    print "  -----------------------------------------\n";
89    foreach my $z (sort { $a <=> $b } keys %{$timings->{$ds}})
90    {
91        next unless $z >= 0;
92
93        my $t = $timings->{$ds}->{$z};
94
95        my $count =  $t->{count};
96        my $min   = ($t->{min}//0)           / 1000;
97        my $avg   =  $t->{sum} / $t->{count} / 1000;
98        my $max   =  $t->{max}               / 1000;
99
100        my $warn = '';
101        $warn = '***'   if ($count >  10 && ($min >  10 || $avg >  20 || $max >  30));
102        $warn = '*****' if (                ($min >  60 || $avg >  80 || $max > 100));
103
104        printf "    %2d %6d %8.1fs %8.1fs %8.1fs $warn\n", $z, $count, $min, $avg, $max;
105    }
106    printf "\n";
107}
108
109sub process_statement
110{
111    my ($stmt, $dur) = @_;
112    $stmt =~ s/\s+/ /g;
113    if ($stmt =~ /SELECT.*from (\(.*\) as \S+) WHERE \S+ && SetSRID\('BOX3D\((\S+) (\S+),(\S+) (\S+)\)'::box3d,\s*(4326|900913)\)/)
114    {
115        my ($datasource, $left, $bottom, $right, $top, $proj) = ($1, $2, $3, $4, $5, $6);
116        my $zoom = guessZoomLevel($left, $bottom, $right, $top, $proj);
117        $timings->{$datasource}->{$zoom}->{count} ++;
118        $timings->{$datasource}->{$zoom}->{sum} += $dur;
119        $timings->{$datasource}->{$zoom}->{min} = $dur if ($dur < ($timings->{$datasource}->{$zoom}->{min} // 999_999_999_999_999));
120        $timings->{$datasource}->{$zoom}->{max} = $dur if ($dur > ($timings->{$datasource}->{$zoom}->{max} // 0));
121        # the following line determines the sorting of the final results.
122        # currently the queries that use the longest total time are at the
123        # top but you could also do a "max" calculation here or simply count
124        # the invocations.
125        $timings->{$datasource}->{-1} += $dur;
126    }
127    else
128    {
129        print STDERR "cannot understand $stmt\n";
130    }
131}
132
133# Guesses the zoom level that was rendered by renderd, based on the bounding box
134# in the PostGIS query. Basically we're looking for a zoom level in which the
135# requested bbox would be just slightly wider than 8 tiles (=1 meta tile).
136# This code is for PostGIS databases in lat/lon (osm2pgsql -l) and will have to
137# be changed to work with spherical mercator coordinates.
138
139sub guessZoomLevel
140{
141    my ($left, $bottom, $right, $top, $proj) = @_;
142    if ($proj == 4326)
143    {
144        my ($left_tile, $bottom_tile) = getTileNumber($bottom, $left, 18);
145        my ($right_tile, $top_tile) = getTileNumber($top, $right, 18);
146        my $width = $right_tile - $left_tile;
147        my $zoom = 18;
148        while ($width >= 16)
149        {
150            $width /= 2;
151            $zoom--;
152        }
153        return $zoom;
154    } elsif ($proj == 900913)
155    {
156        return 99;
157    }
158}
159
160sub getTileNumber
161{
162    my ($lat,$lon,$zoom) = @_;
163    my $xtile = int(($lon+180)/360 *2**$zoom);
164    my $ytile = int((1 - log(tan($lat*pi/180) + sec($lat*pi/180))/pi)/2 *2**$zoom);
165    return(($xtile, $ytile));
166}
167
168# parses the style file to find out which layers use which statements
169sub findStatements
170{
171    my $result;
172
173    # Using xmlstarlet resolves all entities for us...
174    open(STYLE, "xmlstarlet c14n $mapfile|") or die;
175
176    my $styles = {};
177    my $currentlayername;
178    my $where;
179
180    while(<STYLE>)
181    {
182        chomp;
183        if (/<Style name="(.*)"/)
184        {
185            $styles->{$1} = { name => $1 };
186        }
187        elsif (/<Layer\s.*name="([^"]+)"/)
188        {
189            $currentlayername = $1;
190            undef $where;
191        }
192        elsif (/<StyleName>(.*)</)
193        {
194            my $style = $styles->{$1};
195            if (!defined($style))
196            {
197                die "layer '$currentlayername' references undefined style '$1'";
198            }
199        }
200        elsif (/<Parameter name="table">(.*)/)
201        {
202            my $table = $1;
203            while($table !~ /<\/Parameter>/)
204            {
205                $_ = <STYLE>;
206                chomp;
207                $table .= $_;
208            }
209            if ($table !~ /^\s*(\(.*\)\s+as\s+\S+)\s*<\/Parameter>$/i)
210            {
211                die "parse error: $table";
212            }
213            $table = $1;
214            $table =~ s/\s+/ /g;
215            $table =~ /(\(select (.*) from (.*) where (.*)\) as \S+)/ or die $table;
216            $where = $1;
217        }
218        elsif (/<\/Layer>/)
219        {
220            if (defined($where))
221            {
222                push(@{$result->{$where}}, { layer => $currentlayername })
223            }
224        }
225    }
226    return $result;
227}
228
Note: See TracBrowser for help on using the repository browser.