source: subversion/applications/utils/planet.osm/planet.rb @ 4540

Last change on this file since 4540 was 4540, checked in by spaetz, 12 years ago

use smaller page sizes, these are too big. Also do away with extraneous spacing

  • Property svn:executable set to *
File size: 7.0 KB
Line 
1#!/usr/bin/ruby -w
2
3#$: << Dir.pwd+"/../../www.openstreetmap.org/ruby/api"
4
5require 'mysql'
6require 'time'
7require 'osm/servinfo.rb'
8require 'cgi'
9
# Open the single MySQL connection that every query in this script goes
# through. The four connection globals are not defined in this file;
# presumably they come from osm/servinfo.rb -- TODO confirm.
$mysql = Mysql.real_connect $DBSERVER, $USERNAME, $PASSWORD, $DATABASE
11
# Create a hash of tags out of a string of semicolon-separated key=value
# pairs.
#
# tag_str:: the raw tag string, e.g. "highway=primary;name=A1", or nil.
#
# Each entry is split at its first "=" only, so values may themselves
# contain "=". An entry without "=" becomes a key with an empty value.
# A nil tag_str yields an empty hash instead of raising, in the same way
# that read_timestamp tolerates nil input.
#
# Returns a Hash in the format {key1=>value1, key2=>value2...}.
def read_tags(tag_str)
  tags = {}
  return tags if tag_str.nil?

  tag_str.split(';').each do |tag|
    key, value = tag.split('=', 2)
    # An empty entry splits into []; keep it as an empty key with an empty value.
    tags[key || ""] = value || ""
  end
  tags
end
17
# Turn a time string into a Time object.
# A missing value (nil, the empty string or the literal string "NULL")
# is mapped to the epoch, Time.at(0); anything else goes to Time.parse.
def read_timestamp(time_str)
  case time_str
  when nil, "", "NULL"
    Time.at(0)
  else
    Time.parse(time_str)
  end
end
22
# Builds the extra WHERE fragment used for paging by id.
# A lastid of 0 means "first page" and adds no condition; any other
# value restricts the query to ids greater than lastid.
def pageSQL(lastid)
  lastid == 0 ? "" : " and id > #{lastid}"
end
30
# Runs one page of the visible-node query (ids above lastid, in id order)
# and yields once per node with the parameters
#   id, lat, lon, timestamp, tags
# where id is an integer, lat/lon are floats, timestamp comes from
# read_timestamp and tags is the hash built by read_tags.
def all_nodes(lastid)
  sql = "select id, latitude, longitude, timestamp, tags from current_nodes where visible = 1 #{pageSQL(lastid)} order by id limit 500000"
  $mysql.query(sql) do |result|
    result.each do |node_id, lat, lon, stamp, tag_str|
      yield node_id.to_i, lat.to_f, lon.to_f, read_timestamp(stamp), read_tags(tag_str)
    end
  end
end
41
# Runs one page of the visible-segment query (ids above lastid, in id
# order) and yields once per segment with the parameters
#   id, from_id, to_id, timestamp, tags
# where the three ids are integers, timestamp comes from read_timestamp
# and tags is the hash built by read_tags.
def all_segments(lastid)
  sql = "select id, node_a, node_b, timestamp, tags from current_segments where visible = 1 #{pageSQL(lastid)} order by id limit 500000"
  $mysql.query(sql) do |result|
    result.each do |segment_id, from_id, to_id, stamp, tag_str|
      yield segment_id.to_i, from_id.to_i, to_id.to_i, read_timestamp(stamp), read_tags(tag_str)
    end
  end
end
51
# Runs one page of the visible-way query (ids above lastid, in id order)
# and yields once per way with the parameters
#   id, [seg_id1, seg_id2, ...], timestamp, tags
# The segment ids are collected from all_way_segments and the tags hash
# is built from the flat [k1, v1, k2, v2, ...] array that all_way_tags
# returns.
def all_ways(lastid)
  sql = "select id, timestamp from current_ways where visible = 1 #{pageSQL(lastid)} order by id limit 500000"
  $mysql.query(sql) do |result|
    result.each do |raw_id, stamp|
      way_id = raw_id.to_i
      segment_ids = []
      all_way_segments(way_id) { |segment| segment_ids << segment.to_i }
      tag_pairs = all_way_tags(way_id)
      yield way_id, segment_ids, read_timestamp(stamp), Hash[*tag_pairs]
    end
  end
end
67
# Here we produce the segments associated with a way. Instead of running
# one segments query per way, the whole current_way_segments table is read
# from the beginning in pages (limit 500000 rows per query), ordered by
# id and sequence_id. Since the callers ask for way IDs in ascending order
# we can perform a sort of "merge join": the caller provides the ID it is
# interested in and we scan forward in the result set to find it, yield
# each of its segments and return. The only hard part is that when we
# notice we have gone too far, we have to jump back one row so the next
# call sees that row again. That is what all the row_tell/row_seek is
# about.

# Result set of the page currently being scanned, or nil when the next
# page still has to be fetched.
$way_segments_data = nil
# Last row read, as [id, sequence_id, segment_id]; the first two entries
# are the key from which the next page resumes.
$way_segments_current = [0,0]
# True from the moment a page is fetched until its first row is read; if
# it is still true once the page has been scanned, the page was empty and
# the table is exhausted.
$way_segments_done = false

# Yields each segment id of way curr_id, one at a time, exactly as the
# database driver returned it (callers convert it themselves).
#
# curr_id:: integer way id; successive calls must use ascending ids
#           because the scan only ever moves forward.
def all_way_segments(curr_id)
  loop do
    if $way_segments_data.nil?
      sql = "select id, sequence_id, segment_id from current_way_segments " +
            "where id > #{$way_segments_current[0]} " +
            "or (id = #{$way_segments_current[0]} and sequence_id > #{$way_segments_current[1]}) " +
            "order by id, sequence_id limit 500000;"
      $way_segments_data = $mysql.query(sql)
      return if $way_segments_data.nil?
      $way_segments_done = true
    end

    # Position of the first row that has not been consumed yet.
    pos = $way_segments_data.row_tell
    # A global variable cannot be a block parameter on Ruby 1.9 and later,
    # so take a local parameter and assign the global explicitly.
    $way_segments_data.each do |row|
      $way_segments_current = row
      $way_segments_done = false
      id = row[0].to_i
      if id > curr_id
        # Too far: seek back one row so the next call gets this row again.
        $way_segments_data.row_seek(pos)
        return
      end
      pos = $way_segments_data.row_tell
      yield row[2] if id == curr_id
    end

    # This page is used up. Fetch the next one unless it was empty.
    $way_segments_data = nil
    return if $way_segments_done
  end
end
115   
# Way tags are more irritating than way segments because there is no
# unique key to sort by and resume from. So we have to collect the results
# for an ID in an array, and if we hit the end of the result set while
# doing so, we toss out what we have collected and start again with a new
# query that begins at the ID of the last row read.
#
# Because of this, detecting the end of the table becomes tricky: when we
# reach the end of the result set and it is also the end of the table, we
# would keep requesting the last bit over and over again. So the rule is:
# if a freshly fetched result set is scanned to its end without meeting
# any ID greater than the requested one, we are done. That is what
# $way_tags_first is tracking.

# Result set of the page currently being scanned, or nil when the next
# page still has to be fetched.
$way_tags_data = nil
# Last row read, as [id, k, v]; its id is where the next page starts.
$way_tags_current = [0]
# True from the moment a page is fetched until a row with an ID greater
# than the requested one is seen.
$way_tags_first = false

# Returns the tags of way curr_id, all in one go, as a flat array
# [k1, v1, k2, v2, ...] (empty if the way has no tags).
#
# curr_id:: integer way id; successive calls must use ascending ids
#           because the scan only ever moves forward.
def all_way_tags(curr_id)
  loop do
    if $way_tags_data.nil?
      $way_tags_data = $mysql.query "select id,k,v from current_way_tags where id >= #{$way_tags_current[0]} order by id limit 50000;"
      $way_tags_first = true
      # No result set at all: there is nothing collected yet, so return an
      # empty list rather than touching a not-yet-assigned local.
      return [] if $way_tags_data.nil?
    end

    # Position of the first row that has not been consumed yet.
    pos = $way_tags_data.row_tell
    tags = []
    # A global variable cannot be a block parameter on Ruby 1.9 and later,
    # so take a local parameter and assign the global explicitly.
    $way_tags_data.each do |row|
      $way_tags_current = row
      id = row[0].to_i
      if id > curr_id
        # Too far: seek back one row so the next call gets this row again.
        $way_tags_data.row_seek(pos)
        $way_tags_first = false
        return tags
      end
      pos = $way_tags_data.row_tell
      tags << row[1] << row[2] if id == curr_id
    end

    # So we've hit the end of this result set. If it's the end of the
    # table we return tags, otherwise tags is cleared at the top of the
    # loop and we continue with a new query.
    $way_tags_data = nil
    return tags if $way_tags_first
  end
end
167   
# Prints one <tag k="..." v="..." /> element per entry of the tags hash,
# with key and value HTML-escaped.
def out_tags(tags)
  tags.each do |key, value|
    escaped_key = CGI.escapeHTML(key)
    escaped_value = CGI.escapeHTML(value)
    puts %{    <tag k="#{escaped_key}" v="#{escaped_value}" />}
  end
end
172
# Document header.
puts '<?xml version="1.0" encoding="UTF-8"?>'
puts '<osm version="0.3" generator="OpenStreetMap planet.rb">'
puts '  <bound box="-90,-180,90,180" origin="http://www.openstreetmap.org/api/0.4" />'

# Nodes: keep requesting pages, each starting after the last id seen,
# until a page yields no rows at all.
lastid = 0
loop do
  found = false
  all_nodes(lastid) do |id, lat, lon, timestamp, tags|
    found = true
    lastid = id
    print %{  <node id="#{id}" lat="#{format('%.7f', lat)}" lon="#{format('%.7f', lon)}" timestamp="#{timestamp.xmlschema}"}
    if tags.empty?
      puts "/>"
    else
      puts ">"
      out_tags tags
      puts "  </node>"
    end
  end
  break unless found
end

# Segments: same paging scheme as for nodes.
lastid = 0
loop do
  found = false
  all_segments(lastid) do |id, from, to, timestamp, tags|
    found = true
    lastid = id
    print %{  <segment id="#{id}" from="#{from}" to="#{to}" timestamp="#{timestamp.xmlschema}"}
    if tags.empty?
      puts "/>"
    else
      puts ">"
      out_tags tags
      puts "  </segment>"
    end
  end
  break unless found
end

# Ways: same paging scheme; a way is self-closing only when it has
# neither segments nor tags.
lastid = 0
loop do
  found = false
  all_ways(lastid) do |id, segs, timestamp, tags|
    found = true
    lastid = id
    print %{  <way id="#{id}" timestamp="#{timestamp.xmlschema}"}
    if segs.empty? && tags.empty?
      puts "/>"
    else
      puts ">"
      segs.each { |seg_id| puts %{    <seg id="#{seg_id}" />} }
      out_tags tags
      puts "  </way>"
    end
  end
  break unless found
end
puts "</osm>"
Note: See TracBrowser for help on using the repository browser.