source: subversion/applications/utils/planet.osm/planet.rb @ 4671

Last change on this file since 4671 was 4671, checked in by jonb, 12 years ago

planet.rb needs to use a floating point division, not an int. Otherwise you get 51.0000000 etc

  • Property svn:executable set to *
File size: 7.0 KB
Line 
1#!/usr/bin/ruby -w
2
3#$: << Dir.pwd+"/../../www.openstreetmap.org/ruby/api"
4
5require 'mysql'
6require 'time'
7require 'osm/servinfo.rb'
8require 'cgi'
9
# Open the single database connection shared by every query in this script.
# The four connection settings are globals that are not assigned in this file
# (presumably they come from osm/servinfo.rb -- confirm).
$mysql = Mysql.real_connect($DBSERVER, $USERNAME, $PASSWORD, $DATABASE)
11
# Build a Hash out of a list of semicolon separated key=value pairs.
#
# Each entry is split at its first '=' only, so a value may itself contain
# '='. An entry without any '=' becomes a key mapped to the empty string.
# When a key occurs more than once, the last occurrence wins.
#
# tag_str may be nil (for example a NULL database column); that yields an
# empty Hash instead of raising NoMethodError.
def read_tags(tag_str)
  tags = {}
  return tags if tag_str.nil?

  tag_str.split(';').each do |tag|
    key, _separator, value = tag.partition('=')
    tags[key] = value
  end
  tags
end
17
# Convert a timestamp string into a Time. This never returns nil.
#
# nil, blank strings, the literal "NULL" and the MySQL zero timestamp
# ("0000-00-00 ...") all map to the Unix epoch, Time.at(0). Anything else is
# handed to Time.parse, which raises ArgumentError when it cannot make sense
# of the text.
def read_timestamp(time_str)
  text = time_str.to_s.strip
  # A zero date has month 0 and day 0, which is not a real calendar date, so
  # it is treated like a missing timestamp rather than being parsed.
  return Time.at(0) if text.empty? || text == 'NULL' || text.start_with?('0000-00-00')

  Time.parse(text)
end
22
# Build the SQL fragment that restricts a paged query to rows after lastid.
#
# Returns "" for the first page (lastid of 0) and " and id > N" otherwise.
# lastid is coerced with Integer(), so a value that is not a whole number
# raises ArgumentError/TypeError instead of being spliced into the SQL text.
def pageSQL(lastid)
  last = Integer(lastid)
  last == 0 ? '' : " and id > #{last}"
end
30
# Yields once for every visible node on the page that follows lastid, with
#   id, lat, lon, timestamp, tags
# id, lat and lon are Integers (the raw column values run through to_i),
# timestamp comes from read_timestamp, and tags is a hash in the format
# {key1=>value1, key2=>value2...} built by read_tags.
#
# lastid is the id of the last node already handled (0 for the first page).
# limit is the maximum number of rows fetched per call; it defaults to the
# page size this script has always used.
def all_nodes(lastid, limit = 500000)
  sql = "select id, latitude, longitude, timestamp, tags from current_nodes " \
        "where visible = 1 #{pageSQL(lastid)} order by id limit #{Integer(limit)}"
  $mysql.query(sql) do |rows|
    rows.each do |row|
      yield row[0].to_i, row[1].to_i, row[2].to_i, read_timestamp(row[3]), read_tags(row[4])
    end
  end
end
41
# Yields once for every visible segment on the page that follows lastid, with
#   id, from_id, to_id, timestamp, tags
# The three ids are Integers (to_i of the raw column values), timestamp comes
# from read_timestamp and tags is the Hash built by read_tags.
#
# lastid is the id of the last segment already handled (0 for the first
# page). limit is the maximum number of rows fetched per call; it defaults to
# the page size this script has always used.
def all_segments(lastid, limit = 500000)
  sql = "select id, node_a, node_b, timestamp, tags from current_segments " \
        "where visible = 1 #{pageSQL(lastid)} order by id limit #{Integer(limit)}"
  $mysql.query(sql) do |rows|
    rows.each do |row|
      yield row[0].to_i, row[1].to_i, row[2].to_i, read_timestamp(row[3]), read_tags(row[4])
    end
  end
end
51
# Yields once for every visible way on the page that follows lastid, with
#   id, [id1,id2,id3...], timestamp, tags
# The array holds the way's segment ids as Integers, in the order
# all_way_segments produces them; tags is a Hash built from the flat
# [k1, v1, k2, v2, ...] array returned by all_way_tags.
#
# Ways are visited in ascending id order. all_way_segments and all_way_tags
# only ever scan forward through their tables, so they depend on that order.
#
# lastid is the id of the last way already handled (0 for the first page).
# limit is the maximum number of ways fetched per call; it defaults to the
# page size this script has always used.
def all_ways(lastid, limit = 500000)
  sql = "select id, timestamp from current_ways " \
        "where visible = 1 #{pageSQL(lastid)} order by id limit #{Integer(limit)}"
  $mysql.query(sql) do |ways|
    ways.each do |row|
      id = row[0].to_i
      segs = []
      all_way_segments(id) { |s| segs << s.to_i }
      tags_arr = all_way_tags(id)
      yield id, segs, read_timestamp(row[1]), Hash[*tags_arr]
    end
  end
end
67
# Here we produce the segments associated with a way. Instead of running a
# segments query for each way, we read the whole current_way_segments table
# from the beginning in groups of 500000 rows. Since both that table and the
# callers' ids are sorted, we can perform a sort of "merge join": the caller
# provides the ID they are interested in, we scan forward in the result set
# to find it, yield each of its segments and return. The only hard part is
# that when we notice we have gone too far, we have to jump back one row so
# the next call sees that row again. That's what all the seek/tell is about.
#
# State kept between calls:
#   $way_segments_data    - the result set being scanned (nil = fetch a new one)
#   $way_segments_current - the last row read, [id, sequence_id, segment_id];
#                           the next query resumes strictly after it
#   $way_segments_done    - true while a freshly fetched result set has not
#                           produced any row, i.e. the table is exhausted
$way_segments_data = nil
$way_segments_current = [0,0]
$way_segments_done = false

# Yields each segment id of way curr_id, one at a time, in sequence_id order.
# The values are yielded exactly as the database returned them. Callers must
# ask for ids in ascending order because the scan never goes backwards.
def all_way_segments(curr_id)
  loop do
    if $way_segments_data.nil?
      last_id = $way_segments_current[0].to_i
      last_seq = $way_segments_current[1].to_i
      $way_segments_data = $mysql.query(
        "select id, sequence_id, segment_id from current_way_segments " \
        "where id > #{last_id} " \
        "or (id = #{last_id} and sequence_id > #{last_seq}) " \
        "order by id, sequence_id limit 500000"
      )
      return if $way_segments_data.nil?
      $way_segments_done = true
    end

    pos = $way_segments_data.row_tell
    # The row is copied into the global by hand: a global variable cannot be
    # used as a block parameter on Ruby 1.9 and later.
    $way_segments_data.each do |row|
      $way_segments_current = row
      $way_segments_done = false
      id = row[0].to_i
      if id > curr_id
        # Need to seek back one so we get this row again...
        $way_segments_data.row_seek(pos)
        return
      end
      pos = $way_segments_data.row_tell
      yield row[2] if id == curr_id
    end

    # This result set is used up; fetch the next group unless the fresh query
    # came back empty.
    $way_segments_data = nil
    return if $way_segments_done
  end
end
115   
# State kept between calls of all_way_tags:
#   $way_tags_data     - the result set being scanned (nil = fetch a new one)
#   $way_tags_current  - the last row read, [id, k, v]
#   $way_tags_first    - true while every row read from the current result
#                        set still has the id the query started from
#   $way_tags_start_id - the id the current result set was queried from
$way_tags_data = nil
$way_tags_current = [0]
$way_tags_first = false
$way_tags_start_id = 0

# Way tags are more irritating than way segments because there's no unique
# key to sort by. So a new query has to restart at the last id seen
# (id >= ...) and may return rows we have already read. We therefore collect
# the results for an ID in an array, and if we hit the end of the result set
# we toss out what we've collected and start again with a new query...
#
# Because of this, detecting the end of the table becomes tricky: when we
# reach the end of the result set and it's also the end of the table, we'd
# keep requesting the last bit over and over again. So the rule is: if the
# ID the query started from is the *only* ID in the result set, we're done.
# That's what $way_tags_first is tracking. (A single way with as many tags as
# the query limit would be mistaken for the end of the table.)

# Returns the tags of way curr_id, all in one go, as a flat array
# [k1, v1, k2, v2, ...]; the array is empty when the way has no tags.
# Callers must ask for ids in ascending order because the scan never goes
# backwards.
def all_way_tags(curr_id)
  loop do
    if $way_tags_data.nil?
      $way_tags_start_id = $way_tags_current[0].to_i
      $way_tags_data = $mysql.query(
        "select id,k,v from current_way_tags where id >= #{$way_tags_start_id} order by id limit 50000"
      )
      $way_tags_first = true
      # No result set at all: nothing has been collected yet, so the answer
      # is an empty tag list.
      return [] if $way_tags_data.nil?
    end

    pos = $way_tags_data.row_tell
    tags = []
    # The row is copied into the global by hand: a global variable cannot be
    # used as a block parameter on Ruby 1.9 and later.
    $way_tags_data.each do |row|
      $way_tags_current = row
      id = row[0].to_i
      # Any id other than the starting one proves this result set made
      # progress, so running off its end must trigger another query.
      $way_tags_first = false if id != $way_tags_start_id
      if id > curr_id
        # Need to seek back one so we get this row again...
        $way_tags_data.row_seek(pos)
        $way_tags_first = false
        return tags
      end
      pos = $way_tags_data.row_tell
      tags << row[1] << row[2] if id == curr_id
    end

    # So we've hit the end of this result set. If it's the end of the table,
    # we return tags, otherwise we clear tags and continue...
    $way_tags_data = nil
    return tags if $way_tags_first
  end
end
167   
# Output every tag in the hash as one indented <tag k="..." v="..." /> line
# on standard output, with key and value HTML-escaped.
#
# Keys and values are converted with to_s first, so a nil value (for example
# a NULL column) is written as an empty attribute instead of raising.
def out_tags(tags)
  tags.each do |key, value|
    puts %{    <tag k="#{CGI.escapeHTML(key.to_s)}" v="#{CGI.escapeHTML(value.to_s)}" />}
  end
end
172
# ---- Document header ----
puts '<?xml version="1.0" encoding="UTF-8"?>'
puts '<osm version="0.3" generator="OpenStreetMap planet.rb">'
puts '  <bound box="-90,-180,90,180" origin="http://www.openstreetmap.org/api/0.4" />'

# Each of the three sections below pages through one table: it keeps asking
# for the rows after the last id it has seen and stops as soon as a request
# produces no row at all.

# ---- Nodes ----
# latitude/longitude arrive as integers; dividing by the float 10000000.0
# keeps the fractional part when they are printed with seven decimals.
more_nodes = true
node_cursor = 0

while more_nodes
  more_nodes = false
  all_nodes(node_cursor) do |id, lat, lon, timestamp, tags|
    more_nodes = true
    node_cursor = id
    lat_text = sprintf('%.7f', lat/10000000.0)
    lon_text = sprintf('%.7f', lon/10000000.0)
    print %{  <node id="#{id}" lat="#{lat_text}" lon="#{lon_text}" timestamp="#{timestamp.xmlschema}"}
    if tags.empty?
      # Nothing to nest, so the element is closed immediately.
      puts "/>"
      next
    end
    puts ">"
    out_tags tags
    puts "  </node>"
  end
end

# ---- Segments ----
more_segments = true
segment_cursor = 0

while more_segments
  more_segments = false
  all_segments(segment_cursor) do |id, from, to, timestamp, tags|
    more_segments = true
    segment_cursor = id
    print %{  <segment id="#{id}" from="#{from}" to="#{to}" timestamp="#{timestamp.xmlschema}"}
    if tags.empty?
      puts "/>"
      next
    end
    puts ">"
    out_tags tags
    puts "  </segment>"
  end
end

# ---- Ways ----
more_ways = true
way_cursor = 0

while more_ways
  more_ways = false
  all_ways(way_cursor) do |id, segs, timestamp, tags|
    more_ways = true
    way_cursor = id
    print %{  <way id="#{id}" timestamp="#{timestamp.xmlschema}"}
    if tags.empty? && segs.empty?
      puts "/>"
      next
    end
    puts ">"
    segs.each do |seg_id|
      puts %{    <seg id="#{seg_id}" />}
    end
    out_tags tags
    puts "  </way>"
  end
end

# ---- Document footer ----
puts "</osm>"
Note: See TracBrowser for help on using the repository browser.