source: subversion/applications/utils/planet.osm/planet.rb @ 4687

Last change on this file since 4687 was 4687, checked in by jonb, 12 years ago

planet.rb: ensure we have a UTF8 Mysql connection

  • Property svn:executable set to *
File size: 7.1 KB
Line 
1#!/usr/bin/ruby -w
2
3#$: << Dir.pwd+"/../../www.openstreetmap.org/ruby/api"
4
5require 'mysql'
6require 'time'
7require 'osm/servinfo.rb'
8require 'cgi'
9
# Open the process-wide MySQL handle used by every query below. The charset
# option is set before connecting so that the connection itself is UTF-8.
# The $DBSERVER/$USERNAME/$PASSWORD/$DATABASE globals are not set in this
# file; presumably they come from osm/servinfo.rb (confirm).
$mysql = Mysql.init
$mysql.options(Mysql::SET_CHARSET_NAME, 'utf8')
$mysql.real_connect($DBSERVER, $USERNAME, $PASSWORD, $DATABASE)
13
# Build a hash out of a string of semicolon-separated key=value pairs.
#
# tag_str - String such as "highway=primary;name=Foo", or nil.
#
# Each pair is split on its FIRST '=' only, so values may themselves contain
# '='. A pair without any '=' becomes a key with an empty-string value. When
# a key occurs more than once the last occurrence wins.
#
# Returns a Hash of String keys to String values; an empty Hash when tag_str
# is nil or empty.
def read_tags(tag_str)
  # A nil tag_str (e.g. an SQL NULL column) simply means "no tags".
  return {} if tag_str.nil?

  tags = {}
  tag_str.split(';').each do |tag|
    key, _sep, value = tag.partition('=')
    tags[key] = value
  end
  tags
end
19
# Turn a time string into a Time object.
#
# A missing value (nil, the empty string, or the literal text "NULL") maps
# to the Unix epoch, Time.at(0); anything else is handed to Time.parse.
def read_timestamp(time_str)
  missing = time_str.nil? || time_str == "" || time_str == "NULL"
  return Time.at(0) if missing

  Time.parse(time_str)
end
24
# Build the SQL fragment that restricts a paged query to rows after lastid.
# For the first page (lastid == 0) no restriction is needed, so the fragment
# is empty; otherwise it is an extra " and id > N" condition.
def pageSQL(lastid)
  return "" if lastid == 0

  " and id > #{lastid}"
end
32
# Fetch one page (at most 500000 rows) of visible nodes with id greater than
# lastid, in ascending id order, and yield once per node:
#   id, lat, lon, timestamp, tags
# id, lat and lon are integers (to_i of the column values), timestamp comes
# from read_timestamp, and tags is the hash built by read_tags, in the
# format {key1=>value1, key2=>value2...}.
def all_nodes(lastid)
  sql = "select id, latitude, longitude, timestamp, tags from current_nodes where visible = 1 #{pageSQL(lastid)} order by id limit 500000"
  $mysql.query(sql) do |result|
    result.each do |fields|
      node_id, lat, lon, stamp, tag_str = fields
      yield node_id.to_i, lat.to_i, lon.to_i, read_timestamp(stamp), read_tags(tag_str)
    end
  end
end
43
# Fetch one page (at most 500000 rows) of visible segments with id greater
# than lastid, in ascending id order, and yield once per segment:
#   id, from_id, to_id, timestamp, tags
# The three ids are integers (to_i of the column values), timestamp comes
# from read_timestamp and tags is the hash built by read_tags.
def all_segments(lastid)
  sql = "select id, node_a, node_b, timestamp, tags from current_segments where visible = 1 #{pageSQL(lastid)} order by id limit 500000"
  $mysql.query(sql) do |result|
    result.each do |fields|
      segment_id, from_id, to_id, stamp, tag_str = fields
      yield segment_id.to_i, from_id.to_i, to_id.to_i, read_timestamp(stamp), read_tags(tag_str)
    end
  end
end
53
# Fetch one page (at most 500000 rows) of visible ways with id greater than
# lastid, in ascending id order, and yield once per way:
#   id, [seg_id1, seg_id2, ...], timestamp, tags
# The segment ids are gathered from all_way_segments and the tags from
# all_way_tags, whose flat [k1, v1, k2, v2, ...] array is turned into a hash.
def all_ways(lastid)
  sql = "select id, timestamp from current_ways where visible = 1 #{pageSQL(lastid)} order by id limit 500000"
  $mysql.query(sql) do |result|
    result.each do |fields|
      way_id = fields[0].to_i
      segment_ids = []
      all_way_segments(way_id) { |seg| segment_ids << seg.to_i }
      tag_pairs = all_way_tags(way_id)
      yield way_id, segment_ids, read_timestamp(fields[1]), Hash[*tag_pairs]
    end
  end
end
69
# Here we produce the segments associated with a way. Instead of running a
# segments query for each way, the data is read from the beginning of the
# table in batches of up to 500000 rows, ordered by (id, sequence_id). Since
# the rows are sorted we can perform a sort of "merge join": the caller
# provides the way ID it is interested in, and we scan forward through the
# batch to find it, yield each of its segments and return. The only hard
# part is that when we notice we have read one row too far, we have to jump
# back one row so the next call sees it again. That is what the
# row_tell/row_seek bookkeeping is about.
$way_segments_data = nil        # result set currently being scanned, or nil
$way_segments_current = [0,0]   # last row read; its id and sequence_id key the next batch
$way_segments_done = false      # true while a freshly fetched batch has produced no rows

# Yields the segment_id column of every row belonging to way curr_id, one at
# a time, in sequence_id order. Rows with a smaller way id are skipped.
# Returns without yielding when the way has no segments or no rows are left.
def all_way_segments(curr_id)
  loop do
    if $way_segments_data.nil?
      # Resume strictly after the last row read, keyed on (id, sequence_id).
      $way_segments_data = $mysql.query "select id, sequence_id, segment_id from current_way_segments
                                                                   where id > #{$way_segments_current[0]} 
                                                                   or (id = #{$way_segments_current[0]} and sequence_id >  #{$way_segments_current[1]})
                                                                   order by id, sequence_id limit 500000;"
      return if $way_segments_data.nil?
      $way_segments_done = true
    end
    pos = $way_segments_data.row_tell
    # Since Ruby 1.9 a global variable cannot be used as a block parameter,
    # so the row is taken as a local and published to the global explicitly.
    $way_segments_data.each do |row|
      $way_segments_current = row
      $way_segments_done = false
      id = row[0].to_i
      if id < curr_id
        # Row belongs to an earlier way: skip it for good.
        pos = $way_segments_data.row_tell
        next
      end
      if id == curr_id
        pos = $way_segments_data.row_tell
        yield row[2]
        next
      end
      # Need to seek back one so we get this row again...
      $way_segments_data.row_seek(pos)
      return
    end
    # Batch exhausted: drop it so the next pass fetches the following one.
    $way_segments_data = nil
    # A fresh batch that contained no rows at all means the table is finished.
    return if $way_segments_done
  end
end
117   
$way_tags_data = nil      # result set currently being scanned, or nil
$way_tags_current = [0]   # last row read; its id is where the next batch starts
$way_tags_first = false   # true until a row with a larger way id is seen in the batch
# Way tags are more irritating because there is no unique key to sort by. So
# we have to collect the results for an ID in an array, and if we hit the end
# of the batch while doing so, we toss out what we have collected and start
# again with a new query beginning at that ID.

# Because of this, detecting the end of the table becomes tricky: when we
# reach the end of the result set and it is also the end of the table, we
# would keep requesting the last bit over and over again. So the rule is, if
# no row beyond the requested ID has been seen in this batch, we are done.
# That is what $way_tags_first is tracking.
#
# NOTE(review): rows with an id below curr_id do not clear $way_tags_first,
# so a batch that ends exactly at the 50000-row limit could be mistaken for
# the end of the table - confirm whether this can occur in practice.

# Returns the tags of way curr_id all in one go, as a flat array
# [k1, v1, k2, v2, ...]. The array is empty when the way has no tags or the
# query yields no result set.
def all_way_tags(curr_id)
  loop do
    if $way_tags_data.nil?
      $way_tags_data = $mysql.query "select id,k,v from current_way_tags where id >= #{$way_tags_current[0]} order by id limit 50000;"
      $way_tags_first = true
      # No result set at all: nothing has been collected yet, so report no tags.
      return [] if $way_tags_data.nil?
    end
    pos = $way_tags_data.row_tell
    tags = []
    # Since Ruby 1.9 a global variable cannot be used as a block parameter,
    # so the row is taken as a local and published to the global explicitly.
    $way_tags_data.each do |row|
      $way_tags_current = row
      id = row[0].to_i
      if id < curr_id
        # Row belongs to an earlier way: skip it for good.
        pos = $way_tags_data.row_tell
        next
      end
      if id == curr_id
        pos = $way_tags_data.row_tell
        tags << row[1] << row[2]
        next
      end
      # Need to seek back one so we get this row again...
      $way_tags_data.row_seek(pos)
      $way_tags_first = false
      return tags
    end
    # So we've hit the end of this dataset. If it's the end of the table, we
    # return tags, otherwise we clear tags and continue...
    $way_tags_data = nil
    return tags if $way_tags_first
  end
end
169   
# Print one <tag k="..." v="..." /> line to standard output for every
# key/value pair in tags. Keys and values are HTML-escaped first.
def out_tags(tags)
  tags.each do |key, value|
    escaped_key = CGI.escapeHTML(key)
    escaped_value = CGI.escapeHTML(value)
    puts %{    <tag k="#{escaped_key}" v="#{escaped_value}" />}
  end
end
174
# XML prolog and the opening of the <osm> document.
puts '<?xml version="1.0" encoding="UTF-8"?>'
puts '<osm version="0.3" generator="OpenStreetMap planet.rb">'
puts '  <bound box="-90,-180,90,180" origin="http://www.openstreetmap.org/api/0.4" />'

# Export every node, one page at a time. all_nodes is asked for the rows
# after the last id written, and the loop stops as soon as a page yields
# nothing.
lastid = 0
finished = false
until finished
  finished = true
  all_nodes(lastid) do |id, lat, lon, timestamp, tags|
    finished = false
    lastid = id
    # lat/lon are divided by 10^7 and printed with seven decimal places.
    lat_text = sprintf('%.7f', lat/10000000.0)
    lon_text = sprintf('%.7f', lon/10000000.0)
    print %{  <node id="#{id}" lat="#{lat_text}" lon="#{lon_text}" timestamp="#{timestamp.xmlschema}"}
    if tags.empty?
      # No tags: close the element immediately.
      puts "/>"
      next
    end
    puts ">"
    out_tags tags
    puts "  </node>"
  end
end
197
# Export every segment, one page at a time. all_segments is asked for the
# rows after the last id written, and the loop stops as soon as a page
# yields nothing.
lastid = 0
finished = false
until finished
  finished = true
  all_segments(lastid) do |id, from, to, timestamp, tags|
    finished = false
    lastid = id
    print %{  <segment id="#{id}" from="#{from}" to="#{to}" timestamp="#{timestamp.xmlschema}"}
    if tags.empty?
      # No tags: close the element immediately.
      puts "/>"
      next
    end
    puts ">"
    out_tags tags
    puts "  </segment>"
  end
end
216
# Export every way, one page at a time. all_ways is asked for the rows after
# the last id written, and the loop stops as soon as a page yields nothing.
lastid = 0
finished = false
until finished
  finished = true
  all_ways(lastid) do |id, segs, timestamp, tags|
    finished = false
    lastid = id
    print %{  <way id="#{id}" timestamp="#{timestamp.xmlschema}"}
    if tags.empty? && segs.empty?
      # Neither segments nor tags: close the element immediately.
      puts "/>"
      next
    end
    puts ">"
    segs.each do |seg_id|
      puts %{    <seg id="#{seg_id}" />}
    end
    out_tags tags
    puts "  </way>"
  end
end
# Close the document opened by the <osm ...> line.
puts "</osm>"
Note: See TracBrowser for help on using the repository browser.