source: subversion/applications/utils/planet.osm/planet.rb @ 4817

Last change on this file since 4817 was 4817, checked in by jonb, 12 years ago

planet.rb: Revert UTF8 db change since this is incompatible with the main OSM db

  • Property svn:executable set to *
File size: 7.2 KB
Line 
1#!/usr/bin/ruby -w
2
3#$: << Dir.pwd+"/../../www.openstreetmap.org/ruby/api"
4
5require 'mysql'
6require 'time'
7require 'osm/servinfo.rb'
8require 'cgi'
9
# Open the single global database handle that every query in this script uses.
# $DBSERVER, $USERNAME, $PASSWORD and $DATABASE are not defined in this file;
# presumably they are set by osm/servinfo.rb -- confirm against that file.
$mysql = Mysql.init
# On a UTF-8 clean setup you may need to enable the following line:
# $mysql.options(Mysql::SET_CHARSET_NAME, "utf8")
$mysql.real_connect($DBSERVER, $USERNAME, $PASSWORD, $DATABASE)
14
# Builds a hash out of a string of semicolon-separated key=value pairs.
#
# tag_str - String such as "highway=primary;name=Main St", or nil.
#
# Each entry is split on its FIRST '=' only, so a value may itself contain
# '='. An entry without any '=' becomes a key with an empty-string value.
# When a key occurs more than once the last occurrence wins.
#
# Returns a Hash of String keys to String values; an empty Hash when tag_str
# is nil (a NULL column) or empty.
def read_tags(tag_str)
  # A NULL tags column arrives as nil; treat it like "no tags" instead of
  # failing on nil.split, mirroring how read_timestamp tolerates nil.
  return {} if tag_str.nil?

  tags = {}
  tag_str.split(';').each do |tag|
    # partition yields [before, "=", after], or [tag, "", ""] without '='.
    key, _sep, value = tag.partition('=')
    tags[key] = value
  end
  tags
end
20
# Turns a time string into a Time object.
#
# time_str - String to hand to Time.parse, or nil / "" / "NULL" for "no value".
#
# Returns the parsed Time. A missing value (nil, "" or the literal "NULL")
# yields the Unix epoch, Time.at(0), rather than nil.
def read_timestamp(time_str)
  return Time.at(0) if time_str.nil? || ["", "NULL"].include?(time_str)

  Time.parse(time_str)
end
25
# Returns the SQL fragment that restricts a paged query to rows after lastid.
#
# lastid - ID of the last row already processed; 0 means "start from the top".
#
# Returns "" for 0, otherwise " and id > <lastid>" (note the leading space),
# ready to be appended to an existing WHERE clause.
def pageSQL(lastid)
  lastid == 0 ? "" : " and id > #{lastid}"
end
33
# Yields one page (at most 500000 rows) of visible nodes with id > lastid,
# in ascending id order.
#
# For every node the block receives:
#   id, lat, lon, timestamp, tags
# where id, lat and lon are the columns converted with to_i, timestamp is the
# result of read_timestamp and tags is the hash produced by read_tags, in the
# format {key1=>value1, key2=>value2...}
def all_nodes(lastid)
  sql = "select id, latitude, longitude, timestamp, tags from current_nodes where visible = 1 #{pageSQL(lastid)} order by id limit 500000"
  $mysql.query(sql) do |result|
    result.each do |node|
      node_id = node[0].to_i
      latitude = node[1].to_i
      longitude = node[2].to_i
      yield node_id, latitude, longitude, read_timestamp(node[3]), read_tags(node[4])
    end
  end
end
44
# Yields one page (at most 500000 rows) of visible segments with id > lastid,
# in ascending id order.
#
# For every segment the block receives:
#   id, from_id, to_id, timestamp, tags
# where the ids are the id, node_a and node_b columns converted with to_i,
# timestamp comes from read_timestamp and tags from read_tags.
def all_segments(lastid)
  sql = "select id, node_a, node_b, timestamp, tags from current_segments where visible = 1 #{pageSQL(lastid)} order by id limit 500000"
  $mysql.query(sql) do |result|
    result.each do |segment|
      segment_id = segment[0].to_i
      from_id = segment[1].to_i
      to_id = segment[2].to_i
      yield segment_id, from_id, to_id, read_timestamp(segment[3]), read_tags(segment[4])
    end
  end
end
54
# Yields one page (at most 500000 rows) of visible ways with id > lastid,
# in ascending id order.
#
# For every way the block receives:
#   id, [seg_id1, seg_id2, seg_id3...], timestamp, tags
# The segment ids are gathered from all_way_segments and the tags hash is
# built from the flat [k1, v1, k2, v2...] array returned by all_way_tags.
# Both helpers scan forward through their tables, so ways must be requested
# in ascending id order -- which the "order by id" here guarantees.
def all_ways(lastid)
  sql = "select id, timestamp from current_ways where visible = 1 #{pageSQL(lastid)} order by id limit 500000"
  $mysql.query(sql) do |result|
    result.each do |way|
      way_id = way[0].to_i
      segment_ids = []
      all_way_segments(way_id) { |segment| segment_ids << segment.to_i }
      flat_tags = all_way_tags(way_id)
      yield way_id, segment_ids, read_timestamp(way[1]), Hash[*flat_tags]
    end
  end
end
70
# Here we produce the segments associated with a way. Instead of running a
# segments query for each way, the whole current_way_segments table is read
# from the beginning in chunks of up to 500000 rows, ordered by
# (id, sequence_id). Since callers ask for ways in ascending id order, this
# works as a kind of "merge join": the caller provides the way ID it is
# interested in, we scan forward until we find it, yield each of its segments
# and return. The only hard part is that when we notice we have gone too far
# we must step back one row so that the next call sees that row again.
# That is what all the row_tell/row_seek is about.
#
# State kept between calls:
#   $way_segments_data    - the chunk currently being scanned, or nil when a
#                           new chunk has to be fetched
#   $way_segments_current - the last row read; the next chunk starts after it
#   $way_segments_done    - true while a freshly fetched chunk has produced no
#                           rows, i.e. the table is exhausted
$way_segments_data = nil
$way_segments_current = [0,0]
$way_segments_done = false

# Yields the segment_id column of every segment of way curr_id, one at a
# time and in sequence order. Yields nothing when the way has no segments.
#
# curr_id - Integer way ID; must not be lower than the ID of a previous call.
def all_way_segments(curr_id)
  loop do
    if $way_segments_data.nil?
      $way_segments_data = $mysql.query "select id, sequence_id, segment_id from current_way_segments
          where id > #{$way_segments_current[0]}
          or (id = #{$way_segments_current[0]} and sequence_id > #{$way_segments_current[1]})
          order by id, sequence_id limit 500000;"
      return if $way_segments_data.nil?
      # Assume the table is exhausted until this chunk yields a row.
      $way_segments_done = true
    end

    # pos always marks the spot just behind the last row we consumed.
    pos = $way_segments_data.row_tell
    # The row is taken as a local and copied to the global by hand: a global
    # variable as a block parameter is a syntax error from Ruby 1.9 onwards.
    $way_segments_data.each do |row|
      $way_segments_current = row
      $way_segments_done = false
      id = row[0].to_i
      if id > curr_id
        # Too far: seek back one row so the next call gets this row again.
        $way_segments_data.row_seek(pos)
        return
      end
      pos = $way_segments_data.row_tell
      yield row[2] if id == curr_id
    end

    # The chunk is used up. Release it explicitly instead of leaving up to
    # 500000 buffered rows to the garbage collector, then either stop (the
    # table is exhausted) or go around again to fetch the next chunk.
    $way_segments_data.free
    $way_segments_data = nil
    return if $way_segments_done
  end
end
118   
# State kept between calls of all_way_tags:
#   $way_tags_data    - the chunk currently being scanned, or nil when a new
#                       chunk has to be fetched
#   $way_tags_current - the last row read; the next chunk starts AT its ID
#   $way_tags_first   - true while every row seen in the current chunk has the
#                       same ID the chunk's query started from
$way_tags_data = nil
$way_tags_current = [0]
$way_tags_first = false

# Way tags are more irritating than way segments because there is no unique
# key to sort by. A new chunk therefore has to start AT the last ID seen
# (id >= ...) rather than after it, and the tags of one ID are collected in an
# array: if we hit the end of the chunk while collecting, we toss out what we
# have and start again with a new query.
#
# Because of this, detecting the end of the table becomes tricky: once the
# last chunk is used up, re-querying from the last ID returns the same rows
# over and over again. So the rule is: if a chunk never showed an ID other
# than the one its query started from, fetching again cannot make progress
# and we are done. That is what $way_tags_first is tracking. Any row with a
# different ID clears the flag -- including rows BELOW curr_id, so a chunk
# made up entirely of IDs we are skipping over does not end the scan early.
#
# Known limit: a single way with 50000 or more tag rows fills a whole chunk
# and is returned truncated.

# Returns the tags of way curr_id all in one go, as a flat array
# [k1, v1, k2, v2...]; an empty array when the way has no tags.
#
# curr_id - Integer way ID; must not be lower than the ID of a previous call.
def all_way_tags(curr_id)
  loop do
    if $way_tags_data.nil?
      $way_tags_data = $mysql.query "select id,k,v from current_way_tags where id >= #{$way_tags_current[0]} order by id limit 50000;"
      $way_tags_first = true
      # Nothing has been collected yet at this point, so answer "no tags".
      return [] if $way_tags_data.nil?
    end

    # pos always marks the spot just behind the last row we consumed.
    pos = $way_tags_data.row_tell
    tags = []
    # The row is taken as a local and copied to the global by hand: a global
    # variable as a block parameter is a syntax error from Ruby 1.9 onwards.
    $way_tags_data.each do |row|
      # Before overwriting it, $way_tags_current still holds the previous row
      # (or, for the first row of a chunk, the ID the query started from).
      $way_tags_first = false if row[0].to_i != $way_tags_current[0].to_i
      $way_tags_current = row
      id = row[0].to_i
      if id > curr_id
        # Too far: seek back one row so the next call gets this row again.
        $way_tags_data.row_seek(pos)
        $way_tags_first = false
        return tags
      end
      pos = $way_tags_data.row_tell
      tags << row[1] << row[2] if id == curr_id
    end

    # So we've hit the end of this chunk. Release it explicitly instead of
    # leaving it to the garbage collector. If no further progress is
    # possible we return tags, otherwise tags is cleared at the top of the
    # loop and we continue with a fresh chunk.
    $way_tags_data.free
    $way_tags_data = nil
    return tags if $way_tags_first
  end
end
170   
# Writes one <tag k="..." v="..." /> line to standard output for every entry
# of the hash, with key and value HTML-escaped.
#
# tags - Hash of tag keys to tag values.
#
# A nil key or value (e.g. a NULL k/v column) is written as an empty string
# instead of making CGI.escapeHTML fail and aborting the whole dump.
#
# Returns the tags hash it was given.
def out_tags(tags)
  tags.each do |key, value|
    puts %{    <tag k="#{CGI.escapeHTML(key.to_s)}" v="#{CGI.escapeHTML(value.to_s)}" />}
  end
end
175
# ---------------------------------------------------------------------------
# Main program: write the whole planet as one OSM XML document on standard
# output -- first all nodes, then all segments, then all ways.
#
# Each all_* helper delivers one page of rows after the given id, so every
# section keeps asking for the next page until a page comes back empty.
# ---------------------------------------------------------------------------
puts '<?xml version="1.0" encoding="UTF-8"?>'
puts '<osm version="0.3" generator="OpenStreetMap planet.rb">'
puts '  <bound box="-90,-180,90,180" origin="http://www.openstreetmap.org/api/0.4" />'

# Nodes. lat/lon arrive as integers and are divided by 10000000.0 to be
# printed with seven decimals.
lastid = 0
loop do
  page_had_rows = false
  all_nodes(lastid) do |id, lat, lon, timestamp, tags|
    page_had_rows = true
    lastid = id
    lat_text = format('%.7f', lat / 10000000.0)
    lon_text = format('%.7f', lon / 10000000.0)
    print %{  <node id="#{id}" lat="#{lat_text}" lon="#{lon_text}" timestamp="#{timestamp.xmlschema}"}
    if tags.empty?
      # No children: close the element right away.
      puts "/>"
      next
    end
    puts ">"
    out_tags tags
    puts "  </node>"
  end
  break unless page_had_rows
end

# Segments.
lastid = 0
loop do
  page_had_rows = false
  all_segments(lastid) do |id, from, to, timestamp, tags|
    page_had_rows = true
    lastid = id
    print %{  <segment id="#{id}" from="#{from}" to="#{to}" timestamp="#{timestamp.xmlschema}"}
    if tags.empty?
      puts "/>"
      next
    end
    puts ">"
    out_tags tags
    puts "  </segment>"
  end
  break unless page_had_rows
end

# Ways. A way is only self-closed when it has neither segments nor tags.
lastid = 0
loop do
  page_had_rows = false
  all_ways(lastid) do |id, segs, timestamp, tags|
    page_had_rows = true
    lastid = id
    print %{  <way id="#{id}" timestamp="#{timestamp.xmlschema}"}
    if tags.empty? && segs.empty?
      puts "/>"
      next
    end
    puts ">"
    segs.each { |seg_id| puts %{    <seg id="#{seg_id}" />} }
    out_tags tags
    puts "  </way>"
  end
  break unless page_had_rows
end

puts "</osm>"
Note: See TracBrowser for help on using the repository browser.