source: subversion/applications/utils/planet.osm/planet.rb @ 29095

Last change on this file since 29095 was 4817, checked in by jonb, 12 years ago

planet.rb: Revert UTF8 db change since this is incompatible with the main OSM db

  • Property svn:executable set to *
File size: 7.2 KB
RevLine 
[1071]1#!/usr/bin/ruby -w
2
[1499]3#$: << Dir.pwd+"/../../www.openstreetmap.org/ruby/api"
[1071]4
[1212]5require 'mysql'
6require 'time'
7require 'osm/servinfo.rb'
8require 'cgi'
[1071]9
# Open the single database connection shared (via the $mysql global) by every
# query in this script. The connection settings are read from the globals
# $DBSERVER, $USERNAME, $PASSWORD and $DATABASE, which are not defined in this
# file (presumably supplied by osm/servinfo.rb -- confirm).
$mysql = Mysql.init
# If you have a UTF-8 clean setup then you may need to enable the following line
# $mysql.options(Mysql::SET_CHARSET_NAME, "utf8")
$mysql.real_connect($DBSERVER, $USERNAME, $PASSWORD, $DATABASE)
[1071]14
# Create a hash of entries out of a list of semicolon-separated key=value pairs.
#
# tag_str - String such as "highway=primary;name=High Street", or nil.
#
# Each entry is split at its first '=' only, so a value may itself contain
# '='. An entry without any '=' becomes a key with an empty-string value.
# When a key occurs more than once, the last occurrence wins. A nil tag_str
# (e.g. a NULL database column) yields an empty hash instead of raising.
#
# Returns a Hash mapping key Strings to value Strings.
def read_tags(tag_str)
  tags = {}
  return tags if tag_str.nil?

  tag_str.split(';').each do |tag|
    eq = tag.index('=')
    if eq
      # Everything before the first '=' is the key, everything after it the value.
      tags[tag[0...eq]] = tag[(eq + 1)..-1]
    else
      tags[tag] = ""
    end
  end
  tags
end
[1071]20
# Create a Time out of a time string.
#
# time_str - String holding a timestamp, or nil.
#
# A missing value (nil, empty/whitespace-only string, or the literal "NULL")
# is mapped to the Unix epoch, Time.at(0); this method never returns nil.
# A string that Time.parse rejects is mapped to the epoch as well, with a
# warning on stderr, so that one malformed row does not abort the whole dump.
#
# Returns a Time.
def read_timestamp(time_str)
  text = time_str.to_s.strip
  return Time.at(0) if text.empty? || text == "NULL"

  begin
    Time.parse(text)
  rescue ArgumentError
    # stderr only: stdout carries the XML document.
    $stderr.puts "planet.rb: unparseable timestamp #{text.inspect}, using epoch"
    Time.at(0)
  end
end
25
# Build the SQL fragment that makes a paged query resume after a given id.
#
# lastid - id of the last row already handled; 0 means no row has been
#          handled yet.
#
# Returns "" when lastid is 0, otherwise " and id > <lastid>" (with the
# leading space), ready to be appended to an existing WHERE clause.
def pageSQL(lastid)
  lastid == 0 ? "" : " and id > #{lastid}"
end
33
# Yields once per visible node with id greater than lastid (at most 500000
# per call, in ascending id order) with the parameters
#   id, lat, lon, timestamp, tags
# id, lat and lon are converted with to_i, timestamp goes through
# read_timestamp and 'tags' through read_tags, i.e. it is a hash in the
# format {key1=>value1, key2=>value2...}
def all_nodes(lastid)
  sql = "select id, latitude, longitude, timestamp, tags from current_nodes where visible = 1 #{pageSQL(lastid)} order by id limit 500000"
  $mysql.query(sql) do |result|
    result.each do |node_id, lat, lon, stamp, tag_str|
      yield node_id.to_i, lat.to_i, lon.to_i, read_timestamp(stamp), read_tags(tag_str)
    end
  end
end
44
# Yields once per visible segment with id greater than lastid (at most
# 500000 per call, in ascending id order) with the parameters
#   id, from_id, to_id, timestamp, tags
# The three ids are converted with to_i, timestamp goes through
# read_timestamp and tags through read_tags.
def all_segments(lastid)
  sql = "select id, node_a, node_b, timestamp, tags from current_segments where visible = 1 #{pageSQL(lastid)} order by id limit 500000"
  $mysql.query(sql) do |result|
    result.each do |segment_id, from_id, to_id, stamp, tag_str|
      yield segment_id.to_i, from_id.to_i, to_id.to_i, read_timestamp(stamp), read_tags(tag_str)
    end
  end
end
54
# Yields once per visible way with id greater than lastid (at most 500000
# per call, in ascending id order) with the parameters
#   id, [id1,id2,id3...], timestamp, tags
# The segment id list is gathered from all_way_segments and the tags hash is
# built from the flat [k1, v1, k2, v2, ...] array returned by all_way_tags.
# For each way the segments are fetched before the tags.
def all_ways(lastid)
  sql = "select id, timestamp from current_ways where visible = 1 #{pageSQL(lastid)} order by id limit 500000"
  $mysql.query(sql) do |result|
    result.each do |way_id, stamp|
      id = way_id.to_i
      segs = []
      all_way_segments(id) { |segment| segs << segment.to_i }
      flat_tags = all_way_tags(id)
      yield id, segs, read_timestamp(stamp), Hash[*flat_tags]
    end
  end
end
[1071]70
# Here we produce the segments associated with a way. How it works is that
# instead of doing the segments query for each way, it gets all the data
# from the beginning in batches (limit 500000 rows per query). Since this is
# sorted we can perform a sort of "merge join". The caller provides the ID
# they are interested in and we scan forward in the table to find it, yield
# each segment and return. The only hard part is that when we notice we have
# gone too far, we have to jump back one row so the next call sees it again.
# That's what all the row_tell/row_seek is about.
#
# Callers must therefore ask for way ids in ascending order.
#
# Cursor state shared between calls:
#   $way_segments_data    - result set currently being scanned, or nil when
#                           the next batch still has to be fetched
#   $way_segments_current - last row read; the next batch starts after its
#                           (id, sequence_id)
#   $way_segments_done    - true while the most recently fetched batch has
#                           not produced a single row (i.e. end of table)
$way_segments_data = nil
$way_segments_current = [0,0]
$way_segments_done = false

# Yields the segment_id column of each current_way_segments row belonging to
# way curr_id, one at a time, in sequence_id order.
def all_way_segments(curr_id)
  loop do
    if $way_segments_data.nil?
      $way_segments_data = $mysql.query(
        "select id, sequence_id, segment_id from current_way_segments" \
        " where id > #{$way_segments_current[0]}" \
        " or (id = #{$way_segments_current[0]} and sequence_id > #{$way_segments_current[1]})" \
        " order by id, sequence_id limit 500000;")
      return if $way_segments_data.nil?
      # Assume the table is exhausted until this batch yields a row.
      $way_segments_done = true
    end
    pos = $way_segments_data.row_tell
    # A plain block parameter is used and the global assigned explicitly:
    # a global variable as block parameter is a syntax error since Ruby 1.9.
    $way_segments_data.each do |row|
      $way_segments_current = row
      $way_segments_done = false
      id = row[0].to_i
      if id < curr_id
        # Row of an earlier way nobody asked for: skip it.
        pos = $way_segments_data.row_tell
        next
      end
      if id == curr_id
        pos = $way_segments_data.row_tell
        yield row[2]
        next
      end
      # Need to seek back one so we get this row again...
      $way_segments_data.row_seek(pos)
      return
    end
    # Batch exhausted: fetch the next one, unless this one was already empty.
    $way_segments_data = nil
    return if $way_segments_done
  end
end
118   
# Cursor state shared between calls of all_way_tags:
#   $way_tags_data    - result set currently being scanned, or nil when the
#                       next batch still has to be fetched
#   $way_tags_current - last row read; the next batch starts at its id
#   $way_tags_first   - true while every row seen in the current batch has
#                       the id the batch was queried from
$way_tags_data = nil
$way_tags_current = [0]
$way_tags_first = false
# Way tags are more irritating because there's no unique key to sort by. So
# we have to collect the results for an ID in an array and if it turns out to
# hit the end of the batch, we toss out what we've collected and start again
# with a new query beginning at that ID...
#
# Because of this, detecting the end of the table becomes tricky, since when
# we reach the end of the result set and it's the end of the table, we'd keep
# requesting the last bit over and over again. So the rule is: if the ID the
# batch was queried from is the *only* ID in this set, we're done. That's
# what $way_tags_first is tracking. It is cleared as soon as the id changes
# from one row to the next -- including rows that are merely skipped because
# they belong to an earlier way -- so a batch consisting only of skipped rows
# is not mistaken for the end of the table.
#
# NOTE(review): an id owning as many rows as the batch limit (50000) would
# still look like the end of the table; this matches the previous behaviour.
#
# Callers must ask for way ids in ascending order.
#
# Returns the tags of way curr_id, all in one go, as a flat array
# [k1, v1, k2, v2, ...] (empty when the way has no tags).
def all_way_tags(curr_id)
  loop do
    if $way_tags_data.nil?
      $way_tags_data = $mysql.query "select id,k,v from current_way_tags where id >= #{$way_tags_current[0]} order by id limit 50000;"
      $way_tags_first = true
      return [] if $way_tags_data.nil?
    end
    pos = $way_tags_data.row_tell
    tags = []
    # A plain block parameter is used and the global assigned explicitly:
    # a global variable as block parameter is a syntax error since Ruby 1.9.
    $way_tags_data.each do |row|
      # Compare against the previously read row (or the query start id).
      $way_tags_first = false if row[0].to_i != $way_tags_current[0].to_i
      $way_tags_current = row
      id = row[0].to_i
      if id < curr_id
        # Tag of an earlier way nobody asked for: skip it.
        pos = $way_tags_data.row_tell
        next
      end
      if id == curr_id
        pos = $way_tags_data.row_tell
        tags << row[1] << row[2]
        next
      end
      # Need to seek back one so we get this row again...
      $way_tags_data.row_seek(pos)
      $way_tags_first = false
      return tags
    end
    # So we've hit the end of this dataset. If it's the end of the table, we
    # return tags, otherwise we clear tags and continue...
    $way_tags_data = nil
    return tags if $way_tags_first
  end
end
170   
# Print one <tag k="..." v="..." /> line (indented by four spaces) to
# standard output for every key/value pair in tags. Keys and values are
# passed through CGI.escapeHTML before being written.
def out_tags(tags)
  tags.each do |key, value|
    escaped_key = CGI.escapeHTML(key)
    escaped_value = CGI.escapeHTML(value)
    puts "    <tag k=\"#{escaped_key}\" v=\"#{escaped_value}\" />"
  end
end
175
# Write the whole dump to standard output: the XML prologue, then every
# node, every segment and every way, then the closing tag.
#
# Each section is fetched page by page: all_nodes / all_segments / all_ways
# are called repeatedly with the id of the last element written, and the
# section ends as soon as a call yields no element at all.
puts '<?xml version="1.0" encoding="UTF-8"?>'
puts '<osm version="0.3" generator="OpenStreetMap planet.rb">'
puts '  <bound box="-90,-180,90,180" origin="http://www.openstreetmap.org/api/0.4" />'

# --- nodes ---
lastid = 0
loop do
  page_empty = true
  all_nodes(lastid) do |id, lat, lon, timestamp, tags|
    page_empty = false
    lastid = id
    # lat/lon arrive as integers and are written as value / 10^7.
    lat_text = sprintf('%.7f', lat / 10000000.0)
    lon_text = sprintf('%.7f', lon / 10000000.0)
    print %{  <node id="#{id}" lat="#{lat_text}" lon="#{lon_text}" timestamp="#{timestamp.xmlschema}"}
    if tags.empty?
      puts "/>"
      next
    end
    puts ">"
    out_tags tags
    puts "  </node>"
  end
  break if page_empty
end

# --- segments ---
lastid = 0
loop do
  page_empty = true
  all_segments(lastid) do |id, from, to, timestamp, tags|
    page_empty = false
    lastid = id
    print %{  <segment id="#{id}" from="#{from}" to="#{to}" timestamp="#{timestamp.xmlschema}"}
    if tags.empty?
      puts "/>"
      next
    end
    puts ">"
    out_tags tags
    puts "  </segment>"
  end
  break if page_empty
end

# --- ways ---
lastid = 0
loop do
  page_empty = true
  all_ways(lastid) do |id, segs, timestamp, tags|
    page_empty = false
    lastid = id
    print %{  <way id="#{id}" timestamp="#{timestamp.xmlschema}"}
    # A way is self-closed only when it has neither tags nor segments.
    if tags.empty? && segs.empty?
      puts "/>"
      next
    end
    puts ">"
    segs.each { |seg_id| puts %{    <seg id="#{seg_id}" />} }
    out_tags tags
    puts "  </way>"
  end
  break if page_empty
end
puts "</osm>"
Note: See TracBrowser for help on using the repository browser.