source: subversion/applications/rendering/forWikipedia/wikipediaMaps.pl @ 30322

Last change on this file since 30322 was 6851, checked in by ojw, 12 years ago

Add TODO list.
Add a proper copyright line before the license

File size: 5.9 KB
Line 
1#!/usr/bin/perl
2#-----------------------------------------------------------------
3# Creates maps for wikipedia articles
4#-----------------------------------------------------------------
5# Copyright, 2008, Oliver White
6#
7# This program is free software: you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program.  If not, see <http://www.gnu.org/licenses/>.
19#---------------------------------------------------------------
20# TODO:
21#  * Check the map image page, and only create a map if it doesn't
22#    already exist, or if the existing copy is more than 3 months
23#    old
24#  * Is there any way to get "last modified" date for OSM data
25#    in a certain region?  Don't update wikipedia maps unless the
26#    OSM data has changed
27#---------------------------------------------------------------
28use strict;
29use MediaWiki;
30use Data::Dumper;
31use LWP::Simple;
32use File::Slurp;
33
34use myWikiCredentials;
# Fetch the bot account details.  getWikiUserPass() is not defined in
# this file (presumably it is provided by myWikiCredentials); its
# result is unpacked as (username, password).
my ($WikiUser, $WikiPass) = getWikiUserPass();

# Open a bot session against the English Wikipedia
my $Wikipedia = MediaWiki->new();
my $OK1 = $Wikipedia->setup(
  {
    bot  => { user => $WikiUser, pass => $WikiPass },
    wiki => { host => 'en.wikipedia.org', path => 'w' },
  });

# A false result from setup() is treated as a failed login
$OK1 or die("Couldn't login to wikipedia");
printf qq{Logged into wikipedia as "%s"\n}, $Wikipedia->user();
47
# Open a second bot session, this time against wikimedia commons,
# using the same username and password
my $Commons = MediaWiki->new();
my $OK2 = $Commons->setup(
  {
    bot  => { user => $WikiUser, pass => $WikiPass },
    wiki => { host => 'commons.wikimedia.org', path => 'w' },
  });

# A false result from setup() is treated as a failed login
$OK2 or die("Couldn't login to commons");
printf qq{Logged into commons as "%s"\n}, $Commons->user();
57
# Temporary: scratch directory (relative to the current working
# directory) that holds downloaded map images until they are uploaded
my $PicDir = "Pics";
if(! -d $PicDir)
{
  # Stop straight away with the OS error message; without the
  # directory none of the later downloads could be stored
  mkdir($PicDir) or die("Couldn't create directory $PicDir: $!");
}
61
# Start by deciding what to do.  Ask this wiki page, which contains
# a list of categories which link to towns we want to render
my $Categories = GetCategories("Wikipedia:WikiProject OpenStreetMap/OJW list");

# Walk every listed category and every page inside it.  For each page
# with a usable geotag: download a map, upload it to commons together
# with its description, then leave a note on the article's talk page.
my $Count = 0;
foreach my $Category(@{$Categories})
{
  # Find the actual pages in that category
  my $Pages = PagesInCategory($Category);

  foreach my $Page(@{$Pages})
  {
    # Try to find a geotag on the wiki page; a page without both
    # coordinates can't be mapped, so it is skipped
    my $Location = PageGeoLocation($Page);
    next if(!defined($Location->{lat}) or !defined($Location->{lon}));

    # Download an OSM map of the area (zoom 15, 1200 for both the
    # width and the height)
    my $Name = SuggestImageName($Page);
    my $Filename = "$PicDir/$Name";

    print "Downloading $Name\n";
    my $Status = GetMapImage($Location, $Filename, 15, 1200);

    # GetMapImage hands back the getstore() status code.  If the
    # download failed, don't go on to upload a missing file or a
    # stale one left over from an earlier run.
    if(!defined($Status) or !is_success($Status))
    {
      print "Skipping $Name: download failed\n";
      next;
    }

    # Upload that map to wikimedia commons.  The file is read in raw
    # mode so that the PNG bytes are passed on untouched.
    my $ImageData = read_file($Filename, binmode => ':raw');
    my $ImageDescription = GetImageDescription($Page, $Location);

    printf "Uploading %s (%d bytes)\n", $Name, length($ImageData);
    $Commons->upload($Name, $ImageData, $ImageDescription, 1);

    # Add a description to the image page
    $Commons->{summary} = "Map metadata";
    $Commons->text("Image:$Name", $ImageDescription);

    # Put a message on the wikipedia talk page for that location,
    # saying that a map is available
    MessageToWikipedia($Page);

    # Stop as soon as the counter exceeds 3, i.e. after the fourth
    # uploaded map (presumably a safety limit while testing)
    exit if(++$Count > 3);
  }
}

exit;
108
# Appends the {{OpenStreetMap_render_available}} notice to the talk
# page of an article, unless that talk page already contains the
# words "open street map" (any case, with or without spaces).
#   $Page - title of the article; its talk page is "Talk:<title>"
sub MessageToWikipedia
{
  my ($Article) = @_;
  my $Talk = "Talk:" . $Article;

  # Current wikitext of the talk page
  my $Current = $Wikipedia->text($Talk);

  my $AlreadyMentioned = ($Current =~ m{open \s* street \s* map}ix);
  if($AlreadyMentioned)
  {
    print "$Talk already has an OSM discussion\n";
    return;
  }

  # Keep the existing text and add the notice, followed by ~~~~,
  # at the end
  my $Notice = "\n\n{{OpenStreetMap_render_available}}\n\n~~~~\n";

  $Wikipedia->{summary} = "Message about a free map of $Article becoming available";
  return $Wikipedia->text($Talk, join("", $Current, $Notice));
}
127
# Builds the wikitext used as the image description: a single
# {{openstreetmap_render}} template call carrying the article name
# and its coordinates.
#   $Page     - article title, inserted as the "name" argument
#   $Location - hashref; its {lat} and {lon} values are formatted
#               with %f
# Returns the template text followed by a newline.
sub GetImageDescription
{
  my $Title = shift();
  my $Where = shift();

  my $Lat = $Where->{lat};
  my $Lon = $Where->{lon};
  my $Template = "{{openstreetmap_render|name=%s|lat=%f|lon=%f}}\n";

  return(sprintf($Template, $Title, $Lat, $Lon));
}
137
# Downloads a PNG map from the MapOf service on tah.openstreetmap.org
# and stores it in a local file.
#   $Location - hashref with {lat} and {lon}
#   $Filename - path of the file the image is written to
#   $Zoom     - value of the "z" query parameter
#   $Width    - value used for both the "w" and "h" query parameters
# Returns the status code reported by getstore(), so that callers can
# test it with is_success().  A warning is printed when the download
# did not succeed.
sub GetMapImage
{
  my ($Location, $Filename, $Zoom, $Width) = @_;

  my $URL = sprintf("http://%s/MapOf/?lat=%f&long=%f&z=%d&w=%d&h=%d&format=%s",
    "tah.openstreetmap.org",
    $Location->{lat},
    $Location->{lon},
    $Zoom,
    $Width,
    $Width,
    "png");

  my $Status = getstore($URL, $Filename);
  if(!is_success($Status))
  {
    # Report the failure instead of dropping it silently; the caller
    # still gets the status and decides what to do
    warn("Failed to download $URL (HTTP status $Status)\n");
  }

  return($Status);
}
153
# Derives the image name for an article: the title wrapped in the
# fixed "OpenStreetMap_render_" prefix and ".png" suffix.
#   $Page - article title
# Returns the resulting name as a string.
sub SuggestImageName
{
  my ($Title) = @_;
  return(join("", "OpenStreetMap_render_", $Title, ".png"));
}
160
# Looks for "latitude = <number>" and "longitude = <number>" in the
# wikitext of a page.
#   $Page - title of the page to read
# Returns a hashref that holds the values under the keys "lat" and
# "lon" (the first three letters of the matched word).  A key is
# absent when the corresponding field was not found; if a field
# occurs several times, the last occurrence wins.
sub PageGeoLocation
{
  my $Page = shift();
  my $Text = $Wikipedia->text($Page);

  my $Attr = {};

  # Nothing to scan if no text came back for the page
  return $Attr if(!defined($Text));

  # The optional sign applies to both number forms, so that signed
  # whole numbers such as "-33" are accepted as well as "-33.86"
  while($Text =~ m{
    (latitude|longitude)
    \s*
    =
    \s*
    (
      [-+]?
      (?:
        [0-9]*
        \.
        [0-9]+
      |
        [0-9]+
      )
    )
    }xg)
  {
    $Attr->{substr($1,0,3)} = $2;
  }

  return $Attr;
}
187
# Lists the pages of a category, with obvious non-article entries
# removed.
#   $Category - name of the category to read
# Returns a reference to an array of page titles.  The array is
# empty (never undef) when nothing is left after filtering.
sub PagesInCategory
{
  my $Category = shift();

  # Only the first value returned by readcat (the pages) is used
  my ($Pages) = $Wikipedia->readcat($Category);

  my $X = [];
  foreach my $Page(@{ $Pages || [] })
  {
    # Note: the readcat function returns a *lot* of junk.
    # Try to filter-out anything that obviously isn't an article
    # about a town or city
    next if($Page =~ m{^(Category|Special):});
    next if($Page =~ m{Wiki[pm]edia}i);
    next if($Page =~ m{accesskey});
    next if($Page =~ m{(organization|501|Permanent link|Support us|Find background|About the)});

    push(@{$X}, $Page);
  }
  return($X);
}
210
# Reads a wiki page and collects the categories it links to.
#   $Page - title of the page holding the list
# Returns a reference to an array of category names (without the
# "Category:" prefix); the array is empty when no line matches.
sub GetCategories
{
  my $Page = shift();
  my $Cat = [];

  # Nothing to parse if no text came back for the page
  my $Text = $Wikipedia->text($Page);
  return($Cat) if(!defined($Text));

  foreach my $Line(split(/\n/, $Text))
  {
    # The "stuff to do" page just contains lines of the form:
    # * [[:Category:Some list of towns]]
    # The name stops at the first "|" or "]", so a display label
    # ([[:Category:Foo|label]]) or a second link later on the same
    # line does not end up inside the captured name.
    if($Line =~ m{
      \*
      \s*
      \[\[
      \:
      Category
      \:
      \s*
      ([^\]\|]+?)
      \s*
      (?: \| [^\]]* )?
      \]\]
      }xi)
    {
      push(@{$Cat}, $1);
    }
  }
  return($Cat);
}
235
Note: See TracBrowser for help on using the repository browser.