source: subversion/sites/namefinder/php/postcodelookup.php @ 4135

Last change on this file since 4135 was 4134, checked in by david, 12 years ago

Main application files

File size: 5.9 KB
Line 
1<?php
2
class postcodelookup {

  /* This helper class consults google to see if it can find an
     address for a postcode.  The address is then fed back into the
     name search.  Make an object of this class using
     postcodelookup::postcodelookupfactory: you can then retrieve the
     query to pass on to the name finder using the get_query method
     (which will do the lookup) on the postcodelookup object */

  /* http://www.google.com/search?hl=en&q=%22PE27+5JP%22 */

  /* the text accepted by the factory as a postcode, or NULL if it was rejected */
  public $postcode;
  /* search URL prefix; the quoted, url-encoded postcode is appended to it */
  public $googlequery = 'http://www.google.com/search?hl=en&q=';
  /* "street, place" query derived from the fetched pages, or NULL if none was found */
  public $namefinderquery;

  // --------------------------------------------------
  /* Factory: always returns a postcodelookup object.  Its postcode
     property is the given text when that text contains something
     shaped like a UK postcode, and NULL otherwise.

     Declared static because it is documented to be called as
     postcodelookup::postcodelookupfactory(); calling a non-static
     method that way is a fatal error from PHP 8.

     NOTE(review): the pattern is deliberately left unanchored, as in
     the original, so text surrounding the postcode is kept in
     $postcode -- confirm whether callers rely on that. */
  public static function postcodelookupfactory($prospectivepostcode) {
    $postcodelookup = new postcodelookup();
    $postcodelookup->namefinderquery = NULL;
    if (! preg_match ('/[a-z]{1,2}[0-9]{1,2}[a-z]? [0-9][a-z]{2}/i', $prospectivepostcode)) {
      $postcodelookup->postcode = NULL;
      return $postcodelookup; /* not a postcode */
    }
    $postcodelookup->postcode = $prospectivepostcode;
    return $postcodelookup;
  }

  // --------------------------------------------------
  /* Performs the lookup and, on success, stores the name finder query
     in $query (passed by reference) and returns TRUE.  Returns FALSE,
     leaving $query untouched, when there is no postcode or no address
     could be derived for it. */
  public function get_query(&$query) {
    if (empty($this->postcode)) { return FALSE; }
    $this->googleme();
    if (is_null($this->namefinderquery)) { return FALSE; }
    $query = $this->namefinderquery;
    return TRUE;
  }

  // --------------------------------------------------
  /* Looks a candidate place name up in the place index (exact match,
     constrained to the bounding box $bbox), remembering the result in
     $triedplaces so the same name is never looked up twice during one
     match() call.  Returns whatever named::lookupplaces returned. */
  private function lookupplace($place, $bbox, &$triedplaces) {
    if (! array_key_exists($place, $triedplaces)) {
      $triedplaces[$place] = named::lookupplaces($place, $bbox, TRUE /* exact match */);
    }
    return $triedplaces[$place];
  }

  // --------------------------------------------------
  /* Intended as private (kept public so that any existing external
     caller keeps working).

     Scans the HTML in $subject for an address ending in the postcode.
     On success sets $this->namefinderquery to "street, place" and
     returns TRUE; otherwise returns FALSE. */
  public function match($subject) {
    include_once('named.php');
    static $roadetc = 'road|rd|street|st|lane|ln|place|pl|avenue|ave|crescent|cres|close|cl|way|wy|drive|dr|walk|park|pk|row|hill|parade|pde|terrace|tce|court|ct|mews|grove|rise|fields|meadows|path|green|gn|gardens|gdns|garden|gdn|gate';
    static $namechars = "[a-z \\\\'\\\\.\\\\-]";
    global $db;

    /* turn line breaks into clause separators, then drop the remaining markup */
    $subject = strip_tags(preg_replace('~\\<br\\ *\\/?\\>~i', ',', $subject));

    /* the postcode is caller-supplied text: quote it so that it can only
       ever match literally and cannot break the '/'-delimited patterns */
    $postcode = preg_quote($this->postcode, '/');

    /* lat,lon to constrain place checks to uk: just clips northern France, and part of RoI,
       but it is only to slim down the search */
    $uk = array(49.9,-8.1,61.0,2.0);

    /* hash of places to avoid repeatedly looking up the same place */
    $triedplaces = array();

    /* first pass insists on a road word (group 2), so the place clauses
       are groups 3, 5, 7 and 9; the relaxed pass has no group 2, which
       shifts them down to 2, 4, 6 and 8 */
    $roadsection = " *({$roadetc})";
    $firstplacegroup = 3;

    for ($i = 0; $i < 2; $i++) {
      $nmatches = preg_match_all ("/[^0-9][0-9]{1,3}[\\, ] *({$namechars}{1,30}{$roadsection})\\.?\\, *({$namechars}{1,})(\\, *({$namechars}{1,}))?(\\, *({$namechars}{1,}))?(\\, *({$namechars}{1,}))?\\,? *{$postcode}/i", $subject, $matches, PREG_SET_ORDER);

      if ($nmatches) {
        foreach ($matches as $submatches) {
          /* $submatches[1] is the street; step through the place clauses
             only (every second group), skipping the comma-prefixed wrappers.
             Trailing optional groups that did not take part in the match
             are absent from the array, hence the count() bound. */
          $ngroups = count($submatches);
          for ($k = $firstplacegroup; $k < $ngroups; $k += 2) {
            $possibleplace = trim($submatches[$k]);
            if ($possibleplace === '') { continue; }
            $places = $this->lookupplace($possibleplace, $uk, $triedplaces);
            if (empty($places)) { continue; }

            $street = trim($submatches[1]);
            $this->namefinderquery = "{$street}, {$possibleplace}";
            $db->log("looking up {$this->namefinderquery} for {$this->postcode} ". print_r($submatches,1));
            return TRUE;
          }
        }
      }

      /* nothing helped, so relax the search so it doesn't include things like 'Road',
         e.g. 47 The Brambles, Somewhereville, SG8 1TX */
      $roadsection = '';
      $firstplacegroup = 2;
    }

    /* now try it without a number. But to isolate the place from among the four clauses
       try looking up each prospective place in the place index, and take the clause before
       it as the street (or sometimes the business name:
       eg 'University of East Anglia, Norwich, NR4 7TJ') */

    if (preg_match_all ("/({$namechars}{1,})\\, *({$namechars}{1,30})(\\, *({$namechars}{1,30}))?(\\, *({$namechars}{1,30}))?\\,? *{$postcode}/i", $subject, $matchesall, PREG_SET_ORDER))
    {
      $db->log("considering: ".print_r($matchesall,1));

      /* the four clauses are groups 1, 2, 4 and 6 (3 and 5 are the
         comma-prefixed wrappers).  Listing them explicitly makes "the
         clause before" well defined for every candidate, including the
         first one, whose predecessor is group 1 and not the whole match */
      $clausegroups = array(1, 2, 4, 6);

      foreach ($matchesall as $matches) {
        for ($c = 1; $c < count($clausegroups); $c++) {
          $j = $clausegroups[$c];
          if (! isset($matches[$j])) { break; } /* optional clause absent */
          $possibleplace = trim($matches[$j]);
          if ($possibleplace === '') { continue; }
          $places = $this->lookupplace($possibleplace, $uk, $triedplaces);
          if (empty($places)) { continue; }
          $db->log("places found for {$possibleplace}: ".print_r($places,1));
          $address = trim($matches[$clausegroups[$c - 1]]);
          $this->namefinderquery = "{$address}, {$possibleplace}";
          $db->log("looking up {$this->namefinderquery} for {$this->postcode} ".
                   print_r($matches,1));
          return TRUE;
        }
      }
    }

    return FALSE;
  }

  // --------------------------------------------------
  /* Intended as private (kept public so that any existing external
     caller keeps working).

     Fetches the search results page for the quoted postcode and runs
     match() over it; if that finds nothing, does the same for each
     page linked as "cached" from the results.  The outcome is left in
     $this->namefinderquery by match(); nothing is returned. */
  public function googleme() {
    global $db;

    $qs = $this->googlequery . urlencode("\"{$this->postcode}\"");
    $googleresult = file_get_contents($qs);
    if ($googleresult === FALSE) { return; }
    if ($this->match($googleresult)) { return; }

    /* try the cached pages if we didn't find it on the search results page.
       [^>]* (rather than .*) keeps each match inside a single <a> tag, so
       every cached link on a line is found, not just the last one */
    $found = preg_match_all ("~\\<a [^\\>]*href=[\"\']([^\"\']*)[\"\'][^\\>]*\\>cached\\<\\/a\\>~i",
                             $googleresult,
                             $matches, PREG_PATTERN_ORDER);
    if (! $found) { return; }

    foreach ($matches[1] as $cn => $cacheurl) {
      $cacheresult = file_get_contents($cacheurl);
      /* one unreachable cached page should not stop us trying the others */
      if ($cacheresult === FALSE) { continue; }
      if ($this->match($cacheresult)) {
        $db->log ("found in cache {$cn}: {$cacheurl}");
        return;
      }
    }
  }

}
137
138?>
Note: See TracBrowser for help on using the repository browser.