source: subversion/sites/namefinder/php/search.php @ 4134

Last change on this file since 4134 was 4134, checked in by david, 12 years ago

Main application files

File size: 18.2 KB
Line 
1<?php
2
class search {

  /* The common point of call for doing searches. Call search::xmlise().

     Every method is declared static: they are all invoked as search::method()
     and none of them uses $this. (Calling a non-static method statically is a
     fatal error from PHP 8.0 onwards.) Calling them on an instance still works. */

  // --------------------------------------------------
  public static function xmlise($find, $maxresults) {
    /* Given a search string, returns a string of complete xml describing
       the matches found for that string

       find: the search string from the user,
         search ::- nameword [nameword ...] ( ("near" | ",") placeword [placeword ...]
             ("," isinword )?  )?
         nameword ::- ( word | lat "," lon)
         placeword ::- ( word | lat "," lon)
         find ::- search ( ":" search )?

         The colon form allows great circle distance between two results to be calculated
         Experimentally, the namewords can also form a UK postcode with no qualifications
         Comments are allowed in the string between square brackets

       maxresults: the maximum number of results to return in the XML

       returns: a string, formatted as XML. Failures are reported in an error
         attribute on an otherwise empty <searchresults> element.
    */

    /* NOTE(review): the order of these three statements is kept exactly as it was.
       Files included from inside a function run in that function's scope, so
       options.php presumably relies on $db already being bound here - confirm
       before reordering. */
    include_once('postcodelookup.php');
    global $db;
    include_once('options.php');

    /* oxml is the return string. Put admin stuff in the top level element.
       The declaration is assembled from pieces so that the PHP closing tag
       sequence never appears literally in this source file. */
    $oxml = '<' . '?' . 'xml version="1.0" encoding="UTF-8"' . '?'. '>' . "\n";
    $oxml .= "<searchresults" . self::xmlattr('find', $find);

    $indexdate = options::getoption('indexdate');
    if ($indexdate == '') {
      return self::xmlerror($oxml, 'updating index, temporarily unavailable');
    }
    $oxml .= self::xmlattr('sourcedate', $indexdate);
    $oxml .= " date='".date('Y-m-d H:i:s')."'";

    /* remove comments in square brackets from input */
    $find = self::stripcomments($find);

    /* does it look like a postcode? The original text is remembered first
       because get_query() is handed $find itself.
       NOTE(review): presumably get_query() rewrites $find by reference into a
       name search when it is a postcode - confirm against postcodelookup.php */
    $postcodelookup = postcodelookup::postcodelookupfactory($find);
    $originalpostcode = $find;
    $isapostcode = $postcodelookup->get_query($find);

    $finds = array_map('trim', explode(':', $find));
    if (count($finds) > 2) {
      return self::xmlerror($oxml, 'too many colons');
    }
    $isdistancesearch = count($finds) > 1;

    /* no closing > yet: adding find data to header and possibly errors */
    $oxml .= " distancesearch='" . ($isdistancesearch ? 'yes' : 'no') . "'";

    $multinameds = array();

    for ($i = 0; $i < count($finds); $i++) {
      /* that is, for each component in a search separated by colons, but often only once... */
      $thisfind = self::explodeterms($finds[$i]);

      /* the heart of the search - see below. find() takes the terms by reference
         and may shorten them (lat/lon terms are consumed), which is why the terms
         are only reflected into the xml after this call */
      $nameds = self::find($thisfind, $maxresults);

      /* reflect the original search data back in the xml; attribute names get a
         1/2 suffix only in the colon form */
      $ks = $isdistancesearch ? $i+1 : '';
      $nthisfind = count($thisfind);
      if ($nthisfind >= 1) {
        $oxml .= self::xmlattr("findname{$ks}", $thisfind[0]);
        if ($nthisfind >= 2) {
          $oxml .= self::xmlattr("findplace{$ks}", $thisfind[1]);
          if ($nthisfind == 3) {
            $oxml .= self::xmlattr("findisin{$ks}", $thisfind[2]);
          }
        }
      }

      /* if find() returned an error message rather than an array of results, the
         qualifying place name was not found. For a two term search "name, place"
         try again as "name near name, place": that is, look for a place called
         name whose is_in mentions the second term (e.g. "Cambridge, UK") */
      if (is_string($nameds) && ! $isapostcode && count($thisfind) == 2) {
        $thisfind = array_merge(array($thisfind[0]), $thisfind);
        $nameds = self::find($thisfind, $maxresults);
      }

      /* if still nothing, report it */
      if (is_string($nameds)) {
        return self::xmlerror($oxml, 'place not found');
      }

      if (count($nameds) == 0) {
        return self::xmlerror($oxml,
          $isapostcode ? 'name not found for postcode' : 'name not found');
      }

      /* foundnearplace indicates whether the nearest place was the qualifying one or
         whether there was another place closer */
      $foundnearplace = ! empty($nameds[0]->place);
      $oxml .= " foundnearplace{$ks}='" . ($foundnearplace ? 'yes' : 'no') . "'";

      /* reflect any postcode requested in the xml. This is user input, so it is
         escaped like every other attribute value */
      if ($isapostcode) { $oxml .= self::xmlattr('postcode', $originalpostcode); }

      /* keep a note of the result for debugging */
      $db->log("result: ".print_r($nameds,1));
      $multinameds[] = $nameds;
    }

    $oxml .= ">\n";
    $xml = '';

    if (count($multinameds) == 1) {
      /* the usual case */
      foreach ($multinameds[0] as $named) { $xml .= $named->xmlise(); }
    } else {
      /* the colon case: so now compute the great circle distances for each combination
         of the first 3 results in each side of the colon  */
      include_once('greatcircle.php');
      $firsts = $multinameds[0];
      $seconds = $multinameds[1];
      $nfirsts = min(count($firsts), 3);
      $nseconds = min(count($seconds), 3);
      $gcs = array();

      /* pass 1: place to place. Each side stops at its first result which is not
         a place. A result from the first side is only output once, and only if
         it pairs with something */
      for ($i0 = 0; $i0 < $nfirsts; $i0++) {
        $output0 = FALSE;
        if ($firsts[$i0]->category != 'place') { break; }
        for ($i1 = 0; $i1 < $nseconds; $i1++) {
          if ($seconds[$i1]->category != 'place') { break; }
          if (! $output0) {
            $xml .= $firsts[$i0]->xmlise();
            $output0 = TRUE;
          }
          $xml .= $seconds[$i1]->xmlise();
          $gcs[] = new greatcircle($firsts[$i0], $seconds[$i1]);
        }
      }

      /* pass 2: the same for non-places, this time skipping over places rather
         than stopping at them */
      for ($i0 = 0; $i0 < $nfirsts; $i0++) {
        $output0 = FALSE;
        if ($firsts[$i0]->category == 'place') { continue; }
        for ($i1 = 0; $i1 < $nseconds; $i1++) {
          if ($seconds[$i1]->category == 'place') { continue; }
          if (! $output0) {
            $xml .= $firsts[$i0]->xmlise();
            $output0 = TRUE;
          }
          $xml .= $seconds[$i1]->xmlise();
          $gcs[] = new greatcircle($firsts[$i0], $seconds[$i1]);
        }
      }

      /* the distances go after all the results they refer to */
      foreach ($gcs as $gc) { $xml .= $gc->xmlise(); }
    }

    /* and that's it... */
    return $oxml . $xml . "</searchresults>\n";
  }

  // --------------------------------------------------
  public static function find(&$terms, $maxresults) {
    /* Given a search string, returns an array of named's which are the matches
       for the given search string. Usually this will be called from xmlise rather
       than directly.

       terms: an array of strings, passed by reference and possibly shortened
         (lat/lon terms are removed once consumed), as follows:

         (1) terms[0]: name of something to look for (including references like road numbers
         and IATA codes, non-native versions of a name (e.g. Londres), or generic things
         like "school" or "hotels" (singular or plural)

         or

         (2) terms[0] as above, and

         terms[1]: qualifying place name so that terms[0]
         must be found close to (an instance of - there may be more
         than one match) this place.

         or

         (3) terms[0] and terms[1] as above, and

         terms[2]...: if given, further qualifying strings, each of which must
         appear in the is_in of the qualifying place. For example, there are
         multiple Cambridges, so by setting this to UK, the Cambridge
         with UK in its is_in will be used. (Actually there are two
         Cambridges in the UK, so Cambridgeshire might be more
         appropriate in this case)

         or

         (4) terms[0] and terms[1] are a lat and lon respectively - just asking 'where am I?'
         terms is emptied in this case.

         or

         (5) terms[0] is a name etc. as case 1, and terms[1] and terms[2] are lat and lon
         respectively restricting the search to near to that location

         Note that postcode searches are converted to name searches
         before being presented to the find function. find only deals
         with names and lat/lon pairs

       maxresults: the maximum number of results to return.

       returns: an array of named objects - see class named for details
         or a string which is an error message (when no place matches terms[1])
    */

    /* NOTE(review): $config is not used directly below; it is kept bound here
       because the included files run in this function's scope and may expect
       it - confirm before removing */
    global $db, $config;

    include_once('canon.php');
    include_once('named.php');
    include_once('region.php');

    /* toofars controls what "nearby" means for a place. For example a
       hamlet is only "near" somewhere if it is within 8km. Index 0 is
       the default for any rank not listed */
    $toofars = array(0=>10.0,
                     named::placerank('hamlet')=>8.0,
                     named::placerank('village')=>20.0,
                     named::placerank('suburb')=>20.0,
                     named::placerank('airport')=>20.0,
                     named::placerank('town')=>25.0,
                     named::placerank('city')=>45.0);

    $places = array(); /* the places, if any, which should qualify the search */
    $nameds = array(); /* the result */
    $unfoundplace = NULL; /* message set when is_in culling leaves no place */
    $placesonly = FALSE; /* restrict matches to places (rank > 0) */

    $nterms = count($terms);

    if ($nterms > 2 && self::islatlon($terms[1], $terms[2], $pseudoplace)) {
      /* case 5 above: reduce the lat/lon to a named, one of the places to qualify
         the search, and remove them from the list */
      array_splice($terms, 1, 2);
      $nterms -= 2;
      $places[] = clone $pseudoplace;
    } else if ($nterms > 1) {
      if (self::islatlon($terms[0], $terms[1], $pseudoplace)) {
        /* case 4 above, simply a 'where am i' type of query on
           lat,lon, so the result is the artificial named for that
           lat/lon with its context attached. There is no name left to
           search for, so return straight away rather than go on to
           look at a terms[0] which no longer exists */
        $terms = array();
        $pseudoplace->findnearestplace();
        $pseudoplace->assigndescription(TRUE);
        return array(clone $pseudoplace);
      }

      /* case 2 or 3 above: search is qualified. Find any places of the name given as
         the second term  */
      $places = array_merge($places, named::lookupplaces($terms[1]));
      if (count($places) == 0) { return "I can't find {$terms[1]}"; }

      // $db->log ("found places " . print_r($places, 1));

      if ($nterms > 2) {
        /* case 3: cull the possible places, keeping only those whose is_in
           contains every one of the qualifying is_in terms. Empty terms (from
           a trailing comma, say) impose no constraint */
        $isins = array();
        foreach (array_slice($terms, 2) as $isin) {
          $isin = (string)canon::canonical($isin);
          if ($isin !== '') { $isins[] = $isin; }
        }

        $keptplaces = array();
        foreach ($places as $place) {
          $sourceisin = (string)canon::canonical($place->is_in);
          $matchesall = TRUE;
          foreach ($isins as $isin) {
            if (strpos($sourceisin, $isin) === FALSE) {
              $matchesall = FALSE;
              break;
            }
          }
          if ($matchesall) { $keptplaces[] = $place; }
        }
        $places = $keptplaces;
        // $db->log ("places after cull " . print_r($places, 1));

        /* nothing left, so say so (the message is attached to the results of
           the unqualified search further down) */
        if (count($places) == 0) { $unfoundplace = self::unfoundplacemessage($terms); }
      }
    }

    /* so, we've got so far a list of places, possibly empty, near
       which we must search (which may have come from a lat/lon or a
       name) */

    /* special cases for some plural objects: search on churches near
       ... => church near; and for place like things, limit search
       only to places (rank > 0) rather than including streets named
       'Somewhere Place' etc */
    switch ($terms[0]) {
    case 'churches':
      $terms[0] = 'church';
      break;
    case 'cities':
      $terms[0] = 'city'; // and fall through
    case 'towns':
    case 'suburbs':
    case 'villages':
    case 'hamlets':
    case 'places':
      $placesonly = TRUE;
      break;
    }

    /* Work out canonical forms of the first search term (the road name or whatever) to
       try matching against equivalents in the database. There's more than one because
       Hinton Road becomes Hinton Rd as well, and so on */

    $names = canon::canonical_with_synonym($terms[0]);

    if (count($places) > 0) {
      /* There are qualifying places.

         SELECT * FROM named WHERE (region=n0 [or region=n1 or ...])
         ORDER BY ((lat - latplace)^2 + (lon - lonplace)^2 asc  */

      foreach ($places as $place) {
        $place->assigncontext(); // nearest more important place(s)

        /* find occurrences of the name ordered by distance from the place,
           for each of the places we found */
        $q = $db->query();
        $ands = array();
        $ands[] = canon::likecanon($names);
        $region = new region($place->lat, $place->lon);
        $regionnumbers = $region->considerregions();
        $regionors = array();
        foreach ($regionnumbers as $regionnumber) {
          $regionors[] = y_op::eq('region', $regionnumber);
        }
        $ands[] = count($regionors) == 1 ? $regionors[0] : y_op::oor($regionors);
        if ($placesonly) { $ands[] = y_op::gt('rank', 0); }

        $q->where(y_op::aand($ands));
        $q->ascending(canon::distancerestriction($place->lat, $place->lon));
        $q->limit($maxresults);

        $named = new named();

        $toofar = empty($toofars[$place->rank]) ? $toofars[0]: $toofars[$place->rank];

        while ($q->select($named) > 0) {

          $named->place = clone $place;
          $named->place->localdistancefrom($named);

          if ($named->place->distance > $toofar) { break; } // everywhere else is further too

          /* $named is reused for each row, so clear what the last row left behind.
             Then only keep a nearer place if it really is nearer than the
             qualifying place */
          unset($named->placenearer);
          $named->findnearestplace(/* other than... */ $place);
          if (! empty($named->placenearer) &&
              $named->place->distance < $named->placenearer->distance)
          {
            unset($named->placenearer);
          }
          $named->assigndescription($nterms > 1);
          $nameds[] = clone $named;
        }

        // $db->log ("found names near those places " . print_r($nameds, 1));
      }
    }

    if (count($nameds) == 0) {
      /* Either no qualifying place, or no name found near given place. If there was a
         qualifying place try general search for name anyway: "but I
         did find one near..."

         In this case we have no place to order by distance from, so
         instead do a partial ordering so that exact matches on the
         name (or one of its abbreviated variants) come first and then
         partial matches. For example, "Fulbourn" would come before
         "Fulbourn Post Office" when searching for "Fulbourn". We do
         this by going round the loop twice, relaxing the exactness
         condition on the second time round */
      $limit = $maxresults;
      $exact = TRUE;
      for ($i = 0; $i < 2 && $limit > 0; $i++) {
        $q = $db->query();
        $q->limit($limit);
        $condition = canon::likecanon($names, $exact);
        if (! $exact) {
          /* second time round: don't find the exact matches all over again */
          $condition = y_op::aand($condition, y_op::not(canon::likecanon($names, TRUE)));
        }
        // $condition = y_op::aand($condition, y_op::le('rank',named::placerank('city')));
        $q->where($condition);
        /* prioritise places, and those in order of importance, cities first */
        $q->descending('rank');
        $named = new named();
        while ($q->select($named) > 0) {
          $namedclone = clone $named;
          if ($namedclone->rank > 0) {
            $namedclone->assigncontext();
            $namedclone->findnearestplace($namedclone, $namedclone->isolatedplaceneighbourranks());
          } else {
            $namedclone->findnearestplace();
          }
          if ($unfoundplace !== NULL) { $namedclone->place = $unfoundplace; }
          $namedclone->assigndescription($nterms > 1);
          $nameds[] = $namedclone;
        }
        /* whatever is left of the allowance is available for the inexact matches */
        $limit -= count($nameds);
        $exact = FALSE;
      }

      // $db->log ("found names near other places " . print_r($nameds, 1));
    }

    /* cull duplicate responses, keeping the first occurrence of each id. These are
       usually because it found the name near more than one place which matched the
       place name criterion */
    $namedsunique = array();
    foreach ($nameds as $named) {
      if (! array_key_exists($named->id, $namedsunique)) {
        $namedsunique[$named->id] = $named;
      }
    }

    return array_values($namedsunique);
  }

  // --------------------------------------------------
  public static function explodeterms($terms) {
    /* Helper function to expand a complete search string into an array of terms
       suitable for the find function above. " near " is treated as another way
       of writing a comma; every term is trimmed of surrounding white space.

       The match on " near " is deliberately case sensitive so that capitalised
       place names beginning with Near survive, as in "hotel, Near Sawrey" */
    return array_map('trim', explode(',', str_replace(' near ', ',', $terms)));
  }

  // --------------------------------------------------
  /* intended for use within this class */
  public static function islatlon($term1, $term2, &$pseudoplace) {
    /* Returns a boolean according to whether term1 and term2 (both
       strings, separate because the comma between them caused them to
       be separated), are both decimal numbers, and therefore together
       form a latitude/longitude pair. If so, constructs and returns in
       $pseudoplace an anonymous, artificial named which is located
       at the lat/lon determined. If not, $pseudoplace is left untouched.

       Each artificial named gets its own negative id (-10, -20, ...) from a
       counter which persists between calls */
    static $anonid = 0;

    /* optional minus sign, then either all digits or digits with a decimal
       part (a trailing "." with nothing after it is not accepted) */
    $decimal = '/^-?([0-9]+|[0-9]*\\.[0-9]+)$/';
    if (! preg_match($decimal, $term1) || ! preg_match($decimal, $term2)) {
      return FALSE;
    }

    $anonid -= 10;

    $pseudoplace = new named();
    $pseudoplace->category = 'place';
    $pseudoplace->lat = (double)$term1;
    $pseudoplace->lon = (double)$term2;
    $pseudoplace->name = '';
    $pseudoplace->id = $anonid;
    $pseudoplace->canon = '#;;#';
    $pseudoplace->rank = named::placerank('city'); // hmm
    $pseudoplace->info = 'requested location';
    return TRUE;
  }

  // --------------------------------------------------
  /* intended for use within this class */
  public static function stripcomments($find) {
    /* Removes every [bracketed comment] from a search string and trims the
       result. Each comment is matched on its own, so the text between two
       separate comments is kept.

       returns: the cleaned string; the trimmed input if the pattern
         match itself fails */
    $stripped = preg_replace('/\\[[^\\]]*\\]/', '', $find);
    return trim($stripped === NULL ? $find : $stripped);
  }

  // --------------------------------------------------
  /* intended for use within this class */
  public static function xmlattr($name, $value) {
    /* Returns " name='value'" (with the leading space) ready to append to an
       open xml tag, with value escaped for use inside a quoted attribute.
       name is used as given and must already be a valid attribute name */
    return " {$name}='" . htmlspecialchars($value, ENT_QUOTES, 'UTF-8') . "'";
  }

  // --------------------------------------------------
  /* intended for use within this class */
  public static function xmlerror($oxml, $message) {
    /* Completes the still open <searchresults tag in oxml with an error
       attribute and closes the element straight away. message is one of the
       fixed strings from this file and is inserted as given */
    return $oxml . " error='{$message}'>\n</searchresults>\n";
  }

  // --------------------------------------------------
  /* intended for use within this class */
  public static function unfoundplacemessage($terms) {
    /* Builds the "<place> not found [in <is_in>, <is_in>...]" message for
       search terms whose qualifying place (terms[1]) did not survive
       the is_in terms (terms[2] onwards) */
    $isin = implode(', ', array_slice($terms, 2));
    $message = "{$terms[1]} not found";
    if ($isin !== '') { $message .= " in {$isin}"; }
    return $message;
  }

}
469
470?>
Note: See TracBrowser for help on using the repository browser.