source: subversion/applications/editors/django/osmeditor/third/httplib2/__init__.py @ 13349

Last change on this file since 13349 was 13349, checked in by crschmidt, 11 years ago

Commit first pass at a web UI for doing simplified OSM object editing.

File size: 46.4 KB
Line 
1from __future__ import generators
2"""
3httplib2
4
5A caching http interface that supports ETags and gzip
6to conserve bandwidth.
7
8Requires Python 2.3 or later
9
10Changelog:
112007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
12
13"""
14
15__author__ = "Joe Gregorio (joe@bitworking.org)"
16__copyright__ = "Copyright 2006, Joe Gregorio"
17__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
18    "James Antill",
19    "Xavier Verges Farrero",
20    "Jonathan Feinberg",
21    "Blair Zajac",
22    "Sam Ruby",
23    "Louis Nyffenegger"]
24__license__ = "MIT"
25__version__ = "$Rev: 259 $"
26
27import re 
28import sys 
29import md5
30import email
31import email.Utils
32import email.Message
33import StringIO
34import gzip
35import zlib
36import httplib
37import urlparse
38import base64
39import os
40import copy
41import calendar
42import time
43import random
44import sha
45import hmac
46from gettext import gettext as _
47import socket
48
# Optional SOCKS proxy support: the third-party "socks" module may be
# absent, in which case proxying is unavailable (ProxyInfo.isgood below
# checks for this).
try:
    import socks
except ImportError:
    socks = None

# iri2uri converts Internationalized Resource Identifiers to plain URIs.
# The helper module needs Python 2.3+; on older interpreters fall back to
# an identity function, so non-ASCII IRIs pass through unconverted.
if sys.version_info >= (2,3):
    from iri2uri import iri2uri
else:
    def iri2uri(uri):
        # No-op fallback: assume the caller already supplies a plain URI.
        return uri
59
# Public API exported by "from httplib2 import *".
__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
  'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent', 
  'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
  'debuglevel']


# The httplib debug level, set to a non-zero value to get debug output
debuglevel = 0
68
# Python 2.3 support: sorted() only became a builtin in 2.4, so provide
# a minimal substitute on older interpreters.
if sys.version_info < (2,4):
    def sorted(seq):
        # NOTE: unlike the real builtin this sorts *seq* in place as a
        # side effect and supports none of the cmp/key/reverse arguments.
        seq.sort()
        return seq
74
# Python 2.3 support
def HTTPResponse__getheaders(self):
    """Return list of (header, value) tuples."""
    # Reimplements httplib.HTTPResponse.getheaders() (added in 2.4) from
    # the parsed message headers.
    if self.msg is None:
        raise httplib.ResponseNotReady()
    return self.msg.items()

# Monkey-patch the method onto HTTPResponse only when it is missing, so
# the native implementation is preferred when present.
if not hasattr(httplib.HTTPResponse, 'getheaders'):
    httplib.HTTPResponse.getheaders = HTTPResponse__getheaders
84
# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception): pass

# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    # Carries the offending (response, content) pair so callers may
    # inspect or recover the partial result.
    def __init__(self, desc, response, content):
        self.response = response
        self.content = content
        HttpLib2Error.__init__(self, desc)

# A redirect status arrived without a Location header to follow.
class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass
# More redirects were seen than the configured maximum allows.
class RedirectLimit(HttpLib2ErrorWithResponse): pass
# The body claimed a Content-Encoding it could not be decoded with.
class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass
# The server requested a Digest auth feature this client lacks.
class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
# The server requested an HMACDigest auth feature this client lacks.
class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass

# A relative URI was given where only absolute ones are accepted (urlnorm).
class RelativeURIError(HttpLib2Error): pass
# The target server could not be found (raised elsewhere in this file).
class ServerNotFoundError(HttpLib2Error): pass
104
105# Open Items:
106# -----------
107# Proxy support
108
109# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
110
111# Pluggable cache storage (supports storing the cache in
112#   flat files by default. We need a plug-in architecture
113#   that can support Berkeley DB and Squid)
114
115# == Known Issues ==
116# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
117# Does not handle Cache-Control: max-stale
118# Does not use Age: headers when calculating cache freshness.
119
120
# The number of redirections to follow before giving up.
# Note that only GET redirects are automatically followed.
# Will also honor 301 requests by saving that info and never
# requesting that URI again.
# Used as the default bound on automatic redirect-following.
DEFAULT_MAX_REDIRECTS = 5
126
127# Which headers are hop-by-hop headers by default
128HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade']
129
130def _get_end2end_headers(response):
131    hopbyhop = list(HOP_BY_HOP)
132    hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')])
133    return [header for header in response.keys() if header not in hopbyhop]
134
# Appendix B of RFC 3986 -- splits a URI reference into its five parts.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

        (scheme, authority, path, query, fragment) = parse_uri(uri)

    Missing components are returned as None.
    """
    groups = URI.match(uri).groups()
    return (groups[1], groups[3], groups[4], groups[6], groups[8])

def urlnorm(uri):
    """Normalize an absolute URI for use as a cache key.

    Returns (scheme, authority, request_uri, defrag_uri) where defrag_uri
    is the normalized URI without its fragment. Raises RelativeURIError
    when scheme or authority is missing.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    # Scheme and authority are case-insensitive (RFC 3986 section 6.2.2.1);
    # the original code lowered the scheme twice -- once is enough.
    authority = authority.lower()
    scheme = scheme.lower()
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = query and "?".join([path, query]) or path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
159
160
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
re_url_scheme    = re.compile(r'^\w+://')   # leading "scheme://" prefix
re_slash         = re.compile(r'[?/:|]+')   # characters unsafe in file names

def safename(filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.
    """

    try:
        # For URL-shaped keys, try to IDNA-encode so non-ASCII hostnames
        # yield ASCII-safe names. NOTE(review): 'idna' encoding of a whole
        # URL (not just the hostname) fails for most multi-part URLs; the
        # UnicodeError handler below makes this best-effort only.
        if re_url_scheme.match(filename):
            if isinstance(filename,str):
                filename = filename.decode('utf-8')
                filename = filename.encode('idna')
            else:
                filename = filename.encode('idna')
    except UnicodeError:
        pass
    if isinstance(filename,unicode):
        filename=filename.encode('utf-8')
    # Hash the pre-sanitization name so distinct keys that sanitize to the
    # same string still map to distinct cache files.
    filemd5 = md5.new(filename).hexdigest()
    filename = re_url_scheme.sub("", filename)
    filename = re_slash.sub(",", filename)

    # limit length of filename to stay well within filesystem name limits;
    # the md5 suffix keeps truncated names unique.
    if len(filename)>200:
        filename=filename[:200]
    return ",".join((filename, filemd5))
191
192NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
193def _normalize_headers(headers):
194    return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip())  for (key, value) in headers.iteritems()])
195
def _parse_cache_control(headers):
    """Parse the Cache-Control header of *headers* into a dict.

    Directives with an argument map to the argument string (e.g.
    {'max-age': '3600'}); bare directives map to 1. Returns an empty
    dict when no Cache-Control header is present.
    """
    retval = {}
    if 'cache-control' in headers:
        with_args = []
        without_args = []
        for part in headers['cache-control'].split(','):
            if "=" in part:
                with_args.append(tuple([piece.strip() for piece in part.split("=")]))
            else:
                without_args.append((part.strip(), 1))
        retval = dict(with_args + without_args)
    return retval
204
# Whether to use a strict mode to parse WWW-Authenticate headers
# Might lead to bad results in case of ill-formed header value,
# so disabled by default, falling back to relaxed parsing.
# Set to true to turn on, useful for testing servers.
USE_WWW_AUTH_STRICT_PARSING = 0

# In regex below:
#    [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+             matches a "token" as defined by HTTP
#    "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?"    matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
#    \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
# Reverses backslash quoting inside quoted-string values: \X -> X.
UNQUOTE_PAIRS = re.compile(r'\\(.)')
def _parse_www_authenticate(headers, headername='www-authenticate'):
    """Returns a dictionary of dictionaries, one dict
    per auth_scheme."""
    # Each match of the WWW_AUTH_* regex consumes one "key=value" pair from
    # the front of the remaining text and returns the rest in group 3.
    retval = {}
    if headers.has_key(headername):
        authenticate = headers[headername].strip()
        www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
        while authenticate:
            # Break off the scheme at the beginning of the line
            if headername == 'authentication-info':
                # Authentication-Info carries no scheme token; it always
                # belongs to the Digest scheme.
                (auth_scheme, the_rest) = ('digest', authenticate)               
            else:
                (auth_scheme, the_rest) = authenticate.split(" ", 1)
            # Now loop over all the key value pairs that come after the scheme,
            # being careful not to roll into the next scheme
            match = www_auth.search(the_rest)
            auth_params = {}
            while match:
                if match and len(match.groups()) == 3:
                    (key, value, the_rest) = match.groups()
                    # Undo quoted-pair escaping inside quoted-string values.
                    auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
                match = www_auth.search(the_rest)
            retval[auth_scheme.lower()] = auth_params
            authenticate = the_rest.strip()
    return retval
244
245
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Not that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.   
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh

    Returns one of "FRESH" (serve from cache), "STALE" (revalidate or
    refetch) or "TRANSPARENT" (bypass the cache entirely).
    """
   
    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    # HTTP/1.0 style "Pragma: no-cache" forces an end-to-end reload; also
    # upgrade it to an HTTP/1.1 Cache-Control header on the request.
    if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1:
        retval = "TRANSPARENT"
        if 'cache-control' not in request_headers:
            request_headers['cache-control'] = 'no-cache'
    elif cc.has_key('no-cache'):
        retval = "TRANSPARENT"
    elif cc_response.has_key('no-cache'):
        retval = "STALE"
    elif cc.has_key('only-if-cached'):
        retval = "FRESH"
    elif response_headers.has_key('date'):
        # Compare freshness lifetime against current age, in the style of
        # RFC 2616 section 13.2 (minus the Age-header refinements).
        date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
        now = time.time()
        current_age = max(0, now - date)
        if cc_response.has_key('max-age'):
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except ValueError:
                # Malformed max-age: treat the entry as already expired.
                freshness_lifetime = 0
        elif response_headers.has_key('expires'):
            expires = email.Utils.parsedate_tz(response_headers['expires'])
            if None == expires:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        # The request's own max-age overrides whatever the response allowed.
        if cc.has_key('max-age'):
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0
        if cc.has_key('min-fresh'):
            # min-fresh: the client wants a response that will still be
            # fresh for at least this many more seconds.
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
319
def _decompressContent(response, new_content):
    """Decode the body according to the response's Content-Encoding.

    Supports 'gzip' and 'deflate'; any other encoding (or none) returns
    the body unchanged. On success the content-length header is fixed up
    and content-encoding removed, since the stored body is now decoded.
    Raises FailedToDecompressContent when decoding fails.
    """
    content = new_content
    try:
        encoding = response.get('content-encoding', None)
        if encoding in ['gzip', 'deflate']:
            if encoding == 'gzip':
                content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
            if encoding == 'deflate':
                content = zlib.decompress(content)
            # The headers must now describe the decoded entity.
            response['content-length'] = str(len(content))
            del response['content-encoding']
    except (IOError, zlib.error):
        # BUGFIX: zlib.decompress raises zlib.error, not IOError, so a
        # corrupt deflate body previously escaped this handler entirely.
        content = ""
        raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
    return content
335
def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store the response under *cachekey*, honoring 'no-store'.

    If either side sent 'Cache-Control: no-store' any existing entry is
    purged instead. The cached text is a 'status:' line, the end-to-end
    entity headers as an RFC 822 message, then the raw body.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if cc.has_key('no-store') or cc_response.has_key('no-store'):
            cache.delete(cachekey)
        else:
            info = email.Message.Message()
            for key, value in response_headers.iteritems():
                # These describe this particular transfer, not the entity,
                # so they must never be replayed from the cache.
                if key not in ['status','content-encoding','transfer-encoding']:
                    info[key] = value

            # A 304 means the cached entity is still valid, so record it
            # with the original 200 status.
            status = response_headers.status
            if status == 304:
                status = 200

            # BUGFIX: this previously formatted response_headers.status,
            # silently discarding the 304 -> 200 translation just computed.
            status_header = 'status: %d\r\n' % status

            header_str = info.as_string()

            # Normalize lone CR or LF to CRLF so the entry reads back as a
            # well-formed message.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = "".join([status_header, header_str, content])

            cache.set(cachekey, text)
360
def _cnonce():
    """Generate a 16-hex-character client nonce for Digest/HMACDigest auth."""
    # Entropy comes from the current time plus 20 random decimal digits.
    # NOTE(review): randrange(0, 9) never yields 9, and this is not a
    # cryptographically strong source -- adequate only for nonces.
    dig = md5.new("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
    return dig[:16]
364
def _wsse_username_token(cnonce, iso_now, password):
    """Return the WSSE PasswordDigest: Base64(SHA1(cnonce + created + password))."""
    return base64.encodestring(sha.new("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
367
368
369# For credentials we need two things, first
370# a pool of credential to try (not necesarily tied to BAsic, Digest, etc.)
371# Then we also need a list of URIs that have already demanded authentication
372# That list is tricky since sub-URIs can take the same auth, or the
373# auth scheme may change as you descend the tree.
374# So we also need each Auth instance to be able to tell us
375# how close to the 'top' it is.
376
class Authentication(object):
    """Base class for the scheme-specific authorization handlers.

    An instance remembers the host and path prefix that issued a
    challenge so that later requests can find the handler whose scope
    covers them (see inscope and depth).
    """
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        self.credentials = credentials
        self.host = host
        self.path = path
        self.http = http

    def depth(self, request_uri):
        """Return how many path levels request_uri lies below our scope."""
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        remainder = request_uri[len(self.path):]
        return remainder.count("/")

    def inscope(self, host, request_uri):
        # XXX Should we normalize the request_uri?
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        if host != self.host:
            return False
        return path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Override this in sub-classes."""
        pass

    def response(self, response, content):
        """Gives us a chance to update with new nonces
        or such returned from the last authorized response.
        Override this in sub-classes if necessary.

        Return True if the request is to be retried, for
        example Digest may return stale=true.
        """
        return False
408
409
410
class BasicAuthentication(Authentication):
    """Handles HTTP Basic authentication (RFC 2617 section 2)."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Add a Basic Authorization header built from the stored
        name/password pair."""
        userpass = "%s:%s" % self.credentials
        headers['authorization'] = 'Basic ' + base64.encodestring(userpass).strip()
419
420
class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['digest']
        qop = self.challenge.get('qop')
        # NOTE(review): qop is split on whitespace, but RFC 2617 defines
        # qop-options as comma-separated ("auth,auth-int" would not match
        # here); a challenge with no qop at all leaves None and .split()
        # would raise AttributeError. Confirm against real challenges.
        self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
        if self.challenge['qop'] is None:
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5')
        if self.challenge['algorithm'] != 'MD5':
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        # A1 = username:realm:password, per RFC 2617 section 3.2.2.2.
        self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])   
        # nc (nonce count) starts at 1 and is incremented on every request.
        self.challenge['nc'] = 1

    def request(self, method, request_uri, headers, content, cnonce = None):
        """Modify the request headers"""
        # H and KD as defined in RFC 2617 section 3.2.1; A2 = method:uri.
        H = lambda x: md5.new(x).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        # cnonce parameter exists so tests can supply a fixed value.
        self.challenge['cnonce'] = cnonce or _cnonce() 
        request_digest  = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'], 
                    '%08x' % self.challenge['nc'], 
                    self.challenge['cnonce'], 
                    self.challenge['qop'], H(A2)
                    )) 
        headers['Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
                self.credentials[0], 
                self.challenge['realm'],
                self.challenge['nonce'],
                request_uri, 
                self.challenge['algorithm'],
                request_digest,
                self.challenge['qop'],
                self.challenge['nc'],
                self.challenge['cnonce'],
                )
        self.challenge['nc'] += 1

    def response(self, response, content):
        """Process the server's reply to a Digest-authenticated request.

        Returns True when the request should be retried with a fresh
        nonce (stale=true); otherwise records any nextnonce the server
        supplied and returns False.
        """
        if not response.has_key('authentication-info'):
            # A stale nonce means the credentials were fine; retry with
            # the server's new nonce and a reset nonce count.
            challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
            if 'true' == challenge.get('stale'):
                self.challenge['nonce'] = challenge['nonce']
                self.challenge['nc'] = 1 
                return True
        else:
            # The server may hand us the nonce to use next time.
            updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})

            if updated_challenge.has_key('nextnonce'):
                self.challenge['nonce'] = updated_challenge['nextnonce']
                self.challenge['nc'] = 1 
        return False
476
477
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""
    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['hmacdigest']
        # TODO: self.challenge['domain']
        # Unknown 'reason' values fall back to 'unauthorized'.
        self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
        if self.challenge['reason'] not in ['unauthorized', 'integrity']:
            self.challenge['reason'] = 'unauthorized'
        self.challenge['salt'] = self.challenge.get('salt', '')
        if not self.challenge.get('snonce'):
            raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty."))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
        if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
        if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
        # Pick the hash modules implied by the negotiated algorithms.
        if self.challenge['algorithm'] == 'HMAC-MD5':
            self.hashmod = md5
        else:
            self.hashmod = sha
        if self.challenge['pw-algorithm'] == 'MD5':
            self.pwhashmod = md5
        else:
            self.pwhashmod = sha
        # HMAC key = pwhash(username : pwhash(password + salt) : realm).
        self.key = "".join([self.credentials[0], ":",
                    self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
                    ":", self.challenge['realm']
                    ])
        self.key = self.pwhashmod.new(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers"""
        # The digest covers the method, URI, both nonces and the
        # concatenated values of every end-to-end header being sent.
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
        request_digest  = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
                self.credentials[0], 
                self.challenge['realm'],
                self.challenge['snonce'],
                cnonce,
                request_uri, 
                created,
                request_digest,
                keylist,
                )

    def response(self, response, content):
        """Return True to retry when the server reports a stale nonce or
        an integrity failure; otherwise False."""
        challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
        if challenge.get('reason') in ['integrity', 'stale']:
            return True
        return False
538
539
class WsseAuthentication(Authentication):
    """WSSE UsernameToken authentication (X-WSSE header).

    Thinly tested and should not be relied upon: there is currently no
    third-party server to test against. Blogger and TypePad implemented
    this algorithm at one point, but Blogger has since switched to Basic
    over HTTPS and TypePad implemented it wrong, never issuing a 401
    challenge and instead requiring the client to telepathically know
    that the endpoint expects WSSE profile="UsernameToken"."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Add the WSSE Authorization and X-WSSE UsernameToken headers."""
        headers['Authorization'] = 'WSSE profile="UsernameToken"'
        # The token digest binds a fresh client nonce, the creation
        # timestamp and the password together (see _wsse_username_token).
        timestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        nonce = _cnonce()
        digest = _wsse_username_token(nonce, timestamp, self.credentials[1])
        headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
                self.credentials[0],
                digest,
                nonce,
                timestamp)
563
class GoogleLoginAuthentication(Authentication):
    # Implements Google's ClientLogin protocol: exchanges the user's
    # email/password for an Auth token over HTTPS, then attaches that
    # token to later requests. NOTE: __init__ performs a network request.
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib import urlencode
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        service = challenge['googlelogin'].get('service', 'xapi')
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == 'xapi' and  request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        #elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
        resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
        # The reply body is "key=value" lines; we need the Auth line.
        lines = content.split('\n')
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            # Login rejected: keep an empty token so later requests go out
            # unauthenticated rather than raising here.
            self.Auth = ""
        else:
            self.Auth = d['Auth']

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
591
592
# Maps the (lowercased) scheme token from a WWW-Authenticate challenge to
# the Authentication subclass that handles it.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication
}

# Preference order when a server offers several schemes: strongest first.
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
602
def _md5(s):
    # NOTE(review): this helper is a broken stub -- it ignores *s* and
    # always returns None. No caller is visible in this chunk; if it is
    # meant to hash its argument it should return md5.new(s).hexdigest().
    # Confirm against callers before fixing or removing it.
    return 
605
class FileCache(object):
    """A cache that stores each entry as a file in a local directory.

    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """
    def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        self.cache = cache
        # safe() maps a cache key (normally a URI) to a filesystem-safe name.
        self.safe = safe
        if not os.path.exists(cache): 
            os.makedirs(self.cache)

    def get(self, key):
        """Return the cached text for *key*, or None when absent/unreadable."""
        path = os.path.join(self.cache, self.safe(key))
        value = None
        try:
            fp = file(path, "r")
            value = fp.read()
            fp.close()
        except IOError:
            pass
        return value

    def set(self, key, value):
        """Store *value* for *key*, replacing any previous entry."""
        path = os.path.join(self.cache, self.safe(key))
        fp = file(path, "w")
        fp.write(value)
        fp.close()

    def delete(self, key):
        """Remove the entry for *key*, if one exists."""
        path = os.path.join(self.cache, self.safe(key))
        if os.path.exists(path):
            os.remove(path)
638
class Credentials(object):
    """A registry of (name, password) pairs, optionally scoped by domain.

    Credentials added with an empty domain match every domain.
    """
    def __init__(self):
        # Stored as (domain, name, password) triples in insertion order.
        self.credentials = []

    def add(self, name, password, domain=""):
        """Register name/password for *domain* ("" means any domain)."""
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        """Forget every registered credential."""
        self.credentials = []

    def iter(self, domain):
        """Yield each (name, password) pair applicable to *domain*."""
        for entry in self.credentials:
            cred_domain, user, password = entry
            # Wildcard entries (empty domain) match everything.
            if cred_domain in ("", domain):
                yield (user, password)
653
class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""
    # Inherits add/clear/iter unchanged; only the interpretation of the
    # stored pair differs (SSL key file / certificate file).
    pass
658
659
class ProxyInfo(object):
    """Collect information required to use a proxy.

    The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
    constants. For example:

        p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP,
                      proxy_host='localhost', proxy_port=8000)
    """
    def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass

    def astuple(self):
        """Return the settings in the argument order of socks.socksocket.setproxy()."""
        return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns,
                self.proxy_user, self.proxy_pass)

    def isgood(self):
        """True when the socks module is importable and host/port are set."""
        # 'is not None' replaces the original '!= None' comparisons:
        # identity is the correct test and always returns a plain bool
        # (the old 'socks and ...' form could return the module itself).
        return (socks is not None) and (self.proxy_host is not None) and (self.proxy_port is not None)
676
677
class HTTPConnectionWithTimeout(httplib.HTTPConnection):
    """HTTPConnection subclass that supports timeouts"""

    def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
        httplib.HTTPConnection.__init__(self, host, port, strict)
        # Socket timeout in seconds; None leaves the socket default.
        self.timeout = timeout
        # Optional ProxyInfo; when usable, connections go through socks.
        self.proxy_info = proxy_info

    def connect(self):
        """Connect to the host and port specified in __init__."""
        # Mostly verbatim from httplib.py.
        msg = "getaddrinfo returns an empty list"
        # Try each resolved address in turn until one connects; keep the
        # last socket.error for the final raise if all of them fail.
        for res in socket.getaddrinfo(self.host, self.port, 0,
                socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if self.proxy_info and self.proxy_info.isgood():
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(*self.proxy_info.astuple())
                else:
                    self.sock = socket.socket(af, socktype, proto)
                # Different from httplib: support timeouts.
                if self.timeout is not None:
                    self.sock.settimeout(self.timeout)
                    # End of difference from httplib.
                if self.debuglevel > 0:
                    print "connect: (%s, %s)" % (self.host, self.port)
                self.sock.connect(sa)
            except socket.error, msg:
                if self.debuglevel > 0:
                    print 'connect fail:', (self.host, self.port)
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            raise socket.error, msg
716
class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
    "This class allows communication via SSL."

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None, timeout=None, proxy_info=None):
        # Socket timeout in seconds; None leaves the socket default.
        self.timeout = timeout
        # Optional ProxyInfo; when usable, connections go through socks.
        self.proxy_info = proxy_info
        httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,
                cert_file=cert_file, strict=strict)

    def connect(self):
        "Connect to a host on a given (SSL) port."

        # Create the raw socket: through the socks proxy when one is
        # configured, otherwise directly. BUGFIX: the original referenced
        # self.sock.setproxy() before self.sock existed and never created
        # the proxied socket at all (a duplicated setproxy line).
        if self.proxy_info and self.proxy_info.isgood():
            sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
            sock.setproxy(*self.proxy_info.astuple())
        else:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        if self.timeout is not None:
            sock.settimeout(self.timeout)
        sock.connect((self.host, self.port))
        # Wrap the connected socket with SSL and hand it to httplib.
        ssl = socket.ssl(sock, self.key_file, self.cert_file)
        self.sock = httplib.FakeSocket(sock, ssl)
740
741
742
class Http(object):
    """An HTTP client that handles:
- all methods
- caching
- ETags
- compression,
- HTTPS
- Basic
- Digest
- WSSE

and more.
    """
    def __init__(self, cache=None, timeout=None, proxy_info=None):
        """The value of proxy_info is a ProxyInfo instance.

If 'cache' is a string then it is used as a directory name
for a disk cache. Otherwise it must be an object that supports
the same interface as FileCache."""
        self.proxy_info = proxy_info
        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, str):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        # If 'follow_redirects' is True, and this is set to True then
        # all redirecs are followed, including unsafe ones.
        self.follow_all_redirects = False

        # When True, cached ETags are not sent as If-None-Match validators.
        self.ignore_etag = False

        # When True, exceptions raised during request() are converted into
        # synthetic Response objects (status 400/408/500) instead of
        # propagating to the caller.
        self.force_exception_to_status_code = False 

        # Socket timeout in seconds, passed through to the connection class.
        self.timeout = timeout

    def _auth_from_challenge(self, host, request_uri, headers, response, content):
        """A generator that creates Authorization objects
           that can be applied to requests.
        """
        # For every stored credential matching this host, try each auth
        # scheme in preference order that the server actually offered.
        challenges = _parse_www_authenticate(response, 'www-authenticate')
        for cred in self.credentials.iter(host):
            for scheme in AUTH_SCHEME_ORDER:
                if challenges.has_key(scheme):
                    yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)

    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication."""
        self.credentials.add(name, password, domain)

    def add_certificate(self, key, cert, domain):
        """Add a key and cert that will be used
        any time a request requires authentication."""
        self.certificates.add(key, cert, domain)

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication"""
        self.credentials.clear()
        self.authorizations = []

    def _conn_request(self, conn, request_uri, method, body, headers):
        # Issue the request on a (possibly kept-alive) connection.  A stale
        # persistent connection can raise httplib.HTTPException on first use,
        # so retry exactly once on a freshly opened connection.
        for i in range(2):
            try:
                conn.request(method, request_uri, body, headers)
                response = conn.getresponse()
            except socket.gaierror:
                # DNS resolution failed: retrying on this host cannot help.
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except httplib.HTTPException, e:
                if i == 0:
                    conn.close()
                    conn.connect()
                    continue
                else:
                    raise
            else:
                content = response.read()
                response = Response(response)
                if method != "HEAD":
                    # Transparently undo gzip/deflate content-encoding.
                    content = _decompressContent(response, content)

            break;
        return (response, content)


    def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        # Apply the most specific (deepest-matching) stored authorization
        # for this host/URI pre-emptively, if one exists.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth: 
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth: 
            # The auth object may ask for one retry (e.g. a stale digest
            # nonce); re-sign the request and try again.
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers )
                response._stale_digest = 1

        if response.status == 401:
            # Try each credential/scheme the server's challenge allows until
            # one succeeds, then remember it for future requests.
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body) 
                (response, content) = self._conn_request(conn, request_uri, method, body, headers, )
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
            if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if not response.has_key('location') and response.status != 300:
                        raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if response.has_key('location'):
                        location = response['location']
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response['location'] = urlparse.urljoin(absolute_uri, location)
                    if response.status == 301 and method in ["GET", "HEAD"]:
                        # Remember permanent redirects in the cache so future
                        # requests can jump straight to the new URL.
                        response['-x-permanent-redirect-url'] = response['location']
                        if not response.has_key('content-location'):
                            response['content-location'] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    # Cache validators belong to the original resource, not
                    # the redirect target; drop them before re-requesting.
                    if headers.has_key('if-none-match'):
                        del headers['if-none-match']
                    if headers.has_key('if-modified-since'):
                        del headers['if-modified-since']
                    if response.has_key('location'):
                        location = response['location']
                        old_response = copy.deepcopy(response)
                        if not old_response.has_key('content-location'):
                            old_response['content-location'] = absolute_uri
                        # A 303 answer to a non-safe method is followed with GET.
                        redirect_method = ((response.status == 303) and (method not in ["GET", "HEAD"])) and "GET" or method
                        (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
                        response.previous = old_response
                else:
                    raise RedirectLimit( _("Redirected more times than rediection_limit allows."), response, content)
            elif response.status in [200, 203] and method == "GET":
                # Don't cache 206's since we aren't going to handle byte range requests
                if not response.has_key('content-location'):
                    response['content-location'] = absolute_uri
                _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)


# Need to catch and rebrand some exceptions
# Then need to optionally turn all exceptions into status codes
# including all socket.* and httplib.* exceptions.


    def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
        """ Performs a single HTTP request.
The 'uri' is the URI of the HTTP resource and can begin
with either 'http' or 'https'. The value of 'uri' must be an absolute URI.

The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc.
There is no restriction on the methods allowed.

The 'body' is the entity body to be sent with the request. It is a string
object.

Any extra headers that are to be sent with the request should be provided in the
'headers' dictionary.

The maximum number of redirect to follow before raising an
exception is 'redirections. The default is 5.

The return value is a tuple of (response, content), the first
being and instance of the 'Response' class, the second being
a string that contains the response entity body.
        """
        try:
            if headers is None:
                headers = {}
            else:
                # Work on a lower-cased copy so the caller's dict is untouched.
                headers = _normalize_headers(headers)

            if not headers.has_key('user-agent'):
                headers['user-agent'] = "Python-httplib2/%s" % __version__

            uri = iri2uri(uri)

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)

            # Reuse one connection object per scheme+authority (keep-alive).
            conn_key = scheme+":"+authority
            if conn_key in self.connections:
                conn = self.connections[conn_key]
            else:
                if not connection_type:
                    connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout
                certs = list(self.certificates.iter(authority))
                if scheme == 'https' and certs:
                    conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0],
                        cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info)
                else:
                    conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info)
                conn.set_debuglevel(debuglevel)

            if method in ["GET", "HEAD"] and 'range' not in headers:
                headers['accept-encoding'] = 'compress, gzip'

            info = email.Message.Message()
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    # Cache entries are stored as headers + '\r\n\r\n' + body.
                    info = email.message_from_string(cached_value)
                    try:
                        content = cached_value.split('\r\n\r\n', 1)[1]
                    except IndexError:
                        # Corrupt entry: discard it and fall through to the network.
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None
            else:
                cachekey = None

            if method in ["PUT"] and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
                # http://www.w3.org/1999/04/Editing/
                headers['if-match'] = info['etag']

            if method not in ["GET", "HEAD"] and self.cache and cachekey:
                # RFC 2616 Section 13.10
                self.cache.delete(cachekey)

            if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
                if info.has_key('-x-permanent-redirect-url'):
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #   
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers) 
                   
                    if entry_disposition == "FRESH":
                        if not cached_value:
                            info['status'] = '504'
                            content = ""
                        response = Response(info)
                        if cached_value:
                            response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        # Revalidate using the cached entry's validators,
                        # unless the caller supplied its own.
                        if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
                            headers['if-none-match'] = info['etag']
                        if info.has_key('last-modified') and not 'last-modified' in headers:
                            headers['if-modified-since'] = info['last-modified']
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)

                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.

                    for key in _get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    _updateCache(headers, merged_response, content, self.cache, cachekey)
                    # Present the revalidated cache hit as a normal 200.
                    response = merged_response
                    response.status = 200
                    response.fromcache = True 

                elif response.status == 200:
                    content = new_content
                else:
                    # Any other status invalidates the cached entry.
                    self.cache.delete(cachekey)
                    content = new_content
            else: 
                (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
        except Exception, e:
            if self.force_exception_to_status_code:
                # Convert the failure into a synthetic Response instead of
                # letting the exception propagate to the caller.
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e) 
                elif isinstance(e, socket.timeout):
                    content = "Request Timeout"
                    response = Response( {
                            "content-type": "text/plain",
                            "status": "408",
                            "content-length": len(content)
                            })
                    response.reason = "Request Timeout"
                else:
                    content = str(e) 
                    response = Response( {
                            "content-type": "text/plain",
                            "status": "400",
                            "content-length": len(content)
                            })
                    response.reason = "Bad Request" 
            else:
                raise

 
        return (response, content)
1079
1080 
1081
class Response(dict):
    """An object more like email.Message than httplib.HTTPResponse."""

    # Was this response served out of the local cache?
    fromcache = False
    # HTTP protocol version used by the server: 10 for HTTP/1.0, 11 for HTTP/1.1.
    version = 11
    # Numeric status code returned by the server.
    status = 200
    # Reason phrase returned by the server.
    reason = "Ok"
    # Previous Response in a redirect chain, if any.
    previous = None

    def __init__(self, info):
        # 'info' may be an httplib.HTTPResponse, an email.Message, or a
        # plain mapping of header names to values.
        if isinstance(info, httplib.HTTPResponse):
            for name, value in info.getheaders():
                self[name] = value
            self.status = info.status
            self['status'] = str(self.status)
            self.reason = info.reason
            self.version = info.version
        elif isinstance(info, email.Message.Message):
            for name, value in info.items():
                self[name] = value
            # Cached entries always carry a 'status' pseudo-header.
            self.status = int(self['status'])
        else:
            for name, value in info.iteritems():
                self[name] = value
            self.status = int(self.get('status', self.status))


    def __getattr__(self, name):
        # Allow code written against httplib-style objects to use
        # 'response.dict' as a synonym for the Response itself.
        if name != 'dict':
            raise AttributeError(name)
        return self
Note: See TracBrowser for help on using the repository browser.