170 lines
5.9 KiB
Python
170 lines
5.9 KiB
Python
# -*- test-case-name: openid.test.test_yadis_discover -*-
|
|
__all__ = ['discover', 'DiscoveryResult', 'DiscoveryFailure']
|
|
|
|
from io import StringIO
|
|
|
|
from openid import fetchers
|
|
|
|
from openid.yadis.constants import \
|
|
YADIS_HEADER_NAME, YADIS_CONTENT_TYPE, YADIS_ACCEPT_HEADER
|
|
from openid.yadis.parsehtml import MetaNotFound, findHTMLMeta
|
|
|
|
|
|
class DiscoveryFailure(Exception):
|
|
"""Raised when a YADIS protocol error occurs in the discovery process"""
|
|
identity_url = None
|
|
|
|
def __init__(self, message, http_response):
|
|
Exception.__init__(self, message)
|
|
self.http_response = http_response
|
|
|
|
|
|
class DiscoveryResult(object):
|
|
"""Contains the result of performing Yadis discovery on a URI"""
|
|
|
|
# The URI that was passed to the fetcher
|
|
request_uri = None
|
|
|
|
# The result of following redirects from the request_uri
|
|
normalized_uri = None
|
|
|
|
# The URI from which the response text was returned (set to
|
|
# None if there was no XRDS document found)
|
|
xrds_uri = None
|
|
|
|
# The content-type returned with the response_text
|
|
content_type = None
|
|
|
|
# The document returned from the xrds_uri
|
|
response_text = None
|
|
|
|
def __init__(self, request_uri):
|
|
"""Initialize the state of the object
|
|
|
|
sets all attributes to None except the request_uri
|
|
"""
|
|
self.request_uri = request_uri
|
|
|
|
def usedYadisLocation(self):
|
|
"""Was the Yadis protocol's indirection used?"""
|
|
if self.xrds_uri is None:
|
|
return False
|
|
return self.normalized_uri != self.xrds_uri
|
|
|
|
def isXRDS(self):
|
|
"""Is the response text supposed to be an XRDS document?"""
|
|
return (self.usedYadisLocation() or
|
|
self.content_type == YADIS_CONTENT_TYPE)
|
|
|
|
|
|
def discover(uri):
|
|
"""Discover services for a given URI.
|
|
|
|
@param uri: The identity URI as a well-formed http or https
|
|
URI. The well-formedness and the protocol are not checked, but
|
|
the results of this function are undefined if those properties
|
|
do not hold.
|
|
|
|
@return: DiscoveryResult object
|
|
|
|
@raises Exception: Any exception that can be raised by fetching a URL with
|
|
the given fetcher.
|
|
@raises DiscoveryFailure: When the HTTP response does not have a 200 code.
|
|
"""
|
|
result = DiscoveryResult(uri)
|
|
resp = fetchers.fetch(uri, headers={'Accept': YADIS_ACCEPT_HEADER})
|
|
if resp.status not in (200, 206):
|
|
raise DiscoveryFailure(
|
|
'HTTP Response status from identity URL host is not 200. '
|
|
'Got status %r' % (resp.status, ), resp)
|
|
|
|
# Note the URL after following redirects
|
|
result.normalized_uri = resp.final_url
|
|
|
|
# Attempt to find out where to go to discover the document
|
|
# or if we already have it
|
|
result.content_type = resp.headers.get('content-type')
|
|
|
|
result.xrds_uri = whereIsYadis(resp)
|
|
|
|
if result.xrds_uri and result.usedYadisLocation():
|
|
resp = fetchers.fetch(result.xrds_uri)
|
|
if resp.status not in (200, 206):
|
|
exc = DiscoveryFailure(
|
|
'HTTP Response status from Yadis host is not 200. '
|
|
'Got status %r' % (resp.status, ), resp)
|
|
exc.identity_url = result.normalized_uri
|
|
raise exc
|
|
result.content_type = resp.headers.get('content-type')
|
|
|
|
result.response_text = resp.body
|
|
return result
|
|
|
|
|
|
def whereIsYadis(resp):
|
|
"""Given a HTTPResponse, return the location of the Yadis document.
|
|
|
|
May be the URL just retrieved, another URL, or None if no suitable URL can
|
|
be found.
|
|
|
|
[non-blocking]
|
|
|
|
@returns: str or None
|
|
"""
|
|
# Attempt to find out where to go to discover the document
|
|
# or if we already have it
|
|
content_type = resp.headers.get('content-type')
|
|
|
|
# According to the spec, the content-type header must be an exact
|
|
# match, or else we have to look for an indirection.
|
|
if (content_type and
|
|
content_type.split(';', 1)[0].lower() == YADIS_CONTENT_TYPE):
|
|
return resp.final_url
|
|
else:
|
|
# Try the header
|
|
yadis_loc = resp.headers.get(YADIS_HEADER_NAME.lower())
|
|
|
|
if not yadis_loc:
|
|
# Parse as HTML if the header is missing.
|
|
#
|
|
# XXX: do we want to do something with content-type, like
|
|
# have a whitelist or a blacklist (for detecting that it's
|
|
# HTML)?
|
|
|
|
# Decode body by encoding of file
|
|
content_type = content_type or ''
|
|
encoding = content_type.rsplit(';', 1)
|
|
if (len(encoding) == 2 and
|
|
encoding[1].strip().startswith('charset=')):
|
|
encoding = encoding[1].split('=', 1)[1].strip()
|
|
else:
|
|
encoding = 'utf-8'
|
|
|
|
if isinstance(resp.body, bytes):
|
|
try:
|
|
content = resp.body.decode(encoding)
|
|
except UnicodeError:
|
|
# All right, the detected encoding has failed. Try with
|
|
# UTF-8 (even if there was no detected encoding and we've
|
|
# defaulted to UTF-8, it's not that expensive an operation)
|
|
try:
|
|
content = resp.body.decode('utf-8')
|
|
except UnicodeError:
|
|
# At this point the content cannot be decoded to a str
|
|
# using the detected encoding or falling back to utf-8,
|
|
# so we have to resort to replacing undecodable chars.
|
|
# This *will* result in broken content but there isn't
|
|
# anything else that can be done.
|
|
content = resp.body.decode(encoding, 'replace')
|
|
else:
|
|
content = resp.body
|
|
|
|
try:
|
|
yadis_loc = findHTMLMeta(StringIO(content))
|
|
except (MetaNotFound, UnicodeError):
|
|
# UnicodeError: Response body could not be encoded and xrds
|
|
# location could not be found before troubles occur.
|
|
pass
|
|
|
|
return yadis_loc
|