diff --git a/test/common.py b/test/common.py index c242aa6..eda155d 100644 --- a/test/common.py +++ b/test/common.py @@ -1,401 +1,401 @@ # Copyright (C) 2007-2010 AG-Projects. # import os import re import socket import sys import time import traceback import types import unittest from copy import copy -from ConfigParser import SafeConfigParser as ConfigParser +from configparser import SafeConfigParser as ConfigParser from lxml import etree from optparse import OptionParser, SUPPRESS_HELP from xcaplib import xcapclient apps = ['pres-rules', 'org.openmobilealliance.pres-rules', 'resource-lists', 'pidf-manipulation', 'watchers', 'rls-services', 'test-app', 'xcap-caps'] def succeed(r): return 200 <= r.status <= 299 class XCAPTest(unittest.TestCase): # if true, each PUT or DELETE will be followed by GET to ensure that it has indeed succeeded invariant_check = True @classmethod def setupOptionParser(cls, parser): xcapclient.setup_parser_client(parser) def initialize(self, options, args = []): if not hasattr(self, '_options'): self._options = copy(options) if not hasattr(self, '_args'): self._args = copy(args) def new_client(self): return xcapclient.make_xcapclient(self.options) def update_client_options(self): self.client = self.new_client() def setUp(self): self.options = copy(self._options) self.args = copy(self._args) self.update_client_options() def assertStatus(self, r, status, msg=None): if status is None: return elif isinstance(status, int): if r.status != status: if msg is None: msg = 'Status (%s) != %s' % (r.status, status) raise self.failureException(msg) else: ## status is a tuple or a list if r.status not in status: if msg is None: msg = 'Status (%s) not in %s' % (r.status, str(status)) raise self.failureException(msg) def assertHeader(self, r, key, value=None, msg=None): """Fail if (key, [value]) not in r.headers.""" lowkey = key.lower() - for k, v in r.headers.items(): + for k, v in list(r.headers.items()): if k.lower() == lowkey: if value is None or 
str(value) == v: return v if msg is None: if value is None: msg = '%s not in headers' % key else: msg = '%s:%s not in headers' % (key, value) raise self.failureException(msg) def assertETag(self, r): v = self.assertHeader(r, 'ETag') return r.etag def assertNoHeader(self, r, key, msg=None): """Fail if key in r.headers.""" lowkey = key.lower() matches = [k for k, v in r.headers if k.lower() == lowkey] if matches: if msg is None: msg = '%s in headers' % key raise self.failureException(msg) def assertBody(self, r, value, msg=None): """Fail if value != r.body.""" if value != r.body: if msg is None: msg = 'expected body:\n"%s"\n\nactual body:\n"%s"' % (value, r.body) raise self.failureException(msg) def assertInBody(self, r, value, msg=None): """Fail if value not in r.body.""" if value not in r.body: if msg is None: msg = '%r not in body\nbody: %r' % (value, r.body) raise self.failureException(msg) def assertNotInBody(self, r, value, msg=None): """Fail if value in r.body.""" if value in r.body: if msg is None: msg = '%s found in body' % value raise self.failureException(msg) def assertMatchesBody(self, r, pattern, msg=None, flags=0): """Fail if value (a regex pattern) is not in r.body.""" if re.search(pattern, r.body, flags) is None: if msg is None: msg = 'No match for %s in body' % pattern raise self.failureException(msg) def assertDocument(self, application, body, client=None): r = self.get(application, client=client) self.assertBody(r, body) def get(self, application, node=None, status=200, **kwargs): client = kwargs.pop('client', None) or self.client r = client._get(application, node, **kwargs) self.validate_error(r, application) self.assertStatus(r, status) if 200<=status<=299: self.assertHeader(r, 'ETag') return r def get_global(self, *args, **kwargs): kwargs['globaltree'] = True return self.get(*args, **kwargs) def put(self, application, resource, node=None, status=[200,201], content_type_in_GET=None, client=None, **kwargs): client = client or self.client r_put = 
client._put(application, resource, node, **kwargs) self.validate_error(r_put, application) self.assertStatus(r_put, status) # if PUTting succeed, check that document is there and equals to resource if self.invariant_check and succeed(r_put): r_get = self.get(application, node, status=None, client=client) self.assertStatus(r_get, 200, 'although PUT succeed, following GET on the same URI did not: %s %s' % \ (r_get.status, r_get.reason)) self.assertEqual(resource.strip(), r_get.body) # is body put equals to body got? if content_type_in_GET is not None: self.assertHeader(r_get, 'content-type', content_type_in_GET) return r_put def put_new(self, application, resource, node=None, status=201, content_type_in_GET=None, client=None): # QQQ use If-None-Match or some other header to do that without get self.get(application, node=node, status=404, client=client) return self.put(application, resource, node, status, content_type_in_GET, client) def delete(self, application, node=None, status=200, client=None, **kwargs): client = client or self.client r = client._delete(application, node, **kwargs) self.validate_error(r, application) self.assertStatus(r, status) # if deleting succeed, GET should return 404 if self.invariant_check and succeed(r) or r.status == 404: r_get = self.get(application, node, status=None) self.assertStatus(r_get, 404, 'although DELETE succeed, following GET on the same URI did not return 404: %s %s' % \ (r_get.status, r_get.reason)) return r def put_rejected(self, application, resource, status=409, client=None): """DELETE the document, then PUT it and expect 409 error. Return PUT result. 
If PUT has indeed failed, also check that GET returns 404 """ self.delete(application, status=[200,404], client=client) put_result = self.put(application, resource, status=status, client=client) self.get(application, status=404, client=client) return put_result def getputdelete(self, application, document, content_type, client=None): self.delete(application, status=[200,404], client=client) self.get(application, status=404, client=client) self.put(application, document, status=201, content_type_in_GET=content_type, client=client) self.put(application, document, status=200, content_type_in_GET=content_type, client=client) self.put(application, document, status=200, content_type_in_GET=content_type, client=client) self.delete(application, status=200, client=client) self.delete(application, status=404, client=client) def validate_error(self, r, application): if r.status==409 or r.headers.gettype()=='application/xcap-error+xml': self.assertEqual(r.headers.gettype(), 'application/xcap-error+xml') xml = validate_xcaps_error(r.body) if ' Bill Doe Close Friends Joe Smith Nancy Gross Marketing """ def get_xcapdiff(xcap_root, resource, username, old_etag, new_etag): uri = xcap_root + '/' + resource + '/users/' + username + '/index.xml' return xml_xcapdiff(xcap_root, xml_document(uri, old_etag, new_etag)) queue = Queue() packet_count = 0 start_time = None is_subscribed = False def event_handler(event_name, **kwargs): global start_time, packet_count, is_subscribed if event_name == "Subscription_state": if kwargs["state"] == "ACTIVE": is_subscribed = True #elif kwargs["state"] == "TERMINATED": # if kwargs.has_key("code"): # print "Unsubscribed: %(code)d %(reason)s" % kwargs # else: # print "Unsubscribed" elif event_name == "Subscription_notify": queue.put(("NOTIFY", kwargs)) elif event_name == "siptrace": if start_time is None: start_time = kwargs["timestamp"] packet_count += 1 if kwargs["received"]: direction = "RECEIVED" else: direction = "SENDING" - print "%s: Packet %d, 
+%s" % (direction, packet_count, (kwargs["timestamp"] - start_time)) - print "%(timestamp)s: %(source_ip)s:%(source_port)d --> %(destination_ip)s:%(destination_port)d" % kwargs - print kwargs["data"] + print("%s: Packet %d, +%s" % (direction, packet_count, (kwargs["timestamp"] - start_time))) + print("%(timestamp)s: %(source_ip)s:%(source_port)d --> %(destination_ip)s:%(destination_port)d" % kwargs) + print(kwargs["data"]) elif event_name=='log': pass else: - print 'UNHANDLED EVENT', event_name, kwargs + print('UNHANDLED EVENT', event_name, kwargs) def get(queue, blocking=True, timeout=1): try: return queue.get(blocking, timeout) except Empty: return None class Test(XCAPTest): def assertContains(self, element, list): if element not in list: raise self.failureException("%s not in %s" % (element, list)) @classmethod def setupOptionParser(_cls, parser): parser.add_option("-p", "--outbound-proxy", type="string", action="callback", callback=parse_proxy_cb, help="Outbound SIP proxy to use. By default a lookup is performed based on SRV and A records.", metavar="IP[:PORT]") parser.add_option("-t", "--siptrace", default=False, action='store_true') setup_parser_client(parser) def test(self): opts = self.options self.delete(resource, status=[200,404]) initial_events = Engine.init_options_defaults["initial_events"] if content_type is not None: initial_events[event] = [content_type] e = Engine(event_handler, do_siptrace=opts.siptrace, auto_sound=False, initial_events=initial_events) e.start() try: if opts.outbound_proxy is None: route = None else: route = Route(opts.proxy_ip, opts.proxy_port) sub = Subscription(Credentials(SIPURI(user=opts.username, host=opts.domain), opts.password), SIPURI(user=opts.username, host=opts.domain), event, route=route, expires=expires) sub.subscribe() try: # wait for SUBSCRIBE to succeed AND absorb out-of-date NOTIFYs end = time.time() + 1.5 while time.time() < end: get(queue, timeout=0.1) - self.failUnless(is_subscribed, 'SUBSCRIBE failed') + 
self.assertTrue(is_subscribed, 'SUBSCRIBE failed') # try: # X = queue.get(True, timeout = 1) # except Empty: # pass # else: # self.assertEqual(X[0], 'NOTIFY') def get_notify(comment = ''): try: X = queue.get(True, timeout = 1) except Empty: self.fail("Didn't get a NOTIFY %s" % comment) self.assertEqual(X[0], 'NOTIFY') return X[1] r = self.put(resource, body) etag = r.headers['ETag'].strip('"') X = get_notify('after put') xcap_root = opts.xcap_root.replace(':8000', '') self.assertEqual(X['body'], get_xcapdiff(xcap_root, resource, opts.username, None, etag)) #print etag r = self.put(resource, body.replace('Close', 'Intimate')) new_etag = r.headers['ETag'].strip('"') X = get_notify() self.assertEqual(X['body'], get_xcapdiff(xcap_root, resource, opts.username, etag, new_etag)) #print etag, new_etag r = self.delete(resource) X = get_notify() self.assertEqual(X['body'], get_xcapdiff(xcap_root, resource, opts.username, new_etag, None)) #print new_etag, None finally: sub.unsubscribe() time.sleep(2) finally: e.stop() re_ip_port = re.compile("^(?P\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(:(?P\d+))?$") def parse_proxy(value, parser): match = re_ip_port.match(value) if match is None: raise OptionValueError("Could not parse supplied outbound proxy address") parser.values.proxy_ip = match.group('proxy_ip') parser.values.proxy_port = int(match.group('proxy_port') or '5060') def parse_proxy_cb(_option, _opt_str, value, parser): return parse_proxy(value, parser) if __name__ == '__main__': runSuiteFromModule() diff --git a/test/test.py b/test/test.py index 9c2bb79..ffc6956 100755 --- a/test/test.py +++ b/test/test.py @@ -1,64 +1,64 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. # import sys import os import traceback -import common as c +from . import common as c class TestHarness(object): """A test harness for OpenXCAP.""" def __init__(self, tests, option_parser): """Constructor to populate the TestHarness instance. tests should be a list of module names (strings). 
""" self.tests = tests self.option_parser = option_parser self.test_suites = [] self.import_errors = 0 for testmod in self.tests: try: self.import_errors += 1 m = __import__(testmod, globals(), locals()) suite = c.loadSuiteFromModule(m, option_parser) suite.modname = testmod self.test_suites.append(suite) self.import_errors -= 1 except Exception: traceback.print_exc() def run(self, options, args): c.run_suite(c.TestSuite(self.test_suites), options, args) def all_tests(): my_dir = os.path.dirname(os.path.abspath(__file__)) lst = [x.strip('.py') for x in os.listdir(my_dir) if x.startswith('test_') and x.endswith('.py')] return lst def run(): parser = c.prepare_optparser() parser.add_option("-l", "--list", action="store_true", help="Print list of all tests") t = TestHarness(all_tests(), parser) options, args = parser.parse_args() if options.list: for x in t.test_suites: - print x.modname + print(x.modname) for i in x: - print ' - ', i - print + print(' - ', i) + print() return c.process_options(options) c.run_command(lambda : t.run(options, args), options) if t.import_errors: sys.exit('there were import errors!\n') if __name__ == '__main__': run() diff --git a/test/test_attribute.py b/test/test_attribute.py index a6d29ee..5f2c35b 100755 --- a/test/test_attribute.py +++ b/test/test_attribute.py @@ -1,69 +1,69 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. 
# -from common import * +from .common import * resource_list_xml = """ Joe Smith Nancy Gross Petri Aukia """ class AttributeTest(XCAPTest): def test_get(self): self.put('resource-lists', resource_list_xml) self.get('resource-lists', '/resource-lists/list[@name="other"]/@some-attribute', status=404) r = self.get('resource-lists', '/resource-lists/list[@name="friends"]/@name') self.assertBody(r, "friends") self.assertHeader(r, 'ETag') self.assertHeader(r, 'Content-type', 'application/xcap-att+xml') r = self.get('resource-lists', '/resource-lists/list[@name="friends"]/external/@anchor') uri = 'http://xcap.example.org/resource-lists/users/sip:a@example.org/index/~~/resource-lists/list%5b@name=%22mkting%22%5d' self.assertBody(r, uri) - print 'WARNING: test with URI in att_value is disabled' + print('WARNING: test with URI in att_value is disabled') # r = self.get('resource-lists', '/resource-lists/list[@name="friends"]/external[@anchor="%s"]/@anchor' % uri) # self.assertBody(r, uri) r = self.get('resource-lists', '/resource-lists/list[@name="friends"]/external[]/@anchor', status=400) def test_delete(self): self.put('resource-lists', resource_list_xml) self.delete('resource-lists', '/resource-lists/list[@name="other"]/@some-attribute', status=404) # XXX is it legal for parent selector (/resource-lists/list[@name="friends"]) to become invalid? # I don't think it is, check with RFC self.delete('resource-lists', '/resource-lists/list[@name="friends"]/@name', status=200) self.delete('resource-lists', '/resource-lists/list[@name="friends"]/@name', status=404) def test_put(self): self.put('resource-lists', resource_list_xml) self.put('resource-lists', 'coworkers', '/resource-lists/list[@name="friends"]/@some-attribute', status=409) # fails GET(PUT(x))==x test. 
must be rejected in the server #self.put('resource-lists', 'coworkers', '/resource-lists/list[@name="friends"]/@name', status=409) # XXX parent's selector becomes invalid r = self.client._put('resource-lists', 'coworkers', '/resource-lists/list[@name="friends"]/@name') self.assertStatus(r, 200) if __name__ == '__main__': runSuiteFromModule() diff --git a/test/test_auth.py b/test/test_auth.py index 40fefae..fdd1b9d 100755 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -1,44 +1,44 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. # -from common import * +from .common import * class AuthTest(XCAPTest): def test_users_auth(self): self.get(self.app, status=[200,404]) self.options.password += 'x' self.update_client_options() self.get(self.app, status=[401]) def test_global_auth(self): self.get_global(self.app, status=[200,404]) #self.options.password += 'x' #self.update_client_options() #for app in apps: # self.get_global(app, status=401) # XXX test PUT/DELETE auth as well? # XXX test digest authentication # XXX test authorization #def test_authorization(self): ### the request cannot be authorized (we're trying to access someone else' resource) #account = self.account #self.account = "dummy" + self.account #r = self.get('resource-lists', status=401) #self.client.account = account for app in apps: - exec """class AuthTest_%s(AuthTest): + exec("""class AuthTest_%s(AuthTest): app = %r -""" % (app.replace('-', '_').replace('.', '_'), app) +""" % (app.replace('-', '_').replace('.', '_'), app)) del AuthTest if __name__ == '__main__': runSuiteFromModule() diff --git a/test/test_element.py b/test/test_element.py index fc6c9a0..64215e8 100755 --- a/test/test_element.py +++ b/test/test_element.py @@ -1,112 +1,112 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. 
# -from common import * +from .common import * xml = """ Joe Smith Nancy Gross Petri Aukia """ def index(s, sub, skip=0, start=0): while skip >= 0: found = s.index(sub, start) skip -= 1 start = found + 1 return found def eindex(s, sub, skip=0): return index(s, sub, skip)+len(sub) lst = xml[xml.index('')] nancy = xml[xml.index(' Alice """ external = xml[xml.index('')] class ElementTest(XCAPTest): def test_get(self): self.delete('resource-lists', status=[200,404]) self.put('resource-lists', xml) self.get('resource-lists', '/resource-lists/list[@name="other"]', status=404) self.get('resource-lists', '/resource-lists/list/entry[4]', status=404) r = self.get('resource-lists', '/resource-lists/list[@name="friends"]') self.assertBody(r, lst) self.assertHeader(r, 'ETag') self.assertHeader(r, 'Content-type', 'application/xcap-el+xml') r = self.get('resource-lists', '/resource-lists/list[@name="friends"]/entry[2]') self.assertBody(r, nancy) self.assertHeader(r, 'ETag') self.assertHeader(r, 'Content-type', 'application/xcap-el+xml') r = self.get('resource-lists', '/resource-lists/list[@name="friends"]/*[2]') self.assertBody(r, nancy) self.assertHeader(r, 'ETag') self.assertHeader(r, 'Content-type', 'application/xcap-el+xml') - print 'WARNING: test with URI in att_value is disabled' + print('WARNING: test with URI in att_value is disabled') # r = self.get('resource-lists', '/resource-lists/list[@name="friends"]/external[@anchor="http://xcap.example.org/resource-lists/users/sip:a@example.org/index/~~/resource-lists/list%5b@name="mkting"5d"]') # self.assertBody(r, external) # self.assertHeader(r, 'ETag') # self.assertHeader(r, 'Content-type', 'application/xcap-el+xml') def test_delete(self): self.put('resource-lists', xml) # cannot delete something in the middle self.delete('resource-lists', '/resource-lists/list[@name="friends"]/entry[2]', status=409) self.delete('resource-lists', '/resource-lists/list[@name="friends"]/*[3]', status=409) # it's ok to delete the last one though 
r = self.delete('resource-lists', '/resource-lists/list[@name="friends"]/*[4]') self.assertHeader(r, 'ETag') r = self.delete('resource-lists', '/resource-lists/list[@name="friends"]/*[3]') self.assertHeader(r, 'ETag') r = self.delete('resource-lists', '/resource-lists/list[@name="friends"]/*[2]') self.assertHeader(r, 'ETag') r = self.delete('resource-lists', '/resource-lists/list[@name="friends"]/entry') self.assertHeader(r, 'ETag') r = self.get('resource-lists', '/resource-lists/list') self.assertMatchesBody(r, '^\\s*$') self.delete('resource-lists', '/resource-lists/list[@name="friends"]/entry[@uri="sip:joe@example.com"]', status=404) def test_put_error(self): self.put('resource-lists', xml) # 415 content type not set self.put('resource-lists', nancy, '/resource-lists/list[@name="friends"]', headers={'Content-Type' : None},status=415) # 409 r = self.put('resource-lists', broken, '/resource-lists/list[@name="friends"]', status=409) # 409 r = self.put('resource-lists', nancy, '/resource-lists/list[@name="others"]/entry[2]', status=409) # 409 r = self.put('resource-lists', nancy, '/resource-lists/list[@name="friends"]/entry[1]', status=409) if __name__ == '__main__': runSuiteFromModule() diff --git a/test/test_element_put.py b/test/test_element_put.py index 0f690a8..72fa671 100755 --- a/test/test_element_put.py +++ b/test/test_element_put.py @@ -1,177 +1,177 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. # -from common import * +from .common import * app = 'test-app' start = ''' ''' # when changing to the document could be put, but # element GET respons with 404. 
# either GET should return what expected or a document without namespaces declaration # should be rejected class PutElementTest(XCAPTest): def reverse(self, node_selector): self.delete(app, node_selector) self.assertDocument(app, start) def test_creation(self): """Testing different ways of inserting an element as described in examples from Section 8.2.3 (http://tools.ietf.org/html/rfc4825#section-8.2.3) After each PUT, DELETE is executed on the same URI and the resulting document must be the same as before the insertion. """ self.put(app, start) for node_selector in ['/root/el1[@att="third"]', '/root/el1[3][@att="third"]', '/root/*[3][@att="third"]']: self.put_new(app, '', node_selector) self.assertDocument(app, ''' ''') self.reverse(node_selector) # out-of-bound positional index in node selector results in 409 (XXX or 404?) for node_selector in ['root/el1[4][@att="third"]', 'root/*[0][@att="third"]']: self.put_new(app, '', node_selector, status=409) self.assertDocument(app, start) # replace 500 with something more appropriate #for node_selector in ['root/*[-1][@att="third"]']: # self.put_new(app, '', node_selector, status=500) # self.assertDocument(app, start) # following request would fail idempotency requirement (GET(PUT(x))=>x) if succeeded for node_selector in ['root/el1[@att="third"]', 'root/el1[3][@att="third"]', 'root/*[3][@att="third"]']: r = self.put_new(app, '', node_selector, status=409) self.assertInBody(r, 'cannot-insert') self.assertDocument(app, start) self.put_new(app, '', 'root/el3') self.assertDocument(app, ''' ''') self.reverse('root/el3') for node_selector in ['root/el2[@att="2"]', 'root/el2[2][@att="2"]']: self.put_new(app, '', node_selector) self.assertDocument(app, ''' ''') self.reverse(node_selector) self.put_new(app, '', 'root/*[2][@att="2"]') self.assertDocument(app, ''' ''') self.reverse('root/*[2][@att="2"]') self.put_new(app, '', 'root/el2[1][@att="2"]') self.assertDocument(app, ''' ''') self.reverse('root/el2[1][@att="2"]') def 
test_creation_starattr(self): """Testing PUT requests of form '*[@att="some"]' which require looking into body of PUT""" self.put(app, start) for selector in ['root/*[@att="2"]', 'root/el1[@att="2"]']: self.put_new(app, '', selector) self.assertDocument(app, ''' ''') self.reverse(selector) # the same request - different body for selector in ['root/*[@att="2"]', 'root/el2[@att="2"]']: self.put_new(app, '', selector) self.assertDocument(app, ''' ''') self.reverse(selector) # the same request - different body for selector in ['root/*[@att="2"]', 'root/el3[@att="2"]']: self.put_new(app, '', selector) self.assertDocument(app, ''' ''') self.reverse(selector) def test_replacement(self): self.put(app, start) for node_selector in ['root/el1[@att="first"]', 'root/el1[1][@att="first"]', 'root/*[1][@att="first"]']: self.put(app, '', node_selector, status=409) self.assertDocument(app, start) for node_selector in ['root/el1[1]', 'root/*[1]']: self.put(app, start) self.put(app, '', node_selector, status=200) self.assertDocument(app, ''' ''') if __name__ == '__main__': runSuiteFromModule() diff --git a/test/test_errors.py b/test/test_errors.py index 5025401..54dd6fa 100755 --- a/test/test_errors.py +++ b/test/test_errors.py @@ -1,73 +1,73 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. # -import common as c -from urlparse import urlparse +from . 
import common as c +from urllib.parse import urlparse class ErrorsTest(c.XCAPTest): def communicate(self, data): s = c.socket.socket() x = urlparse(self.options.xcap_root) if x.port is None: port = {'http': 80, 'https': 443}.get(x.scheme) s.connect((x.hostname, x.port or port)) if x.scheme == 'https': s = c.socket.ssl(s) s.write(data) return s.read(1024*8) s.send(data) return s.recv(1024*8) def test_gibberish(self): response = self.communicate('\r\r\r\n\r\n') - assert '400 Bad Request' in response, `response` + assert '400 Bad Request' in response, repr(response) def test409(self): self.put('resource-lists', 'xxx', status=409) def check(self, code, message, *uris): for uri in uris: r = self.client.con.request('GET', uri) self.assertEqual(r.status, code) self.assertInBody(r, message) def test400_1(self): self.get('resource-lists', '/resource-lists/list[@name="friends"]/external[]/@anchor', status=400) def test400_2(self): self.check(400, "to parse node", 'resource-lists/users/alice@example.com/index.xml~~') def test404(self): self.check(404, 'XCAP Root', '') self.check(404, 'context', 'xxx') self.check(404, "context", 'resource-lists/user/alice@example.com/index.xml') self.check(404, 'user id', 'resource-lists/users') self.check(404, "not contain ", 'resource-lists/users/alice@example.com', 'resource-lists/users/alice@example.com/') # XXX test for multiple matches def test405(self): r = self.client.con.request('POST', '') self.assertEqual(r.status, 405) r = self.client.con.request('XXX', '') self.assertEqual(r.status, 405) # but apache responds with 501 # 412: tested in test_etags.py if __name__ == '__main__': c.runSuiteFromModule() diff --git a/test/test_etags.py b/test/test_etags.py index 45be536..a478d6c 100755 --- a/test/test_etags.py +++ b/test/test_etags.py @@ -1,113 +1,113 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. 
# -from common import * +from .common import * resource_list_xml = """ """ class ETagTest(XCAPTest): def test_conditional_GET(self): r = self.put('resource-lists', resource_list_xml) etag = self.assertHeader(r, 'ETag') # Test If-Match (both valid and invalid) self.get('resource-lists', headers={'If-Match': etag}) self.get('resource-lists', headers={'If-Match': '*'}) self.get('resource-lists', headers={'if-Match': "another-etag"}, status=412) # Test If-None-Match (both valid and invalid) self.get('resource-lists', headers={'If-None-Match': etag}, status=304) self.get('resource-lists', headers={'If-None-Match': '*'}, status=304) self.get('resource-lists', headers={'If-None-Match': "another-etag"}, status=200) def test_conditional_PUT(self): self.delete('resource-lists', status=[200,404]) self.get('resource-lists', status=404) # Test conditional PUT when document doesn't exist self.put('resource-lists', resource_list_xml, headers={'If-Match': '12345asdf'}, status=412) r = self.put('resource-lists', resource_list_xml) etag = self.assertHeader(r, 'ETag') # Test conditional PUT logic ## Alice and Bob initially share the same etag alice_etag = bob_etag = etag ## Bob modifies the resource r = self.put('resource-lists', resource_list_xml, headers={'If-Match': bob_etag}) bob_etag = self.assertHeader(r, 'ETag') ## now Alice tries to modify the resource self.put('resource-lists', resource_list_xml, headers={'If-Match': alice_etag}, status=412) ## the etag has changed so now she updates her in-memory document r = self.get('resource-lists') new_alice_etag = self.assertHeader(r, 'ETag') self.assertEqual(bob_etag, new_alice_etag) self.put('resource-lists', resource_list_xml, headers={'If-Match': new_alice_etag}) def test_conditional_PUT_2(self): self.delete('resource-lists', status=[200,404]) self.get('resource-lists', status=404) self.put('resource-lists', resource_list_xml, headers={'If-None-Match': '*'}, status=201) self.put('resource-lists', resource_list_xml, 
headers={'If-None-Match': '*'}, status=412) class ETagTest2(XCAPTest): # the same as prev, but using 'etag' param def test_conditional_GET(self): r = self.put('resource-lists', resource_list_xml) etag = self.assertHeader(r, 'ETag') # Test If-Match (both valid and invalid) self.get('resource-lists', etag=etag) self.get('resource-lists') self.get('resource-lists', etag="another-etag", status=412) def test_conditional_PUT(self): r = self.put('resource-lists', resource_list_xml) etag = self.assertETag(r) - assert etag is not None, `etag` + assert etag is not None, repr(etag) # Test conditional PUT logic ## Alice and Bob initially share the same etag alice_etag = bob_etag = etag ## Bob modifies the resource r = self.put('resource-lists', resource_list_xml, etag=bob_etag) bob_etag = self.assertETag(r) ## now Alice tries to modify the resource self.put('resource-lists', resource_list_xml, etag=alice_etag, status=412) ## the etag has changed so now she updates her in-memory document r = self.get('resource-lists') new_alice_etag = self.assertETag(r) self.assertEqual(bob_etag, new_alice_etag) self.put('resource-lists', resource_list_xml, etag=new_alice_etag) def test_etag_parsing(self): r = self.put('resource-lists', resource_list_xml) etag = self.assertETag(r) # no quotes r = self.put('resource-lists', resource_list_xml, headers = {'if-match' : 'xxx' }, status=412) r = self.put('resource-lists', resource_list_xml, headers = {'if-match' : etag }, status=200) etag = self.assertETag(r) r = self.put('resource-lists', resource_list_xml, headers = {'if-match' : '"' + etag + '"' }, status=200) etag = self.assertETag(r) self.put('resource-lists', resource_list_xml, headers = {'if-match' : '"' + etag + 'xx"' }, status=412) if __name__ == '__main__': runSuiteFromModule() diff --git a/test/test_etags2.py b/test/test_etags2.py index 135006a..1decb87 100755 --- a/test/test_etags2.py +++ b/test/test_etags2.py @@ -1,53 +1,53 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. 
# -from common import * +from .common import * resource_list_xml = """ """ class ETagTest(XCAPTest): def test_conditional_PUT(self): self.delete('resource-lists', status=[200,404]) self.get('resource-lists', status=404) # Test conditional PUT when document doesn't exist self.put('resource-lists', resource_list_xml, headers={'If-Match': '12345asdf'}, status=412) # r = self.put('resource-lists', resource_list_xml) # etag = self.assertHeader(r, 'ETag') # # # Test conditional PUT logic # ## Alice and Bob initially share the same etag # alice_etag = bob_etag = etag # # ## Bob modifies the resource # r = self.put('resource-lists', resource_list_xml, headers={'If-Match': bob_etag}) # bob_etag = self.assertHeader(r, 'ETag') # # ## now Alice tries to modify the resource # self.put('resource-lists', resource_list_xml, headers={'If-Match': alice_etag}, status=412) # # ## the etag has changed so now she updates her in-memory document # r = self.get('resource-lists') # new_alice_etag = self.assertHeader(r, 'ETag') # self.assertEqual(bob_etag, new_alice_etag) # # self.put('resource-lists', resource_list_xml, headers={'If-Match': new_alice_etag}) # def test_conditional_PUT_2(self): self.delete('resource-lists', status=[200,404]) self.get('resource-lists', status=404) self.put('resource-lists', resource_list_xml, headers={'If-None-Match': '*'}, status=201) self.put('resource-lists', resource_list_xml, headers={'If-None-Match': '*'}, status=412) if __name__ == '__main__': runSuiteFromModule() diff --git a/test/test_fragment.py b/test/test_fragment.py index 91c9dc2..3dc3c72 100755 --- a/test/test_fragment.py +++ b/test/test_fragment.py @@ -1,50 +1,50 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. # -import common +from . 
import common document = """ Foo """ # well-formed fragment that would've been rejected by XML parser because of # unbound namespace prefix fragment = """ Test """ node = '/resource-lists/list/entry[@uri="sip:xxx@yyyyy.net"]' class FragmentTest(common.XCAPTest): def test_success(self): self.put('resource-lists', document) self.put('resource-lists', fragment, node) def test_errors(self): self.put('resource-lists', document) r = self.put('resource-lists', "", node, status=409) self.assertInBody(r, 'mismatched tag') r = self.put('resource-lists', "", node, status=409) self.assertInBody(r, 'not well-formed (invalid token)') r = self.put('resource-lists', "", node, status=409) self.assertInBody(r, 'not well-formed (invalid token)') r = self.put('resource-lists', "", node, status=409) self.assertInBody(r, 'junk after document element') r = self.put('resource-lists', "", node, status=409) self.assertInBody(r, 'not well-formed (invalid token)') if __name__ == '__main__': common.runSuiteFromModule() diff --git a/test/test_global.py b/test/test_global.py index a3f9348..6bc03b6 100755 --- a/test/test_global.py +++ b/test/test_global.py @@ -1,32 +1,32 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. # -from common import * +from .common import * has_global = ['xcap-caps'] no_global = set(apps) - set(has_global) class TestGlobal(XCAPTest): def test_no_global(self): for app in no_global: self.get(app, status=404, globaltree=True) # at the moment, no one authorized to do that # NOTE, even though 404 would be also a valid response here, 401 should take priority # 404 or 401? 
# self.put(app, xml, status=401, globaltree=True) # self.delete(app, status=401, globaltree=True) def test_has_global(self): for app in has_global: self.get(app, status=200, globaltree=True) # # at the moment, no one authorized to do that # #self.put(app, xml, status=401, globaltree=True) # self.delete(app, status=401, globaltree=True) if __name__ == '__main__': runSuiteFromModule() diff --git a/test/test_nsbindings.py b/test/test_nsbindings.py index 84adfd8..4c9f11b 100755 --- a/test/test_nsbindings.py +++ b/test/test_nsbindings.py @@ -1,33 +1,33 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. # -from common import * +from .common import * resource_list_xml = """ Joe Smith Nancy Gross Petri Aukia """ class NSBindingsTest(XCAPTest): def test_ns_bindings(self): self.put('resource-lists', resource_list_xml) r = self.get('resource-lists', '/resource-lists/list[@name="friends"]/namespace::*') self.assertHeader(r, 'ETag') self.assertHeader(r, 'Content-type', 'application/xcap-ns+xml') # add expected content if __name__ == '__main__': runSuiteFromModule() diff --git a/test/test_pidf.py b/test/test_pidf.py index 99e7996..0c01ee7 100755 --- a/test/test_pidf.py +++ b/test/test_pidf.py @@ -1,31 +1,31 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. # -from common import * +from .common import * pidf_xml = """ open """ class PIDFTest(XCAPTest): def test_pidf_manipulation(self): self.getputdelete('pidf-manipulation', pidf_xml, 'application/pidf+xml') if __name__ == '__main__': runSuiteFromModule() diff --git a/test/test_presrules.py b/test/test_presrules.py index 9cef542..655becb 100755 --- a/test/test_presrules.py +++ b/test/test_presrules.py @@ -1,42 +1,42 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. 
# -from common import * +from .common import * pres_rules_xml = """ allow """ class PresenceRulesTest(XCAPTest): def test_pidf_manipulation(self): self.getputdelete('pres-rules', pres_rules_xml, 'application/auth-policy+xml') if __name__ == '__main__': runSuiteFromModule() diff --git a/test/test_resourcelists.py b/test/test_resourcelists.py index 8f42f0f..6409efc 100755 --- a/test/test_resourcelists.py +++ b/test/test_resourcelists.py @@ -1,131 +1,131 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. # -from common import * +from .common import * resource_lists_xml = """ Bill Doe Close Friends Joe Smith Nancy Gross Marketing """ resource_lists_xml_badformed = """ Bill Doe Close Friends Joe Smith Nancy Gross Marketing """ # well-formed, but fails to meet constraints resource_lists_xml_non_unique_list = """ Bill Doe Close Friends Joe Smith Nancy Gross Marketing """ resource_lists_xml_baduri = """ Bill Doe Close Friends Joe Smith Nancy Gross Marketing """ class DocumentTest(XCAPTest): def test_operations1(self): self.getputdelete('resource-lists', resource_lists_xml, 'application/resource-lists+xml') def test_operations2(self): self.getputdelete('resource-lists', resource_lists_xml.replace('UTF-8', 'utf-8'), 'application/resource-lists+xml') def test_operations3(self): r = self.put_rejected('resource-lists', resource_lists_xml_badformed) self.assertInBody(r, ' if __name__ == '__main__': runSuiteFromModule() diff --git a/test/test_rlsservices.py b/test/test_rlsservices.py index a69682f..4cdc452 100755 --- a/test/test_rlsservices.py +++ b/test/test_rlsservices.py @@ -1,124 +1,124 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. 
# -from common import * +from .common import * rls_services_xml = """ http://xcap.example.com/resource-lists/users/sip:joe@example.com/index/~~/resource-lists/list%5b@name=%22l1%22%5d presence presence """ rls_services_xml_badformed = """ http://xcap.example.com/resource-lists/users/sip:joe@example.com/index/~~/resource-lists/list%5b@name=%22l1%22%5d presence presence """ # resource-lists constraints should be checked as well rls_services_xml_non_unique_list = """ http://xcap.example.com/resource-lists/users/sip:joe@example.com/index/~~/resource-lists/list%5b@name=%22l1%22%5d presence presence """ # this one is actually caught by schema validation, not by code rls_services_xml_non_unique_service = """ http://xcap.example.com/resource-lists/users/sip:joe@example.com/index/~~/resource-lists/list%5b@name=%22l1%22%5d presence presence """ # check for that service uniqueness is enforced across different users # check index class DocumentTest(XCAPTest): def test_operations1(self): self.getputdelete('rls-services', rls_services_xml, 'application/rls-services+xml') def test_operations2(self): self.put_rejected('rls-services', rls_services_xml_badformed) def test_operations3(self): self.put_rejected('rls-services', rls_services_xml_non_unique_list) def test_operations4(self): self.put_rejected('rls-services', rls_services_xml_non_unique_service) #self.account = 'test2@example.com' #self.delete_resource('rls-services') #self.assertStatus([200, 404]) ## we aint doing that ## rejected because the other user has the services with the same name ##self.put_rejected('rls-services', rls_services_xml) if __name__ == '__main__': runSuiteFromModule() diff --git a/test/test_watchers.py b/test/test_watchers.py index a85fe3b..07f7409 100755 --- a/test/test_watchers.py +++ b/test/test_watchers.py @@ -1,33 +1,33 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. 
# -from common import XCAPTest, runSuiteFromModule +from .common import XCAPTest, runSuiteFromModule watchers = """ """ class Test(XCAPTest): def test_get(self): self.get('org.openxcap.watchers') self.get('org.openxcap.watchers', '/watchers') self.get('org.openxcap.watchers', globaltree=True, status=404) self.get('org.openxcap.watchers', '/watchers', globaltree=True, status=404) # def test_put_not_allowed(self): # self.put('watchers', watchers, status=405) # self.put('watchers', watchers, '/watchers', status=405) # self.put('watchers', watchers, globaltree=True, status=405) # self.put('watchers', watchers, '/watchers', globaltree=True, status=405) # def test_delete_not_allowed(self): # self.delete('watchers', status=405) # self.delete('watchers', '/watchers', status=405) # self.delete('watchers', globaltree=True, status=405) # self.delete('watchers', '/watchers', globaltree=True, status=405) if __name__ == '__main__': runSuiteFromModule() diff --git a/test/test_xcap_caps.py b/test/test_xcap_caps.py index f53f6f0..bb4d85e 100755 --- a/test/test_xcap_caps.py +++ b/test/test_xcap_caps.py @@ -1,24 +1,24 @@ #!/usr/bin/python2 # Copyright (C) 2007-2010 AG-Projects. # -from common import * +from .common import * class XCAPCaps(XCAPTest): def test_schema(self): r = self.get_global('xcap-caps') validate_xcapcaps_schema(r.body) # TODO: auto check schema for every get schema = load_schema('xcap-caps.xsd') def validate_xcapcaps_schema(document): xml = validate(document, schema) assert xml.find('{urn:ietf:params:xml:ns:xcap-caps}auids') is not None assert xml.find('{urn:ietf:params:xml:ns:xcap-caps}extensions') is not None assert xml.find('{urn:ietf:params:xml:ns:xcap-caps}namespaces') is not None if __name__ == '__main__': runSuiteFromModule() diff --git a/test/xcapclientwrap.py b/test/xcapclientwrap.py index 3ef469f..44d485d 100644 --- a/test/xcapclientwrap.py +++ b/test/xcapclientwrap.py @@ -1,137 +1,137 @@ # Copyright (C) 2007-2010 AG-Projects. 
# import os import re from subprocess import Popen, PIPE from xcaplib.httpclient import HTTPResponse DEBUG = 0 def make_client(options): return XCAPClient(options.xcap_root, options.sip_address, options.password) class XCAPClient(object): """Wrapper of command-line utility xcapclient. Pointless, unless you want to test xcapclient itself. """ XCAPCLIENT = '/home/denis/work/python-xcaplib/xcapclient' def __init__(self, xcap_root, sip_address, password): self.params = ['--xcap-root', xcap_root, '--sip-address', sip_address, '--password', password] def get_params(self, etag=None, globaltree=False, filename=None, headers=None): params = self.params[:] if etag is not None: params += ['--etag', etag] if globaltree: params += ['-c', 'global'] else: params += ['-c', 'users'] if filename is not None: params += ['--filename', filename] - for k, v in (headers or {}).iteritems(): + for k, v in (headers or {}).items(): if v is None: params += ['--add-header', k] else: params += ['--add-header', '%s:%s' % (k, v)] return params def request(self, method, application, input=None, node=None, **params): params = ['--app', application] + self.get_params(**params) params.append(method) if node is not None: if node[:1]!='/': node = '/' + node params.append(node) return self._run(params, input) def _get(self, application, node=None, **params): return self.request('get', application, node=node, **params) def _put(self, application, resource, node=None, **params): return self.request('put', application, input=resource, node=node, **params) def _delete(self, application, node=None, **params): return self.request('delete', application, node=node, **params) def _run(self, params, input=None): params = [self.XCAPCLIENT] + params p = Popen(params, stdin=input and PIPE, stdout=PIPE, stderr=PIPE, env=os.environ) (stdout, stderr) = p.communicate(input=input) if DEBUG: - print '\n______________' - print stdout - print '--------------' - print stderr - print '^^^^^^^^^^^^^^' + 
print('\n______________') + print(stdout) + print('--------------') + print(stderr) + print('^^^^^^^^^^^^^^') code, comment, etag, content_type = parse_stderr(stderr) hdrs = headers() if p.wait() == 0: if code is None: code, comment = 200, 'OK' else: - assert code is not None, `stderr` - assert comment is not None, `stderr` + assert code is not None, repr(stderr) + assert comment is not None, repr(stderr) if etag is not None: hdrs['ETag'] = etag if content_type is not None: hdrs['Content-Type'] = content_type return HTTPResponse(None, code, comment, hdrs, stdout) class headers(dict): def gettype(self): typ = self.get('Content-Type') if typ is None: return typ return typ.split(';', 1)[0] re_status_line = re.compile("^(\d\d\d) (.*?)$", re.M) re_etag = re.compile('^etag: (".*?")$', re.M | re.I) re_content_type = re.compile("^content-type: (.*?)$", re.M | re.I) def findone(re, str): m = re.findall(str) assert len(m)<=1, (m, str) if not m: return None elif len(m)==1: return m[0] def parse_stderr(stderr): """ >>> parse_stderr('''url: https://10.1.1.3/xcap-root/resource-lists/listxx ... 404 Not Found ... content-length: 121 ... ''') (404, 'Not Found', None, None) >>> parse_stderr('''url: https://10.1.1.3/xcap-root/resource-lists/users/alice@example ... etag: "5342d9c443c7fad5d76669c7253688f0" ... content-length: 1829 ... 
''') (None, None, '"5342d9c443c7fad5d76669c7253688f0"', None) >>> parse_stderr('url: https://10.1.1.3/xcap-root/xcap-caps/global/index\\netag: "6fc08e7c18116bb145c7052fc9a2d6bf"\\ncontent-length: 826\\n\\n') (None, None, '"6fc08e7c18116bb145c7052fc9a2d6bf"', None) """ m = findone(re_status_line, stderr) if m is None: code, comment = None, None else: code, comment = m code = int(code) etag = findone(re_etag, stderr) content_type = findone(re_content_type, stderr) return code, comment, etag, content_type if __name__=='__main__': import doctest doctest.testmod() diff --git a/xcap/appusage/__init__.py b/xcap/appusage/__init__.py index cde1d8a..b242d7d 100644 --- a/xcap/appusage/__init__.py +++ b/xcap/appusage/__init__.py @@ -1,374 +1,374 @@ """XCAP application usage module""" import os import sys -from cStringIO import StringIO +from io import StringIO from lxml import etree from application.configuration import ConfigSection, ConfigSetting from application.configuration.datatypes import StringList from application import log import xcap from xcap import errors from xcap import element from xcap.backend import StatusResponse class Backend(object): """Configuration datatype, used to select a backend module from the configuration file.""" def __new__(typ, value): value = value.lower() try: return __import__('xcap.backend.%s' % value, globals(), locals(), ['']) - except (ImportError, AssertionError), e: + except (ImportError, AssertionError) as e: log.critical('Cannot load %r backend module: %s' % (value, e)) sys.exit(1) except Exception: log.exception() sys.exit(1) class ServerConfig(ConfigSection): __cfgfile__ = xcap.__cfgfile__ __section__ = 'Server' backend = ConfigSetting(type=Backend, value=None) disabled_applications = ConfigSetting(type=StringList, value=[]) document_validation = True if ServerConfig.backend is None: log.critical('OpenXCAP needs a backend to be specified in order to run') sys.exit(1) class ApplicationUsage(object): """Base class defining an XCAP 
application""" id = None ## the Application Unique ID (AUID) default_ns = None ## the default XML namespace mime_type = None ## the MIME type schema_file = None ## filename of the schema for the application def __init__(self, storage): ## the XML schema that defines valid documents for this application if self.schema_file: xml_schema_doc = etree.parse(open(os.path.join(os.path.dirname(__file__), 'xml-schemas', self.schema_file), 'r')) self.xml_schema = etree.XMLSchema(xml_schema_doc) else: class EverythingIsValid(object): def __call__(self, *args, **kw): return True def validate(self, *args, **kw): return True self.xml_schema = EverythingIsValid() if storage is not None: self.storage = storage ## Validation def _check_UTF8_encoding(self, xml_doc): """Check if the document is UTF8 encoded. Raise an NotUTF8Error if it's not.""" if xml_doc.docinfo.encoding.lower() != 'utf-8': raise errors.NotUTF8Error(comment='document encoding is %s' % xml_doc.docinfo.encoding) def _check_schema_validation(self, xml_doc): """Check if the given XCAP document validates against the application's schema""" if not self.xml_schema(xml_doc): raise errors.SchemaValidationError(comment=self.xml_schema.error_log) def _check_additional_constraints(self, xml_doc): """Check additional validations constraints for this XCAP document. 
Should be overriden in subclasses if specified by the application usage, and raise a ConstraintFailureError if needed.""" def validate_document(self, xcap_doc): """Check if a document is valid for this application.""" try: xml_doc = etree.parse(StringIO(xcap_doc)) # XXX do not use TreeBuilder here - except etree.XMLSyntaxError, ex: + except etree.XMLSyntaxError as ex: ex.http_error = errors.NotWellFormedError(comment=str(ex)) raise - except Exception, ex: + except Exception as ex: ex.http_error = errors.NotWellFormedError() raise self._check_UTF8_encoding(xml_doc) if ServerConfig.document_validation: self._check_schema_validation(xml_doc) self._check_additional_constraints(xml_doc) ## Authorization policy def is_authorized(self, xcap_user, xcap_uri): """Default authorization policy. Authorizes an XCAPUser for an XCAPUri. Return True if the user is authorized, False otherwise.""" return xcap_user and xcap_user == xcap_uri.user ## Document management def _not_implemented(self, context): raise errors.ResourceNotFound("Application %s does not implement %s context" % (self.id, context)) def get_document(self, uri, check_etag): context = uri.doc_selector.context if context == 'global': return self.get_document_global(uri, check_etag) elif context == 'users': return self.get_document_local(uri, check_etag) else: self._not_implemented(context) def get_document_global(self, uri, check_etag): self._not_implemented('global') def get_document_local(self, uri, check_etag): return self.storage.get_document(uri, check_etag) def put_document(self, uri, document, check_etag): self.validate_document(document) return self.storage.put_document(uri, document, check_etag) def delete_document(self, uri, check_etag): return self.storage.delete_document(uri, check_etag) ## Element management def _cb_put_element(self, response, uri, element_body, check_etag): """This is called when the document that relates to the element is retrieved.""" if response.code == 404: ### XXX let the storate 
raise raise errors.NoParentError ### catch error in errback and attach http_error fixed_element_selector = uri.node_selector.element_selector.fix_star(element_body) try: result = element.put(response.data, fixed_element_selector, element_body) - except element.SelectorError, ex: + except element.SelectorError as ex: ex.http_error = errors.NoParentError(comment=str(ex)) raise if result is None: raise errors.NoParentError new_document, created = result get_result = element.get(new_document, uri.node_selector.element_selector) if get_result != element_body.strip(): raise errors.CannotInsertError('PUT request failed GET(PUT(x))==x invariant') d = self.put_document(uri, new_document, check_etag) def set_201_code(response): try: if response.code==200: response.code = 201 except AttributeError: pass return response if created: d.addCallback(set_201_code) return d def put_element(self, uri, element_body, check_etag): try: element.check_xml_fragment(element_body) - except element.sax.SAXParseException, ex: + except element.sax.SAXParseException as ex: ex.http_error = errors.NotXMLFragmentError(comment=str(ex)) raise - except Exception, ex: + except Exception as ex: ex.http_error = errors.NotXMLFragmentError() raise d = self.get_document(uri, check_etag) return d.addCallbacks(self._cb_put_element, callbackArgs=(uri, element_body, check_etag)) def _cb_get_element(self, response, uri): """This is called when the document related to the element is retrieved.""" if response.code == 404: ## XXX why not let the storage raise? 
raise errors.ResourceNotFound("The requested document %s was not found on this server" % uri.doc_selector) result = element.get(response.data, uri.node_selector.element_selector) if not result: msg = "The requested element %s was not found in the document %s" % (uri.node_selector, uri.doc_selector) raise errors.ResourceNotFound(msg) return StatusResponse(200, response.etag, result) def get_element(self, uri, check_etag): d = self.get_document(uri, check_etag) return d.addCallbacks(self._cb_get_element, callbackArgs=(uri, )) def _cb_delete_element(self, response, uri, check_etag): if response.code == 404: raise errors.ResourceNotFound("The requested document %s was not found on this server" % uri.doc_selector) new_document = element.delete(response.data, uri.node_selector.element_selector) if not new_document: raise errors.ResourceNotFound get_result = element.find(new_document, uri.node_selector.element_selector) if get_result: raise errors.CannotDeleteError('DELETE request failed GET(DELETE(x))==404 invariant') return self.put_document(uri, new_document, check_etag) def delete_element(self, uri, check_etag): d = self.get_document(uri, check_etag) return d.addCallbacks(self._cb_delete_element, callbackArgs=(uri, check_etag)) ## Attribute management def _cb_get_attribute(self, response, uri): """This is called when the document that relates to the attribute is retrieved.""" if response.code == 404: raise errors.ResourceNotFound document = response.data xml_doc = etree.parse(StringIO(document)) application = getApplicationForURI(uri) ns_dict = uri.node_selector.get_ns_bindings(application.default_ns) try: xpath = uri.node_selector.replace_default_prefix() attribute = xml_doc.xpath(xpath, namespaces = ns_dict) - except Exception, ex: + except Exception as ex: ex.http_error = errors.ResourceNotFound() raise if not attribute: raise errors.ResourceNotFound elif len(attribute) != 1: raise errors.ResourceNotFound('XPATH expression is ambiguous') # TODO # The server MUST 
NOT add namespace bindings representing namespaces # used by the element or its children, but declared in ancestor elements return StatusResponse(200, response.etag, attribute[0]) def get_attribute(self, uri, check_etag): d = self.get_document(uri, check_etag) return d.addCallbacks(self._cb_get_attribute, callbackArgs=(uri, )) def _cb_delete_attribute(self, response, uri, check_etag): if response.code == 404: raise errors.ResourceNotFound document = response.data xml_doc = etree.parse(StringIO(document)) application = getApplicationForURI(uri) ns_dict = uri.node_selector.get_ns_bindings(application.default_ns) try: elem = xml_doc.xpath(uri.node_selector.replace_default_prefix(append_terminal=False),namespaces=ns_dict) - except Exception, ex: + except Exception as ex: ex.http_error = errors.ResourceNotFound() raise if not elem: raise errors.ResourceNotFound if len(elem) != 1: raise errors.ResourceNotFound('XPATH expression is ambiguous') elem = elem[0] attribute = uri.node_selector.terminal_selector.attribute if elem.get(attribute): ## check if the attribute exists XXX use KeyError instead del elem.attrib[attribute] else: raise errors.ResourceNotFound new_document = etree.tostring(xml_doc, encoding='UTF-8', xml_declaration=True) return self.put_document(uri, new_document, check_etag) def delete_attribute(self, uri, check_etag): d = self.get_document(uri, check_etag) return d.addCallbacks(self._cb_delete_attribute, callbackArgs=(uri, check_etag)) def _cb_put_attribute(self, response, uri, attribute, check_etag): """This is called when the document that relates to the element is retrieved.""" if response.code == 404: raise errors.NoParentError document = response.data xml_doc = etree.parse(StringIO(document)) application = getApplicationForURI(uri) ns_dict = uri.node_selector.get_ns_bindings(application.default_ns) try: elem = xml_doc.xpath(uri.node_selector.replace_default_prefix(append_terminal=False),namespaces=ns_dict) - except Exception, ex: + except Exception as 
ex: ex.http_error = errors.NoParentError() raise if not elem: raise errors.NoParentError if len(elem) != 1: raise errors.NoParentError('XPATH expression is ambiguous') elem = elem[0] attr_name = uri.node_selector.terminal_selector.attribute elem.set(attr_name, attribute) new_document = etree.tostring(xml_doc, encoding='UTF-8', xml_declaration=True) return self.put_document(uri, new_document, check_etag) def put_attribute(self, uri, attribute, check_etag): ## TODO verify if the attribute is valid d = self.get_document(uri, check_etag) return d.addCallbacks(self._cb_put_attribute, callbackArgs=(uri, attribute, check_etag)) ## Namespace Bindings def _cb_get_ns_bindings(self, response, uri): """This is called when the document that relates to the element is retrieved.""" if response.code == 404: raise errors.ResourceNotFound document = response.data xml_doc = etree.parse(StringIO(document)) application = getApplicationForURI(uri) ns_dict = uri.node_selector.get_ns_bindings(application.default_ns) try: elem = xml_doc.xpath(uri.node_selector.replace_default_prefix(append_terminal=False),namespaces=ns_dict) - except Exception, ex: + except Exception as ex: ex.http_error = errors.ResourceNotFound() raise if not elem: raise errors.ResourceNotFound elif len(elem)!=1: raise errors.ResourceNotFound('XPATH expression is ambiguous') elem = elem[0] namespaces = '' - for prefix, ns in elem.nsmap.items(): + for prefix, ns in list(elem.nsmap.items()): namespaces += ' xmlns%s="%s"' % (prefix and ':%s' % prefix or '', ns) result = '<%s %s/>' % (elem.tag, namespaces) return StatusResponse(200, response.etag, result) def get_ns_bindings(self, uri, check_etag): d = self.get_document(uri, check_etag) return d.addCallbacks(self._cb_get_ns_bindings, callbackArgs=(uri, )) from xcap.appusage.capabilities import XCAPCapabilitiesApplication from xcap.appusage.dialogrules import DialogRulesApplication from xcap.appusage.directory import XCAPDirectoryApplication from xcap.appusage.pidf import 
PIDFManipulationApplication from xcap.appusage.prescontent import PresContentApplication from xcap.appusage.presrules import PresenceRulesApplication from xcap.appusage.purge import PurgeApplication from xcap.appusage.resourcelists import ResourceListsApplication from xcap.appusage.rlsservices import RLSServicesApplication from xcap.appusage.test import TestApplication from xcap.appusage.watchers import WatchersApplication storage = ServerConfig.backend.Storage() applications = { DialogRulesApplication.id: DialogRulesApplication(storage), PIDFManipulationApplication.id: PIDFManipulationApplication(storage), PresenceRulesApplication.id: PresenceRulesApplication(storage), PresenceRulesApplication.oma_id: PresenceRulesApplication(storage), PurgeApplication.id: PurgeApplication(storage), ResourceListsApplication.id: ResourceListsApplication(storage), RLSServicesApplication.id: RLSServicesApplication(storage), TestApplication.id: TestApplication(storage), WatchersApplication.id: WatchersApplication(storage), XCAPCapabilitiesApplication.id: XCAPCapabilitiesApplication(), XCAPDirectoryApplication.id: XCAPDirectoryApplication(storage) } # public GET applications (GET is not challenged for auth) public_get_applications = {PresContentApplication.id: PresContentApplication(storage)} applications.update(public_get_applications) for application in ServerConfig.disabled_applications: applications.pop(application, None) -namespaces = dict((k, v.default_ns) for (k, v) in applications.items()) +namespaces = dict((k, v.default_ns) for (k, v) in list(applications.items())) def getApplicationForURI(xcap_uri): return applications.get(xcap_uri.application_id, None) __all__ = ['applications', 'namespaces', 'public_get_applications', 'getApplicationForURI', 'ApplicationUsage', 'Backend'] diff --git a/xcap/appusage/capabilities.py b/xcap/appusage/capabilities.py index f4e18f2..9622426 100644 --- a/xcap/appusage/capabilities.py +++ b/xcap/appusage/capabilities.py @@ -1,44 +1,44 @@ from lxml 
class XCAPCapabilitiesApplication(ApplicationUsage):
    ## RFC 4825
    id = "xcap-caps"
    default_ns = "urn:ietf:params:xml:ns:xcap-caps"
    mime_type = "application/xcap-caps+xml"

    def __init__(self):
        pass

    def _get_document(self):
        """Build the xcap-caps document listing every registered AUID and
        default namespace, caching the serialized document and its etag on
        first use (the application registry does not change after startup).

        Returns a (document_bytes, etag) pair.
        """
        if hasattr(self, 'doc'):
            return self.doc, self.etag
        root = etree.Element("xcap-caps", nsmap={None: self.default_ns})
        auids = etree.SubElement(root, "auids")
        # mandatory per the schema, intentionally left empty (was previously
        # bound to an unused local variable)
        etree.SubElement(root, "extensions")
        namespaces = etree.SubElement(root, "namespaces")
        # imported here to avoid a circular import at module load time
        from xcap.appusage import applications
        # 'app_id' rather than 'id': avoid shadowing the builtin and this
        # class' own 'id' attribute; iterate items() directly — no copy needed
        for app_id, app in applications.items():
            etree.SubElement(auids, "auid").text = app_id
            etree.SubElement(namespaces, "namespace").text = app.default_ns
        self.doc = etree.tostring(root, encoding="UTF-8", pretty_print=True, xml_declaration=True)
        self.etag = make_etag('xcap-caps', self.doc)
        return self.doc, self.etag

    def get_document_global(self, uri, check_etag):
        """Serve the (global) capabilities document."""
        doc, etag = self._get_document()
        return defer.succeed(StatusResponse(200, etag=etag, data=doc))

    def get_document_local(self, uri, check_etag):
        # xcap-caps only exists in the global tree
        self._not_implemented('users')

    def put_document(self, uri, document, check_etag):
        raise errors.ResourceNotFound("This application does not support PUT method")
_docs_to_xml(self, docs, uri): sip_uri = "sip:%s@%s" % (uri.user.username, uri.user.domain) root = etree.Element("xcap-directory", nsmap={None: self.default_ns}) if docs: - for k, v in docs.iteritems(): + for k, v in docs.items(): folder = etree.SubElement(root, "folder", attrib={'auid': k}) for item in v: # We may have more than one document for the same application entry_uri = "%s/%s/users/%s/%s" % (uri.xcap_root, k, sip_uri, item[0]) entry = etree.SubElement(folder, "entry") entry.set("uri", entry_uri) entry.set("etag", item[1]) doc = etree.tostring(root, encoding="UTF-8", pretty_print=True, xml_declaration=True) #self.validate_document(doc) return defer.succeed(StatusResponse(200, etag=None, data=doc)) def get_document_local(self, uri, check_etag): docs_def = self.storage.get_documents_list(uri) docs_def.addCallback(self._docs_to_xml, uri) return docs_def def put_document(self, uri, document, check_etag): raise errors.ResourceNotFound("This application does not support PUT method") diff --git a/xcap/appusage/prescontent.py b/xcap/appusage/prescontent.py index cc411c3..f0090c5 100644 --- a/xcap/appusage/prescontent.py +++ b/xcap/appusage/prescontent.py @@ -1,49 +1,49 @@ -from cStringIO import StringIO +from io import StringIO from lxml import etree from xcap import errors from xcap.appusage import ApplicationUsage class PresContentApplication(ApplicationUsage): id = "org.openmobilealliance.pres-content" default_ns = "urn:oma:xml:prs:pres-content" mime_type = "application/vnd.oma.pres-content+xml" icon_mime_types = ('image/jpeg', 'image/gif', 'image/png') icon_encoding = 'base64' icon_max_size = 300*1024 def _validate_icon(self, document): mime_type = None encoding = None data = None xml = StringIO(document) try: tree = etree.parse(xml) root = tree.getroot() ns = root.nsmap[None] for element in root: if element.tag == "{%s}mime-type" % ns: mime_type = element.text.lower() if element.tag == "{%s}encoding" % ns: encoding = element.text.lower() if element.tag == 
"{%s}data" % ns: data = element.text except etree.ParseError: raise errors.NotWellFormedError() else: if mime_type not in self.icon_mime_types: raise errors.ConstraintFailureError(phrase="Unsupported MIME type. Allowed MIME types: %s" % ','.join(self.icon_mime_types)) if encoding != self.icon_encoding: raise errors.ConstraintFailureError(phrase="Unsupported encoding. Allowed enconding: %s" % self.icon_encoding) if data is None: raise errors.ConstraintFailureError(phrase="No icon data was provided") if len(data) > self.icon_max_size: raise errors.ConstraintFailureError(phrase="Size limit exceeded, maximum allowed size is %d bytes" % self.icon_max_size) def put_document(self, uri, document, check_etag): if uri.doc_selector.document_path.startswith('oma_status-icon'): self._validate_icon(document) return self.storage.put_document(uri, document, check_etag) diff --git a/xcap/appusage/presrules.py b/xcap/appusage/presrules.py index 913a108..e8d486a 100644 --- a/xcap/appusage/presrules.py +++ b/xcap/appusage/presrules.py @@ -1,116 +1,116 @@ from application.configuration import ConfigSection, ConfigSetting -from cStringIO import StringIO +from io import StringIO from lxml import etree -from urllib import unquote +from urllib.parse import unquote import xcap from xcap import errors from xcap.appusage import ApplicationUsage from xcap.datatypes import XCAPRootURI from xcap.uri import XCAPUri from xcap.xpath import DocumentSelectorError, NodeParsingError class AuthenticationConfig(ConfigSection): __cfgfile__ = xcap.__cfgfile__ __section__ = 'Authentication' default_realm = ConfigSetting(type=str, value=None) class ServerConfig(ConfigSection): __cfgfile__ = xcap.__cfgfile__ __section__ = 'Server' allow_external_references = False root = ConfigSetting(type=XCAPRootURI, value=None) def parseExternalListURI(node_uri, default_realm): from xcap.appusage import namespaces xcap_root = None for uri in ServerConfig.root.uris: if node_uri.startswith(uri): xcap_root = uri break if 
class PresenceRulesApplication(ApplicationUsage):
    id = "pres-rules"
    oma_id = "org.openmobilealliance.pres-rules"
    default_ns = "urn:ietf:params:xml:ns:pres-rules"
    mime_type = "application/auth-policy+xml"
    schema_file = 'presence-rules.xsd'

    def _check_external_list(self, external_list, node_uri):
        """Verify an external-list reference: it must resolve within the
        same XCAP root and belong to the same user as the request URI."""
        if not external_list:
            return
        external_list = unquote(external_list)
        external_list_uri = parseExternalListURI(external_list, AuthenticationConfig.default_realm)
        if external_list_uri.xcap_root != node_uri.xcap_root:
            raise errors.ConstraintFailureError(phrase="XCAP root in the external list doesn't match PUT requests'")
        if external_list_uri.user != node_uri.user:
            raise errors.ConstraintFailureError(phrase="Cannot link to another user's list")

    def _validate_rules(self, document, node_uri):
        """Enforce the OMA common-policy constraints on a pres-rules
        document; raises NotWellFormedError or ConstraintFailureError."""
        common_policy_namespace = 'urn:ietf:params:xml:ns:common-policy'
        oma_namespace = 'urn:oma:xml:xdm:common-policy'

        actions_tag = '{%s}actions' % common_policy_namespace
        conditions_tag = '{%s}conditions' % common_policy_namespace
        identity_tag = '{%s}identity' % common_policy_namespace
        rule_tag = '{%s}rule' % common_policy_namespace
        transformations_tag = '{%s}transformations' % common_policy_namespace
        sub_handling_tag = '{%s}sub-handling' % self.default_ns

        oma_anonymous_request_tag = '{%s}anonymous-request' % oma_namespace
        oma_entry_tag = '{%s}entry' % oma_namespace
        oma_external_list_tag = '{%s}external-list' % oma_namespace
        oma_other_identity_tag = '{%s}other-identity' % oma_namespace

        try:
            xml = StringIO(document)
            tree = etree.parse(xml)
            root = tree.getroot()
            # no need to copy nsmap.values() just to test membership
            if oma_namespace in root.nsmap.values():
                # Condition constraints: at most one of each condition kind
                for element in root.iter(conditions_tag):
                    if any(len(element.findall(item)) > 1 for item in (identity_tag, oma_external_list_tag, oma_other_identity_tag, oma_anonymous_request_tag)):
                        raise errors.ConstraintFailureError(phrase="Complex rules are not allowed")
                # Transformations constraints: only 'allow' rules may transform
                for rule in root.iter(rule_tag):
                    actions = rule.find(actions_tag)
                    if actions is not None:
                        sub_handling = actions.find(sub_handling_tag)
                        transformations = rule.find(transformations_tag)
                        # len(transformations): Element.getchildren() is
                        # deprecated and removed in recent lxml releases
                        if sub_handling is not None and sub_handling.text != 'allow' and transformations is not None and len(transformations):
                            raise errors.ConstraintFailureError(phrase="transformations element not allowed")
                # External list constraints
                if not ServerConfig.allow_external_references:
                    for element in root.iter(oma_external_list_tag):
                        for entry in element.iter(oma_entry_tag):
                            self._check_external_list(entry.attrib.get('anc', None), node_uri)
        except etree.XMLSyntaxError:
            # lxml signals malformed XML with XMLSyntaxError; lxml.etree has
            # no ParseError attribute, so the previous handler could never fire
            raise errors.NotWellFormedError()

    def put_document(self, uri, document, check_etag):
        """Schema-validate, enforce OMA constraints, then store."""
        self.validate_document(document)
        self._validate_rules(document, uri)
        return self.storage.put_document(uri, document, check_etag)
def parseExternalListURI(node_uri, default_realm):
    """Parse an absolute external-list URI into an XCAPUri.

    The URI must start with one of the configured XCAP roots and carry a
    non-empty resource selector; the user's domain defaults to
    default_realm when absent. Raises ConstraintFailureError otherwise.
    """
    from xcap.appusage import namespaces
    xcap_root = next((root for root in ServerConfig.root.uris if node_uri.startswith(root)), None)
    if xcap_root is None:
        raise errors.ConstraintFailureError("XCAP root not found for URI: %s" % node_uri)
    resource_selector = node_uri[len(xcap_root):]
    if resource_selector in ('', '/'):
        raise errors.ConstraintFailureError("Resource selector missing")
    try:
        parsed_uri = XCAPUri(xcap_root, resource_selector, namespaces)
    except (DocumentSelectorError, NodeParsingError) as e:
        raise errors.ConstraintFailureError(phrase=str(e))
    if parsed_uri.user.domain is None:
        parsed_uri.user.domain = default_realm
    return parsed_uri
But that would be much more complicated and I'm not sure if such effort is justified""" res = '' while elem is not None: parent = elem.getparent() if parent is None: res = '/*' + res else: res = '/*[%s]' % parent.index(elem) + res elem = parent return res def attribute_not_unique(elem, attr): raise errors.UniquenessFailureError(exists = get_xpath(elem) + '/@' + attr) class ResourceListsApplication(ApplicationUsage): # RFC 4826 id = "resource-lists" default_ns = "urn:ietf:params:xml:ns:resource-lists" mime_type= "application/resource-lists+xml" schema_file = 'resource-lists.xsd' @classmethod def check_list(cls, element, node_uri): from xcap.authentication import parseNodeURI entry_tag = "{%s}entry" % cls.default_ns entry_ref_tag = "{%s}entry-ref" % cls.default_ns external_tag ="{%s}external" % cls.default_ns list_tag = "{%s}list" % cls.default_ns anchor_attrs = set() name_attrs = set() ref_attrs = set() uri_attrs = set() for child in element.getchildren(): if child.tag == list_tag: name = child.get("name") if name in name_attrs: attribute_not_unique(child, 'name') else: name_attrs.add(name) cls.check_list(child, node_uri) elif child.tag == entry_tag: uri = child.get("uri") if uri in uri_attrs: attribute_not_unique(child, 'uri') else: uri_attrs.add(uri) elif child.tag == entry_ref_tag: ref = child.get("ref") if ref in ref_attrs: attribute_not_unique(child, 'ref') else: try: ref = unquote(ref) ref_uri = parseNodeURI("%s/%s" % (node_uri.xcap_root, ref), AuthenticationConfig.default_realm) if not ServerConfig.allow_external_references and ref_uri.user != node_uri.user: raise errors.ConstraintFailureError(phrase="Cannot link to another users' list") try: if ref_uri.node_selector.element_selector[-1].name[1] != "entry": raise ValueError except LookupError: raise ValueError - except (DocumentSelectorError, NodeParsingError), e: + except (DocumentSelectorError, NodeParsingError) as e: raise errors.ConstraintFailureError(phrase=str(e)) except ValueError: raise 
errors.ConstraintFailureError else: ref_attrs.add(ref) elif child.tag == external_tag: anchor = child.get("anchor") if anchor in anchor_attrs: attribute_not_unique(child, 'anchor') else: anchor = unquote(anchor) if not ServerConfig.allow_external_references: external_list_uri = parseExternalListURI(anchor, AuthenticationConfig.default_realm) if external_list_uri.xcap_root != node_uri.xcap_root: raise errors.ConstraintFailureError(phrase="XCAP root in the external list doesn't match PUT requests'") if external_list_uri.user != node_uri.user: raise errors.ConstraintFailureError(phrase="Cannot link to another users' list") else: parsed_url = urlparse(anchor) if parsed_url.scheme not in ('http', 'https'): raise errors.ConstraintFailureError(phrase='Specified anchor is not a valid URL') else: anchor_attrs.add(anchor) def put_document(self, uri, document, check_etag): self.validate_document(document) # Check additional constraints (see section 3.4.5 of RFC 4826) xml_doc = etree.parse(StringIO(document)) self.check_list(xml_doc.getroot(), uri) return self.storage.put_document(uri, document, check_etag) diff --git a/xcap/appusage/watchers.py b/xcap/appusage/watchers.py index 50d7a30..704830c 100644 --- a/xcap/appusage/watchers.py +++ b/xcap/appusage/watchers.py @@ -1,35 +1,35 @@ from lxml import etree from xcap import errors from xcap.appusage import ApplicationUsage from xcap.dbutil import make_etag from xcap.backend import StatusResponse class WatchersApplication(ApplicationUsage): id = "org.openxcap.watchers" default_ns = "http://openxcap.org/ns/watchers" mime_type= "application/xml" schema_file = 'watchers.xsd' # who needs schema for readonly application? 
def _watchers_to_xml(self, watchers, uri, check_etag): root = etree.Element("watchers", nsmap={None: self.default_ns}) for watcher in watchers: watcher_elem = etree.SubElement(root, "watcher") - for name, value in watcher.iteritems(): + for name, value in watcher.items(): etree.SubElement(watcher_elem, name).text = value doc = etree.tostring(root, encoding="utf-8", pretty_print=True, xml_declaration=True) #self.validate_document(doc) etag = make_etag(uri, doc) check_etag(etag) return StatusResponse(200, data=doc, etag=etag) def get_document_local(self, uri, check_etag): watchers_def = self.storage.get_watchers(uri) watchers_def.addCallback(self._watchers_to_xml, uri, check_etag) return watchers_def def put_document(self, uri, document, check_etag): raise errors.ResourceNotFound("This application does not support PUT method") diff --git a/xcap/authentication.py b/xcap/authentication.py index e48ec8e..f0c154f 100644 --- a/xcap/authentication.py +++ b/xcap/authentication.py @@ -1,366 +1,366 @@ """XCAP authentication module""" # XXX this module should be either renamed or refactored as it does more then just auth. 
from hashlib import md5 from zope.interface import Interface, implements from twisted.internet import defer from twisted.python import failure from twisted.cred import credentials, portal, checkers, error as credError from application.configuration.datatypes import NetworkRangeList from application.configuration import ConfigSection, ConfigSetting import struct import socket -import urlparse +import urllib.parse import xcap from xcap.datatypes import XCAPRootURI from xcap.appusage import getApplicationForURI, namespaces, public_get_applications from xcap.errors import ResourceNotFound from xcap.uri import XCAPUser, XCAPUri from xcap.web import http, server, stream, responsecode, http_headers from xcap.web.auth import basic, digest from xcap.web.auth.wrapper import HTTPAuthResource, UnauthorizedResponse # body of 404 error message to render when user requests xcap-root # it's html, because XCAP root is often published on the web. # NOTE: there're no plans to convert other error messages to html. # Since a web-browser is not the primary tool for accessing XCAP server, text/plain # is easier for clients to present to user/save to logs/etc. WELCOME = ('Not Found' '

Not Found

XCAP server does not serve anything ' 'directly under XCAP Root URL. You have to be more specific.' '

' '
OpenXCAP/%s
def parseNodeURI(node_uri, default_realm):
    """Parse a full node URI (XCAP root + document selector + node
    selector) into an XCAPUri, filling in default_realm when the user part
    carries no domain.

    Raises ResourceNotFound when no configured XCAP root matches, or with
    the HTML welcome page when the resource selector is empty.
    """
    matching_roots = (root for root in ServerConfig.root.uris if node_uri.startswith(root))
    xcap_root = next(matching_roots, None)
    if xcap_root is None:
        raise ResourceNotFound("XCAP root not found for URI: %s" % node_uri)
    resource_selector = node_uri[len(xcap_root):]
    if resource_selector in ('', '/'):
        raise ResourceNotFound(WELCOME, http_headers.MimeType("text", "html"))
    parsed = XCAPUri(xcap_root, resource_selector, namespaces)
    if parsed.user.domain is None:
        parsed.user.domain = default_realm
    return parsed
class TrustedPeerCredentials(object):
    """Credentials that identify a request solely by its peer IP address."""

    # NOTE(review): zope.interface's implements() is Python 2 only; the
    # Python 3 spelling is the @implementer class decorator — confirm the
    # zope.interface version in use before deploying.
    implements(ITrustedPeerCredentials)

    def __init__(self, peer):
        self.peer = peer

    def checkPeer(self, trusted_peers):
        """Return True when self.peer (dotted-quad IPv4) falls inside any
        (network, mask) pair of trusted_peers, compared as big-endian ints."""
        # hoist the loop-invariant address conversion out of the loop
        peer_address = struct.unpack('!L', socket.inet_aton(self.peer))[0]
        # 'network'/'mask' instead of 'range': do not shadow the builtin
        for network, mask in ((entry[0], entry[1]) for entry in trusted_peers):
            if peer_address & mask == network:
                return True
        return False
def get_cred(request, default_realm):
    """Extract the (username, realm) pair to authenticate against from the
    request's Authorization header.

    Returns (None, default_realm) when no usable Authorization header is
    present; raises NotImplementedError for digest credentials.
    """
    import base64
    auth = request.headers.getHeader('authorization')
    if auth:
        typ, data = auth
        if typ == 'basic':
            # str.decode('base64') was removed in Python 3; decode the
            # 'user:password' payload with the base64 module instead
            userpass = base64.b64decode(data).decode('utf-8')
            return userpass.split(':', 1)[0], default_realm
        elif typ == 'digest':
            raise NotImplementedError
    return None, default_realm
domain part of "username@domain" is required because this server has no default domain)') if not xcap_uri.user.username: # for 'global' requests there's no username@domain in the URI, # so we will use username and domain from Authorization header xcap_uri.user.username, xcap_uri.user.domain = get_cred(request, AuthenticationConfig.default_realm) self._updateRealm(realm) # If we receive a GET to a 'public GET application' we will not authenticate it - if request.method == "GET" and public_get_applications.has_key(xcap_uri.application_id): + if request.method == "GET" and xcap_uri.application_id in public_get_applications: return self.portal.login(PublicGetApplicationCredentials(), None, IPublicGetApplication ).addCallbacks(self._loginSucceeded, self._publicGetApplicationLoginFailed, (request,), None, (request,), None) remote_addr = request.remoteAddr.host if AuthenticationConfig.trusted_peers: return self.portal.login(TrustedPeerCredentials(remote_addr), None, ITrustedPeer ).addCallbacks(self._loginSucceeded, self._trustedPeerLoginFailed, (request,), None, (request,), None) return HTTPAuthResource.authenticate(self, request) def _trustedPeerLoginFailed(self, result, request): """If the peer is not trusted, fallback to HTTP basic/digest authentication.""" return HTTPAuthResource.authenticate(self, request) def _publicGetApplicationLoginFailed(self, result, request): return HTTPAuthResource.authenticate(self, request) def _loginSucceeded(self, avatar, request): """Authorizes an XCAP request after it has been authenticated.""" interface, avatar_id = avatar ## the avatar is the authenticated XCAP User xcap_uri = request.xcap_uri application = getApplicationForURI(xcap_uri) if not application: raise ResourceNotFound if interface is IAuthUser and application.is_authorized(XCAPUser.parse(avatar_id), xcap_uri): return HTTPAuthResource._loginSucceeded(self, avatar, request) elif interface is ITrustedPeer or interface is IPublicGetApplication: return 
HTTPAuthResource._loginSucceeded(self, avatar, request) else: return failure.Failure( http.HTTPError( UnauthorizedResponse( self.credentialFactories, request.remoteAddr))) def locateChild(self, request, seg): """ Authenticate the request then return the C{self.wrappedResource} and the unmodified segments. We're not using path location, we want to fall back to the renderHTTP() call. """ #return self.authenticate(request), seg return self, server.StopTraversal def renderHTTP(self, request): """ Authenticate the request then return the result of calling renderHTTP on C{self.wrappedResource} """ if request.method not in self.allowedMethods(): response = http.Response(responsecode.NOT_ALLOWED) response.headers.setHeader("allow", self.allowedMethods()) return response def _renderResource(resource): return resource.renderHTTP(request) def _finished_reading(ignore, result): data = ''.join(result) request.attachment = data d = self.authenticate(request) d.addCallback(_renderResource) return d if request.method in ('PUT', 'DELETE'): # we need to authenticate the request after all the attachment stream # has been read # QQQ DELETE doesn't have any attachments, does it? nor does GET. # QQQ Reading attachment when there isn't one won't hurt, will it? # QQQ So why don't we just do it all the time for all requests? 
class BasicCredentials(credentials.UsernamePassword):
    """Custom Basic Credentials, which support both plain and hashed checks."""

    # NOTE(review): zope.interface's implements() is Python 2 only; the
    # Python 3 spelling is the @implementer class decorator — confirm the
    # zope.interface version in use.
    implements(credentials.IUsernamePassword, digest.IUsernameDigestHash)

    def __init__(self, username, password, realm):
        credentials.UsernamePassword.__init__(self, username, password)
        self.realm = realm

    @property
    def hash(self):
        """MD5 of 'username:realm:password' (the digest-auth HA1 value).

        hashlib.md5() requires bytes on Python 3, so the formatted string
        must be encoded before hashing.
        """
        return md5('{0.username}:{0.realm}:{0.password}'.format(self).encode('utf-8')).hexdigest()

    def checkHash(self, digestHash):
        """Return True when the supplied HA1 digest matches ours."""
        return digestHash == self.hash
Config(ConfigSection): __cfgfile__ = xcap.__cfgfile__ __section__ = 'Database' authentication_db_uri = '' storage_db_uri = '' subscriber_table = 'subscriber' user_col = 'username' domain_col = 'domain' password_col = 'password' ha1_col = 'ha1' xcap_table = 'xcap' if not Config.authentication_db_uri or not Config.storage_db_uri: log.critical('Authentication DB URI and Storage DB URI must be provided') sys.exit(1) class DBBase(object): def __init__(self): self._db_connect() class PasswordChecker(DBBase): """A credentials checker against a database subscriber table.""" implements(checkers.ICredentialsChecker) credentialInterfaces = (credentials.IUsernamePassword, credentials.IUsernameHashedPassword) def _db_connect(self): self.conn = auth_db_connection(Config.authentication_db_uri) def _query_credentials(self, credentials): raise NotImplementedError def _got_query_results(self, rows, credentials): if not rows: raise credError.UnauthorizedLogin("Unauthorized login") else: return self._authenticate_credentials(rows[0][0], credentials) def _authenticate_credentials(self, password, credentials): raise NotImplementedError def _checkedPassword(self, matched, username, realm): if matched: username = username.split('@', 1)[0] ## this is the avatar ID return "%s@%s" % (username, realm) else: raise credError.UnauthorizedLogin("Unauthorized login") def requestAvatarId(self, credentials): """Return the avatar ID for the credentials which must have the username and realm attributes, or an UnauthorizedLogin in case of a failure.""" d = self._query_credentials(credentials) return d class PlainPasswordChecker(PasswordChecker): """A credentials checker against a database subscriber table, where the passwords are stored in plain text.""" implements(checkers.ICredentialsChecker) def _query_credentials(self, credentials): username, domain = credentials.username.split('@', 1)[0], credentials.realm query = """SELECT %(password_col)s FROM %(table)s WHERE %(user_col)s = %%(username)s AND 
class Error(Exception):
    """Base class for database backend errors.

    A subclass may define a class-level 'msg' attribute, which is then
    used as the exception message."""

    def __init__(self):
        args = (self.msg,) if hasattr(self, 'msg') else ()
        Exception.__init__(self, *args)
This seems unnecessary convoluted and probably should be changed to 'DELETE .. WHERE etag=ETAG'. We still need to find out whether DELETE was actually performed. """ class UpdateFailed(RaceError): msg = 'UPDATE request failed' class DeleteFailed(RaceError): msg = 'DELETE request failed' class MultipleResultsError(Error): """This should never happen. If it did happen. that means either the table was corrupted or there's a logic error""" def __init__(self, params): Exception.__init__(self, 'database request has more than one result: ' + repr(params)) -class Storage(DBBase): - __metaclass__ = Singleton +class Storage(DBBase, metaclass=Singleton): implements(IStorage) app_mapping = {"pres-rules" : 1<<1, "resource-lists" : 1<<2, "rls-services" : 1<<3, "pidf-manipulation" : 1<<4, "org.openmobilealliance.pres-rules" : 1<<5, "org.openmobilealliance.pres-content" : 1<<6, "org.openxcap.dialog-rules" : 1<<7, "test-app" : 0} def _db_connect(self): self.conn = storage_db_connection(Config.storage_db_uri) def _normalize_document_path(self, uri): if uri.application_id in ("pres-rules", "org.openmobilealliance.pres-rules"): # some clients e.g. 
counterpath's eyebeam save presence rules under # different filenames between versions and they expect to find the same # information, thus we are forcing all presence rules documents to be # saved under "index.xml" default filename uri.doc_selector.document_path = "index.xml" def _get_document(self, trans, uri, check_etag): username, domain = uri.user.username, uri.user.domain self._normalize_document_path(uri) doc_type = self.app_mapping[uri.application_id] query = """SELECT doc, etag FROM %(table)s WHERE username = %%(username)s AND domain = %%(domain)s AND doc_type= %%(doc_type)s AND doc_uri=%%(document_path)s""" % { "table": Config.xcap_table} params = {"username": username, "domain" : domain, "doc_type": doc_type, "document_path": uri.doc_selector.document_path} trans.execute(query, params) result = trans.fetchall() if len(result)>1: raise MultipleResultsError(params) elif result: doc, etag = result[0] - if isinstance(doc, unicode): + if isinstance(doc, str): doc = doc.encode('utf-8') check_etag(etag) response = StatusResponse(200, etag, doc) else: response = StatusResponse(404) return response def _put_document(self, trans, uri, document, check_etag): username, domain = uri.user.username, uri.user.domain self._normalize_document_path(uri) doc_type = self.app_mapping[uri.application_id] document_path = uri.doc_selector.document_path query = """SELECT etag FROM %(table)s WHERE username = %%(username)s AND domain = %%(domain)s AND doc_type= %%(doc_type)s AND doc_uri=%%(document_path)s""" % { "table": Config.xcap_table} params = {"username": username, "domain" : domain, "doc_type": doc_type, "document_path": document_path} trans.execute(query, params) result = trans.fetchall() if len(result) > 1: raise MultipleResultsError(params) elif not result: check_etag(None, False) ## the document doesn't exist, create it etag = make_random_etag(uri) query = """INSERT INTO %(table)s (username, domain, doc_type, etag, doc, doc_uri) VALUES (%%(username)s, %%(domain)s, 
%%(doc_type)s, %%(etag)s, %%(document)s, %%(document_path)s)""" % { "table": Config.xcap_table } params = {"username": username, "domain" : domain, "doc_type": doc_type, "etag": etag, "document": document, "document_path": document_path} # may raise IntegrityError here, if the document was created in another connection # will be catched by repeat_on_error trans.execute(query, params) return StatusResponse(201, etag) else: old_etag = result[0][0] ## first check the etag of the existing resource check_etag(old_etag) ## the document exists, replace it etag = make_random_etag(uri) query = """UPDATE %(table)s SET doc = %%(document)s, etag = %%(etag)s WHERE username = %%(username)s AND domain = %%(domain)s AND doc_type = %%(doc_type)s AND etag = %%(old_etag)s AND doc_uri = %%(document_path)s""" % { "table": Config.xcap_table } params = {"document": document, "etag": etag, "username": username, "domain" : domain, "doc_type": doc_type, "old_etag": old_etag, "document_path": document_path} trans.execute(query, params) # the request may not update anything (e.g. 
if etag was changed by another connection # after we did SELECT); if so, we should retry updated = getattr(trans._connection, 'affected_rows', lambda : 1)() if not updated: raise UpdateFailed assert updated == 1, updated return StatusResponse(200, etag, old_etag=old_etag) def _delete_document(self, trans, uri, check_etag): username, domain = uri.user.username, uri.user.domain self._normalize_document_path(uri) doc_type = self.app_mapping[uri.application_id] document_path = uri.doc_selector.document_path query = """SELECT etag FROM %(table)s WHERE username = %%(username)s AND domain = %%(domain)s AND doc_type= %%(doc_type)s AND doc_uri = %%(document_path)s""" % { "table": Config.xcap_table} params = {"username": username, "domain" : domain, "doc_type": doc_type, "document_path": document_path} trans.execute(query, params) result = trans.fetchall() if len(result)>1: raise MultipleResultsError(params) elif result: etag = result[0][0] check_etag(etag) query = """DELETE FROM %(table)s WHERE username = %%(username)s AND domain = %%(domain)s AND doc_type= %%(doc_type)s AND doc_uri = %%(document_path)s AND etag = %%(etag)s""" % {"table" : Config.xcap_table} params = {"username": username, "domain" : domain, "doc_type": doc_type, "document_path": document_path, "etag": etag} trans.execute(query, params) deleted = getattr(trans._connection, 'affected_rows', lambda : 1)() if not deleted: # the document was replaced/removed after the SELECT but before the DELETE raise DeleteFailed assert deleted == 1, deleted return StatusResponse(200, old_etag=etag) else: return StatusResponse(404) def _delete_all_documents(self, trans, uri): username, domain = uri.user.username, uri.user.domain query = """DELETE FROM %(table)s WHERE username = %%(username)s AND domain = %%(domain)s """ % {"table" : Config.xcap_table} params = {"username": username, "domain" : domain} trans.execute(query, params) return StatusResponse(200) def get_document(self, uri, check_etag): return 
self.conn.runInteraction(self._get_document, uri, check_etag) def put_document(self, uri, document, check_etag): return repeat_on_error(10, (UpdateFailed, IntegrityError), self.conn.runInteraction, self._put_document, uri, document, check_etag) def delete_document(self, uri, check_etag): return repeat_on_error(10, DeleteFailed, self.conn.runInteraction, self._delete_document, uri, check_etag) def delete_documents(self, uri): return self.conn.runInteraction(self._delete_all_documents, uri) # Application-specific functions def _get_watchers(self, trans, uri): status_mapping = {1: "allow", 2: "confirm", 3: "deny"} presentity_uri = "sip:%s@%s" % (uri.user.username, uri.user.domain) query = """SELECT watcher_username, watcher_domain, status FROM watchers WHERE presentity_uri = %(puri)s""" params = {'puri': presentity_uri} trans.execute(query, params) result = trans.fetchall() watchers = [{"id": "%s@%s" % (w_user, w_domain), "status": status_mapping.get(subs_status, "unknown"), "online": "false"} for w_user, w_domain, subs_status in result] query = """SELECT watcher_username, watcher_domain FROM active_watchers WHERE presentity_uri = %(puri)s AND event = 'presence'""" trans.execute(query, params) result = trans.fetchall() active_watchers = set("%s@%s" % pair for pair in result) for watcher in watchers: if watcher["id"] in active_watchers: watcher["online"] = "true" return watchers def get_watchers(self, uri): return self.conn.runInteraction(self._get_watchers, uri) def _get_documents_list(self, trans, uri): query = """SELECT doc_type, doc_uri, etag FROM %(table)s WHERE username = %%(username)s AND domain = %%(domain)s""" % {'table': Config.xcap_table} params = {'username': uri.user.username, 'domain': uri.user.domain} trans.execute(query, params) result = trans.fetchall() docs = {} for r in result: - app = [k for k, v in self.app_mapping.iteritems() if v == r[0]][0] - if docs.has_key(app): + app = [k for k, v in self.app_mapping.items() if v == r[0]][0] + if app in docs: 
docs[app].append((r[1], r[2])) else: docs[app] = [(r[1], r[2])] # Ex: {'pres-rules': [('index.html', '4564fd9c9a2a2e3e796310b00c9908aa')]} return docs def get_documents_list(self, uri): return self.conn.runInteraction(self._get_documents_list, uri) installSignalHandlers = True def auth_db_connection(uri): conn = connectionForURI(uri) return conn def storage_db_connection(uri): conn = connectionForURI(uri) def cb(res): if res[0:1][0:1] and res[0][0]: - print '%s xcap documents in the database' % res[0][0] + print('%s xcap documents in the database' % res[0][0]) return res def eb(fail): fail.printTraceback() return fail # connect early, so database problem are detected early d = conn.runQuery('SELECT count(*) from %s' % Config.xcap_table) d.addCallback(cb) d.addErrback(eb) return conn diff --git a/xcap/backend/opensips.py b/xcap/backend/opensips.py index 62dd40c..b133e7e 100644 --- a/xcap/backend/opensips.py +++ b/xcap/backend/opensips.py @@ -1,123 +1,121 @@ """Implementation of an OpenSIPS backend.""" import re from application import log from application.configuration import ConfigSection, ConfigSetting from application.configuration.datatypes import IPAddress from application.notification import IObserver, NotificationCenter from application.python import Null from application.python.types import Singleton from sipsimple.core import Engine, FromHeader, Header, Publication, RouteHeader, SIPURI from sipsimple.configuration.datatypes import SIPProxyAddress from sipsimple.threading import run_in_twisted_thread from zope.interface import implements import xcap from xcap.datatypes import XCAPRootURI from xcap.backend import database from xcap.xcapdiff import Notifier class ServerConfig(ConfigSection): __cfgfile__ = xcap.__cfgfile__ __section__ = 'Server' address = ConfigSetting(type=IPAddress, value='0.0.0.0') root = ConfigSetting(type=XCAPRootURI, value=None) class Config(ConfigSection): __cfgfile__ = xcap.__cfgfile__ __section__ = 'OpenSIPS' publish_xcapdiff = False 
outbound_sip_proxy = '' class PlainPasswordChecker(database.PlainPasswordChecker): pass class HashPasswordChecker(database.HashPasswordChecker): pass -class SIPNotifier(object): - __metaclass__ = Singleton - +class SIPNotifier(object, metaclass=Singleton): implements(IObserver) def __init__(self): self.engine = Engine() self.engine.start( ip_address=None if ServerConfig.address == '0.0.0.0' else ServerConfig.address, user_agent="OpenXCAP %s" % xcap.__version__, ) self.sip_prefix_re = re.compile('^sips?:') try: self.outbound_proxy = SIPProxyAddress.from_description(Config.outbound_sip_proxy) except ValueError: log.warning('Invalid SIP proxy address specified: %s' % Config.outbound_sip_proxy) self.outbound_proxy = None def send_publish(self, uri, body): if self.outbound_proxy is None: return uri = self.sip_prefix_re.sub('', uri) publication = Publication(FromHeader(SIPURI(uri)), "xcap-diff", "application/xcap-diff+xml", duration=0, extra_headers=[Header('Thor-Scope', 'publish-xcap')]) NotificationCenter().add_observer(self, sender=publication) route_header = RouteHeader(SIPURI(host=self.outbound_proxy.host, port=self.outbound_proxy.port, parameters=dict(transport=self.outbound_proxy.transport))) publication.publish(body, route_header, timeout=5) @run_in_twisted_thread def handle_notification(self, notification): handler = getattr(self, '_NH_%s' % notification.name, Null) handler(notification) def _NH_SIPPublicationDidSucceed(self, notification): log.info('PUBLISH for xcap-diff event successfully sent to %s for %s' % (notification.data.route_header.uri, notification.sender.from_header.uri)) def _NH_SIPPublicationDidEnd(self, notification): log.info('PUBLISH for xcap-diff event ended for %s' % notification.sender.from_header.uri) notification.center.remove_observer(self, sender=notification.sender) def _NH_SIPPublicationDidFail(self, notification): log.info('PUBLISH for xcap-diff event failed to %s for %s' % (notification.data.route_header.uri, 
notification.sender.from_header.uri)) notification.center.remove_observer(self, sender=notification.sender) class NotifyingStorage(database.Storage): def __init__(self): super(NotifyingStorage, self).__init__() self._sip_notifier = SIPNotifier() self.notifier = Notifier(ServerConfig.root, self._sip_notifier.send_publish) def put_document(self, uri, document, check_etag): d = super(NotifyingStorage, self).put_document(uri, document, check_etag) d.addCallback(lambda result: self._on_put(result, uri)) return d def _on_put(self, result, uri): if result.succeed: self.notifier.on_change(uri, result.old_etag, result.etag) return result def delete_document(self, uri, check_etag): d = super(NotifyingStorage, self).delete_document(uri, check_etag) d.addCallback(lambda result: self._on_delete(result, uri)) return d def _on_delete(self, result, uri): if result.succeed: self.notifier.on_change(uri, result.old_etag, None) return result if Config.publish_xcapdiff: Storage = NotifyingStorage else: Storage = database.Storage installSignalHandlers = database.installSignalHandlers diff --git a/xcap/backend/sipthor.py b/xcap/backend/sipthor.py index 9eba81a..8652aa6 100644 --- a/xcap/backend/sipthor.py +++ b/xcap/backend/sipthor.py @@ -1,596 +1,590 @@ import re import signal import cjson from formencode import validators from application import log from application.notification import IObserver, NotificationCenter from application.python import Null from application.python.types import Singleton from application.system import host from application.process import process from application.configuration import ConfigSection, ConfigSetting from application.configuration.datatypes import IPAddress from sqlobject import sqlhub, connectionForURI, SQLObject, AND from sqlobject import StringCol, IntCol, DateTimeCol, SOBLOBCol, Col from sqlobject import MultipleJoin, ForeignKey from zope.interface import implements from twisted.internet import reactor from twisted.internet import defer from 
twisted.internet.defer import Deferred, maybeDeferred from twisted.cred.checkers import ICredentialsChecker from twisted.cred.credentials import IUsernamePassword, IUsernameHashedPassword from twisted.cred.error import UnauthorizedLogin from thor.link import ControlLink, Notification, Request from thor.eventservice import EventServiceClient, ThorEvent from thor.entities import ThorEntitiesRoleMap, GenericThorEntity as ThorEntity from gnutls.interfaces.twisted import TLSContext, X509Credentials from sipsimple.core import Engine, FromHeader, Header, Publication, RouteHeader, SIPURI from sipsimple.threading import run_in_twisted_thread from sipsimple.configuration.datatypes import Port import xcap from xcap.tls import Certificate, PrivateKey from xcap.backend import StatusResponse from xcap.datatypes import XCAPRootURI from xcap.dbutil import make_random_etag from xcap.xcapdiff import Notifier class ThorNodeConfig(ConfigSection): __cfgfile__ = xcap.__cfgfile__ __section__ = 'ThorNetwork' domain = "sipthor.net" multiply = 1000 certificate = ConfigSetting(type=Certificate, value=None) private_key = ConfigSetting(type=PrivateKey, value=None) ca = ConfigSetting(type=Certificate, value=None) class ServerConfig(ConfigSection): __cfgfile__ = xcap.__cfgfile__ __section__ = 'Server' address = ConfigSetting(type=IPAddress, value='0.0.0.0') root = ConfigSetting(type=XCAPRootURI, value=None) tcp_port = ConfigSetting(type=Port, value=35060) class JSONValidator(validators.Validator): def to_python(self, value, state): if value is None: return None try: return cjson.decode(value) except Exception: raise validators.Invalid("expected a decodable JSON object in the JSONCol '%s', got %s %r instead" % (self.name, type(value), value), value, state) def from_python(self, value, state): if value is None: return None try: return cjson.encode(value) except Exception: raise validators.Invalid("expected an encodable JSON object in the JSONCol '%s', got %s %r instead" % (self.name, type(value), 
value), value, state) class SOJSONCol(SOBLOBCol): def createValidators(self): return [JSONValidator()] + super(SOJSONCol, self).createValidators() class JSONCol(Col): baseClass = SOJSONCol class SipAccount(SQLObject): class sqlmeta: table = 'sip_accounts_meta' username = StringCol(length=64) domain = StringCol(length=64) firstName = StringCol(length=64) lastName = StringCol(length=64) email = StringCol(length=64) customerId = IntCol(default=0) resellerId = IntCol(default=0) ownerId = IntCol(default=0) changeDate = DateTimeCol(default=DateTimeCol.now) ## joins data = MultipleJoin('SipAccountData', joinColumn='account_id') def _set_profile(self, value): data = list(self.data) if not data: SipAccountData(account=self, profile=value) else: data[0].profile = value def _get_profile(self): return self.data[0].profile def set(self, **kwargs): kwargs = kwargs.copy() profile = kwargs.pop('profile', None) SQLObject.set(self, **kwargs) if profile is not None: self._set_profile(profile) class SipAccountData(SQLObject): class sqlmeta: table = 'sip_accounts_data' account = ForeignKey('SipAccount', cascade=True) profile = JSONCol() class ThorEntityAddress(str): def __new__(cls, ip, control_port=None, version='unknown'): instance = str.__new__(cls, ip) instance.ip = ip instance.version = version instance.control_port = control_port return instance class GetSIPWatchers(Request): def __new__(cls, account): command = "get sip_watchers for %s" % account instance = Request.__new__(cls, command) return instance -class XCAPProvisioning(EventServiceClient): - __metaclass__ = Singleton +class XCAPProvisioning(EventServiceClient, metaclass=Singleton): topics = ["Thor.Members"] def __init__(self): self._database = DatabaseConnection() self.node = ThorEntity(host.default_ip if ServerConfig.address == '0.0.0.0' else ServerConfig.address, ['xcap_server'], version=xcap.__version__) self.networks = {} self.presence_message = ThorEvent('Thor.Presence', self.node.id) self.shutdown_message = 
ThorEvent('Thor.Leave', self.node.id) credentials = X509Credentials(ThorNodeConfig.certificate, ThorNodeConfig.private_key, [ThorNodeConfig.ca]) credentials.verify_peer = True tls_context = TLSContext(credentials) self.control = ControlLink(tls_context) EventServiceClient.__init__(self, ThorNodeConfig.domain, tls_context) process.signals.add_handler(signal.SIGHUP, self._handle_signal) process.signals.add_handler(signal.SIGINT, self._handle_signal) process.signals.add_handler(signal.SIGTERM, self._handle_signal) def _disconnect_all(self, result): self.control.disconnect_all() EventServiceClient._disconnect_all(self, result) def lookup(self, key): network = self.networks.get("sip_proxy", None) if network is None: return None try: node = network.lookup_node(key) except LookupError: node = None except Exception: log.exception() node = None return node def notify(self, operation, entity_type, entity): node = self.lookup(entity) if node is not None: if node.control_port is None: log.error("Could not send notify because node %s has no control port" % node.ip) return self.control.send_request(Notification("notify %s %s %s" % (operation, entity_type, entity)), (node.ip, node.control_port)) def get_watchers(self, key): node = self.lookup(key) if node is None: return defer.fail("no nodes found when searching for key %s" % str(key)) if node.control_port is None: return defer.fail("could not send notify because node %s has no control port" % node.ip) request = GetSIPWatchers(key) request.deferred = Deferred() self.control.send_request(request, (node.ip, node.control_port)) return request.deferred def handle_event(self, event): # print "Received event: %s" % event networks = self.networks role_map = ThorEntitiesRoleMap(event.message) ## mapping between role names and lists of nodes with that role thor_databases = role_map.get('thor_database', []) if thor_databases: thor_databases.sort(lambda x, y: cmp(x.priority, y.priority) or cmp(x.ip, y.ip)) dburi = thor_databases[0].dburi 
else: dburi = None self._database.update_dburi(dburi) - all_roles = role_map.keys() + networks.keys() + all_roles = list(role_map.keys()) + list(networks.keys()) for role in all_roles: try: network = networks[role] ## avoid setdefault here because it always evaluates the 2nd argument except KeyError: from thor import network as thor_network if role in ["thor_manager", "thor_monitor", "provisioning_server", "media_relay", "thor_database"]: continue else: network = thor_network.new(ThorNodeConfig.multiply) networks[role] = network new_nodes = set([ThorEntityAddress(node.ip, getattr(node, 'control_port', None), getattr(node, 'version', 'unknown')) for node in role_map.get(role, [])]) old_nodes = set(network.nodes) added_nodes = new_nodes - old_nodes removed_nodes = old_nodes - new_nodes if removed_nodes: for node in removed_nodes: network.remove_node(node) self.control.discard_link((node.ip, node.control_port)) log.info('Removed %s nodes: %s' % (role, ', '.join(removed_nodes))) if added_nodes: for node in added_nodes: network.add_node(node) log.info('Added %s nodes: %s' % (role, ', '.join(added_nodes))) # print('Thor %s nodes: %s' % (role, str(network.nodes))) class NotFound(Exception): pass class NoDatabase(Exception): pass -class DatabaseConnection(object): - __metaclass__ = Singleton - +class DatabaseConnection(object, metaclass=Singleton): def __init__(self): self.dburi = None # Methods to be called from the Twisted thread: def put(self, uri, document, check_etag, new_etag): defer = Deferred() operation = lambda profile: self._put_operation(uri, document, check_etag, new_etag, profile) reactor.callInThread(self.retrieve_profile, uri.user.username, uri.user.domain, operation, True, defer) return defer def delete(self, uri, check_etag): defer = Deferred() operation = lambda profile: self._delete_operation(uri, check_etag, profile) reactor.callInThread(self.retrieve_profile, uri.user.username, uri.user.domain, operation, True, defer) return defer def delete_all(self, 
uri): defer = Deferred() operation = lambda profile: self._delete_all_operation(uri, profile) reactor.callInThread(self.retrieve_profile, uri.user.username, uri.user.domain, operation, True, defer) return defer def get(self, uri): defer = Deferred() operation = lambda profile: self._get_operation(uri, profile) reactor.callInThread(self.retrieve_profile, uri.user.username, uri.user.domain, operation, False, defer) return defer def get_profile(self, username, domain): defer = Deferred() reactor.callInThread(self.retrieve_profile, username, domain, lambda profile: profile, False, defer) return defer def get_documents_list(self, uri): defer = Deferred() operation = lambda profile: self._get_documents_list_operation(uri, profile) reactor.callInThread(self.retrieve_profile, uri.user.username, uri.user.domain, operation, False, defer) return defer # Methods to be called in a separate thread: def _put_operation(self, uri, document, check_etag, new_etag, profile): xcap_docs = profile.setdefault("xcap", {}) try: etag = xcap_docs[uri.application_id][uri.doc_selector.document_path][1] except KeyError: found = False etag = None check_etag(None, False) else: found = True check_etag(etag) xcap_app = xcap_docs.setdefault(uri.application_id, {}) xcap_app[uri.doc_selector.document_path] = (document, new_etag) return found, etag, new_etag def _delete_operation(self, uri, check_etag, profile): xcap_docs = profile.setdefault("xcap", {}) try: etag = xcap_docs[uri.application_id][uri.doc_selector.document_path][1] except KeyError: raise NotFound() check_etag(etag) del(xcap_docs[uri.application_id][uri.doc_selector.document_path]) return (etag) def _delete_all_operation(self, uri, profile): xcap_docs = profile.setdefault("xcap", {}) xcap_docs.clear() return None def _get_operation(self, uri, profile): try: xcap_docs = profile["xcap"] doc, etag = xcap_docs[uri.application_id][uri.doc_selector.document_path] except KeyError: raise NotFound() return doc, etag def 
_get_documents_list_operation(self, uri, profile): try: xcap_docs = profile["xcap"] except KeyError: raise NotFound() return xcap_docs def retrieve_profile(self, username, domain, operation, update, defer): transaction = None try: if self.dburi is None: raise NoDatabase() transaction = sqlhub.processConnection.transaction() try: db_account = SipAccount.select(AND(SipAccount.q.username == username, SipAccount.q.domain == domain), connection = transaction, forUpdate = update)[0] except IndexError: raise NotFound() profile = db_account.profile result = operation(profile) # NB: may modify profile! if update: db_account.profile = profile transaction.commit(close=True) - except Exception, e: + except Exception as e: if transaction: transaction.rollback() reactor.callFromThread(defer.errback, e) else: reactor.callFromThread(defer.callback, result) finally: if transaction: transaction.cache.clear() def update_dburi(self, dburi): if self.dburi != dburi: if self.dburi is not None: sqlhub.processConnection.close() if dburi is None: sqlhub.processConnection else: sqlhub.processConnection = connectionForURI(dburi) self.dburi = dburi class SipthorPasswordChecker(object): implements(ICredentialsChecker) credentialInterfaces = (IUsernamePassword, IUsernameHashedPassword) def __init__(self): self._database = DatabaseConnection() def _query_credentials(self, credentials): username, domain = credentials.username.split('@', 1)[0], credentials.realm result = self._database.get_profile(username, domain) result.addCallback(self._got_query_results, credentials) result.addErrback(self._got_unsuccessfull) return result def _got_unsuccessfull(self, failure): failure.trap(NotFound) raise UnauthorizedLogin("Unauthorized login") def _got_query_results(self, profile, credentials): return self._authenticate_credentials(profile, credentials) def _authenticate_credentials(self, profile, credentials): raise NotImplementedError def _checkedPassword(self, matched, username, realm): if matched: 
username = username.split('@', 1)[0] ## this is the avatar ID return "%s@%s" % (username, realm) else: raise UnauthorizedLogin("Unauthorized login") def requestAvatarId(self, credentials): """Return the avatar ID for the credentials which must have the username and realm attributes, or an UnauthorizedLogin in case of a failure.""" d = self._query_credentials(credentials) return d class PlainPasswordChecker(SipthorPasswordChecker): """A credentials checker against a database subscriber table, where the passwords are stored in plain text.""" implements(ICredentialsChecker) def _authenticate_credentials(self, profile, credentials): return maybeDeferred( credentials.checkPassword, profile["password"]).addCallback( self._checkedPassword, credentials.username, credentials.realm) class HashPasswordChecker(SipthorPasswordChecker): """A credentials checker against a database subscriber table, where the passwords are stored as MD5 hashes.""" implements(ICredentialsChecker) def _authenticate_credentials(self, profile, credentials): return maybeDeferred( credentials.checkHash, profile["ha1"]).addCallback( self._checkedPassword, credentials.username, credentials.realm) -class SIPNotifier(object): - __metaclass__ = Singleton - +class SIPNotifier(object, metaclass=Singleton): implements(IObserver) def __init__(self): self.provisioning = XCAPProvisioning() self.engine = Engine() self.engine.start( ip_address=None if ServerConfig.address == '0.0.0.0' else ServerConfig.address, tcp_port=ServerConfig.tcp_port, user_agent="OpenXCAP %s" % xcap.__version__, ) def send_publish(self, uri, body): uri = re.sub("^(sip:|sips:)", "", uri) destination_node = self.provisioning.lookup(uri) if destination_node is not None: # TODO: add configuration settings for SIP transport. 
-Saul publication = Publication(FromHeader(SIPURI(uri)), "xcap-diff", "application/xcap-diff+xml", duration=0, extra_headers=[Header('Thor-Scope', 'publish-xcap')]) NotificationCenter().add_observer(self, sender=publication) route_header = RouteHeader(SIPURI(host=str(destination_node), port='5060', parameters=dict(transport='tcp'))) publication.publish(body, route_header, timeout=5) @run_in_twisted_thread def handle_notification(self, notification): handler = getattr(self, '_NH_%s' % notification.name, Null) handler(notification) def _NH_SIPPublicationDidSucceed(self, notification): log.info('PUBLISH xcap-diff sent to %s for %s' % (notification.data.route_header.uri, notification.sender.from_header.uri)) def _NH_SIPPublicationDidEnd(self, notification): #log.info('PUBLISH for xcap-diff event ended for %s' % notification.sender.from_header.uri) NotificationCenter().remove_observer(self, sender=notification.sender) def _NH_SIPPublicationDidFail(self, notification): log.info('PUBLISH xcap-diff failed to %s for %s' % (notification.data.route_header.uri, notification.sender.from_header.uri)) NotificationCenter().remove_observer(self, sender=notification.sender) -class Storage(object): - __metaclass__ = Singleton - +class Storage(object, metaclass=Singleton): def __init__(self): self._database = DatabaseConnection() self._provisioning = XCAPProvisioning() self._sip_notifier = SIPNotifier() self._notifier = Notifier(ServerConfig.root, self._sip_notifier.send_publish) def _normalize_document_path(self, uri): if uri.application_id in ("pres-rules", "org.openmobilealliance.pres-rules"): # some clients e.g. 
counterpath's eyebeam save presence rules under # different filenames between versions and they expect to find the same # information, thus we are forcing all presence rules documents to be # saved under "index.xml" default filename uri.doc_selector.document_path = "index.xml" def get_document(self, uri, check_etag): self._normalize_document_path(uri) result = self._database.get(uri) result.addCallback(self._got_document, check_etag) result.addErrback(self._eb_not_found) return result def _eb_not_found(self, failure): failure.trap(NotFound) return StatusResponse(404) - def _got_document(self, (doc, etag), check_etag): + def _got_document(self, xxx_todo_changeme, check_etag): + (doc, etag) = xxx_todo_changeme check_etag(etag) return StatusResponse(200, etag, doc.encode('utf-8')) def put_document(self, uri, document, check_etag): document = document.decode('utf-8') self._normalize_document_path(uri) etag = make_random_etag(uri) result = self._database.put(uri, document, check_etag, etag) result.addCallback(self._cb_put, uri, "%s@%s" % (uri.user.username, uri.user.domain)) result.addErrback(self._eb_not_found) return result def _cb_put(self, result, uri, thor_key): if result[0]: code = 200 else: code = 201 self._provisioning.notify("update", "sip_account", thor_key) self._notifier.on_change(uri, result[1], result[2]) return StatusResponse(code, result[2]) def delete_documents(self, uri): result = self._database.delete_all(uri) result.addCallback(self._cb_delete_all, uri, "%s@%s" % (uri.user.username, uri.user.domain)) result.addErrback(self._eb_not_found) return result def _cb_delete_all(self, result, uri, thor_key): self._provisioning.notify("update", "sip_account", thor_key) return StatusResponse(200) def delete_document(self, uri, check_etag): self._normalize_document_path(uri) result = self._database.delete(uri, check_etag) result.addCallback(self._cb_delete, uri, "%s@%s" % (uri.user.username, uri.user.domain)) result.addErrback(self._eb_not_found) return result 
def _cb_delete(self, result, uri, thor_key): self._provisioning.notify("update", "sip_account", thor_key) self._notifier.on_change(uri, result[1], None) return StatusResponse(200) def get_watchers(self, uri): thor_key = "%s@%s" % (uri.user.username, uri.user.domain) result = self._provisioning.get_watchers(thor_key) result.addCallback(self._get_watchers_decode) return result def _get_watchers_decode(self, response): if response.code == 200: watchers = cjson.decode(response.data) for watcher in watchers: watcher["online"] = str(watcher["online"]).lower() return watchers else: - print "error: %s" % response + print("error: %s" % response) def get_documents_list(self, uri): result = self._database.get_documents_list(uri) result.addCallback(self._got_documents_list) result.addErrback(self._got_documents_list_error) return result def _got_documents_list(self, xcap_docs): docs = {} if xcap_docs: - for k, v in xcap_docs.iteritems(): - for k2, v2 in v.iteritems(): - if docs.has_key(k): + for k, v in xcap_docs.items(): + for k2, v2 in v.items(): + if k in docs: docs[k].append((k2, v2[1])) else: docs[k] = [(k2, v2[1])] return docs def _got_documents_list_error(self, failure): failure.trap(NotFound) return {} installSignalHandlers = False diff --git a/xcap/datatypes.py b/xcap/datatypes.py index e3f9d29..f61ee2c 100644 --- a/xcap/datatypes.py +++ b/xcap/datatypes.py @@ -1,66 +1,66 @@ """Configuration data types""" import re -import urlparse +import urllib.parse from application import log class XCAPRootURI(str): """An XCAP root URI and a number of optional aliases""" def __new__(cls, value): if value is None: return None - elif not isinstance(value, basestring): + elif not isinstance(value, str): raise TypeError("value must be a string, unicode or None") if value.strip() == '': return None valid_uris = [] for uri in re.split(r'\s*,\s*', value): - scheme, host, path, params, query, fragment = urlparse.urlparse(uri) + scheme, host, path, params, query, fragment = 
def make_random_etag(uri):
    """Return a fresh, effectively unique etag for *uri*.

    Python 3 port fix: hashlib.md5() requires bytes, so the formatted
    string is encoded before hashing (the 2to3 output passed str, which
    raises TypeError at runtime).
    """
    return md5(("%s%s%s" % (uri, time.time(), random.random())).encode('utf-8')).hexdigest()


def make_etag(uri, document):
    """Return a deterministic etag derived from *uri* and *document*."""
    return md5(("%s%s" % (uri, document)).encode('utf-8')).hexdigest()


def parseURI(uri):
    """Split a database URI into (schema, user, password, host, port, db).

    Expected shape: ``schema://user[:password]@host[:port]/database``.

    Raises:
        ValueError: when the database, user or host component is missing,
            or when the port is not an integer in the range 1-65535.
        AssertionError: when the URI does not contain ``://``.
    """
    schema, rest = uri.split(':', 1)
    assert rest.startswith('//'), "DB URIs must start with scheme:// -- you did not include a / (in %r)" % rest
    rest = rest[2:]
    if rest.find('/') != -1:
        host, rest = rest.split('/', 1)
    else:
        raise ValueError("You MUST specify a database in the DB URI.")
    if host and host.find('@') != -1:
        user, host = host.split('@', 1)
        if user.find(':') != -1:
            user, password = user.split(':', 1)
        else:
            password = None
        if not user:
            raise ValueError("You MUST specify a user in the DB URI.")
    else:
        raise ValueError("You MUST specify a host in the DB URI.")
    if host and host.find(':') != -1:
        host, port = host.split(':')
        try:
            port = int(port)
        except ValueError:
            raise ValueError("port must be integer, got '%s' instead" % port)
        if not (1 <= port <= 65535):
            raise ValueError("port must be integer in the range 1-65535, got '%d' instead" % port)
    else:
        port = None
    db = rest
    return schema, user, password, host, port, db
def repeat_on_error(N, errorinfo, func, *args, **kwargs):
    """Call ``func(*args, **kwargs)`` and retry on matching failures.

    The call is retried at most *N* additional times, but only while the
    failure's wrapped exception is an instance of *errorinfo*.  Returns
    the Deferred of the first attempt; retries are chained onto it via
    errbacks, so the caller sees the final outcome.
    """
    remaining = [N]  # boxed so the closure can mutate it

    def _retry(failure):
        # Retry only matching errors and only while attempts remain;
        # otherwise pass the failure through unchanged.
        if isinstance(failure.value, errorinfo) and remaining[0] > 0:
            remaining[0] -= 1
            attempt = func(*args, **kwargs)
            attempt.addErrback(_retry)
            return attempt
        return failure

    deferred = func(*args, **kwargs)
    deferred.addErrback(_retry)
    return deferred
One reason is that XPATH only implements locating an element but not an insertion point for an element selector which does not point to an existing element (but will point to the inserted element after PUT). For element selectors of type *[@att="value"] insertion point depends on the content of a new element. For RFC compliant behavior, fix such requests by replacing '*' with the root tag of the new element. """ -from StringIO import StringIO +from io import StringIO from xcap import uri from xml import sax def make_parser(): parser = sax.make_parser(['xcap.sax.expatreader']) parser.setFeature(sax.handler.feature_namespaces, 1) parser.setFeature(sax.handler.feature_namespace_prefixes, 1) return parser class ThrowEventsAway(sax.ContentHandler): pass def check_xml_fragment(element_str): """Run SAX parser on element_str to check its well-formedness. Ignore unbound namespaces prefixes. >>> check_xml_fragment("") >>> check_xml_fragment(''' ... Test ... ''') >>> check_xml_fragment("") Traceback (most recent call last): ... SAXParseException: :1:7: mismatched tag >>> check_xml_fragment("") Traceback (most recent call last): ... SAXParseException: :1:5: not well-formed (invalid token) >>> check_xml_fragment("") Traceback (most recent call last): ... SAXParseException: :1:7: junk after document element >>> check_xml_fragment("") Traceback (most recent call last): ... 
SAXParseException: :1:4: not well-formed (invalid token) """ parser = sax.make_parser(['xcap.sax.expatreader']) # ignore namespaces and prefixes parser.setFeature(sax.handler.feature_namespaces, 0) parser.setFeature(sax.handler.feature_namespace_prefixes, 0) parser.setContentHandler(ThrowEventsAway()) parser.parse(StringIO(element_str)) class Step(object): # to be matched against uri.Step def __init__(self, name, position = 0): self.name = name # this integer holds index of a child element currently in processing self.position = position def __repr__(self): return '%s[pos=%s]' % (self.name, self.position) class ContentHandlerBase(sax.ContentHandler): def __init__(self, selector): sax.ContentHandler.__init__(self) self.selector = selector self.state = None self.locator = None def setDocumentLocator(self, locator): self.locator = locator def pos(self): return self.locator._ref._parser.CurrentByteIndex def set_state(self, new_state): #print new_state, 'at %s' % str(self.pos()) self.state = new_state def set_end_pos(self, end_pos, end_tag = None, end_pos_2 = None): self.end_pos = end_pos self.end_tag = end_tag self.end_pos_2 = end_pos_2 def fix_end_pos(self, document): if self.end_tag is not None and self.end_tag in document[self.end_pos:self.end_pos_2]: if self.end_pos_2 is None: self.end_pos = 1 + document.index('>', self.end_pos) else: self.end_pos = 1 + document.index('>', self.end_pos, self.end_pos_2) def __repr__(self): return '<%s selector=%r state=%r>' % (self.__class__.__name__, self.selector, self.state) class ElementLocator(ContentHandlerBase): """Locates element in a document by element selector expression (subset of XPATH defined in RFC 4825) There's an intentional difference from XPATH (at least as implemented in lxml): tail following the closing tag is not included in the end result (this doesn't make sense for XCAP and incompatible with some of the requirements in RFC). 
""" def startDocument(self): if self.locator is None: raise RuntimeError("The parser doesn't support locators") self.path = [] self.state = 'LOOKING' self.curstep = 0 self.skiplevel = 0 self.set_end_pos(None, None, None) def startElementNS(self, name, qname, attrs): #print '-' * (len(self.path) + self.skiplevel), '<', name, '/' + '/'.join(map(str, self.path)) if self.state=='DONE' and self.end_pos_2 is None: self.end_pos_2 = self.pos() if self.skiplevel>0: self.skiplevel += 1 return if self.curstep>=len(self.selector): self.skiplevel = 1 return if self.path: parent = self.path[-1] else: parent = None curstep = self.selector[self.curstep] #print `name`, `curstep.name` if curstep.name == '*' or curstep.name == name: if parent: parent.position += 1 else: self.skiplevel = 1 return if parent is None: if curstep.position not in [None, 1]: self.skiplevel = 1 return else: if curstep.position is not None and curstep.position != parent.position: self.skiplevel = 1 return if curstep.att_name is not None and attrs.get(curstep.att_name)!=curstep.att_value: self.skiplevel = 1 return #print '%r matched' % curstep self.curstep += 1 self.path.append(Step(qname)) if len(self.path)==len(self.selector): if self.state=='LOOKING': self.set_state('FOUND') self.start_pos = self.pos() elif self.state=='DONE': self.set_state('MANY') def endElementNS(self, name, qname): #print '-' * (len(self.path) + self.skiplevel-1), '>', name, '/' + '/'.join(map(str, self.path)) if self.state=='DONE' and self.end_pos_2 is None: self.end_pos_2 = self.pos() if self.skiplevel>0: self.skiplevel -= 1 return if len(self.path)==len(self.selector) and self.state=='FOUND': self.set_state('DONE') # QQQ why qname passed to endElementNS is None? qname = self.path[-1].name self.set_end_pos(self.pos(), '') # where does pos() point to? two cases: # 1. ....*HERE* # 2. *HERE*... 
# If it's the first case we need to adjust pos() by len('') # To determine the case, let's mark the position of the next startElement/endElement # and see if there '' substring right after end_pos limited by end_pos_2 # 1. ....*end_pos*...*end_pos_2*<... # 2. *end_pos*...*end_pos_2*<... element = self.path.pop() self.curstep -= 1 class InsertPointLocator(ContentHandlerBase): """Locate the insertion point -- where in the document a new element should be inserted. It operates under assumption that the request didn't yield any matches with ElementLocator (its state was 'LOOKING' after parsing). Note, that this class doesn't know what will be inserted and therefore may do not do what you want with requests like 'labels/*[att="new-att"]'. """ def startDocument(self): if self.locator is None: raise RuntimeError("The parser doesn't support locators") self.path = [] self.state = 'LOOKING' self.curstep = 0 self.skiplevel = 0 self.set_end_pos(None, None, None) def startElementNS(self, name, qname, attrs): #print '<' * (1+len(self.path) + self.skiplevel), name, '/' + '/'.join(map(str, self.path)), #print self.curstep, self.skiplevel if self.state=='DONE' and self.end_pos_2 is None: self.end_pos_2 = self.pos() if self.skiplevel>0: self.skiplevel += 1 return if self.curstep>=len(self.selector): self.skiplevel = 1 return if self.path: parent = self.path[-1] else: parent = None curstep = self.selector[self.curstep] if curstep.name == '*' or curstep.name == name: if parent: parent.position += 1 else: self.skiplevel = 1 return is_last_step = len(self.path)+1 == len(self.selector) if not is_last_step: if curstep.position is not None and curstep.position != parent.position: self.skiplevel = 1 return if curstep.att_name is not None and \ attrs.get(curstep.att_name)!=curstep.att_value: self.skiplevel = 1 return else: if curstep.position == 1 and parent.position == 1: self.set_state('DONE') self.set_end_pos(self.pos(), end_pos_2=self.pos()) self.curstep += 1 
self.path.append(Step(qname)) def endElementNS(self, name, qname): #print '>' * (1+len(self.path)+self.skiplevel-1), name, '/' + '/'.join(map(str, self.path)), #print self.curstep, self.skiplevel if self.state=='DONE' and self.end_pos_2 is None: self.end_pos_2 = self.pos() if self.skiplevel>0: self.skiplevel -= 1 return qname = self.path[-1].name curstep = self.selector[-1] if len(self.path)==len(self.selector): parent = self.path[-2] if curstep.position is None: if self.state=='DONE': self.set_state('MANY') else: self.set_state('CLOSED') self.set_end_pos(self.pos(), '') elif curstep.position-1 == parent.position: if self.state=='DONE': self.set_state('MANY') else: self.set_state('DONE') self.set_end_pos(self.pos(), '') elif len(self.path)+1==len(self.selector): if self.state == 'CLOSED': self.set_state('DONE') if curstep.name=='*' and curstep.position is None: self.set_end_pos(self.pos(), end_pos_2 = self.pos()) elif self.state == 'LOOKING': self.set_state('DONE') self.set_end_pos(self.pos(), end_pos_2 = self.pos()) element = self.path.pop() self.curstep -= 1 class LocatorError(ValueError): def __init__(self, msg, handler=None): ValueError.__init__(self, msg) self.handler = handler @staticmethod def generate_error(locator, element_selector): if locator.state == 'LOOKING': return None elif locator.state == 'MANY': raise SelectorError(element_selector._original_string, locator) else: raise LocatorError('Internal error in %s' % locator.__class__.__name__, locator) class SelectorError(LocatorError): http_error = 404 def __init__(self, selector, handler=None): msg = 'The requested node selector %s matches more than one element' % selector LocatorError.__init__(self, msg, handler) def find(document, element_selector): """Return an element as (first index, last index+1) If it couldn't be found, return None. If there're several matches, raise SelectorError. 
def find(document, element_selector):
    """Locate an element; return (start index, end index + 1) or None.

    Returns None when nothing matches; raises SelectorError when the
    selector matches more than one element.
    """
    parser = make_parser()
    locator = ElementLocator(element_selector)
    parser.setContentHandler(locator)
    parser.parse(StringIO(document))
    if locator.state == 'DONE':
        locator.fix_end_pos(document)
        return locator.start_pos, locator.end_pos
    return LocatorError.generate_error(locator, element_selector)


def get(document, element_selector):
    """Return the selected element as a string, or None if not found.

    Propagates SelectorError (raised by find) on ambiguous selectors.
    """
    span = find(document, element_selector)
    if span is None:
        return None
    begin, stop = span
    return document[begin:stop]


def delete(document, element_selector):
    """Return a copy of *document* with the selected element removed.

    Returns None if nothing matched; propagates SelectorError on
    ambiguous selectors.
    """
    span = find(document, element_selector)
    if span is None:
        return None
    begin, stop = span
    return document[:begin] + document[stop:]


def put(document, element_selector, element_str):
    """Insert or replace an element; return (new_document, created).

    ``created`` is True when *element_str* was inserted at a computed
    insertion point, False when an existing match was replaced.  Returns
    None when no valid insertion point exists; raises SelectorError when
    the selector (or the insertion point) is ambiguous.
    """
    span = find(document, element_selector)
    if span is not None:
        begin, stop = span
        created = False
    else:
        locator = InsertPointLocator(element_selector)
        parser = make_parser()
        parser.setContentHandler(locator)
        parser.parse(StringIO(document))
        if locator.state != 'DONE':
            return LocatorError.generate_error(locator, element_selector)
        locator.fix_end_pos(document)
        begin = stop = locator.end_pos
        created = True
    return (document[:begin] + element_str + document[stop:], created)
# A: when sax.make_parser() was called once, I've occasionaly encountered an exception like this: # # File "/usr/lib/python2.5/site-packages/xcap/appusage/__init__.py", line 178, in _cb_get_element # result = XCAPElement.get(response.data, uri.node_selector.element_selector) # File "/usr/lib/python2.5/site-packages/xcap/element.py", line 323, in get # location = cls.find(document, element_selector) # File "/usr/lib/python2.5/site-packages/xcap/element.py", line 308, in find # cls.parser.setContentHandler(el) # File "/usr/lib/python2.5/site-packages/_xmlplus/sax/expatreader.py", line 128, in setContentHandler # self._reset_cont_handler() # File "/usr/lib/python2.5/site-packages/_xmlplus/sax/expatreader.py", line 234, in _reset_cont_handler # self._cont_handler.processingInstruction # exceptions.AttributeError: 'NoneType' object has no attribute 'ProcessingInstructionHandler' # # I have no idea what does that mean, but probably something to do with parser's state becoming invalid # under some circumstances. class _test(object): source1 = """ hello hi! """ source2 = """ """ rls_services_xml = """ http://xcap.example.com/resource-lists/users/sip:joe@example.com/index/~~/resource-lists/list%5b@name=%22l1%22%5d presence presence """ @staticmethod def trim(s0): "remove tail from the result" s = s0 while s and s[-1]!='>': s = s[:-1] if s: return s else: return s0 @classmethod def lxml_xpath_get(cls, xpath_expr, source=source1, namespace=None, namespaces={}): "First, use xpath from lxml, which should produce the same results for existing nodes" assert '/'.startswith(xpath_expr[:1]), xpath_expr doc = etree.parse(StringIO(source)) try: # where to put namespace? 
r = doc.xpath(xpath_expr, namespaces=namespaces) except etree.XPathEvalError: return uri.NodeParsingError - except Exception, ex: + except Exception as ex: traceback.print_exc() return ex if len(r)==1: return cls.trim(etree.tostring(r[0])) elif len(r)>1: return SelectorError @staticmethod def xcap_get(xpath_expr, source=source1, namespace=None, namespaces={}): "Second, use xpath_get_element" try: selector = uri.parse_node_selector(xpath_expr, namespace, namespaces)[0] return get(source, selector) - except (uri.NodeParsingError, SelectorError), ex : + except (uri.NodeParsingError, SelectorError) as ex : return ex.__class__ - except Exception, ex: + except Exception as ex: traceback.print_exc() return ex @staticmethod def xcap_put(xpath_expr, element, source=source1, namespace=None, namespaces={}): try: selector = uri.parse_node_selector(xpath_expr, namespace, namespaces)[0] return put(source, selector, element)[0] - except (uri.NodeParsingError, SelectorError), ex : + except (uri.NodeParsingError, SelectorError) as ex : return ex.__class__ - except Exception, ex: + except Exception as ex: traceback.print_exc() return ex @classmethod def test_get(cls): emph1 = 'Midwinter Spring' thomas = 'Thomas Eliot' ezra = 'Ezra Pound' hi = 'hi!' yesterday = '
class Code(int):
    """An int subclass restricted to valid HTTP response codes (100-999).

    Raises ValueError for anything outside that range.
    """

    def __new__(cls, x):
        instance = super(Code, cls).__new__(cls, x)
        if not 100 <= instance <= 999:
            raise ValueError('Invalid HTTP response code: {}'.format(x))
        return instance


class MatchAnyCode(object):
    """A universal container: ``code in MatchAnyCode()`` is always True."""

    def __contains__(self, item):
        return True

    def __repr__(self):
        return '{0.__class__.__name__}()'.format(self)


class ResponseCodeList(object):
    """A set of HTTP response codes parsed from a configuration value.

    Accepts 'all'/'any'/'yes'/'*' (match every code), 'none'/'no' (match
    nothing) or a comma-separated list of individual codes.
    """

    def __init__(self, value):
        value = value.strip().lower()
        if value in ('all', 'any', 'yes', '*'):
            self._codes = MatchAnyCode()
        elif value in ('none', 'no'):
            self._codes = set()
        else:
            self._codes = {Code(code) for code in re.split(r'\s*,\s*', value)}

    def __contains__(self, item):
        return item in self._codes

    def __repr__(self):
        if isinstance(self._codes, MatchAnyCode):
            value = 'all'
        elif not self._codes:
            value = 'none'
        else:
            # Bug fix: the stored codes are ints, so they must be converted
            # to strings before joining (str.join raises TypeError on ints).
            value = ','.join(str(code) for code in sorted(self._codes))
        return '{0.__class__.__name__}({1!r})'.format(self, value)
self._response.stream.length if self._response.stream else 0 @property def response_content(self): headers = '\n'.join('{}: {}'.format(name, header) for name, headers in self._response.headers.getAllRawHeaders() for header in headers) body = self._response.stream.mem if self._response.stream else '' content = '\n\n'.join(item for item in (headers, body) if item) return '{}'.format(content) if content else '' -class WEBLogger(object): - __metaclass__ = Singleton - +class WEBLogger(object, metaclass=Singleton): def __init__(self): self.logger = log.get_logger('weblog') self.logger.setLevel(log.level.INFO) if Logging.directory: if not os.path.exists(Logging.directory): try: makedirs(Logging.directory) except OSError as e: raise RuntimeError('Cannot create logging directory {}: {}'.format(Logging.directory, e)) self.filename = os.path.join(Logging.directory, 'access.log') formatter = log.Formatter() formatter.prefix_format = '' handler = FileHandler(self.filename) handler.setFormatter(formatter) self.logger.addHandler(handler) self.logger.propagate = False else: self.filename = None def log_access(self, request, response): web_transaction = _LoggedTransaction(request, response) if web_transaction.response_code == 200: - print(web_transaction.access_info) + print((web_transaction.access_info)) if response.code in Logging.log_request: self.logger.info(web_transaction.access_info) request_content = web_transaction.request_content[0:500] if request_content: self.logger.info("\n") self.logger.info("Request from %s: %s" % (web_transaction.remote_host, request.uri)) self.logger.info("---") self.logger.info(request_content) if response.code in Logging.log_response: self.logger.info(web_transaction.access_info) response_content = web_transaction.response_content[0:500] if response_content: self.logger.info("\n") self.logger.info("Response: %d" % web_transaction.response_code) self.logger.info("---") self.logger.info(response_content) root_logger = log.get_logger() 
def parse(filename_or_stream, handler, errorHandler=ErrorHandler()):
    """Parse an XML document with a fresh parser.

    SAX events are dispatched to *handler*, parse errors to
    *errorHandler*.
    """
    parser = make_parser()
    parser.setContentHandler(handler)
    parser.setErrorHandler(errorHandler)
    parser.parse(filename_or_stream)


def parseString(string, handler, errorHandler=ErrorHandler()):
    """Parse an in-memory XML document given as str or bytes.

    Fixes from the Python 3 port: the 2to3 conversion left a try/except
    that imported io.StringIO in both branches (a dead cStringIO
    fallback) -- removed.  io.StringIO also only accepts str, while the
    old cStringIO accepted byte strings, so bytes input is now wrapped
    in io.BytesIO instead.
    """
    from io import BytesIO, StringIO
    if errorHandler is None:
        errorHandler = ErrorHandler()
    parser = make_parser()
    parser.setContentHandler(handler)
    parser.setErrorHandler(errorHandler)
    inpsrc = InputSource()
    if isinstance(string, bytes):
        inpsrc.setByteStream(BytesIO(string))
    else:
        inpsrc.setByteStream(StringIO(string))
    parser.parse(inpsrc)
ce73cee..3a19dec 100644 --- a/xcap/sax/drivers/drv_ltdriver.py +++ b/xcap/sax/drivers/drv_ltdriver.py @@ -1,130 +1,130 @@ """ A SAX driver for the LT XML Python interface. """ version="0.10" from types import * from xml.sax import saxlib,saxutils from XMLinter import * # --- The parser class SAX_XMLinter(saxlib.Parser): def __init__(self): saxlib.Parser.__init__(self) def parse(self,sysID): self._parse(Open(sysID,NSL_read)) def parseFile(self,file): self._parse(FOpen(file,NSL_read)) def setLocale(self, locale): raise SAXException("Locales not supported") # --- EXPERIMENTAL PYTHON SAX EXTENSIONS: def get_parser_name(self): return "XMLinter" def get_parser_version(self): return "Unknown" def get_driver_version(self): return version def is_validating(self): return 0 def is_dtd_reading(self): return 1 def reset(self): raise SAXException("Incremental parsing not supported") def feed(self,data): raise SAXException("Incremental parsing not supported") def close(self): raise SAXException("Incremental parsing not supported") # --- INTERNAL METHODS def _parse(self,file): bit=GetNextBit(file) while bit: if bit.type=="start": self.doc_handler.startElement(bit.label, AttributeItem(bit.item)) elif bit.type=="end": self.doc_handler.endElement(bit.label) elif bit.type=="text": self.doc_handler.characters(bit.body,0,len(bit.body)) elif bit.type=="empty": self.doc_handler.startElement(bit.label, AttributeItem(bit.item)) self.doc_handler.endElement(bit.label) elif bit.type=="bad": self.err_handler.fatalError(saxlib.SAXException("Syntax error",None)) elif bit.type=="pi": - print "?pi" + print("?pi") else: - print "###"+bit.type + print("###"+bit.type) bit=GetNextBit(file) # --- AttributeItem def name(pair): return pair[0] class AttributeItem: def __init__(self,item): self.item=item self.list=ItemActualAttributes(item) def getLength(self): return len(self.list) def getName(self, i): return self.list[i][0] def getType(self, i): return "CDATA" def getValue(self, i): if 
type(i)==StringType: return GetAttrVal(self.item,i) else: return self.list[i][1] def __len__(self): return len(self.list) def __getitem__(self, key): if type(key)==StringType: return GetAttrVal(self.item,key) else: return self.list[key][0] def keys(self): - return map(name,self.list) + return list(map(name,self.list)) def has_key(self, key): return GetAttrVal(self.item,key) # --- Global functions def create_parser(): return SAX_XMLinter() # --- Testing if __name__=="__main__": p=create_parser() p.setDocumentHandler(saxutils.Canonizer()) p.setErrorHandler(saxutils.ErrorPrinter()) p.parse("tst.xml") diff --git a/xcap/sax/drivers/drv_ltdriver_val.py b/xcap/sax/drivers/drv_ltdriver_val.py index cbe4295..06c7216 100644 --- a/xcap/sax/drivers/drv_ltdriver_val.py +++ b/xcap/sax/drivers/drv_ltdriver_val.py @@ -1,42 +1,42 @@ """ A validating-mode SAX driver for the LT XML Python interface. """ version="0.10" -import drv_ltdriver +from . import drv_ltdriver from XMLinter import * class SAX_XMLinter_val(drv_ltdriver.SAX_XMLinter): def __init__(self): drv_ltdriver.SAX_XMLinter.__init__(self) def parse(self,sysID): self._parse(Open(sysID,NSL_read | NSL_read_validate)) def parseFile(self,file): self._parse(FOpen(file,NSL_read | NSL_read_validate)) def get_parser_name(self): return "XMLinter_val" def get_driver_version(self): return version def is_validating(self): return 0 # --- Global functions def create_parser(): return SAX_XMLinter_val() # --- Testing if __name__=="__main__": from xml.sax import saxutils p=create_parser() p.setDocumentHandler(saxutils.Canonizer()) p.setErrorHandler(saxutils.ErrorPrinter()) p.parse("tst.xml") diff --git a/xcap/sax/drivers/drv_pyexpat.py b/xcap/sax/drivers/drv_pyexpat.py index 5dcd6a6..a39850b 100644 --- a/xcap/sax/drivers/drv_pyexpat.py +++ b/xcap/sax/drivers/drv_pyexpat.py @@ -1,228 +1,228 @@ # -*- coding: iso-8859-1 -*- """ SAX driver for the Pyexpat C module. 
$Id: drv_pyexpat.py,v 1.19 2004/11/29 13:38:23 loewis Exp $ """ # Event handling can be speeded up by bypassing the driver for some events. # This will be implemented later when I can test this driver. # # This driver has been much improved by Geir Ove Grønmo. version="0.13" from xml.sax import saxlib, saxutils, SAXReaderNotAvailable try: from xml.parsers import expat except ImportError: raise SAXReaderNotAvailable("expat not supported",None) -import urllib2,types +import urllib.request, urllib.error, urllib.parse,types # --- SAX_expat class SAX_expat(saxlib.Parser,saxlib.Locator): "SAX driver for the Pyexpat C module." def __init__(self): saxlib.Parser.__init__(self) self.reset() def startElement(self,name,attrs): at = {} # Backward compatibility code, for older versions of the # PyExpat module if type(attrs) == type({}): at = attrs else: # Assume it's a list containing alternating names & values at = {} for i in range(0, len(attrs), 2): at[attrs[i]] = attrs[i+1] self.doc_handler.startElement(name,saxutils.AttributeMap(at)) # FIXME: bypass! def endElement(self,name): self.doc_handler.endElement(name) def characters(self,data): self.doc_handler.characters(data,0,len(data)) # FIXME: bypass! def processingInstruction(self,target,data): self.doc_handler.processingInstruction(target,data) def parse(self,sysID): - self.parseFile(urllib2.urlopen(sysID),sysID) + self.parseFile(urllib.request.urlopen(sysID),sysID) def parseFile(self,fileobj,sysID=None): self.reset() self.sysID=sysID self.doc_handler.startDocument() buf = fileobj.read(16384) while buf != "": if self.parser.Parse(buf, 0) != 1: self.__report_error() buf = fileobj.read(16384) self.parser.Parse("", 1) self.doc_handler.endDocument() self.close(needFinal=0) # --- Locator methods. Only usable after errors. 
def getSystemId(self): if self.sysID!=None: return self.sysID else: return "Unknown" def getLineNumber(self): return self.parser.ErrorLineNumber def getColumnNumber(self): return self.parser.ErrorColumnNumber # --- Internal def __report_error(self): errc=self.parser.ErrorCode msg=expat.ErrorString(errc) exc=saxlib.SAXParseException(msg,None,self) self.err_handler.fatalError(exc) # --- EXPERIMENTAL PYTHON SAX EXTENSIONS def get_parser_name(self): return "pyexpat" def get_parser_version(self): return "Unknown" def get_driver_version(self): return version def is_validating(self): return 0 def is_dtd_reading(self): return 0 def reset(self): self.sysID=None self.parser=expat.ParserCreate() self.parser.StartElementHandler = self.startElement self.parser.EndElementHandler = self.endElement self.parser.CharacterDataHandler = self.characters self.parser.ProcessingInstructionHandler = self.processingInstruction self.doc_handler.setDocumentLocator(self) def feed(self, data): if self.parser.Parse(data, 0) != 1: self.__report_error() def close(self, needFinal=1): if self.parser is None: # make sure close is idempotent return if needFinal: if self.parser.Parse("", 1) != 1: self.__report_error() self.parser = None # --- An expat driver that uses the lazy map class LazyExpatDriver(SAX_expat): def __init__(self): SAX_expat.__init__(self) self.map=LazyAttributeMap([]) def startElement(self,name,attrs): self.map.list=attrs self.doc_handler.startElement(name,self.map) # --- A lazy attribute map # This avoids the costly conversion from a list to a hash table class LazyAttributeMap: """A lazy implementation of AttributeList that takes an [attr,val,attr,val,...] 
list and uses it to implement the AttributeList interface.""" def __init__(self, list): self.list=list def getLength(self): return len(self.list)/2 def getName(self, i): try: return self.list[2*i] - except IndexError,e: + except IndexError as e: return None def getType(self, i): return "CDATA" def getValue(self, i): try: - if type(i)==types.IntType: + if type(i)==int: return self.list[2*i+1] else: for ix in range(0,len(self.list),2): if self.list[ix]==i: return self.list[ix+1] return None - except IndexError,e: + except IndexError as e: return None def __len__(self): return len(self.list)/2 def __getitem__(self, key): - if type(key)==types.IntType: + if type(key)==int: return self.list[2*key+1] else: for ix in range(0,len(self.list),2): if self.list[ix]==key: return self.list[ix+1] return None def items(self): result=[""]*(len(self.list)/2) for ix in range(0,len(self.list),2): result[ix/2]=(self.list[ix],self.list[ix+1]) return result def keys(self): result=[""]*(len(self.list)/2) for ix in range(0,len(self.list),2): result[ix/2]=self.list[ix] return result def has_key(self,key): for ix in range(0,len(self.list),2): if self.list[ix]==key: return 1 return 0 def get(self, key, alternative): for ix in range(0,len(self.list),2): if self.list[ix]==key: return self.list[ix+1] return alternative # --- def create_parser(): return SAX_expat() diff --git a/xcap/sax/drivers/drv_sgmlop.py b/xcap/sax/drivers/drv_sgmlop.py index 5da4429..f5cbc14 100644 --- a/xcap/sax/drivers/drv_sgmlop.py +++ b/xcap/sax/drivers/drv_sgmlop.py @@ -1,110 +1,110 @@ """ SAX driver for the sgmlop parser. 
$Id: drv_sgmlop.py,v 1.10 2002/08/13 09:28:52 afayolle Exp $ """ version="0.12" from xml.parsers import sgmlop from xml.sax import saxlib,saxutils from xml.sax import SAXException -import urllib2,string +import urllib.request, urllib.error, urllib.parse,string # --- Driver class Parser(saxlib.Parser): def __init__(self): saxlib.Parser.__init__(self) self.reset() def setDocumentHandler(self, dh): self.parser.register(self) # older version wanted ,1 arg self.doc_handler=dh def parse(self, url): - self.parseFile(urllib2.urlopen(url)) + self.parseFile(urllib.request.urlopen(url)) def parseFile(self, file): self._parsing = 1 self.doc_handler.startDocument() parser = self.parser while 1: data = file.read(16384) if not data: break parser.feed(data) self.close() # --- SAX 1.0 METHODS def handle_cdata(self, data): self.doc_handler.characters(data,0,len(data)) def handle_data(self, data): #ignore white space outside the toplevel element if self._nesting == 0: if string.strip(data)!="": # It's not whitespace? 
self.err_handler.error(SAXException( "characters '%s' outside root element" % data)) return self.doc_handler.characters(data,0,len(data)) def handle_proc(self, target, data): if target=='xml': # Don't report as a processing instruction return self.doc_handler.processingInstruction(target,data) def handle_charref(self, charno): if charno<256: self.doc_handler.characters(chr(charno),0,1) def finish_starttag(self, name, attrs): self._nesting = self._nesting + 1 self.doc_handler.startElement(name,saxutils.AttributeMap(attrs)) def finish_endtag(self,name): self._nesting = self._nesting - 1 self.doc_handler.endElement(name) # --- EXPERIMENTAL PYTHON SAX EXTENSIONS def get_parser_name(self): return "sgmlop" def get_parser_version(self): return "Unknown" def get_driver_version(self): return version def is_validating(self): return 0 def is_dtd_reading(self): return 0 def reset(self): self.parser=sgmlop.XMLParser() self._parsing=0 self._nesting=0 def feed(self,data): if not self._parsing: self.doc_handler.startDocument() self._parsing=1 self.parser.feed(data) def close(self): self.parser.close() self.doc_handler.endDocument() # ---- def create_parser(): return Parser() diff --git a/xcap/sax/drivers/drv_xmldc.py b/xcap/sax/drivers/drv_xmldc.py index 046f665..a14a4a9 100644 --- a/xcap/sax/drivers/drv_xmldc.py +++ b/xcap/sax/drivers/drv_xmldc.py @@ -1,154 +1,154 @@ """ SAX driver for Dan Connollys XML scanner. Should work with Python 1.4. 
""" version="0.10" -import sys,urllib2,re,string +import sys,urllib.request,urllib.error,urllib.parse,re,string if sys.version[:3]<"1.5": import saxlib else: from xml.sax import saxlib import xml_dc reg_ws="[\n\r\t ]+" predef_ents={"lt":"<","gt":"<","amp":"&","apos":"'","quot":'"'} # --- Driver class SAX_xmldc(saxlib.Parser,saxlib.Locator): def __init__(self): saxlib.Parser.__init__(self) self.current_sysid="" self.reset() # --- Parser methods def parse(self, systemId): try: self.current_sysid=systemId - infile=urllib2.urlopen(systemId) + infile=urllib.request.urlopen(systemId) self.parseFile(infile) finally: self.current_sysid="" def parseFile(self, fileobj): self.doc_handler.setDocumentLocator(self) self.reset() try: while 1: buf=fileobj.read(16384) if buf=="": break self.feed(buf) self.close() - except xml_dc.ScanError,e: + except xml_dc.ScanError as e: self.err_handler.fatalError(saxlib.SAXParseException(e,None,self)) - except xml_dc.NotWellFormed,e: + except xml_dc.NotWellFormed as e: self.err_handler.fatalError(saxlib.SAXParseException(e,None,self)) # --- Passing on parse events to document handler def text(self, str): self.doc_handler.characters(str,0,len(str)) def openStart(self, name): self.current_elem=name self.current_attrs_val={} self.current_attrs_type={} def attribute(self, name, type, value): self.current_attrs_val[name]=value self.current_attrs_type[name]=type def closeStart(self): self.doc_handler.startElement(self.current_elem, self.current_attrs_val) def closeEmpty(self): self.doc_handler.startElement(self.current_elem, self.current_attrs_val) self.doc_handler.endElement(self.current_elem) def endTag(self, name=None): self.doc_handler.endElement(name) def comment(self, stuff): pass def pi(self, stuff): match=re.search(reg_ws,stuff) if not match: self.doc_handler.processingInstruction(stuff,"") else: end_of_target,start_of_data=match.span() self.doc_handler.processingInstruction(stuff[:end_of_target], stuff[start_of_data:]) def decl(self, name, 
parts): pass def cref(self, numeral): numeral=string.atoi(numeral) self.doc_handler.characters(chr(numeral),0,1) def eref(self, name): pass def eof(self): pass # --- Locator methods def getLineNumber(self): return self.parser.line() def getSystemId(self): return self.current_sysid # --- EXPERIMENTAL PYTHON SAX EXTENSIONS def get_parser_name(self): return "xmldc" def get_parser_version(self): return "1.8" def get_driver_version(self): return version def is_validating(self): return 0 def is_dtd_reading(self): return 0 def reset(self): self.parser=xml_dc.Scanner() self.checker=xml_dc.WellFormed() self.checker.scanner(self.parser) self.unfed_so_far=1 def feed(self,data): if self.unfed_so_far: self.doc_handler.startDocument() self.unfed_so_far=0 self.parser.feed(data) self.parser.next(self) def close(self): self.checker.eof() self.doc_handler.endDocument() # --- def create_parser(): return SAX_xmldc() diff --git a/xcap/sax/drivers/drv_xmllib.py b/xcap/sax/drivers/drv_xmllib.py index a5d21b0..cdc3aa8 100644 --- a/xcap/sax/drivers/drv_xmllib.py +++ b/xcap/sax/drivers/drv_xmllib.py @@ -1,108 +1,108 @@ """ SAX driver for xmllib.py """ version="0.91" from xml.sax import saxutils from xml.sax.drivers import pylibs import xmllib # Make it generate Unicode if possible, UTF-8 else try: - unicode("") + str("") except NameError: - from xml.unicode.iso8859 import wstring - def unicode(str, encoding): + from xml.unicode.iso8859 import wstring + def str(str, encoding): return wstring.decode(encoding, str).utf8() # --- SAX_XLParser class SAX_XLParser(pylibs.LibParser, xmllib.XMLParser): "SAX driver for xmllib.py."
def __init__(self): xmllib.XMLParser.__init__(self) pylibs.LibParser.__init__(self) self.standalone = 0 self.reset() def _convert(self, str): - return unicode(str, self.encoding) + return str(str, self.encoding) def unknown_starttag(self, tag, attributes): - tag = unicode(tag, self.encoding) + tag = str(tag, self.encoding) newattr = {} - for k, v in attributes.items(): - newattr[unicode(k, self.encoding)] = unicode(v, self.encoding) + for k, v in list(attributes.items()): + newattr[str(k, self.encoding)] = str(v, self.encoding) self.doc_handler.startElement(tag, saxutils.AttributeMap(newattr)) def handle_endtag(self, tag, method): - self.doc_handler.endElement(unicode(tag, self.encoding)) + self.doc_handler.endElement(str(tag, self.encoding)) def handle_proc(self, name, data): self.doc_handler.processingInstruction(name, data[1:]) def handle_xml(self, encoding, standalone): self.standalone = standalone == "yes" if encoding is not None: self.encoding = encoding def handle_data(self, data): "Handles PCDATA." - data = unicode(data, self.encoding) + data = str(data, self.encoding) self.doc_handler.characters(data, 0, len(data)) def handle_cdata(self, data): "Handles CDATA marked sections." - data = unicode(data, self.encoding) + data = str(data, self.encoding) self.doc_handler.characters(data, 0, len(data)) def getLineNumber(self): return self.lineno def getSystemId(self): return self.sysID def _can_locate(self): "Internal: returns true if location info is available." 
return 1 # --- EXPERIMENTAL SAX PYTHON EXTENSIONS def get_parser_name(self): return "xmllib" def get_parser_version(self): return xmllib.version def get_driver_version(self): return version def is_validating(self): return 0 def is_dtd_reading(self): return 0 def reset(self): xmllib.XMLParser.reset(self) self.unfed_so_far = 1 self.encoding = "utf-8" def feed(self, data): if self.unfed_so_far: self.doc_handler.startDocument() self.unfed_so_far = 0 xmllib.XMLParser.feed(self, data) def close(self): xmllib.XMLParser.close(self) self.doc_handler.endDocument() # --- Global functions def create_parser(): return SAX_XLParser() diff --git a/xcap/sax/drivers/drv_xmlproc_val.py b/xcap/sax/drivers/drv_xmlproc_val.py index d333813..afdbace 100644 --- a/xcap/sax/drivers/drv_xmlproc_val.py +++ b/xcap/sax/drivers/drv_xmlproc_val.py @@ -1,73 +1,73 @@ """ A SAX driver for xmlproc with validation and DTD information. $Id: drv_xmlproc_val.py,v 1.9 2001/12/30 12:13:45 loewis Exp $ """ version="0.92" from xml.sax import saxlib,saxutils from xml.parsers.xmlproc import xmlval from xml.sax.drivers.drv_xmlproc import * import types # --- SAX_XPValParser class SAX_XPValParser(SAX_XPParser): def __init__(self): SAX_XPParser.__init__(self) def _create_parser(self): return xmlval.XMLValidator() def handle_start_tag(self, name, attrs): try: self.doc_handler.startElement(name, XPAttributes(attrs,\ self.parser.dtd.get_elem(name))) - except KeyError,e: + except KeyError as e: self.doc_handler.startElement(name,XPAttributes(attrs,None)) # --- EXPERIMENTAL PYTHON SAX EXTENSIONS: def get_parser_name(self): return "xmlproc_val" def get_driver_version(self): return version def is_validating(self): return 1 # --- XPAttributes class XPAttributes(saxutils.AttributeMap): def __init__(self,map,elemdecl): saxutils.AttributeMap.__init__(self,map) self.elemdecl=elemdecl if elemdecl==None: self.getType=self.getTypeStatic def getTypeStatic(self,i): return "CDATA" # Used for undeclared elements def getType(self, 
i): - if type(i)==types.IntType: + if type(i)==int: try: - i=self.map.keys()[i] - except KeyError,e: + i=list(self.map.keys())[i] + except KeyError as e: return "CDATA" try: return self.elemdecl.get_attr(i).get_type() - except KeyError,e: + except KeyError as e: return "CDATA" # --- Global functions def create_parser(): return SAX_XPValParser() diff --git a/xcap/sax/drivers/drv_xmltoolkit.py b/xcap/sax/drivers/drv_xmltoolkit.py index 40d901c..7b8019a 100644 --- a/xcap/sax/drivers/drv_xmltoolkit.py +++ b/xcap/sax/drivers/drv_xmltoolkit.py @@ -1,106 +1,106 @@ """ A SAX driver for David Scheres XML-Toolkit parser. """ version="0.20" import sys from xml.sax import saxlib,saxutils -import XMLFactory,XMLClient,urllib2 +import XMLFactory,XMLClient,urllib.request,urllib.error,urllib.parse class SAX_XTClient(saxlib.Parser,XMLClient.ClientBase): def __init__(self): XMLClient.ClientBase.__init__(self) saxlib.Parser.__init__(self) self.reset() def text(self,obj): v=obj.value() self.doc_handler.characters(v,0,len(v)) def pi(self,obj): if obj.nameOf()=="xml": return # Don't report the XML declaration content="" for part in obj.value(): content=content+part.value()+" " self.doc_handler.processingInstruction(obj.nameOf(),content[:-1]) def emptyTag(self,obj): attrs={} for assoc in obj.value(): attrs[assoc.nameOf()]=assoc.value() self.doc_handler.startElement(obj.nameOf(), saxutils.AttributeMap(attrs)) self.doc_handler.endElement(obj.nameOf()) def nonEmptyTag(self,obj): attrs={} for assoc in obj.value(): attrs[assoc.nameOf()]=assoc.value() self.doc_handler.startElement(obj.nameOf(), saxutils.AttributeMap(attrs)) def endTag(self,obj): self.doc_handler.endElement(obj.nameOf()) def CDATA(self,obj): v=obj.value() self.doc_handler.characters(v,0,len(v)) def comment(self,obj): pass # SAX ignores comments def parse(self, sysID): - i=urllib2.urlopen(sysID) + i=urllib.request.urlopen(sysID) self.parseFile(i) i.close() def parseFile(self, file): self.reset() while 1: buf=file.read(16384) if 
buf=="": break self.feed(buf) self.close() # --- EXPERIMENTAL SAX PYTHON EXTENSIONS def get_parser_name(self): return "xmltoolkit" def get_parser_version(self): return "Unknown" def get_driver_version(self): return version def is_validating(self): return 0 def is_dtd_reading(self): return 0 def reset(self): self.parser=XMLFactory.XMLFactory(self) self.unfed_so_far=1 def feed(self,data): if self.unfed_so_far: self.doc_handler.startDocument() self.unfed_so_far=0 self.parser.feed(data) def close(self): self.parser.endfile() self.doc_handler.endDocument() def create_parser(): return SAX_XTClient() diff --git a/xcap/sax/drivers/pylibs.py b/xcap/sax/drivers/pylibs.py index 01c4462..9189a35 100644 --- a/xcap/sax/drivers/pylibs.py +++ b/xcap/sax/drivers/pylibs.py @@ -1,109 +1,109 @@ """ Common code for the sgmllib, htmllib and xmllib parser drivers. $Id: pylibs.py,v 1.6 2002/08/13 09:28:52 afayolle Exp $ """ from xml.sax import saxlib,saxutils -import urllib2 +import urllib.request, urllib.error, urllib.parse # --- LibParser class LibParser(saxlib.Parser,saxlib.Locator): "Common code for the sgmllib, htmllib and xmllib parser drivers." def __init__(self): saxlib.Parser.__init__(self) def parse(self,sysID): "Parses the referenced document." self.sysID=sysID - self.parseFile(urllib2.urlopen(sysID)) + self.parseFile(urllib.request.urlopen(sysID)) def parseFile(self,fileobj): "Parses the given file." if self._can_locate(): self.doc_handler.setDocumentLocator(self) self.reset() while 1: buf=fileobj.read(16384) if buf=="": break try: self.feed(buf) - except RuntimeError,e: + except RuntimeError as e: self.err_handler.fatalError(saxlib.SAXException(str(e),e)) self.close() def unknown_endtag(self,tag): "Handles end tags." self.doc_handler.endElement(tag) def handle_xml(self,encoding,standalone): "Remembers whether the document is standalone." self.standalone= standalone=="yes" def handle_data(self,data): "Handles PCDATA." 
self.doc_handler.characters(data,0,len(data)) def handle_cdata(self,data): "Handles CDATA marked sections." self.doc_handler.characters(data,0,len(data)) def syntax_error(self, message): "Handles fatal errors." if self._can_locate(): self.err_handler.fatalError(saxlib.SAXParseException(message,None, self)) else: self.err_handler.fatalError(saxlib.SAXException(message,None)) # --- SGMLParsers class SGMLParsers(LibParser): "Common code for the sgmllib and htmllib parsers." def handle_pi(self,data): "Handles processing instructions." # Should we try to parse out the name if there is one? self.doc_handler.processingInstruction("",data) def handle_starttag(self,tag,method,attributes): self.unknown_starttag(tag,attributes) def unknown_starttag(self,tag,attributes): "Handles start tags." attrs={} for (a,v) in attributes: attrs[a]=v self.doc_handler.startElement(tag,saxutils.AttributeMap(attrs)) def handle_endtag(self,tag,method): "Handles end tags." self.doc_handler.endElement(tag) def unknown_entityref(self,name): "Handles entity references by throwing an error." self.err_handler.fatalError(saxlib.SAXException("Reference to unknown entity " "'%s'" % name,None)) def unknown_charref(self,no): "Handles non-ASCII character references." self.err_handler.fatalError(saxlib.SAXException("Reference to unknown character '%d'" % no,None)) def handle_data(self,data): "Handles character data in element content." self.doc_handler.characters(data,0,len(data)) def report_unbalanced(self,gi): "Reports unbalanced tags." self.err_handler.fatalError(saxlib.SAXException("Unbalanced end tag for '%s'" % gi,None)) def _can_locate(self): "Internal: returns true if location info is available." return 0 diff --git a/xcap/sax/drivers2/drv_htmllib.py b/xcap/sax/drivers2/drv_htmllib.py index 52440d0..9055834 100644 --- a/xcap/sax/drivers2/drv_htmllib.py +++ b/xcap/sax/drivers2/drv_htmllib.py @@ -1,20 +1,20 @@ """ A SAX 2.0 driver for htmllib. 
$Id: drv_htmllib.py,v 1.2 2001/12/30 12:13:45 loewis Exp $ """ import types, string from xml.sax import SAXNotSupportedException, SAXNotRecognizedException from xml.sax.xmlreader import IncrementalParser -from drv_sgmllib import SgmllibDriver +from .drv_sgmllib import SgmllibDriver class HtmllibDriver(SgmllibDriver): - from htmlentitydefs import entitydefs + from html.entities import entitydefs # --- def create_parser(): return HtmllibDriver() diff --git a/xcap/sax/drivers2/drv_javasax.py b/xcap/sax/drivers2/drv_javasax.py index 2682c26..a5e2316 100644 --- a/xcap/sax/drivers2/drv_javasax.py +++ b/xcap/sax/drivers2/drv_javasax.py @@ -1,212 +1,212 @@ """ SAX driver for the Java SAX parsers. Can only be used in Jython. $Id: drv_javasax.py,v 1.5 2003/01/26 09:08:51 loewis Exp $ """ # --- Initialization version = "0.10" revision = "$Revision: 1.5 $" import string from xml.sax import xmlreader, saxutils from xml.sax.handler import feature_namespaces from xml.sax import _exceptions # we only work in jython import sys if sys.platform[:4] != "java": raise _exceptions.SAXReaderNotAvailable("drv_javasax not available in CPython", None) del sys # get the necessary Java SAX classes try: from java.lang import String from org.xml.sax import ContentHandler, SAXException from org.xml.sax.helpers import XMLReaderFactory except ImportError: raise SAXReaderNotAvailable("SAX is not on the classpath", None) # get some JAXP stuff try: from javax.xml.parsers import SAXParserFactory, ParserConfigurationException factory = SAXParserFactory.newInstance() jaxp = 1 except ImportError: jaxp = 0 # --- JavaSAXParser class JavaSAXParser(xmlreader.XMLReader, ContentHandler): "SAX driver for the Java SAX parsers." 
def __init__(self, jdriver = None): self._parser = create_java_parser(jdriver) self._parser.setFeature(feature_namespaces, 0) self._parser.setContentHandler(self) self._attrs = AttributesImpl() self._nsattrs = AttributesNSImpl() # XMLReader methods def parse(self, source): "Parse an XML document from a URL or an InputSource." self._source = saxutils.prepare_input_source(source) try: self._parser.parse(source) - except SAXException, e: + except SAXException as e: raise _exceptions.SAXException("", e) def getFeature(self, name): return self._parser.getFeature(name) def setFeature(self, name, state): self._parser.setFeature(name, state) def getProperty(self, name): return self._parser.getProperty(name) def setProperty(self, name, value): self._parser.setProperty(name, value) # ContentHandler methods def setDocumentLocator(self, locator): self._cont_handler.setDocumentLocator(locator) def startDocument(self): self._cont_handler.startDocument() self._namespaces = self._parser.getFeature(feature_namespaces) def startElement(self, uri, lname, qname, attrs): if self._namespaces: self._nsattrs._attrs = attrs self._cont_handler.startElementNS((uri or None, lname), qname, self._nsattrs) else: self._attrs._attrs = attrs self._cont_handler.startElement(qname, self._attrs) def characters(self, char, start, len): self._cont_handler.characters(str(String(char, start, len))) def ignorableWhitespace(self, char, start, len): self._cont_handler.ignorableWhitespace(str(String(char, start, len))) def endElement(self, uri, lname, qname): if self._namespaces: self._cont_handler.endElementNS((uri or None, lname), qname) else: self._cont_handler.endElement(qname) def endDocument(self): self._cont_handler.endDocument() def processingInstruction(self, target, data): self._cont_handler.processingInstruction(target, data) # --- AttributesImpl class AttributesImpl: def __init__(self, attrs = None): self._attrs = attrs def getLength(self): return self._attrs.getLength() def getType(self, name): 
return self._attrs.getType(name) def getValue(self, name): value = self._attrs.getValue(name) if value == None: raise KeyError(name) return value def getValueByQName(self, name): value = self._attrs.getValueByQName(name) if value == None: raise KeyError(name) return value def getNameByQName(self, name): value = self._attrs.getNameByQName(name) if value == None: raise KeyError(name) return value def getQNameByName(self, name): value = self._attrs.getQNameByName(name) if value == None: raise KeyError(name) return value def getNames(self): return self._attrs.getNames() def getQNames(self): return self._attrs.getQNames() def __len__(self): return self._attrs.getLength() def __getitem__(self, name): value = self._attrs.getValue(name) if value == None: raise KeyError(name) return value def keys(self): qnames = [] for ix in range(self._attrs.getLength()): qnames.append(self._attrs.getQName(ix)) return qnames def copy(self): return self.__class__(self._attrs) def items(self): list = [] for name in self._attrs.getQNames(): list.append((name, self._attrs.getValue(name))) return list def values(self): - return map(self._attrs.getValue, self._attrs.getQNames()) + return list(map(self._attrs.getValue, self._attrs.getQNames())) def get(self, name, alt = None): value = self._attrs.getValue(name) if value != None: return value else: return alt def has_key(self, name): return self._attrs.getValue(name) != None # --- AttributesNSImpl class AttributesNSImpl: def __init__(self): self._attrs = None # --- def create_java_parser(jdriver = None): try: if jdriver: return XMLReaderFactory.createXMLReader(jdriver) elif jaxp: return factory.newSAXParser().getXMLReader() else: return XMLReaderFactory.createXMLReader() - except ParserConfigurationException, e: + except ParserConfigurationException as e: raise SAXReaderNotAvailable(e.getMessage()) - except SAXException, e: + except SAXException as e: raise SAXReaderNotAvailable(e.getMessage()) def create_parser(jdriver = None): return 
JavaSAXParser(jdriver) diff --git a/xcap/sax/drivers2/drv_sgmllib.py b/xcap/sax/drivers2/drv_sgmllib.py index 4051686..d0eefda 100644 --- a/xcap/sax/drivers2/drv_sgmllib.py +++ b/xcap/sax/drivers2/drv_sgmllib.py @@ -1,152 +1,152 @@ """ A SAX 2.0 driver for sgmllib. $Id: drv_sgmllib.py,v 1.3 2001/12/30 12:13:45 loewis Exp $ """ import types, string import sgmllib from xml.sax import SAXNotSupportedException, SAXNotRecognizedException from xml.sax.xmlreader import IncrementalParser # ===== DRIVER class SgmllibDriver(sgmllib.SGMLParser, IncrementalParser): # ===== SAX 2.0 INTERFACES # --- XMLReader methods def __init__(self): sgmllib.SGMLParser.__init__(self) IncrementalParser.__init__(self) self._sysid = None self._pubid = None def prepareParser(self, source): self._sysid = source.getSystemId() self._pubid = source.getPublicId() self._cont_handler.startDocument() def close(self): sgmllib.SGMLParser.close(self) self._cont_handler.endDocument() def setLocale(self, locale): raise SAXNotSupportedException("setLocale not supported") def getFeature(self, name): raise SAXNotRecognizedException("Feature '%s' not recognized" % name) def setFeature(self, name, state): raise SAXNotRecognizedException("Feature '%s' not recognized" % name) def getProperty(self, name): raise SAXNotRecognizedException("Property '%s' not recognized" % name) def setProperty(self, name, value): raise SAXNotRecognizedException("Property '%s' not recognized" % name) # --- Locator methods def getColumnNumber(self): return -1 def getLineNumber(self): return -1 def getPublicId(self): return self._pubid def getSystemId(self): return self._sysid # ===== HTMLLIB INTERFACES def unknown_starttag(self, name, attrs): self._cont_handler.startElement(name, AttributesImpl(attrs)) def unknown_endtag(self, name): self._cont_handler.endElement(name) def handle_data(self, data): self._cont_handler.characters(data) # ===== ATTRIBUTESIMPL ===== class AttributesImpl: def __init__(self, attrs): "attrs has the form [(name, 
value), (name, value)...]" self._attrs = attrs def getLength(self): return len(self._attrs) def getType(self, name): return "CDATA" def getValue(self, name): for (aname, avalue) in self._attrs: if aname == name: return avalue - raise KeyError, name + raise KeyError(name) def getValueByQName(self, name): for (aname, avalue) in self._attrs: if aname == name: return avalue - raise KeyError, name + raise KeyError(name) def getNameByQName(self, name): for (aname, avalue) in self._attrs: if aname == name: return name - raise KeyError, name + raise KeyError(name) def getQNameByName(self, name): return self.getNameByQName(name) def getNames(self): - return map(lambda x: x[0], self._attrs) + return [x[0] for x in self._attrs] def getQNames(self): - return map(lambda x: x[0], self._attrs) + return [x[0] for x in self._attrs] def __len__(self): return len(self._attrs) def __getitem__(self, name): for (aname, avalue) in self._attrs: if aname == name: return avalue - raise KeyError, name + raise KeyError(name) def keys(self): return self.getNames() def has_key(self, name): for (aname, avalue) in self._attrs: if aname == name: return 1 return 0 def get(self, name, alternative=None): for (aname, avalue) in self._attrs: if aname == name: return avalue def copy(self): return self.__class__(self._attrs) def items(self): return self._attrs def values(self): - return map(lambda x: x[1], self._attrs) + return [x[1] for x in self._attrs] # --- def create_parser(): return SgmllibDriver() diff --git a/xcap/sax/drivers2/drv_sgmlop.py b/xcap/sax/drivers2/drv_sgmlop.py index c4add17..ff5bceb 100644 --- a/xcap/sax/drivers2/drv_sgmlop.py +++ b/xcap/sax/drivers2/drv_sgmlop.py @@ -1,131 +1,131 @@ """ SAX2 driver for the sgmlop parser. 
$Id: drv_sgmlop.py,v 1.7 2003/01/21 12:42:28 loewis Exp $ """ version = "0.1" from xml.parsers.sgmllib import SGMLParser from xml.sax import saxlib, handler from xml.sax.xmlreader import AttributesImpl, XMLReader from xml.sax.saxutils import ContentGenerator, prepare_input_source try: import codecs def to_xml_string(str,encoding): try: decoder = codecs.lookup(encoding)[1] return decoder(str)[0] except LookupError: return str except ImportError: - from xml.unicode.iso8859 import wstring + from xml.unicode.iso8859 import wstring def to_xml_string(str,encoding): if string.lower(self._encoding) == 'utf-8': return str else: return wstring.decode(encoding,str).utf8() class SaxParser(SGMLParser, XMLReader): """ Implements IncrementalReader """ def __init__(self, bufsize = 65536, encoding = 'UTF-8'): XMLReader.__init__(self) SGMLParser.__init__(self) self._bufsize = bufsize self._lexical_handler = None self._encoding = encoding self.documentStarted = 0 def parse(self, source): source = prepare_input_source(source) self.prepareParser(source) file = source.getByteStream() buffer = file.read(self._bufsize) while buffer != "": self.feed(buffer) buffer = file.read(self._bufsize) self.close() def feed(self,buffer): if not self.documentStarted: self._cont_handler.startDocument() self.documentStarted = 1 SGMLParser.feed(self,buffer) def prepareParser(self, source): # not used pass def close(self): """This method is called when the entire XML document has been passed to the parser through the feed method, to notify the parser that there are no more data. This allows the parser to do the final checks on the document and empty the internal data buffer. The parser will not be ready to parse another document until the reset method has been called.
close may raise SAXException.""" SGMLParser.close(self) self._cont_handler.endDocument() def _make_attr_dict(self,attr_list): d = {} cvrt = lambda str,e=self._encoding:to_xml_string(str,e) for (a,b) in attr_list: d[cvrt(a)]=cvrt(b) return d def unknown_starttag(self,tag,attrs): self._cont_handler.startElement(to_xml_string(tag,self._encoding), AttributesImpl(self._make_attr_dict(attrs))) def unknown_endtag(self,tag): self._cont_handler.endElement(to_xml_string(tag,self._encoding)) def handle_data(self,data): self._cont_handler.characters(to_xml_string(data,self._encoding)) def unknown_entityref(self, entity): self._cont_handler.skippedEntity(to_xml_string(entity,self._encoding)) def handle_comment(self,data): if self._lexical_handler is not None: self._lexical_handler.comment(to_xml_string(data,self._encoding)) def setProperty(self,name,value): if name == handler.property_lexical_handler: self._lexical_handler = value elif name == handler.property_encoding: self._encoding = value else: raise SAXNotRecognizedException("Property '%s' not recognized" % name) def getProperty(self, name): if name == handler.property_lexical_handler: return self._lexical_handler elif name == handler.property_encoding: return self._encoding raise SAXNotRecognizedException("Property '%s' not recognized" % name) ## def getFeature(self, name): ## if name == handler.feature_namespaces: ## return self._namespaces ## raise SAXNotRecognizedException("Feature '%s' not recognized" % name) ## def setFeature(self, name, state): ## if self._parsing: ## raise SAXNotSupportedException("Cannot set features while parsing") ## if name == handler.feature_namespaces: ## self._namespaces = state ## else: ## raise SAXNotRecognizedException("Feature '%s' not recognized" % ## name) def create_parser(): return SaxParser() diff --git a/xcap/sax/drivers2/drv_sgmlop_html.py b/xcap/sax/drivers2/drv_sgmlop_html.py index 6c44844..5825e59 100644 --- a/xcap/sax/drivers2/drv_sgmlop_html.py +++ 
b/xcap/sax/drivers2/drv_sgmlop_html.py @@ -1,75 +1,75 @@ """ SAX2 driver for parsing HTML with the sgmlop parser. $Id: drv_sgmlop_html.py,v 1.3 2002/05/10 14:50:06 akuchling Exp $ """ version = "0.1" -from drv_sgmlop import * +from .drv_sgmlop import * from xml.dom.html import HTML_CHARACTER_ENTITIES, HTML_FORBIDDEN_END, HTML_OPT_END, HTML_DTD from string import strip, upper class SaxHtmlParser(SaxParser): def __init__(self, bufsize = 65536, encoding = 'iso-8859-1', verbose = 0): SaxParser.__init__(self, bufsize, encoding) self.verbose = verbose def finish_starttag(self, tag, attrs): """uses the HTML DTD to automatically generate events for missing tags""" # guess omitted close tags while self.stack and \ upper(self.stack[-1]) in HTML_OPT_END and \ tag not in HTML_DTD.get(self.stack[-1],[]): self.unknown_endtag(self.stack[-1]) del self.stack[-1] if self.stack and tag not in HTML_DTD.get(self.stack[-1],[]) and self.verbose: - print 'Warning : trying to add %s as a child of %s'%\ - (tag,self.stack[-1]) + print('Warning : trying to add %s as a child of %s'%\ + (tag,self.stack[-1])) self.unknown_starttag(tag,attrs) if upper(tag) in HTML_FORBIDDEN_END: # close immediately tags for which we won't get an end self.unknown_endtag(tag) return 0 else: self.stack.append(tag) return 1 def finish_endtag(self, tag): if tag in HTML_FORBIDDEN_END : # do nothing: we've already closed it return if tag in self.stack: while self.stack and self.stack[-1] != tag: self.unknown_endtag(self.stack[-1]) del self.stack[-1] self.unknown_endtag(tag) del self.stack[-1] elif self.verbose: - print "Warning: I don't see where tag %s was opened"%tag + print("Warning: I don't see where tag %s was opened"%tag) def handle_data(self,data): if self.stack: if '#PCDATA' not in HTML_DTD.get(self.stack[-1],[]) and not strip(data): # this is probably ignorable whitespace self._cont_handler.ignorableWhitespace(data) else: self._cont_handler.characters(to_xml_string(data,self._encoding)) def close(self): 
SGMLParser.close(self) self.stack.reverse() for tag in self.stack: self.unknown_endtag(tag) self.stack = [] self._cont_handler.endDocument() def create_parser(): return SaxHtmlParser() diff --git a/xcap/sax/drivers2/drv_xmlproc.py b/xcap/sax/drivers2/drv_xmlproc.py index c7bfae2..336bd0c 100644 --- a/xcap/sax/drivers2/drv_xmlproc.py +++ b/xcap/sax/drivers2/drv_xmlproc.py @@ -1,424 +1,424 @@ """ A SAX 2.0 driver for xmlproc. $Id: drv_xmlproc.py,v 1.16 2003/07/27 17:58:20 loewis Exp $ """ import types, string from xml.parsers.xmlproc import xmlproc, xmlval, xmlapp from xml.sax import saxlib from xml.sax.xmlreader import AttributesImpl, AttributesNSImpl from xml.sax.xmlreader import IncrementalParser from xml.sax.saxutils import ContentGenerator, prepare_input_source # Todo # - EntityResolver InputSource handling # - as much as possible of LexicalHandler # - entity expansion features # - core properties # - extra properties/features # - element stack # - entity stack # - current error code # - byte offset # - DTD object # - catalog path # - use catalogs # - regression test # - methods from Python SAX extensions? 
# - remove FIXMEs class XmlprocDriver(IncrementalParser): # ===== SAX 2.0 INTERFACES # --- XMLReader methods def __init__(self): IncrementalParser.__init__(self) self.__parsing = 0 self.__validate = 0 self.__namespaces = 0 self.__ext_pes = 0 self.__locator = 0 self._lex_handler = saxlib.LexicalHandler() self._decl_handler = saxlib.DeclHandler() self._parser = None def prepareParser(self, source): self.__parsing = 1 # create parser if self.__validate: parser = xmlval.XMLValidator() else: parser = xmlproc.XMLProcessor() # set handlers if self._cont_handler != None or self._lex_handler != None: if self._cont_handler == None: self._cont_handler = saxlib.ContentHandler() if self._lex_handler == None: self._lex_handler = saxlib.LexicalHandler() if self.__namespaces: filter = NamespaceFilter(parser, self._cont_handler, self._lex_handler, self) parser.set_application(filter) else: parser.set_application(self) if self._err_handler != None: parser.set_error_handler(self) if self._decl_handler != None or self._dtd_handler != None: parser.set_dtd_listener(self) parser.set_pubid_resolver(self) # FIXME: set other handlers if self.__ext_pes: parser.set_read_external_subset(1) self._parser = parser # make it available for callbacks if source: parser.set_sysid(source.getSystemId()) def feed(self, data): if not self._parser: self.prepareParser(None) self._parser.feed(data) def close(self): self._parser.flush() self._parser.parseEnd() def reset(self): self._parser = None self.__parsing = 0 def setLocale(self, locale): pass def getFeature(self, name): if name == saxlib.feature_string_interning or \ name == saxlib.feature_external_ges: return 1 elif name == saxlib.feature_external_pes: return self.__ext_pes elif name == saxlib.feature_validation: return self.__validate elif name == saxlib.feature_namespaces: return self.__namespaces elif name == saxlib.feature_namespace_prefixes: return 0 else: raise saxlib.SAXNotRecognizedException("Feature '%s' not recognized" % name) def 
setFeature(self, name, state): if self.__parsing: raise saxlib.SAXNotSupportedException("Cannot set feature '%s' during parsing" % name) if name == saxlib.feature_validation: self.__validate = state if self.__validate: self.__ext_pes = 1 elif name == saxlib.feature_namespaces: self.__namespaces = state elif name == saxlib.feature_external_ges or \ name == saxlib.feature_string_interning: if not state: raise saxlib.SAXNotSupportedException("This feature cannot be turned off with xmlproc.") elif name == saxlib.feature_namespace_prefixes: if state: raise saxlib.SAXNotSupportedException("This feature cannot be turned on with xmlproc.") elif name == saxlib.feature_external_pes: self.__ext_pes = state else: raise saxlib.SAXNotRecognizedException("Feature '%s' not recognized" % name) def getProperty(self, name): if name == saxlib.property_lexical_handler: return self._lex_handler elif name == saxlib.property_declaration_handler: return self._decl_handler raise saxlib.SAXNotRecognizedException("Property '%s' not recognized" % name) def setProperty(self, name, value): if name == saxlib.property_lexical_handler: self._lex_handler = value elif name == saxlib.property_declaration_handler: self._decl_handler = value else: raise saxlib.SAXNotRecognizedException("Property '%s' not recognized" % name) # --- Locator methods def getColumnNumber(self): return self._parser.get_column() def getLineNumber(self): return self._parser.get_line() def getPublicId(self): return None # FIXME: Try to find this. Perhaps from InputSource? def getSystemId(self): return self._parser.get_current_sysid() # FIXME? 
# ===== XMLPROC INTERFACES # --- Application methods def set_locator(self, locator): self._locator = locator def doc_start(self): self._cont_handler.startDocument() def doc_end(self): self._cont_handler.endDocument() def handle_comment(self, data): self._lex_handler.comment(data) def handle_start_tag(self, name, attrs): self._cont_handler.startElement(name, AttributesImpl(attrs)) def handle_end_tag(self,name): self._cont_handler.endElement(name) def handle_data(self, data, start, end): self._cont_handler.characters(data[start:end]) def handle_ignorable_data(self, data, start, end): self._cont_handler.ignorableWhitespace(data[start:end]) def handle_pi(self, target, data): self._cont_handler.processingInstruction(target, data) def handle_doctype(self, root, pubId, sysId): self._lex_handler.startDTD(root, pubId, sysId) def set_entity_info(self, xmlver, enc, sddecl): pass # --- ErrorHandler methods # set_locator implemented as Application method above def get_locator(self): return self._locator def warning(self, msg): self._err_handler.warning(saxlib.SAXParseException(msg, None, self)) def error(self, msg): self._err_handler.error(saxlib.SAXParseException(msg, None, self)) def fatal(self, msg): self._err_handler.fatalError(saxlib.SAXParseException(msg, None, self)) # --- DTDConsumer methods def dtd_start(self): pass # this is done by handle_doctype def dtd_end(self): self._lex_handler.endDTD() def handle_comment(self, contents): self._lex_handler.comment(contents) def handle_pi(self, target, rem): self._cont_handler.processingInstruction(target, rem) def new_general_entity(self, name, val): self._decl_handler.internalEntityDecl(name, val) def new_external_entity(self, ent_name, pub_id, sys_id, ndata): if not ndata: self._decl_handler.externalEntityDecl(ent_name, pub_id, sys_id) else: self._dtd_handler.unparsedEntityDecl(ent_name, pub_id, sys_id, ndata) def new_parameter_entity(self, name, val): self._decl_handler.internalEntityDecl("%" + name, val) def 
new_external_pe(self, name, pubid, sysid): self._decl_handler.externalEntityDecl("%" + name, pubid, sysid) def new_notation(self, name, pubid, sysid): self._dtd_handler.notationDecl(name, pubid, sysid) def new_element_type(self, elem_name, elem_cont): if elem_cont == None: elem_cont = "ANY" elif elem_cont == ("", [], ""): elem_cont = "EMPTY" self._decl_handler.elementDecl(elem_name, elem_cont) def new_attribute(self, elem, attr, type, a_decl, a_def): self._decl_handler.attributeDecl(elem, attr, type, a_decl, a_def) # --- PubIdResolver methods def resolve_pe_pubid(self, pubid, sysid): # Delegate out to the instance's EntityResolver. # TODO: does not support returning an InputSource from resolveEntity. return self._ent_handler.resolveEntity(pubid, sysid) def resolve_doctype_pubid(self, pubid, sysid): # Delegate out to the instance's EntityResolver. # TODO: does not support returning an InputSource from resolveEntity. return self._ent_handler.resolveEntity(pubid, sysid) def resolve_entity_pubid(self, pubid, sysid): # Delegate out to the instance's EntityResolver. # TODO: does not support returning an InputSource from resolveEntity. return self._ent_handler.resolveEntity(pubid, sysid) # --- NamespaceFilter class NamespaceFilter: """An xmlproc application that processes qualified names and reports them as (URI, local-part). It reports errors through the error reporting mechanisms of the parser.""" def __init__(self, parser, content, lexical, driver): self._cont_handler = content self._lex_handler = lexical self.driver = driver self.ns_map = {"" : None} # Current prefix -> URI map self.ns_map["xml"] = "http://www.w3.org/XML/1998/namespace" self.ns_stack = [] # Pushed for each element, used to maint ns_map self.rep_ns_attrs = 0 # Report xmlns-attributes? 
self.parser = parser def set_locator(self, locator): self.driver.set_locator(locator) def doc_start(self): self._cont_handler.startDocument() def doc_end(self): self._cont_handler.endDocument() def handle_comment(self, data): self._lex_handler.comment(data) def handle_start_tag(self,name,attrs): old_ns={} # Reset ns_map to these values when we leave this element del_ns=[] # Delete these prefixes from ns_map when we leave element # attrs=attrs.copy() Will have to do this if more filters are made # Find declarations, update self.ns_map and self.ns_stack - for (a,v) in attrs.items(): + for (a,v) in list(attrs.items()): if a[:6]=="xmlns:": prefix=a[6:] if string.find(prefix,":")!=-1: self.parser.report_error(1900) #if v=="": # self.parser.report_error(1901) elif a=="xmlns": prefix="" else: continue - if self.ns_map.has_key(prefix): + if prefix in self.ns_map: old_ns[prefix]=self.ns_map[prefix] if v: self.ns_map[prefix]=v else: del self.ns_map[prefix] if not self.rep_ns_attrs: del attrs[a] self.ns_stack.append((old_ns,del_ns)) # Process elem and attr names cooked_name = self.__process_name(name) ns = cooked_name[0] rawnames = {} - for (a,v) in attrs.items(): + for (a,v) in list(attrs.items()): del attrs[a] aname = self.__process_name(a, is_attr=1) - if attrs.has_key(aname): + if aname in attrs: self.parser.report_error(1903) attrs[aname] = v rawnames[aname] = a # Report event self._cont_handler.startElementNS(cooked_name, name, AttributesNSImpl(attrs, rawnames)) def handle_end_tag(self, rawname): name = self.__process_name(rawname) # Clean up self.ns_map and self.ns_stack (old_ns,del_ns)=self.ns_stack[-1] del self.ns_stack[-1] self.ns_map.update(old_ns) for prefix in del_ns: del self.ns_map[prefix] self._cont_handler.endElementNS(name, rawname) def handle_data(self, data, start, end): self._cont_handler.characters(data[start:end]) def handle_ignorable_data(self, data, start, end): self._cont_handler.ignorableWhitespace(data[start:end]) def handle_pi(self, target, data): 
self._cont_handler.processingInstruction(target, data) def handle_doctype(self, root, pubId, sysId): self._lex_handler.startDTD(root, pubId, sysId) def set_entity_info(self, xmlver, enc, sddecl): pass # --- Internal methods def __process_name(self, name, default_to=None, is_attr=0): n=string.split(name,":") if len(n)>2: self.parser.report_error(1900) return (None, name) elif len(n)==2: if n[0]=="xmlns": return None, name try: return (self.ns_map[n[0]], n[1]) except KeyError: self.parser.report_error(1902) return None, name elif is_attr: return None, name elif default_to != None: return (default_to, name) - elif self.ns_map.has_key("") and name != "xmlns": + elif "" in self.ns_map and name != "xmlns": return self.ns_map[""], name else: return (None, name) def create_parser(): return XmlprocDriver() diff --git a/xcap/sax/expatreader.py b/xcap/sax/expatreader.py index c5c7775..8498f17 100644 --- a/xcap/sax/expatreader.py +++ b/xcap/sax/expatreader.py @@ -1,429 +1,429 @@ """ SAX driver for the pyexpat C module. This driver works with pyexpat.__version__ == '2.22'. 
""" version = "0.20" from xml.sax._exceptions import * from xml.sax.handler import feature_validation, feature_namespaces from xml.sax.handler import feature_namespace_prefixes from xml.sax.handler import feature_external_ges, feature_external_pes from xml.sax.handler import feature_string_interning from xml.sax.handler import property_xml_string, property_interning_dict # xml.parsers.expat does not raise ImportError in Jython import sys if sys.platform[:4] == "java": raise SAXReaderNotAvailable("expat not available in Java", None) del sys try: from xml.parsers import expat except ImportError: raise SAXReaderNotAvailable("expat not supported", None) else: if not hasattr(expat, "ParserCreate"): raise SAXReaderNotAvailable("expat not supported", None) from xml.sax import xmlreader, saxutils, handler AttributesImpl = xmlreader.AttributesImpl AttributesNSImpl = xmlreader.AttributesNSImpl # If we're using a sufficiently recent version of Python, we can use # weak references to avoid cycles between the parser and content # handler, otherwise we'll just have to pretend. try: import _weakref except ImportError: def _mkproxy(o): return o else: import weakref _mkproxy = weakref.proxy del weakref, _weakref # --- ExpatLocator class ExpatLocator(xmlreader.Locator): """Locator for use with the ExpatParser class. This uses a weak reference to the parser object to avoid creating a circular reference between the parser and the content handler. 
""" def __init__(self, parser): self._ref = _mkproxy(parser) def getColumnNumber(self): parser = self._ref if parser._parser is None: return None return parser._parser.ErrorColumnNumber def getLineNumber(self): parser = self._ref if parser._parser is None: return 1 return parser._parser.ErrorLineNumber def getPublicId(self): parser = self._ref if parser is None: return None return parser._source.getPublicId() def getSystemId(self): parser = self._ref if parser is None: return None return parser._source.getSystemId() # --- ExpatParser class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): """SAX driver for the pyexpat C module.""" def __init__(self, namespaceHandling=0, bufsize=2**16-20): xmlreader.IncrementalParser.__init__(self, bufsize) self._source = xmlreader.InputSource() self._parser = None self._namespaces = namespaceHandling self._lex_handler_prop = None self._parsing = 0 self._entity_stack = [] self._external_ges = 1 self._interning = None self._namespace_prefixes = 1 # XMLReader methods def parse(self, source): "Parse an XML document from a URL or an InputSource." source = saxutils.prepare_input_source(source) self._source = source self.reset() self._cont_handler.setDocumentLocator(ExpatLocator(self)) try: xmlreader.IncrementalParser.parse(self, source) finally: # Drop reference to Expat parser, but read potential # error state before that. Also, if close has completed, # we don't have a parser anymore, anyway. 
if self._parser: self._ColumnNumber = self._parser.ErrorColumnNumber self._LineNumber = self._parser.ErrorLineNumber self._parser = None def prepareParser(self, source): if source.getSystemId() != None: self._parser.SetBase(source.getSystemId()) # Redefined setContentHandler to allow changing handlers during parsing def setContentHandler(self, handler): xmlreader.IncrementalParser.setContentHandler(self, handler) if self._parsing: self._reset_cont_handler() def getFeature(self, name): if name == feature_namespaces: return self._namespaces elif name == feature_string_interning: return self._interning is not None elif name == feature_namespace_prefixes: return self._namespace_prefixes elif name in (feature_validation, feature_external_pes): return 0 elif name == feature_external_ges: return self._external_ges raise SAXNotRecognizedException("Feature '%s' not recognized" % name) def setFeature(self, name, state): if self._parsing: raise SAXNotSupportedException("Cannot set features while parsing") if name == feature_namespaces: self._namespaces = state elif name == feature_external_ges: self._external_ges = state elif name == feature_string_interning: if state: if self._interning is None: self._interning = {} else: self._interning = None elif name == feature_namespace_prefixes: self._namespace_prefixes = state elif name == feature_validation: if state: raise SAXNotSupportedException( "expat does not support validation") elif name == feature_external_pes: if state: raise SAXNotSupportedException( "expat does not read external parameter entities") else: raise SAXNotRecognizedException( "Feature '%s' not recognized" % name) def getProperty(self, name): if name == handler.property_lexical_handler: return self._lex_handler_prop elif name == property_interning_dict: return self._interning elif name == property_xml_string: if self._parser: if hasattr(self._parser, "GetInputContext"): return self._parser.GetInputContext() else: raise SAXNotRecognizedException( "This version 
of expat does not support getting" " the XML string") else: raise SAXNotSupportedException( "XML string cannot be returned when not parsing") raise SAXNotRecognizedException("Property '%s' not recognized" % name) def setProperty(self, name, value): if name == handler.property_lexical_handler: self._lex_handler_prop = value if self._parsing: self._reset_lex_handler_prop() elif name == property_interning_dict: self._interning = value elif name == property_xml_string: raise SAXNotSupportedException("Property '%s' cannot be set" % name) else: raise SAXNotRecognizedException("Property '%s' not recognized" % name) # IncrementalParser methods def feed(self, data, isFinal = 0): if not self._parsing: self.reset() self._parsing = 1 self._cont_handler.startDocument() try: # The isFinal parameter is internal to the expat reader. # If it is set to true, expat will check validity of the entire # document. When feeding chunks, they are not normally final - # except when invoked from close. self._parser.Parse(data, isFinal) - except expat.error, e: + except expat.error as e: exc = SAXParseException(expat.ErrorString(e.code), e, self) # FIXME: when to invoke error()? 
self._err_handler.fatalError(exc) def close(self): if self._entity_stack: # If we are completing an external entity, do nothing here return self.feed("", isFinal = 1) self._cont_handler.endDocument() self._parsing = 0 # break cycle created by expat handlers pointing to our methods self._parser = None def _reset_cont_handler(self): self._parser.ProcessingInstructionHandler = \ self._cont_handler.processingInstruction self._parser.CharacterDataHandler = self._cont_handler.characters def _reset_lex_handler_prop(self): lex = self._lex_handler_prop parser = self._parser if lex is None: parser.CommentHandler = None parser.StartCdataSectionHandler = None parser.EndCdataSectionHandler = None parser.StartDoctypeDeclHandler = None parser.EndDoctypeDeclHandler = None else: parser.CommentHandler = lex.comment parser.StartCdataSectionHandler = lex.startCDATA parser.EndCdataSectionHandler = lex.endCDATA parser.StartDoctypeDeclHandler = self.start_doctype_decl parser.EndDoctypeDeclHandler = lex.endDTD def reset(self): if self._namespaces: self._parser = expat.ParserCreate(None, " ", intern=self._interning) self._parser.namespace_prefixes = 1 self._parser.StartElementHandler = self.start_element_ns self._parser.EndElementHandler = self.end_element_ns else: self._parser = expat.ParserCreate(intern = self._interning) self._parser.StartElementHandler = self.start_element self._parser.EndElementHandler = self.end_element self._reset_cont_handler() self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl self._parser.NotationDeclHandler = self.notation_decl self._parser.StartNamespaceDeclHandler = self.start_namespace_decl self._parser.EndNamespaceDeclHandler = self.end_namespace_decl self._decl_handler_prop = None if self._lex_handler_prop: self._reset_lex_handler_prop() # self._parser.DefaultHandler = # self._parser.DefaultHandlerExpand = # self._parser.NotStandaloneHandler = self._parser.ExternalEntityRefHandler = self.external_entity_ref try: 
self._parser.SkippedEntityHandler = self.skipped_entity_handler except AttributeError: # This pyexpat does not support SkippedEntity pass self._parser.SetParamEntityParsing( expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) self._parsing = 0 self._entity_stack = [] # default values when _parser goes aways self._ColumnNumber = None self._LineNumber = 1 # Locator methods def getColumnNumber(self): if self._parser is None: return self._ColumnNumber return self._parser.ErrorColumnNumber def getLineNumber(self): if self._parser is None: return self._LineNumber return self._parser.ErrorLineNumber def getPublicId(self): return self._source.getPublicId() def getSystemId(self): return self._source.getSystemId() # event handlers def start_element(self, name, attrs): self._cont_handler.startElement(name, AttributesImpl(attrs)) def end_element(self, name): self._cont_handler.endElement(name) def start_element_ns(self, name, attrs): pair = name.split() if len(pair) == 1: # no namespace elem_qname = name pair = (None, name) elif len(pair) == 3: # namespace plus prefix elem_qname = "%s:%s" % (pair[2], pair[1]) pair = pair[0], pair[1] else: # default namespace elem_qname = pair[1] pair = tuple(pair) newattrs = {} qnames = {} - for (aname, value) in attrs.items(): + for (aname, value) in list(attrs.items()): parts = aname.split() length = len(parts) if length == 1: # no namespace qname = aname apair = (None, aname) elif length == 3: qname = "%s:%s" % (parts[2], parts[1]) apair = parts[0], parts[1] else: # default namespace qname = parts[1] apair = tuple(parts) newattrs[apair] = value qnames[apair] = qname self._cont_handler.startElementNS(pair, elem_qname, AttributesNSImpl(newattrs, qnames)) def end_element_ns(self, name): pair = name.split() if len(pair) == 1: pair = (None, name) elif len(pair) == 3: pair = pair[0], pair[1] else: pair = tuple(pair) self._cont_handler.endElementNS(pair, None) # this is not used (call directly to ContentHandler) def processing_instruction(self, 
target, data): self._cont_handler.processingInstruction(target, data) # this is not used (call directly to ContentHandler) def character_data(self, data): self._cont_handler.characters(data) def start_namespace_decl(self, prefix, uri): self._cont_handler.startPrefixMapping(prefix, uri) def end_namespace_decl(self, prefix): self._cont_handler.endPrefixMapping(prefix) def start_doctype_decl(self, name, sysid, pubid, has_internal_subset): self._lex_handler_prop.startDTD(name, pubid, sysid) def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name) def notation_decl(self, name, base, sysid, pubid): self._dtd_handler.notationDecl(name, pubid, sysid) def external_entity_ref(self, context, base, sysid, pubid): if not self._external_ges: return 1 source = self._ent_handler.resolveEntity(pubid, sysid) source = saxutils.prepare_input_source(source, self._source.getSystemId() or "") self._entity_stack.append((self._parser, self._source)) self._parser = self._parser.ExternalEntityParserCreate(context) self._source = source try: xmlreader.IncrementalParser.parse(self, source) except: return 0 # FIXME: save error info here? 
(self._parser, self._source) = self._entity_stack[-1] del self._entity_stack[-1] return 1 def skipped_entity_handler(self, name, is_pe): if is_pe: # The SAX spec requires to report skipped PEs with a '%' name = '%'+name self._cont_handler.skippedEntity(name) # --- def create_parser(*args, **kwargs): - return apply(ExpatParser, args, kwargs) + return ExpatParser(*args, **kwargs) # --- if __name__ == "__main__": import xml.sax p = create_parser() p.setContentHandler(xml.sax.XMLGenerator()) p.setErrorHandler(xml.sax.ErrorHandler()) p.parse("../../../hamlet.xml") diff --git a/xcap/sax/handler.py b/xcap/sax/handler.py index 6342e55..5a4394a 100644 --- a/xcap/sax/handler.py +++ b/xcap/sax/handler.py @@ -1,345 +1,345 @@ """ This module contains the core classes of version 2.0 of SAX for Python. This file provides only default classes with absolutely minimum functionality, from which drivers and applications can be subclassed. Many of these classes are empty and are included only as documentation of the interfaces. $Id: handler.py,v 1.5 2002/02/14 08:09:36 loewis Exp $ """ version = '2.0beta' #============================================================================ # # HANDLER INTERFACES # #============================================================================ # ===== ERRORHANDLER ===== class ErrorHandler: """Basic interface for SAX error handlers. If you create an object that implements this interface, then register the object with your XMLReader, the parser will call the methods in your object to report all warnings and errors. There are three levels of errors available: warnings, (possibly) recoverable errors, and unrecoverable errors. All methods take a SAXParseException as the only parameter.""" def error(self, exception): "Handle a recoverable error." raise exception def fatalError(self, exception): "Handle a non-recoverable error." raise exception def warning(self, exception): "Handle a warning." 
- print exception + print(exception) # ===== CONTENTHANDLER ===== class ContentHandler: """Interface for receiving logical document content events. This is the main callback interface in SAX, and the one most important to applications. The order of events in this interface mirrors the order of the information in the document.""" def __init__(self): self._locator = None def setDocumentLocator(self, locator): """Called by the parser to give the application a locator for locating the origin of document events. SAX parsers are strongly encouraged (though not absolutely required) to supply a locator: if it does so, it must supply the locator to the application by invoking this method before invoking any of the other methods in the DocumentHandler interface. The locator allows the application to determine the end position of any document-related event, even if the parser is not reporting an error. Typically, the application will use this information for reporting its own errors (such as character content that does not match an application's business rules). The information returned by the locator is probably not sufficient for use with a search engine. Note that the locator will return correct information only during the invocation of the events in this interface. The application should not attempt to use it at any other time.""" self._locator = locator def startDocument(self): """Receive notification of the beginning of a document. The SAX parser will invoke this method only once, before any other methods in this interface or in DTDHandler (except for setDocumentLocator).""" def endDocument(self): """Receive notification of the end of a document. The SAX parser will invoke this method only once, and it will be the last method invoked during the parse. 
The parser shall not invoke this method until it has either abandoned parsing (because of an unrecoverable error) or reached the end of input.""" def startPrefixMapping(self, prefix, uri): """Begin the scope of a prefix-URI Namespace mapping. The information from this event is not necessary for normal Namespace processing: the SAX XML reader will automatically replace prefixes for element and attribute names when the http://xml.org/sax/features/namespaces feature is true (the default). There are cases, however, when applications need to use prefixes in character data or in attribute values, where they cannot safely be expanded automatically; the start/endPrefixMapping event supplies the information to the application to expand prefixes in those contexts itself, if necessary. Note that start/endPrefixMapping events are not guaranteed to be properly nested relative to each-other: all startPrefixMapping events will occur before the corresponding startElement event, and all endPrefixMapping events will occur after the corresponding endElement event, but their order is not guaranteed.""" def endPrefixMapping(self, prefix): """End the scope of a prefix-URI mapping. See startPrefixMapping for details. This event will always occur after the corresponding endElement event, but the order of endPrefixMapping events is not otherwise guaranteed.""" def startElement(self, name, attrs): """Signals the start of an element in non-namespace mode. The name parameter contains the raw XML 1.0 name of the element type as a string and the attrs parameter holds an instance of the Attributes class containing the attributes of the element.""" def endElement(self, name): """Signals the end of an element in non-namespace mode. The name parameter contains the name of the element type, just as with the startElement event.""" def startElementNS(self, name, qname, attrs): """Signals the start of an element in namespace mode. 
The name parameter contains the name of the element type as a (uri, localname) tuple, the qname parameter the raw XML 1.0 name used in the source document, and the attrs parameter holds an instance of the Attributes class containing the attributes of the element. The uri part of the name tuple is None for elements which have no namespace.""" def endElementNS(self, name, qname): """Signals the end of an element in namespace mode. The name parameter contains the name of the element type, just as with the startElementNS event.""" def characters(self, content): """Receive notification of character data. The Parser will call this method to report each chunk of character data. SAX parsers may return all contiguous character data in a single chunk, or they may split it into several chunks; however, all of the characters in any single event must come from the same external entity so that the Locator provides useful information.""" def ignorableWhitespace(self, whitespace): """Receive notification of ignorable whitespace in element content. Validating Parsers must use this method to report each chunk of ignorable whitespace (see the W3C XML 1.0 recommendation, section 2.10): non-validating parsers may also use this method if they are capable of parsing and using content models. SAX parsers may return all contiguous whitespace in a single chunk, or they may split it into several chunks; however, all of the characters in any single event must come from the same external entity, so that the Locator provides useful information. The application must not attempt to read from the array outside of the specified range.""" def processingInstruction(self, target, data): """Receive notification of a processing instruction. The Parser will invoke this method once for each processing instruction found: note that processing instructions may occur before or after the main document element. 
A SAX parser should never report an XML declaration (XML 1.0, section 2.8) or a text declaration (XML 1.0, section 4.3.1) using this method.""" def skippedEntity(self, name): """Receive notification of a skipped entity. The Parser will invoke this method once for each entity skipped. Non-validating processors may skip entities if they have not seen the declarations (because, for example, the entity was declared in an external DTD subset). All processors may skip external entities, depending on the values of the http://xml.org/sax/features/external-general-entities and the http://xml.org/sax/features/external-parameter-entities properties.""" # ===== DTDHandler ===== class DTDHandler: """Handle DTD events. This interface specifies only those DTD events required for basic parsing (unparsed entities and attributes).""" def notationDecl(self, name, publicId, systemId): "Handle a notation declaration event." def unparsedEntityDecl(self, name, publicId, systemId, ndata): "Handle an unparsed entity declaration event." # ===== ENTITYRESOLVER ===== class EntityResolver: """Basic interface for resolving entities. If you create an object implementing this interface, then register the object with your Parser, the parser will call the method in your object to resolve all external entities. Note that DefaultHandler implements this interface with the default behaviour.""" def resolveEntity(self, publicId, systemId): """Resolve the system identifier of an entity and return either the system identifier to read from as a string, or an InputSource to read from.""" return systemId #============================================================================ # # CORE FEATURES # #============================================================================ feature_namespaces = "http://xml.org/sax/features/namespaces" # true: Perform Namespace processing (default). # false: Optionally do not perform Namespace processing # (implies namespace-prefixes). 
# access: (parsing) read-only; (not parsing) read/write feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes" # true: Report the original prefixed names and attributes used for Namespace # declarations. # false: Do not report attributes used for Namespace declarations, and # optionally do not report original prefixed names (default). # access: (parsing) read-only; (not parsing) read/write feature_string_interning = "http://xml.org/sax/features/string-interning" # true: All element names, prefixes, attribute names, Namespace URIs, and # local names are interned using the built-in intern function. # false: Names are not necessarily interned, although they may be (default). # access: (parsing) read-only; (not parsing) read/write feature_validation = "http://xml.org/sax/features/validation" # true: Report all validation errors (implies external-general-entities and # external-parameter-entities). # false: Do not report validation errors. # access: (parsing) read-only; (not parsing) read/write feature_external_ges = "http://xml.org/sax/features/external-general-entities" # true: Include all external general (text) entities. # false: Do not include external general entities. # access: (parsing) read-only; (not parsing) read/write feature_external_pes = "http://xml.org/sax/features/external-parameter-entities" # true: Include all external parameter entities, including the external # DTD subset. # false: Do not include any external parameter entities, even the external # DTD subset. 
# access: (parsing) read-only; (not parsing) read/write all_features = [feature_namespaces, feature_namespace_prefixes, feature_string_interning, feature_validation, feature_external_ges, feature_external_pes] #============================================================================ # # CORE PROPERTIES # #============================================================================ property_lexical_handler = "http://xml.org/sax/properties/lexical-handler" # data type: xml.sax.sax2lib.LexicalHandler # description: An optional extension handler for lexical events like comments. # access: read/write property_declaration_handler = "http://xml.org/sax/properties/declaration-handler" # data type: xml.sax.sax2lib.DeclHandler # description: An optional extension handler for DTD-related events other # than notations and unparsed entities. # access: read/write property_dom_node = "http://xml.org/sax/properties/dom-node" # data type: org.w3c.dom.Node # description: When parsing, the current DOM node being visited if this is # a DOM iterator; when not parsing, the root DOM node for # iteration. # access: (parsing) read-only; (not parsing) read/write property_xml_string = "http://xml.org/sax/properties/xml-string" # data type: String # description: The literal string of characters that was the source for # the current event. # access: read-only property_encoding = "http://www.python.org/sax/properties/encoding" # data type: String # description: The name of the encoding to assume for input data. # access: write: set the encoding, e.g. established by a higher-level # protocol. May change during parsing (e.g. after # processing a META tag) # read: return the current encoding (possibly established through # auto-detection. 
# initial value: UTF-8 # property_interning_dict = "http://www.python.org/sax/properties/interning-dict" # data type: Dictionary # description: The dictionary used to intern common strings in the document # access: write: Request that the parser uses a specific dictionary, to # allow interning across different documents # read: return the current interning dictionary, or None # all_properties = [property_lexical_handler, property_dom_node, property_declaration_handler, property_xml_string, property_encoding, property_interning_dict] diff --git a/xcap/sax/sax2exts.py b/xcap/sax/sax2exts.py index 9ab39d4..41584a5 100644 --- a/xcap/sax/sax2exts.py +++ b/xcap/sax/sax2exts.py @@ -1,37 +1,37 @@ """ Various extensions to the core SAX 2.0 API. $Id: sax2exts.py,v 1.5 2001/12/30 22:17:03 loewis Exp $ """ -import saxexts,saxlib +from . import saxexts,saxlib # In SAX2, validation is turned-on through a property. Make sure # that all parsers returned from this factory are validating class ValidatingReaderFactory(saxexts.ParserFactory): def make_parser(self, parser_list = []): p = saxexts.ParserFactory.make_parser(self,parser_list) p.setFeature(saxlib.feature_validation, 1) return p # --- XMLReader factory XMLReaderFactory = saxexts.ParserFactory # --- Creating parser factories XMLParserFactory = XMLReaderFactory(["xml.sax.drivers2.drv_pyexpat", "xml.sax.drivers2.drv_xmlproc"]) XMLValParserFactory = ValidatingReaderFactory(["xml.sax.drivers2.drv_xmlproc"]) HTMLParserFactory=XMLReaderFactory(["xml.sax.drivers2.drv_htmllib", "xml.sax.drivers2.drv_sgmlop", "xml.sax.drivers2.drv_sgmllib"]) SGMLParserFactory=XMLReaderFactory(["xml.sax.drivers2.drv_sgmlop", "xml.sax.drivers2.drv_sgmllib"]) def make_parser(parser_list = []): return XMLParserFactory.make_parser(parser_list) diff --git a/xcap/sax/saxexts.py b/xcap/sax/saxexts.py index d9cfa37..16870ea 100644 --- a/xcap/sax/saxexts.py +++ b/xcap/sax/saxexts.py @@ -1,168 +1,168 @@ """ A module of experimental extensions to the standard SAX 
interface. $Id: saxexts.py,v 1.14 2003/01/21 13:02:44 loewis Exp $ """ from . import _exceptions, handler import sys, os, types # --- Parser factory class ParserFactory: """A general class to be used by applications for creating parsers on foreign systems where it is unknown which parsers exist.""" def __init__(self,list=[]): # Python 2 compatibility: let consider environment variables # and properties override list argument - if os.environ.has_key("PY_SAX_PARSER"): + if "PY_SAX_PARSER" in os.environ: list = os.environ["PY_SAX_PARSER"].split(",") _key = "python.xml.sax.parser" if sys.platform[:4] == "java" \ and sys.registry.containsKey(_key): list = sys.registry.getProperty(_key).split(",") self.parsers=list def get_parser_list(self): "Returns the list of possible drivers." return self.parsers def set_parser_list(self,list): "Sets the driver list." self.parsers=list if sys.platform[ : 4] == "java": def _create_parser(self,parser_name): from org.python.core import imp drv_module = imp.importName(parser_name, 0, globals()) return drv_module.create_parser() else: def _create_parser(self,parser_name): drv_module = __import__(parser_name,{},{},['create_parser']) return drv_module.create_parser() def make_parser(self, parser_list = []): """Returns a SAX driver for the first available parser of the parsers in the list. Note that the list is one of drivers, so it first tries the driver and if that exists imports it to see if the parser also exists. If no parsers are available a SAXException is thrown. 
Accepts a list of driver package names as an optional argument.""" import sys # SAX1 expected a single package name as optional argument # Python 2 changed this to be a list of parser names # We now support both, as well as None (which was the default) if parser_list is None: parser_list = [] - elif type(parser_list) == types.StringType: + elif type(parser_list) == str: parser_list = [parser_list] for parser_name in parser_list+self.parsers: try: return self._create_parser(parser_name) - except ImportError,e: - if sys.modules.has_key(parser_name): + except ImportError as e: + if parser_name in sys.modules: # The parser module was found, but importing it # failed unexpectedly, pass this exception through raise - except _exceptions.SAXReaderNotAvailable, e: + except _exceptions.SAXReaderNotAvailable as e: # The parser module detected that it won't work properly, # so mark it as unusable, and try the next one def _create_parser(msg = str(e)): raise _exceptions.SAXReaderNotAvailable(msg) sys.modules[parser_name].create_parser = _create_parser raise _exceptions.SAXReaderNotAvailable("No parsers found", None) # --- Experimental extension to Parser interface -import saxlib +from . import saxlib class ExtendedParser(saxlib.Parser): "Experimental unofficial SAX level 2 extended parser interface." def get_parser_name(self): "Returns a single-word parser name." raise _exceptions.SAXException("Method not supported.",None) def get_parser_version(self): """Returns the version of the imported parser, which may not be the one the driver was implemented for.""" raise _exceptions.SAXException("Method not supported.",None) def get_driver_version(self): "Returns the version number of the driver." raise _exceptions.SAXException("Method not supported.",None) def is_validating(self): "True if the parser is validating, false otherwise." 
raise _exceptions.SAXException("Method not supported.",None) def is_dtd_reading(self): """True if the parser is non-validating, but conforms to the spec by reading the DTD.""" raise _exceptions.SAXException("Method not supported.",None) def reset(self): "Makes the parser start parsing afresh." raise _exceptions.SAXException("Method not supported.",None) def feed(self,data): "Feeds data to the parser." raise _exceptions.SAXException("Method not supported.",None) def close(self): "Called after the last call to feed, when there are no more data." raise _exceptions.SAXException("Method not supported.",None) # --- Experimental document handler which does not slice strings class NosliceDocumentHandler(saxlib.DocumentHandler): """A document handler that does not force the client application to slice character data strings.""" def __init__(self): handler.DocumentHandler.__init__() self.characters=self.safe_handler def safe_handler(self,data,start,length): """A characters event handler that always works, but doesn't always slice strings.""" if start==0 and length==len(data): self.handle_data(data) else: self.handle_data(data[start:start+length]) def slice_handler(self,data,start,length): "A character event handler that always slices strings." self.handle_data(data[start:start+length]) def noslice_handler(self,data,start,length): "A character event handler that never slices strings." self.handle_data(data) def handle_data(self,data): "This is the character data event method to override." 
pass # --- Creating parser factories XMLParserFactory=ParserFactory(["xml.sax.drivers.drv_pyexpat", "xml.sax.drivers.drv_xmltok", "xml.sax.drivers.drv_xmlproc", "xml.sax.drivers.drv_xmltoolkit", "xml.sax.drivers.drv_xmllib", "xml.sax.drivers.drv_xmldc", "xml.sax.drivers.drv_sgmlop"]) XMLValParserFactory=ParserFactory(["xml.sax.drivers.drv_xmlproc_val"]) HTMLParserFactory=ParserFactory(["xml.sax.drivers.drv_htmllib", "xml.sax.drivers.drv_sgmlop", "xml.sax.drivers.drv_sgmllib"]) SGMLParserFactory=ParserFactory(["xml.sax.drivers.drv_sgmlop", "xml.sax.drivers.drv_sgmllib"]) def make_parser(parser_list = []): return XMLParserFactory.make_parser(parser_list) diff --git a/xcap/sax/saxlib.py b/xcap/sax/saxlib.py index 6460379..f77e43a 100644 --- a/xcap/sax/saxlib.py +++ b/xcap/sax/saxlib.py @@ -1,430 +1,430 @@ """ This module contains the core classes of version 2.0 of SAX for Python. This file provides only default classes with absolutely minimum functionality, from which drivers and applications can be subclassed. Many of these classes are empty and are included only as documentation of the interfaces. $Id: saxlib.py,v 1.12 2002/05/10 14:49:21 akuchling Exp $ """ version = '2.0beta' # A number of interfaces used to live in saxlib, but are now in # various other modules for Python 2 compatibility. 
If nobody uses # them here any longer, the references can be removed -from handler import ErrorHandler, ContentHandler, DTDHandler, EntityResolver -from xmlreader import XMLReader, InputSource, Locator, IncrementalParser -from _exceptions import * +from .handler import ErrorHandler, ContentHandler, DTDHandler, EntityResolver +from .xmlreader import XMLReader, InputSource, Locator, IncrementalParser +from ._exceptions import * -from handler import \ +from .handler import \ feature_namespaces,\ feature_namespace_prefixes,\ feature_string_interning,\ feature_validation,\ feature_external_ges,\ feature_external_pes,\ all_features,\ property_lexical_handler,\ property_declaration_handler,\ property_dom_node,\ property_xml_string,\ all_properties #============================================================================ # # MAIN INTERFACES # #============================================================================ # ===== XMLFILTER ===== class XMLFilter(XMLReader): """Interface for a SAX2 parser filter. A parser filter is an XMLReader that gets its events from another XMLReader (which may in turn also be a filter) rather than from a primary source like a document or other non-SAX data source. Filters can modify a stream of events before passing it on to its handlers.""" def __init__(self, parent = None): """Creates a filter instance, allowing applications to set the parent on instantiation.""" XMLReader.__init__(self) self._parent = parent def setParent(self, parent): """Sets the parent XMLReader of this filter. The argument may not be None.""" self._parent = parent def getParent(self): "Returns the parent of this filter." return self._parent # ===== ATTRIBUTES ===== class Attributes: """Interface for a list of XML attributes. Contains a list of XML attributes, accessible by name.""" def getLength(self): "Returns the number of attributes in the list." 
raise NotImplementedError("This method must be implemented!") def getType(self, name): "Returns the type of the attribute with the given name." raise NotImplementedError("This method must be implemented!") def getValue(self, name): "Returns the value of the attribute with the given name." raise NotImplementedError("This method must be implemented!") def getValueByQName(self, name): """Returns the value of the attribute with the given raw (or qualified) name.""" raise NotImplementedError("This method must be implemented!") def getNameByQName(self, name): """Returns the namespace name of the attribute with the given raw (or qualified) name.""" raise NotImplementedError("This method must be implemented!") def getNames(self): """Returns a list of the names of all attributes in the list.""" raise NotImplementedError("This method must be implemented!") def getQNames(self): """Returns a list of the raw qualified names of all attributes in the list.""" raise NotImplementedError("This method must be implemented!") def __len__(self): "Alias for getLength." raise NotImplementedError("This method must be implemented!") def __getitem__(self, name): "Alias for getValue." raise NotImplementedError("This method must be implemented!") def keys(self): "Returns a list of the attribute names in the list." raise NotImplementedError("This method must be implemented!") def has_key(self, name): "True if the attribute is in the list, false otherwise." raise NotImplementedError("This method must be implemented!") def get(self, name, alternative=None): """Return the value associated with attribute name; if it is not available, then return the alternative.""" raise NotImplementedError("This method must be implemented!") def copy(self): "Return a copy of the Attributes object." raise NotImplementedError("This method must be implemented!") def items(self): "Return a list of (attribute_name, value) pairs." 
raise NotImplementedError("This method must be implemented!") def values(self): "Return a list of all attribute values." raise NotImplementedError("This method must be implemented!") #============================================================================ # # HANDLER INTERFACES # #============================================================================ # ===== DECLHANDLER ===== class DeclHandler: """Optional SAX2 handler for DTD declaration events. Note that some DTD declarations are already reported through the DTDHandler interface. All events reported to this handler will occur between the startDTD and endDTD events of the LexicalHandler. To set the DeclHandler for an XMLReader, use the setProperty method with the identifier http://xml.org/sax/handlers/DeclHandler.""" def attributeDecl(self, elem_name, attr_name, type, value_def, value): """Report an attribute type declaration. Only the first declaration will be reported. The type will be one of the strings "CDATA", "ID", "IDREF", "IDREFS", "NMTOKEN", "NMTOKENS", "ENTITY", "ENTITIES", or "NOTATION", or a list of names (in the case of enumerated definitions). elem_name is the element type name, attr_name the attribute type name, type a string representing the attribute type, value_def a string representing the default declaration ('#IMPLIED', '#REQUIRED', '#FIXED' or None). value is a string representing the attribute's default value, or None if there is none.""" def elementDecl(self, elem_name, content_model): """Report an element type declaration. Only the first declaration will be reported. content_model is the string 'EMPTY', the string 'ANY' or the content model structure represented as tuple (separator, tokens, modifier) where separator is the separator in the token list (that is, '|' or ','), tokens is the list of tokens (element type names or tuples representing parentheses) and modifier is the quantity modifier ('*', '?' 
or '+').""" def internalEntityDecl(self, name, value): """Report an internal entity declaration. Only the first declaration of an entity will be reported. name is the name of the entity. If it is a parameter entity, the name will begin with '%'. value is the replacement text of the entity.""" def externalEntityDecl(self, name, public_id, system_id): """Report a parsed entity declaration. (Unparsed entities are reported to the DTDHandler.) Only the first declaration for each entity will be reported. name is the name of the entity. If it is a parameter entity, the name will begin with '%'. public_id and system_id are the public and system identifiers of the entity. public_id will be None if none were declared.""" # ===== LEXICALHANDLER ===== class LexicalHandler: """Optional SAX2 handler for lexical events. This handler is used to obtain lexical information about an XML document, that is, information about how the document was encoded (as opposed to what it contains, which is reported to the ContentHandler), such as comments and CDATA marked section boundaries. To set the LexicalHandler of an XMLReader, use the setProperty method with the property identifier 'http://xml.org/sax/handlers/LexicalHandler'. There is no guarantee that the XMLReader will support or recognize this property.""" def comment(self, content): """Reports a comment anywhere in the document (including the DTD and outside the document element). content is a string that holds the contents of the comment.""" def startDTD(self, name, public_id, system_id): """Report the start of the DTD declarations, if the document has an associated DTD. A startEntity event will be reported before declaration events from the external DTD subset are reported, and this can be used to infer from which subset DTD declarations derive. 
name is the name of the document element type, public_id the public identifier of the DTD (or None if none were supplied) and system_id the system identfier of the external subset (or None if none were supplied).""" def endDTD(self): "Signals the end of DTD declarations." def startEntity(self, name): """Report the beginning of an entity. The start and end of the document entity is not reported. The start and end of the external DTD subset is reported with the pseudo-name '[dtd]'. Skipped entities will be reported through the skippedEntity event of the ContentHandler rather than through this event. name is the name of the entity. If it is a parameter entity, the name will begin with '%'.""" def endEntity(self, name): """Reports the end of an entity. name is the name of the entity, and follows the same conventions as for startEntity.""" def startCDATA(self): """Reports the beginning of a CDATA marked section. The contents of the CDATA marked section will be reported through the characters event.""" def endCDATA(self): "Reports the end of a CDATA marked section." #============================================================================ # # SAX 1.0 COMPATIBILITY CLASSES # Note that these are all deprecated. # #============================================================================ # ===== ATTRIBUTELIST ===== class AttributeList: """Interface for an attribute list. This interface provides information about a list of attributes for an element (only specified or defaulted attributes will be reported). Note that the information returned by this object will be valid only during the scope of the DocumentHandler.startElement callback, and the attributes will not necessarily be provided in the order declared or specified.""" def getLength(self): "Return the number of attributes in list." def getName(self, i): "Return the name of an attribute in the list." def getType(self, i): """Return the type of an attribute in the list. 
(Parameter can be either integer index or attribute name.)""" def getValue(self, i): """Return the value of an attribute in the list. (Parameter can be either integer index or attribute name.)""" def __len__(self): "Alias for getLength." def __getitem__(self, key): "Alias for getName (if key is an integer) and getValue (if string)." def keys(self): "Returns a list of the attribute names." def has_key(self, key): "True if the attribute is in the list, false otherwise." def get(self, key, alternative=None): """Return the value associated with attribute name; if it is not available, then return the alternative.""" def copy(self): "Return a copy of the AttributeList." def items(self): "Return a list of (attribute_name,value) pairs." def values(self): "Return a list of all attribute values." # ===== DOCUMENTHANDLER ===== class DocumentHandler: """Handle general document events. This is the main client interface for SAX: it contains callbacks for the most important document events, such as the start and end of elements. You need to create an object that implements this interface, and then register it with the Parser. If you do not want to implement the entire interface, you can derive a class from HandlerBase, which implements the default functionality. You can find the location of any document event using the Locator interface supplied by setDocumentLocator().""" def characters(self, ch, start, length): "Handle a character data event." def endDocument(self): "Handle an event for the end of a document." def endElement(self, name): "Handle an event for the end of an element." def ignorableWhitespace(self, ch, start, length): "Handle an event for ignorable whitespace in element content." def processingInstruction(self, target, data): "Handle a processing instruction event." def setDocumentLocator(self, locator): "Receive an object for locating the origin of SAX document events." def startDocument(self): "Handle an event for the beginning of a document." 
def startElement(self, name, atts): "Handle an event for the beginning of an element." # ===== HANDLERBASE ===== class HandlerBase(EntityResolver, DTDHandler, DocumentHandler,\ ErrorHandler): """Default base class for handlers. This class implements the default behaviour for four SAX interfaces: EntityResolver, DTDHandler, DocumentHandler, and ErrorHandler: rather than implementing those full interfaces, you may simply extend this class and override the methods that you need. Note that the use of this class is optional (you are free to implement the interfaces directly if you wish).""" # ===== PARSER ===== class Parser: """Basic interface for SAX (Simple API for XML) parsers. All SAX parsers must implement this basic interface: it allows users to register handlers for different types of events and to initiate a parse from a URI, a character stream, or a byte stream. SAX parsers should also implement a zero-argument constructor.""" def __init__(self): self.doc_handler = DocumentHandler() self.dtd_handler = DTDHandler() self.ent_handler = EntityResolver() self.err_handler = ErrorHandler() def parse(self, systemId): "Parse an XML document from a system identifier." def parseFile(self, fileobj): "Parse an XML document from a file-like object." def setDocumentHandler(self, handler): "Register an object to receive basic document-related events." self.doc_handler=handler def setDTDHandler(self, handler): "Register an object to receive basic DTD-related events." self.dtd_handler=handler def setEntityResolver(self, resolver): "Register an object to resolve external entities." self.ent_handler=resolver def setErrorHandler(self, handler): "Register an object to receive error-message events." self.err_handler=handler def setLocale(self, locale): """Allow an application to set the locale for errors and warnings. SAX parsers are not required to provide localisation for errors and warnings; if they cannot support the requested locale, however, they must throw a SAX exception. 
Applications may request a locale change in the middle of a parse.""" raise SAXNotSupportedException("Locale support not implemented") diff --git a/xcap/sax/saxutils.py b/xcap/sax/saxutils.py index d668c9b..46c1fd5 100644 --- a/xcap/sax/saxutils.py +++ b/xcap/sax/saxutils.py @@ -1,809 +1,809 @@ """ A library of useful helper classes to the saxlib classes, for the convenience of application and driver writers. $Id: saxutils.py,v 1.35 2004/03/20 07:46:04 fdrake Exp $ """ -import os, urlparse, urllib2, types -import handler -import xmlreader +import os, urllib.parse, urllib.request, urllib.error, urllib.parse, types +from . import handler +from . import xmlreader import sys, _exceptions, saxlib try: - _StringTypes = [types.StringType, types.UnicodeType] + _StringTypes = [bytes, str] except AttributeError: # 1.5 compatibility:UnicodeType not defined - _StringTypes = [types.StringType] + _StringTypes = [bytes] def __dict_replace(s, d): """Replace substrings of a string using a dictionary.""" - for key, value in d.items(): + for key, value in list(d.items()): s = s.replace(key, value) return s def escape(data, entities={}): """Escape &, <, and > in a string of data. You can escape other strings of data by passing a dictionary as the optional entities parameter. The keys and values must all be strings; each key will be replaced with its corresponding value. """ data = data.replace("&", "&") data = data.replace("<", "<") data = data.replace(">", ">") if entities: data = __dict_replace(data, entities) return data def unescape(data, entities={}): """Unescape &, <, and > in a string of data. You can unescape other strings of data by passing a dictionary as the optional entities parameter. The keys and values must all be strings; each key will be replaced with its corresponding value. 
""" data = data.replace("<", "<") data = data.replace(">", ">") if entities: data = __dict_replace(data, entities) # must do ampersand last return data.replace("&", "&") def quoteattr(data, entities={}): """Escape and quote an attribute value. Escape &, <, and > in a string of data, then quote it for use as an attribute value. The \" character will be escaped as well, if necessary. You can escape other strings of data by passing a dictionary as the optional entities parameter. The keys and values must all be strings; each key will be replaced with its corresponding value. """ data = escape(data, entities) if '"' in data: if "'" in data: data = '"%s"' % data.replace('"', """) else: data = "'%s'" % data else: data = '"%s"' % data return data # --- DefaultHandler class DefaultHandler(handler.EntityResolver, handler.DTDHandler, handler.ContentHandler, handler.ErrorHandler): """Default base class for SAX2 event handlers. Implements empty methods for all callback methods, which can be overridden by application implementors. Replaces the deprecated SAX1 HandlerBase class.""" # --- Location class Location: """Represents a location in an XML entity. Initialized by being passed a locator, from which it reads off the current location, which is then stored internally.""" def __init__(self, locator): self.__col = locator.getColumnNumber() self.__line = locator.getLineNumber() self.__pubid = locator.getPublicId() self.__sysid = locator.getSystemId() def getColumnNumber(self): return self.__col def getLineNumber(self): return self.__line def getPublicId(self): return self.__pubid def getSystemId(self): return self.__sysid def __str__(self): if self.__line is None: line = "?" else: line = self.__line if self.__col is None: col = "?" else: col = self.__col return "%s:%s:%s" % ( self.__sysid or self.__pubid or "", line, col) # --- ErrorPrinter class ErrorPrinter: "A simple class that just prints error messages to standard out." 
def __init__(self, level=0, outfile=sys.stderr): self._level = level self._outfile = outfile def warning(self, exception): if self._level <= 0: self._outfile.write("WARNING in %s: %s\n" % (self.__getpos(exception), exception.getMessage())) def error(self, exception): if self._level <= 1: self._outfile.write("ERROR in %s: %s\n" % (self.__getpos(exception), exception.getMessage())) def fatalError(self, exception): if self._level <= 2: self._outfile.write("FATAL ERROR in %s: %s\n" % (self.__getpos(exception), exception.getMessage())) def __getpos(self, exception): if isinstance(exception, _exceptions.SAXParseException): return "%s:%s:%s" % (exception.getSystemId(), exception.getLineNumber(), exception.getColumnNumber()) else: return "" # --- ErrorRaiser class ErrorRaiser: "A simple class that just raises the exceptions it is passed." def __init__(self, level = 0): self._level = level def error(self, exception): if self._level <= 1: raise exception def fatalError(self, exception): if self._level <= 2: raise exception def warning(self, exception): if self._level <= 0: raise exception # --- AttributesImpl now lives in xmlreader -from xmlreader import AttributesImpl +from .xmlreader import AttributesImpl # --- XMLGenerator is the SAX2 ContentHandler for writing back XML import codecs def _outputwrapper(stream,encoding): writerclass = codecs.lookup(encoding)[3] return writerclass(stream) if hasattr(codecs, "register_error"): def writetext(stream, text, entities={}): stream.errors = "xmlcharrefreplace" stream.write(escape(text, entities)) stream.errors = "strict" else: def writetext(stream, text, entities={}): text = escape(text, entities) try: stream.write(text) except UnicodeError: for c in text: try: stream.write(c) except UnicodeError: - stream.write(u"&#%d;" % ord(c)) + stream.write("&#%d;" % ord(c)) def writeattr(stream, text): countdouble = text.count('"') if countdouble: countsingle = text.count("'") if countdouble <= countsingle: entities = {'"': """} quote = '"' 
else: entities = {"'": "'"} quote = "'" else: entities = {} quote = '"' stream.write(quote) writetext(stream, text, entities) stream.write(quote) class XMLGenerator(handler.ContentHandler): GENERATED_PREFIX = "xml.sax.saxutils.prefix%s" def __init__(self, out=None, encoding="iso-8859-1"): if out is None: import sys out = sys.stdout handler.ContentHandler.__init__(self) self._out = _outputwrapper(out,encoding) self._ns_contexts = [{}] # contains uri -> prefix dicts self._current_context = self._ns_contexts[-1] self._undeclared_ns_maps = [] self._encoding = encoding self._generated_prefix_ctr = 0 return # ContentHandler methods def startDocument(self): self._out.write('\n' % self._encoding) def startPrefixMapping(self, prefix, uri): self._ns_contexts.append(self._current_context.copy()) self._current_context[uri] = prefix self._undeclared_ns_maps.append((prefix, uri)) def endPrefixMapping(self, prefix): self._current_context = self._ns_contexts[-1] del self._ns_contexts[-1] def startElement(self, name, attrs): self._out.write('<' + name) - for (name, value) in attrs.items(): + for (name, value) in list(attrs.items()): self._out.write(' %s=' % name) writeattr(self._out, value) self._out.write('>') def endElement(self, name): self._out.write('' % name) def startElementNS(self, name, qname, attrs): if name[0] is None: name = name[1] elif self._current_context[name[0]] is None: # default namespace name = name[1] else: name = self._current_context[name[0]] + ":" + name[1] self._out.write('<' + name) for k,v in self._undeclared_ns_maps: if k is None: self._out.write(' xmlns="%s"' % (v or '')) else: self._out.write(' xmlns:%s="%s"' % (k,v)) self._undeclared_ns_maps = [] - for (name, value) in attrs.items(): + for (name, value) in list(attrs.items()): if name[0] is None: name = name[1] elif self._current_context[name[0]] is None: # default namespace #If an attribute has a nsuri but not a prefix, we must #create a prefix and add a nsdecl prefix = self.GENERATED_PREFIX % 
self._generated_prefix_ctr self._generated_prefix_ctr = self._generated_prefix_ctr + 1 name = prefix + ':' + name[1] self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(name[0]))) self._current_context[name[0]] = prefix else: name = self._current_context[name[0]] + ":" + name[1] self._out.write(' %s=' % name) writeattr(self._out, value) self._out.write('>') def endElementNS(self, name, qname): # XXX: if qname is not None, we better use it. # Python 2.0b2 requires us to use the recorded prefix for # name[0], though if name[0] is None: qname = name[1] elif self._current_context[name[0]] is None: qname = name[1] else: qname = self._current_context[name[0]] + ":" + name[1] self._out.write('' % qname) def characters(self, content): writetext(self._out, content) def ignorableWhitespace(self, content): self._out.write(content) def processingInstruction(self, target, data): self._out.write('' % (target, data)) class LexicalXMLGenerator(XMLGenerator, saxlib.LexicalHandler): """A XMLGenerator that also supports the LexicalHandler interface""" def __init__(self, out=None, encoding="iso-8859-1"): XMLGenerator.__init__(self, out, encoding) self._in_cdata = 0 def characters(self, content): if self._in_cdata: self._out.write(content.replace(']]>', ']]>]]>') def comment(self, content): self._out.write('') def startCDATA(self): self._in_cdata = 1 self._out.write('') # --- ContentGenerator is the SAX1 DocumentHandler for writing back XML class ContentGenerator(XMLGenerator): def characters(self, str, start, end): # In SAX1, characters receives start and end; in SAX2, it receives # a string. For plain strings, we may want to use a buffer object. return XMLGenerator.characters(self, str[start:start+end]) # --- XMLFilterImpl class XMLFilterBase(saxlib.XMLFilter): """This class is designed to sit between an XMLReader and the client application's event handlers. 
By default, it does nothing but pass requests up to the reader and events on to the handlers unmodified, but subclasses can override specific methods to modify the event stream or the configuration requests as they pass through.""" # ErrorHandler methods def error(self, exception): self._err_handler.error(exception) def fatalError(self, exception): self._err_handler.fatalError(exception) def warning(self, exception): self._err_handler.warning(exception) # ContentHandler methods def setDocumentLocator(self, locator): self._cont_handler.setDocumentLocator(locator) def startDocument(self): self._cont_handler.startDocument() def endDocument(self): self._cont_handler.endDocument() def startPrefixMapping(self, prefix, uri): self._cont_handler.startPrefixMapping(prefix, uri) def endPrefixMapping(self, prefix): self._cont_handler.endPrefixMapping(prefix) def startElement(self, name, attrs): self._cont_handler.startElement(name, attrs) def endElement(self, name): self._cont_handler.endElement(name) def startElementNS(self, name, qname, attrs): self._cont_handler.startElementNS(name, qname, attrs) def endElementNS(self, name, qname): self._cont_handler.endElementNS(name, qname) def characters(self, content): self._cont_handler.characters(content) def ignorableWhitespace(self, chars): self._cont_handler.ignorableWhitespace(chars) def processingInstruction(self, target, data): self._cont_handler.processingInstruction(target, data) def skippedEntity(self, name): self._cont_handler.skippedEntity(name) # DTDHandler methods def notationDecl(self, name, publicId, systemId): self._dtd_handler.notationDecl(name, publicId, systemId) def unparsedEntityDecl(self, name, publicId, systemId, ndata): self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata) # EntityResolver methods def resolveEntity(self, publicId, systemId): self._ent_handler.resolveEntity(publicId, systemId) # XMLReader methods def parse(self, source): self._parent.setContentHandler(self) 
self._parent.setErrorHandler(self) self._parent.setEntityResolver(self) self._parent.setDTDHandler(self) self._parent.parse(source) def setLocale(self, locale): self._parent.setLocale(locale) def getFeature(self, name): return self._parent.getFeature(name) def setFeature(self, name, state): self._parent.setFeature(name, state) def getProperty(self, name): return self._parent.getProperty(name) def setProperty(self, name, value): self._parent.setProperty(name, value) # FIXME: remove this backward compatibility hack when not needed anymore XMLFilterImpl = XMLFilterBase # --- BaseIncrementalParser class BaseIncrementalParser(xmlreader.IncrementalParser): """This class implements the parse method of the XMLReader interface using the feed, close and reset methods of the IncrementalParser interface as a convenience to SAX 2.0 driver writers.""" def parse(self, source): source = prepare_input_source(source) self.prepareParser(source) self._cont_handler.startDocument() # FIXME: what about char-stream? 
inf = source.getByteStream() buffer = inf.read(16384) while buffer != "": self.feed(buffer) buffer = inf.read(16384) self.close() self.reset() self._cont_handler.endDocument() def prepareParser(self, source): """This method is called by the parse implementation to allow the SAX 2.0 driver to prepare itself for parsing.""" raise NotImplementedError("prepareParser must be overridden!") # --- Utility functions def prepare_input_source(source, base = ""): """This function takes an InputSource and an optional base URL and returns a fully resolved InputSource object ready for reading.""" if type(source) in _StringTypes: source = xmlreader.InputSource(source) elif hasattr(source, "read"): f = source source = xmlreader.InputSource() source.setByteStream(f) if hasattr(f, "name"): source.setSystemId(f.name) if source.getByteStream() is None: sysid = source.getSystemId() if os.path.isfile(sysid): basehead = os.path.split(os.path.normpath(base))[0] source.setSystemId(os.path.join(basehead, sysid)) f = open(sysid, "rb") else: - source.setSystemId(urlparse.urljoin(base, sysid)) - f = urllib2.urlopen(source.getSystemId()) + source.setSystemId(urllib.parse.urljoin(base, sysid)) + f = urllib.request.urlopen(source.getSystemId()) source.setByteStream(f) return source # =========================================================================== # # DEPRECATED SAX 1.0 CLASSES # # =========================================================================== # --- AttributeMap class AttributeMap: """An implementation of AttributeList that takes an (attr,val) hash and uses it to implement the AttributeList interface.""" def __init__(self, map): self.map=map def getLength(self): - return len(self.map.keys()) + return len(list(self.map.keys())) def getName(self, i): try: - return self.map.keys()[i] - except IndexError,e: + return list(self.map.keys())[i] + except IndexError as e: return None def getType(self, i): return "CDATA" def getValue(self, i): try: - if type(i)==types.IntType: + if 
type(i)==int: return self.map[self.getName(i)] else: return self.map[i] - except KeyError,e: + except KeyError as e: return None def __len__(self): return len(self.map) def __getitem__(self, key): - if type(key)==types.IntType: - return self.map.keys()[key] + if type(key)==int: + return list(self.map.keys())[key] else: return self.map[key] def items(self): - return self.map.items() + return list(self.map.items()) def keys(self): - return self.map.keys() + return list(self.map.keys()) def has_key(self,key): - return self.map.has_key(key) + return key in self.map def get(self, key, alternative=None): return self.map.get(key, alternative) def copy(self): return AttributeMap(self.map.copy()) def values(self): - return self.map.values() + return list(self.map.values()) # --- Event broadcasting object class EventBroadcaster: """Takes a list of objects and forwards any method calls received to all objects in the list. The attribute list holds the list and can freely be modified by clients.""" class Event: "Helper objects that represent event methods." def __init__(self,list,name): self.list=list self.name=name def __call__(self,*rest): for obj in self.list: - apply(getattr(obj,self.name), rest) + getattr(obj,self.name)(*rest) def __init__(self,list): self.list=list def __getattr__(self,name): return self.Event(self.list,name) def __repr__(self): return "" % id(self) # --- ESIS document handler -import saxlib +from . import saxlib class ESISDocHandler(saxlib.HandlerBase): "A SAX document handler that produces naive ESIS output." def __init__(self,writer=sys.stdout): self.writer=writer def processingInstruction (self,target, remainder): """Receive an event signalling that a processing instruction has been found.""" self.writer.write("?"+target+" "+remainder+"\n") def startElement(self,name,amap): "Receive an event signalling the start of an element." 
self.writer.write("("+name+"\n") - for a_name in amap.keys(): + for a_name in list(amap.keys()): self.writer.write("A"+a_name+" "+amap[a_name]+"\n") def endElement(self,name): "Receive an event signalling the end of an element." self.writer.write(")"+name+"\n") def characters(self,data,start_ix,length): "Receive an event signalling that character data has been found." self.writer.write("-"+data[start_ix:start_ix+length]+"\n") # --- XML canonizer class Canonizer(saxlib.HandlerBase): "A SAX document handler that produces canonized XML output." def __init__(self,writer=sys.stdout): self.elem_level=0 self.writer=writer def processingInstruction (self,target, remainder): if not target=="xml": self.writer.write("") def startElement(self,name,amap): self.writer.write("<"+name) - a_names=amap.keys() + a_names=list(amap.keys()) a_names.sort() for a_name in a_names: self.writer.write(" "+a_name+"=\"") self.write_data(amap[a_name]) self.writer.write("\"") self.writer.write(">") self.elem_level=self.elem_level+1 def endElement(self,name): self.writer.write("") self.elem_level=self.elem_level-1 def ignorableWhitespace(self,data,start_ix,length): self.characters(data,start_ix,length) def characters(self,data,start_ix,length): if self.elem_level>0: self.write_data(data[start_ix:start_ix+length]) def write_data(self,data): "Writes datachars to writer." 
data=data.replace("&","&") data=data.replace("<","<") data=data.replace("\"",""") data=data.replace(">",">") data=data.replace(chr(9)," ") data=data.replace(chr(10)," ") data=data.replace(chr(13)," ") self.writer.write(data) # --- mllib class mllib: """A re-implementation of the htmllib, sgmllib and xmllib interfaces as a SAX DocumentHandler.""" # Unsupported: # - setnomoretags # - setliteral # - translate_references # - handle_xml # - handle_doctype # - handle_charref # - handle_entityref # - handle_comment # - handle_cdata # - tag_attributes def __init__(self): self.reset() def reset(self): - import saxexts # only used here + from . import saxexts # only used here self.parser=saxexts.XMLParserFactory.make_parser() self.handler=mllib.Handler(self.parser,self) self.handler.reset() def feed(self,data): self.parser.feed(data) def close(self): self.parser.close() def get_stack(self): return self.handler.get_stack() # --- Handler methods (to be overridden) def handle_starttag(self,name,method,atts): method(atts) def handle_endtag(self,name,method): method() def handle_data(self,data): pass def handle_proc(self,target,data): pass def unknown_starttag(self,name,atts): pass def unknown_endtag(self,name): pass def syntax_error(self,message): pass # --- The internal handler class class Handler(saxlib.DocumentHandler,saxlib.ErrorHandler): """An internal class to handle SAX events and translate them to mllib events.""" def __init__(self,driver,handler): self.driver=driver self.driver.setDocumentHandler(self) self.driver.setErrorHandler(self) self.handler=handler self.reset() def get_stack(self): return self.stack def reset(self): self.stack=[] # --- DocumentHandler methods def characters(self, ch, start, length): self.handler.handle_data(ch[start:start+length]) def endElement(self, name): if hasattr(self.handler,"end_"+name): self.handler.handle_endtag(name, getattr(self.handler,"end_"+name)) else: self.handler.unknown_endtag(name) del self.stack[-1] def 
ignorableWhitespace(self, ch, start, length): self.handler.handle_data(ch[start:start+length]) def processingInstruction(self, target, data): self.handler.handle_proc(target,data) def startElement(self, name, atts): self.stack.append(name) if hasattr(self.handler,"start_"+name): self.handler.handle_starttag(name, getattr(self.handler, "start_"+name), atts) else: self.handler.unknown_starttag(name,atts) # --- ErrorHandler methods def error(self, exception): self.handler.syntax_error(str(exception)) def fatalError(self, exception): raise RuntimeError(str(exception)) diff --git a/xcap/sax/writer.py b/xcap/sax/writer.py index 9121fd3..ada2544 100644 --- a/xcap/sax/writer.py +++ b/xcap/sax/writer.py @@ -1,549 +1,548 @@ """SAX document handlers that support output generation of XML, SGML, and XHTML. This module provides three different groups of objects: the actual SAX document handlers that drive the output, DTD information containers, and syntax descriptors (of limited public use in most cases). Output Drivers -------------- The output drivers conform to the SAX C protocol. They can be used anywhere a C is used. Two drivers are provided: a `basic' driver which creates a fairly minimal output without much intelligence, and a `pretty-printing' driver that performs pretty-printing with nice indentation and the like. Both can optionally make use of DTD information and syntax objects. DTD Information Containers -------------------------- Each DTD information object provides an attribute C which describes the expected output syntax; an alternate can be provided to the output drivers if desired. Syntax Descriptors ------------------ Syntax descriptor objects provide several attributes which describe the various lexical components of XML & SGML markup. The attributes have names that reflect the shorthand notation from the SGML world, but the values are strings which give the appropriate characters for the markup language being described. 
The one addition is the C attribute which should be used to end the start tag of elements which have no content. This is needed to properly support XML and XHTML. """ __version__ = '$Revision: 1.9 $' import string import xml.parsers.xmlproc.dtdparser import xml.parsers.xmlproc.xmlapp from xml.sax.saxutils import escape DEFAULT_LINELENGTH = 74 class Syntax: com = "--" # comment start or end cro = "&#" # character reference open refc = ";" # reference close dso = "[" # declaration subset open dsc = "]" # declaration subset close ero = "&" # entity reference open lit = '"' # literal start or end lit_quoted = '"' # quoted literal lita = "'" # literal start or end (alternative) mdo = "" # markup declaration close msc = "]]" # marked section close pio = "" # tag close vi = "=" # value indicator def __init__(self): if self.__class__ is Syntax: - raise RuntimeError, "Syntax must be subclassed to be used!" + raise RuntimeError("Syntax must be subclassed to be used!") class SGMLSyntax(Syntax): empty_stagc = ">" pic = ">" # processing instruction close net = "/" # null end tag class XMLSyntax(Syntax): empty_stagc = "/>" pic = "?>" # processing instruction close net = None # null end tag not supported class XHTMLSyntax(XMLSyntax): empty_stagc = " />" class DoctypeInfo: syntax = XMLSyntax() fpi = None sysid = None def __init__(self): self.__empties = {} self.__elements_only = {} self.__attribs = {} def is_empty(self, gi): - return self.__empties.has_key(gi) + return gi in self.__empties def get_empties_list(self): - return self.__empties.keys() + return list(self.__empties.keys()) def has_element_content(self, gi): - return self.__elements_only.has_key(gi) + return gi in self.__elements_only def get_element_containers_list(self): - return self.__elements_only.keys() + return list(self.__elements_only.keys()) def get_attributes_list(self, gi): - return self.__attribs.get(gi, {}).keys() + return list(self.__attribs.get(gi, {}).keys()) def get_attribute_info(self, gi, attr): 
return self.__attribs[gi][attr] def add_empty(self, gi): self.__empties[gi] = 1 def add_element_container(self, gi): self.__elements_only[gi] = gi def add_attribute_defn(self, gi, attr, type, decl, default): try: d = self.__attribs[gi] except KeyError: d = self.__attribs[gi] = {} - if not d.has_key(attr): + if attr not in d: d[attr] = (type, decl, default) else: - print "<%s> attribute %s already defined" % (gi, attr) + print("<%s> attribute %s already defined" % (gi, attr)) def load_pubtext(self, pubtext): - raise NotImplementedError, "sublasses must implement load_pubtext()" + raise NotImplementedError("sublasses must implement load_pubtext()") class _XMLDTDLoader(xml.parsers.xmlproc.xmlapp.DTDConsumer): def __init__(self, info, parser): self.info = info xml.parsers.xmlproc.xmlapp.DTDConsumer.__init__(self, parser) self.new_attribute = info.add_attribute_defn def new_element_type(self, gi, model): if model[0] == "|" and model[1][0] == ("#PCDATA", ""): # no action required pass elif model == ("", [], ""): self.info.add_empty(gi) else: self.info.add_element_container(gi) class XMLDoctypeInfo(DoctypeInfo): def load_pubtext(self, sysid): parser = xml.parsers.xmlproc.dtdparser.DTDParser() loader = _XMLDTDLoader(self, parser) parser.set_dtd_consumer(loader) parser.parse_resource(sysid) class XHTMLDoctypeInfo(XMLDoctypeInfo): # Bogus W3C cruft requires the extra space when terminating empty elements. 
syntax = XHTMLSyntax() class SGMLDoctypeInfo(DoctypeInfo): syntax = SGMLSyntax() import re __element_prefix_search = re.compile("": lit = self.__syntax.lit s = '%sxml version=%s1.0%s encoding%s%s%s%s' % ( self.__syntax.pio, lit, lit, self.__syntax.vi, lit, self._encoding, lit) if self.__standalone: s = '%s standalone%s%s%s%s' % ( s, self.__syntax.vi, lit, self.__standalone, lit) self._write("%s%s\n" % (s, self.__syntax.pic)) def endDocument(self): if self.__stack: - raise RuntimeError, "open element stack cannot be empty on close" + raise RuntimeError("open element stack cannot be empty on close") def startElement(self, tag, attrs={}): if self.__pending_doctype: self.handle_doctype(tag) self._check_pending_content() self.__pushtag(tag) self.__check_flowing(tag, attrs) - if attrs.has_key("xml:lang"): + if "xml:lang" in attrs: self.__lang = attrs["xml:lang"] del attrs["xml:lang"] if self._packing: prefix = "" elif self._flowing: prefix = self._prefix[:-self.indentation] else: prefix = "" stag = "%s%s%s" % (prefix, self.__syntax.stago, tag) prefix = "%s %s" % (prefix, (len(tag) * " ")) lit = self.__syntax.lit lita = self.__syntax.lita vi = self.__syntax.vi a = '' if self._flowing != self.__stack[-1][0]: if self._dtdflowing is not None \ and self._flowing == self._dtdflowing: pass else: a = ' xml:space%s%s%s%s' \ % (vi, lit, ["default", "preserve"][self._flowing], lit) if self.__lang != self.__stack[-1][1]: a = '%s xml:lang%s%s%s%s' % (a, vi, lit, self.lang, lit) line = stag + a self._offset = self._offset + len(line) a = '' - for k, v in attrs.items(): + for k, v in list(attrs.items()): if v is None: continue v = str(v) if string.find(v, lit) == -1: a = ' %s%s%s%s%s' % (k, vi, lit, escape(str(v)), lit) elif string.find(v, lita) == -1: a = ' %s%s%s%s%s' % (k, vi, lita, escape(str(v)), lita) else: a = ' %s%s%s%s%s' % (k, vi, lit, escape(str(v), {lit:self.__syntax.lit_quoted}), lita) if (self._offset + len(a)) > self.lineLength: self._write(line + "\n") line = prefix + a 
self._offset = len(line) else: line = line + a self._offset = self._offset + len(a) self._write(line) self.__pending_content = 1 if ( self.__dtdinfo and not (self.__dtdinfo.has_element_content(tag) or self.__dtdinfo.is_empty(tag))): self._packing = 1 def endElement(self, tag): if self.__pending_content: if self._flowing: self._write(self.__syntax.empty_stagc) if self._packing: self._offset = self._offset \ + len(self.__syntax.empty_stagc) else: self._write("\n") self._offset = 0 else: self._write(self.__syntax.empty_stagc) self._offset = self._offset + len(self.__syntax.empty_stagc) self.__pending_content = 0 self.__poptag(tag) return depth = len(self.__stack) if depth == 1 or self._packing or not self._flowing: prefix = '' else: prefix = self._prefix[:-self.indentation] \ + (" " * self.indentEndTags) self.__poptag(tag) self._write("%s%s%s%s" % ( prefix, self.__syntax.etago, tag, self.__syntax.tagc)) if self._packing: self._offset = self._offset + len(tag) + 3 else: self._write("\n") self._offset = 0 def characters(self, data, start, length): data = data[start: start+length] if data: self._check_pending_content() data = escape(data) if "\n" in data: p = string.find(data, "\n") self._offset = len(data) - (p + 1) else: self._offset = self._offset + len(data) self._check_pending_content() self._write(data) def comment(self, data, start, length): data = data[start: start+length] self._check_pending_content() s = "%s%s%s%s%s" % (self.__syntax.mdo, self.__syntax.com, data, self.__syntax.com, self.__syntax.mdc) p = string.rfind(s, "\n") if self._packing: if p >= 0: self._offset = len(s) - (p + 1) else: self._offset = self._offset + len(s) else: self._write("%s%s\n" % (self._prefix, s)) self._offset = 0 def ignorableWhitespace(self, data, start, length): pass def processingInstruction(self, target, data): self._check_pending_content() s = "%s%s %s%s" % (self.__syntax.pio, target, data, self.__syntax.pic) prefix = self._prefix[:-self.indentation] \ + (" " * 
self.indentEndTags) if "\n" in s: p = string.rfind(s, "\n") if self._flowing and not self._packing: self._write(prefix + s + "\n") self._offset = 0 else: self._write(s) self._offset = len(s) - (p + 1) elif self._flowing and not self._packing: self._write(prefix + s + "\n") self._offset = 0 else: self._write(s) self._offset = self._offset + len(s) # This doesn't actually have a SAX equivalent, so we'll use it as # an internal helper. def handle_doctype(self, root): self.__pending_doctype = 0 if self.__dtdinfo: fpi = self.__dtdinfo.fpi sysid = self.__dtdinfo.sysid else: fpi = sysid = None lit = self.__syntax.lit isxml = self.__syntax.pic == "?>" if isxml and sysid: s = '%sDOCTYPE %s\n' % (self.__syntax.mdo, root) if fpi: s = s + ' PUBLIC %s%s%s\n' % (lit, fpi, lit) s = s + ' %s%s%s>\n' % (lit, sysid, lit) else: s = s + ' SYSTEM %s%s%s>\n' % (lit, sysid, lit) self._write(s) self._offset = 0 elif not isxml: s = "%sDOCTYPE %s" % (self.__syntax.mdo, root) if fpi: s = '%s\n PUBLIC %s%s%s' % (s, lit, fpi, lit) if sysid: s = '%s\n SYSTEM %s%s%s' % (s, lit, sysid, lit) self._write("%s%s\n" % (s, self.__syntax.mdc)) self._offset = 0 def handle_cdata(self, data): self._check_pending_content() # There should be a better way to generate '[CDATA[' start = self.__syntax.mdo + "[CDATA[" end = self.__syntax.msc + self.__syntax.mdc s = "%s%s%s" % (start, escape(data), end) if self._packing: if "\n" in s: rpos = string.rfind(s, "\n") self._offset = len(s) - (rpos + 1) + len(end) else: self._offset = self._offset + len(s) + len(start + end) self._write(s) else: self._offset = 0 self._write(s + "\n") # Internal helper methods. 
def __poptag(self, tag): state = self.__stack.pop() self._flowing, self.__lang, expected_tag, \ self._packing, self._dtdflowing = state if tag != expected_tag: - raise RuntimeError, \ - "expected </%s>, got </%s>" % (expected_tag, tag) + raise RuntimeError("expected </%s>, got </%s>" % (expected_tag, tag)) self._prefix = self._prefix[:-self.indentation] def __pushtag(self, tag): self.__stack.append((self._flowing, self.__lang, tag, self._packing, self._dtdflowing)) self._prefix = self._prefix + " " * self.indentation def __check_flowing(self, tag, attrs): """Check the contents of attrs and the DTD information to determine whether the following content should be flowed. tag -- general identifier of the element being opened attrs -- attributes dictionary as reported by the parser or application This sets up both the _flowing and _dtdflowing (object) attributes. """ docspec = dtdspec = None if self.__dtdinfo: try: info = self.__dtdinfo.get_attribute_info(tag, "xml:space") except KeyError: info = None if info is not None: self._flowing = info[2] != "preserve" self._dtdflowing = self._flowing - if attrs.has_key("xml:space"): + if "xml:space" in attrs: self._flowing = attrs["xml:space"] != "preserve" del attrs["xml:space"] def _check_pending_content(self): if self.__pending_content: s = self.__syntax.tagc if self._flowing and not self._packing: s = s + "\n" self._offset = 0 else: self._offset = self._offset + len(s) self._write(s) self.__pending_content = 0 class PrettyPrinter(XmlWriter): """Pretty-printing XML output handler.""" def __init__(self, fp, standalone=None, dtdinfo=None, syntax=None, linelength=None, indentation=2, endtagindentation=None): XmlWriter.__init__(self, fp, standalone=standalone, dtdinfo=dtdinfo, syntax=syntax, linelength=linelength) self.indentation = indentation if endtagindentation is not None: self.indentEndTags = endtagindentation else: self.indentEndTags = indentation def characters(self, data, start, length): data = data[start: start + length] if not data:
return self._check_pending_content() data = escape(data) if not self._flowing: self._write(data) return words = string.split(data) begspace = data[0] in string.whitespace endspace = words and (data[-1] in string.whitespace) prefix = self._prefix if len(prefix) > 40: prefix = " " offset = self._offset L = [] append = L.append if begspace: append(" ") offset = offset + 1 ws = "" ws_len = 0 while words: w = words[0] del words[0] if (offset + ws_len + len(w)) > self.lineLength: append("\n") append(prefix) append(w) offset = len(prefix) + len(w) else: append(ws) ws, ws_len = " ", 1 append(w) offset = offset + 1 + len(w) if endspace: append(" ") offset = offset + 1 self._offset = offset self._write(string.join(L, "")) diff --git a/xcap/sax/xmlreader.py b/xcap/sax/xmlreader.py index f1c9d9d..01e9e0c 100644 --- a/xcap/sax/xmlreader.py +++ b/xcap/sax/xmlreader.py @@ -1,378 +1,378 @@ """An XML Reader is the SAX 2 name for an XML parser. XML Parsers should be based on this code. """ -import handler +from . import handler -from _exceptions import SAXNotSupportedException, SAXNotRecognizedException +from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException # ===== XMLREADER ===== class XMLReader: """Interface for reading an XML document using callbacks. XMLReader is the interface that an XML parser's SAX2 driver must implement. This interface allows an application to set and query features and properties in the parser, to register event handlers for document processing, and to initiate a document parse. 
All SAX interfaces are assumed to be synchronous: the parse methods must not return until parsing is complete, and readers must wait for an event-handler callback to return before reporting the next event.""" def __init__(self): self._cont_handler = handler.ContentHandler() self._dtd_handler = handler.DTDHandler() self._ent_handler = handler.EntityResolver() self._err_handler = handler.ErrorHandler() def parse(self, source): "Parse an XML document from a system identifier or an InputSource." raise NotImplementedError("This method must be implemented!") def getContentHandler(self): "Returns the current ContentHandler." return self._cont_handler def setContentHandler(self, handler): "Registers a new object to receive document content events." self._cont_handler = handler def getDTDHandler(self): "Returns the current DTD handler." return self._dtd_handler def setDTDHandler(self, handler): "Register an object to receive basic DTD-related events." self._dtd_handler = handler def getEntityResolver(self): "Returns the current EntityResolver." return self._ent_handler def setEntityResolver(self, resolver): "Register an object to resolve external entities." self._ent_handler = resolver def getErrorHandler(self): "Returns the current ErrorHandler." return self._err_handler def setErrorHandler(self, handler): "Register an object to receive error-message events." self._err_handler = handler def setLocale(self, locale): """Allow an application to set the locale for errors and warnings. SAX parsers are not required to provide localization for errors and warnings; if they cannot support the requested locale, however, they must throw a SAX exception. Applications may request a locale change in the middle of a parse.""" raise SAXNotSupportedException("Locale support not implemented") def getFeature(self, name): "Looks up and returns the state of a SAX2 feature." 
raise SAXNotRecognizedException("Feature '%s' not recognized" % name) def setFeature(self, name, state): "Sets the state of a SAX2 feature." raise SAXNotRecognizedException("Feature '%s' not recognized" % name) def getProperty(self, name): "Looks up and returns the value of a SAX2 property." raise SAXNotRecognizedException("Property '%s' not recognized" % name) def setProperty(self, name, value): "Sets the value of a SAX2 property." raise SAXNotRecognizedException("Property '%s' not recognized" % name) class IncrementalParser(XMLReader): """This interface adds three extra methods to the XMLReader interface that allow XML parsers to support incremental parsing. Support for this interface is optional, since not all underlying XML parsers support this functionality. When the parser is instantiated it is ready to begin accepting data from the feed method immediately. After parsing has been finished with a call to close the reset method must be called to make the parser ready to accept new data, either from feed or using the parse method. Note that these methods must _not_ be called during parsing, that is, after parse has been called and before it returns. By default, the class also implements the parse method of the XMLReader interface using the feed, close and reset methods of the IncrementalParser interface as a convenience to SAX 2.0 driver writers.""" def __init__(self, bufsize=2**16): self._bufsize = bufsize XMLReader.__init__(self) def parse(self, source): - import saxutils + from . import saxutils source = saxutils.prepare_input_source(source) self.prepareParser(source) file = source.getByteStream() buffer = file.read(self._bufsize) while buffer != "": self.feed(buffer) buffer = file.read(self._bufsize) self.close() def feed(self, data): """This method gives the raw XML data in the data parameter to the parser and makes it parse the data, emitting the corresponding events. It is allowed for XML constructs to be split across several calls to feed. 
feed may raise SAXException.""" raise NotImplementedError("This method must be implemented!") def prepareParser(self, source): """This method is called by the parse implementation to allow the SAX 2.0 driver to prepare itself for parsing.""" raise NotImplementedError("prepareParser must be overridden!") def close(self): """This method is called when the entire XML document has been passed to the parser through the feed method, to notify the parser that there are no more data. This allows the parser to do the final checks on the document and empty the internal data buffer. The parser will not be ready to parse another document until the reset method has been called. close may raise SAXException.""" raise NotImplementedError("This method must be implemented!") def reset(self): """This method is called after close has been called to reset the parser so that it is ready to parse new documents. The results of calling parse or feed after close without calling reset are undefined.""" raise NotImplementedError("This method must be implemented!") # ===== LOCATOR ===== class Locator: """Interface for associating a SAX event with a document location. A locator object will return valid results only during calls to DocumentHandler methods; at any other time, the results are unpredictable.""" def getColumnNumber(self): "Return the column number where the current event ends." return -1 def getLineNumber(self): "Return the line number where the current event ends." return -1 def getPublicId(self): "Return the public identifier for the current event." return None def getSystemId(self): "Return the system identifier for the current event." return None # ===== INPUTSOURCE ===== class InputSource: """Encapsulation of the information needed by the XMLReader to read entities. This class may include information about the public identifier, system identifier, byte stream (possibly with character encoding information) and/or the character stream of an entity. 
Applications will create objects of this class for use in the XMLReader.parse method and for returning from EntityResolver.resolveEntity. An InputSource belongs to the application, the XMLReader is not allowed to modify InputSource objects passed to it from the application, although it may make copies and modify those.""" def __init__(self, system_id = None): self.__system_id = system_id self.__public_id = None self.__encoding = None self.__bytefile = None self.__charfile = None def setPublicId(self, public_id): "Sets the public identifier of this InputSource." self.__public_id = public_id def getPublicId(self): "Returns the public identifier of this InputSource." return self.__public_id def setSystemId(self, system_id): "Sets the system identifier of this InputSource." self.__system_id = system_id def getSystemId(self): "Returns the system identifier of this InputSource." return self.__system_id def setEncoding(self, encoding): """Sets the character encoding of this InputSource. The encoding must be a string acceptable for an XML encoding declaration (see section 4.3.3 of the XML recommendation). The encoding attribute of the InputSource is ignored if the InputSource also contains a character stream.""" self.__encoding = encoding def getEncoding(self): "Get the character encoding of this InputSource." return self.__encoding def setByteStream(self, bytefile): """Set the byte stream (a Python file-like object which does not perform byte-to-character conversion) for this input source. The SAX parser will ignore this if there is also a character stream specified, but it will use a byte stream in preference to opening a URI connection itself. If the application knows the character encoding of the byte stream, it should set it with the setEncoding method.""" self.__bytefile = bytefile def getByteStream(self): """Get the byte stream for this input source. 
The getEncoding method will return the character encoding for this byte stream, or None if unknown.""" return self.__bytefile def setCharacterStream(self, charfile): """Set the character stream for this input source. (The stream must be a Python 2.0 Unicode-wrapped file-like that performs conversion to Unicode strings.) If there is a character stream specified, the SAX parser will ignore any byte stream and will not attempt to open a URI connection to the system identifier.""" self.__charfile = charfile def getCharacterStream(self): "Get the character stream for this input source." return self.__charfile # ===== ATTRIBUTESIMPL ===== class AttributesImpl: def __init__(self, attrs): """Non-NS-aware implementation. attrs should be of the form {name : value}.""" self._attrs = attrs def getLength(self): return len(self._attrs) def getType(self, name): return "CDATA" def getValue(self, name): return self._attrs[name] def getValueByQName(self, name): return self._attrs[name] def getNameByQName(self, name): - if not self._attrs.has_key(name): - raise KeyError, name + if name not in self._attrs: + raise KeyError(name) return name def getQNameByName(self, name): - if not self._attrs.has_key(name): - raise KeyError, name + if name not in self._attrs: + raise KeyError(name) return name def getNames(self): - return self._attrs.keys() + return list(self._attrs.keys()) def getQNames(self): - return self._attrs.keys() + return list(self._attrs.keys()) def __len__(self): return len(self._attrs) def __getitem__(self, name): return self._attrs[name] def keys(self): - return self._attrs.keys() + return list(self._attrs.keys()) def has_key(self, name): - return self._attrs.has_key(name) + return name in self._attrs def get(self, name, alternative=None): return self._attrs.get(name, alternative) def copy(self): return self.__class__(self._attrs) def items(self): - return self._attrs.items() + return list(self._attrs.items()) def values(self): - return self._attrs.values() + return 
list(self._attrs.values()) # ===== ATTRIBUTESNSIMPL ===== class AttributesNSImpl(AttributesImpl): def __init__(self, attrs, qnames): """NS-aware implementation. attrs should be of the form {(ns_uri, lname): value, ...}. qnames of the form {(ns_uri, lname): qname, ...}.""" self._attrs = attrs self._qnames = qnames def getValueByQName(self, name): - for (nsname, qname) in self._qnames.items(): + for (nsname, qname) in list(self._qnames.items()): if qname == name: return self._attrs[nsname] - raise KeyError, name + raise KeyError(name) def getNameByQName(self, name): - for (nsname, qname) in self._qnames.items(): + for (nsname, qname) in list(self._qnames.items()): if qname == name: return nsname - raise KeyError, name + raise KeyError(name) def getQNameByName(self, name): return self._qnames[name] def getQNames(self): - return self._qnames.values() + return list(self._qnames.values()) def copy(self): return self.__class__(self._attrs, self._qnames) def _test(): XMLReader() IncrementalParser() Locator() if __name__ == "__main__": _test() diff --git a/xcap/server.py b/xcap/server.py index 2788cba..b543f4b 100644 --- a/xcap/server.py +++ b/xcap/server.py @@ -1,177 +1,177 @@ """HTTP handling for the XCAP server""" -from __future__ import absolute_import -import resource as _resource + +from . 
import resource as _resource import sys from application.configuration.datatypes import IPAddress, NetworkRangeList from application.configuration import ConfigSection, ConfigSetting from application import log from twisted.internet import reactor from twisted.cred.portal import Portal import xcap from xcap import authentication from xcap.datatypes import XCAPRootURI from xcap.appusage import getApplicationForURI, Backend from xcap.resource import XCAPDocument, XCAPElement, XCAPAttribute, XCAPNamespaceBinding from xcap.logutil import web_logger from xcap.tls import Certificate, PrivateKey from xcap.web import channel, resource, http, responsecode, server from xcap.xpath import AttributeSelector, NamespaceSelector server.VERSION = "OpenXCAP/%s" % xcap.__version__ class AuthenticationConfig(ConfigSection): __cfgfile__ = xcap.__cfgfile__ __section__ = 'Authentication' type = 'digest' cleartext_passwords = True default_realm = ConfigSetting(type=str, value=None) trusted_peers = ConfigSetting(type=NetworkRangeList, value=NetworkRangeList('none')) class ServerConfig(ConfigSection): __cfgfile__ = xcap.__cfgfile__ __section__ = 'Server' address = ConfigSetting(type=IPAddress, value='0.0.0.0') root = ConfigSetting(type=XCAPRootURI, value=None) backend = ConfigSetting(type=Backend, value=None) class TLSConfig(ConfigSection): __cfgfile__ = xcap.__cfgfile__ __section__ = 'TLS' certificate = ConfigSetting(type=Certificate, value=None) private_key = ConfigSetting(type=PrivateKey, value=None) if ServerConfig.root is None: log.critical('The XCAP root URI is not defined') sys.exit(1) if ServerConfig.backend is None: log.critical('OpenXCAP needs a backend to be specified in order to run') sys.exit(1) # Increase the system limit for the maximum number of open file descriptors try: _resource.setrlimit(_resource.RLIMIT_NOFILE, (99999, 99999)) except ValueError: log.warning('Could not raise open file descriptor limit') class XCAPRoot(resource.Resource, resource.LeafResource): addSlash = 
True def allowedMethods(self): # not used , but methods were already checked by XCAPAuthResource return ('GET', 'PUT', 'DELETE') def resourceForURI(self, xcap_uri): application = getApplicationForURI(xcap_uri) if not xcap_uri.node_selector: return XCAPDocument(xcap_uri, application) else: terminal_selector = xcap_uri.node_selector.terminal_selector if isinstance(terminal_selector, AttributeSelector): return XCAPAttribute(xcap_uri, application) elif isinstance(terminal_selector, NamespaceSelector): return XCAPNamespaceBinding(xcap_uri, application) else: return XCAPElement(xcap_uri, application) def renderHTTP(self, request): application = getApplicationForURI(request.xcap_uri) if not application: return http.Response(responsecode.NOT_FOUND, stream="Application not supported") resource = self.resourceForURI(request.xcap_uri) return resource.renderHTTP(request) class Request(server.Request): def writeResponse(self, response): web_logger.log_access(request=self, response=response) return server.Request.writeResponse(self, response) class HTTPChannel(channel.http.HTTPChannel): inputTimeOut = 30 def __init__(self): channel.http.HTTPChannel.__init__(self) # if connection wasn't completed for 30 seconds, terminate it, # this avoids having lingering TCP connections which don't complete # the TLS handshake self.setTimeout(30) def timeoutConnection(self): if self.transport: log.info('Timing out client: {}'.format(self.transport.getPeer())) channel.http.HTTPChannel.timeoutConnection(self) class HTTPFactory(channel.HTTPFactory): noisy = False protocol = HTTPChannel class XCAPSite(server.Site): def __call__(self, *args, **kwargs): return Request(site=self, *args, **kwargs) class XCAPServer(object): def __init__(self): portal = Portal(authentication.XCAPAuthRealm()) if AuthenticationConfig.cleartext_passwords: http_checker = ServerConfig.backend.PlainPasswordChecker() else: http_checker = ServerConfig.backend.HashPasswordChecker() portal.registerChecker(http_checker) 
trusted_peers = AuthenticationConfig.trusted_peers portal.registerChecker(authentication.TrustedPeerChecker(trusted_peers)) portal.registerChecker(authentication.PublicGetApplicationChecker()) auth_type = AuthenticationConfig.type if auth_type == 'basic': credential_factory = authentication.BasicCredentialFactory(auth_type) elif auth_type == 'digest': credential_factory = authentication.DigestCredentialFactory('MD5', auth_type) else: raise ValueError('Invalid authentication type: %r. Please check the configuration.' % auth_type) root = authentication.XCAPAuthResource(XCAPRoot(), (credential_factory,), portal, (authentication.IAuthUser,)) self.site = XCAPSite(root) def _start_https(self, reactor): from gnutls.interfaces.twisted import X509Credentials from gnutls.connection import TLSContext, TLSContextServerOptions cert, pKey = TLSConfig.certificate, TLSConfig.private_key if cert is None or pKey is None: log.critical('The TLS certificate/key could not be loaded') sys.exit(1) credentials = X509Credentials(cert, pKey) tls_context = TLSContext(credentials, server_options=TLSContextServerOptions(certificate_request=None)) reactor.listenTLS(ServerConfig.root.port, HTTPFactory(self.site), tls_context, interface=ServerConfig.address) log.info('TLS started') def start(self): log.info('Listening on: %s:%d' % (ServerConfig.address, ServerConfig.root.port)) log.info('XCAP root: %s' % ServerConfig.root) if ServerConfig.root.startswith('https'): self._start_https(reactor) else: reactor.listenTCP(ServerConfig.root.port, HTTPFactory(self.site), interface=ServerConfig.address) reactor.run(installSignalHandlers=ServerConfig.backend.installSignalHandlers) diff --git a/xcap/tls.py b/xcap/tls.py index 7503129..ca72a60 100644 --- a/xcap/tls.py +++ b/xcap/tls.py @@ -1,54 +1,54 @@ """TLS helper classes""" __all__ = ['Certificate', 'PrivateKey'] from application import log from application.process import process from gnutls.crypto import X509Certificate, X509PrivateKey class 
_FileError(Exception): pass def file_content(filename): path = process.configuration.file(filename) if path is None: raise _FileError('File %r does not exist' % filename) try: f = open(path, 'rt') except Exception: raise _FileError('File %r could not be open' % filename) try: return f.read() finally: f.close() class Certificate(object): """Configuration data type. Used to create a gnutls.crypto.X509Certificate object from a file given in the configuration file.""" def __new__(cls, value): if isinstance(value, str): try: return X509Certificate(file_content(value)) - except Exception, e: + except Exception as e: log.warning('Certificate file %r could not be loaded: %s' % (value, e)) return None else: raise TypeError('value should be a string') class PrivateKey(object): """Configuration data type. Used to create a gnutls.crypto.X509PrivateKey object from a file given in the configuration file.""" def __new__(cls, value): if isinstance(value, str): try: return X509PrivateKey(file_content(value)) - except Exception, e: + except Exception as e: log.warning('Private key file %r could not be loaded: %s' % (value, e)) return None else: raise TypeError('value should be a string') diff --git a/xcap/uri.py b/xcap/uri.py index 0424adc..9bc8df6 100644 --- a/xcap/uri.py +++ b/xcap/uri.py @@ -1,108 +1,108 @@ """XCAP URI module http://tools.ietf.org/html/rfc4825#section-6 The algorithm to decode the URI is as following: * First, percent-decode the whole URI (urllib.unquote) * Split document selector from node selector (str.split('~~')) * Then use xpath_tokenizer from lxml to parse the whole node selector and extract individual steps Although after doing percent-decoding first, we cannot use s.split('/'), using lexer from lxml alleviates that fact a bit and produces good results. A potential problem can arise with URIs that contain [percent-encoded] double quotes. 
Here's an example: /resource-lists/list[@name="friends"]/external[@anchor="/list[@name=%22mkting%22]"] which would be converted to /resource-lists/list[@name="friends"]/external[@anchor="/list[@name="mkting"]"] and that would confuse the parser. I'm not sure if it's legal to have such URIs, but if it is this module has to be fixed. Meanwhile, the safe approach is to use " /resource-lists/list[@name="friends"]/external[@anchor="/list[@name="mkting"]"] """ -from urllib import unquote +from urllib.parse import unquote from xcap.xpath import DocumentSelector, NodeSelector class XCAPUser(object): def __init__(self, username, domain): self.username = username self.domain = domain @property def uri(self): return 'sip:%s@%s' % (self.username, self.domain) def __eq__(self, other): return isinstance(other, XCAPUser) and self.uri == other.uri def __ne__(self, other): return not self.__eq__(other) - def __nonzero__(self): + def __bool__(self): return bool(self.username) and bool(self.domain) def __str__(self): return "%s@%s" % (self.username, self.domain) def __repr__(self): return 'XCAPUser(%r, %r)' % (self.username, self.domain) @classmethod def parse(cls, user_id, default_domain=None): if user_id.startswith("sip:"): user_id = user_id[4:] _split = user_id.split('@', 1) username = _split[0] if len(_split) == 2: domain = _split[1] else: domain = default_domain return cls(username, domain) class XCAPUri(object): """An XCAP URI containing the XCAP root, document selector and node selector.""" def __init__(self, xcap_root, resource_selector, namespaces): "namespaces maps application id to default namespace" self.xcap_root = xcap_root self.resource_selector = unquote(resource_selector) realm = None # convention to get the realm if it's not contained in the user ID section # of the document selector (bad eyebeam) if self.resource_selector.startswith("@"): first_slash = self.resource_selector.find("/") realm = self.resource_selector[1:first_slash] self.resource_selector = 
self.resource_selector[first_slash:] _split = self.resource_selector.split('~~', 1) doc_selector = _split[0] self.doc_selector = DocumentSelector(doc_selector) self.application_id = self.doc_selector.application_id if len(_split) == 2: self.node_selector = NodeSelector(_split[1], namespaces.get(self.application_id)) else: self.node_selector = None if self.doc_selector.user_id: self.user = XCAPUser.parse(self.doc_selector.user_id, realm) else: self.user = XCAPUser(None, realm) def __str__(self): return self.xcap_root + self.resource_selector diff --git a/xcap/web/auth/digest.py b/xcap/web/auth/digest.py index 992e88a..a102bce 100644 --- a/xcap/web/auth/digest.py +++ b/xcap/web/auth/digest.py @@ -1,349 +1,349 @@ # Copyright (c) 2006-2008 Twisted Matrix Laboratories. """ Implementation of RFC2617: HTTP Digest Authentication http://www.faqs.org/rfcs/rfc2617.html """ import sys import time import random from hashlib import md5, sha1 from twisted.cred import credentials, error from zope.interface import implements, Interface from xcap.web.auth.interfaces import ICredentialFactory # The digest math algorithms = { 'md5': md5, 'md5-sess': md5, 'sha': sha1, } # DigestCalcHA1 def calcHA1( pszAlg, pszUserName, pszRealm, pszPassword, pszNonce, pszCNonce, preHA1=None ): """ @param pszAlg: The name of the algorithm to use to calculate the digest. Currently supported are md5 md5-sess and sha. @param pszUserName: The username @param pszRealm: The realm @param pszPassword: The password @param pszNonce: The nonce @param pszCNonce: The cnonce @param preHA1: If available this is a str containing a previously calculated HA1 as a hex string. If this is given then the values for pszUserName, pszRealm, and pszPassword are ignored. 
""" if (preHA1 and (pszUserName or pszRealm or pszPassword)): raise TypeError(("preHA1 is incompatible with the pszUserName, " "pszRealm, and pszPassword arguments")) if preHA1 is None: # We need to calculate the HA1 from the username:realm:password m = algorithms[pszAlg]() m.update(pszUserName) m.update(":") m.update(pszRealm) m.update(":") m.update(pszPassword) HA1 = m.digest() else: # We were given a username:realm:password HA1 = preHA1.decode('hex') if pszAlg == "md5-sess": m = algorithms[pszAlg]() m.update(HA1) m.update(":") m.update(pszNonce) m.update(":") m.update(pszCNonce) HA1 = m.digest() return HA1.encode('hex') # DigestCalcResponse def calcResponse( HA1, algo, pszNonce, pszNonceCount, pszCNonce, pszQop, pszMethod, pszDigestUri, pszHEntity, ): m = algorithms[algo]() m.update(pszMethod) m.update(":") m.update(pszDigestUri) if pszQop == "auth-int": m.update(":") m.update(pszHEntity) HA2 = m.digest().encode('hex') m = algorithms[algo]() m.update(HA1) m.update(":") m.update(pszNonce) m.update(":") if pszNonceCount and pszCNonce: # pszQop: m.update(pszNonceCount) m.update(":") m.update(pszCNonce) m.update(":") m.update(pszQop) m.update(":") m.update(HA2) respHash = m.digest().encode('hex') return respHash class IUsernameDigestHash(Interface): """ This credential is used when a CredentialChecker has access to the hash of the username:realm:password as in an Apache .htdigest file. """ def checkHash(self, digestHash): """ @param digestHash: The hashed username:realm:password to check against. @return: a deferred which becomes, or a boolean indicating if the hash matches. 
""" class DigestedCredentials: """Yet Another Simple HTTP Digest authentication scheme""" implements(credentials.IUsernameHashedPassword, IUsernameDigestHash) def __init__(self, username, method, realm, fields): self.username = username self.method = method self.realm = realm self.fields = fields def checkPassword(self, password): response = self.fields.get('response') uri = self.fields.get('uri') nonce = self.fields.get('nonce') cnonce = self.fields.get('cnonce') nc = self.fields.get('nc') algo = self.fields.get('algorithm', 'md5').lower() qop = self.fields.get('qop', 'auth') expected = calcResponse( calcHA1(algo, self.username, self.realm, password, nonce, cnonce), algo, nonce, nc, cnonce, qop, self.method, uri, None ) return expected == response def checkHash(self, digestHash): response = self.fields.get('response') uri = self.fields.get('uri') nonce = self.fields.get('nonce') cnonce = self.fields.get('cnonce') nc = self.fields.get('nc') algo = self.fields.get('algorithm', 'md5').lower() qop = self.fields.get('qop', 'auth') expected = calcResponse( calcHA1(algo, None, None, None, nonce, cnonce, preHA1=digestHash), algo, nonce, nc, cnonce, qop, self.method, uri, None ) return expected == response class DigestCredentialFactory(object): """ Support for RFC2617 HTTP Digest Authentication @cvar CHALLENGE_LIFETIME_SECS: The number of seconds for which an opaque should be valid. @ivar privateKey: A random string used for generating the secure opaque. 
""" implements(ICredentialFactory) CHALLENGE_LIFETIME_SECS = 15 * 60 # 15 minutes scheme = "digest" def __init__(self, algorithm, realm): """ @type algorithm: C{str} @param algorithm: case insensitive string that specifies the hash algorithm used, should be either, md5, md5-sess or sha @type realm: C{str} @param realm: case sensitive string that specifies the realm portion of the challenge """ self.algorithm = algorithm self.realm = realm - c = tuple([random.randrange(sys.maxint) for _ in range(3)]) + c = tuple([random.randrange(sys.maxsize) for _ in range(3)]) self.privateKey = '%d%d%d' % c def generateNonce(self): - c = tuple([random.randrange(sys.maxint) for _ in range(3)]) + c = tuple([random.randrange(sys.maxsize) for _ in range(3)]) c = '%d%d%d' % c return c def _getTime(self): """ Parameterize the time based seed used in generateOpaque so we can deterministically unittest it's behavior. """ return time.time() def generateOpaque(self, nonce, clientip): """ Generate an opaque to be returned to the client. This should be a unique string that can be returned to us and verified. """ # Now, what we do is encode the nonce, client ip and a timestamp # in the opaque value with a suitable digest key = "%s,%s,%s" % (nonce, clientip, str(int(self._getTime()))) digest = md5(key + self.privateKey).hexdigest() ekey = key.encode('base64') return "%s-%s" % (digest, ekey.strip('\n')) def verifyOpaque(self, opaque, nonce, clientip): """ Given the opaque and nonce from the request, as well as the clientip that made the request, verify that the opaque was generated by us. And that it's not too old. @param opaque: The opaque value from the Digest response @param nonce: The nonce value from the Digest response @param clientip: The remote IP address of the client making the request @return: C{True} if the opaque was successfully verified. @raise error.LoginFailed: if C{opaque} could not be parsed or contained the wrong values. 
""" # First split the digest from the key opaqueParts = opaque.split('-') if len(opaqueParts) != 2: raise error.LoginFailed('Invalid response, invalid opaque value') # Verify the key key = opaqueParts[1].decode('base64') keyParts = key.split(',') if len(keyParts) != 3: raise error.LoginFailed('Invalid response, invalid opaque value') if keyParts[0] != nonce: raise error.LoginFailed( 'Invalid response, incompatible opaque/nonce values') if keyParts[1] != clientip: raise error.LoginFailed( 'Invalid response, incompatible opaque/client values') if (int(self._getTime()) - int(keyParts[2]) > DigestCredentialFactory.CHALLENGE_LIFETIME_SECS): raise error.LoginFailed( 'Invalid response, incompatible opaque/nonce too old') # Verify the digest digest = md5(key + self.privateKey).hexdigest() if digest != opaqueParts[0]: raise error.LoginFailed('Invalid response, invalid opaque value') return True def getChallenge(self, peer): """ Generate the challenge for use in the WWW-Authenticate header @param peer: The L{IAddress} of the requesting client. @return: The C{dict} that can be used to generate a WWW-Authenticate header. """ c = self.generateNonce() o = self.generateOpaque(c, peer.host) return {'nonce': c, 'opaque': o, 'qop': 'auth', 'algorithm': self.algorithm, 'realm': self.realm} def decode(self, response, request): """ Decode the given response and attempt to generate a L{DigestedCredentials} from it. @type response: C{str} @param response: A string of comma seperated key=value pairs @type request: L{xcap.web.server.Request} @param request: the request being processed @return: L{DigestedCredentials} @raise: L{error.LoginFailed} if the response does not contain a username, a nonce, an opaque, or if the opaque is invalid. 
""" def unq(s): if s[0] == s[-1] == '"': return s[1:-1] return s response = ' '.join(response.splitlines()) parts = response.split(',') auth = {} for (k, v) in [p.split('=', 1) for p in parts]: auth[k.strip()] = unq(v.strip()) username = auth.get('username') if not username: raise error.LoginFailed('Invalid response, no username given.') if 'opaque' not in auth: raise error.LoginFailed('Invalid response, no opaque given.') if 'nonce' not in auth: raise error.LoginFailed('Invalid response, no nonce given.') # Now verify the nonce/opaque values for this client if self.verifyOpaque(auth.get('opaque'), auth.get('nonce'), request.remoteAddr.host): return DigestedCredentials(username, request.method, self.realm, auth) diff --git a/xcap/web/auth/wrapper.py b/xcap/web/auth/wrapper.py index e05e25c..d8fbc6e 100644 --- a/xcap/web/auth/wrapper.py +++ b/xcap/web/auth/wrapper.py @@ -1,200 +1,200 @@ """ Wrapper Resources for rfc2617 HTTP Auth. """ from zope.interface import implements, directlyProvides from twisted.cred import error, credentials from twisted.python import failure from xcap.web import responsecode from xcap.web import http from xcap.web import iweb from xcap.web.auth.interfaces import IAuthenticatedRequest class UnauthorizedResponse(http.StatusResponse): """A specialized response class for generating www-authenticate headers from the given L{CredentialFactory} instances """ def __init__(self, factories, remoteAddr=None): """ @param factories: A L{dict} of {'scheme': ICredentialFactory} @param remoteAddr: An L{IAddress} for the connecting client. 
""" super(UnauthorizedResponse, self).__init__( responsecode.UNAUTHORIZED, "You are not authorized to access this resource.") authHeaders = [] - for factory in factories.itervalues(): + for factory in factories.values(): authHeaders.append((factory.scheme, factory.getChallenge(remoteAddr))) self.headers.setHeader('www-authenticate', authHeaders) class HTTPAuthResource(object): """I wrap a resource to prevent it being accessed unless the authentication can be completed using the credential factory, portal, and interfaces specified. """ implements(iweb.IResource) def __init__(self, wrappedResource, credentialFactories, portal, interfaces): """ @param wrappedResource: A L{xcap.web.iweb.IResource} to be returned from locateChild and render upon successful authentication. @param credentialFactories: A list of instances that implement L{ICredentialFactory}. @type credentialFactories: L{list} @param portal: Portal to handle logins for this resource. @type portal: L{twisted.cred.portal.Portal} @param interfaces: the interfaces that are allowed to log in via the given portal @type interfaces: L{tuple} """ self.wrappedResource = wrappedResource self.credentialFactories = dict([(factory.scheme, factory) for factory in credentialFactories]) self.portal = portal self.interfaces = interfaces def _loginSucceeded(self, avatar, request): """ Callback for successful login. @param avatar: A tuple of the form (interface, avatar) as returned by your realm. @param request: L{IRequest} that encapsulates this auth attempt. @return: the IResource in C{self.wrappedResource} """ request.avatarInterface, request.avatar = avatar directlyProvides(request, IAuthenticatedRequest) def _addAuthenticateHeaders(request, response): """ A response filter that adds www-authenticate headers to an outgoing response if it's code is UNAUTHORIZED (401) and it does not already have them. 
""" if response.code == responsecode.UNAUTHORIZED: if not response.headers.hasHeader('www-authenticate'): newResp = UnauthorizedResponse(self.credentialFactories, request.remoteAddr) response.headers.setHeader( 'www-authenticate', newResp.headers.getHeader('www-authenticate')) return response _addAuthenticateHeaders.handleErrors = True request.addResponseFilter(_addAuthenticateHeaders) return self.wrappedResource def _loginFailed(self, result, request): """ Errback for failed login. @param result: L{Failure} returned by portal.login @param request: L{IRequest} that encapsulates this auth attempt. @return: A L{Failure} containing an L{HTTPError} containing the L{UnauthorizedResponse} if C{result} is an L{UnauthorizedLogin} or L{UnhandledCredentials} error """ result.trap(error.UnauthorizedLogin, error.UnhandledCredentials) return failure.Failure( http.HTTPError( UnauthorizedResponse( self.credentialFactories, request.remoteAddr))) def login(self, factory, response, request): """ @param factory: An L{ICredentialFactory} that understands the given response. @param response: The client's authentication response as a string. @param request: The request that prompted this authentication attempt. @return: A L{Deferred} that fires with the wrappedResource on success or a failure containing an L{UnauthorizedResponse} """ try: creds = factory.decode(response, request) except error.LoginFailed: raise http.HTTPError(UnauthorizedResponse( self.credentialFactories, request.remoteAddr)) return self.portal.login(creds, None, *self.interfaces ).addCallbacks(self._loginSucceeded, self._loginFailed, (request,), None, (request,), None) def authenticate(self, request): """ Attempt to authenticate the givin request @param request: An L{IRequest} to be authenticated. 
""" authHeader = request.headers.getHeader('authorization') if authHeader is None: return self.portal.login(credentials.Anonymous(), None, *self.interfaces ).addCallbacks(self._loginSucceeded, self._loginFailed, (request,), None, (request,), None) elif authHeader[0] not in self.credentialFactories: raise http.HTTPError(UnauthorizedResponse( self.credentialFactories, request.remoteAddr)) else: return self.login(self.credentialFactories[authHeader[0]], authHeader[1], request) def locateChild(self, request, seg): """ Authenticate the request then return the C{self.wrappedResource} and the unmodified segments. """ return self.authenticate(request), seg def renderHTTP(self, request): """ Authenticate the request then return the result of calling renderHTTP on C{self.wrappedResource} """ def _renderResource(resource): return resource.renderHTTP(request) d = self.authenticate(request) d.addCallback(_renderResource) return d diff --git a/xcap/web/channel/http.py b/xcap/web/channel/http.py index 8a8c95d..36418cf 100644 --- a/xcap/web/channel/http.py +++ b/xcap/web/channel/http.py @@ -1,898 +1,898 @@ import socket import warnings -from cStringIO import StringIO +from io import StringIO from twisted.internet import interfaces, protocol, reactor from twisted.protocols import policies, basic from twisted.python import log from zope.interface import implements from xcap.web import responsecode from xcap.web import http_headers from xcap.web import http PERSIST_NO_PIPELINE, PERSIST_PIPELINE = (1,2) _cachedHostNames = {} def _cachedGetHostByAddr(hostaddr): hostname = _cachedHostNames.get(hostaddr) if hostname is None: try: hostname = socket.gethostbyaddr(hostaddr)[0] except socket.herror: hostname = hostaddr _cachedHostNames[hostaddr]=hostname return hostname class StringTransport(object): """ I am a StringIO wrapper that conforms for the transport API. I support the 'writeSequence' method. 
""" def __init__(self): self.s = StringIO() def writeSequence(self, seq): self.s.write(''.join(seq)) def __getattr__(self, attr): return getattr(self.__dict__['s'], attr) class AbortedException(Exception): pass class HTTPParser(object): """This class handles the parsing side of HTTP processing. With a suitable subclass, it can parse either the client side or the server side of the connection. """ # Class config: parseCloseAsEnd = False # Instance vars chunkedIn = False headerlen = 0 length = None inHeaders = None partialHeader = '' connHeaders = None finishedReading = False channel = None # For subclassing... # Needs attributes: # version # Needs functions: # createRequest() # processRequest() # _abortWithError() # handleContentChunk(data) # handleContentComplete() # Needs functions to exist on .channel # channel.maxHeaderLength # channel.requestReadFinished(self) # channel.setReadPersistent(self, persistent) # (from LineReceiver): # channel.setRawMode() # channel.setLineMode(extraneous) # channel.pauseProducing() # channel.resumeProducing() # channel.stopProducing() def __init__(self, channel): self.inHeaders = http_headers.Headers() self.channel = channel def lineReceived(self, line): if self.chunkedIn: # Parsing a chunked input if self.chunkedIn == 1: # First we get a line like "chunk-size [';' chunk-extension]" # (where chunk extension is just random crap as far as we're concerned) # RFC says to ignore any extensions you don't recognize -- that's all of them. chunksize = line.split(';', 1)[0] try: self.length = int(chunksize, 16) except: self._abortWithError(responsecode.BAD_REQUEST, "Invalid chunk size, not a hex number: %s!" 
% chunksize) if self.length < 0: self._abortWithError(responsecode.BAD_REQUEST, "Invalid chunk size, negative.") if self.length == 0: # We're done, parse the trailers line self.chunkedIn = 3 else: # Read self.length bytes of raw data self.channel.setRawMode() elif self.chunkedIn == 2: # After we got data bytes of the appropriate length, we end up here, # waiting for the CRLF, then go back to get the next chunk size. if line != '': self._abortWithError(responsecode.BAD_REQUEST, "Excess %d bytes sent in chunk transfer mode" % len(line)) self.chunkedIn = 1 elif self.chunkedIn == 3: # TODO: support Trailers (maybe! but maybe not!) # After getting the final "0" chunk we're here, and we *EAT MERCILESSLY* # any trailer headers sent, and wait for the blank line to terminate the # request. if line == '': self.allContentReceived() # END of chunk handling elif line == '': # Empty line => End of headers if self.partialHeader: self.headerReceived(self.partialHeader) self.partialHeader = '' self.allHeadersReceived() # can set chunkedIn self.createRequest() if self.chunkedIn: # stay in linemode waiting for chunk header pass elif self.length == 0: # no content expected self.allContentReceived() else: # await raw data as content self.channel.setRawMode() # Should I do self.pauseProducing() here? self.processRequest() else: self.headerlen += len(line) if self.headerlen > self.channel.maxHeaderLength: self._abortWithError(responsecode.BAD_REQUEST, 'Headers too long.') if line[0] in ' \t': # Append a header continuation self.partialHeader += line else: if self.partialHeader: self.headerReceived(self.partialHeader) self.partialHeader = line def rawDataReceived(self, data): """Handle incoming content.""" datalen = len(data) if datalen < self.length: self.handleContentChunk(data) self.length = self.length - datalen else: self.handleContentChunk(data[:self.length]) extraneous = data[self.length:] channel = self.channel # could go away from allContentReceived. 
if not self.chunkedIn: self.allContentReceived() else: # NOTE: in chunked mode, self.length is the size of the current chunk, # so we still have more to read. self.chunkedIn = 2 # Read next chunksize channel.setLineMode(extraneous) def headerReceived(self, line): """Store this header away. Check for too much header data (> channel.maxHeaderLength) and abort the connection if so. """ nameval = line.split(':', 1) if len(nameval) != 2: self._abortWithError(responsecode.BAD_REQUEST, "No ':' in header.") name, val = nameval val = val.lstrip(' \t') self.inHeaders.addRawHeader(name, val) def allHeadersReceived(self): # Split off connection-related headers connHeaders = self.splitConnectionHeaders() # Set connection parameters from headers self.setConnectionParams(connHeaders) self.connHeaders = connHeaders def allContentReceived(self): self.finishedReading = True self.channel.requestReadFinished(self) self.handleContentComplete() def splitConnectionHeaders(self): """ Split off connection control headers from normal headers. The normal headers are then passed on to user-level code, while the connection headers are stashed in .connHeaders and used for things like request/response framing. This corresponds roughly with the HTTP RFC's description of 'hop-by-hop' vs 'end-to-end' headers in RFC2616 S13.5.1, with the following exceptions: * proxy-authenticate and proxy-authorization are not treated as connection headers. * content-length is, as it is intimiately related with low-level HTTP parsing, and is made available to user-level code via the stream length, rather than a header value. (except for HEAD responses, in which case it is NOT used by low-level HTTP parsing, and IS kept in the normal headers. 
""" def move(name): h = inHeaders.getRawHeaders(name, None) if h is not None: inHeaders.removeHeader(name) connHeaders.setRawHeaders(name, h) # NOTE: According to HTTP spec, we're supposed to eat the # 'Proxy-Authenticate' and 'Proxy-Authorization' headers also, but that # doesn't sound like a good idea to me, because it makes it impossible # to have a non-authenticating transparent proxy in front of an # authenticating proxy. An authenticating proxy can eat them itself. # # 'Proxy-Connection' is an undocumented HTTP 1.0 abomination. connHeaderNames = ['content-length', 'connection', 'keep-alive', 'te', 'trailers', 'transfer-encoding', 'upgrade', 'proxy-connection'] inHeaders = self.inHeaders connHeaders = http_headers.Headers() move('connection') if self.version < (1,1): # Remove all headers mentioned in Connection, because a HTTP 1.0 # proxy might have erroneously forwarded it from a 1.1 client. for name in connHeaders.getHeader('connection', ()): if inHeaders.hasHeader(name): inHeaders.removeHeader(name) else: # Otherwise, just add the headers listed to the list of those to move connHeaderNames.extend(connHeaders.getHeader('connection', ())) # If the request was HEAD, self.length has been set to 0 by # HTTPClientRequest.submit; in this case, Content-Length should # be treated as a response header, not a connection header. # Note: this assumes the invariant that .length will always be None # coming into this function, unless this is a HEAD request. 
if self.length is not None: connHeaderNames.remove('content-length') for headername in connHeaderNames: move(headername) return connHeaders def setConnectionParams(self, connHeaders): # Figure out persistent connection stuff if self.version >= (1,1): if 'close' in connHeaders.getHeader('connection', ()): readPersistent = False else: readPersistent = PERSIST_PIPELINE elif 'keep-alive' in connHeaders.getHeader('connection', ()): readPersistent = PERSIST_NO_PIPELINE else: readPersistent = False # Okay, now implement section 4.4 Message Length to determine # how to find the end of the incoming HTTP message. transferEncoding = connHeaders.getHeader('transfer-encoding') if transferEncoding: if transferEncoding[-1] == 'chunked': # Chunked self.chunkedIn = 1 # Cut off the chunked encoding (cause it's special) transferEncoding = transferEncoding[:-1] elif not self.parseCloseAsEnd: # Would close on end of connection, except this can't happen for # client->server data. (Well..it could actually, since TCP has half-close # but the HTTP spec says it can't, so we'll pretend it's right.) self._abortWithError(responsecode.BAD_REQUEST, "Transfer-Encoding received without chunked in last position.") # TODO: support gzip/etc encodings. # FOR NOW: report an error if the client uses any encodings. # They shouldn't, because we didn't send a TE: header saying it's okay. if transferEncoding: self._abortWithError(responsecode.NOT_IMPLEMENTED, "Transfer-Encoding %s not supported." % transferEncoding) else: # No transfer-coding. self.chunkedIn = 0 if self.parseCloseAsEnd: # If no Content-Length, then it's indeterminate length data # (unless the responsecode was one of the special no body ones) # Also note that for HEAD requests, connHeaders won't have # content-length even if the response did. 
if self.code in http.NO_BODY_CODES: self.length = 0 else: self.length = connHeaders.getHeader('content-length', self.length) # If it's an indeterminate stream without transfer encoding, it must be # the last request. if self.length is None: readPersistent = False else: # If no Content-Length either, assume no content. self.length = connHeaders.getHeader('content-length', 0) # Set the calculated persistence self.channel.setReadPersistent(readPersistent) def abortParse(self): # If we're erroring out while still reading the request if not self.finishedReading: self.finishedReading = True self.channel.setReadPersistent(False) self.channel.requestReadFinished(self) # producer interface def pauseProducing(self): if not self.finishedReading: self.channel.pauseProducing() def resumeProducing(self): if not self.finishedReading: self.channel.resumeProducing() def stopProducing(self): if not self.finishedReading: self.channel.stopProducing() class HTTPChannelRequest(HTTPParser): """This class handles the state and parsing for one HTTP request. It is responsible for all the low-level connection oriented behavior. 
Thus, it takes care of keep-alive, de-chunking, etc., and passes the non-connection headers on to the user-level Request object.""" command = path = version = None queued = 0 request = None out_version = "HTTP/1.1" def __init__(self, channel, queued=0): HTTPParser.__init__(self, channel) self.queued=queued # Buffer writes to a string until we're first in line # to write a response if queued: self.transport = StringTransport() else: self.transport = self.channel.transport # set the version to a fallback for error generation self.version = (1,0) def gotInitialLine(self, initialLine): parts = initialLine.split() # Parse the initial request line if len(parts) != 3: if len(parts) == 1: parts.append('/') if len(parts) == 2 and parts[1][0] == '/': parts.append('HTTP/0.9') else: self._abortWithError(responsecode.BAD_REQUEST, 'Bad request line: %s' % initialLine) self.command, self.path, strversion = parts try: protovers = http.parseVersion(strversion) if protovers[0] != 'http': raise ValueError() except ValueError: self._abortWithError(responsecode.BAD_REQUEST, "Unknown protocol: %s" % strversion) self.version = protovers[1:3] # Ensure HTTP 0 or HTTP 1. if self.version[0] > 1: self._abortWithError(responsecode.HTTP_VERSION_NOT_SUPPORTED, 'Only HTTP 0.9 and HTTP 1.x are supported.') if self.version[0] == 0: # simulate end of headers, as HTTP 0 doesn't have headers. 
self.lineReceived('') def lineLengthExceeded(self, line, wasFirst=False): code = wasFirst and responsecode.REQUEST_URI_TOO_LONG or responsecode.BAD_REQUEST self._abortWithError(code, 'Header line too long.') def createRequest(self): self.request = self.channel.requestFactory(self, self.command, self.path, self.version, self.length, self.inHeaders) del self.inHeaders def processRequest(self): self.request.process() def handleContentChunk(self, data): self.request.handleContentChunk(data) def handleContentComplete(self): self.request.handleContentComplete() ############## HTTPChannelRequest *RESPONSE* methods ############# producer = None chunkedOut = False finished = False ##### Request Callbacks ##### def writeIntermediateResponse(self, code, headers=None): if self.version >= (1,1): self._writeHeaders(code, headers, False) def writeHeaders(self, code, headers): self._writeHeaders(code, headers, True) def _writeHeaders(self, code, headers, addConnectionHeaders): # HTTP 0.9 doesn't have headers. if self.version[0] == 0: return l = [] code_message = responsecode.RESPONSES.get(code, "Unknown Status") l.append('%s %s %s\r\n' % (self.out_version, code, code_message)) if headers is not None: for name, valuelist in headers.getAllRawHeaders(): for value in valuelist: l.append("%s: %s\r\n" % (name, value)) if addConnectionHeaders: # if we don't have a content length, we send data in # chunked mode, so that we can support persistent connections. 
if (headers.getHeader('content-length') is None and self.command != "HEAD" and code not in http.NO_BODY_CODES): if self.version >= (1,1): l.append("%s: %s\r\n" % ('Transfer-Encoding', 'chunked')) self.chunkedOut = True else: # Cannot use persistent connections if we can't do chunking self.channel.dropQueuedRequests() if self.channel.isLastRequest(self): l.append("%s: %s\r\n" % ('Connection', 'close')) elif self.version < (1,1): l.append("%s: %s\r\n" % ('Connection', 'Keep-Alive')) l.append("\r\n") self.transport.writeSequence(l) def write(self, data): if not data: return elif self.chunkedOut: self.transport.writeSequence(("%X\r\n" % len(data), data, "\r\n")) else: self.transport.write(data) def finish(self): """We are finished writing data.""" if self.finished: warnings.warn("Warning! request.finish called twice.", stacklevel=2) return if self.chunkedOut: # write last chunk and closing CRLF self.transport.write("0\r\n\r\n") self.finished = True if not self.queued: self._cleanup() def abortConnection(self, closeWrite=True): """Abort the HTTP connection because of some kind of unrecoverable error. If closeWrite=False, then only abort reading, but leave the writing side alone. This is mostly for internal use by the HTTP request parsing logic, so that it can call an error page generator. Otherwise, completely shut down the connection. 
""" self.abortParse() if closeWrite: if self.producer: self.producer.stopProducing() self.unregisterProducer() self.finished = True if self.queued: self.transport.reset() self.transport.truncate() else: self._cleanup() def getHostInfo(self): t=self.channel.transport secure = interfaces.ISSLTransport(t, None) is not None host = t.getHost() host.host = _cachedGetHostByAddr(host.host) return host, secure def getRemoteHost(self): return self.channel.transport.getPeer() ##### End Request Callbacks ##### def _abortWithError(self, errorcode, text=''): """Handle low level protocol errors.""" headers = http_headers.Headers() headers.setHeader('content-length', len(text)+1) self.abortConnection(closeWrite=False) self.writeHeaders(errorcode, headers) self.write(text) self.write("\n") self.finish() raise AbortedException def _cleanup(self): """Called when have finished responding and are no longer queued.""" if self.producer: log.err(RuntimeError("Producer was not unregistered for %s" % self)) self.unregisterProducer() self.channel.requestWriteFinished(self) del self.transport # methods for channel - end users should not use these def noLongerQueued(self): """Notify the object that it is no longer queued. We start writing whatever data we have to the transport, etc. This method is not intended for users. """ if not self.queued: - raise RuntimeError, "noLongerQueued() got called unnecessarily." + raise RuntimeError("noLongerQueued() got called unnecessarily.") self.queued = 0 # set transport to real one and send any buffer data data = self.transport.getvalue() self.transport = self.channel.transport if data: self.transport.write(data) # if we have producer, register it with transport if (self.producer is not None) and not self.finished: self.transport.registerProducer(self.producer, True) # if we're finished, clean up if self.finished: self._cleanup() # consumer interface def registerProducer(self, producer, streaming): """Register a producer. 
""" if self.producer: - raise ValueError, "registering producer %s before previous one (%s) was unregistered" % (producer, self.producer) + raise ValueError("registering producer %s before previous one (%s) was unregistered" % (producer, self.producer)) self.producer = producer if self.queued: producer.pauseProducing() else: self.transport.registerProducer(producer, streaming) def unregisterProducer(self): """Unregister the producer.""" if not self.queued: self.transport.unregisterProducer() self.producer = None def connectionLost(self, reason): """connection was lost""" if self.queued and self.producer: self.producer.stopProducing() self.producer = None if self.request: self.request.connectionLost(reason) class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin, object): """A receiver for HTTP requests. Handles splitting up the connection for the multiple HTTPChannelRequests that may be in progress on this channel. @ivar timeOut: number of seconds to wait before terminating an idle connection. @ivar maxPipeline: number of outstanding in-progress requests to allow before pausing the input. @ivar maxHeaderLength: number of bytes of header to accept from the client. """ implements(interfaces.IHalfCloseableProtocol) ## Configuration parameters. Set in instances or subclasses. # How many simultaneous requests to handle. maxPipeline = 4 # Timeout when between two requests betweenRequestsTimeOut = 15 # Timeout between lines or bytes while reading a request inputTimeOut = 60 * 4 # maximum length of headers (10KiB) maxHeaderLength = 10240 # Allow persistent connections? 
allowPersistentConnections = True # ChannelRequest chanRequestFactory = HTTPChannelRequest requestFactory = http.Request _first_line = 2 readPersistent = PERSIST_PIPELINE _readLost = False _writeLost = False _lingerTimer = None chanRequest = None def _callLater(self, secs, fun): reactor.callLater(secs, fun) def __init__(self): # the request queue self.requests = [] def connectionMade(self): self.setTimeout(self.inputTimeOut) self.factory.outstandingRequests+=1 def lineReceived(self, line): if self._first_line: self.setTimeout(self.inputTimeOut) # if this connection is not persistent, drop any data which # the client (illegally) sent after the last request. if not self.readPersistent: self.dataReceived = self.lineReceived = lambda *args: None return # IE sends an extraneous empty line (\r\n) after a POST request; # eat up such a line, but only ONCE if not line and self._first_line == 1: self._first_line = 2 return self._first_line = 0 if not self.allowPersistentConnections: # Don't allow a second request self.readPersistent = False try: self.chanRequest = self.chanRequestFactory(self, len(self.requests)) self.requests.append(self.chanRequest) self.chanRequest.gotInitialLine(line) except AbortedException: pass else: try: self.chanRequest.lineReceived(line) except AbortedException: pass def lineLengthExceeded(self, line): if self._first_line: # Fabricate a request object to respond to the line length violation. 
self.chanRequest = self.chanRequestFactory(self, len(self.requests)) self.requests.append(self.chanRequest) self.chanRequest.gotInitialLine("GET fake HTTP/1.0") try: self.chanRequest.lineLengthExceeded(line, self._first_line) except AbortedException: pass def rawDataReceived(self, data): self.setTimeout(self.inputTimeOut) try: self.chanRequest.rawDataReceived(data) except AbortedException: pass def requestReadFinished(self, request): if(self.readPersistent is PERSIST_NO_PIPELINE or len(self.requests) >= self.maxPipeline): self.pauseProducing() # reset state variables self._first_line = 1 self.chanRequest = None self.setLineMode() # Disable the idle timeout, in case this request takes a long # time to finish generating output. if len(self.requests) > 0: self.setTimeout(None) def _startNextRequest(self): # notify next request, if present, it can start writing del self.requests[0] if self._writeLost: self.transport.loseConnection() elif self.requests: self.requests[0].noLongerQueued() # resume reading if allowed to if(not self._readLost and self.readPersistent is not PERSIST_NO_PIPELINE and len(self.requests) < self.maxPipeline): self.resumeProducing() elif self._readLost: # No more incoming data, they already closed! self.transport.loseConnection() else: # no requests in queue, resume reading self.setTimeout(self.betweenRequestsTimeOut) self.resumeProducing() def setReadPersistent(self, persistent): if self.readPersistent: # only allow it to be set if it's not currently False self.readPersistent = persistent def dropQueuedRequests(self): """Called when a response is written that forces a connection close.""" self.readPersistent = False # Tell all requests but first to abort. 
for request in self.requests[1:]: request.connectionLost(None) del self.requests[1:] def isLastRequest(self, request): # Is this channel handling the last possible request return not self.readPersistent and self.requests[-1] == request def requestWriteFinished(self, request): """Called by first request in queue when it is done.""" if request != self.requests[0]: raise TypeError # Don't del because we haven't finished cleanup, so, # don't want queue len to be 0 yet. self.requests[0] = None if self.readPersistent or len(self.requests) > 1: # Do this in the next reactor loop so as to # not cause huge call stacks with fast # incoming requests. self._callLater(0, self._startNextRequest) else: self.lingeringClose() def timeoutConnection(self): #log.msg("Timing out client: %s" % str(self.transport.getPeer())) policies.TimeoutMixin.timeoutConnection(self) def lingeringClose(self): """ This is a bit complicated. This process is necessary to ensure proper workingness when HTTP pipelining is in use. Here is what it wants to do: 1. Finish writing any buffered data, then close our write side. While doing so, read and discard any incoming data. 2. When that happens (writeConnectionLost called), wait up to 20 seconds for the remote end to close their write side (our read side). 3. - If they do (readConnectionLost called), close the socket, and cancel the timeout. - If that doesn't happen, the timer fires, and makes the socket close anyways. 
""" # Close write half self.transport.loseWriteConnection() # Throw out any incoming data self.dataReceived = self.lineReceived = lambda *args: None self.transport.resumeProducing() def writeConnectionLost(self): # Okay, all data has been written # In 20 seconds, actually close the socket self._lingerTimer = reactor.callLater(20, self._lingerClose) self._writeLost = True def _lingerClose(self): self._lingerTimer = None self.transport.loseConnection() def readConnectionLost(self): """Read connection lost""" # If in the lingering-close state, lose the socket. if self._lingerTimer: self._lingerTimer.cancel() self._lingerTimer = None self.transport.loseConnection() return # If between requests, drop connection # when all current requests have written their data. self._readLost = True if not self.requests: # No requests in progress, lose now. self.transport.loseConnection() # If currently in the process of reading a request, this is # probably a client abort, so lose the connection. if self.chanRequest: self.transport.loseConnection() def connectionLost(self, reason): self.factory.outstandingRequests-=1 self._writeLost = True self.readConnectionLost() self.setTimeout(None) # Tell all requests to abort. for request in self.requests: if request is not None: request.connectionLost(reason) class OverloadedServerProtocol(protocol.Protocol): def connectionMade(self): self.transport.write("HTTP/1.0 503 Service Unavailable\r\n" "Content-Type: text/html\r\n" "Connection: close\r\n\r\n" "503 Service Unavailable" "

Service Unavailable

" "The server is currently overloaded, " "please try again later.") self.transport.loseConnection() class HTTPFactory(protocol.ServerFactory): """Factory for HTTP server.""" protocol = HTTPChannel protocolArgs = None outstandingRequests = 0 def __init__(self, requestFactory, maxRequests=600, **kwargs): self.maxRequests=maxRequests self.protocolArgs = kwargs self.protocolArgs['requestFactory']=requestFactory def buildProtocol(self, addr): if self.outstandingRequests >= self.maxRequests: return OverloadedServerProtocol() p = protocol.ServerFactory.buildProtocol(self, addr) - for arg,value in self.protocolArgs.iteritems(): + for arg,value in self.protocolArgs.items(): setattr(p, arg, value) return p __all__ = ['HTTPFactory', ] diff --git a/xcap/web/compat.py b/xcap/web/compat.py index 75bd95b..e37543b 100644 --- a/xcap/web/compat.py +++ b/xcap/web/compat.py @@ -1,447 +1,446 @@ -from __future__ import generators -from urllib import quote, string +from urllib.parse import quote import UserDict, math, time -from cStringIO import StringIO +from io import StringIO from xcap.web import http_headers, iweb, stream, responsecode from twisted.internet import defer, address from twisted.python import components from twisted.spread import pb from zope.interface import implements class HeaderAdapter(UserDict.DictMixin): def __init__(self, headers): self._headers = headers def __getitem__(self, name): raw = self._headers.getRawHeaders(name) if raw is None: raise KeyError(name) return ', '.join(raw) def __setitem__(self, name, value): self._headers.setRawHeaders([value]) def __delitem__(self, name): if not self._headers.hasHeader(name): raise KeyError(name) self._headers.removeHeader(name) def iteritems(self): for k,v in self._headers.getAllRawHeaders(): yield k, ', '.join(v) def keys(self): - return [k for k, _ in self.iteritems()] + return [k for k, _ in self.items()] def __iter__(self): - for k, _ in self.iteritems(): + for k, _ in self.items(): yield k def has_key(self, name): 
return self._headers.hasHeader(name) def makeOldRequestAdapter(original): # Cache the adapter. Replace this with a more better generalized # mechanism when one becomes available. if not hasattr(original, '_oldRequest'): original._oldRequest = OldRequestAdapter(original) return original._oldRequest def _addressToTuple(addr): if isinstance(addr, address.IPv4Address): return ('INET', addr.host, addr.port) elif isinstance(addr, address.UNIXAddress): return ('UNIX', addr.name) else: return tuple(addr) class OldRequestAdapter(pb.Copyable, components.Componentized, object): """Adapt old requests to new request """ implements(iweb.IOldRequest) def _getFrom(where, name): def _get(self): return getattr(getattr(self, where), name) return property(_get) def _getsetFrom(where, name): def _get(self): return getattr(getattr(self, where), name) def _set(self, new): setattr(getattr(self, where), name, new) def _del(self): delattr(getattr(self, where), name) return property(_get, _set, _del) def _getsetHeaders(where): def _get(self): headers = getattr(self, where).headers return HeaderAdapter(headers) def _set(self, newheaders): headers = http_headers.Headers() - for n,v in newheaders.items(): + for n,v in list(newheaders.items()): headers.setRawHeaders(n, (v,)) newheaders = headers getattr(self, where).headers = newheaders return property(_get, _set) code = _getsetFrom('response', 'code') code_message = "" method = _getsetFrom('request', 'method') uri = _getsetFrom('request', 'uri') def _getClientproto(self): return "HTTP/%d.%d" % self.request.clientproto clientproto = property(_getClientproto) received_headers = _getsetHeaders('request') headers = _getsetHeaders('response') path = _getsetFrom('request', 'path') # cookies = # Do I need this? # received_cookies = # Do I need this? content = StringIO() #### FIXME args = _getsetFrom('request', 'args') # stack = # WTF is stack? 
prepath = _getsetFrom('request', 'prepath') postpath = _getsetFrom('request', 'postpath') def _getClient(self): return "WTF" client = property(_getClient) def _getHost(self): return address.IPv4Address("TCP", self.request.host, self.request.port) host = property(_getHost) def __init__(self, request): from xcap.web import http components.Componentized.__init__(self) self.request = request self.response = http.Response(stream=stream.ProducerStream()) # This deferred will be fired by the first call to write on OldRequestAdapter # and will cause the headers to be output. self.deferredResponse = defer.Deferred() def getStateToCopyFor(self, issuer): # This is for distrib compatibility x = {} x['prepath'] = self.prepath x['postpath'] = self.postpath x['method'] = self.method x['uri'] = self.uri x['clientproto'] = self.clientproto self.content.seek(0, 0) x['content_data'] = self.content.read() x['remote'] = pb.ViewPoint(issuer, self) x['host'] = _addressToTuple(self.request.chanRequest.channel.transport.getHost()) x['client'] = _addressToTuple(self.request.chanRequest.channel.transport.getPeer()) return x def getTypeToCopy(self): # lie to PB so the ResourcePublisher doesn't have to know xcap.web # exists which is good because xcap.web doesn't exist. 
return 'twisted.web.server.Request' def registerProducer(self, producer, streaming): self.response.stream.registerProducer(producer, streaming) def unregisterProducer(self): self.response.stream.unregisterProducer() def finish(self): if self.deferredResponse is not None: d = self.deferredResponse self.deferredResponse = None d.callback(self.response) self.response.stream.finish() def write(self, data): if self.deferredResponse is not None: d = self.deferredResponse self.deferredResponse = None d.callback(self.response) self.response.stream.write(data) def getHeader(self, name): raw = self.request.headers.getRawHeaders(name) if raw is None: return None return ', '.join(raw) def setHeader(self, name, value): """Set an outgoing HTTP header. """ self.response.headers.setRawHeaders(name, [value]) def setResponseCode(self, code, message=None): # message ignored self.response.code = code def setLastModified(self, when): # Never returns CACHED -- can it and still be compliant? - when = long(math.ceil(when)) + when = int(math.ceil(when)) self.response.headers.setHeader('last-modified', when) return None def setETag(self, etag): self.response.headers.setRawHeaders('etag', [etag]) return None def getAllHeaders(self): - return dict(self.headers.iteritems()) + return dict(iter(self.headers.items())) def getRequestHostname(self): return self.request.host def getCookie(self, key): for cookie in self.request.headers.getHeader('cookie', ()): if cookie.name == key: return cookie.value return None def addCookie(self, k, v, expires=None, domain=None, path=None, max_age=None, comment=None, secure=None): if expires is None and max_age is not None: expires=max_age-time.time() cookie = http_headers.Cookie(k,v, expires=expires, domain=domain, path=path, comment=comment, secure=secure) self.response.headers.setHeader('set-cookie', self.request.headers.getHeader('set-cookie', ())+(cookie,)) def notifyFinish(self): ### FIXME return None # return self.request.notifyFinish() def getHost(self): 
return self.host def setHost(self, host, port, ssl=0): self.request.host = host self.request.port = port self.request.scheme = ssl and 'https' or 'http' def isSecure(self): return self.request.scheme == 'https' def getClientIP(self): if isinstance(self.request.chanRequest.getRemoteHost(), address.IPv4Address): return self.client.host else: return None return self.request.chanRequest.getRemoteHost() return "127.0.0.1" def getClient(self): return "127.0.0.1" ### FIXME: def getUser(self): return "" def getPassword(self): return "" # Identical to original methods -- hopefully these don't have to change def sibLink(self, name): "Return the text that links to a sibling of the requested resource." if self.postpath: return (len(self.postpath)*"../") + name else: return name def childLink(self, name): "Return the text that links to a child of the requested resource." lpp = len(self.postpath) if lpp > 1: return ((lpp-1)*"../") + name elif lpp == 1: return name else: # lpp == 0 if len(self.prepath) and self.prepath[-1]: return self.prepath[-1] + '/' + name else: return name def redirect(self, url): """Utility function that does a redirect. The request should have finish() called after this. """ self.setResponseCode(responsecode.FOUND) self.setHeader("location", url) def prePathURL(self): port = self.getHost().port if self.isSecure(): default = 443 else: default = 80 if port == default: hostport = '' else: hostport = ':%d' % port return quote('http%s://%s%s/%s' % ( self.isSecure() and 's' or '', self.getRequestHostname(), hostport, string.join(self.prepath, '/')), "/:") # def URLPath(self): # from twisted.python import urlpath # return urlpath.URLPath.fromRequest(self) # But nevow wants it to look like this... :( def URLPath(self): from nevow import url return url.URL.fromContext(self) def rememberRootURL(self, url=None): """ Remember the currently-processed part of the URL for later recalling. 
""" if url is None: url = self.prePathURL() # remove one segment self.appRootURL = url[:url.rindex("/")] else: self.appRootURL = url def getRootURL(self): """ Get a previously-remembered URL. """ return self.appRootURL session = None def getSession(self, sessionInterface = None): # Session management if not self.session: # FIXME: make sitepath be something cookiename = string.join(['TWISTED_SESSION'] + self.sitepath, "_") sessionCookie = self.getCookie(cookiename) if sessionCookie: try: self.session = self.site.getSession(sessionCookie) except KeyError: pass # if it still hasn't been set, fix it up. if not self.session: self.session = self.site.makeSession() self.addCookie(cookiename, self.session.uid, path='/') self.session.touch() if sessionInterface: return self.session.getComponent(sessionInterface) return self.session class OldNevowResourceAdapter(object): implements(iweb.IResource) def __init__(self, original): # Can't use self.__original= because of __setattr__. self.__dict__['_OldNevowResourceAdapter__original']=original def __getattr__(self, name): return getattr(self.__original, name) def __setattr__(self, name, value): setattr(self.__original, name, value) def __delattr__(self, name): delattr(self.__original, name) def locateChild(self, ctx, segments): from xcap.web.server import parsePOSTData request = iweb.IRequest(ctx) if request.method == "POST": return parsePOSTData(request).addCallback( lambda x: self.__original.locateChild(ctx, segments)) return self.__original.locateChild(ctx, segments) def renderHTTP(self, ctx): from xcap.web.server import parsePOSTData request = iweb.IRequest(ctx) if request.method == "POST": return parsePOSTData(request).addCallback(self.__reallyRender, ctx) return self.__reallyRender(None, ctx) def __reallyRender(self, ignored, ctx): # This deferred will be called when our resource is _finished_ # writing, and will make sure we write the rest of our data # and finish the connection. 
defer.maybeDeferred(self.__original.renderHTTP, ctx).addCallback(self.__finish, ctx) # Sometimes the __original.renderHTTP will write() before we # even get this far, and we don't want to return # oldRequest.deferred if it's already been set to None. oldRequest = iweb.IOldRequest(ctx) if oldRequest.deferredResponse is None: return oldRequest.response return oldRequest.deferredResponse def __finish(self, data, ctx): oldRequest = iweb.IOldRequest(ctx) oldRequest.write(data) oldRequest.finish() class OldResourceAdapter(object): implements(iweb.IOldNevowResource) def __init__(self, original): self.original = original def __repr__(self): return "<%s @ 0x%x adapting %r>" % (self.__class__.__name__, id(self), self.original) def locateChild(self, req, segments): - import server + from . import server request = iweb.IOldRequest(req) if self.original.isLeaf: return self, server.StopTraversal name = segments[0] if name == '': res = self else: request.prepath.append(request.postpath.pop(0)) res = self.original.getChildWithDefault(name, request) request.postpath.insert(0, request.prepath.pop()) if isinstance(res, defer.Deferred): return res.addCallback(lambda res: (res, segments[1:])) return res, segments[1:] def _handle_NOT_DONE_YET(self, data, request): from twisted.web.server import NOT_DONE_YET if data == NOT_DONE_YET: # Return a deferred that will never fire, so the finish # callback doesn't happen. This is because, when returning # NOT_DONE_YET, the page is responsible for calling finish. return defer.Deferred() else: return data def renderHTTP(self, req): request = iweb.IOldRequest(req) result = defer.maybeDeferred(self.original.render, request).addCallback( self._handle_NOT_DONE_YET, request) return result __all__ = [] diff --git a/xcap/web/dirlist.py b/xcap/web/dirlist.py index 0aff7be..6fb855b 100644 --- a/xcap/web/dirlist.py +++ b/xcap/web/dirlist.py @@ -1,119 +1,119 @@ # Copyright (c) 2001-2004 Twisted Matrix Laboratories. # See LICENSE for details. 
"""Directory listing.""" # system imports import os -import urllib +import urllib.request, urllib.parse, urllib.error import stat import time # twisted imports from xcap.web import iweb, resource, http, http_headers def formatFileSize(size): if size < 1024: return '%i' % size elif size < (1024**2): return '%iK' % (size / 1024) elif size < (1024**3): return '%iM' % (size / (1024**2)) else: return '%iG' % (size / (1024**3)) class DirectoryLister(resource.Resource): def __init__(self, pathname, dirs=None, contentTypes={}, contentEncodings={}, defaultType='text/html'): self.contentTypes = contentTypes self.contentEncodings = contentEncodings self.defaultType = defaultType # dirs allows usage of the File to specify what gets listed self.dirs = dirs self.path = pathname resource.Resource.__init__(self) def data_listing(self, request, data): if self.dirs is None: directory = os.listdir(self.path) directory.sort() else: directory = self.dirs files = [] for path in directory: - url = urllib.quote(path, '/') + url = urllib.parse.quote(path, '/') fullpath = os.path.join(self.path, path) try: st = os.stat(fullpath) except OSError: continue if stat.S_ISDIR(st.st_mode): url = url + '/' files.append({ 'link': url, 'linktext': path + "/", 'size': '', 'type': '-', 'lastmod': time.strftime("%Y-%b-%d %H:%M", time.localtime(st.st_mtime)) }) else: from xcap.web.static import getTypeAndEncoding mimetype, encoding = getTypeAndEncoding( path, self.contentTypes, self.contentEncodings, self.defaultType) filesize = st.st_size files.append({ 'link': url, 'linktext': path, 'size': formatFileSize(filesize), 'type': mimetype, 'lastmod': time.strftime("%Y-%b-%d %H:%M", time.localtime(st.st_mtime)) }) return files def __repr__(self): return '' % self.path __str__ = __repr__ def render(self, request): - title = "Directory listing for %s" % urllib.unquote(request.path) + title = "Directory listing for %s" % urllib.parse.unquote(request.path) s= """%s

%s

""" % (title,title) s+="" s+="" even = False for row in self.data_listing(request, None): s+='' % (even and 'even' or 'odd',) s+='' % row even = not even s+="
FilenameSizeLast ModifiedFile Type
%(linktext)s%(size)s%(lastmod)s%(type)s
" response = http.Response(200, {}, s) response.headers.setHeader("content-type", http_headers.MimeType('text', 'html')) return response __all__ = ['DirectoryLister'] diff --git a/xcap/web/fileupload.py b/xcap/web/fileupload.py index 0e7e794..b654a39 100644 --- a/xcap/web/fileupload.py +++ b/xcap/web/fileupload.py @@ -1,374 +1,374 @@ -from __future__ import generators + import re from zope.interface import implements -import urllib +import urllib.request, urllib.parse, urllib.error import tempfile from twisted.internet import defer from xcap.web.stream import IStream, FileStream, BufferedStream, readStream from xcap.web.stream import generatorToStream, readAndDiscard from xcap.web import http_headers -from cStringIO import StringIO +from io import StringIO ################################### ##### Multipart MIME Reader ##### ################################### class MimeFormatError(Exception): pass # parseContentDispositionFormData is absolutely horrible, but as # browsers don't seem to believe in sensible quoting rules, it's # really the only way to handle the header. (Quotes can be in the # filename, unescaped) cd_regexp = re.compile( ' *form-data; *name="([^"]*)"(?:; *filename="(.*)")?$', re.IGNORECASE) def parseContentDispositionFormData(value): match = cd_regexp.match(value) if not match: # Error parsing. raise ValueError("Unknown content-disposition format.") name=match.group(1) filename=match.group(2) return name, filename #@defer.deferredGenerator def _readHeaders(stream): """Read the MIME headers. 
Assumes we've just finished reading in the boundary string.""" ctype = fieldname = filename = None headers = [] # Now read headers while 1: line = stream.readline(size=1024) if isinstance(line, defer.Deferred): line = defer.waitForDeferred(line) yield line line = line.getResult() #print "GOT", line if not line.endswith('\r\n'): if line == "": raise MimeFormatError("Unexpected end of stream.") else: raise MimeFormatError("Header line too long") line = line[:-2] # strip \r\n if line == "": break # End of headers parts = line.split(':', 1) if len(parts) != 2: raise MimeFormatError("Header did not have a :") name, value = parts name = name.lower() headers.append((name, value)) if name == "content-type": ctype = http_headers.parseContentType(http_headers.tokenize((value,), foldCase=False)) elif name == "content-disposition": fieldname, filename = parseContentDispositionFormData(value) if ctype is None: ctype == http_headers.MimeType('application', 'octet-stream') if fieldname is None: raise MimeFormatError('Content-disposition invalid or omitted.') # End of headers, return (field name, content-type, filename) yield fieldname, filename, ctype return _readHeaders = defer.deferredGenerator(_readHeaders) class _BoundaryWatchingStream(object): def __init__(self, stream, boundary): self.stream = stream self.boundary = boundary self.data = '' self.deferred = defer.Deferred() length = None # unknown def read(self): if self.stream is None: if self.deferred is not None: deferred = self.deferred self.deferred = None deferred.callback(None) return None newdata = self.stream.read() if isinstance(newdata, defer.Deferred): return newdata.addCallbacks(self._gotRead, self._gotError) return self._gotRead(newdata) def _gotRead(self, newdata): if not newdata: raise MimeFormatError("Unexpected EOF") # BLECH, converting buffer back into string. 
self.data += str(newdata) data = self.data boundary = self.boundary off = data.find(boundary) if off == -1: # No full boundary, check for the first character off = data.rfind(boundary[0], max(0, len(data)-len(boundary))) if off != -1: # We could have a partial boundary, store it for next time self.data = data[off:] return data[:off] else: self.data = '' return data else: self.stream.pushback(data[off+len(boundary):]) self.stream = None return data[:off] def _gotError(self, err): # Propogate error back to MultipartMimeStream also if self.deferred is not None: deferred = self.deferred self.deferred = None deferred.errback(err) return err def close(self): # Assume error will be raised again and handled by MMS? readAndDiscard(self).addErrback(lambda _: None) class MultipartMimeStream(object): implements(IStream) def __init__(self, stream, boundary): self.stream = BufferedStream(stream) self.boundary = "--"+boundary self.first = True def read(self): """ Return a deferred which will fire with a tuple of: (fieldname, filename, ctype, dataStream) or None when all done. Format errors will be sent to the errback. Returns None when all done. IMPORTANT: you *must* exhaust dataStream returned by this call before calling .read() again! 
""" if self.first: self.first = False d = self._readFirstBoundary() else: d = self._readBoundaryLine() d.addCallback(self._doReadHeaders) d.addCallback(self._gotHeaders) return d def _readFirstBoundary(self): #print "_readFirstBoundary" line = self.stream.readline(size=1024) if isinstance(line, defer.Deferred): line = defer.waitForDeferred(line) yield line line = line.getResult() if line != self.boundary + '\r\n': raise MimeFormatError("Extra data before first boundary: %r looking for: %r" % (line, self.boundary + '\r\n')) self.boundary = "\r\n"+self.boundary yield True return _readFirstBoundary = defer.deferredGenerator(_readFirstBoundary) def _readBoundaryLine(self): #print "_readBoundaryLine" line = self.stream.readline(size=1024) if isinstance(line, defer.Deferred): line = defer.waitForDeferred(line) yield line line = line.getResult() if line == "--\r\n": # THE END! yield False return elif line != "\r\n": raise MimeFormatError("Unexpected data on same line as boundary: %r" % (line,)) yield True return _readBoundaryLine = defer.deferredGenerator(_readBoundaryLine) def _doReadHeaders(self, morefields): #print "_doReadHeaders", morefields if not morefields: return None return _readHeaders(self.stream) def _gotHeaders(self, headers): if headers is None: return None bws = _BoundaryWatchingStream(self.stream, self.boundary) self.deferred = bws.deferred ret=list(headers) ret.append(bws) return tuple(ret) def readIntoFile(stream, outFile, maxlen): """Read the stream into a file, but not if it's longer than maxlen. Returns Deferred which will be triggered on finish. """ curlen = [0] def done(_): return _ def write(data): curlen[0] += len(data) if curlen[0] > maxlen: raise MimeFormatError("Maximum length of %d bytes exceeded." 
% maxlen) outFile.write(data) return readStream(stream, write).addBoth(done) #@defer.deferredGenerator def parseMultipartFormData(stream, boundary, maxMem=100*1024, maxFields=1024, maxSize=10*1024*1024): # If the stream length is known to be too large upfront, abort immediately if stream.length is not None and stream.length > maxSize: raise MimeFormatError("Maximum length of %d bytes exceeded." % maxSize) mms = MultipartMimeStream(stream, boundary) numFields = 0 args = {} files = {} while 1: datas = mms.read() if isinstance(datas, defer.Deferred): datas = defer.waitForDeferred(datas) yield datas datas = datas.getResult() if datas is None: break numFields+=1 if numFields == maxFields: raise MimeFormatError("Maximum number of fields %d exceeded"%maxFields) # Parse data fieldname, filename, ctype, stream = datas if filename is None: # Not a file outfile = StringIO() maxBuf = min(maxSize, maxMem) else: outfile = tempfile.NamedTemporaryFile() maxBuf = maxSize x = readIntoFile(stream, outfile, maxBuf) if isinstance(x, defer.Deferred): x = defer.waitForDeferred(x) yield x x = x.getResult() if filename is None: # Is a normal form field outfile.seek(0) data = outfile.read() args.setdefault(fieldname, []).append(data) maxMem -= len(data) maxSize -= len(data) else: # Is a file upload maxSize -= outfile.tell() outfile.seek(0) files.setdefault(fieldname, []).append((filename, ctype, outfile)) yield args, files return parseMultipartFormData = defer.deferredGenerator(parseMultipartFormData) ################################### ##### x-www-urlencoded reader ##### ################################### def parse_urlencoded_stream(input, maxMem=100*1024, keep_blank_values=False, strict_parsing=False): lastdata = '' still_going=1 while still_going: try: yield input.wait - data = input.next() + data = next(input) except StopIteration: pairs = [lastdata] still_going=0 else: maxMem -= len(data) if maxMem < 0: raise MimeFormatError("Maximum length of %d bytes exceeded." 
% maxMem) pairs = str(data).split('&') pairs[0] = lastdata + pairs[0] lastdata=pairs.pop() for name_value in pairs: nv = name_value.split('=', 1) if len(nv) != 2: if strict_parsing: - raise MimeFormatError("bad query field: %s") % `name_value` + raise MimeFormatError("bad query field: %s") % repr(name_value) continue if len(nv[1]) or keep_blank_values: - name = urllib.unquote(nv[0].replace('+', ' ')) - value = urllib.unquote(nv[1].replace('+', ' ')) + name = urllib.parse.unquote(nv[0].replace('+', ' ')) + value = urllib.parse.unquote(nv[1].replace('+', ' ')) yield name, value parse_urlencoded_stream = generatorToStream(parse_urlencoded_stream) def parse_urlencoded(stream, maxMem=100*1024, maxFields=1024, keep_blank_values=False, strict_parsing=False): d = {} numFields = 0 s=parse_urlencoded_stream(stream, maxMem, keep_blank_values, strict_parsing) while 1: datas = s.read() if isinstance(datas, defer.Deferred): datas = defer.waitForDeferred(datas) yield datas datas = datas.getResult() if datas is None: break name, value = datas numFields += 1 if numFields == maxFields: raise MimeFormatError("Maximum number of fields %d exceeded"%maxFields) if name in d: d[name].append(value) else: d[name] = [value] yield d return parse_urlencoded = defer.deferredGenerator(parse_urlencoded) if __name__ == '__main__': d = parseMultipartFormData( FileStream(open("upload.txt")), "----------0xKhTmLbOuNdArY") from twisted.python import log d.addErrback(log.err) def pr(s): - print s + print(s) d.addCallback(pr) __all__ = ['parseMultipartFormData', 'parse_urlencoded', 'parse_urlencoded_stream', 'MultipartMimeStream', 'MimeFormatError'] diff --git a/xcap/web/filter/gzip.py b/xcap/web/filter/gzip.py index 566f514..9a93def 100644 --- a/xcap/web/filter/gzip.py +++ b/xcap/web/filter/gzip.py @@ -1,79 +1,79 @@ -from __future__ import generators + import struct import zlib from xcap.web import stream # TODO: ungzip (can any browsers actually generate gzipped # upload data?) 
But it's necessary for client anyways. def gzipStream(input, compressLevel=6): crc, size = zlib.crc32(''), 0 # magic header, compression method, no flags header = '\037\213\010\000' # timestamp header += struct.pack('= size: end = size - 1 if start >= size: raise UnsatisfiableRangeRequest return start,end def makeUnsatisfiable(request, oldresponse): if request.headers.hasHeader('if-range'): return oldresponse # Return resource instead of error response = http.Response(responsecode.REQUESTED_RANGE_NOT_SATISFIABLE) response.headers.setHeader("content-range", ('bytes', None, None, oldresponse.stream.length)) return response def makeSegment(inputStream, lastOffset, start, end): offset = start - lastOffset length = end + 1 - start if offset != 0: before, inputStream = inputStream.split(offset) before.close() return inputStream.split(length) def rangefilter(request, oldresponse): if oldresponse.stream is None: return oldresponse size = oldresponse.stream.length if size is None: # Does not deal with indeterminate length outputs return oldresponse oldresponse.headers.setHeader('accept-ranges',('bytes',)) rangespec = request.headers.getHeader('range') # If we've got a range header and the If-Range header check passes, and # the range type is bytes, do a partial response. 
if (rangespec is not None and http.checkIfRange(request, oldresponse) and rangespec[0] == 'bytes'): # If it's a single range, return a simple response if len(rangespec[1]) == 1: try: start,end = canonicalizeRange(rangespec[1][0], size) except UnsatisfiableRangeRequest: return makeUnsatisfiable(request, oldresponse) response = http.Response(responsecode.PARTIAL_CONTENT, oldresponse.headers) response.headers.setHeader('content-range',('bytes',start, end, size)) content, after = makeSegment(oldresponse.stream, 0, start, end) after.close() response.stream = content return response else: # Return a multipart/byteranges response lastOffset = -1 offsetList = [] for arange in rangespec[1]: try: start,end = canonicalizeRange(arange, size) except UnsatisfiableRangeRequest: continue if start <= lastOffset: # Stupid client asking for out-of-order or overlapping ranges, PUNT! return oldresponse offsetList.append((start,end)) lastOffset = end if not offsetList: return makeUnsatisfiable(request, oldresponse) content_type = oldresponse.headers.getRawHeaders('content-type') boundary = "%x%x" % (int(time.time()*1000000), os.getpid()) response = http.Response(responsecode.PARTIAL_CONTENT, oldresponse.headers) response.headers.setHeader('content-type', http_headers.MimeType('multipart', 'byteranges', [('boundary', boundary)])) response.stream = out = stream.CompoundStream() lastOffset = 0 origStream = oldresponse.stream headerString = "\r\n--%s" % boundary if len(content_type) == 1: headerString+='\r\nContent-Type: %s' % content_type[0] headerString+="\r\nContent-Range: %s\r\n\r\n" for start,end in offsetList: out.addStream(headerString % http_headers.generateContentRange(('bytes', start, end, size))) content, origStream = makeSegment(origStream, lastOffset, start, end) lastOffset = end + 1 out.addStream(content) origStream.close() out.addStream("\r\n--%s--\r\n" % boundary) return response else: return oldresponse __all__ = ['rangefilter'] diff --git a/xcap/web/http.py 
b/xcap/web/http.py index 0eef031..c2c5412 100644 --- a/xcap/web/http.py +++ b/xcap/web/http.py @@ -1,473 +1,473 @@ # Copyright (c) 2001-2004 Twisted Matrix Laboratories. # See LICENSE for details. """HyperText Transfer Protocol implementation. The second coming. Maintainer: U{James Y Knight } """ # import traceback; log.msg(''.join(traceback.format_stack())) # system imports import socket import time import cgi # twisted imports from twisted.internet import interfaces, error from twisted.python import log, components from zope.interface import implements # sibling imports from xcap.web import responsecode from xcap.web import http_headers from xcap.web import iweb from xcap.web import stream from xcap.web.stream import IByteStream defaultPortForScheme = {'http': 80, 'https':443, 'ftp':21} def splitHostPort(scheme, hostport): """Split the host in "host:port" format into host and port fields. If port was not specified, use the default for the given scheme, if known. Returns a tuple of (hostname, portnumber).""" # Split hostport into host and port hostport = hostport.split(':', 1) try: if len(hostport) == 2: return hostport[0], int(hostport[1]) except ValueError: pass return hostport[0], defaultPortForScheme.get(scheme, 0) def parseVersion(strversion): """Parse version strings of the form Protocol '/' Major '.' Minor. E.g. 'HTTP/1.1'. Returns (protocol, major, minor). Will raise ValueError on bad syntax.""" proto, strversion = strversion.split('/') major, minor = strversion.split('.') major, minor = int(major), int(minor) if major < 0 or minor < 0: raise ValueError("negative number") return (proto.lower(), major, minor) class HTTPError(Exception): def __init__(self, codeOrResponse): """An Exception for propagating HTTP Error Responses. @param codeOrResponse: The numeric HTTP code or a complete http.Response object. 
@type codeOrResponse: C{int} or L{http.Response} """ Exception.__init__(self) self.response = iweb.IResponse(codeOrResponse) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, self.response) class Response(object): """An object representing an HTTP Response to be sent to the client. """ implements(iweb.IResponse) code = responsecode.OK headers = None stream = None def __init__(self, code=None, headers=None, stream=None): """ @param code: The HTTP status code for this Response @type code: C{int} @param headers: Headers to be sent to the client. @type headers: C{dict}, L{xcap.web.http_headers.Headers}, or C{None} @param stream: Content body to send to the HTTP client @type stream: L{xcap.web.stream.IByteStream} """ if code is not None: self.code = int(code) if headers is not None: if isinstance(headers, dict): headers = http_headers.Headers(headers) self.headers=headers else: self.headers = http_headers.Headers() if stream is not None: self.stream = IByteStream(stream) def __repr__(self): if self.stream is None: streamlen = None else: streamlen = self.stream.length return "<%s.%s code=%d, streamlen=%s>" % (self.__module__, self.__class__.__name__, self.code, streamlen) class StatusResponse (Response): """ A L{Response} object which simply contains a status code and a description of what happened. """ def __init__(self, code, description, title=None): """ @param code: a response code in L{responsecode.RESPONSES}. @param description: a string description. @param title: the message title. If not specified or C{None}, defaults to C{responsecode.RESPONSES[code]}. """ if title is None: title = cgi.escape(responsecode.RESPONSES[code]) output = "".join(( "", "", "%s" % (title,), "", "", "

%s

" % (title,), "

%s

" % (cgi.escape(description),), "", "", )) - if type(output) == unicode: + if type(output) == str: output = output.encode("utf-8") mime_params = {"charset": "utf-8"} else: mime_params = {} super(StatusResponse, self).__init__(code=code, stream=output) self.headers.setHeader("content-type", http_headers.MimeType("text", "html", mime_params)) self.description = description def __repr__(self): return "<%s %s %s>" % (self.__class__.__name__, self.code, self.description) class RedirectResponse (StatusResponse): """ A L{Response} object that contains a redirect to another network location. """ def __init__(self, location): """ @param location: the URI to redirect to. """ super(RedirectResponse, self).__init__( responsecode.MOVED_PERMANENTLY, "Document moved to %s." % (location,) ) self.headers.setHeader("location", location) def NotModifiedResponse(oldResponse=None): if oldResponse is not None: headers=http_headers.Headers() for header in ( # Required from sec 10.3.5: 'date', 'etag', 'content-location', 'expires', 'cache-control', 'vary', # Others: 'server', 'proxy-authenticate', 'www-authenticate', 'warning'): value = oldResponse.headers.getRawHeaders(header) if value is not None: headers.setRawHeaders(header, value) else: headers = None return Response(code=responsecode.NOT_MODIFIED, headers=headers) def checkPreconditions(request, response=None, entityExists=True, etag=None, lastModified=None): """Check to see if this request passes the conditional checks specified by the client. May raise an HTTPError with result codes L{NOT_MODIFIED} or L{PRECONDITION_FAILED}, as appropriate. This function is called automatically as an output filter for GET and HEAD requests. With GET/HEAD, it is not important for the precondition check to occur before doing the action, as the method is non-destructive. 
However, if you are implementing other request methods, like PUT for your resource, you will need to call this after determining the etag and last-modified time of the existing resource but before actually doing the requested action. In that case, This examines the appropriate request headers for conditionals, (If-Modified-Since, If-Unmodified-Since, If-Match, If-None-Match, or If-Range), compares with the etag and last and and then sets the response code as necessary. @param response: This should be provided for GET/HEAD methods. If it is specified, the etag and lastModified arguments will be retrieved automatically from the response headers and shouldn't be separately specified. Not providing the response with a GET request may cause the emitted "Not Modified" responses to be non-conformant. @param entityExists: Set to False if the entity in question doesn't yet exist. Necessary for PUT support with 'If-None-Match: *'. @param etag: The etag of the resource to check against, or None. @param lastModified: The last modified date of the resource to check against, or None. @raise: HTTPError: Raised when the preconditions fail, in order to abort processing and emit an error page. 
""" if response: assert etag is None and lastModified is None # if the code is some sort of error code, don't do anything if not ((response.code >= 200 and response.code <= 299) or response.code == responsecode.PRECONDITION_FAILED): return False etag = response.headers.getHeader("etag") lastModified = response.headers.getHeader("last-modified") def matchETag(tags, allowWeak): if entityExists and '*' in tags: return True if etag is None: return False return ((allowWeak or not etag.weak) and ([etagmatch for etagmatch in tags if etag.match(etagmatch, strongCompare=not allowWeak)])) # First check if-match/if-unmodified-since # If either one fails, we return PRECONDITION_FAILED match = request.headers.getHeader("if-match") if match: if not matchETag(match, False): raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource does not have a matching ETag.")) unmod_since = request.headers.getHeader("if-unmodified-since") if unmod_since: if not lastModified or lastModified > unmod_since: raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource has changed.")) # Now check if-none-match/if-modified-since. # This bit is tricky, because of the requirements when both IMS and INM # are present. In that case, you can't return a failure code # unless *both* checks think it failed. # Also, if the INM check succeeds, ignore IMS, because INM is treated # as more reliable. # I hope I got the logic right here...the RFC is quite poorly written # in this area. Someone might want to verify the testcase against # RFC wording. # If IMS header is later than current time, ignore it. notModified = None ims = request.headers.getHeader('if-modified-since') if ims: notModified = (ims < time.time() and lastModified and lastModified <= ims) inm = request.headers.getHeader("if-none-match") if inm: if request.method in ("HEAD", "GET"): # If it's a range request, don't allow a weak ETag, as that # would break. 
canBeWeak = not request.headers.hasHeader('Range') if notModified != False and matchETag(inm, canBeWeak): raise HTTPError(NotModifiedResponse(response)) else: if notModified != False and matchETag(inm, False): raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource has a matching ETag.")) else: if notModified == True: if request.method in ("HEAD", "GET"): raise HTTPError(NotModifiedResponse(response)) else: # S14.25 doesn't actually say what to do for a failing IMS on # non-GET methods. But Precondition Failed makes sense to me. raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource has not changed.")) def checkIfRange(request, response): """Checks for the If-Range header, and if it exists, checks if the test passes. Returns true if the server should return partial data.""" ifrange = request.headers.getHeader("if-range") if ifrange is None: return True if isinstance(ifrange, http_headers.ETag): return ifrange.match(response.headers.getHeader("etag"), strongCompare=True) else: return ifrange == response.headers.getHeader("last-modified") class _NotifyingProducerStream(stream.ProducerStream): doStartReading = None def __init__(self, length=None, doStartReading=None): stream.ProducerStream.__init__(self, length=length) self.doStartReading = doStartReading def read(self): if self.doStartReading is not None: doStartReading = self.doStartReading self.doStartReading = None doStartReading() return stream.ProducerStream.read(self) def write(self, data): self.doStartReading = None stream.ProducerStream.write(self, data) def finish(self): self.doStartReading = None stream.ProducerStream.finish(self) # response codes that must have empty bodies NO_BODY_CODES = (responsecode.NO_CONTENT, responsecode.NOT_MODIFIED) class Request(object): """A HTTP request. Subclasses should override the process() method to determine how the request will be processed. @ivar method: The HTTP method that was used. 
@ivar uri: The full URI that was requested (includes arguments). @ivar headers: All received headers @ivar clientproto: client HTTP version @ivar stream: incoming data stream. """ implements(iweb.IRequest, interfaces.IConsumer) known_expects = ('100-continue',) def __init__(self, chanRequest, command, path, version, contentLength, headers): """ @param chanRequest: the channel request we're associated with. """ self.chanRequest = chanRequest self.method = command self.uri = path self.clientproto = version self.headers = headers if '100-continue' in self.headers.getHeader('expect', ()): doStartReading = self._sendContinue else: doStartReading = None self.stream = _NotifyingProducerStream(contentLength, doStartReading) self.stream.registerProducer(self.chanRequest, True) def checkExpect(self): """Ensure there are no expectations that cannot be met. Checks Expect header against self.known_expects.""" expects = self.headers.getHeader('expect', ()) for expect in expects: if expect not in self.known_expects: raise HTTPError(responsecode.EXPECTATION_FAILED) def process(self): """Called by channel to let you process the request. Can be overridden by a subclass to do something useful.""" pass def handleContentChunk(self, data): """Callback from channel when a piece of data has been received. Puts the data in .stream""" self.stream.write(data) def handleContentComplete(self): """Callback from channel when all data has been received. 
""" self.stream.unregisterProducer() self.stream.finish() def connectionLost(self, reason): """connection was lost""" pass def __repr__(self): return '<%s %s %s>'% (self.method, self.uri, self.clientproto) def _sendContinue(self): self.chanRequest.writeIntermediateResponse(responsecode.CONTINUE) def _finished(self, x): """We are finished writing data.""" self.chanRequest.finish() def _error(self, reason): if reason.check(error.ConnectionLost): log.msg("Request error: " + reason.getErrorMessage()) else: log.err(reason) # Only bother with cleanup on errors other than lost connection. self.chanRequest.abortConnection() def writeResponse(self, response): """ Write a response. """ if self.stream.doStartReading is not None: # Expect: 100-continue was requested, but 100 response has not been # sent, and there's a possibility that data is still waiting to be # sent. # # Ideally this means the remote side will not send any data. # However, because of compatibility requirements, it might timeout, # and decide to do so anyways at the same time we're sending back # this response. Thus, the read state is unknown after this. # We must close the connection. self.chanRequest.channel.setReadPersistent(False) # Nothing more will be read self.chanRequest.allContentReceived() if response.code != responsecode.NOT_MODIFIED: # Not modified response is *special* and doesn't get a content-length. if response.stream is None: response.headers.setHeader('content-length', 0) elif response.stream.length is not None: response.headers.setHeader('content-length', response.stream.length) self.chanRequest.writeHeaders(response.code, response.headers) # if this is a "HEAD" request, or a special response code, # don't return any data. 
if self.method == "HEAD" or response.code in NO_BODY_CODES: if response.stream is not None: response.stream.close() self._finished(None) return d = stream.StreamProducer(response.stream).beginProducing(self.chanRequest) d.addCallback(self._finished).addErrback(self._error) from xcap.web import compat components.registerAdapter(compat.makeOldRequestAdapter, iweb.IRequest, iweb.IOldRequest) components.registerAdapter(compat.OldNevowResourceAdapter, iweb.IOldNevowResource, iweb.IResource) components.registerAdapter(Response, int, iweb.IResponse) try: # If twisted.web is installed, add an adapter for it from twisted.web import resource except: pass else: components.registerAdapter(compat.OldResourceAdapter, resource.IResource, iweb.IOldNevowResource) __all__ = ['HTTPError', 'NotModifiedResponse', 'Request', 'Response', 'checkIfRange', 'checkPreconditions', 'defaultPortForScheme', 'parseVersion', 'splitHostPort'] diff --git a/xcap/web/http_headers.py b/xcap/web/http_headers.py index 8965658..17b12f7 100644 --- a/xcap/web/http_headers.py +++ b/xcap/web/http_headers.py @@ -1,1538 +1,1539 @@ -from __future__ import generators + import types, time from calendar import timegm import base64 import re def dashCapitalize(s): ''' Capitalize a string, making sure to treat - as a word seperator ''' return '-'.join([ x.capitalize() for x in s.split('-')]) # datetime parsing and formatting weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] weekdayname_lower = [name.lower() for name in weekdayname] monthname = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] monthname_lower = [name and name.lower() for name in monthname] # HTTP Header parsing API header_case_mapping = {} def casemappingify(d): global header_case_mapping - newd = dict([(key.lower(),key) for key in d.keys()]) + newd = dict([(key.lower(),key) for key in list(d.keys())]) header_case_mapping.update(newd) def lowerify(d): - return dict([(key.lower(),value) for 
key,value in d.items()]) + return dict([(key.lower(),value) for key,value in list(d.items())]) class HeaderHandler(object): """HeaderHandler manages header generating and parsing functions. """ HTTPParsers = {} HTTPGenerators = {} def __init__(self, parsers=None, generators=None): """ @param parsers: A map of header names to parsing functions. @type parsers: L{dict} @param generators: A map of header names to generating functions. @type generators: L{dict} """ if parsers: self.HTTPParsers.update(parsers) if generators: self.HTTPGenerators.update(generators) def parse(self, name, header): """ Parse the given header based on its given name. @param name: The header name to parse. @type name: C{str} @param header: A list of unparsed headers. @type header: C{list} of C{str} @return: The return value is the parsed header representation, it is dependent on the header. See the HTTP Headers document. """ parser = self.HTTPParsers.get(name, None) if parser is None: raise ValueError("No header parser for header '%s', either add one or use getHeaderRaw." % (name,)) try: for p in parser: # print "Parsing %s: %s(%s)" % (name, repr(p), repr(h)) header = p(header) # if isinstance(h, types.GeneratorType): # h=list(h) - except ValueError,v: + except ValueError as v: # print v header=None return header def generate(self, name, header): """ Generate the given header based on its given name. @param name: The header name to generate. @type name: C{str} @param header: A parsed header, such as the output of L{HeaderHandler}.parse. @return: C{list} of C{str} each representing a generated HTTP header. """ generator = self.HTTPGenerators.get(name, None) if generator is None: # print self.generators raise ValueError("No header generator for header '%s', either add one or use setHeaderRaw." % (name,)) for g in generator: header = g(header) #self._raw_headers[name] = h return header def updateParsers(self, parsers): """Update en masse the parser maps. 
@param parsers: Map of header names to parser chains. @type parsers: C{dict} """ casemappingify(parsers) self.HTTPParsers.update(lowerify(parsers)) def addParser(self, name, value): """Add an individual parser chain for the given header. @param name: Name of the header to add @type name: C{str} @param value: The parser chain @type value: C{str} """ self.updateParsers({name: value}) def updateGenerators(self, generators): """Update en masse the generator maps. @param parsers: Map of header names to generator chains. @type parsers: C{dict} """ casemappingify(generators) self.HTTPGenerators.update(lowerify(generators)) def addGenerators(self, name, value): """Add an individual generator chain for the given header. @param name: Name of the header to add @type name: C{str} @param value: The generator chain @type value: C{str} """ self.updateGenerators({name: value}) def update(self, parsers, generators): """Conveniently update parsers and generators all at once. """ self.updateParsers(parsers) self.updateGenerators(generators) DefaultHTTPHandler = HeaderHandler() ## HTTP DateTime parser def parseDateTime(dateString): """Convert an HTTP date string (one of three formats) to seconds since epoch.""" parts = dateString.split() if not parts[0][0:3].lower() in weekdayname_lower: # Weekday is stupid. Might have been omitted. try: return parseDateTime("Sun, "+dateString) except ValueError: # Guess not. pass partlen = len(parts) if (partlen == 5 or partlen == 6) and parts[1].isdigit(): # 1st date format: Sun, 06 Nov 1994 08:49:37 GMT # (Note: "GMT" is literal, not a variable timezone) # (also handles without "GMT") # This is the normal format day = parts[1] month = parts[2] year = parts[3] time = parts[4] elif (partlen == 3 or partlen == 4) and parts[1].find('-') != -1: # 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT # (Note: "GMT" is literal, not a variable timezone) # (also handles without without "GMT") # Two digit year, yucko. 
day, month, year = parts[1].split('-') time = parts[2] year=int(year) if year < 69: year = year + 2000 elif year < 100: year = year + 1900 elif len(parts) == 5: # 3rd date format: Sun Nov 6 08:49:37 1994 # ANSI C asctime() format. day = parts[2] month = parts[1] year = parts[4] time = parts[3] else: raise ValueError("Unknown datetime format %r" % dateString) day = int(day) month = int(monthname_lower.index(month.lower())) year = int(year) - hour, min, sec = map(int, time.split(':')) + hour, min, sec = list(map(int, time.split(':'))) return int(timegm((year, month, day, hour, min, sec))) ##### HTTP tokenizer class Token(str): __slots__=[] tokens = {} def __new__(self, char): token = Token.tokens.get(char) if token is None: Token.tokens[char] = token = str.__new__(self, char) return token def __repr__(self): return "Token(%s)" % str.__repr__(self) http_tokens = " \t\"()<>@,;:\\/[]?={}" http_ctls = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f" def tokenize(header, foldCase=True): """Tokenize a string according to normal HTTP header parsing rules. In particular: - Whitespace is irrelevant and eaten next to special separator tokens. Its existance (but not amount) is important between character strings. - Quoted string support including embedded backslashes. - Case is insignificant (and thus lowercased), except in quoted strings. (unless foldCase=False) - Multiple headers are concatenated with ',' NOTE: not all headers can be parsed with this function. Takes a raw header value (list of strings), and Returns a generator of strings and Token class instances. 
""" tokens=http_tokens ctls=http_ctls string = ",".join(header) list = [] start = 0 cur = 0 quoted = False qpair = False inSpaces = -1 qstring = None for x in string: if quoted: if qpair: qpair = False qstring = qstring+string[start:cur-1]+x start = cur+1 elif x == '\\': qpair = True elif x == '"': quoted = False yield qstring+string[start:cur] qstring=None start = cur+1 elif x in tokens: if start != cur: if foldCase: yield string[start:cur].lower() else: yield string[start:cur] start = cur+1 if x == '"': quoted = True qstring = "" inSpaces = False elif x in " \t": if inSpaces is False: inSpaces = True else: inSpaces = -1 yield Token(x) elif x in ctls: raise ValueError("Invalid control character: %d in header" % ord(x)) else: if inSpaces is True: yield Token(' ') inSpaces = False inSpaces = False cur = cur+1 if qpair: - raise ValueError, "Missing character after '\\'" + raise ValueError("Missing character after '\\'") if quoted: - raise ValueError, "Missing end quote" + raise ValueError("Missing end quote") if start != cur: if foldCase: yield string[start:cur].lower() else: yield string[start:cur] def split(seq, delim): """The same as str.split but works on arbitrary sequences. Too bad it's not builtin to python!""" cur = [] for item in seq: if item == delim: yield cur cur = [] else: cur.append(item) yield cur # def find(seq, *args): # """The same as seq.index but returns -1 if not found, instead # Too bad it's not builtin to python!""" # try: # return seq.index(value, *args) # except ValueError: # return -1 def filterTokens(seq): """Filter out instances of Token, leaving only a list of strings. Used instead of a more specific parsing method (e.g. splitting on commas) when only strings are expected, so as to be a little lenient. Apache does it this way and has some comments about broken clients which forget commas (?), so I'm doing it the same way. It shouldn't hurt anything, in any case. 
""" l=[] for x in seq: if not isinstance(x, Token): l.append(x) return l ##### parser utilities: def checkSingleToken(tokens): if len(tokens) != 1: - raise ValueError, "Expected single token, not %s." % (tokens,) + raise ValueError("Expected single token, not %s." % (tokens,)) return tokens[0] def parseKeyValue(val): if len(val) == 1: return val[0],None elif len(val) == 3 and val[1] == Token('='): return val[0],val[2] - raise ValueError, "Expected key or key=value, but got %s." % (val,) + raise ValueError("Expected key or key=value, but got %s." % (val,)) def parseArgs(field): args=split(field, Token(';')) - val = args.next() + val = next(args) args = [parseKeyValue(arg) for arg in args] return val,args def listParser(fun): """Return a function which applies 'fun' to every element in the comma-separated list""" def listParserHelper(tokens): fields = split(tokens, Token(',')) for field in fields: if len(field) != 0: yield fun(field) return listParserHelper def last(seq): """Return seq[-1]""" return seq[-1] ##### Generation utilities def quoteString(s): return '"%s"' % s.replace('\\', '\\\\').replace('"', '\\"') def listGenerator(fun): """Return a function which applies 'fun' to every element in the given list, then joins the result with generateList""" def listGeneratorHelper(l): return generateList([fun(e) for e in l]) return listGeneratorHelper def generateList(seq): return ", ".join(seq) def singleHeader(item): return [item] def generateKeyValues(kvs): l = [] # print kvs for k,v in kvs: if v is None: l.append('%s' % k) else: l.append('%s=%s' % (k,v)) return ";".join(l) class MimeType(object): def fromString(klass, mimeTypeString): """Generate a MimeType object from the given string. 
@param mimeTypeString: The mimetype to parse @return: L{MimeType} """ return DefaultHTTPHandler.parse('content-type', [mimeTypeString]) fromString = classmethod(fromString) def __init__(self, mediaType, mediaSubtype, params={}, **kwargs): """ @type mediaType: C{str} @type mediaSubtype: C{str} @type params: C{dict} """ self.mediaType = mediaType self.mediaSubtype = mediaSubtype self.params = dict(params) if kwargs: self.params.update(kwargs) def __eq__(self, other): if not isinstance(other, MimeType): return NotImplemented return (self.mediaType == other.mediaType and self.mediaSubtype == other.mediaSubtype and self.params == other.params) def __ne__(self, other): return not self.__eq__(other) def __repr__(self): return "MimeType(%r, %r, %r)" % (self.mediaType, self.mediaSubtype, self.params) def __hash__(self): - return hash(self.mediaType)^hash(self.mediaSubtype)^hash(tuple(self.params.iteritems())) + return hash(self.mediaType)^hash(self.mediaSubtype)^hash(tuple(self.params.items())) ##### Specific header parsers. def parseAccept(field): type,args = parseArgs(field) if len(type) != 3 or type[1] != Token('/'): - raise ValueError, "MIME Type "+str(type)+" invalid." + raise ValueError("MIME Type "+str(type)+" invalid.") # okay, this spec is screwy. A 'q' parameter is used as the separator # between MIME parameters and (as yet undefined) additional HTTP # parameters. num = 0 for arg in args: if arg[0] == 'q': mimeparams=tuple(args[0:num]) params=args[num:] break num = num + 1 else: mimeparams=tuple(args) params=[] # Default values for parameters: qval = 1.0 # Parse accept parameters: for param in params: if param[0] =='q': qval = float(param[1]) else: # Warn? ignored parameter. 
pass ret = MimeType(type[0],type[2],mimeparams),qval return ret def parseAcceptQvalue(field): type,args=parseArgs(field) type = checkSingleToken(type) qvalue = 1.0 # Default qvalue is 1 for arg in args: if arg[0] == 'q': qvalue = float(arg[1]) return type,qvalue def addDefaultCharset(charsets): if charsets.get('*') is None and charsets.get('iso-8859-1') is None: charsets['iso-8859-1'] = 1.0 return charsets def addDefaultEncoding(encodings): if encodings.get('*') is None and encodings.get('identity') is None: # RFC doesn't specify a default value for identity, only that it # "is acceptable" if not mentioned. Thus, give it a very low qvalue. encodings['identity'] = .0001 return encodings def parseContentType(header): # Case folding is disabled for this header, because of use of # Content-Type: multipart/form-data; boundary=CaSeFuLsTuFf # So, we need to explicitly .lower() the type/subtype and arg keys. type,args = parseArgs(header) if len(type) != 3 or type[1] != Token('/'): - raise ValueError, "MIME Type "+str(type)+" invalid." + raise ValueError("MIME Type "+str(type)+" invalid.") args = [(kv[0].lower(), kv[1]) for kv in args] return MimeType(type[0].lower(), type[2].lower(), tuple(args)) def parseContentMD5(header): try: return base64.decodestring(header) - except Exception,e: + except Exception as e: raise ValueError(e) def parseContentRange(header): """Parse a content-range header into (kind, start, end, realLength). realLength might be None if real length is not known ('*'). 
start and end might be None if start,end unspecified (for response code 416) """ kind, other = header.strip().split() if kind.lower() != "bytes": raise ValueError("a range of type %r is not supported") startend, realLength = other.split("/") if startend.strip() == '*': start,end=None,None else: - start, end = map(int, startend.split("-")) + start, end = list(map(int, startend.split("-"))) if realLength == "*": realLength = None else: realLength = int(realLength) return (kind, start, end, realLength) def parseExpect(field): type,args=parseArgs(field) type=parseKeyValue(type) return (type[0], (lambda *args:args)(type[1], *args)) def parseExpires(header): # """HTTP/1.1 clients and caches MUST treat other invalid date formats, # especially including the value 0, as in the past (i.e., "already expired").""" try: return parseDateTime(header) except ValueError: return 0 def parseIfModifiedSince(header): # Ancient versions of netscape and *current* versions of MSIE send # If-Modified-Since: Thu, 05 Aug 2004 12:57:27 GMT; length=123 # which is blantantly RFC-violating and not documented anywhere # except bug-trackers for web frameworks. # So, we'll just strip off everything after a ';'. return parseDateTime(header.split(';', 1)[0]) def parseIfRange(headers): try: return ETag.parse(tokenize(headers)) except ValueError: return parseDateTime(last(headers)) def parseRange(range): range = list(range) if len(range) < 3 or range[1] != Token('='): raise ValueError("Invalid range header format: %s" %(range,)) type=range[0] if type != 'bytes': raise ValueError("Unknown range unit: %s." 
                         % (type,))
    rangeset = split(range[2:], Token(','))
    ranges = []
    for byterangespec in rangeset:
        if len(byterangespec) != 1:
            raise ValueError("Invalid range header format: %s" % (range,))
        start, end = byterangespec[0].split('-')

        # Either end of a byte-range-spec may be empty, but not both.
        if not start and not end:
            raise ValueError("Invalid range header format: %s" % (range,))

        if start:
            start = int(start)
        else:
            start = None

        if end:
            end = int(end)
        else:
            end = None

        if start and end and start > end:
            raise ValueError("Invalid range header, start > end: %s" % (range,))
        ranges.append((start, end))
    return type, ranges

def parseRetryAfter(header):
    """Retry-After is either delta-seconds or an HTTP date; returns an absolute time."""
    try:
        # delta seconds
        return time.time() + int(header)
    except ValueError:
        # or datetime
        return parseDateTime(header)

# WWW-Authenticate and Authorization

def parseWWWAuthenticate(tokenized):
    """Parse tokenized WWW-Authenticate value(s) into [(scheme, challenge)].

    challenge is a dict of auth-params, except for schemes whose entire
    challenge is a single bare token, in which case it is that token.
    State machine over the token stream; a ',' either separates
    key=value pairs within one challenge or starts a new scheme,
    disambiguated by peeking for a following '='.
    """
    headers = []
    tokenList = list(tokenized)

    while tokenList:
        scheme = tokenList.pop(0)
        challenge = {}
        last = None
        kvChallenge = False

        while tokenList:
            token = tokenList.pop(0)
            if token == Token('='):
                kvChallenge = True
                challenge[last] = tokenList.pop(0)
                last = None
            elif token == Token(','):
                if kvChallenge:
                    # Next element is a new scheme only if it is not
                    # itself followed by '='.
                    if len(tokenList) > 1 and tokenList[1] != Token('='):
                        break
                else:
                    break
            else:
                last = token

        if last and scheme and not challenge and not kvChallenge:
            # Single bare token challenge (no key=value pairs seen).
            challenge = last
            last = None

        headers.append((scheme, challenge))

    if last and last not in (Token('='), Token(',')):
        # Trailing token left over: it is the start of a parameterless
        # final scheme.
        if headers[-1] == (scheme, challenge):
            scheme = last
            challenge = {}
            headers.append((scheme, challenge))

    return headers

def parseAuthorization(header):
    """Split an Authorization header into (lowercased scheme, credentials)."""
    scheme, rest = header.split(' ', 1)
    # this header isn't tokenized because it may eat characters
    # in the unquoted base64 encoded credentials
    return scheme.lower(), rest

#### Header generators

def generateAccept(accept):
    """Generate one Accept element from a (MimeType, qvalue) pair."""
    mimeType, q = accept
    out = "%s/%s" % (mimeType.mediaType, mimeType.mediaSubtype)
    if mimeType.params:
        out += ';' + generateKeyValues(iter(mimeType.params.items()))
    if q != 1.0:
        # Trim trailing zeros (and a bare '.') from the qvalue.
        out += (';q=%.3f' % (q,)).rstrip('0').rstrip('.')
    return out

def 
removeDefaultEncoding(seq): for item in seq: if item[0] != 'identity' or item[1] != .0001: yield item def generateAcceptQvalue(keyvalue): if keyvalue[1] == 1.0: return "%s" % keyvalue[0:1] else: return ("%s;q=%.3f" % keyvalue).rstrip('0').rstrip('.') def parseCacheControl(kv): k, v = parseKeyValue(kv) if k == 'max-age' or k == 'min-fresh' or k == 's-maxage': # Required integer argument if v is None: v = 0 else: v = int(v) elif k == 'max-stale': # Optional integer argument if v is not None: v = int(v) elif k == 'private' or k == 'no-cache': # Optional list argument if v is not None: v = [field.strip().lower() for field in v.split(',')] return k, v -def generateCacheControl((k, v)): +def generateCacheControl(xxx_todo_changeme): + (k, v) = xxx_todo_changeme if v is None: return str(k) else: if k == 'no-cache' or k == 'private': # quoted list of values v = quoteString(generateList( [header_case_mapping.get(name) or dashCapitalize(name) for name in v])) return '%s=%s' % (k,v) def generateContentRange(tup): """tup is (type, start, end, len) len can be None. 
""" type, start, end, len = tup if len == None: len = '*' else: len = int(len) if start == None and end == None: startend = '*' else: startend = '%d-%d' % (start, end) return '%s %s/%s' % (type, startend, len) def generateDateTime(secSinceEpoch): """Convert seconds since epoch to HTTP datetime string.""" year, month, day, hh, mm, ss, wd, y, z = time.gmtime(secSinceEpoch) s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( weekdayname[wd], day, monthname[month], year, hh, mm, ss) return s def generateExpect(item): if item[1][0] is None: out = '%s' % (item[0],) else: out = '%s=%s' % (item[0], item[1][0]) if len(item[1]) > 1: out += ';'+generateKeyValues(item[1][1:]) return out def generateRange(range): def noneOr(s): if s is None: return '' return s type,ranges=range if type != 'bytes': raise ValueError("Unknown range unit: "+type+".") return (type+'='+ ','.join(['%s-%s' % (noneOr(startend[0]), noneOr(startend[1])) for startend in ranges])) def generateRetryAfter(when): # always generate delta seconds format return str(int(when - time.time())) def generateContentType(mimeType): out="%s/%s"%(mimeType.mediaType, mimeType.mediaSubtype) if mimeType.params: - out+=';'+generateKeyValues(mimeType.params.iteritems()) + out+=';'+generateKeyValues(iter(mimeType.params.items())) return out def generateIfRange(dateOrETag): if isinstance(dateOrETag, ETag): return dateOrETag.generate() else: return generateDateTime(dateOrETag) # WWW-Authenticate and Authorization def generateWWWAuthenticate(headers): _generated = [] for seq in headers: scheme, challenge = seq[0], seq[1] # If we're going to parse out to something other than a dict # we need to be able to generate from something other than a dict try: l = [] - for k,v in dict(challenge).iteritems(): + for k,v in dict(challenge).items(): l.append("%s=%s" % (k, quoteString(v))) _generated.append("%s %s" % (scheme, ", ".join(l))) except ValueError: _generated.append("%s %s" % (scheme, challenge)) return _generated def 
generateAuthorization(seq):
    # seq is (scheme, credentials); emit as a single space-joined header.
    return [' '.join(seq)]

####

class ETag(object):
    """An HTTP entity tag (RFC 2616 3.11), optionally weak."""

    def __init__(self, tag, weak=False):
        # @param tag: the opaque tag string (coerced to str).
        # @param weak: True for weak validators ('W/' prefix on the wire).
        self.tag = str(tag)
        self.weak = weak

    def match(self, other, strongCompare):
        """Compare against another ETag per RFC 2616 13.3.3."""
        # Sec 13.3.
        # The strong comparison function: in order to be considered equal, both
        # validators MUST be identical in every way, and both MUST NOT be weak.
        #
        # The weak comparison function: in order to be considered equal, both
        # validators MUST be identical in every way, but either or both of
        # them MAY be tagged as "weak" without affecting the result.
        if not isinstance(other, ETag) or other.tag != self.tag:
            return False

        if strongCompare and (other.weak or self.weak):
            return False
        return True

    def __eq__(self, other):
        return isinstance(other, ETag) and other.tag == self.tag and other.weak == self.weak

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "Etag(%r, weak=%r)" % (self.tag, self.weak)

    def parse(tokens):
        """Build an ETag from tokenizer output; raises ValueError if invalid."""
        tokens=tuple(tokens)
        if len(tokens) == 1 and not isinstance(tokens[0], Token):
            return ETag(tokens[0])

        # Weak tag: tokenizer lowercases, so 'W/"x"' arrives as ('w', '/', 'x').
        if(len(tokens) == 3 and tokens[0] == "w"
           and tokens[1] == Token('/')):
            return ETag(tokens[2], weak=True)

        raise ValueError("Invalid ETag.")

    parse=staticmethod(parse)

    def generate(self):
        """Render the ETag for the wire ('W/' prefix when weak)."""
        if self.weak:
            return 'W/'+quoteString(self.tag)
        else:
            return quoteString(self.tag)

def parseStarOrETag(tokens):
    # If-Match / If-None-Match allow a literal '*' instead of an ETag.
    tokens=tuple(tokens)
    if tokens == ('*',):
        return '*'
    else:
        return ETag.parse(tokens)

def generateStarOrETag(etag):
    if etag=='*':
        return etag
    else:
        return etag.generate()

#### Cookies. Blech!
class Cookie(object):
    """A single HTTP cookie, covering both Netscape V0 and RFC 2965 V1 forms."""
    # __slots__ = ['name', 'value', 'path', 'domain', 'ports', 'expires',
    #              'discard', 'secure', 'comment', 'commenturl', 'version']

    def __init__(self, name, value, path=None, domain=None, ports=None,
                 expires=None, discard=False, secure=False, comment=None,
                 commenturl=None, version=0):
        self.name = name
        self.value = value
        self.path = path
        self.domain = domain
        self.ports = ports
        self.expires = expires
        self.discard = discard
        self.secure = secure
        self.comment = comment
        self.commenturl = commenturl
        self.version = version

    def __repr__(self):
        s = "Cookie(%r=%r" % (self.name, self.value)
        if self.path is not None:
            s += ", path=%r" % (self.path,)
        if self.domain is not None:
            s += ", domain=%r" % (self.domain,)
        if self.ports is not None:
            s += ", ports=%r" % (self.ports,)
        if self.expires is not None:
            s += ", expires=%r" % (self.expires,)
        if self.secure is not False:
            s += ", secure=%r" % (self.secure,)
        if self.comment is not None:
            s += ", comment=%r" % (self.comment,)
        if self.commenturl is not None:
            s += ", commenturl=%r" % (self.commenturl,)
        if self.version != 0:
            s += ", version=%r" % (self.version,)
        s += ")"
        return s

    def __eq__(self, other):
        # BUG FIX: the original comparison omitted name and value, so two
        # unrelated cookies with matching attributes compared equal.
        return (isinstance(other, Cookie) and
                other.name == self.name and
                other.value == self.value and
                other.path == self.path and
                other.domain == self.domain and
                other.ports == self.ports and
                other.expires == self.expires and
                other.secure == self.secure and
                other.comment == self.comment and
                other.commenturl == self.commenturl and
                other.version == self.version)

    def __ne__(self, other):
        return not self.__eq__(other)


def parseCookie(headers):
    """Bleargh, the cookie spec sucks.
    This surely needs interoperability testing.
    There are two specs that are supported:
    Version 0) http://wp.netscape.com/newsref/std/cookie_spec.html
    Version 1) http://www.faqs.org/rfcs/rfc2965.html
    """

    cookies = []
    # There can't really be multiple cookie headers according to RFC, because
    # if multiple headers are allowed, they must be joinable with ",".
    # Neither new RFC2965 cookies nor old netscape cookies are.
    header = ';'.join(headers)
    if header[0:8].lower() == "$version":
        # RFC2965 cookie
        h = tokenize([header], foldCase=False)
        r_cookies = split(h, Token(','))
        for r_cookie in r_cookies:
            last_cookie = None
            rr_cookies = split(r_cookie, Token(';'))
            for cookie in rr_cookies:
                nameval = tuple(split(cookie, Token('=')))
                if len(nameval) == 2:
                    (name,), (value,) = nameval
                else:
                    (name,), = nameval
                    value = None

                name = name.lower()
                if name == '$version':
                    continue
                if name[0] == '$':
                    # Attribute of the cookie parsed just before it.
                    if last_cookie is not None:
                        if name == '$path':
                            last_cookie.path = value
                        elif name == '$domain':
                            last_cookie.domain = value
                        elif name == '$port':
                            if value is None:
                                last_cookie.ports = ()
                            else:
                                last_cookie.ports = tuple(
                                    [int(s) for s in value.split(',')])
                else:
                    last_cookie = Cookie(name, value, version=1)
                    cookies.append(last_cookie)
    else:
        # Oldstyle cookies don't do quoted strings or anything sensible.
        # All characters are valid for names except ';' and '=', and all
        # characters are valid for values except ';'. Spaces are stripped,
        # however.
        r_cookies = header.split(';')
        for r_cookie in r_cookies:
            name, value = r_cookie.split('=', 1)
            name = name.strip(' \t')
            value = value.strip(' \t')
            cookies.append(Cookie(name, value))

    return cookies

cookie_validname = "[^" + re.escape(http_tokens + http_ctls) + "]*$"
cookie_validname_re = re.compile(cookie_validname)
cookie_validvalue = cookie_validname + '|"([^"]|\\\\")*"$'
cookie_validvalue_re = re.compile(cookie_validvalue)

def generateCookie(cookies):
    """Generate a single Cookie header value from a list of Cookie objects.

    The two cookie specs share one header, and RFC 2965 only allows a single
    $Version, so a mixed collection must be emitted all-V0 or all-V1; we emit
    V1. Converting a V0 cookie to V1 is lossy: quoting may alter the value a
    V0 parser sees, and V0 names that are invalid V1 syntax (e.g.
    "phpAds_capAd[32]") cannot be represented at all and are discarded.
    Many real-world V0 cookies are invalid V1 syntax, but sites do not
    generally mix versions, so in practice this works out.
    """
    if not cookies:
        # Robustness fix: max() below would raise on an empty sequence.
        return ''
    max_version = max([cookie.version for cookie in cookies])

    if max_version == 0:
        # no quoting or anything.
        return ';'.join(["%s=%s" % (cookie.name, cookie.value)
                         for cookie in cookies])
    else:
        str_cookies = ['$Version="1"']
        for cookie in cookies:
            if cookie.version == 0:
                # Version 0 cookie: make sure the name and value are valid
                # V1 syntax. If they are, use them as is, so the cookie
                # usually looks the same on output as on input. If the name
                # is invalid, drop the cookie; if the value is invalid,
                # quote it and hope for the best on the other side.
                if cookie_validname_re.match(cookie.name) is None:
                    continue

                value = cookie.value
                if cookie_validvalue_re.match(cookie.value) is None:
                    value = quoteString(value)

                str_cookies.append("%s=%s" % (cookie.name, value))
            else:
                # V1 cookie, nice and easy
                str_cookies.append("%s=%s" % (cookie.name,
                                              quoteString(cookie.value)))

            if cookie.path:
                str_cookies.append("$Path=%s" % quoteString(cookie.path))
            if cookie.domain:
                str_cookies.append("$Domain=%s" % quoteString(cookie.domain))
            if cookie.ports is not None:
                if len(cookie.ports) == 0:
                    str_cookies.append("$Port")
                else:
                    str_cookies.append("$Port=%s" % quoteString(
                        ",".join([str(x) for x in cookie.ports])))
        return ';'.join(str_cookies)

def parseSetCookie(headers):
    """Parse Set-Cookie headers (Netscape format); unparsable ones are skipped."""
    setCookies = []
    for header in headers:
        try:
            parts = header.split(';')
            l = []
            for part in parts:
                namevalue = part.split('=', 1)
                if len(namevalue) == 1:
                    name = namevalue[0]
                    value = None
                else:
                    name, value = namevalue
                    value = value.strip(' \t')
                name = name.strip(' \t')
                l.append((name, value))

            setCookies.append(makeCookieFromList(l, True))
        except ValueError:
            # If we can't parse one Set-Cookie, ignore it,
            # but not the rest of Set-Cookies.
            pass
    return setCookies

def parseSetCookie2(toks):
    """Parse tokenized Set-Cookie2 headers (RFC 2965); bad cookies are skipped."""
    outCookies = []
    for cookie in [[parseKeyValue(x) for x in split(y, Token(';'))]
                   for y in split(toks, Token(','))]:
        try:
            outCookies.append(makeCookieFromList(cookie, False))
        except ValueError:
            # Again, if we can't handle one cookie -- ignore it.
            pass
    return outCookies

def makeCookieFromList(tup, netscapeFormat):
    """Build a Cookie from [(name, value), (attr, value)...] pairs.

    The first pair is the cookie itself; the rest are attributes. Raises
    ValueError for a missing name/value or a '$'-prefixed name.
    """
    name, value = tup[0]
    if name is None or value is None:
        raise ValueError("Cookie has missing name or value")
    if name.startswith("$"):
        raise ValueError("Invalid cookie name: %r, starts with '$'." % name)
    cookie = Cookie(name, value)
    hadMaxAge = False

    for name, value in tup[1:]:
        name = name.lower()

        if value is None:
            if name in ("discard", "secure"):
                # Boolean attrs
                value = True
            elif name != "port":
                # Can be either boolean or explicit
                continue

        if name in ("comment", "commenturl", "discard", "domain", "path",
                    "secure"):
            # simple cases
            setattr(cookie, name, value)
        elif name == "expires" and not hadMaxAge:
            # max-age takes precedence over expires (RFC 2965 semantics).
            if netscapeFormat and value[0] == '"' and value[-1] == '"':
                value = value[1:-1]
            cookie.expires = parseDateTime(value)
        elif name == "max-age":
            hadMaxAge = True
            cookie.expires = int(value) + time.time()
        elif name == "port":
            if value is None:
                cookie.ports = ()
            else:
                if netscapeFormat and value[0] == '"' and value[-1] == '"':
                    value = value[1:-1]
                cookie.ports = tuple([int(s) for s in value.split(',')])
        elif name == "version":
            cookie.version = int(value)

    return cookie

def generateSetCookie(cookies):
    """Generate Netscape-format Set-Cookie header values."""
    setCookies = []
    for cookie in cookies:
        out = ["%s=%s" % (cookie.name, cookie.value)]
        if cookie.expires:
            out.append("expires=%s" % generateDateTime(cookie.expires))
        if cookie.path:
            out.append("path=%s" % cookie.path)
        if cookie.domain:
            out.append("domain=%s" % cookie.domain)
        if cookie.secure:
            out.append("secure")

        setCookies.append('; '.join(out))
    return setCookies

def generateSetCookie2(cookies):
    """Generate RFC 2965 Set-Cookie2 header values."""
    setCookies = []
    for cookie in cookies:
        out = ["%s=%s" % (cookie.name, quoteString(cookie.value))]
        if cookie.comment:
            out.append("Comment=%s" % quoteString(cookie.comment))
        if cookie.commenturl:
            out.append("CommentURL=%s" % quoteString(cookie.commenturl))
        if cookie.discard:
            out.append("Discard")
        if cookie.domain:
            out.append("Domain=%s" % quoteString(cookie.domain))
        if cookie.expires:
            out.append("Max-Age=%s" % (cookie.expires - time.time()))
        if cookie.path:
            out.append("Path=%s" % quoteString(cookie.path))
        if cookie.ports is not None:
            if len(cookie.ports) == 0:
                out.append("Port")
            else:
                out.append("Port=%s" % quoteString(
                    ",".join([str(x) for x in cookie.ports])))
        if cookie.secure:
            out.append("Secure")
        out.append('Version="1"')
        setCookies.append('; '.join(out))
    return setCookies

def parseDepth(depth):
    """Parse a WebDAV Depth header; only "0", "1" and "infinity" are legal."""
    if depth not in ("0", "1", "infinity"):
        raise ValueError("Invalid depth header value: %s" % (depth,))
    return depth

def parseOverWrite(overwrite):
    """Parse a WebDAV Overwrite header ("T"/"F") into a bool."""
    if overwrite == "F":
        return False
    elif overwrite == "T":
        return True
    raise ValueError("Invalid overwrite header value: %s" % (overwrite,))

def generateOverWrite(overwrite):
    """Inverse of parseOverWrite."""
    if overwrite:
        return "T"
    else:
        return "F"

# (A page of commented-out quality-sorting helpers that was never enabled
# lived here; removed as dead code.)

# Headers object

class __RecalcNeeded(object):
    def __repr__(self):
        # NOTE(review): the original returned an empty string here -- the
        # angle-bracketed text was stripped during extraction; restored as a
        # conventional marker repr. Confirm against upstream twisted.web2.
        return "<RecalcNeeded>"

_RecalcNeeded = __RecalcNeeded()

class Headers(object):
    """This class stores the HTTP headers as both a parsed representation
    and the raw string representation. It converts between the two on
    demand."""

    def __init__(self, headers=None, rawHeaders=None,
                 handler=DefaultHTTPHandler):
        self._raw_headers = {}
        self._headers = {}
        self.handler = handler
        if headers is not None:
            for key, value in headers.items():
                self.setHeader(key, value)
        if rawHeaders is not None:
            for key, value in rawHeaders.items():
                self.setRawHeaders(key, value)

    def _setRawHeaders(self, headers):
        self._raw_headers = headers
        self._headers = {}

    def _toParsed(self, name):
        r = self._raw_headers.get(name, None)
        h = self.handler.parse(name, r)
        if h is not None:
            self._headers[name] = h
        return h

    def _toRaw(self, name):
        h = self._headers.get(name, None)
        r = self.handler.generate(name, h)
        if r is not None:
            self._raw_headers[name] = r
        return r

    def hasHeader(self, name):
        """Does a header with the given name exist?"""
        name = name.lower()
        return name in self._raw_headers

    def getRawHeaders(self, name, default=None):
        """Returns a list of headers matching the given name as the raw string
        given."""
        name = name.lower()
        raw_header = self._raw_headers.get(name, default)
        if raw_header is not _RecalcNeeded:
            return raw_header
        return self._toRaw(name)

    def getHeader(self, name, default=None):
        """Returns the parsed representation of the given header.
        The exact form of the return value depends on the header in question.

        If no parser for the header exists, raise ValueError.

        If the header doesn't exist, return default (or None if not specified)
        """
        name = name.lower()
        parsed = self._headers.get(name, default)
        if parsed is not _RecalcNeeded:
            return parsed
        return self._toParsed(name)

    def setRawHeaders(self, name, value):
        """Sets the raw representation of the given header.
        Value should be a list of strings, each being one header of the
        given name.
        """
        name = name.lower()
        self._raw_headers[name] = value
        self._headers[name] = _RecalcNeeded

    def setHeader(self, name, value):
        """Sets the parsed representation of the given header.
        Value should be a list of objects whose exact form depends
        on the header in question.
        """
        name = name.lower()
        self._raw_headers[name] = _RecalcNeeded
        self._headers[name] = value

    def addRawHeader(self, name, value):
        """
        Add a raw value to a header that may or may not already exist.
        If it exists, add it as a separate header to output; do not
        replace anything.
        """
        name = name.lower()
        raw_header = self._raw_headers.get(name)
        if raw_header is None:
            # No header yet
            raw_header = []
            self._raw_headers[name] = raw_header
        elif raw_header is _RecalcNeeded:
            raw_header = self._toRaw(name)

        raw_header.append(value)
        self._headers[name] = _RecalcNeeded

    def removeHeader(self, name):
        """Removes the header named."""
        name = name.lower()
        if name in self._raw_headers:
            del self._raw_headers[name]
            del self._headers[name]

    def __repr__(self):
        # NOTE(review): the original format string was garbled to '' during
        # extraction (angle-bracket text stripped), which raises TypeError;
        # restored to the conventional form. Confirm against upstream.
        return '<Headers: Raw: %r Parsed: %r>' % (self._raw_headers,
                                                  self._headers)

    def canonicalNameCaps(self, name):
        """Return the name with the canonical capitalization, if known,
        otherwise, Caps-After-Dashes"""
        return header_case_mapping.get(name) or dashCapitalize(name)

    def getAllRawHeaders(self):
        """Return an iterator of key,value pairs of all headers
        contained in this object, as strings. The keys are capitalized
        in canonical capitalization."""
        # _toRaw only replaces the value of an existing key, so iterating the
        # items view while it runs is safe (no size change).
        for k, v in self._raw_headers.items():
            if v is _RecalcNeeded:
                v = self._toRaw(k)
            yield self.canonicalNameCaps(k), v

    def makeImmutable(self):
        """Make this header set immutable. All mutating operations will
        raise an exception."""
        self.setHeader = self.setRawHeaders = self.removeHeader = \
            self._mutateRaise

    def _mutateRaise(self, *args):
        raise AttributeError(
            "This header object is immutable as the headers have already "
            "been sent.")


"""The following dicts are all mappings of header to list of operations
to perform. The first operation should generally be 'tokenize' if the
header can be parsed according to the normal tokenization rules. If it
cannot, generally the first thing you want to do is take only the last
instance of the header (in case it was sent multiple times, which is
strictly an error, but we're nice.).
"""

def iteritems(x):
    """Python 2 iteritems() shim used by the generator pipelines below."""
    return iter(x.items())


parser_general_headers = {
    'Cache-Control': (tokenize, listParser(parseCacheControl), dict),
    'Connection': (tokenize, filterTokens),
    'Date': (last, parseDateTime),
#    'Pragma':tokenize
#    'Trailer':tokenize
    'Transfer-Encoding': (tokenize, filterTokens),
#    'Upgrade':tokenize
#    'Via':tokenize,stripComment
#    'Warning':tokenize
}

generator_general_headers = {
    'Cache-Control': (iteritems, listGenerator(generateCacheControl),
                      singleHeader),
    'Connection': (generateList, singleHeader),
    'Date': (generateDateTime, singleHeader),
#    'Pragma':
#    'Trailer':
    'Transfer-Encoding': (generateList, singleHeader),
#    'Upgrade':
#    'Via':
#    'Warning':
}

parser_request_headers = {
    'Accept': (tokenize, listParser(parseAccept), dict),
    'Accept-Charset': (tokenize, listParser(parseAcceptQvalue), dict,
                       addDefaultCharset),
    'Accept-Encoding': (tokenize, listParser(parseAcceptQvalue), dict,
                        addDefaultEncoding),
    'Accept-Language': (tokenize, listParser(parseAcceptQvalue), dict),
    'Authorization': (last, parseAuthorization),
    'Cookie': (parseCookie,),
    'Expect': (tokenize, listParser(parseExpect), dict),
    'From': (last,),
    'Host': (last,),
    'If-Match': (tokenize, listParser(parseStarOrETag), list),
    'If-Modified-Since': (last, parseIfModifiedSince),
    'If-None-Match': (tokenize, listParser(parseStarOrETag), list),
    'If-Range': (parseIfRange,),
    'If-Unmodified-Since': (last, parseDateTime),
    'Max-Forwards': (last, int),
#    'Proxy-Authorization':str, # what is "credentials"
    'Range': (tokenize, parseRange),
    'Referer': (last, str),  # TODO: URI object?
    'TE': (tokenize, listParser(parseAcceptQvalue), dict),
    'User-Agent': (last, str),
}

generator_request_headers = {
    'Accept': (iteritems, listGenerator(generateAccept), singleHeader),
    'Accept-Charset': (iteritems, listGenerator(generateAcceptQvalue),
                       singleHeader),
    'Accept-Encoding': (iteritems, removeDefaultEncoding,
                        listGenerator(generateAcceptQvalue), singleHeader),
    'Accept-Language': (iteritems, listGenerator(generateAcceptQvalue),
                        singleHeader),
    'Authorization': (generateAuthorization,),  # what is "credentials"
    'Cookie': (generateCookie, singleHeader),
    'Expect': (iteritems, listGenerator(generateExpect), singleHeader),
    'From': (str, singleHeader),
    'Host': (str, singleHeader),
    'If-Match': (listGenerator(generateStarOrETag), singleHeader),
    'If-Modified-Since': (generateDateTime, singleHeader),
    'If-None-Match': (listGenerator(generateStarOrETag), singleHeader),
    'If-Range': (generateIfRange, singleHeader),
    'If-Unmodified-Since': (generateDateTime, singleHeader),
    'Max-Forwards': (str, singleHeader),
#    'Proxy-Authorization':str, # what is "credentials"
    'Range': (generateRange, singleHeader),
    'Referer': (str, singleHeader),
    'TE': (iteritems, listGenerator(generateAcceptQvalue), singleHeader),
    'User-Agent': (str, singleHeader),
}

parser_response_headers = {
    'Accept-Ranges': (tokenize, filterTokens),
    'Age': (last, int),
    'ETag': (tokenize, ETag.parse),
    'Location': (last,),  # TODO: URI object?
#    'Proxy-Authenticate'
    'Retry-After': (last, parseRetryAfter),
    'Server': (last,),
    'Set-Cookie': (parseSetCookie,),
    'Set-Cookie2': (tokenize, parseSetCookie2),
    'Vary': (tokenize, filterTokens),
    'WWW-Authenticate': (lambda h: tokenize(h, foldCase=False),
                         parseWWWAuthenticate,)
}

generator_response_headers = {
    'Accept-Ranges': (generateList, singleHeader),
    'Age': (str, singleHeader),
    'ETag': (ETag.generate, singleHeader),
    'Location': (str, singleHeader),
#    'Proxy-Authenticate'
    'Retry-After': (generateRetryAfter, singleHeader),
    'Server': (str, singleHeader),
    'Set-Cookie': (generateSetCookie,),
    'Set-Cookie2': (generateSetCookie2,),
    'Vary': (generateList, singleHeader),
    'WWW-Authenticate': (generateWWWAuthenticate,)
}

parser_entity_headers = {
    'Allow': (lambda s: tokenize(s, foldCase=False), filterTokens),
    'Content-Encoding': (tokenize, filterTokens),
    'Content-Language': (tokenize, filterTokens),
    'Content-Length': (last, int),
    'Content-Location': (last,),  # TODO: URI object?
    'Content-MD5': (last, parseContentMD5),
    'Content-Range': (last, parseContentRange),
    'Content-Type': (lambda s: tokenize(s, foldCase=False), parseContentType),
    'Expires': (last, parseExpires),
    'Last-Modified': (last, parseDateTime),
}

generator_entity_headers = {
    'Allow': (generateList, singleHeader),
    'Content-Encoding': (generateList, singleHeader),
    'Content-Language': (generateList, singleHeader),
    'Content-Length': (str, singleHeader),
    'Content-Location': (str, singleHeader),
    # BUG FIX: base64.encodestring was removed in Python 3.9 and its py3
    # output is bytes, so the old .strip("\n") raised TypeError; use
    # encodebytes and decode to the str header value.
    'Content-MD5': (base64.encodebytes,
                    lambda x: x.decode('ascii').strip("\n"), singleHeader),
    'Content-Range': (generateContentRange, singleHeader),
    'Content-Type': (generateContentType, singleHeader),
    'Expires': (generateDateTime, singleHeader),
    'Last-Modified': (generateDateTime, singleHeader),
}

DefaultHTTPHandler.updateParsers(parser_general_headers)
DefaultHTTPHandler.updateParsers(parser_request_headers)
DefaultHTTPHandler.updateParsers(parser_response_headers)
DefaultHTTPHandler.updateParsers(parser_entity_headers)
DefaultHTTPHandler.updateGenerators(generator_general_headers) DefaultHTTPHandler.updateGenerators(generator_request_headers) DefaultHTTPHandler.updateGenerators(generator_response_headers) DefaultHTTPHandler.updateGenerators(generator_entity_headers) # casemappingify(DefaultHTTPParsers) # casemappingify(DefaultHTTPGenerators) # lowerify(DefaultHTTPParsers) # lowerify(DefaultHTTPGenerators) diff --git a/xcap/web/server.py b/xcap/web/server.py index 99cdc79..a5b8149 100644 --- a/xcap/web/server.py +++ b/xcap/web/server.py @@ -1,575 +1,575 @@ # Copyright (c) 2001-2008 Twisted Matrix Laboratories. # See LICENSE for details. """ This is a web-server which integrates with the twisted.internet infrastructure. """ # System Imports -import cgi, time, urlparse -from urllib import quote, unquote -from urlparse import urlsplit +import cgi, time, urllib.parse +from urllib.parse import quote, unquote +from urllib.parse import urlsplit import weakref from zope.interface import implements # Twisted Imports from twisted.internet import defer from twisted.python import log, failure # Sibling Imports from xcap.web import http, iweb, fileupload, responsecode from xcap.web import http_headers from xcap.web.filter.range import rangefilter from xcap.web import error from xcap.web import version as web_version from twisted import __version__ as twisted_version VERSION = "Twisted/%s TwistedWeb/%s" % (twisted_version, web_version) _errorMarker = object() def defaultHeadersFilter(request, response): if not response.headers.hasHeader('server'): response.headers.setHeader('server', VERSION) if not response.headers.hasHeader('date'): response.headers.setHeader('date', time.time()) return response defaultHeadersFilter.handleErrors = True def preconditionfilter(request, response): if request.method in ("GET", "HEAD"): http.checkPreconditions(request, response) return response def doTrace(request): request = iweb.IRequest(request) txt = "%s %s HTTP/%d.%d\r\n" % (request.method, request.uri, 
request.clientproto[0], request.clientproto[1]) l=[] for name, valuelist in request.headers.getAllRawHeaders(): for value in valuelist: l.append("%s: %s\r\n" % (name, value)) txt += ''.join(l) return http.Response( responsecode.OK, {'content-type': http_headers.MimeType('message', 'http')}, txt) def parsePOSTData(request, maxMem=100*1024, maxFields=1024, maxSize=10*1024*1024): """ Parse data of a POST request. @param request: the request to parse. @type request: L{xcap.web.http.Request}. @param maxMem: maximum memory used during the parsing of the data. @type maxMem: C{int} @param maxFields: maximum number of form fields allowed. @type maxFields: C{int} @param maxSize: maximum size of file upload allowed. @type maxSize: C{int} @return: a deferred that will fire when the parsing is done. The deferred itself doesn't hold a return value, the request is modified directly. @rtype: C{defer.Deferred} """ if request.stream.length == 0: return defer.succeed(None) parser = None ctype = request.headers.getHeader('content-type') if ctype is None: return defer.succeed(None) def updateArgs(data): args = data request.args.update(args) def updateArgsAndFiles(data): args, files = data request.args.update(args) request.files.update(files) def error(f): f.trap(fileupload.MimeFormatError) raise http.HTTPError( http.StatusResponse(responsecode.BAD_REQUEST, str(f.value))) if (ctype.mediaType == 'application' and ctype.mediaSubtype == 'x-www-form-urlencoded'): d = fileupload.parse_urlencoded(request.stream) d.addCallbacks(updateArgs, error) return d elif (ctype.mediaType == 'multipart' and ctype.mediaSubtype == 'form-data'): boundary = ctype.params.get('boundary') if boundary is None: return defer.fail(http.HTTPError( http.StatusResponse( responsecode.BAD_REQUEST, "Boundary not specified in Content-Type."))) d = fileupload.parseMultipartFormData(request.stream, boundary, maxMem, maxFields, maxSize) d.addCallbacks(updateArgsAndFiles, error) return d else: return defer.fail(http.HTTPError( 
http.StatusResponse( responsecode.BAD_REQUEST, "Invalid content-type: %s/%s" % ( ctype.mediaType, ctype.mediaSubtype)))) class StopTraversal(object): """ Indicates to Request._handleSegment that it should stop handling path segments. """ pass class Request(http.Request): """ vars: site remoteAddr scheme host port path params querystring args files prepath postpath @ivar path: The path only (arguments not included). @ivar args: All of the arguments, including URL and POST arguments. @type args: A mapping of strings (the argument names) to lists of values. i.e., ?foo=bar&foo=baz&quux=spam results in {'foo': ['bar', 'baz'], 'quux': ['spam']}. """ implements(iweb.IRequest) site = None _initialprepath = None responseFilters = [rangefilter, preconditionfilter, error.defaultErrorHandler, defaultHeadersFilter] def __init__(self, *args, **kw): - if kw.has_key('site'): + if 'site' in kw: self.site = kw['site'] del kw['site'] - if kw.has_key('prepathuri'): + if 'prepathuri' in kw: self._initialprepath = kw['prepathuri'] del kw['prepathuri'] # Copy response filters from the class self.responseFilters = self.responseFilters[:] self.files = {} self.resources = [] http.Request.__init__(self, *args, **kw) def addResponseFilter(self, f, atEnd=False): if atEnd: self.responseFilters.append(f) else: self.responseFilters.insert(0, f) def unparseURL(self, scheme=None, host=None, port=None, path=None, params=None, querystring=None, fragment=None): """Turn the request path into a url string. For any pieces of the url that are not specified, use the value from the request. 
The arguments have the same meaning as the same named attributes of Request.""" if scheme is None: scheme = self.scheme if host is None: host = self.host if port is None: port = self.port if path is None: path = self.path if params is None: params = self.params if querystring is None: query = self.querystring if fragment is None: fragment = '' if port == http.defaultPortForScheme.get(scheme, 0): hostport = host else: hostport = host + ':' + str(port) - return urlparse.urlunparse(( + return urllib.parse.urlunparse(( scheme, hostport, path, params, querystring, fragment)) def _parseURL(self): if self.uri[0] == '/': # Can't use urlparse for request_uri because urlparse # wants to be given an absolute or relative URI, not just # an abs_path, and thus gets '//foo' wrong. self.scheme = self.host = self.path = self.params = self.querystring = '' if '?' in self.uri: self.path, self.querystring = self.uri.split('?', 1) else: self.path = self.uri if ';' in self.path: self.path, self.params = self.path.split(';', 1) else: # It is an absolute uri, use standard urlparse (self.scheme, self.host, self.path, - self.params, self.querystring, fragment) = urlparse.urlparse(self.uri) + self.params, self.querystring, fragment) = urllib.parse.urlparse(self.uri) if self.querystring: self.args = cgi.parse_qs(self.querystring, True) else: self.args = {} - path = map(unquote, self.path[1:].split('/')) + path = list(map(unquote, self.path[1:].split('/'))) if self._initialprepath: # We were given an initial prepath -- this is for supporting # CGI-ish applications where part of the path has already # been processed - prepath = map(unquote, self._initialprepath[1:].split('/')) + prepath = list(map(unquote, self._initialprepath[1:].split('/'))) if path[:len(prepath)] == prepath: self.prepath = prepath self.postpath = path[len(prepath):] else: self.prepath = [] self.postpath = path else: self.prepath = [] self.postpath = path #print "_parseURL", self.uri, (self.uri, self.scheme, self.host, 
self.path, self.params, self.querystring) def _fixupURLParts(self): hostaddr, secure = self.chanRequest.getHostInfo() if not self.scheme: self.scheme = ('http', 'https')[secure] if self.host: self.host, self.port = http.splitHostPort(self.scheme, self.host) else: # If GET line wasn't an absolute URL host = self.headers.getHeader('host') if host: self.host, self.port = http.splitHostPort(self.scheme, host) else: # When no hostname specified anywhere, either raise an # error, or use the interface hostname, depending on # protocol version if self.clientproto >= (1,1): raise http.HTTPError(responsecode.BAD_REQUEST) self.host = hostaddr.host self.port = hostaddr.port def process(self): "Process a request." try: self.checkExpect() resp = self.preprocessRequest() if resp is not None: self._cbFinishRender(resp).addErrback(self._processingFailed) return self._parseURL() self._fixupURLParts() self.remoteAddr = self.chanRequest.getRemoteHost() except: failedDeferred = self._processingFailed(failure.Failure()) return d = defer.Deferred() d.addCallback(self._getChild, self.site.resource, self.postpath) d.addCallback(lambda res, req: res.renderHTTP(req), self) d.addCallback(self._cbFinishRender) d.addErrback(self._processingFailed) d.callback(None) def preprocessRequest(self): """Do any request processing that doesn't follow the normal resource lookup procedure. "OPTIONS *" is handled here, for example. 
This would also be the place to do any CONNECT processing.""" if self.method == "OPTIONS" and self.uri == "*": response = http.Response(responsecode.OK) response.headers.setHeader('allow', ('GET', 'HEAD', 'OPTIONS', 'TRACE')) return response # This is where CONNECT would go if we wanted it return None def _getChild(self, _, res, path, updatepaths=True): """Call res.locateChild, and pass the result on to _handleSegment.""" self.resources.append(res) if not path: return res result = res.locateChild(self, path) if isinstance(result, defer.Deferred): return result.addCallback(self._handleSegment, res, path, updatepaths) else: return self._handleSegment(result, res, path, updatepaths) def _handleSegment(self, result, res, path, updatepaths): """Handle the result of a locateChild call done in _getChild.""" newres, newpath = result # If the child resource is None then display a error page if newres is None: raise http.HTTPError(responsecode.NOT_FOUND) # If we got a deferred then we need to call back later, once the # child is actually available. if isinstance(newres, defer.Deferred): return newres.addCallback( lambda actualRes: self._handleSegment( (actualRes, newpath), res, path, updatepaths) ) if path: url = quote("/" + "/".join(path)) else: url = "/" if newpath is StopTraversal: # We need to rethink how to do this. #if newres is res: self._rememberResource(res, url) return res #else: # raise ValueError("locateChild must not return StopTraversal with a resource other than self.") newres = iweb.IResource(newres) if newres is res: assert not newpath is path, "URL traversal cycle detected when attempting to locateChild %r from resource %r." % (path, res) assert len(newpath) < len(path), "Infinite loop impending..." if updatepaths: # We found a Resource... 
update the request.prepath and postpath - for x in xrange(len(path) - len(newpath)): + for x in range(len(path) - len(newpath)): self.prepath.append(self.postpath.pop(0)) child = self._getChild(None, newres, newpath, updatepaths=updatepaths) self._rememberResource(child, url) return child _urlsByResource = weakref.WeakKeyDictionary() def _rememberResource(self, resource, url): """ Remember the URL of a visited resource. """ self._urlsByResource[resource] = url return resource def urlForResource(self, resource): """ Looks up the URL of the given resource if this resource was found while processing this request. Specifically, this includes the requested resource, and resources looked up via L{locateResource}. Note that a resource may be found at multiple URIs; if the same resource is visited at more than one location while processing this request, this method will return one of those URLs, but which one is not defined, nor whether the same URL is returned in subsequent calls. @param resource: the resource to find a URI for. This resource must have been obtained from the request (ie. via its C{uri} attribute, or through its C{locateResource} or C{locateChildResource} methods). @return: a valid URL for C{resource} in this request. @raise NoURLForResourceError: if C{resource} has no URL in this request (because it was not obtained from the request). """ resource = self._urlsByResource.get(resource, None) if resource is None: raise NoURLForResourceError(resource) return resource def locateResource(self, url): """ Looks up the resource with the given URL. @param uri: The URL of the desired resource. @return: a L{Deferred} resulting in the L{IResource} at the given URL or C{None} if no such resource can be located. @raise HTTPError: If C{url} is not a URL on the site that this request is being applied to. The contained response will have a status code of L{responsecode.BAD_GATEWAY}. @raise HTTPError: If C{url} contains a query or fragment. 
The contained response will have a status code of L{responsecode.BAD_REQUEST}. """ if url is None: return None # # Parse the URL # (scheme, host, path, query, fragment) = urlsplit(url) if query or fragment: raise http.HTTPError(http.StatusResponse( responsecode.BAD_REQUEST, "URL may not contain a query or fragment: %s" % (url,) )) # The caller shouldn't be asking a request on one server to lookup a # resource on some other server. if (scheme and scheme != self.scheme) or (host and host != self.headers.getHeader("host")): raise http.HTTPError(http.StatusResponse( responsecode.BAD_GATEWAY, "URL is not on this site (%s://%s/): %s" % (scheme, self.headers.getHeader("host"), url) )) segments = path.split("/") assert segments[0] == "", "URL path didn't begin with '/': %s" % (path,) - segments = map(unquote, segments[1:]) + segments = list(map(unquote, segments[1:])) def notFound(f): f.trap(http.HTTPError) if f.value.response.code != responsecode.NOT_FOUND: return f return None d = defer.maybeDeferred(self._getChild, None, self.site.resource, segments, updatepaths=False) d.addCallback(self._rememberResource, path) d.addErrback(notFound) return d def locateChildResource(self, parent, childName): """ Looks up the child resource with the given name given the parent resource. This is similar to locateResource(), but doesn't have to start the lookup from the root resource, so it is potentially faster. @param parent: the parent of the resource being looked up. This resource must have been obtained from the request (ie. via its C{uri} attribute, or through its C{locateResource} or C{locateChildResource} methods). @param childName: the name of the child of C{parent} to looked up. to C{parent}. @return: a L{Deferred} resulting in the L{IResource} at the given URL or C{None} if no such resource can be located. @raise NoURLForResourceError: if C{resource} was not obtained from the request. 
""" if parent is None or childName is None: return None assert "/" not in childName, "Child name may not contain '/': %s" % (childName,) parentURL = self.urlForResource(parent) if not parentURL.endswith("/"): parentURL += "/" url = parentURL + quote(childName) segment = childName def notFound(f): f.trap(http.HTTPError) if f.value.response.code != responsecode.NOT_FOUND: return f return None d = defer.maybeDeferred(self._getChild, None, parent, [segment], updatepaths=False) d.addCallback(self._rememberResource, url) d.addErrback(notFound) return d def _processingFailed(self, reason): if reason.check(http.HTTPError) is not None: # If the exception was an HTTPError, leave it alone d = defer.succeed(reason.value.response) else: # Otherwise, it was a random exception, so give a # ICanHandleException implementer a chance to render the page. def _processingFailed_inner(reason): handler = iweb.ICanHandleException(self, self) return handler.renderHTTP_exception(self, reason) d = defer.maybeDeferred(_processingFailed_inner, reason) d.addCallback(self._cbFinishRender) d.addErrback(self._processingReallyFailed, reason) return d def _processingReallyFailed(self, reason, origReason): log.msg("Exception rendering error page:", isErr=1) log.err(reason) log.msg("Original exception:", isErr=1) log.err(origReason) body = ("Internal Server Error" "

Internal Server Error

An error occurred rendering the requested page. Additionally, an error occured rendering the error page.") response = http.Response( responsecode.INTERNAL_SERVER_ERROR, {'content-type': http_headers.MimeType('text','html')}, body) self.writeResponse(response) def _cbFinishRender(self, result): def filterit(response, f): if (hasattr(f, 'handleErrors') or (response.code >= 200 and response.code < 300)): return f(self, response) else: return response response = iweb.IResponse(result, None) if response: d = defer.Deferred() for f in self.responseFilters: d.addCallback(filterit, f) d.addCallback(self.writeResponse) d.callback(response) return d resource = iweb.IResource(result, None) if resource: self.resources.append(resource) d = defer.maybeDeferred(resource.renderHTTP, self) d.addCallback(self._cbFinishRender) return d raise TypeError("html is not a resource or a response") def renderHTTP_exception(self, req, reason): log.msg("Exception rendering:", isErr=1) log.err(reason) body = ("Internal Server Error" "

Internal Server Error

An error occurred rendering the requested page. More information is available in the server log.") return http.Response( responsecode.INTERNAL_SERVER_ERROR, {'content-type': http_headers.MimeType('text','html')}, body) class Site(object): def __init__(self, resource): """Initialize. """ self.resource = iweb.IResource(resource) def __call__(self, *args, **kwargs): return Request(site=self, *args, **kwargs) class NoURLForResourceError(RuntimeError): def __init__(self, resource): RuntimeError.__init__(self, "Resource %r has no URL in this request." % (resource,)) self.resource = resource __all__ = ['Request', 'Site', 'StopTraversal', 'VERSION', 'defaultHeadersFilter', 'doTrace', 'parsePOSTData', 'preconditionfilter', 'NoURLForResourceError'] diff --git a/xcap/web/static.py b/xcap/web/static.py index bc1067f..5c7e022 100644 --- a/xcap/web/static.py +++ b/xcap/web/static.py @@ -1,597 +1,597 @@ # Copyright (c) 2001-2008 Twisted Matrix Laboratories. # See LICENSE for details. """ I deal with static resources. """ # System Imports import os, time, stat import tempfile # Sibling Imports from xcap.web import http_headers, resource from xcap.web import http, iweb, stream, responsecode, server, dirlist # Twisted Imports from twisted.python import filepath from twisted.internet.defer import maybeDeferred from zope.interface import implements class MetaDataMixin(object): """ Mix-in class for L{iweb.IResource} which provides methods for accessing resource metadata specified by HTTP. """ def etag(self): """ @return: The current etag for the resource if available, None otherwise. """ return None def lastModified(self): """ @return: The last modified time of the resource if available, None otherwise. """ return None def creationDate(self): """ @return: The creation date of the resource if available, None otherwise. """ return None def contentLength(self): """ @return: The size in bytes of the resource if available, None otherwise. 
""" return None def contentType(self): """ @return: The MIME type of the resource if available, None otherwise. """ return None def contentEncoding(self): """ @return: The encoding of the resource if available, None otherwise. """ return None def displayName(self): """ @return: The display name of the resource if available, None otherwise. """ return None def exists(self): """ @return: True if the resource exists on the server, False otherwise. """ return True class StaticRenderMixin(resource.RenderMixin, MetaDataMixin): def checkPreconditions(self, request): # This code replaces the code in resource.RenderMixin if request.method not in ("GET", "HEAD"): http.checkPreconditions( request, entityExists = self.exists(), etag = self.etag(), lastModified = self.lastModified(), ) # Check per-method preconditions method = getattr(self, "preconditions_" + request.method, None) if method: return method(request) def renderHTTP(self, request): """ See L{resource.RenderMixIn.renderHTTP}. This implementation automatically sets some headers on the response based on data available from L{MetaDataMixin} methods. """ def setHeaders(response): response = iweb.IResponse(response) # Don't provide additional resource information to error responses if response.code < 400: # Content-* headers refer to the response content, not # (necessarily) to the resource content, so they depend on the # request method, and therefore can't be set here. for (header, value) in ( ("etag", self.etag()), ("last-modified", self.lastModified()), ): if value is not None: response.headers.setHeader(header, value) return response def onError(f): # If we get an HTTPError, run its response through setHeaders() as # well. f.trap(http.HTTPError) return setHeaders(f.value.response) d = maybeDeferred(super(StaticRenderMixin, self).renderHTTP, request) return d.addCallbacks(setHeaders, onError) class Data(resource.Resource): """ This is a static, in-memory resource. 
""" def __init__(self, data, type): self.data = data self.type = http_headers.MimeType.fromString(type) self.created_time = time.time() def etag(self): lastModified = self.lastModified() return http_headers.ETag("%X-%X" % (lastModified, hash(self.data)), weak=(time.time() - lastModified <= 1)) def lastModified(self): return self.creationDate() def creationDate(self): return self.created_time def contentLength(self): return len(self.data) def contentType(self): return self.type def render(self, req): return http.Response( responsecode.OK, http_headers.Headers({'content-type': self.contentType()}), stream=self.data) class File(StaticRenderMixin): """ File is a resource that represents a plain non-interpreted file (although it can look for an extension like .rpy or .cgi and hand the file to a processor for interpretation if you wish). Its constructor takes a file path. Alternatively, you can give a directory path to the constructor. In this case the resource will represent that directory, and its children will be files underneath that directory. This provides access to an entire filesystem tree with a single Resource. If you map the URL 'http://server/FILE' to a resource created as File('/tmp'), then http://server/FILE/ will return an HTML-formatted listing of the /tmp/ directory, and http://server/FILE/foo/bar.html will return the contents of /tmp/foo/bar.html . """ implements(iweb.IResource) def _getContentTypes(self): if not hasattr(File, "_sharedContentTypes"): File._sharedContentTypes = loadMimeTypes() return File._sharedContentTypes contentTypes = property(_getContentTypes) contentEncodings = { ".gz" : "gzip", ".bz2": "bzip2" } processors = {} indexNames = ["index", "index.html", "index.htm", "index.trp", "index.rpy"] type = None def __init__(self, path, defaultType="text/plain", ignoredExts=(), processors=None, indexNames=None): """Create a file with the given path. 
""" super(File, self).__init__() self.putChildren = {} self.fp = filepath.FilePath(path) # Remove the dots from the path to split self.defaultType = defaultType self.ignoredExts = list(ignoredExts) if processors is not None: self.processors = dict([ (key.lower(), value) - for key, value in processors.items() + for key, value in list(processors.items()) ]) if indexNames is not None: self.indexNames = indexNames def exists(self): return self.fp.exists() def etag(self): if not self.fp.exists(): return None st = self.fp.statinfo # # Mark ETag as weak if it was modified more recently than we can # measure and report, as it could be modified again in that span # and we then wouldn't know to provide a new ETag. # weak = (time.time() - st.st_mtime <= 1) return http_headers.ETag( "%X-%X-%X" % (st.st_ino, st.st_size, st.st_mtime), weak=weak ) def lastModified(self): if self.fp.exists(): return self.fp.getmtime() else: return None def creationDate(self): if self.fp.exists(): return self.fp.getmtime() else: return None def contentLength(self): if self.fp.exists(): if self.fp.isfile(): return self.fp.getsize() else: # Computing this would require rendering the resource; let's # punt instead. return None else: return None def _initTypeAndEncoding(self): self._type, self._encoding = getTypeAndEncoding( self.fp.basename(), self.contentTypes, self.contentEncodings, self.defaultType ) # Handle cases not covered by getTypeAndEncoding() if self.fp.isdir(): self._type = "httpd/unix-directory" def contentType(self): if not hasattr(self, "_type"): self._initTypeAndEncoding() return http_headers.MimeType.fromString(self._type) def contentEncoding(self): if not hasattr(self, "_encoding"): self._initTypeAndEncoding() return self._encoding def displayName(self): if self.fp.exists(): return self.fp.basename() else: return None def ignoreExt(self, ext): """Ignore the given extension. 
Serve file.ext if file is requested """ self.ignoredExts.append(ext) def directoryListing(self): return dirlist.DirectoryLister(self.fp.path, self.listChildren(), self.contentTypes, self.contentEncodings, self.defaultType) def putChild(self, name, child): """ Register a child with the given name with this resource. @param name: the name of the child (a URI path segment) @param child: the child to register """ self.putChildren[name] = child def getChild(self, name): """ Look up a child resource. @return: the child of this resource with the given name. """ if name == "": return self child = self.putChildren.get(name, None) if child: return child child_fp = self.fp.child(name) if child_fp.exists(): return self.createSimilarFile(child_fp.path) else: return None def listChildren(self): """ @return: a sequence of the names of all known children of this resource. """ - children = self.putChildren.keys() + children = list(self.putChildren.keys()) if self.fp.isdir(): children += [c for c in self.fp.listdir() if c not in children] return children def locateChild(self, req, segments): """ See L{IResource}C{.locateChild}. """ # If getChild() finds a child resource, return it child = self.getChild(segments[0]) if child is not None: return (child, segments[1:]) # If we're not backed by a directory, we have no children. # But check for existance first; we might be a collection resource # that the request wants created. self.fp.restat(False) if self.fp.exists() and not self.fp.isdir(): return (None, ()) # OK, we need to return a child corresponding to the first segment path = segments[0] if path: fpath = self.fp.child(path) else: # Request is for a directory (collection) resource return (self, server.StopTraversal) # Don't run processors on directories - if someone wants their own # customized directory rendering, subclass File instead. 
if fpath.isfile(): processor = self.processors.get(fpath.splitext()[1].lower()) if processor: return ( processor(fpath.path), segments[1:]) elif not fpath.exists(): sibling_fpath = fpath.siblingExtensionSearch(*self.ignoredExts) if sibling_fpath is not None: fpath = sibling_fpath return self.createSimilarFile(fpath.path), segments[1:] def renderHTTP(self, req): self.fp.restat(False) return super(File, self).renderHTTP(req) def render(self, req): """You know what you doing.""" if not self.fp.exists(): return responsecode.NOT_FOUND if self.fp.isdir(): if req.uri[-1] != "/": # Redirect to include trailing '/' in URI return http.RedirectResponse(req.unparseURL(path=req.path+'/')) else: ifp = self.fp.childSearchPreauth(*self.indexNames) if ifp: # Render from the index file standin = self.createSimilarFile(ifp.path) else: # Render from a DirectoryLister standin = dirlist.DirectoryLister( self.fp.path, self.listChildren(), self.contentTypes, self.contentEncodings, self.defaultType ) return standin.render(req) try: f = self.fp.open() - except IOError, e: + except IOError as e: import errno - if e[0] == errno.EACCES: + if e.errno == errno.EACCES: return responsecode.FORBIDDEN - elif e[0] == errno.ENOENT: + elif e.errno == errno.ENOENT: return responsecode.NOT_FOUND else: raise response = http.Response() response.stream = stream.FileStream(f, 0, self.fp.getsize()) for (header, value) in ( ("content-type", self.contentType()), ("content-encoding", self.contentEncoding()), ): if value is not None: response.headers.setHeader(header, value) return response def createSimilarFile(self, path): return self.__class__(path, self.defaultType, self.ignoredExts, self.processors, self.indexNames[:]) class FileSaver(resource.PostableResource): allowedTypes = (http_headers.MimeType('text', 'plain'), http_headers.MimeType('text', 'html'), http_headers.MimeType('text', 'css')) - def __init__(self, destination, expectedFields=[], allowedTypes=None, maxBytes=1000000, permissions=0644): + def __init__(self, destination, expectedFields=[], allowedTypes=None,
maxBytes=1000000, permissions=0o644): self.destination = destination self.allowedTypes = allowedTypes or self.allowedTypes self.maxBytes = maxBytes self.expectedFields = expectedFields self.permissions = permissions def makeUniqueName(self, filename): """Called when a unique filename is needed. filename is the name of the file as given by the client. Returns the fully qualified path of the file to create. The file must not yet exist. """ return tempfile.mktemp(suffix=os.path.splitext(filename)[1], dir=self.destination) def isSafeToWrite(self, filename, mimetype, filestream): """Returns True if it's "safe" to write this file, otherwise it raises an exception. """ if filestream.length > self.maxBytes: raise IOError("%s: File exceeds maximum length (%d > %d)" % (filename, filestream.length, self.maxBytes)) if mimetype not in self.allowedTypes: raise IOError("%s: File type not allowed %s" % (filename, mimetype)) return True def writeFile(self, filename, mimetype, fileobject): """Does the I/O dirty work after it calls isSafeToWrite to make sure it's safe to write this file. """ filestream = stream.FileStream(fileobject) if self.isSafeToWrite(filename, mimetype, filestream): outname = self.makeUniqueName(filename) flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, "O_BINARY", 0) fileobject = os.fdopen(os.open(outname, flags, self.permissions), 'wb', 0) stream.readIntoFile(filestream, fileobject) return outname def render(self, req): content = [""] if req.files: for fieldName in req.files: if fieldName in self.expectedFields: for finfo in req.files[fieldName]: try: outname = self.writeFile(*finfo) content.append("Saved file %s
" % outname) - except IOError, err: + except IOError as err: content.append(str(err) + "
") else: content.append("%s is not a valid field" % fieldName) else: content.append("No files given") content.append("") return http.Response(responsecode.OK, {}, stream='\n'.join(content)) # FIXME: hi there I am a broken class # """I contain AsIsProcessor, which serves files 'As Is' # Inspired by Apache's mod_asis # """ # # class ASISProcessor: # implements(iweb.IResource) # # def __init__(self, path): # self.path = path # # def renderHTTP(self, request): # request.startedWriting = 1 # return File(self.path) # # def locateChild(self, request): # return None, () ## # Utilities ## dangerousPathError = http.HTTPError(responsecode.NOT_FOUND) #"Invalid request URL." def isDangerous(path): return path == '..' or '/' in path or os.sep in path def addSlash(request): return "http%s://%s%s/" % ( request.isSecure() and 's' or '', request.getHeader("host"), (request.uri.split('?')[0])) def loadMimeTypes(mimetype_locations=['/etc/mime.types']): """ Multiple file locations containing mime-types can be passed as a list. The files will be sourced in that order, overriding mime-types from the files sourced beforehand, but only if a new entry explicitly overrides the current entry. """ import mimetypes # Grab Python's built-in mimetypes dictionary. contentTypes = mimetypes.types_map # Update Python's semi-erroneous dictionary with a few of the # usual suspects. contentTypes.update( { '.conf': 'text/plain', '.diff': 'text/plain', '.exe': 'application/x-executable', '.flac': 'audio/x-flac', '.java': 'text/plain', '.ogg': 'application/ogg', '.oz': 'text/x-oz', '.swf': 'application/x-shockwave-flash', '.tgz': 'application/x-gtar', '.wml': 'text/vnd.wap.wml', '.xul': 'application/vnd.mozilla.xul+xml', '.py': 'text/plain', '.patch': 'text/plain', } ) # Users can override these mime-types by loading them out configuration # files (this defaults to ['/etc/mime.types']). 
for location in mimetype_locations: if os.path.exists(location): contentTypes.update(mimetypes.read_mime_types(location)) return contentTypes def getTypeAndEncoding(filename, types, encodings, defaultType): p, ext = os.path.splitext(filename) ext = ext.lower() - if encodings.has_key(ext): + if ext in encodings: enc = encodings[ext] ext = os.path.splitext(p)[1].lower() else: enc = None type = types.get(ext, defaultType) return type, enc ## # Test code ## if __name__ == '__builtin__': # Running from twistd -y from twisted.application import service, strports from xcap.web import server res = File('/') application = service.Application("demo") s = strports.service('8080', server.Site(res)) s.setServiceParent(application) diff --git a/xcap/web/stream.py b/xcap/web/stream.py index 3b39eb8..56ff1b1 100644 --- a/xcap/web/stream.py +++ b/xcap/web/stream.py @@ -1,1082 +1,1082 @@ """ The stream module provides a simple abstraction of streaming data. While Twisted already has some provisions for handling this in its Producer/Consumer model, the rather complex interactions between producer and consumer makes it difficult to implement something like the CompoundStream object. Thus, this API. The IStream interface is very simple. It consists of two methods: read, and close. The read method should either return some data, None if there is no data left to read, or a Deferred. Close frees up any underlying resources and causes read to return None forevermore. IByteStream adds a bit more to the API: 1) read is required to return objects conforming to the buffer interface. 2) .length, which may either an integer number of bytes remaining, or None if unknown 3) .split(position). Split takes a position, and splits the stream in two pieces, returning the two new streams. Using the original stream after calling split is not allowed. There are two builtin source stream classes: FileStream and MemoryStream. The first produces data from a file object, the second from a buffer in memory. 
Any number of these can be combined into one stream with the CompoundStream object. Then, to interface with other parts of Twisted, there are two transcievers: StreamProducer and ProducerStream. The first takes a stream and turns it into an IPushProducer, which will write to a consumer. The second is a consumer which is a stream, so that other producers can write to it. """ -from __future__ import generators + import copy, os, types, sys from zope.interface import Interface, Attribute, implements from twisted.internet.defer import Deferred from twisted.internet import interfaces as ti_interfaces, defer, reactor, protocol, error as ti_error from twisted.python import components, log from twisted.python.failure import Failure # Python 2.4.2 (only) has a broken mmap that leaks a fd every time you call it. if sys.version_info[0:3] != (2,4,2): try: import mmap except ImportError: mmap = None else: mmap = None ############################## #### Interfaces #### ############################## class IStream(Interface): """A stream of arbitrary data.""" def read(): """Read some data. Returns some object representing the data. If there is no more data available, returns None. Can also return a Deferred resulting in one of the above. Errors may be indicated by exception or by a Deferred of a Failure. """ def close(): """Prematurely close. Should also cause further reads to return None.""" class IByteStream(IStream): """A stream which is of bytes.""" length = Attribute("""How much data is in this stream. Can be None if unknown.""") def read(): """Read some data. Returns an object conforming to the buffer interface, or if there is no more data available, returns None. Can also return a Deferred resulting in one of the above. Errors may be indicated by exception or by a Deferred of a Failure. """ def split(point): """Split this stream into two, at byte position 'point'. Returns a tuple of (before, after). After calling split, no other methods should be called on this stream. 
Doing so will have undefined behavior. If you cannot implement split easily, you may implement it as:: return fallbackSplit(self, point) """ def close(): """Prematurely close this stream. Should also cause further reads to return None. Additionally, .length should be set to 0. """ class ISendfileableStream(Interface): def read(sendfile=False): """ Read some data. If sendfile == False, returns an object conforming to the buffer interface, or else a Deferred. If sendfile == True, returns either the above, or a SendfileBuffer. """ class SimpleStream(object): """Superclass of simple streams with a single buffer and a offset and length into that buffer.""" implements(IByteStream) length = None start = None def read(self): return None def close(self): self.length = 0 def split(self, point): if self.length is not None: if point > self.length: raise ValueError("split point (%d) > length (%d)" % (point, self.length)) b = copy.copy(self) self.length = point if b.length is not None: b.length -= point b.start += point return (self, b) ############################## #### FileStream #### ############################## # maximum mmap size MMAP_LIMIT = 4*1024*1024 # minimum mmap size MMAP_THRESHOLD = 8*1024 # maximum sendfile length SENDFILE_LIMIT = 16777216 # minimum sendfile size SENDFILE_THRESHOLD = 256 def mmapwrapper(*args, **kwargs): """ Python's mmap call sucks and ommitted the "offset" argument for no discernable reason. Replace this with a mmap module that has offset. """ offset = kwargs.get('offset', None) if offset in [None, 0]: if 'offset' in kwargs: del kwargs['offset'] else: raise mmap.error("mmap: Python sucks and does not support offset.") return mmap.mmap(*args, **kwargs) class FileStream(SimpleStream): implements(ISendfileableStream) """A stream that reads data from a file. File must be a normal file that supports seek, (e.g. 
not a pipe or device or socket).""" # 65K, minus some slack CHUNK_SIZE = 2 ** 2 ** 2 ** 2 - 32 f = None def __init__(self, f, start=0, length=None, useMMap=bool(mmap)): """ Create the stream from file f. If you specify start and length, use only that portion of the file. """ self.f = f self.start = start if length is None: self.length = os.fstat(f.fileno()).st_size else: self.length = length self.useMMap = useMMap def read(self, sendfile=False): if self.f is None: return None length = self.length if length == 0: self.f = None return None if sendfile and length > SENDFILE_THRESHOLD: # XXX: Yay using non-existent sendfile support! # FIXME: if we return a SendfileBuffer, and then sendfile # fails, then what? Or, what if file is too short? readSize = min(length, SENDFILE_LIMIT) res = SendfileBuffer(self.f, self.start, readSize) self.length -= readSize self.start += readSize return res if self.useMMap and length > MMAP_THRESHOLD: readSize = min(length, MMAP_LIMIT) try: res = mmapwrapper(self.f.fileno(), readSize, access=mmap.ACCESS_READ, offset=self.start) #madvise(res, MADV_SEQUENTIAL) self.length -= readSize self.start += readSize return res except mmap.error: pass # Fall back to standard read. readSize = min(length, self.CHUNK_SIZE) self.f.seek(self.start) b = self.f.read(readSize) bytesRead = len(b) if not bytesRead: raise RuntimeError("Ran out of data reading file %r, expected %d more bytes" % (self.f, length)) else: self.length -= bytesRead self.start += bytesRead return b def close(self): self.f = None SimpleStream.close(self) components.registerAdapter(FileStream, file, IByteStream) ############################## #### MemoryStream #### ############################## class MemoryStream(SimpleStream): """A stream that reads data from a buffer object.""" def __init__(self, mem, start=0, length=None): """ Create the stream from buffer object mem. If you specify start and length, use only that portion of the buffer. 
""" self.mem = mem self.start = start if length is None: self.length = len(mem) - start else: if len(mem) < length: raise ValueError("len(mem) < start + length") self.length = length def read(self): if self.mem is None: return None if self.length == 0: result = None else: result = buffer(self.mem, self.start, self.length) self.mem = None self.length = 0 return result def close(self): self.mem = None SimpleStream.close(self) components.registerAdapter(MemoryStream, str, IByteStream) -components.registerAdapter(MemoryStream, types.BufferType, IByteStream) +components.registerAdapter(MemoryStream, memoryview, IByteStream) ############################## #### CompoundStream #### ############################## class CompoundStream(object): """A stream which is composed of many other streams. Call addStream to add substreams. """ implements(IByteStream, ISendfileableStream) deferred = None length = 0 def __init__(self, buckets=()): self.buckets = [IByteStream(s) for s in buckets] def addStream(self, bucket): """Add a stream to the output""" bucket = IByteStream(bucket) self.buckets.append(bucket) if self.length is not None: if bucket.length is None: self.length = None else: self.length += bucket.length def read(self, sendfile=False): if self.deferred is not None: raise RuntimeError("Call to read while read is already outstanding") if not self.buckets: return None if sendfile and ISendfileableStream.providedBy(self.buckets[0]): try: result = self.buckets[0].read(sendfile) except: return self._gotFailure(Failure()) else: try: result = self.buckets[0].read() except: return self._gotFailure(Failure()) if isinstance(result, Deferred): self.deferred = result result.addCallbacks(self._gotRead, self._gotFailure, (sendfile,)) return result return self._gotRead(result, sendfile) def _gotFailure(self, f): self.deferred = None del self.buckets[0] self.close() return f def _gotRead(self, result, sendfile): self.deferred = None if result is None: del self.buckets[0] # Next bucket 
return self.read(sendfile) if self.length is not None: self.length -= len(result) return result def split(self, point): num = 0 origPoint = point for bucket in self.buckets: num+=1 if point == 0: b = CompoundStream() b.buckets = self.buckets[num:] del self.buckets[num:] return self,b if bucket.length is None: # Indeterminate length bucket. # give up and use fallback splitter. return fallbackSplit(self, origPoint) if point < bucket.length: before,after = bucket.split(point) b = CompoundStream() b.buckets = self.buckets[num:] b.buckets[0] = after del self.buckets[num+1:] self.buckets[num] = before return self,b point -= bucket.length def close(self): for bucket in self.buckets: bucket.close() self.buckets = [] self.length = 0 ############################## #### readStream #### ############################## class _StreamReader(object): """Process a stream's data using callbacks for data and stream finish.""" def __init__(self, stream, gotDataCallback): self.stream = stream self.gotDataCallback = gotDataCallback self.result = Deferred() def run(self): # self.result may be del'd in _read() result = self.result self._read() return result def _read(self): try: result = self.stream.read() except: self._gotError(Failure()) return if isinstance(result, Deferred): result.addCallbacks(self._gotData, self._gotError) else: self._gotData(result) def _gotError(self, failure): result = self.result del self.result, self.gotDataCallback, self.stream result.errback(failure) def _gotData(self, data): if data is None: result = self.result del self.result, self.gotDataCallback, self.stream result.callback(None) return try: self.gotDataCallback(data) except: self._gotError(Failure()) return reactor.callLater(0, self._read) def readStream(stream, gotDataCallback): """Pass a stream's data to a callback. Returns Deferred which will be triggered on finish. Errors in reading the stream or in processing it will be returned via this Deferred. 
""" return _StreamReader(stream, gotDataCallback).run() def readAndDiscard(stream): """Read all the data from the given stream, and throw it out. Returns Deferred which will be triggered on finish. """ return readStream(stream, lambda _: None) def readIntoFile(stream, outFile): """Read a stream and write it into a file. Returns Deferred which will be triggered on finish. """ def done(_): outFile.close() return _ return readStream(stream, outFile.write).addBoth(done) def connectStream(inputStream, factory): """Connect a protocol constructed from a factory to stream. Returns an output stream from the protocol. The protocol's transport will have a finish() method it should call when done writing. """ # XXX deal better with addresses p = factory.buildProtocol(None) out = ProducerStream() out.disconnecting = False # XXX for LineReceiver suckage p.makeConnection(out) readStream(inputStream, lambda _: p.dataReceived(_)).addCallbacks( lambda _: p.connectionLost(ti_error.ConnectionDone()), lambda _: p.connectionLost(_)) return out ############################## #### fallbackSplit #### ############################## def fallbackSplit(stream, point): after = PostTruncaterStream(stream, point) before = TruncaterStream(stream, point, after) return (before, after) class TruncaterStream(object): def __init__(self, stream, point, postTruncater): self.stream = stream self.length = point self.postTruncater = postTruncater def read(self): if self.length == 0: if self.postTruncater is not None: postTruncater = self.postTruncater self.postTruncater = None postTruncater.sendInitialSegment(self.stream.read()) self.stream = None return None result = self.stream.read() if isinstance(result, Deferred): return result.addCallback(self._gotRead) else: return self._gotRead(result) def _gotRead(self, data): if data is None: raise ValueError("Ran out of data for a split of a indeterminate length source") if self.length >= len(data): self.length -= len(data) return data else: before = buffer(data, 
0, self.length) after = buffer(data, self.length) self.length = 0 if self.postTruncater is not None: postTruncater = self.postTruncater self.postTruncater = None postTruncater.sendInitialSegment(after) self.stream = None return before def split(self, point): if point > self.length: raise ValueError("split point (%d) > length (%d)" % (point, self.length)) post = PostTruncaterStream(self.stream, point) trunc = TruncaterStream(post, self.length - point, self.postTruncater) self.length = point self.postTruncater = post return self, trunc def close(self): if self.postTruncater is not None: self.postTruncater.notifyClosed(self) else: # Nothing cares about the rest of the stream self.stream.close() self.stream = None self.length = 0 class PostTruncaterStream(object): deferred = None sentInitialSegment = False truncaterClosed = None closed = False length = None def __init__(self, stream, point): self.stream = stream self.deferred = Deferred() if stream.length is not None: self.length = stream.length - point def read(self): if not self.sentInitialSegment: self.sentInitialSegment = True if self.truncaterClosed is not None: readAndDiscard(self.truncaterClosed) self.truncaterClosed = None return self.deferred return self.stream.read() def split(self, point): return fallbackSplit(self, point) def close(self): self.closed = True if self.truncaterClosed is not None: # have first half close itself self.truncaterClosed.postTruncater = None self.truncaterClosed.close() elif self.sentInitialSegment: # first half already finished up self.stream.close() self.deferred = None # Callbacks from TruncaterStream def sendInitialSegment(self, data): if self.closed: # First half finished, we don't want data. 
self.stream.close() self.stream = None if self.deferred is not None: if isinstance(data, Deferred): data.chainDeferred(self.deferred) else: self.deferred.callback(data) def notifyClosed(self, truncater): if self.closed: # we are closed, have first half really close truncater.postTruncater = None truncater.close() elif self.sentInitialSegment: # We are trying to read, read up first half readAndDiscard(truncater) else: # Idle, store closed info. self.truncaterClosed = truncater ######################################## #### ProducerStream/StreamProducer #### ######################################## class ProducerStream(object): """Turns producers into a IByteStream. Thus, implements IConsumer and IByteStream.""" implements(IByteStream, ti_interfaces.IConsumer) length = None closed = False failed = False producer = None producerPaused = False deferred = None bufferSize = 5 def __init__(self, length=None): self.buffer = [] self.length = length # IByteStream implementation def read(self): if self.buffer: return self.buffer.pop(0) elif self.closed: self.length = 0 if self.failed: f = self.failure del self.failure return defer.fail(f) return None else: deferred = self.deferred = Deferred() if self.producer is not None and (not self.streamingProducer or self.producerPaused): self.producerPaused = False self.producer.resumeProducing() return deferred def split(self, point): return fallbackSplit(self, point) def close(self): """Called by reader of stream when it is done reading.""" self.buffer=[] self.closed = True if self.producer is not None: self.producer.stopProducing() self.producer = None self.deferred = None # IConsumer implementation def write(self, data): if self.closed: return if self.deferred: deferred = self.deferred self.deferred = None deferred.callback(data) else: self.buffer.append(data) if(self.producer is not None and self.streamingProducer and len(self.buffer) > self.bufferSize): self.producer.pauseProducing() self.producerPaused = True def finish(self, 
failure=None): """Called by producer when it is done. If the optional failure argument is passed a Failure instance, the stream will return it as errback on next Deferred. """ self.closed = True if not self.buffer: self.length = 0 if self.deferred is not None: deferred = self.deferred self.deferred = None if failure is not None: self.failed = True deferred.errback(failure) else: deferred.callback(None) else: if failure is not None: self.failed = True self.failure = failure def registerProducer(self, producer, streaming): if self.producer is not None: raise RuntimeError("Cannot register producer %s, because producer %s was never unregistered." % (producer, self.producer)) if self.closed: producer.stopProducing() else: self.producer = producer self.streamingProducer = streaming if not streaming: producer.resumeProducing() def unregisterProducer(self): self.producer = None class StreamProducer(object): """A push producer which gets its data by reading a stream.""" implements(ti_interfaces.IPushProducer) deferred = None finishedCallback = None paused = False consumer = None def __init__(self, stream, enforceStr=True): self.stream = stream self.enforceStr = enforceStr def beginProducing(self, consumer): if self.stream is None: return defer.succeed(None) self.consumer = consumer finishedCallback = self.finishedCallback = Deferred() self.consumer.registerProducer(self, True) self.resumeProducing() return finishedCallback def resumeProducing(self): self.paused = False if self.deferred is not None: return try: data = self.stream.read() except: self.stopProducing(Failure()) return if isinstance(data, Deferred): self.deferred = data.addCallbacks(self._doWrite, self.stopProducing) else: self._doWrite(data) def _doWrite(self, data): if self.consumer is None: return if data is None: # The end. 
if self.consumer is not None: self.consumer.unregisterProducer() if self.finishedCallback is not None: self.finishedCallback.callback(None) self.finishedCallback = self.deferred = self.consumer = self.stream = None return self.deferred = None if self.enforceStr: # XXX: sucks that we have to do this. make transport.write(buffer) work! data = str(buffer(data)) self.consumer.write(data) if not self.paused: self.resumeProducing() def pauseProducing(self): self.paused = True def stopProducing(self, failure=ti_error.ConnectionLost()): if self.consumer is not None: self.consumer.unregisterProducer() if self.finishedCallback is not None: if failure is not None: self.finishedCallback.errback(failure) else: self.finishedCallback.callback(None) self.finishedCallback = None self.paused = True if self.stream is not None: self.stream.close() self.finishedCallback = self.deferred = self.consumer = self.stream = None ############################## #### ProcessStreamer #### ############################## class _ProcessStreamerProtocol(protocol.ProcessProtocol): def __init__(self, inputStream, outStream, errStream): self.inputStream = inputStream self.outStream = outStream self.errStream = errStream self.resultDeferred = defer.Deferred() def connectionMade(self): p = StreamProducer(self.inputStream) # if the process stopped reading from the input stream, # this is not an error condition, so it oughtn't result # in a ConnectionLost() from the input stream: p.stopProducing = lambda err=None: StreamProducer.stopProducing(p, err) d = p.beginProducing(self.transport) d.addCallbacks(lambda _: self.transport.closeStdin(), self._inputError) def _inputError(self, f): log.msg("Error in input stream for %r" % self.transport) log.err(f) self.transport.closeStdin() def outReceived(self, data): self.outStream.write(data) def errReceived(self, data): self.errStream.write(data) def outConnectionLost(self): self.outStream.finish() def errConnectionLost(self): self.errStream.finish() def 
processEnded(self, reason): self.resultDeferred.errback(reason) del self.resultDeferred class ProcessStreamer(object): """Runs a process hooked up to streams. Requires an input stream, has attributes 'outStream' and 'errStream' for stdout and stderr. outStream and errStream are public attributes providing streams for stdout and stderr of the process. """ def __init__(self, inputStream, program, args, env={}): self.outStream = ProducerStream() self.errStream = ProducerStream() self._protocol = _ProcessStreamerProtocol(IByteStream(inputStream), self.outStream, self.errStream) self._program = program self._args = args self._env = env def run(self): """Run the process. Returns Deferred which will eventually have errback for non-clean (exit code > 0) exit, with ProcessTerminated, or callback with None on exit code 0. """ # XXX what happens if spawn fails? reactor.spawnProcess(self._protocol, self._program, self._args, env=self._env) del self._env return self._protocol.resultDeferred.addErrback(lambda _: _.trap(ti_error.ProcessDone)) ############################## #### generatorToStream #### ############################## class _StreamIterator(object): done=False def __iter__(self): return self - def next(self): + def __next__(self): if self.done: raise StopIteration return self.value wait=object() class _IteratorStream(object): length = None def __init__(self, fun, stream, args, kwargs): self._stream=stream self._streamIterator = _StreamIterator() self._gen = fun(self._streamIterator, *args, **kwargs) def read(self): try: - val = self._gen.next() + val = next(self._gen) except StopIteration: return None else: if val is _StreamIterator.wait: newdata = self._stream.read() if isinstance(newdata, defer.Deferred): return newdata.addCallback(self._gotRead) else: return self._gotRead(newdata) return val def _gotRead(self, data): if data is None: self._streamIterator.done=True else: self._streamIterator.value=data return self.read() def close(self): self._stream.close() del 
self._gen, self._stream, self._streamIterator def split(self): return fallbackSplit(self) def generatorToStream(fun): """Converts a generator function into a stream. The function should take an iterator as its first argument, which will be converted *from* a stream by this wrapper, and yield items which are turned *into* the results from the stream's 'read' call. One important point: before every call to input.next(), you *MUST* do a "yield input.wait" first. Yielding this magic value takes care of ensuring that the input is not a deferred before you see it. >>> from xcap.web import stream >>> from string import maketrans >>> alphabet = 'abcdefghijklmnopqrstuvwxyz' >>> >>> def encrypt(input, key): ... code = alphabet[key:] + alphabet[:key] ... translator = maketrans(alphabet+alphabet.upper(), code+code.upper()) ... yield input.wait ... for s in input: ... yield str(s).translate(translator) ... yield input.wait ... >>> encrypt = stream.generatorToStream(encrypt) >>> >>> plaintextStream = stream.MemoryStream('SampleSampleSample') >>> encryptedStream = encrypt(plaintextStream, 13) >>> encryptedStream.read() 'FnzcyrFnzcyrFnzcyr' >>> >>> plaintextStream = stream.MemoryStream('SampleSampleSample') >>> encryptedStream = encrypt(plaintextStream, 13) >>> evenMoreEncryptedStream = encrypt(encryptedStream, 13) >>> evenMoreEncryptedStream.read() 'SampleSampleSample' """ def generatorToStream_inner(stream, *args, **kwargs): return _IteratorStream(fun, stream, args, kwargs) return generatorToStream_inner ############################## #### BufferedStream #### ############################## class BufferedStream(object): """A stream which buffers its data to provide operations like readline and readExactly.""" data = "" def __init__(self, stream): self.stream = stream def _readUntil(self, f): """Internal helper function which repeatedly calls f each time after more data has been received, until it returns non-None.""" while True: r = f() if r is not None: yield r; return newdata = 
self.stream.read() if isinstance(newdata, defer.Deferred): newdata = defer.waitForDeferred(newdata) yield newdata; newdata = newdata.getResult() if newdata is None: # End Of File newdata = self.data self.data = '' yield newdata; return self.data += str(newdata) _readUntil = defer.deferredGenerator(_readUntil) def readExactly(self, size=None): """Read exactly size bytes of data, or, if size is None, read the entire stream into a string.""" if size is not None and size < 0: raise ValueError("readExactly: size cannot be negative: %s", size) def gotdata(): data = self.data if size is not None and len(data) >= size: pre,post = data[:size], data[size:] self.data = post return pre return self._readUntil(gotdata) def readline(self, delimiter='\r\n', size=None): """ Read a line of data from the string, bounded by delimiter. The delimiter is included in the return value. If size is specified, read and return at most that many bytes, even if the delimiter has not yet been reached. If the size limit falls within a delimiter, the rest of the delimiter, and the next line will be returned together. 
""" if size is not None and size < 0: raise ValueError("readline: size cannot be negative: %s" % (size, )) def gotdata(): data = self.data if size is not None: splitpoint = data.find(delimiter, 0, size) if splitpoint == -1: if len(data) >= size: splitpoint = size else: splitpoint += len(delimiter) else: splitpoint = data.find(delimiter) if splitpoint != -1: splitpoint += len(delimiter) if splitpoint != -1: pre = data[:splitpoint] self.data = data[splitpoint:] return pre return self._readUntil(gotdata) def pushback(self, pushed): """Push data back into the buffer.""" self.data = pushed + self.data def read(self): data = self.data if data: self.data = "" return data return self.stream.read() def _len(self): l = self.stream.length if l is None: return None return l + len(self.data) length = property(_len) def split(self, offset): off = offset - len(self.data) pre, post = self.stream.split(max(0, off)) pre = BufferedStream(pre) post = BufferedStream(post) if off < 0: pre.data = self.data[:-off] post.data = self.data[-off:] else: pre.data = self.data return pre, post def substream(stream, start, end): if start > end: raise ValueError("start position must be less than end position %r" % ((start, end),)) stream = stream.split(start)[1] return stream.split(end - start)[0] __all__ = ['IStream', 'IByteStream', 'FileStream', 'MemoryStream', 'CompoundStream', 'readAndDiscard', 'fallbackSplit', 'ProducerStream', 'StreamProducer', 'BufferedStream', 'readStream', 'ProcessStreamer', 'readIntoFile', 'generatorToStream'] diff --git a/xcap/xcapdiff.py b/xcap/xcapdiff.py index ddd058d..8995288 100644 --- a/xcap/xcapdiff.py +++ b/xcap/xcapdiff.py @@ -1,194 +1,194 @@ """Track changes of the documents and notify subscribers Create a Notifier object: >>> n = Notifier(xcap_root, publish_xcapdiff_func) When a change occurs, call on_change >>> n.on_change(xcap_uri_updated, old_etag, new_etag) (old_etag being None means the document was just created, new_etag being None means the document was 
deleted) Notifier will call publish_xcapdiff_func with 2 args: user's uri and xcap-diff document. Number of calls is limited to no more than 1 call per MIN_WAIT seconds for a given user uri. """ from time import time from functools import wraps from twisted.internet import reactor def xml_xcapdiff(xcap_root, content): return """ %s """ % (xcap_root, content) def xml_document(sel, old_etag, new_etag): if old_etag: old_etag = ( ' previous-etag="%s"' % old_etag ) else: old_etag = '' if new_etag: new_etag = ( ' new-etag="%s"' % new_etag ) else: new_etag = '' return '' % (new_etag, sel, old_etag) class UserChanges(object): MIN_WAIT = 5 def __init__(self, publish_xcapdiff): self.changes = {} self.rate_limit = RateLimit(self.MIN_WAIT) self.publish_xcapdiff = publish_xcapdiff def add_change(self, uri, old_etag, etag, xcap_root): self.changes.setdefault(uri, [old_etag, etag])[1] = etag self.rate_limit.callAtLimitedRate(self.publish, uri.user.uri, xcap_root) def publish(self, user_uri, xcap_root): if self.changes: self.publish_xcapdiff(user_uri, self.unload_changes(xcap_root)) def unload_changes(self, xcap_root): docs = [] - for uri, (old_etag, etag) in self.changes.iteritems(): + for uri, (old_etag, etag) in self.changes.items(): docs.append(xml_document(uri, old_etag, etag)) result = xml_xcapdiff(xcap_root, '\n'.join(docs)) self.changes.clear() return result - def __nonzero__(self): + def __bool__(self): return self.changes.__nonzero__() class Notifier(object): def __init__(self, xcap_root, publish_xcapdiff_func): self.publish_xcapdiff = publish_xcapdiff_func self.xcap_root = xcap_root # maps user_uri to UserChanges self.users_changes = {} def on_change(self, uri, old_etag, new_etag): changes = self.users_changes.setdefault(uri.user, UserChanges(self.publish_xcapdiff)) changes.add_change(uri, old_etag, new_etag, self.xcap_root) class RateLimit(object): def __init__(self, min_wait): # minimum number of seconds between calls self.min_wait = min_wait # time() of the last call 
self.last_call = 0 # DelayedCall object of scheduled call self.delayed_call = None def callAtLimitedRate(self, f, *args, **kwargs): """Call f(*args, **kw) if it wasn't called in the last self.min_wait seconds. If it was, schedule it for later. Don't do anything if it's already scheduled. >>> rate = RateLimit(1) >>> def f(a, start = time()): ... print "%d %s" % (time()-start, a) ... return 'return value is lost!' >>> rate.callAtLimitedRate(f, 'a') 0 a >>> rate.callAtLimitedRate(f, 'b') # scheduled for 1 second later >>> rate.callAtLimitedRate(f, 'c') # ignored as there's already call in progress >>> _ = reactor.callLater(1.5, rate.callAtLimitedRate, f, 'd') >>> _ = reactor.callLater(2.1, reactor_stop) >>> reactor_run() 1 b 2 d """ current = time() delta = current - self.last_call if not self.delayed_call or \ self.delayed_call.called or \ self.delayed_call.cancelled: @wraps(f) def wrapped_f(): try: return f(*args, **kwargs) finally: self.last_call = time() self.delayed_call = callMaybeLater(self.min_wait - delta, wrapped_f) class RateLimitedFun(RateLimit): def __init__(self, min_wait, function): RateLimit.__init__(self, min_wait) self.function = function def __call__(self, *args, **kwargs): return self.callAtLimitedRate(self.function, *args, **kwargs) def limit_rate(min_wait): """Decorator for limiting rate of the function. The resulting value of the new function will be None regardless of what the wrapped function returned. >>> @limit_rate(1) ... def f(a, start = time()): ... print "%d %s" % (time()-start, a) ... return 'return value is lost!' 
>>> f('a') 0 a >>> f('b') # scheduled for 1 second later >>> f('c') # ignored as there's already call in progress >>> _ = reactor.callLater(1.5, f, 'd') >>> _ = reactor.callLater(2.1, reactor_stop) >>> reactor_run() 1 b 2 d """ rate = RateLimit(min_wait) def decorate(f): @wraps(f) def wrapped(*args, **kwargs): rate.callAtLimitedRate(f, *args, **kwargs) return wrapped return decorate def callMaybeLater(seconds, f, *args, **kw): "execute f and return None if seconds is zero, callLater otherwise" if seconds <= 0: f(*args, **kw) else: return reactor.callLater(seconds, f, *args, **kw) if __name__=='__main__': def reactor_run(first_time = [True]): if first_time[0]: reactor.run() first_time[0] = False else: reactor.running = True reactor.mainLoop() def reactor_stop(): reactor.running = False import doctest doctest.testmod() diff --git a/xcap/xpath.py b/xcap/xpath.py index 0fdcd77..acb909b 100644 --- a/xcap/xpath.py +++ b/xcap/xpath.py @@ -1,375 +1,375 @@ import re from application import log from copy import copy from lxml import _elementpath as ElementPath from xml.sax.saxutils import quoteattr __all__ = ['parse_node_selector', 'AttributeSelector', 'DocumentSelector', 'ElementSelector', 'NamespaceSelector', 'NodeSelector'] # Errors class Error(ValueError): pass class NodeParsingError(Error): http_error = 400 class DocumentSelectorError(Error): http_error = 404 # XPath tokenizer class List(list): def get(self, index, default=None): try: return self[index] except LookupError: return default class Op(str): tag = False class Tag(str): tag = True class XPathTokenizer(object): @classmethod def tokens(cls, selector): """ >>> xpath_tokenizer('resource-lists') ['resource-lists'] >>> xpath_tokenizer('list[@name="friends"]') ['list', '[', '@', 'name', '=', 'friends', ']'] We cannot properly tokenize an URI like this :( >>> uri_ugly = 'external[@anchor="http://xcap.example.org/resource-lists/users/sip:a@example.org/index/~~/resource-lists/list[@name="mkting"]"]' >>> 
len(xpath_tokenizer(uri_ugly)) # expected 7 10 To feed such URI to this function, replace quote \" with " >>> uri_nice = 'external[@anchor="http://xcap.example.org/resource-lists/users/sip:a@example.org/index/~~/resource-lists/list[@name="mkting"]"]' >>> len(xpath_tokenizer(uri_nice)) # expected 7 7 """ def unquote_attr_value(s): # XXX currently equivalent but differently encoded URIs won't be considered equal (", etc.) if len(s) > 1 and s[0] == s[-1] and s[0] in '"\'': return s[1:-1] raise NodeParsingError tokens = List() prev = None for op, tag in ElementPath.xpath_tokenizer(selector): if prev == '=': unq = unquote_attr_value else: unq = lambda x:x if op: x = Op(unq(op)) else: x = Tag(unq(tag)) tokens.append(x) prev = x return tokens # XPath parsing def read_element_tag(lst, index, namespace, namespaces): if index == len(lst): raise NodeParsingError elif lst[index] == '*': return '*', index+1 elif lst.get(index+1) == ':': if not lst[index].tag: raise NodeParsingError if not lst.get(index+2) or not lst.get(index+2).tag: raise NodeParsingError try: namespaces[lst[index]] except LookupError: raise NodeParsingError return (namespaces[lst[index]], lst[index+2]), index+3 else: return (namespace, lst[index]), index+1 def read_position(lst, index): if lst.get(index) == '[' and lst.get(index+2) == ']': return int(lst[index+1]), index+3 return None, index # XML attributes don't belong to the same namespace as containing tag def read_att_test(lst, index, _namespace, namespaces): if lst.get(index) == '[' and lst.get(index+1) == '@' and lst.get(index+3) == '=' and lst.get(index+5) == ']': return (None, lst[index+2]), lst[index+4], index+6 elif lst.get(index) == '[' and lst.get(index+1) == '@' and lst.get(index+3) == ':' and lst.get(index+5) == '=' and lst.get(index+7) == ']': return (namespaces[lst[index+2]], lst[index+4]), lst[index+6], index+8 return None, None, index class Step(object): def __init__(self, name, position=None, att_name=None, att_value=None): self.name = 
name self.position = position self.att_name = att_name self.att_value = att_value def to_string(self, ns_prefix_mapping=dict()): try: namespace, name = self.name except ValueError: res = self.name else: prefix = ns_prefix_mapping[namespace] if prefix: res = prefix + ':' + name else: res = name if self.position is not None: res += '[%s]' % self.position if self.att_name is not None: namespace, name = self.att_name if namespace: prefix = ns_prefix_mapping[namespace] else: prefix = None if prefix: res += '[@%s:%s=%s]' % (prefix, name, quoteattr(self.att_value)) else: res += '[@%s=%s]' % (name, quoteattr(self.att_value)) return res def __repr__(self): args = [self.name, self.position, self.att_name, self.att_value] while args and args[-1] is None: del args[-1] args = [repr(x) for x in args] return 'Step(%s)' % ', '.join(args) def read_step(lst, index, namespace, namespaces): if lst.get(index) == '@': return AttributeSelector(lst[index+1]), index+2 elif lst.get(index) == 'namespace' and lst.get(index+1) == '::' and lst.get(index+2) == '*': return NamespaceSelector(), index+3 else: tag, index = read_element_tag(lst, index, namespace, namespaces) position, index = read_position(lst, index) att_name, att_value, index = read_att_test(lst, index, namespace, namespaces) return Step(tag, position, att_name, att_value), index def read_slash(lst, index): if lst.get(index) == '/': return index+1 raise NodeParsingError def read_node_selector(lst, namespace, namespaces): index = 0 if lst.get(0) == '/': index = read_slash(lst, index) steps = [] terminal_selector = None while True: step, index = read_step(lst, index, namespace, namespaces) if isinstance(step, TerminalSelector): if index != len(lst): raise NodeParsingError terminal_selector = step break steps.append(step) if index == len(lst): break index = read_slash(lst, index) return ElementSelector(steps, namespace, namespaces), terminal_selector def parse_node_selector(selector, namespace=None, namespaces=dict()): """ >>> 
parse_node_selector('/resource-lists', None, {}) ([Step((None, 'resource-lists'))], None) >>> parse_node_selector('/resource-lists/list[1]/entry[@uri="sip:bob@example.com"]', None, {}) ([Step((None, 'resource-lists')), Step((None, 'list'), 1), Step((None, 'entry'), None, (None, 'uri'), 'sip:bob@example.com')], None) >>> parse_node_selector('/*/list[1][@name="friends"]/@name') ([Step('*'), Step((None, 'list'), 1, (None, 'name'), 'friends')], AttributeSelector('name')) >>> parse_node_selector('/*[10][@att="val"]/namespace::*') ([Step('*', 10, (None, 'att'), 'val')], NamespaceSelector()) >>> x = parse_node_selector('/resource-lists/list[@name="friends"]/external[@anchor="http://xcap.example.org/resource-lists/users/sip:a@example.org/index/~~/resource-lists/list%5b@name=%22mkting%22%5d"]') """ try: tokens = XPathTokenizer.tokens(selector) element_selector, terminal_selector = read_node_selector(tokens, namespace, namespaces) element_selector._original_selector = selector return element_selector, terminal_selector - except NodeParsingError, e: + except NodeParsingError as e: e.args = ('Failed to parse node: %r' % selector,) raise except Exception: log.error('internal error in parse_node_selector(%r)' % selector) raise # XPath selectors class TerminalSelector(object): pass class AttributeSelector(TerminalSelector): def __init__(self, attribute): self.attribute = attribute def __str__(self): return '@' + self.attribute def __repr__(self): return 'AttributeSelector(%r)' % self.attribute class DocumentSelector(str): """Constructs a DocumentSelector containing the application_id, context, user_id and document from the given selector string. 
>>> x = DocumentSelector('/resource-lists/users/sip:joe@example.com/index') >>> x.application_id, x.context, x.user_id, x.document_path ('resource-lists', 'users', 'sip:joe@example.com', 'index') >>> x = DocumentSelector('/rls-services/global/index') >>> x.application_id, x.context, x.user_id, x.document_path ('rls-services', 'global', None, 'index') """ def __init__(self, selector): if selector[:1] == '/': selector = selector[1:] else: raise DocumentSelectorError("Document selector does not start with /") if selector[-1:] == '/': selector = selector[:-1] if not selector: raise DocumentSelectorError("Document selector does not contain auid") segments = selector.split('/') if len(segments) < 2: raise DocumentSelectorError("Document selector does not contain context: %r" % selector) self.application_id = segments[0] self.context = segments[1] if self.context not in ("users", "global"): raise DocumentSelectorError("Document selector context must be either 'users' or 'global', not %r: %r" % \ (self.context, selector)) self.user_id = None if self.context == "users": try: self.user_id = segments[2] except IndexError: raise DocumentSelectorError('Document selector does not contain user id: %r' % selector) segments = segments[3:] else: segments = segments[2:] if not segments: raise DocumentSelectorError("Document selector does not contain document's path: %r" % selector) self.document_path = '/'.join(segments) def __repr__(self): return '%s(%s)' % (self.__class__.__name__, str.__repr__(self)) class ElementSelector(list): XML_TAG_REGEXP = re.compile('\s*<([^ >/]+)') def __init__(self, lst, namespace, namespaces): list.__init__(self, lst) self.namespace = namespace self.namespaces = namespaces def _parse_qname(self, qname): if qname == '*': return qname try: prefix, name = qname.split(':') except ValueError: return (self.namespace, qname) else: return self.namespaces[prefix], name def replace_default_prefix(self, ns_prefix_mapping): steps = [] for step in self: try: 
namespace, name = step.name except ValueError: steps.append(str(step)) else: steps.append(step.to_string(ns_prefix_mapping)) return '/' + '/'.join(steps) def fix_star(self, element_body): """ >>> elem_selector = parse_node_selector('/watcherinfo/watcher-list/*[@id="8ajksjda7s"]', None, {})[0] >>> elem_selector.fix_star('')[-1].name[1] 'watcher' """ if self and self[-1].name == '*' and self[-1].position is None: m = self.XML_TAG_REGEXP.match(element_body) if m: (name, ) = m.groups() result = copy(self) result[-1].name = self._parse_qname(name) return result return self class NamespaceSelector(TerminalSelector): def __str__(self): return "namespace::*" def __repr__(self): return 'NamespaceSelector()' class NodeSelector(object): XMLNS_REGEXP = re.compile("xmlns\((?P.*?)\)") XPATH_DEFAULT_PREFIX = 'default' def __init__(self, selector, namespace=None): self._original_selector = selector sections = selector.split('?', 1) if len(sections) == 2: self.ns_bindings = self._parse_ns_bindings(sections[1]) else: self.ns_bindings = {} self.element_selector, self.terminal_selector = parse_node_selector(sections[0], namespace, self.ns_bindings) def __str__(self): return self._original_selector # http://www.w3.org/TR/2003/REC-xptr-xmlns-20030325/ def _parse_ns_bindings(self, query): ns_bindings = {} ns_matches = self.XMLNS_REGEXP.findall(query) for m in ns_matches: try: prefix, ns = m.split('=') ns_bindings[prefix] = ns except ValueError: log.error("Ignoring invalid XPointer XMLNS expression: %r" % m) continue return ns_bindings def replace_default_prefix(self, defprefix=None, append_terminal=True): if defprefix is None: defprefix = self.XPATH_DEFAULT_PREFIX - namespace2prefix = dict((v, k) for (k, v) in self.ns_bindings.iteritems()) + namespace2prefix = dict((v, k) for (k, v) in self.ns_bindings.items()) namespace2prefix[self.element_selector.namespace] = defprefix res = self.element_selector.replace_default_prefix(namespace2prefix) if append_terminal and self.terminal_selector: 
res += '/' + str(self.terminal_selector) return res def get_ns_bindings(self, default_ns): ns_bindings = self.ns_bindings.copy() ns_bindings[self.XPATH_DEFAULT_PREFIX] = default_ns return ns_bindings