diff --git a/xcap/sax/__init__.py b/xcap/sax/__init__.py deleted file mode 100644 index 4f57ba2..0000000 --- a/xcap/sax/__init__.py +++ /dev/null @@ -1,47 +0,0 @@ -"""Simple API for XML (SAX) implementation for Python. - -This module provides an implementation of the SAX 2 interface; -information about the Java version of the interface can be found at -http://www.megginson.com/SAX/. The Python version of the interface is -documented at <...>. - -This package contains the following interface classes and functions: - -ContentHandler, ErrorHandler - base classes for SAX2 handlers -SAXException, SAXNotRecognizedException, -SAXParseException, SAXNotSupportedException - SAX exceptions - -make_parser - creation of a new parser object -parse, parseString - parse a document, using a provided handler - -""" - -from .xmlreader import InputSource -from .handler import ContentHandler, ErrorHandler -from ._exceptions import SAXException, SAXNotRecognizedException,\ - SAXParseException, SAXNotSupportedException,\ - SAXReaderNotAvailable - -from .sax2exts import make_parser - -def parse(filename_or_stream, handler, errorHandler=ErrorHandler()): - parser = make_parser() - parser.setContentHandler(handler) - parser.setErrorHandler(errorHandler) - parser.parse(filename_or_stream) - -def parseString(string, handler, errorHandler=ErrorHandler()): - try: - from io import StringIO - except ImportError: - from io import StringIO - - if errorHandler is None: - errorHandler = ErrorHandler() - parser = make_parser() - parser.setContentHandler(handler) - parser.setErrorHandler(errorHandler) - - inpsrc = InputSource() - inpsrc.setByteStream(StringIO(string)) - parser.parse(inpsrc) diff --git a/xcap/sax/_exceptions.py b/xcap/sax/_exceptions.py deleted file mode 100644 index 628e80d..0000000 --- a/xcap/sax/_exceptions.py +++ /dev/null @@ -1,131 +0,0 @@ -"""Different kinds of SAX Exceptions""" -import sys -if sys.platform[:4] == "java": - from java.lang import Exception -del sys - -# ===== SAXEXCEPTION ===== - -class SAXException(Exception): - """Encapsulate an XML error or warning. This class can contain - basic error or warning information from either the XML parser or - the application: you can subclass it to provide additional - functionality, or to add localization. Note that although you will - receive a SAXException as the argument to the handlers in the - ErrorHandler interface, you are not actually required to throw - the exception; instead, you can simply read the information in - it.""" - - def __init__(self, msg, exception=None): - """Creates an exception. The message is required, but the exception - is optional.""" - self._msg = msg - self._exception = exception - Exception.__init__(self, msg) - - def getMessage(self): - "Return a message for this exception." - return self._msg - - def getException(self): - "Return the embedded exception, or None if there was none." - return self._exception - - def __str__(self): - "Create a string representation of the exception." - return self._msg - - def __getitem__(self, ix): - """Avoids weird error messages if someone does exception[ix] by - mistake, since Exception has __getitem__ defined.""" - raise AttributeError("__getitem__") - - -# ===== SAXPARSEEXCEPTION ===== - -class SAXParseException(SAXException): - """Encapsulate an XML parse error or warning. - - This exception will include information for locating the error in - the original XML document. Note that although the application will - receive a SAXParseException as the argument to the handlers in the - ErrorHandler interface, the application is not actually required - to throw the exception; instead, it can simply read the - information in it and take a different action. - - Since this exception is a subclass of SAXException, it inherits - the ability to wrap another exception.""" - - def __init__(self, msg, exception, locator): - "Creates the exception. The exception parameter is allowed to be None." - SAXException.__init__(self, msg, exception) - self._locator = locator - - # We need to cache this stuff at construction time. - # If this exception is thrown, the objects through which we must - # traverse to get this information may be deleted by the time - # it gets caught. - self._systemId = self._locator.getSystemId() - self._colnum = self._locator.getColumnNumber() - self._linenum = self._locator.getLineNumber() - - def getColumnNumber(self): - """The column number of the end of the text where the exception - occurred.""" - return self._colnum - - def getLineNumber(self): - "The line number of the end of the text where the exception occurred." - return self._linenum - - def getPublicId(self): - "Get the public identifier of the entity where the exception occurred." - return self._locator.getPublicId() - - def getSystemId(self): - "Get the system identifier of the entity where the exception occurred." - return self._systemId - - def __str__(self): - "Create a string representation of the exception." - sysid = self.getSystemId() - if sysid is None: - sysid = "" - linenum = self.getLineNumber() - if linenum is None: - linenum = "?" - colnum = self.getColumnNumber() - if colnum is None: - colnum = "?" - return "%s:%s:%s: %s" % (sysid, linenum, colnum, self._msg) - - -# ===== SAXNOTRECOGNIZEDEXCEPTION ===== - -class SAXNotRecognizedException(SAXException): - """Exception class for an unrecognized identifier. - - An XMLReader will raise this exception when it is confronted with an - unrecognized feature or property. SAX applications and extensions may - use this class for similar purposes.""" - - -# ===== SAXNOTSUPPORTEDEXCEPTION ===== - -class SAXNotSupportedException(SAXException): - """Exception class for an unsupported operation. - - An XMLReader will raise this exception when a service it cannot - perform is requested (specifically setting a state or value). SAX - applications and extensions may use this class for similar - purposes.""" - -# ===== SAXNOTSUPPORTEDEXCEPTION ===== - -class SAXReaderNotAvailable(SAXNotSupportedException): - """Exception class for a missing driver. - - An XMLReader module (driver) should raise this exception when it - is first imported, e.g. when a support module cannot be imported. - It also may be raised during parsing, e.g. if executing an external - program is not permitted.""" diff --git a/xcap/sax/drivers/__init__.py b/xcap/sax/drivers/__init__.py deleted file mode 100644 index 67d16c1..0000000 --- a/xcap/sax/drivers/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"Directory for SAX version 1 drivers." diff --git a/xcap/sax/drivers/drv_htmllib.py b/xcap/sax/drivers/drv_htmllib.py deleted file mode 100644 index c00e95c..0000000 --- a/xcap/sax/drivers/drv_htmllib.py +++ /dev/null @@ -1,50 +0,0 @@ -""" -SAX driver for htmllib.py - -$Id: drv_htmllib.py,v 1.5 2001/12/30 12:13:44 loewis Exp $ -""" - -version="0.10" - -from xml.sax import saxutils,saxlib -from xml.sax.drivers import pylibs - -import htmllib,sys,string - -# --- SAX_HLParser - -class SAX_HLParser(pylibs.SGMLParsers,htmllib.HTMLParser): - "SAX driver for htmllib.py." - - def __init__(self): - htmllib.HTMLParser.__init__(self,None) - pylibs.LibParser.__init__(self) - self.standalone=0 - - # --- EXPERIMENTAL PYTHON SAX EXTENSIONS - - def get_parser_name(self): - return "htmllib" - - def get_parser_version(self): - return sys.version[:string.find(sys.version," ")] - - def get_driver_version(self): - return version - - def is_validating(self): - return 0 - - def is_dtd_reading(self): - return 0 - - # reset and feed are taken care of by the subclassing :-) - - def close(self): - htmllib.HTMLParser.close(self) - self.doc_handler.endDocument() - -# --- Global functions - -def create_parser(): - return SAX_HLParser() diff --git a/xcap/sax/drivers/drv_ltdriver.py b/xcap/sax/drivers/drv_ltdriver.py deleted file mode 100644 index 3a19dec..0000000 --- a/xcap/sax/drivers/drv_ltdriver.py +++ /dev/null @@ -1,130 +0,0 @@ -""" -A SAX driver for the LT XML Python interface. -""" - -version="0.10" - -from types import * -from xml.sax import saxlib,saxutils -from XMLinter import * - -# --- The parser - -class SAX_XMLinter(saxlib.Parser): - - def __init__(self): - saxlib.Parser.__init__(self) - - def parse(self,sysID): - self._parse(Open(sysID,NSL_read)) - - def parseFile(self,file): - self._parse(FOpen(file,NSL_read)) - - def setLocale(self, locale): - raise SAXException("Locales not supported") - - # --- EXPERIMENTAL PYTHON SAX EXTENSIONS: - - def get_parser_name(self): - return "XMLinter" - - def get_parser_version(self): - return "Unknown" - - def get_driver_version(self): - return version - - def is_validating(self): - return 0 - - def is_dtd_reading(self): - return 1 - - def reset(self): - raise SAXException("Incremental parsing not supported") - - def feed(self,data): - raise SAXException("Incremental parsing not supported") - - def close(self): - raise SAXException("Incremental parsing not supported") - - # --- INTERNAL METHODS - - def _parse(self,file): - bit=GetNextBit(file) - while bit: - if bit.type=="start": - self.doc_handler.startElement(bit.label, - AttributeItem(bit.item)) - elif bit.type=="end": - self.doc_handler.endElement(bit.label) - elif bit.type=="text": - self.doc_handler.characters(bit.body,0,len(bit.body)) - elif bit.type=="empty": - self.doc_handler.startElement(bit.label, - AttributeItem(bit.item)) - self.doc_handler.endElement(bit.label) - elif bit.type=="bad": - self.err_handler.fatalError(saxlib.SAXException("Syntax error",None)) - elif bit.type=="pi": - print("?pi") - else: - print("###"+bit.type) - - bit=GetNextBit(file) - -# --- AttributeItem - -def name(pair): - return pair[0] - -class AttributeItem: - - def __init__(self,item): - self.item=item - self.list=ItemActualAttributes(item) - - def getLength(self): - return len(self.list) - - def getName(self, i): - return self.list[i][0] - - def getType(self, i): - return "CDATA" - - def getValue(self, i): - if type(i)==StringType: - return GetAttrVal(self.item,i) - else: - return self.list[i][1] - - def __len__(self): - return len(self.list) - - def __getitem__(self, key): - if type(key)==StringType: - return GetAttrVal(self.item,key) - else: - return self.list[key][0] - - def keys(self): - return list(map(name,self.list)) - - def has_key(self, key): - return GetAttrVal(self.item,key) - -# --- Global functions - -def create_parser(): - return SAX_XMLinter() - -# --- Testing - -if __name__=="__main__": - p=create_parser() - p.setDocumentHandler(saxutils.Canonizer()) - p.setErrorHandler(saxutils.ErrorPrinter()) - p.parse("tst.xml") diff --git a/xcap/sax/drivers/drv_ltdriver_val.py b/xcap/sax/drivers/drv_ltdriver_val.py deleted file mode 100644 index 06c7216..0000000 --- a/xcap/sax/drivers/drv_ltdriver_val.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -A validating-mode SAX driver for the LT XML Python interface. -""" - -version="0.10" - -from . import drv_ltdriver -from XMLinter import * - -class SAX_XMLinter_val(drv_ltdriver.SAX_XMLinter): - - def __init__(self): - drv_ltdriver.SAX_XMLinter.__init__(self) - - def parse(self,sysID): - self._parse(Open(sysID,NSL_read | NSL_read_validate)) - - def parseFile(self,file): - self._parse(FOpen(file,NSL_read | NSL_read_validate)) - - def get_parser_name(self): - return "XMLinter_val" - - def get_driver_version(self): - return version - - def is_validating(self): - return 0 - -# --- Global functions - -def create_parser(): - return SAX_XMLinter_val() - -# --- Testing - -if __name__=="__main__": - from xml.sax import saxutils - p=create_parser() - p.setDocumentHandler(saxutils.Canonizer()) - p.setErrorHandler(saxutils.ErrorPrinter()) - p.parse("tst.xml") diff --git a/xcap/sax/drivers/drv_pyexpat.py b/xcap/sax/drivers/drv_pyexpat.py deleted file mode 100644 index a39850b..0000000 --- a/xcap/sax/drivers/drv_pyexpat.py +++ /dev/null @@ -1,228 +0,0 @@ -# -*- coding: iso-8859-1 -*- -""" -SAX driver for the Pyexpat C module. - -$Id: drv_pyexpat.py,v 1.19 2004/11/29 13:38:23 loewis Exp $ -""" - -# Event handling can be speeded up by bypassing the driver for some events. -# This will be implemented later when I can test this driver. -# -# This driver has been much improved by Geir Ove Grønmo. - -version="0.13" - -from xml.sax import saxlib, saxutils, SAXReaderNotAvailable - -try: - from xml.parsers import expat -except ImportError: - raise SAXReaderNotAvailable("expat not supported",None) - -import urllib.request, urllib.error, urllib.parse,types - -# --- SAX_expat - -class SAX_expat(saxlib.Parser,saxlib.Locator): - "SAX driver for the Pyexpat C module." - - def __init__(self): - saxlib.Parser.__init__(self) - self.reset() - - def startElement(self,name,attrs): - at = {} - # Backward compatibility code, for older versions of the - # PyExpat module - if type(attrs) == type({}): - at = attrs - else: - # Assume it's a list containing alternating names & values - at = {} - for i in range(0, len(attrs), 2): - at[attrs[i]] = attrs[i+1] - - self.doc_handler.startElement(name,saxutils.AttributeMap(at)) - - # FIXME: bypass! - def endElement(self,name): - self.doc_handler.endElement(name) - - def characters(self,data): - self.doc_handler.characters(data,0,len(data)) - - # FIXME: bypass! - def processingInstruction(self,target,data): - self.doc_handler.processingInstruction(target,data) - - def parse(self,sysID): - self.parseFile(urllib.request.urlopen(sysID),sysID) - - def parseFile(self,fileobj,sysID=None): - self.reset() - self.sysID=sysID - self.doc_handler.startDocument() - - buf = fileobj.read(16384) - while buf != "": - if self.parser.Parse(buf, 0) != 1: - self.__report_error() - buf = fileobj.read(16384) - self.parser.Parse("", 1) - - self.doc_handler.endDocument() - self.close(needFinal=0) - - # --- Locator methods. Only usable after errors. - - def getSystemId(self): - if self.sysID!=None: - return self.sysID - else: - return "Unknown" - - def getLineNumber(self): - return self.parser.ErrorLineNumber - - def getColumnNumber(self): - return self.parser.ErrorColumnNumber - - # --- Internal - - def __report_error(self): - errc=self.parser.ErrorCode - msg=expat.ErrorString(errc) - exc=saxlib.SAXParseException(msg,None,self) - self.err_handler.fatalError(exc) - - # --- EXPERIMENTAL PYTHON SAX EXTENSIONS - - def get_parser_name(self): - return "pyexpat" - - def get_parser_version(self): - return "Unknown" - - def get_driver_version(self): - return version - - def is_validating(self): - return 0 - - def is_dtd_reading(self): - return 0 - - def reset(self): - self.sysID=None - self.parser=expat.ParserCreate() - self.parser.StartElementHandler = self.startElement - self.parser.EndElementHandler = self.endElement - self.parser.CharacterDataHandler = self.characters - self.parser.ProcessingInstructionHandler = self.processingInstruction - self.doc_handler.setDocumentLocator(self) - - def feed(self, data): - if self.parser.Parse(data, 0) != 1: - self.__report_error() - - def close(self, needFinal=1): - if self.parser is None: - # make sure close is idempotent - return - if needFinal: - if self.parser.Parse("", 1) != 1: - self.__report_error() - self.parser = None - -# --- An expat driver that uses the lazy map - -class LazyExpatDriver(SAX_expat): - - def __init__(self): - SAX_expat.__init__(self) - self.map=LazyAttributeMap([]) - - def startElement(self,name,attrs): - self.map.list=attrs - self.doc_handler.startElement(name,self.map) - -# --- A lazy attribute map - -# This avoids the costly conversion from a list to a hash table - -class LazyAttributeMap: - """A lazy implementation of AttributeList that takes an - [attr,val,attr,val,...] list and uses it to implement the AttributeList - interface.""" - - def __init__(self, list): - self.list=list - - def getLength(self): - return len(self.list)/2 - - def getName(self, i): - try: - return self.list[2*i] - except IndexError as e: - return None - - def getType(self, i): - return "CDATA" - - def getValue(self, i): - try: - if type(i)==int: - return self.list[2*i+1] - else: - for ix in range(0,len(self.list),2): - if self.list[ix]==i: - return self.list[ix+1] - - return None - except IndexError as e: - return None - - def __len__(self): - return len(self.list)/2 - - def __getitem__(self, key): - if type(key)==int: - return self.list[2*key+1] - else: - for ix in range(0,len(self.list),2): - if self.list[ix]==key: - return self.list[ix+1] - - return None - - def items(self): - result=[""]*(len(self.list)/2) - for ix in range(0,len(self.list),2): - result[ix/2]=(self.list[ix],self.list[ix+1]) - return result - - def keys(self): - result=[""]*(len(self.list)/2) - for ix in range(0,len(self.list),2): - result[ix/2]=self.list[ix] - return result - - def has_key(self,key): - for ix in range(0,len(self.list),2): - if self.list[ix]==key: - return 1 - - return 0 - - def get(self, key, alternative): - for ix in range(0,len(self.list),2): - if self.list[ix]==key: - return self.list[ix+1] - - return alternative - -# --- - -def create_parser(): - return SAX_expat() diff --git a/xcap/sax/drivers/drv_sgmllib.py b/xcap/sax/drivers/drv_sgmllib.py deleted file mode 100644 index 3a01489..0000000 --- a/xcap/sax/drivers/drv_sgmllib.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -SAX driver for sgmllib.py -""" - -version="0.10" - -from xml.sax import saxutils,saxlib -from xml.sax.drivers import pylibs - -import sgmllib,string,sys - -# --- SAX_SLParser - -class SAX_SLParser(pylibs.SGMLParsers,sgmllib.SGMLParser): - "SAX driver for sgmllib.py." - - def __init__(self): - sgmllib.SGMLParser.__init__(self) - pylibs.LibParser.__init__(self) - self.standalone=0 - - # --- EXPERIMENTAL PYTHON SAX EXTENSIONS - - def get_parser_name(self): - return "sgmllib" - - def get_parser_version(self): - return sys.version[:string.find(sys.version," ")] - - def get_driver_version(self): - return version - - def is_validating(self): - return 0 - - def is_dtd_reading(self): - return 0 - - # reset and feed are taken care of by the subclassing :-) - - def close(self): - sgmllib.SGMLParser.close(self) - self.doc_handler.endDocument() - -# --- Global functions - -def create_parser(): - return SAX_SLParser() diff --git a/xcap/sax/drivers/drv_sgmlop.py b/xcap/sax/drivers/drv_sgmlop.py deleted file mode 100644 index f5cbc14..0000000 --- a/xcap/sax/drivers/drv_sgmlop.py +++ /dev/null @@ -1,110 +0,0 @@ -""" -SAX driver for the sgmlop parser. - -$Id: drv_sgmlop.py,v 1.10 2002/08/13 09:28:52 afayolle Exp $ -""" - -version="0.12" - -from xml.parsers import sgmlop -from xml.sax import saxlib,saxutils -from xml.sax import SAXException -import urllib.request, urllib.error, urllib.parse,string - -# --- Driver - -class Parser(saxlib.Parser): - - def __init__(self): - saxlib.Parser.__init__(self) - self.reset() - - def setDocumentHandler(self, dh): - self.parser.register(self) # older version wanted ,1 arg - self.doc_handler=dh - - def parse(self, url): - self.parseFile(urllib.request.urlopen(url)) - - def parseFile(self, file): - self._parsing = 1 - self.doc_handler.startDocument() - parser = self.parser - - while 1: - data = file.read(16384) - if not data: - break - parser.feed(data) - - self.close() - - # --- SAX 1.0 METHODS - - def handle_cdata(self, data): - self.doc_handler.characters(data,0,len(data)) - - def handle_data(self, data): - #ignore white space outside the toplevel element - if self._nesting == 0: - if string.strip(data)!="": - # It's not whitespace? - self.err_handler.error(SAXException( - "characters '%s' outside root element" % data)) - return - self.doc_handler.characters(data,0,len(data)) - - def handle_proc(self, target, data): - if target=='xml': - # Don't report as a processing instruction - return - self.doc_handler.processingInstruction(target,data) - - def handle_charref(self, charno): - if charno<256: - self.doc_handler.characters(chr(charno),0,1) - - def finish_starttag(self, name, attrs): - self._nesting = self._nesting + 1 - self.doc_handler.startElement(name,saxutils.AttributeMap(attrs)) - - def finish_endtag(self,name): - self._nesting = self._nesting - 1 - self.doc_handler.endElement(name) - - # --- EXPERIMENTAL PYTHON SAX EXTENSIONS - - def get_parser_name(self): - return "sgmlop" - - def get_parser_version(self): - return "Unknown" - - def get_driver_version(self): - return version - - def is_validating(self): - return 0 - - def is_dtd_reading(self): - return 0 - - def reset(self): - self.parser=sgmlop.XMLParser() - self._parsing=0 - self._nesting=0 - - def feed(self,data): - if not self._parsing: - self.doc_handler.startDocument() - self._parsing=1 - self.parser.feed(data) - - def close(self): - self.parser.close() - self.doc_handler.endDocument() - -# ---- - -def create_parser(): - return Parser() diff --git a/xcap/sax/drivers/drv_xmldc.py b/xcap/sax/drivers/drv_xmldc.py deleted file mode 100644 index a14a4a9..0000000 --- a/xcap/sax/drivers/drv_xmldc.py +++ /dev/null @@ -1,154 +0,0 @@ -""" -SAX driver for Dan Connollys XML scanner. Should work with Python 1.4. -""" - -version="0.10" - -import sys,urllib.request,urllib.error,urllib.parse,re,string - -if sys.version[:3]<"1.5": - import saxlib -else: - from xml.sax import saxlib - -import xml_dc - -reg_ws="[\n\r\t ]+" -predef_ents={"lt":"<","gt":"<","amp":"&","apos":"'","quot":'"'} - -# --- Driver - -class SAX_xmldc(saxlib.Parser,saxlib.Locator): - - def __init__(self): - saxlib.Parser.__init__(self) - self.current_sysid="" - self.reset() - - # --- Parser methods - - def parse(self, systemId): - try: - self.current_sysid=systemId - infile=urllib.request.urlopen(systemId) - self.parseFile(infile) - finally: - self.current_sysid="" - - def parseFile(self, fileobj): - self.doc_handler.setDocumentLocator(self) - self.reset() - - try: - while 1: - buf=fileobj.read(16384) - if buf=="": - break - - self.feed(buf) - - self.close() - except xml_dc.ScanError as e: - self.err_handler.fatalError(saxlib.SAXParseException(e,None,self)) - except xml_dc.NotWellFormed as e: - self.err_handler.fatalError(saxlib.SAXParseException(e,None,self)) - - # --- Passing on parse events to document handler - - def text(self, str): - self.doc_handler.characters(str,0,len(str)) - - def openStart(self, name): - self.current_elem=name - self.current_attrs_val={} - self.current_attrs_type={} - - def attribute(self, name, type, value): - self.current_attrs_val[name]=value - self.current_attrs_type[name]=type - - def closeStart(self): - self.doc_handler.startElement(self.current_elem, - self.current_attrs_val) - - def closeEmpty(self): - self.doc_handler.startElement(self.current_elem, - self.current_attrs_val) - self.doc_handler.endElement(self.current_elem) - - def endTag(self, name=None): - self.doc_handler.endElement(name) - - def comment(self, stuff): - pass - - def pi(self, stuff): - match=re.search(reg_ws,stuff) - - if not match: - self.doc_handler.processingInstruction(stuff,"") - else: - end_of_target,start_of_data=match.span() - self.doc_handler.processingInstruction(stuff[:end_of_target], - stuff[start_of_data:]) - - def decl(self, name, parts): - pass - - def cref(self, numeral): - numeral=string.atoi(numeral) - self.doc_handler.characters(chr(numeral),0,1) - - def eref(self, name): - pass - - def eof(self): - pass - - # --- Locator methods - - def getLineNumber(self): - return self.parser.line() - - def getSystemId(self): - return self.current_sysid - - # --- EXPERIMENTAL PYTHON SAX EXTENSIONS - - def get_parser_name(self): - return "xmldc" - - def get_parser_version(self): - return "1.8" - - def get_driver_version(self): - return version - - def is_validating(self): - return 0 - - def is_dtd_reading(self): - return 0 - - def reset(self): - self.parser=xml_dc.Scanner() - self.checker=xml_dc.WellFormed() - self.checker.scanner(self.parser) - self.unfed_so_far=1 - - def feed(self,data): - if self.unfed_so_far: - self.doc_handler.startDocument() - self.unfed_so_far=0 - - self.parser.feed(data) - self.parser.next(self) - - def close(self): - self.checker.eof() - self.doc_handler.endDocument() - -# --- - -def create_parser(): - return SAX_xmldc() diff --git a/xcap/sax/drivers/drv_xmllib.py b/xcap/sax/drivers/drv_xmllib.py deleted file mode 100644 index cdc3aa8..0000000 --- a/xcap/sax/drivers/drv_xmllib.py +++ /dev/null @@ -1,108 +0,0 @@ -""" -SAX driver for xmllib.py -""" - -version="0.91" - -from xml.sax import saxutils -from xml.sax.drivers import pylibs - -import xmllib - -# Make it generate Unicode if possible, UTF-8 else -try: - str("") -except NameError: - from xml.str.iso8859 import wstring - def str(str, encoding): - return wstring.decode(encoding, str).utf8() - -# --- SAX_XLParser - -class SAX_XLParser(pylibs.LibParser, xmllib.XMLParser): - "SAX driver for xmllib.py." - - def __init__(self): - xmllib.XMLParser.__init__(self) - pylibs.LibParser.__init__(self) - self.standalone = 0 - self.reset() - - def _convert(self, str): - return str(str, self.encoding) - - def unknown_starttag(self, tag, attributes): - tag = str(tag, self.encoding) - newattr = {} - for k, v in list(attributes.items()): - newattr[str(k, self.encoding)] = str(v, self.encoding) - self.doc_handler.startElement(tag, saxutils.AttributeMap(newattr)) - - def handle_endtag(self, tag, method): - self.doc_handler.endElement(str(tag, self.encoding)) - - def handle_proc(self, name, data): - self.doc_handler.processingInstruction(name, data[1:]) - - def handle_xml(self, encoding, standalone): - self.standalone = standalone == "yes" - if encoding is not None: - self.encoding = encoding - - def handle_data(self, data): - "Handles PCDATA." - data = str(data, self.encoding) - self.doc_handler.characters(data, 0, len(data)) - - def handle_cdata(self, data): - "Handles CDATA marked sections." - data = str(data, self.encoding) - self.doc_handler.characters(data, 0, len(data)) - - def getLineNumber(self): - return self.lineno - - def getSystemId(self): - return self.sysID - - def _can_locate(self): - "Internal: returns true if location info is available." - return 1 - - # --- EXPERIMENTAL SAX PYTHON EXTENSIONS - - def get_parser_name(self): - return "xmllib" - - def get_parser_version(self): - return xmllib.version - - def get_driver_version(self): - return version - - def is_validating(self): - return 0 - - def is_dtd_reading(self): - return 0 - - def reset(self): - xmllib.XMLParser.reset(self) - self.unfed_so_far = 1 - self.encoding = "utf-8" - - def feed(self, data): - if self.unfed_so_far: - self.doc_handler.startDocument() - self.unfed_so_far = 0 - - xmllib.XMLParser.feed(self, data) - - def close(self): - xmllib.XMLParser.close(self) - self.doc_handler.endDocument() - -# --- Global functions - -def create_parser(): - return SAX_XLParser() diff --git a/xcap/sax/drivers/drv_xmlproc.py b/xcap/sax/drivers/drv_xmlproc.py deleted file mode 100644 index 1c27b6a..0000000 --- a/xcap/sax/drivers/drv_xmlproc.py +++ /dev/null @@ -1,167 +0,0 @@ -""" -A SAX driver for xmlproc - -$Id: drv_xmlproc.py,v 1.13 2001/12/30 12:13:45 loewis Exp $ -""" - -version="0.95" - -from xml.sax import saxlib, saxutils -from xml.parsers.xmlproc import xmlproc - -import os - -# --- SAX_XPParser - -class SAX_XPParser(saxlib.Parser,xmlproc.Application,xmlproc.DTDConsumer, - xmlproc.ErrorHandler,xmlproc.PubIdResolver): - - def __init__(self): - saxlib.Parser.__init__(self) - self.reset() - self.ns_separator=" " - self.locator=1 - self.is_parsing=0 - self.stop_on_error=1 - - def parse(self,sysID): - self.reset() - try: - self.is_parsing=1 - self.parser.parse_resource(sysID) - finally: - self.is_parsing=0 - - def parseFile(self,file): - self.reset() - try: - self.is_parsing=1 - self.parser.read_from(file) - self.parser.flush() - self.parser.parseEnd() - finally: - self.is_parsing=0 - - def _create_parser(self): - return xmlproc.XMLProcessor() - - def setLocale(self, locale): - try: - self.parser.set_error_language(locale) - except KeyError: - raise saxlib.SAXNotSupportedException("Locale '%s' not supported" % locale) - - # --- data event methods - - def doc_start(self): - if self.locator: - self.doc_handler.setDocumentLocator(self) - self.doc_handler.startDocument() - - def doc_end(self): - self.doc_handler.endDocument() - - def handle_data(self,data,start,end): - self.doc_handler.characters(data,start,end-start) - - def handle_ignorable_data(self,data,start,end): - self.doc_handler.ignorableWhitespace(data,start,end-start) - - def handle_pi(self, target, data): - self.doc_handler.processingInstruction(target,data) - - def handle_start_tag(self, name, attrs): - self.doc_handler.startElement(name,saxutils.AttributeMap(attrs)) - - def handle_end_tag(self, name): - self.doc_handler.endElement(name) - - # --- pubid resolution - - def resolve_entity_pubid(self,pubid,sysid): - return self.ent_handler.resolveEntity(pubid,sysid) - - def resolve_doctype_pubid(self,pubid,sysid): - return self.ent_handler.resolveEntity(pubid,sysid) - - # --- error handling - - def warning(self,msg): - self.err_handler.warning(saxlib.SAXParseException(msg,None,self)) - - def error(self,msg): - self.err_handler.error(saxlib.SAXParseException(msg,None,self)) - - def fatal(self,msg): - self.err_handler.fatalError(saxlib.SAXParseException(msg,None,self)) - - # --- location handling - - def getColumnNumber(self): - return self.parser.get_column() - - def getLineNumber(self): - return self.parser.get_line() - - def getSystemId(self): - return self.parser.get_current_sysid() - - # --- DTD parsing - - def new_external_entity(self,name,pubid,sysid,ndata): - if ndata!="": - self.dtd_handler.unparsedEntityDecl(name,pubid,sysid,ndata) - - def new_notation(self,name,pubid,sysid): - self.dtd_handler.notationDecl(name,pubid,sysid) - - # --- entity events - - def resolve_entity(self,pubid,sysid): - newsysid=self.ent_handler.resolveEntity(pubid,sysid) - if newsysid==None: - return sysid - else: - return newsysid - - # --- EXPERIMENTAL PYTHON SAX EXTENSIONS: - - def get_parser_name(self): - return "xmlproc" - - def get_parser_version(self): - return xmlproc.version - - def get_driver_version(self): - return version - - def is_validating(self): - return 0 - - def is_dtd_reading(self): - return 1 - - def reset(self): - if hasattr(self, "parser"): - self.parser.deref() - self.parser=self._create_parser() - self.parser.set_application(self) - self.parser.set_error_handler(self) - self.parser.set_pubid_resolver(self) - self.parser.set_dtd_listener(self) - self.parser.reset() - - def feed(self,data): - self.parser.feed(data) - - def close(self): - self.parser.close() - self.parser.deref() - # Dereferencing to avoid circular references (grrrr) - self.err_handler = self.dtd_handler = self.doc_handler = None - self.parser = self.locator = self.ent_handler = None - -# --- Global functions - -def create_parser(): - return SAX_XPParser() diff --git a/xcap/sax/drivers/drv_xmlproc_val.py b/xcap/sax/drivers/drv_xmlproc_val.py deleted file mode 100644 index afdbace..0000000 --- a/xcap/sax/drivers/drv_xmlproc_val.py +++ /dev/null @@ -1,73 +0,0 @@ -""" -A SAX driver for xmlproc with validation and DTD information. - -$Id: drv_xmlproc_val.py,v 1.9 2001/12/30 12:13:45 loewis Exp $ -""" - -version="0.92" - -from xml.sax import saxlib,saxutils -from xml.parsers.xmlproc import xmlval -from xml.sax.drivers.drv_xmlproc import * - -import types - -# --- SAX_XPValParser - -class SAX_XPValParser(SAX_XPParser): - - def __init__(self): - SAX_XPParser.__init__(self) - - def _create_parser(self): - return xmlval.XMLValidator() - - def handle_start_tag(self, name, attrs): - try: - self.doc_handler.startElement(name, - XPAttributes(attrs,\ - self.parser.dtd.get_elem(name))) - except KeyError as e: - self.doc_handler.startElement(name,XPAttributes(attrs,None)) - - # --- EXPERIMENTAL PYTHON SAX EXTENSIONS: - - def get_parser_name(self): - return "xmlproc_val" - - def get_driver_version(self): - return version - - def is_validating(self): - return 1 - -# --- XPAttributes - -class XPAttributes(saxutils.AttributeMap): - - def __init__(self,map,elemdecl): - saxutils.AttributeMap.__init__(self,map) - self.elemdecl=elemdecl - - if elemdecl==None: - self.getType=self.getTypeStatic - - def getTypeStatic(self,i): - return "CDATA" # Used for undeclared elements - - def getType(self, i): - if type(i)==int: - try: - i=list(self.map.keys())[i] - except KeyError as e: - return "CDATA" - - try: - return self.elemdecl.get_attr(i).get_type() - except KeyError as e: - return "CDATA" - -# --- Global functions - -def create_parser(): - return SAX_XPValParser() diff --git a/xcap/sax/drivers/drv_xmltoolkit.py b/xcap/sax/drivers/drv_xmltoolkit.py deleted file mode 100644 index 7b8019a..0000000 --- a/xcap/sax/drivers/drv_xmltoolkit.py +++ /dev/null @@ -1,106 +0,0 @@ -""" -A SAX driver for David Scheres XML-Toolkit parser. -""" - -version="0.20" - -import sys - -from xml.sax import saxlib,saxutils -import XMLFactory,XMLClient,urllib.request,urllib.error,urllib.parse - -class SAX_XTClient(saxlib.Parser,XMLClient.ClientBase): - - def __init__(self): - XMLClient.ClientBase.__init__(self) - saxlib.Parser.__init__(self) - self.reset() - - def text(self,obj): - v=obj.value() - self.doc_handler.characters(v,0,len(v)) - - def pi(self,obj): - if obj.nameOf()=="xml": return # Don't report the XML declaration - - content="" - for part in obj.value(): - content=content+part.value()+" " - - self.doc_handler.processingInstruction(obj.nameOf(),content[:-1]) - - def emptyTag(self,obj): - attrs={} - for assoc in obj.value(): - attrs[assoc.nameOf()]=assoc.value() - - self.doc_handler.startElement(obj.nameOf(), - saxutils.AttributeMap(attrs)) - self.doc_handler.endElement(obj.nameOf()) - - def nonEmptyTag(self,obj): - attrs={} - for assoc in obj.value(): - attrs[assoc.nameOf()]=assoc.value() - - self.doc_handler.startElement(obj.nameOf(), - saxutils.AttributeMap(attrs)) - - def endTag(self,obj): - self.doc_handler.endElement(obj.nameOf()) - - def CDATA(self,obj): - v=obj.value() - self.doc_handler.characters(v,0,len(v)) - - def comment(self,obj): - pass # SAX ignores comments - - def parse(self, sysID): - i=urllib.request.urlopen(sysID) - self.parseFile(i) - i.close() - - def parseFile(self, file): - self.reset() - while 1: - buf=file.read(16384) - if buf=="": break - self.feed(buf) - - self.close() - - # --- EXPERIMENTAL SAX PYTHON EXTENSIONS - - def get_parser_name(self): - return "xmltoolkit" - - def get_parser_version(self): - return "Unknown" - - def get_driver_version(self): - return version - - def is_validating(self): - return 0 - - def is_dtd_reading(self): - return 0 - - def reset(self): - self.parser=XMLFactory.XMLFactory(self) - self.unfed_so_far=1 - - def feed(self,data): - if self.unfed_so_far: - self.doc_handler.startDocument() - self.unfed_so_far=0 - - self.parser.feed(data) - - def close(self): - self.parser.endfile() - self.doc_handler.endDocument() - -def create_parser(): - return SAX_XTClient() diff --git a/xcap/sax/drivers/pylibs.py b/xcap/sax/drivers/pylibs.py deleted file mode 100644 index 9189a35..0000000 --- a/xcap/sax/drivers/pylibs.py +++ /dev/null @@ -1,109 +0,0 @@ -""" -Common code for the sgmllib, htmllib and xmllib parser drivers. - -$Id: pylibs.py,v 1.6 2002/08/13 09:28:52 afayolle Exp $ -""" - -from xml.sax import saxlib,saxutils - -import urllib.request, urllib.error, urllib.parse - -# --- LibParser - -class LibParser(saxlib.Parser,saxlib.Locator): - "Common code for the sgmllib, htmllib and xmllib parser drivers." - - def __init__(self): - saxlib.Parser.__init__(self) - - def parse(self,sysID): - "Parses the referenced document." - self.sysID=sysID - self.parseFile(urllib.request.urlopen(sysID)) - - def parseFile(self,fileobj): - "Parses the given file." - if self._can_locate(): - self.doc_handler.setDocumentLocator(self) - self.reset() - while 1: - buf=fileobj.read(16384) - if buf=="": break - - try: - self.feed(buf) - except RuntimeError as e: - self.err_handler.fatalError(saxlib.SAXException(str(e),e)) - - self.close() - - def unknown_endtag(self,tag): - "Handles end tags." - self.doc_handler.endElement(tag) - - def handle_xml(self,encoding,standalone): - "Remembers whether the document is standalone." - self.standalone= standalone=="yes" - - def handle_data(self,data): - "Handles PCDATA." - self.doc_handler.characters(data,0,len(data)) - - def handle_cdata(self,data): - "Handles CDATA marked sections." - self.doc_handler.characters(data,0,len(data)) - - def syntax_error(self, message): - "Handles fatal errors." - if self._can_locate(): - self.err_handler.fatalError(saxlib.SAXParseException(message,None, - self)) - else: - self.err_handler.fatalError(saxlib.SAXException(message,None)) - - -# --- SGMLParsers - -class SGMLParsers(LibParser): - "Common code for the sgmllib and htmllib parsers." - - def handle_pi(self,data): - "Handles processing instructions." - # Should we try to parse out the name if there is one? - self.doc_handler.processingInstruction("",data) - - def handle_starttag(self,tag,method,attributes): - self.unknown_starttag(tag,attributes) - - def unknown_starttag(self,tag,attributes): - "Handles start tags." - attrs={} - for (a,v) in attributes: - attrs[a]=v - - self.doc_handler.startElement(tag,saxutils.AttributeMap(attrs)) - - def handle_endtag(self,tag,method): - "Handles end tags." - self.doc_handler.endElement(tag) - - def unknown_entityref(self,name): - "Handles entity references by throwing an error." - self.err_handler.fatalError(saxlib.SAXException("Reference to unknown entity " - "'%s'" % name,None)) - - def unknown_charref(self,no): - "Handles non-ASCII character references." - self.err_handler.fatalError(saxlib.SAXException("Reference to unknown character '%d'" % no,None)) - - def handle_data(self,data): - "Handles character data in element content." - self.doc_handler.characters(data,0,len(data)) - - def report_unbalanced(self,gi): - "Reports unbalanced tags." - self.err_handler.fatalError(saxlib.SAXException("Unbalanced end tag for '%s'" % gi,None)) - - def _can_locate(self): - "Internal: returns true if location info is available." - return 0 diff --git a/xcap/sax/drivers2/__init__.py b/xcap/sax/drivers2/__init__.py deleted file mode 100644 index 96a8f30..0000000 --- a/xcap/sax/drivers2/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"Directory for SAX version 2 drivers." diff --git a/xcap/sax/drivers2/drv_htmllib.py b/xcap/sax/drivers2/drv_htmllib.py deleted file mode 100644 index 9055834..0000000 --- a/xcap/sax/drivers2/drv_htmllib.py +++ /dev/null @@ -1,20 +0,0 @@ -""" -A SAX 2.0 driver for htmllib. - -$Id: drv_htmllib.py,v 1.2 2001/12/30 12:13:45 loewis Exp $ -""" - -import types, string - -from xml.sax import SAXNotSupportedException, SAXNotRecognizedException -from xml.sax.xmlreader import IncrementalParser -from .drv_sgmllib import SgmllibDriver - -class HtmllibDriver(SgmllibDriver): - - from html.entities import entitydefs - -# --- - -def create_parser(): - return HtmllibDriver() diff --git a/xcap/sax/drivers2/drv_javasax.py b/xcap/sax/drivers2/drv_javasax.py deleted file mode 100644 index a5e2316..0000000 --- a/xcap/sax/drivers2/drv_javasax.py +++ /dev/null @@ -1,212 +0,0 @@ -""" -SAX driver for the Java SAX parsers. Can only be used in Jython. - -$Id: drv_javasax.py,v 1.5 2003/01/26 09:08:51 loewis Exp $ -""" - -# --- Initialization - -version = "0.10" -revision = "$Revision: 1.5 $" - -import string -from xml.sax import xmlreader, saxutils -from xml.sax.handler import feature_namespaces -from xml.sax import _exceptions - -# we only work in jython -import sys -if sys.platform[:4] != "java": - raise _exceptions.SAXReaderNotAvailable("drv_javasax not available in CPython", None) -del sys - -# get the necessary Java SAX classes -try: - from java.lang import String - from org.xml.sax import ContentHandler, SAXException - from org.xml.sax.helpers import XMLReaderFactory -except ImportError: - raise SAXReaderNotAvailable("SAX is not on the classpath", None) - -# get some JAXP stuff -try: - from javax.xml.parsers import SAXParserFactory, ParserConfigurationException - factory = SAXParserFactory.newInstance() - jaxp = 1 -except ImportError: - jaxp = 0 - -# --- JavaSAXParser - -class JavaSAXParser(xmlreader.XMLReader, ContentHandler): - "SAX driver for the Java SAX parsers." - - def __init__(self, jdriver = None): - self._parser = create_java_parser(jdriver) - self._parser.setFeature(feature_namespaces, 0) - self._parser.setContentHandler(self) - self._attrs = AttributesImpl() - self._nsattrs = AttributesNSImpl() - - # XMLReader methods - - def parse(self, source): - "Parse an XML document from a URL or an InputSource." - self._source = saxutils.prepare_input_source(source) - try: - self._parser.parse(source) - except SAXException as e: - raise _exceptions.SAXException("", e) - - def getFeature(self, name): - return self._parser.getFeature(name) - - def setFeature(self, name, state): - self._parser.setFeature(name, state) - - def getProperty(self, name): - return self._parser.getProperty(name) - - def setProperty(self, name, value): - self._parser.setProperty(name, value) - - # ContentHandler methods - - def setDocumentLocator(self, locator): - self._cont_handler.setDocumentLocator(locator) - - def startDocument(self): - self._cont_handler.startDocument() - self._namespaces = self._parser.getFeature(feature_namespaces) - - def startElement(self, uri, lname, qname, attrs): - if self._namespaces: - self._nsattrs._attrs = attrs - self._cont_handler.startElementNS((uri or None, lname), qname, - self._nsattrs) - else: - self._attrs._attrs = attrs - self._cont_handler.startElement(qname, self._attrs) - - def characters(self, char, start, len): - self._cont_handler.characters(str(String(char, start, len))) - - def ignorableWhitespace(self, char, start, len): - self._cont_handler.ignorableWhitespace(str(String(char, start, len))) - - def endElement(self, uri, lname, qname): - if self._namespaces: - self._cont_handler.endElementNS((uri or None, lname), qname) - else: - self._cont_handler.endElement(qname) - - def endDocument(self): - self._cont_handler.endDocument() - - def processingInstruction(self, target, data): - self._cont_handler.processingInstruction(target, data) - -# --- AttributesImpl - -class AttributesImpl: - - def __init__(self, attrs = None): - self._attrs = attrs - - def getLength(self): - return self._attrs.getLength() - - def getType(self, name): - return self._attrs.getType(name) - - def getValue(self, name): - value = self._attrs.getValue(name) - if value == None: - raise KeyError(name) - return value - - def getValueByQName(self, name): - value = self._attrs.getValueByQName(name) - if value == None: - raise KeyError(name) - return value - - def getNameByQName(self, name): - value = self._attrs.getNameByQName(name) - if value == None: - raise KeyError(name) - return value - - def getQNameByName(self, name): - value = self._attrs.getQNameByName(name) - if value == None: - raise KeyError(name) - return value - - def getNames(self): - return self._attrs.getNames() - - def getQNames(self): - return self._attrs.getQNames() - - def __len__(self): - return self._attrs.getLength() - - def __getitem__(self, name): - value = self._attrs.getValue(name) - if value == None: - raise KeyError(name) - return value - - def keys(self): - qnames = [] - for ix in range(self._attrs.getLength()): - qnames.append(self._attrs.getQName(ix)) - return qnames - - def copy(self): - return self.__class__(self._attrs) - - def items(self): - list = [] - for name in self._attrs.getQNames(): - list.append((name, self._attrs.getValue(name))) - return list - - def values(self): - return list(map(self._attrs.getValue, self._attrs.getQNames())) - - def get(self, name, alt = None): - value = self._attrs.getValue(name) - if value != None: - return value - else: - return alt - - def has_key(self, name): - return self._attrs.getValue(name) != None - -# --- AttributesNSImpl - -class AttributesNSImpl: - - def __init__(self): - self._attrs = None - -# --- - -def create_java_parser(jdriver = None): - try: - if jdriver: - return XMLReaderFactory.createXMLReader(jdriver) - elif jaxp: - return factory.newSAXParser().getXMLReader() - else: - return XMLReaderFactory.createXMLReader() - except ParserConfigurationException as e: - raise SAXReaderNotAvailable(e.getMessage()) - except SAXException as e: - raise SAXReaderNotAvailable(e.getMessage()) - -def create_parser(jdriver = None): - return JavaSAXParser(jdriver) diff --git a/xcap/sax/drivers2/drv_pyexpat.py b/xcap/sax/drivers2/drv_pyexpat.py deleted file mode 100644 index ac31baa..0000000 --- a/xcap/sax/drivers2/drv_pyexpat.py +++ /dev/null @@ -1,20 +0,0 @@ -""" -SAX driver for the Pyexpat C module, based on xml.sax.expatdriver. - -$Id: drv_pyexpat.py,v 1.6 2000/09/26 19:53:43 loewis Exp $ -""" - -# XXX: todo list of old drv_pyexpat.py, check whether any of these -# have been fixed. -# Todo on driver: -# - make it support external entities (wait for pyexpat.c) -# - enable configuration between reset() and feed() calls -# - support lexical events? -# - proper inputsource handling -# - properties and features - -# Todo on pyexpat.c: -# - support XML_ExternalEntityParserCreate -# - exceptions in callouts from pyexpat to python code lose position info - -from xml.sax.expatreader import create_parser diff --git a/xcap/sax/drivers2/drv_sgmllib.py b/xcap/sax/drivers2/drv_sgmllib.py deleted file mode 100644 index d0eefda..0000000 --- a/xcap/sax/drivers2/drv_sgmllib.py +++ /dev/null @@ -1,152 +0,0 @@ -""" -A SAX 2.0 driver for sgmllib. - -$Id: drv_sgmllib.py,v 1.3 2001/12/30 12:13:45 loewis Exp $ -""" - -import types, string - -import sgmllib -from xml.sax import SAXNotSupportedException, SAXNotRecognizedException -from xml.sax.xmlreader import IncrementalParser - -# ===== DRIVER - -class SgmllibDriver(sgmllib.SGMLParser, IncrementalParser): - - # ===== SAX 2.0 INTERFACES - - # --- XMLReader methods - - def __init__(self): - sgmllib.SGMLParser.__init__(self) - IncrementalParser.__init__(self) - self._sysid = None - self._pubid = None - - def prepareParser(self, source): - self._sysid = source.getSystemId() - self._pubid = source.getPublicId() - self._cont_handler.startDocument() - - def close(self): - sgmllib.SGMLParser.close(self) - self._cont_handler.endDocument() - - def setLocale(self, locale): - raise SAXNotSupportedException("setLocale not supported") - - def getFeature(self, name): - raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - - def setFeature(self, name, state): - raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - - def getProperty(self, name): - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - - def setProperty(self, name, value): - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - - # --- Locator methods - - def getColumnNumber(self): - return -1 - - def getLineNumber(self): - return -1 - - def getPublicId(self): - return self._pubid - - def getSystemId(self): - return self._sysid - - # ===== HTMLLIB INTERFACES - - def unknown_starttag(self, name, attrs): - self._cont_handler.startElement(name, AttributesImpl(attrs)) - - def unknown_endtag(self, name): - self._cont_handler.endElement(name) - - def handle_data(self, data): - self._cont_handler.characters(data) - -# ===== ATTRIBUTESIMPL ===== - -class AttributesImpl: - - def __init__(self, attrs): - "attrs has the form [(name, value), (name, value)...]" - self._attrs = attrs - - def getLength(self): - return len(self._attrs) - - def getType(self, name): - return "CDATA" - - def getValue(self, name): - for (aname, avalue) in self._attrs: - if aname == name: - return avalue - raise KeyError(name) - - def getValueByQName(self, name): - for (aname, avalue) in self._attrs: - if aname == name: - return avalue - raise KeyError(name) - - def getNameByQName(self, name): - for (aname, avalue) in self._attrs: - if aname == name: - return name - raise KeyError(name) - - def getQNameByName(self, name): - return self.getNameByQName(name) - - def getNames(self): - return [x[0] for x in self._attrs] - - def getQNames(self): - return [x[0] for x in self._attrs] - - def __len__(self): - return len(self._attrs) - - def __getitem__(self, name): - for (aname, avalue) in self._attrs: - if aname == name: - return avalue - raise KeyError(name) - - def keys(self): - return self.getNames() - - def has_key(self, name): - for (aname, avalue) in self._attrs: - if aname == name: - return 1 - return 0 - - def get(self, name, alternative=None): - for (aname, avalue) in self._attrs: - if aname == name: - return avalue - - def copy(self): - return self.__class__(self._attrs) - - def items(self): - return self._attrs - - def values(self): - return [x[1] for x in self._attrs] - -# --- - -def create_parser(): - return SgmllibDriver() diff --git a/xcap/sax/drivers2/drv_sgmlop.py b/xcap/sax/drivers2/drv_sgmlop.py deleted file mode 100644 index ff5bceb..0000000 --- a/xcap/sax/drivers2/drv_sgmlop.py +++ /dev/null @@ -1,131 +0,0 @@ -""" -SAX2 driver for the sgmlop parser. - -$Id: drv_sgmlop.py,v 1.7 2003/01/21 12:42:28 loewis Exp $ -""" - -version = "0.1" - -from xml.parsers.sgmllib import SGMLParser -from xml.sax import saxlib, handler -from xml.sax.xmlreader import AttributesImpl, XMLReader -from xml.sax.saxutils import ContentGenerator, prepare_input_source - -try: - import codecs - def to_xml_string(str,encoding): - try: - decoder = codecs.lookup(encoding)[1] - return decoder(str)[0] - except LookupError: - return str -except ImportError: - from xml.str.iso8859 import wstring - def to_xml_string(str,encoding): - if string.lower(self._encoding) == 'utf-8': - return str - else: - return wstring.decode(encoding,str).utf8() - - - -class SaxParser(SGMLParser, XMLReader): - """ Implements IncrementalReader """ - - def __init__(self, bufsize = 65536, encoding = 'UTF-8'): - XMLReader.__init__(self) - SGMLParser.__init__(self) - self._bufsize = bufsize - self._lexical_handler = None - self._encoding = encoding - self.documentStarted = 0 - - def parse(self, source): - source = prepare_input_source(source) - - self.prepareParser(source) - file = source.getByteStream() - buffer = file.read(self._bufsize) - while buffer != "": - self.feed(buffer) - buffer = file.read(self._bufsize) - self.close() - - def feed(self,buffer): - if not self.documentStarted: - self._cont_handler.startDocument() - self.documentStarted = 1 - SGMLParser.feed(self,buffer) - - def prepareParser(self, source): - # not used - pass - - def close(self): - """This method is called when the entire XML document has been - passed to the parser through the feed method, to notify the - parser that there are no more data. This allows the parser to - do the final checks on the document and empty the internal - data buffer. - - The parser will not be ready to parse another document until - the reset method has been called. - - close may raise SAXException.""" - SGMLParser.close(self) - self._cont_handler.endDocument() - - def _make_attr_dict(self,attr_list): - d = {} - cvrt = lambda str,e=self._encoding:to_xml_string(str,e) - for (a,b) in attr_list: - d[cvrt(a)]=cvrt(b) - return d - - def unknown_starttag(self,tag,attrs): - self._cont_handler.startElement(to_xml_string(tag,self._encoding), - AttributesImpl(self._make_attr_dict(attrs))) - - def unknown_endtag(self,tag): - self._cont_handler.endElement(to_xml_string(tag,self._encoding)) - - def handle_data(self,data): - self._cont_handler.characters(to_xml_string(data,self._encoding)) - - def unknown_entityref(self, entity): - self._cont_handler.skippedEntity(to_xml_string(entity,self._encoding)) - - def handle_comment(self,data): - if self._lexical_handler is not None: - self._lexical_handler.comment(to_xml_string(data,self._encoding)) - - def setProperty(self,name,value): - if name == handler.property_lexical_handler: - self._lexical_handler = value - elif name == handler.property_encoding: - self._encoding = value - else: - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - def getProperty(self, name): - if name == handler.property_lexical_handler: - return self._lexical_handler - elif name == handler.property_encoding: - return self._encoding - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - -## def getFeature(self, name): -## if name == handler.feature_namespaces: -## return self._namespaces -## raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - -## def setFeature(self, name, state): -## if self._parsing: -## raise SAXNotSupportedException("Cannot set features while parsing") -## if name == handler.feature_namespaces: -## self._namespaces = state -## else: -## raise SAXNotRecognizedException("Feature '%s' not recognized" % -## name) - -def create_parser(): - return SaxParser() diff --git a/xcap/sax/drivers2/drv_sgmlop_html.py b/xcap/sax/drivers2/drv_sgmlop_html.py deleted file mode 100644 index 5825e59..0000000 --- a/xcap/sax/drivers2/drv_sgmlop_html.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -SAX2 driver for parsing HTML with the sgmlop parser. - -$Id: drv_sgmlop_html.py,v 1.3 2002/05/10 14:50:06 akuchling Exp $ -""" - -version = "0.1" - -from .drv_sgmlop import * -from xml.dom.html import HTML_CHARACTER_ENTITIES, HTML_FORBIDDEN_END, HTML_OPT_END, HTML_DTD -from string import strip, upper - -class SaxHtmlParser(SaxParser): - - def __init__(self, bufsize = 65536, encoding = 'iso-8859-1', verbose = 0): - SaxParser.__init__(self, bufsize, encoding) - self.verbose = verbose - - def finish_starttag(self, tag, attrs): - """uses the HTML DTD to automatically generate events - for missing tags""" - - # guess omitted close tags - while self.stack and \ - upper(self.stack[-1]) in HTML_OPT_END and \ - tag not in HTML_DTD.get(self.stack[-1],[]): - self.unknown_endtag(self.stack[-1]) - del self.stack[-1] - - if self.stack and tag not in HTML_DTD.get(self.stack[-1],[]) and self.verbose: - print('Warning : trying to add %s as a child of %s'%\ - (tag,self.stack[-1])) - - self.unknown_starttag(tag,attrs) - if upper(tag) in HTML_FORBIDDEN_END: - # close immediately tags for which we won't get an end - self.unknown_endtag(tag) - return 0 - else: - self.stack.append(tag) - return 1 - - def finish_endtag(self, tag): - if tag in HTML_FORBIDDEN_END : - # do nothing: we've already closed it - return - if tag in self.stack: - while self.stack and self.stack[-1] != tag: - self.unknown_endtag(self.stack[-1]) - del self.stack[-1] - self.unknown_endtag(tag) - del self.stack[-1] - elif self.verbose: - print("Warning: I don't see where tag %s was opened"%tag) - - - def handle_data(self,data): - if self.stack: - if '#PCDATA' not in HTML_DTD.get(self.stack[-1],[]) and not strip(data): - # this is probably ignorable whitespace - self._cont_handler.ignorableWhitespace(data) - else: - self._cont_handler.characters(to_xml_string(data,self._encoding)) - - def close(self): - SGMLParser.close(self) - self.stack.reverse() - for tag in self.stack: - self.unknown_endtag(tag) - self.stack = [] - self._cont_handler.endDocument() - - -def create_parser(): - return SaxHtmlParser() diff --git a/xcap/sax/drivers2/drv_xmlproc.py b/xcap/sax/drivers2/drv_xmlproc.py deleted file mode 100644 index 336bd0c..0000000 --- a/xcap/sax/drivers2/drv_xmlproc.py +++ /dev/null @@ -1,424 +0,0 @@ -""" -A SAX 2.0 driver for xmlproc. - -$Id: drv_xmlproc.py,v 1.16 2003/07/27 17:58:20 loewis Exp $ -""" - -import types, string - -from xml.parsers.xmlproc import xmlproc, xmlval, xmlapp -from xml.sax import saxlib -from xml.sax.xmlreader import AttributesImpl, AttributesNSImpl -from xml.sax.xmlreader import IncrementalParser -from xml.sax.saxutils import ContentGenerator, prepare_input_source - -# Todo -# - EntityResolver InputSource handling -# - as much as possible of LexicalHandler -# - entity expansion features -# - core properties -# - extra properties/features -# - element stack -# - entity stack -# - current error code -# - byte offset -# - DTD object -# - catalog path -# - use catalogs -# - regression test -# - methods from Python SAX extensions? -# - remove FIXMEs - -class XmlprocDriver(IncrementalParser): - - # ===== SAX 2.0 INTERFACES - - # --- XMLReader methods - - def __init__(self): - IncrementalParser.__init__(self) - self.__parsing = 0 - self.__validate = 0 - self.__namespaces = 0 - self.__ext_pes = 0 - - self.__locator = 0 - - self._lex_handler = saxlib.LexicalHandler() - self._decl_handler = saxlib.DeclHandler() - self._parser = None - - def prepareParser(self, source): - self.__parsing = 1 - - # create parser - - if self.__validate: - parser = xmlval.XMLValidator() - else: - parser = xmlproc.XMLProcessor() - - # set handlers - - if self._cont_handler != None or self._lex_handler != None: - if self._cont_handler == None: - self._cont_handler = saxlib.ContentHandler() - if self._lex_handler == None: - self._lex_handler = saxlib.LexicalHandler() - - if self.__namespaces: - filter = NamespaceFilter(parser, self._cont_handler, - self._lex_handler, self) - parser.set_application(filter) - else: - parser.set_application(self) - - if self._err_handler != None: - parser.set_error_handler(self) - - if self._decl_handler != None or self._dtd_handler != None: - parser.set_dtd_listener(self) - - parser.set_pubid_resolver(self) - - # FIXME: set other handlers - - if self.__ext_pes: - parser.set_read_external_subset(1) - - self._parser = parser # make it available for callbacks - if source: - parser.set_sysid(source.getSystemId()) - - def feed(self, data): - if not self._parser: - self.prepareParser(None) - self._parser.feed(data) - - def close(self): - self._parser.flush() - self._parser.parseEnd() - - def reset(self): - self._parser = None - self.__parsing = 0 - - def setLocale(self, locale): - pass - - def getFeature(self, name): - if name == saxlib.feature_string_interning or \ - name == saxlib.feature_external_ges: - return 1 - elif name == saxlib.feature_external_pes: - return self.__ext_pes - elif name == saxlib.feature_validation: - return self.__validate - elif name == saxlib.feature_namespaces: - return self.__namespaces - elif name == saxlib.feature_namespace_prefixes: - return 0 - else: - raise saxlib.SAXNotRecognizedException("Feature '%s' not recognized" % - name) - - def setFeature(self, name, state): - if self.__parsing: - raise saxlib.SAXNotSupportedException("Cannot set feature '%s' during parsing" % name) - - if name == saxlib.feature_validation: - self.__validate = state - if self.__validate: - self.__ext_pes = 1 - elif name == saxlib.feature_namespaces: - self.__namespaces = state - elif name == saxlib.feature_external_ges or \ - name == saxlib.feature_string_interning: - if not state: - raise saxlib.SAXNotSupportedException("This feature cannot be turned off with xmlproc.") - elif name == saxlib.feature_namespace_prefixes: - if state: - raise saxlib.SAXNotSupportedException("This feature cannot be turned on with xmlproc.") - elif name == saxlib.feature_external_pes: - self.__ext_pes = state - else: - raise saxlib.SAXNotRecognizedException("Feature '%s' not recognized" % - name) - - def getProperty(self, name): - if name == saxlib.property_lexical_handler: - return self._lex_handler - elif name == saxlib.property_declaration_handler: - return self._decl_handler - - raise saxlib.SAXNotRecognizedException("Property '%s' not recognized" % name) - - def setProperty(self, name, value): - if name == saxlib.property_lexical_handler: - self._lex_handler = value - elif name == saxlib.property_declaration_handler: - self._decl_handler = value - else: - raise saxlib.SAXNotRecognizedException("Property '%s' not recognized" % name) - - # --- Locator methods - - def getColumnNumber(self): - return self._parser.get_column() - - def getLineNumber(self): - return self._parser.get_line() - - def getPublicId(self): - return None # FIXME: Try to find this. Perhaps from InputSource? - - def getSystemId(self): - return self._parser.get_current_sysid() # FIXME? - - # ===== XMLPROC INTERFACES - - # --- Application methods - - def set_locator(self, locator): - self._locator = locator - - def doc_start(self): - self._cont_handler.startDocument() - - def doc_end(self): - self._cont_handler.endDocument() - - def handle_comment(self, data): - self._lex_handler.comment(data) - - def handle_start_tag(self, name, attrs): - self._cont_handler.startElement(name, AttributesImpl(attrs)) - - def handle_end_tag(self,name): - self._cont_handler.endElement(name) - - def handle_data(self, data, start, end): - self._cont_handler.characters(data[start:end]) - - def handle_ignorable_data(self, data, start, end): - self._cont_handler.ignorableWhitespace(data[start:end]) - - def handle_pi(self, target, data): - self._cont_handler.processingInstruction(target, data) - - def handle_doctype(self, root, pubId, sysId): - self._lex_handler.startDTD(root, pubId, sysId) - - def set_entity_info(self, xmlver, enc, sddecl): - pass - - # --- ErrorHandler methods - - # set_locator implemented as Application method above - - def get_locator(self): - return self._locator - - def warning(self, msg): - self._err_handler.warning(saxlib.SAXParseException(msg, None, self)) - - def error(self, msg): - self._err_handler.error(saxlib.SAXParseException(msg, None, self)) - - def fatal(self, msg): - self._err_handler.fatalError(saxlib.SAXParseException(msg, None, self)) - - # --- DTDConsumer methods - - def dtd_start(self): - pass # this is done by handle_doctype - - def dtd_end(self): - - self._lex_handler.endDTD() - - def handle_comment(self, contents): - self._lex_handler.comment(contents) - - def handle_pi(self, target, rem): - self._cont_handler.processingInstruction(target, rem) - - def new_general_entity(self, name, val): - self._decl_handler.internalEntityDecl(name, val) - - def new_external_entity(self, ent_name, pub_id, sys_id, ndata): - if not ndata: - self._decl_handler.externalEntityDecl(ent_name, pub_id, sys_id) - else: - self._dtd_handler.unparsedEntityDecl(ent_name, pub_id, sys_id, - ndata) - - def new_parameter_entity(self, name, val): - self._decl_handler.internalEntityDecl("%" + name, val) - - def new_external_pe(self, name, pubid, sysid): - self._decl_handler.externalEntityDecl("%" + name, pubid, sysid) - - def new_notation(self, name, pubid, sysid): - self._dtd_handler.notationDecl(name, pubid, sysid) - - def new_element_type(self, elem_name, elem_cont): - if elem_cont == None: - elem_cont = "ANY" - elif elem_cont == ("", [], ""): - elem_cont = "EMPTY" - self._decl_handler.elementDecl(elem_name, elem_cont) - - def new_attribute(self, elem, attr, type, a_decl, a_def): - self._decl_handler.attributeDecl(elem, attr, type, a_decl, a_def) - - # --- PubIdResolver methods - - def resolve_pe_pubid(self, pubid, sysid): - # Delegate out to the instance's EntityResolver. - # TODO: does not support returning an InputSource from resolveEntity. - return self._ent_handler.resolveEntity(pubid, sysid) - - def resolve_doctype_pubid(self, pubid, sysid): - # Delegate out to the instance's EntityResolver. - # TODO: does not support returning an InputSource from resolveEntity. - return self._ent_handler.resolveEntity(pubid, sysid) - - def resolve_entity_pubid(self, pubid, sysid): - # Delegate out to the instance's EntityResolver. - # TODO: does not support returning an InputSource from resolveEntity. - return self._ent_handler.resolveEntity(pubid, sysid) - -# --- NamespaceFilter - -class NamespaceFilter: - """An xmlproc application that processes qualified names and reports them - as (URI, local-part). It reports errors through the error reporting - mechanisms of the parser.""" - - def __init__(self, parser, content, lexical, driver): - self._cont_handler = content - self._lex_handler = lexical - self.driver = driver - self.ns_map = {"" : None} # Current prefix -> URI map - self.ns_map["xml"] = "http://www.w3.org/XML/1998/namespace" - self.ns_stack = [] # Pushed for each element, used to maint ns_map - self.rep_ns_attrs = 0 # Report xmlns-attributes? - self.parser = parser - - def set_locator(self, locator): - self.driver.set_locator(locator) - - def doc_start(self): - self._cont_handler.startDocument() - - def doc_end(self): - self._cont_handler.endDocument() - - def handle_comment(self, data): - self._lex_handler.comment(data) - - def handle_start_tag(self,name,attrs): - old_ns={} # Reset ns_map to these values when we leave this element - del_ns=[] # Delete these prefixes from ns_map when we leave element - - # attrs=attrs.copy() Will have to do this if more filters are made - - # Find declarations, update self.ns_map and self.ns_stack - for (a,v) in list(attrs.items()): - if a[:6]=="xmlns:": - prefix=a[6:] - if string.find(prefix,":")!=-1: - self.parser.report_error(1900) - - #if v=="": - # self.parser.report_error(1901) - elif a=="xmlns": - prefix="" - else: - continue - - if prefix in self.ns_map: - old_ns[prefix]=self.ns_map[prefix] - if v: - self.ns_map[prefix]=v - else: - del self.ns_map[prefix] - - if not self.rep_ns_attrs: - del attrs[a] - - self.ns_stack.append((old_ns,del_ns)) - - # Process elem and attr names - cooked_name = self.__process_name(name) - ns = cooked_name[0] - - rawnames = {} - for (a,v) in list(attrs.items()): - del attrs[a] - aname = self.__process_name(a, is_attr=1) - if aname in attrs: - self.parser.report_error(1903) - attrs[aname] = v - rawnames[aname] = a - - # Report event - self._cont_handler.startElementNS(cooked_name, name, - AttributesNSImpl(attrs, rawnames)) - - def handle_end_tag(self, rawname): - name = self.__process_name(rawname) - - # Clean up self.ns_map and self.ns_stack - (old_ns,del_ns)=self.ns_stack[-1] - del self.ns_stack[-1] - - self.ns_map.update(old_ns) - for prefix in del_ns: - del self.ns_map[prefix] - - self._cont_handler.endElementNS(name, rawname) - - def handle_data(self, data, start, end): - self._cont_handler.characters(data[start:end]) - - def handle_ignorable_data(self, data, start, end): - self._cont_handler.ignorableWhitespace(data[start:end]) - - def handle_pi(self, target, data): - self._cont_handler.processingInstruction(target, data) - - def handle_doctype(self, root, pubId, sysId): - self._lex_handler.startDTD(root, pubId, sysId) - - def set_entity_info(self, xmlver, enc, sddecl): - pass - - # --- Internal methods - - def __process_name(self, name, default_to=None, is_attr=0): - n=string.split(name,":") - if len(n)>2: - self.parser.report_error(1900) - return (None, name) - elif len(n)==2: - if n[0]=="xmlns": - return None, name - - try: - return (self.ns_map[n[0]], n[1]) - except KeyError: - self.parser.report_error(1902) - return None, name - elif is_attr: - return None, name - elif default_to != None: - return (default_to, name) - elif "" in self.ns_map and name != "xmlns": - return self.ns_map[""], name - else: - return (None, name) - -def create_parser(): - return XmlprocDriver() diff --git a/xcap/sax/expatreader.py b/xcap/sax/expatreader.py deleted file mode 100644 index 8498f17..0000000 --- a/xcap/sax/expatreader.py +++ /dev/null @@ -1,429 +0,0 @@ -""" -SAX driver for the pyexpat C module. This driver works with -pyexpat.__version__ == '2.22'. -""" - -version = "0.20" - -from xml.sax._exceptions import * -from xml.sax.handler import feature_validation, feature_namespaces -from xml.sax.handler import feature_namespace_prefixes -from xml.sax.handler import feature_external_ges, feature_external_pes -from xml.sax.handler import feature_string_interning -from xml.sax.handler import property_xml_string, property_interning_dict - -# xml.parsers.expat does not raise ImportError in Jython -import sys -if sys.platform[:4] == "java": - raise SAXReaderNotAvailable("expat not available in Java", None) -del sys - -try: - from xml.parsers import expat -except ImportError: - raise SAXReaderNotAvailable("expat not supported", None) -else: - if not hasattr(expat, "ParserCreate"): - raise SAXReaderNotAvailable("expat not supported", None) -from xml.sax import xmlreader, saxutils, handler - -AttributesImpl = xmlreader.AttributesImpl -AttributesNSImpl = xmlreader.AttributesNSImpl - -# If we're using a sufficiently recent version of Python, we can use -# weak references to avoid cycles between the parser and content -# handler, otherwise we'll just have to pretend. -try: - import _weakref -except ImportError: - def _mkproxy(o): - return o -else: - import weakref - _mkproxy = weakref.proxy - del weakref, _weakref - -# --- ExpatLocator - -class ExpatLocator(xmlreader.Locator): - """Locator for use with the ExpatParser class. - - This uses a weak reference to the parser object to avoid creating - a circular reference between the parser and the content handler. - """ - def __init__(self, parser): - self._ref = _mkproxy(parser) - - def getColumnNumber(self): - parser = self._ref - if parser._parser is None: - return None - return parser._parser.ErrorColumnNumber - - def getLineNumber(self): - parser = self._ref - if parser._parser is None: - return 1 - return parser._parser.ErrorLineNumber - - def getPublicId(self): - parser = self._ref - if parser is None: - return None - return parser._source.getPublicId() - - def getSystemId(self): - parser = self._ref - if parser is None: - return None - return parser._source.getSystemId() - - -# --- ExpatParser - -class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): - """SAX driver for the pyexpat C module.""" - - def __init__(self, namespaceHandling=0, bufsize=2**16-20): - xmlreader.IncrementalParser.__init__(self, bufsize) - self._source = xmlreader.InputSource() - self._parser = None - self._namespaces = namespaceHandling - self._lex_handler_prop = None - self._parsing = 0 - self._entity_stack = [] - self._external_ges = 1 - self._interning = None - self._namespace_prefixes = 1 - - # XMLReader methods - - def parse(self, source): - "Parse an XML document from a URL or an InputSource." - source = saxutils.prepare_input_source(source) - - self._source = source - self.reset() - self._cont_handler.setDocumentLocator(ExpatLocator(self)) - try: - xmlreader.IncrementalParser.parse(self, source) - finally: - # Drop reference to Expat parser, but read potential - # error state before that. Also, if close has completed, - # we don't have a parser anymore, anyway. - if self._parser: - self._ColumnNumber = self._parser.ErrorColumnNumber - self._LineNumber = self._parser.ErrorLineNumber - self._parser = None - - def prepareParser(self, source): - if source.getSystemId() != None: - self._parser.SetBase(source.getSystemId()) - - # Redefined setContentHandler to allow changing handlers during parsing - - def setContentHandler(self, handler): - xmlreader.IncrementalParser.setContentHandler(self, handler) - if self._parsing: - self._reset_cont_handler() - - def getFeature(self, name): - if name == feature_namespaces: - return self._namespaces - elif name == feature_string_interning: - return self._interning is not None - elif name == feature_namespace_prefixes: - return self._namespace_prefixes - elif name in (feature_validation, feature_external_pes): - return 0 - elif name == feature_external_ges: - return self._external_ges - raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - - def setFeature(self, name, state): - if self._parsing: - raise SAXNotSupportedException("Cannot set features while parsing") - - if name == feature_namespaces: - self._namespaces = state - elif name == feature_external_ges: - self._external_ges = state - elif name == feature_string_interning: - if state: - if self._interning is None: - self._interning = {} - else: - self._interning = None - elif name == feature_namespace_prefixes: - self._namespace_prefixes = state - elif name == feature_validation: - if state: - raise SAXNotSupportedException( - "expat does not support validation") - elif name == feature_external_pes: - if state: - raise SAXNotSupportedException( - "expat does not read external parameter entities") - else: - raise SAXNotRecognizedException( - "Feature '%s' not recognized" % name) - - def getProperty(self, name): - if name == handler.property_lexical_handler: - return self._lex_handler_prop - elif name == property_interning_dict: - return self._interning - elif name == property_xml_string: - if self._parser: - if hasattr(self._parser, "GetInputContext"): - return self._parser.GetInputContext() - else: - raise SAXNotRecognizedException( - "This version of expat does not support getting" - " the XML string") - else: - raise SAXNotSupportedException( - "XML string cannot be returned when not parsing") - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - - def setProperty(self, name, value): - if name == handler.property_lexical_handler: - self._lex_handler_prop = value - if self._parsing: - self._reset_lex_handler_prop() - elif name == property_interning_dict: - self._interning = value - elif name == property_xml_string: - raise SAXNotSupportedException("Property '%s' cannot be set" % - name) - else: - raise SAXNotRecognizedException("Property '%s' not recognized" % - name) - - # IncrementalParser methods - - def feed(self, data, isFinal = 0): - if not self._parsing: - self.reset() - self._parsing = 1 - self._cont_handler.startDocument() - - try: - # The isFinal parameter is internal to the expat reader. - # If it is set to true, expat will check validity of the entire - # document. When feeding chunks, they are not normally final - - # except when invoked from close. - self._parser.Parse(data, isFinal) - except expat.error as e: - exc = SAXParseException(expat.ErrorString(e.code), e, self) - # FIXME: when to invoke error()? - self._err_handler.fatalError(exc) - - def close(self): - if self._entity_stack: - # If we are completing an external entity, do nothing here - return - self.feed("", isFinal = 1) - self._cont_handler.endDocument() - self._parsing = 0 - # break cycle created by expat handlers pointing to our methods - self._parser = None - - def _reset_cont_handler(self): - self._parser.ProcessingInstructionHandler = \ - self._cont_handler.processingInstruction - self._parser.CharacterDataHandler = self._cont_handler.characters - - def _reset_lex_handler_prop(self): - lex = self._lex_handler_prop - parser = self._parser - if lex is None: - parser.CommentHandler = None - parser.StartCdataSectionHandler = None - parser.EndCdataSectionHandler = None - parser.StartDoctypeDeclHandler = None - parser.EndDoctypeDeclHandler = None - else: - parser.CommentHandler = lex.comment - parser.StartCdataSectionHandler = lex.startCDATA - parser.EndCdataSectionHandler = lex.endCDATA - parser.StartDoctypeDeclHandler = self.start_doctype_decl - parser.EndDoctypeDeclHandler = lex.endDTD - - def reset(self): - if self._namespaces: - self._parser = expat.ParserCreate(None, " ", - intern=self._interning) - self._parser.namespace_prefixes = 1 - self._parser.StartElementHandler = self.start_element_ns - self._parser.EndElementHandler = self.end_element_ns - else: - self._parser = expat.ParserCreate(intern = self._interning) - self._parser.StartElementHandler = self.start_element - self._parser.EndElementHandler = self.end_element - - self._reset_cont_handler() - self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl - self._parser.NotationDeclHandler = self.notation_decl - self._parser.StartNamespaceDeclHandler = self.start_namespace_decl - self._parser.EndNamespaceDeclHandler = self.end_namespace_decl - - self._decl_handler_prop = None - if self._lex_handler_prop: - self._reset_lex_handler_prop() -# self._parser.DefaultHandler = -# self._parser.DefaultHandlerExpand = -# self._parser.NotStandaloneHandler = - self._parser.ExternalEntityRefHandler = self.external_entity_ref - try: - self._parser.SkippedEntityHandler = self.skipped_entity_handler - except AttributeError: - # This pyexpat does not support SkippedEntity - pass - self._parser.SetParamEntityParsing( - expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) - - self._parsing = 0 - self._entity_stack = [] - # default values when _parser goes aways - self._ColumnNumber = None - self._LineNumber = 1 - - # Locator methods - - def getColumnNumber(self): - if self._parser is None: - return self._ColumnNumber - return self._parser.ErrorColumnNumber - - def getLineNumber(self): - if self._parser is None: - return self._LineNumber - return self._parser.ErrorLineNumber - - def getPublicId(self): - return self._source.getPublicId() - - def getSystemId(self): - return self._source.getSystemId() - - # event handlers - def start_element(self, name, attrs): - self._cont_handler.startElement(name, AttributesImpl(attrs)) - - def end_element(self, name): - self._cont_handler.endElement(name) - - def start_element_ns(self, name, attrs): - pair = name.split() - if len(pair) == 1: - # no namespace - elem_qname = name - pair = (None, name) - elif len(pair) == 3: - # namespace plus prefix - elem_qname = "%s:%s" % (pair[2], pair[1]) - pair = pair[0], pair[1] - else: - # default namespace - elem_qname = pair[1] - pair = tuple(pair) - - newattrs = {} - qnames = {} - for (aname, value) in list(attrs.items()): - parts = aname.split() - length = len(parts) - if length == 1: - # no namespace - qname = aname - apair = (None, aname) - elif length == 3: - qname = "%s:%s" % (parts[2], parts[1]) - apair = parts[0], parts[1] - else: - # default namespace - qname = parts[1] - apair = tuple(parts) - - newattrs[apair] = value - qnames[apair] = qname - - self._cont_handler.startElementNS(pair, elem_qname, - AttributesNSImpl(newattrs, qnames)) - - def end_element_ns(self, name): - pair = name.split() - if len(pair) == 1: - pair = (None, name) - elif len(pair) == 3: - pair = pair[0], pair[1] - else: - pair = tuple(pair) - - self._cont_handler.endElementNS(pair, None) - - # this is not used (call directly to ContentHandler) - def processing_instruction(self, target, data): - self._cont_handler.processingInstruction(target, data) - - # this is not used (call directly to ContentHandler) - def character_data(self, data): - self._cont_handler.characters(data) - - def start_namespace_decl(self, prefix, uri): - self._cont_handler.startPrefixMapping(prefix, uri) - - def end_namespace_decl(self, prefix): - self._cont_handler.endPrefixMapping(prefix) - - def start_doctype_decl(self, name, sysid, pubid, has_internal_subset): - self._lex_handler_prop.startDTD(name, pubid, sysid) - - def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): - self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name) - - def notation_decl(self, name, base, sysid, pubid): - self._dtd_handler.notationDecl(name, pubid, sysid) - - def external_entity_ref(self, context, base, sysid, pubid): - if not self._external_ges: - return 1 - - source = self._ent_handler.resolveEntity(pubid, sysid) - source = saxutils.prepare_input_source(source, - self._source.getSystemId() or - "") - - self._entity_stack.append((self._parser, self._source)) - self._parser = self._parser.ExternalEntityParserCreate(context) - self._source = source - - try: - xmlreader.IncrementalParser.parse(self, source) - except: - return 0 # FIXME: save error info here? - - (self._parser, self._source) = self._entity_stack[-1] - del self._entity_stack[-1] - return 1 - - def skipped_entity_handler(self, name, is_pe): - if is_pe: - # The SAX spec requires to report skipped PEs with a '%' - name = '%'+name - self._cont_handler.skippedEntity(name) - -# --- - -def create_parser(*args, **kwargs): - return ExpatParser(*args, **kwargs) - -# --- - -if __name__ == "__main__": - import xml.sax - p = create_parser() - p.setContentHandler(xml.sax.XMLGenerator()) - p.setErrorHandler(xml.sax.ErrorHandler()) - p.parse("../../../hamlet.xml") diff --git a/xcap/sax/handler.py b/xcap/sax/handler.py deleted file mode 100644 index 5a4394a..0000000 --- a/xcap/sax/handler.py +++ /dev/null @@ -1,345 +0,0 @@ -""" -This module contains the core classes of version 2.0 of SAX for Python. -This file provides only default classes with absolutely minimum -functionality, from which drivers and applications can be subclassed. - -Many of these classes are empty and are included only as documentation -of the interfaces. - -$Id: handler.py,v 1.5 2002/02/14 08:09:36 loewis Exp $ -""" - -version = '2.0beta' - -#============================================================================ -# -# HANDLER INTERFACES -# -#============================================================================ - -# ===== ERRORHANDLER ===== - -class ErrorHandler: - """Basic interface for SAX error handlers. - - If you create an object that implements this interface, then - register the object with your XMLReader, the parser will call the - methods in your object to report all warnings and errors. There - are three levels of errors available: warnings, (possibly) - recoverable errors, and unrecoverable errors. All methods take a - SAXParseException as the only parameter.""" - - def error(self, exception): - "Handle a recoverable error." - raise exception - - def fatalError(self, exception): - "Handle a non-recoverable error." - raise exception - - def warning(self, exception): - "Handle a warning." - print(exception) - - -# ===== CONTENTHANDLER ===== - -class ContentHandler: - """Interface for receiving logical document content events. - - This is the main callback interface in SAX, and the one most - important to applications. The order of events in this interface - mirrors the order of the information in the document.""" - - def __init__(self): - self._locator = None - - def setDocumentLocator(self, locator): - """Called by the parser to give the application a locator for - locating the origin of document events. - - SAX parsers are strongly encouraged (though not absolutely - required) to supply a locator: if it does so, it must supply - the locator to the application by invoking this method before - invoking any of the other methods in the DocumentHandler - interface. - - The locator allows the application to determine the end - position of any document-related event, even if the parser is - not reporting an error. Typically, the application will use - this information for reporting its own errors (such as - character content that does not match an application's - business rules). The information returned by the locator is - probably not sufficient for use with a search engine. - - Note that the locator will return correct information only - during the invocation of the events in this interface. The - application should not attempt to use it at any other time.""" - self._locator = locator - - def startDocument(self): - """Receive notification of the beginning of a document. - - The SAX parser will invoke this method only once, before any - other methods in this interface or in DTDHandler (except for - setDocumentLocator).""" - - def endDocument(self): - """Receive notification of the end of a document. - - The SAX parser will invoke this method only once, and it will - be the last method invoked during the parse. The parser shall - not invoke this method until it has either abandoned parsing - (because of an unrecoverable error) or reached the end of - input.""" - - def startPrefixMapping(self, prefix, uri): - """Begin the scope of a prefix-URI Namespace mapping. - - The information from this event is not necessary for normal - Namespace processing: the SAX XML reader will automatically - replace prefixes for element and attribute names when the - http://xml.org/sax/features/namespaces feature is true (the - default). - - There are cases, however, when applications need to use - prefixes in character data or in attribute values, where they - cannot safely be expanded automatically; the - start/endPrefixMapping event supplies the information to the - application to expand prefixes in those contexts itself, if - necessary. - - Note that start/endPrefixMapping events are not guaranteed to - be properly nested relative to each-other: all - startPrefixMapping events will occur before the corresponding - startElement event, and all endPrefixMapping events will occur - after the corresponding endElement event, but their order is - not guaranteed.""" - - def endPrefixMapping(self, prefix): - """End the scope of a prefix-URI mapping. - - See startPrefixMapping for details. This event will always - occur after the corresponding endElement event, but the order - of endPrefixMapping events is not otherwise guaranteed.""" - - def startElement(self, name, attrs): - """Signals the start of an element in non-namespace mode. - - The name parameter contains the raw XML 1.0 name of the - element type as a string and the attrs parameter holds an - instance of the Attributes class containing the attributes of - the element.""" - - def endElement(self, name): - """Signals the end of an element in non-namespace mode. - - The name parameter contains the name of the element type, just - as with the startElement event.""" - - def startElementNS(self, name, qname, attrs): - """Signals the start of an element in namespace mode. - - The name parameter contains the name of the element type as a - (uri, localname) tuple, the qname parameter the raw XML 1.0 - name used in the source document, and the attrs parameter - holds an instance of the Attributes class containing the - attributes of the element. - - The uri part of the name tuple is None for elements which have - no namespace.""" - - def endElementNS(self, name, qname): - """Signals the end of an element in namespace mode. - - The name parameter contains the name of the element type, just - as with the startElementNS event.""" - - def characters(self, content): - """Receive notification of character data. - - The Parser will call this method to report each chunk of - character data. SAX parsers may return all contiguous - character data in a single chunk, or they may split it into - several chunks; however, all of the characters in any single - event must come from the same external entity so that the - Locator provides useful information.""" - - def ignorableWhitespace(self, whitespace): - """Receive notification of ignorable whitespace in element content. - - Validating Parsers must use this method to report each chunk - of ignorable whitespace (see the W3C XML 1.0 recommendation, - section 2.10): non-validating parsers may also use this method - if they are capable of parsing and using content models. - - SAX parsers may return all contiguous whitespace in a single - chunk, or they may split it into several chunks; however, all - of the characters in any single event must come from the same - external entity, so that the Locator provides useful - information. - - The application must not attempt to read from the array - outside of the specified range.""" - - def processingInstruction(self, target, data): - """Receive notification of a processing instruction. - - The Parser will invoke this method once for each processing - instruction found: note that processing instructions may occur - before or after the main document element. - - A SAX parser should never report an XML declaration (XML 1.0, - section 2.8) or a text declaration (XML 1.0, section 4.3.1) - using this method.""" - - def skippedEntity(self, name): - """Receive notification of a skipped entity. - - The Parser will invoke this method once for each entity - skipped. Non-validating processors may skip entities if they - have not seen the declarations (because, for example, the - entity was declared in an external DTD subset). All processors - may skip external entities, depending on the values of the - http://xml.org/sax/features/external-general-entities and the - http://xml.org/sax/features/external-parameter-entities - properties.""" - - -# ===== DTDHandler ===== - -class DTDHandler: - """Handle DTD events. - - This interface specifies only those DTD events required for basic - parsing (unparsed entities and attributes).""" - - def notationDecl(self, name, publicId, systemId): - "Handle a notation declaration event." - - def unparsedEntityDecl(self, name, publicId, systemId, ndata): - "Handle an unparsed entity declaration event." - - -# ===== ENTITYRESOLVER ===== - -class EntityResolver: - """Basic interface for resolving entities. If you create an object - implementing this interface, then register the object with your - Parser, the parser will call the method in your object to - resolve all external entities. Note that DefaultHandler implements - this interface with the default behaviour.""" - - def resolveEntity(self, publicId, systemId): - """Resolve the system identifier of an entity and return either - the system identifier to read from as a string, or an InputSource - to read from.""" - return systemId - - -#============================================================================ -# -# CORE FEATURES -# -#============================================================================ - -feature_namespaces = "http://xml.org/sax/features/namespaces" -# true: Perform Namespace processing (default). -# false: Optionally do not perform Namespace processing -# (implies namespace-prefixes). -# access: (parsing) read-only; (not parsing) read/write - -feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes" -# true: Report the original prefixed names and attributes used for Namespace -# declarations. -# false: Do not report attributes used for Namespace declarations, and -# optionally do not report original prefixed names (default). -# access: (parsing) read-only; (not parsing) read/write - -feature_string_interning = "http://xml.org/sax/features/string-interning" -# true: All element names, prefixes, attribute names, Namespace URIs, and -# local names are interned using the built-in intern function. -# false: Names are not necessarily interned, although they may be (default). -# access: (parsing) read-only; (not parsing) read/write - -feature_validation = "http://xml.org/sax/features/validation" -# true: Report all validation errors (implies external-general-entities and -# external-parameter-entities). -# false: Do not report validation errors. -# access: (parsing) read-only; (not parsing) read/write - -feature_external_ges = "http://xml.org/sax/features/external-general-entities" -# true: Include all external general (text) entities. -# false: Do not include external general entities. -# access: (parsing) read-only; (not parsing) read/write - -feature_external_pes = "http://xml.org/sax/features/external-parameter-entities" -# true: Include all external parameter entities, including the external -# DTD subset. -# false: Do not include any external parameter entities, even the external -# DTD subset. -# access: (parsing) read-only; (not parsing) read/write - -all_features = [feature_namespaces, - feature_namespace_prefixes, - feature_string_interning, - feature_validation, - feature_external_ges, - feature_external_pes] - - -#============================================================================ -# -# CORE PROPERTIES -# -#============================================================================ - -property_lexical_handler = "http://xml.org/sax/properties/lexical-handler" -# data type: xml.sax.sax2lib.LexicalHandler -# description: An optional extension handler for lexical events like comments. -# access: read/write - -property_declaration_handler = "http://xml.org/sax/properties/declaration-handler" -# data type: xml.sax.sax2lib.DeclHandler -# description: An optional extension handler for DTD-related events other -# than notations and unparsed entities. -# access: read/write - -property_dom_node = "http://xml.org/sax/properties/dom-node" -# data type: org.w3c.dom.Node -# description: When parsing, the current DOM node being visited if this is -# a DOM iterator; when not parsing, the root DOM node for -# iteration. -# access: (parsing) read-only; (not parsing) read/write - -property_xml_string = "http://xml.org/sax/properties/xml-string" -# data type: String -# description: The literal string of characters that was the source for -# the current event. -# access: read-only - -property_encoding = "http://www.python.org/sax/properties/encoding" -# data type: String -# description: The name of the encoding to assume for input data. -# access: write: set the encoding, e.g. established by a higher-level -# protocol. May change during parsing (e.g. after -# processing a META tag) -# read: return the current encoding (possibly established through -# auto-detection. -# initial value: UTF-8 -# - -property_interning_dict = "http://www.python.org/sax/properties/interning-dict" -# data type: Dictionary -# description: The dictionary used to intern common strings in the document -# access: write: Request that the parser uses a specific dictionary, to -# allow interning across different documents -# read: return the current interning dictionary, or None -# - -all_properties = [property_lexical_handler, - property_dom_node, - property_declaration_handler, - property_xml_string, - property_encoding, - property_interning_dict] diff --git a/xcap/sax/sax2exts.py b/xcap/sax/sax2exts.py deleted file mode 100644 index 41584a5..0000000 --- a/xcap/sax/sax2exts.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Various extensions to the core SAX 2.0 API. - -$Id: sax2exts.py,v 1.5 2001/12/30 22:17:03 loewis Exp $ -""" - -from . import saxexts,saxlib - -# In SAX2, validation is turned-on through a property. Make sure -# that all parsers returned from this factory are validating -class ValidatingReaderFactory(saxexts.ParserFactory): - def make_parser(self, parser_list = []): - p = saxexts.ParserFactory.make_parser(self,parser_list) - p.setFeature(saxlib.feature_validation, 1) - return p - - -# --- XMLReader factory - -XMLReaderFactory = saxexts.ParserFactory - -# --- Creating parser factories - -XMLParserFactory = XMLReaderFactory(["xml.sax.drivers2.drv_pyexpat", - "xml.sax.drivers2.drv_xmlproc"]) - -XMLValParserFactory = ValidatingReaderFactory(["xml.sax.drivers2.drv_xmlproc"]) - -HTMLParserFactory=XMLReaderFactory(["xml.sax.drivers2.drv_htmllib", - "xml.sax.drivers2.drv_sgmlop", - "xml.sax.drivers2.drv_sgmllib"]) - -SGMLParserFactory=XMLReaderFactory(["xml.sax.drivers2.drv_sgmlop", - "xml.sax.drivers2.drv_sgmllib"]) - -def make_parser(parser_list = []): - return XMLParserFactory.make_parser(parser_list) diff --git a/xcap/sax/saxexts.py b/xcap/sax/saxexts.py deleted file mode 100644 index 16870ea..0000000 --- a/xcap/sax/saxexts.py +++ /dev/null @@ -1,168 +0,0 @@ -""" -A module of experimental extensions to the standard SAX interface. - -$Id: saxexts.py,v 1.14 2003/01/21 13:02:44 loewis Exp $ -""" - -import _exceptions, handler, sys, string, os, types - -# --- Parser factory - -class ParserFactory: - """A general class to be used by applications for creating parsers on - foreign systems where it is unknown which parsers exist.""" - - def __init__(self,list=[]): - # Python 2 compatibility: let consider environment variables - # and properties override list argument - if "PY_SAX_PARSER" in os.environ: - list = string.split(os.environ["PY_SAX_PARSER"], ",") - _key = "python.xml.sax.parser" - if sys.platform[:4] == "java" \ - and sys.registry.containsKey(_key): - list = string.split(sys.registry.getProperty(_key), ",") - self.parsers=list - - def get_parser_list(self): - "Returns the list of possible drivers." - return self.parsers - - def set_parser_list(self,list): - "Sets the driver list." - self.parsers=list - - if sys.platform[ : 4] == "java": - def _create_parser(self,parser_name): - from org.python.core import imp - drv_module = imp.importName(parser_name, 0, globals()) - return drv_module.create_parser() - - else: - def _create_parser(self,parser_name): - drv_module = __import__(parser_name,{},{},['create_parser']) - return drv_module.create_parser() - - def make_parser(self, parser_list = []): - """Returns a SAX driver for the first available parser of the parsers - in the list. Note that the list is one of drivers, so it first tries - the driver and if that exists imports it to see if the parser also - exists. If no parsers are available a SAXException is thrown. - - Accepts a list of driver package names as an optional argument.""" - - import sys - # SAX1 expected a single package name as optional argument - # Python 2 changed this to be a list of parser names - # We now support both, as well as None (which was the default) - if parser_list is None: - parser_list = [] - elif type(parser_list) == bytes: - parser_list = [parser_list] - - for parser_name in parser_list+self.parsers: - try: - return self._create_parser(parser_name) - except ImportError as e: - if parser_name in sys.modules: - # The parser module was found, but importing it - # failed unexpectedly, pass this exception through - raise - except _exceptions.SAXReaderNotAvailable as e: - # The parser module detected that it won't work properly, - # so mark it as unusable, and try the next one - def _create_parser(msg = str(e)): - raise _exceptions.SAXReaderNotAvailable(msg) - sys.modules[parser_name].create_parser = _create_parser - - raise _exceptions.SAXReaderNotAvailable("No parsers found", None) - -# --- Experimental extension to Parser interface -from . import saxlib -class ExtendedParser(saxlib.Parser): - "Experimental unofficial SAX level 2 extended parser interface." - - def get_parser_name(self): - "Returns a single-word parser name." - raise _exceptions.SAXException("Method not supported.",None) - - def get_parser_version(self): - """Returns the version of the imported parser, which may not be the - one the driver was implemented for.""" - raise _exceptions.SAXException("Method not supported.",None) - - def get_driver_version(self): - "Returns the version number of the driver." - raise _exceptions.SAXException("Method not supported.",None) - - def is_validating(self): - "True if the parser is validating, false otherwise." - raise _exceptions.SAXException("Method not supported.",None) - - def is_dtd_reading(self): - """True if the parser is non-validating, but conforms to the spec by - reading the DTD.""" - raise _exceptions.SAXException("Method not supported.",None) - - def reset(self): - "Makes the parser start parsing afresh." - raise _exceptions.SAXException("Method not supported.",None) - - def feed(self,data): - "Feeds data to the parser." - raise _exceptions.SAXException("Method not supported.",None) - - def close(self): - "Called after the last call to feed, when there are no more data." - raise _exceptions.SAXException("Method not supported.",None) - -# --- Experimental document handler which does not slice strings - -class NosliceDocumentHandler(saxlib.DocumentHandler): - """A document handler that does not force the client application to - slice character data strings.""" - - def __init__(self): - handler.DocumentHandler.__init__() - self.characters=self.safe_handler - - def safe_handler(self,data,start,length): - """A characters event handler that always works, but doesn't always - slice strings.""" - if start==0 and length==len(data): - self.handle_data(data) - else: - self.handle_data(data[start:start+length]) - - def slice_handler(self,data,start,length): - "A character event handler that always slices strings." - self.handle_data(data[start:start+length]) - - def noslice_handler(self,data,start,length): - "A character event handler that never slices strings." - self.handle_data(data) - - def handle_data(self,data): - "This is the character data event method to override." - pass - -# --- Creating parser factories - -XMLParserFactory=ParserFactory(["xml.sax.drivers.drv_pyexpat", - "xml.sax.drivers.drv_xmltok", - "xml.sax.drivers.drv_xmlproc", - "xml.sax.drivers.drv_xmltoolkit", - "xml.sax.drivers.drv_xmllib", - "xml.sax.drivers.drv_xmldc", - "xml.sax.drivers.drv_sgmlop"]) - -XMLValParserFactory=ParserFactory(["xml.sax.drivers.drv_xmlproc_val"]) - -HTMLParserFactory=ParserFactory(["xml.sax.drivers.drv_htmllib", - "xml.sax.drivers.drv_sgmlop", - "xml.sax.drivers.drv_sgmllib"]) - -SGMLParserFactory=ParserFactory(["xml.sax.drivers.drv_sgmlop", - "xml.sax.drivers.drv_sgmllib"]) - -def make_parser(parser_list = []): - return XMLParserFactory.make_parser(parser_list) diff --git a/xcap/sax/saxlib.py b/xcap/sax/saxlib.py deleted file mode 100644 index f77e43a..0000000 --- a/xcap/sax/saxlib.py +++ /dev/null @@ -1,430 +0,0 @@ -""" -This module contains the core classes of version 2.0 of SAX for Python. -This file provides only default classes with absolutely minimum -functionality, from which drivers and applications can be subclassed. - -Many of these classes are empty and are included only as documentation -of the interfaces. - -$Id: saxlib.py,v 1.12 2002/05/10 14:49:21 akuchling Exp $ -""" - -version = '2.0beta' - -# A number of interfaces used to live in saxlib, but are now in -# various other modules for Python 2 compatibility. If nobody uses -# them here any longer, the references can be removed - -from .handler import ErrorHandler, ContentHandler, DTDHandler, EntityResolver -from .xmlreader import XMLReader, InputSource, Locator, IncrementalParser -from ._exceptions import * - -from .handler import \ - feature_namespaces,\ - feature_namespace_prefixes,\ - feature_string_interning,\ - feature_validation,\ - feature_external_ges,\ - feature_external_pes,\ - all_features,\ - property_lexical_handler,\ - property_declaration_handler,\ - property_dom_node,\ - property_xml_string,\ - all_properties - -#============================================================================ -# -# MAIN INTERFACES -# -#============================================================================ - -# ===== XMLFILTER ===== - -class XMLFilter(XMLReader): - """Interface for a SAX2 parser filter. - - A parser filter is an XMLReader that gets its events from another - XMLReader (which may in turn also be a filter) rather than from a - primary source like a document or other non-SAX data source. - Filters can modify a stream of events before passing it on to its - handlers.""" - - def __init__(self, parent = None): - """Creates a filter instance, allowing applications to set the - parent on instantiation.""" - XMLReader.__init__(self) - self._parent = parent - - def setParent(self, parent): - """Sets the parent XMLReader of this filter. The argument may - not be None.""" - self._parent = parent - - def getParent(self): - "Returns the parent of this filter." - return self._parent - -# ===== ATTRIBUTES ===== - -class Attributes: - """Interface for a list of XML attributes. - - Contains a list of XML attributes, accessible by name.""" - - def getLength(self): - "Returns the number of attributes in the list." - raise NotImplementedError("This method must be implemented!") - - def getType(self, name): - "Returns the type of the attribute with the given name." - raise NotImplementedError("This method must be implemented!") - - def getValue(self, name): - "Returns the value of the attribute with the given name." - raise NotImplementedError("This method must be implemented!") - - def getValueByQName(self, name): - """Returns the value of the attribute with the given raw (or - qualified) name.""" - raise NotImplementedError("This method must be implemented!") - - def getNameByQName(self, name): - """Returns the namespace name of the attribute with the given - raw (or qualified) name.""" - raise NotImplementedError("This method must be implemented!") - - def getNames(self): - """Returns a list of the names of all attributes - in the list.""" - raise NotImplementedError("This method must be implemented!") - - def getQNames(self): - """Returns a list of the raw qualified names of all attributes - in the list.""" - raise NotImplementedError("This method must be implemented!") - - def __len__(self): - "Alias for getLength." - raise NotImplementedError("This method must be implemented!") - - def __getitem__(self, name): - "Alias for getValue." - raise NotImplementedError("This method must be implemented!") - - def keys(self): - "Returns a list of the attribute names in the list." - raise NotImplementedError("This method must be implemented!") - - def has_key(self, name): - "True if the attribute is in the list, false otherwise." - raise NotImplementedError("This method must be implemented!") - - def get(self, name, alternative=None): - """Return the value associated with attribute name; if it is not - available, then return the alternative.""" - raise NotImplementedError("This method must be implemented!") - - def copy(self): - "Return a copy of the Attributes object." - raise NotImplementedError("This method must be implemented!") - - def items(self): - "Return a list of (attribute_name, value) pairs." - raise NotImplementedError("This method must be implemented!") - - def values(self): - "Return a list of all attribute values." - raise NotImplementedError("This method must be implemented!") - - -#============================================================================ -# -# HANDLER INTERFACES -# -#============================================================================ - - -# ===== DECLHANDLER ===== - -class DeclHandler: - """Optional SAX2 handler for DTD declaration events. - - Note that some DTD declarations are already reported through the - DTDHandler interface. All events reported to this handler will - occur between the startDTD and endDTD events of the - LexicalHandler. - - To set the DeclHandler for an XMLReader, use the setProperty method - with the identifier http://xml.org/sax/handlers/DeclHandler.""" - - def attributeDecl(self, elem_name, attr_name, type, value_def, value): - """Report an attribute type declaration. - - Only the first declaration will be reported. The type will be - one of the strings "CDATA", "ID", "IDREF", "IDREFS", - "NMTOKEN", "NMTOKENS", "ENTITY", "ENTITIES", or "NOTATION", or - a list of names (in the case of enumerated definitions). - - elem_name is the element type name, attr_name the attribute - type name, type a string representing the attribute type, - value_def a string representing the default declaration - ('#IMPLIED', '#REQUIRED', '#FIXED' or None). value is a string - representing the attribute's default value, or None if there - is none.""" - - def elementDecl(self, elem_name, content_model): - """Report an element type declaration. - - Only the first declaration will be reported. - - content_model is the string 'EMPTY', the string 'ANY' or the content - model structure represented as tuple (separator, tokens, modifier) - where separator is the separator in the token list (that is, '|' or - ','), tokens is the list of tokens (element type names or tuples - representing parentheses) and modifier is the quantity modifier - ('*', '?' or '+').""" - - def internalEntityDecl(self, name, value): - """Report an internal entity declaration. - - Only the first declaration of an entity will be reported. - - name is the name of the entity. If it is a parameter entity, - the name will begin with '%'. value is the replacement text of - the entity.""" - - def externalEntityDecl(self, name, public_id, system_id): - """Report a parsed entity declaration. (Unparsed entities are - reported to the DTDHandler.) - - Only the first declaration for each entity will be reported. - - name is the name of the entity. If it is a parameter entity, - the name will begin with '%'. public_id and system_id are the - public and system identifiers of the entity. public_id will be - None if none were declared.""" - - - -# ===== LEXICALHANDLER ===== - -class LexicalHandler: - """Optional SAX2 handler for lexical events. - - This handler is used to obtain lexical information about an XML - document, that is, information about how the document was encoded - (as opposed to what it contains, which is reported to the - ContentHandler), such as comments and CDATA marked section - boundaries. - - To set the LexicalHandler of an XMLReader, use the setProperty - method with the property identifier - 'http://xml.org/sax/handlers/LexicalHandler'. There is no - guarantee that the XMLReader will support or recognize this - property.""" - - def comment(self, content): - """Reports a comment anywhere in the document (including the - DTD and outside the document element). - - content is a string that holds the contents of the comment.""" - - def startDTD(self, name, public_id, system_id): - """Report the start of the DTD declarations, if the document - has an associated DTD. - - A startEntity event will be reported before declaration events - from the external DTD subset are reported, and this can be - used to infer from which subset DTD declarations derive. - - name is the name of the document element type, public_id the - public identifier of the DTD (or None if none were supplied) - and system_id the system identfier of the external subset (or - None if none were supplied).""" - - def endDTD(self): - "Signals the end of DTD declarations." - - def startEntity(self, name): - """Report the beginning of an entity. - - The start and end of the document entity is not reported. The - start and end of the external DTD subset is reported with the - pseudo-name '[dtd]'. - - Skipped entities will be reported through the skippedEntity - event of the ContentHandler rather than through this event. - - name is the name of the entity. If it is a parameter entity, - the name will begin with '%'.""" - - def endEntity(self, name): - """Reports the end of an entity. name is the name of the - entity, and follows the same conventions as for - startEntity.""" - - def startCDATA(self): - """Reports the beginning of a CDATA marked section. - - The contents of the CDATA marked section will be reported - through the characters event.""" - - def endCDATA(self): - "Reports the end of a CDATA marked section." - - -#============================================================================ -# -# SAX 1.0 COMPATIBILITY CLASSES -# Note that these are all deprecated. -# -#============================================================================ - -# ===== ATTRIBUTELIST ===== - -class AttributeList: - """Interface for an attribute list. This interface provides - information about a list of attributes for an element (only - specified or defaulted attributes will be reported). Note that the - information returned by this object will be valid only during the - scope of the DocumentHandler.startElement callback, and the - attributes will not necessarily be provided in the order declared - or specified.""" - - def getLength(self): - "Return the number of attributes in list." - - def getName(self, i): - "Return the name of an attribute in the list." - - def getType(self, i): - """Return the type of an attribute in the list. (Parameter can be - either integer index or attribute name.)""" - - def getValue(self, i): - """Return the value of an attribute in the list. (Parameter can be - either integer index or attribute name.)""" - - def __len__(self): - "Alias for getLength." - - def __getitem__(self, key): - "Alias for getName (if key is an integer) and getValue (if string)." - - def keys(self): - "Returns a list of the attribute names." - - def has_key(self, key): - "True if the attribute is in the list, false otherwise." - - def get(self, key, alternative=None): - """Return the value associated with attribute name; if it is not - available, then return the alternative.""" - - def copy(self): - "Return a copy of the AttributeList." - - def items(self): - "Return a list of (attribute_name,value) pairs." - - def values(self): - "Return a list of all attribute values." - - -# ===== DOCUMENTHANDLER ===== - -class DocumentHandler: - """Handle general document events. This is the main client - interface for SAX: it contains callbacks for the most important - document events, such as the start and end of elements. You need - to create an object that implements this interface, and then - register it with the Parser. If you do not want to implement - the entire interface, you can derive a class from HandlerBase, - which implements the default functionality. You can find the - location of any document event using the Locator interface - supplied by setDocumentLocator().""" - - def characters(self, ch, start, length): - "Handle a character data event." - - def endDocument(self): - "Handle an event for the end of a document." - - def endElement(self, name): - "Handle an event for the end of an element." - - def ignorableWhitespace(self, ch, start, length): - "Handle an event for ignorable whitespace in element content." - - def processingInstruction(self, target, data): - "Handle a processing instruction event." - - def setDocumentLocator(self, locator): - "Receive an object for locating the origin of SAX document events." - - def startDocument(self): - "Handle an event for the beginning of a document." - - def startElement(self, name, atts): - "Handle an event for the beginning of an element." - - -# ===== HANDLERBASE ===== - -class HandlerBase(EntityResolver, DTDHandler, DocumentHandler,\ - ErrorHandler): - """Default base class for handlers. This class implements the - default behaviour for four SAX interfaces: EntityResolver, - DTDHandler, DocumentHandler, and ErrorHandler: rather - than implementing those full interfaces, you may simply extend - this class and override the methods that you need. Note that the - use of this class is optional (you are free to implement the - interfaces directly if you wish).""" - - -# ===== PARSER ===== - -class Parser: - """Basic interface for SAX (Simple API for XML) parsers. All SAX - parsers must implement this basic interface: it allows users to - register handlers for different types of events and to initiate a - parse from a URI, a character stream, or a byte stream. SAX - parsers should also implement a zero-argument constructor.""" - - def __init__(self): - self.doc_handler = DocumentHandler() - self.dtd_handler = DTDHandler() - self.ent_handler = EntityResolver() - self.err_handler = ErrorHandler() - - def parse(self, systemId): - "Parse an XML document from a system identifier." - - def parseFile(self, fileobj): - "Parse an XML document from a file-like object." - - def setDocumentHandler(self, handler): - "Register an object to receive basic document-related events." - self.doc_handler=handler - - def setDTDHandler(self, handler): - "Register an object to receive basic DTD-related events." - self.dtd_handler=handler - - def setEntityResolver(self, resolver): - "Register an object to resolve external entities." - self.ent_handler=resolver - - def setErrorHandler(self, handler): - "Register an object to receive error-message events." - self.err_handler=handler - - def setLocale(self, locale): - """Allow an application to set the locale for errors and warnings. - - SAX parsers are not required to provide localisation for errors - and warnings; if they cannot support the requested locale, - however, they must throw a SAX exception. Applications may - request a locale change in the middle of a parse.""" - raise SAXNotSupportedException("Locale support not implemented") diff --git a/xcap/sax/saxutils.py b/xcap/sax/saxutils.py deleted file mode 100644 index 46c1fd5..0000000 --- a/xcap/sax/saxutils.py +++ /dev/null @@ -1,809 +0,0 @@ -""" -A library of useful helper classes to the saxlib classes, for the -convenience of application and driver writers. - -$Id: saxutils.py,v 1.35 2004/03/20 07:46:04 fdrake Exp $ -""" - -import os, urllib.parse, urllib.request, urllib.error, urllib.parse, types -from . import handler -from . import xmlreader -import sys, _exceptions, saxlib - -try: - _StringTypes = [bytes, str] -except AttributeError: # 1.5 compatibility:UnicodeType not defined - _StringTypes = [bytes] - -def __dict_replace(s, d): - """Replace substrings of a string using a dictionary.""" - for key, value in list(d.items()): - s = s.replace(key, value) - return s - -def escape(data, entities={}): - """Escape &, <, and > in a string of data. - - You can escape other strings of data by passing a dictionary as - the optional entities parameter. The keys and values must all be - strings; each key will be replaced with its corresponding value. - """ - data = data.replace("&", "&") - data = data.replace("<", "<") - data = data.replace(">", ">") - if entities: - data = __dict_replace(data, entities) - return data - -def unescape(data, entities={}): - """Unescape &, <, and > in a string of data. - - You can unescape other strings of data by passing a dictionary as - the optional entities parameter. The keys and values must all be - strings; each key will be replaced with its corresponding value. - """ - data = data.replace("<", "<") - data = data.replace(">", ">") - if entities: - data = __dict_replace(data, entities) - # must do ampersand last - return data.replace("&", "&") - -def quoteattr(data, entities={}): - """Escape and quote an attribute value. - - Escape &, <, and > in a string of data, then quote it for use as - an attribute value. The \" character will be escaped as well, if - necessary. - - You can escape other strings of data by passing a dictionary as - the optional entities parameter. The keys and values must all be - strings; each key will be replaced with its corresponding value. - """ - data = escape(data, entities) - if '"' in data: - if "'" in data: - data = '"%s"' % data.replace('"', """) - else: - data = "'%s'" % data - else: - data = '"%s"' % data - return data - -# --- DefaultHandler - -class DefaultHandler(handler.EntityResolver, handler.DTDHandler, - handler.ContentHandler, handler.ErrorHandler): - """Default base class for SAX2 event handlers. Implements empty - methods for all callback methods, which can be overridden by - application implementors. Replaces the deprecated SAX1 HandlerBase - class.""" - -# --- Location - -class Location: - """Represents a location in an XML entity. Initialized by being passed - a locator, from which it reads off the current location, which is then - stored internally.""" - - def __init__(self, locator): - self.__col = locator.getColumnNumber() - self.__line = locator.getLineNumber() - self.__pubid = locator.getPublicId() - self.__sysid = locator.getSystemId() - - def getColumnNumber(self): - return self.__col - - def getLineNumber(self): - return self.__line - - def getPublicId(self): - return self.__pubid - - def getSystemId(self): - return self.__sysid - - def __str__(self): - if self.__line is None: - line = "?" - else: - line = self.__line - if self.__col is None: - col = "?" - else: - col = self.__col - return "%s:%s:%s" % ( - self.__sysid or self.__pubid or "", - line, col) - -# --- ErrorPrinter - -class ErrorPrinter: - "A simple class that just prints error messages to standard out." - - def __init__(self, level=0, outfile=sys.stderr): - self._level = level - self._outfile = outfile - - def warning(self, exception): - if self._level <= 0: - self._outfile.write("WARNING in %s: %s\n" % - (self.__getpos(exception), - exception.getMessage())) - - def error(self, exception): - if self._level <= 1: - self._outfile.write("ERROR in %s: %s\n" % - (self.__getpos(exception), - exception.getMessage())) - - def fatalError(self, exception): - if self._level <= 2: - self._outfile.write("FATAL ERROR in %s: %s\n" % - (self.__getpos(exception), - exception.getMessage())) - - def __getpos(self, exception): - if isinstance(exception, _exceptions.SAXParseException): - return "%s:%s:%s" % (exception.getSystemId(), - exception.getLineNumber(), - exception.getColumnNumber()) - else: - return "" - -# --- ErrorRaiser - -class ErrorRaiser: - "A simple class that just raises the exceptions it is passed." - - def __init__(self, level = 0): - self._level = level - - def error(self, exception): - if self._level <= 1: - raise exception - - def fatalError(self, exception): - if self._level <= 2: - raise exception - - def warning(self, exception): - if self._level <= 0: - raise exception - -# --- AttributesImpl now lives in xmlreader -from .xmlreader import AttributesImpl - -# --- XMLGenerator is the SAX2 ContentHandler for writing back XML -import codecs - -def _outputwrapper(stream,encoding): - writerclass = codecs.lookup(encoding)[3] - return writerclass(stream) - -if hasattr(codecs, "register_error"): - def writetext(stream, text, entities={}): - stream.errors = "xmlcharrefreplace" - stream.write(escape(text, entities)) - stream.errors = "strict" -else: - def writetext(stream, text, entities={}): - text = escape(text, entities) - try: - stream.write(text) - except UnicodeError: - for c in text: - try: - stream.write(c) - except UnicodeError: - stream.write("&#%d;" % ord(c)) - -def writeattr(stream, text): - countdouble = text.count('"') - if countdouble: - countsingle = text.count("'") - if countdouble <= countsingle: - entities = {'"': """} - quote = '"' - else: - entities = {"'": "'"} - quote = "'" - else: - entities = {} - quote = '"' - stream.write(quote) - writetext(stream, text, entities) - stream.write(quote) - - -class XMLGenerator(handler.ContentHandler): - GENERATED_PREFIX = "xml.sax.saxutils.prefix%s" - - def __init__(self, out=None, encoding="iso-8859-1"): - if out is None: - import sys - out = sys.stdout - handler.ContentHandler.__init__(self) - self._out = _outputwrapper(out,encoding) - self._ns_contexts = [{}] # contains uri -> prefix dicts - self._current_context = self._ns_contexts[-1] - self._undeclared_ns_maps = [] - self._encoding = encoding - self._generated_prefix_ctr = 0 - return - - # ContentHandler methods - - def startDocument(self): - self._out.write('\n' % - self._encoding) - - def startPrefixMapping(self, prefix, uri): - self._ns_contexts.append(self._current_context.copy()) - self._current_context[uri] = prefix - self._undeclared_ns_maps.append((prefix, uri)) - - def endPrefixMapping(self, prefix): - self._current_context = self._ns_contexts[-1] - del self._ns_contexts[-1] - - def startElement(self, name, attrs): - self._out.write('<' + name) - for (name, value) in list(attrs.items()): - self._out.write(' %s=' % name) - writeattr(self._out, value) - self._out.write('>') - - def endElement(self, name): - self._out.write('' % name) - - def startElementNS(self, name, qname, attrs): - if name[0] is None: - name = name[1] - elif self._current_context[name[0]] is None: - # default namespace - name = name[1] - else: - name = self._current_context[name[0]] + ":" + name[1] - self._out.write('<' + name) - - for k,v in self._undeclared_ns_maps: - if k is None: - self._out.write(' xmlns="%s"' % (v or '')) - else: - self._out.write(' xmlns:%s="%s"' % (k,v)) - self._undeclared_ns_maps = [] - - for (name, value) in list(attrs.items()): - if name[0] is None: - name = name[1] - elif self._current_context[name[0]] is None: - # default namespace - #If an attribute has a nsuri but not a prefix, we must - #create a prefix and add a nsdecl - prefix = self.GENERATED_PREFIX % self._generated_prefix_ctr - self._generated_prefix_ctr = self._generated_prefix_ctr + 1 - name = prefix + ':' + name[1] - self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(name[0]))) - self._current_context[name[0]] = prefix - else: - name = self._current_context[name[0]] + ":" + name[1] - self._out.write(' %s=' % name) - writeattr(self._out, value) - self._out.write('>') - - def endElementNS(self, name, qname): - # XXX: if qname is not None, we better use it. - # Python 2.0b2 requires us to use the recorded prefix for - # name[0], though - if name[0] is None: - qname = name[1] - elif self._current_context[name[0]] is None: - qname = name[1] - else: - qname = self._current_context[name[0]] + ":" + name[1] - self._out.write('' % qname) - - def characters(self, content): - writetext(self._out, content) - - def ignorableWhitespace(self, content): - self._out.write(content) - - def processingInstruction(self, target, data): - self._out.write('' % (target, data)) - - -class LexicalXMLGenerator(XMLGenerator, saxlib.LexicalHandler): - """A XMLGenerator that also supports the LexicalHandler interface""" - - def __init__(self, out=None, encoding="iso-8859-1"): - XMLGenerator.__init__(self, out, encoding) - self._in_cdata = 0 - - def characters(self, content): - if self._in_cdata: - self._out.write(content.replace(']]>', ']]>]]>') - - def comment(self, content): - self._out.write('') - - def startCDATA(self): - self._in_cdata = 1 - self._out.write('') - - -# --- ContentGenerator is the SAX1 DocumentHandler for writing back XML -class ContentGenerator(XMLGenerator): - - def characters(self, str, start, end): - # In SAX1, characters receives start and end; in SAX2, it receives - # a string. For plain strings, we may want to use a buffer object. - return XMLGenerator.characters(self, str[start:start+end]) - -# --- XMLFilterImpl -class XMLFilterBase(saxlib.XMLFilter): - """This class is designed to sit between an XMLReader and the - client application's event handlers. By default, it does nothing - but pass requests up to the reader and events on to the handlers - unmodified, but subclasses can override specific methods to modify - the event stream or the configuration requests as they pass - through.""" - - # ErrorHandler methods - - def error(self, exception): - self._err_handler.error(exception) - - def fatalError(self, exception): - self._err_handler.fatalError(exception) - - def warning(self, exception): - self._err_handler.warning(exception) - - # ContentHandler methods - - def setDocumentLocator(self, locator): - self._cont_handler.setDocumentLocator(locator) - - def startDocument(self): - self._cont_handler.startDocument() - - def endDocument(self): - self._cont_handler.endDocument() - - def startPrefixMapping(self, prefix, uri): - self._cont_handler.startPrefixMapping(prefix, uri) - - def endPrefixMapping(self, prefix): - self._cont_handler.endPrefixMapping(prefix) - - def startElement(self, name, attrs): - self._cont_handler.startElement(name, attrs) - - def endElement(self, name): - self._cont_handler.endElement(name) - - def startElementNS(self, name, qname, attrs): - self._cont_handler.startElementNS(name, qname, attrs) - - def endElementNS(self, name, qname): - self._cont_handler.endElementNS(name, qname) - - def characters(self, content): - self._cont_handler.characters(content) - - def ignorableWhitespace(self, chars): - self._cont_handler.ignorableWhitespace(chars) - - def processingInstruction(self, target, data): - self._cont_handler.processingInstruction(target, data) - - def skippedEntity(self, name): - self._cont_handler.skippedEntity(name) - - # DTDHandler methods - - def notationDecl(self, name, publicId, systemId): - self._dtd_handler.notationDecl(name, publicId, systemId) - - def unparsedEntityDecl(self, name, publicId, systemId, ndata): - self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata) - - # EntityResolver methods - - def resolveEntity(self, publicId, systemId): - self._ent_handler.resolveEntity(publicId, systemId) - - # XMLReader methods - - def parse(self, source): - self._parent.setContentHandler(self) - self._parent.setErrorHandler(self) - self._parent.setEntityResolver(self) - self._parent.setDTDHandler(self) - self._parent.parse(source) - - def setLocale(self, locale): - self._parent.setLocale(locale) - - def getFeature(self, name): - return self._parent.getFeature(name) - - def setFeature(self, name, state): - self._parent.setFeature(name, state) - - def getProperty(self, name): - return self._parent.getProperty(name) - - def setProperty(self, name, value): - self._parent.setProperty(name, value) - -# FIXME: remove this backward compatibility hack when not needed anymore -XMLFilterImpl = XMLFilterBase - -# --- BaseIncrementalParser - -class BaseIncrementalParser(xmlreader.IncrementalParser): - """This class implements the parse method of the XMLReader - interface using the feed, close and reset methods of the - IncrementalParser interface as a convenience to SAX 2.0 driver - writers.""" - - def parse(self, source): - source = prepare_input_source(source) - self.prepareParser(source) - - self._cont_handler.startDocument() - - # FIXME: what about char-stream? - inf = source.getByteStream() - buffer = inf.read(16384) - while buffer != "": - self.feed(buffer) - buffer = inf.read(16384) - - self.close() - self.reset() - - self._cont_handler.endDocument() - - def prepareParser(self, source): - """This method is called by the parse implementation to allow - the SAX 2.0 driver to prepare itself for parsing.""" - raise NotImplementedError("prepareParser must be overridden!") - -# --- Utility functions - -def prepare_input_source(source, base = ""): - """This function takes an InputSource and an optional base URL and - returns a fully resolved InputSource object ready for reading.""" - - if type(source) in _StringTypes: - source = xmlreader.InputSource(source) - elif hasattr(source, "read"): - f = source - source = xmlreader.InputSource() - source.setByteStream(f) - if hasattr(f, "name"): - source.setSystemId(f.name) - - if source.getByteStream() is None: - sysid = source.getSystemId() - if os.path.isfile(sysid): - basehead = os.path.split(os.path.normpath(base))[0] - source.setSystemId(os.path.join(basehead, sysid)) - f = open(sysid, "rb") - else: - source.setSystemId(urllib.parse.urljoin(base, sysid)) - f = urllib.request.urlopen(source.getSystemId()) - - source.setByteStream(f) - - return source - -# =========================================================================== -# -# DEPRECATED SAX 1.0 CLASSES -# -# =========================================================================== - -# --- AttributeMap - -class AttributeMap: - """An implementation of AttributeList that takes an (attr,val) hash - and uses it to implement the AttributeList interface.""" - - def __init__(self, map): - self.map=map - - def getLength(self): - return len(list(self.map.keys())) - - def getName(self, i): - try: - return list(self.map.keys())[i] - except IndexError as e: - return None - - def getType(self, i): - return "CDATA" - - def getValue(self, i): - try: - if type(i)==int: - return self.map[self.getName(i)] - else: - return self.map[i] - except KeyError as e: - return None - - def __len__(self): - return len(self.map) - - def __getitem__(self, key): - if type(key)==int: - return list(self.map.keys())[key] - else: - return self.map[key] - - def items(self): - return list(self.map.items()) - - def keys(self): - return list(self.map.keys()) - - def has_key(self,key): - return key in self.map - - def get(self, key, alternative=None): - return self.map.get(key, alternative) - - def copy(self): - return AttributeMap(self.map.copy()) - - def values(self): - return list(self.map.values()) - -# --- Event broadcasting object - -class EventBroadcaster: - """Takes a list of objects and forwards any method calls received - to all objects in the list. The attribute list holds the list and - can freely be modified by clients.""" - - class Event: - "Helper objects that represent event methods." - - def __init__(self,list,name): - self.list=list - self.name=name - - def __call__(self,*rest): - for obj in self.list: - getattr(obj,self.name)(*rest) - - def __init__(self,list): - self.list=list - - def __getattr__(self,name): - return self.Event(self.list,name) - - def __repr__(self): - return "" % id(self) - -# --- ESIS document handler -from . import saxlib -class ESISDocHandler(saxlib.HandlerBase): - "A SAX document handler that produces naive ESIS output." - - def __init__(self,writer=sys.stdout): - self.writer=writer - - def processingInstruction (self,target, remainder): - """Receive an event signalling that a processing instruction - has been found.""" - self.writer.write("?"+target+" "+remainder+"\n") - - def startElement(self,name,amap): - "Receive an event signalling the start of an element." - self.writer.write("("+name+"\n") - for a_name in list(amap.keys()): - self.writer.write("A"+a_name+" "+amap[a_name]+"\n") - - def endElement(self,name): - "Receive an event signalling the end of an element." - self.writer.write(")"+name+"\n") - - def characters(self,data,start_ix,length): - "Receive an event signalling that character data has been found." - self.writer.write("-"+data[start_ix:start_ix+length]+"\n") - -# --- XML canonizer - -class Canonizer(saxlib.HandlerBase): - "A SAX document handler that produces canonized XML output." - - def __init__(self,writer=sys.stdout): - self.elem_level=0 - self.writer=writer - - def processingInstruction (self,target, remainder): - if not target=="xml": - self.writer.write("") - - def startElement(self,name,amap): - self.writer.write("<"+name) - - a_names=list(amap.keys()) - a_names.sort() - - for a_name in a_names: - self.writer.write(" "+a_name+"=\"") - self.write_data(amap[a_name]) - self.writer.write("\"") - self.writer.write(">") - self.elem_level=self.elem_level+1 - - def endElement(self,name): - self.writer.write("") - self.elem_level=self.elem_level-1 - - def ignorableWhitespace(self,data,start_ix,length): - self.characters(data,start_ix,length) - - def characters(self,data,start_ix,length): - if self.elem_level>0: - self.write_data(data[start_ix:start_ix+length]) - - def write_data(self,data): - "Writes datachars to writer." - data=data.replace("&","&") - data=data.replace("<","<") - data=data.replace("\"",""") - data=data.replace(">",">") - data=data.replace(chr(9)," ") - data=data.replace(chr(10)," ") - data=data.replace(chr(13)," ") - self.writer.write(data) - -# --- mllib - -class mllib: - """A re-implementation of the htmllib, sgmllib and xmllib interfaces as a - SAX DocumentHandler.""" - -# Unsupported: -# - setnomoretags -# - setliteral -# - translate_references -# - handle_xml -# - handle_doctype -# - handle_charref -# - handle_entityref -# - handle_comment -# - handle_cdata -# - tag_attributes - - def __init__(self): - self.reset() - - def reset(self): - from . import saxexts # only used here - self.parser=saxexts.XMLParserFactory.make_parser() - self.handler=mllib.Handler(self.parser,self) - self.handler.reset() - - def feed(self,data): - self.parser.feed(data) - - def close(self): - self.parser.close() - - def get_stack(self): - return self.handler.get_stack() - - # --- Handler methods (to be overridden) - - def handle_starttag(self,name,method,atts): - method(atts) - - def handle_endtag(self,name,method): - method() - - def handle_data(self,data): - pass - - def handle_proc(self,target,data): - pass - - def unknown_starttag(self,name,atts): - pass - - def unknown_endtag(self,name): - pass - - def syntax_error(self,message): - pass - - # --- The internal handler class - - class Handler(saxlib.DocumentHandler,saxlib.ErrorHandler): - """An internal class to handle SAX events and translate them to mllib - events.""" - - def __init__(self,driver,handler): - self.driver=driver - self.driver.setDocumentHandler(self) - self.driver.setErrorHandler(self) - self.handler=handler - self.reset() - - def get_stack(self): - return self.stack - - def reset(self): - self.stack=[] - - # --- DocumentHandler methods - - def characters(self, ch, start, length): - self.handler.handle_data(ch[start:start+length]) - - def endElement(self, name): - if hasattr(self.handler,"end_"+name): - self.handler.handle_endtag(name, - getattr(self.handler,"end_"+name)) - else: - self.handler.unknown_endtag(name) - - del self.stack[-1] - - def ignorableWhitespace(self, ch, start, length): - self.handler.handle_data(ch[start:start+length]) - - def processingInstruction(self, target, data): - self.handler.handle_proc(target,data) - - def startElement(self, name, atts): - self.stack.append(name) - - if hasattr(self.handler,"start_"+name): - self.handler.handle_starttag(name, - getattr(self.handler, - "start_"+name), - atts) - else: - self.handler.unknown_starttag(name,atts) - - # --- ErrorHandler methods - - def error(self, exception): - self.handler.syntax_error(str(exception)) - - def fatalError(self, exception): - raise RuntimeError(str(exception)) diff --git a/xcap/sax/writer.py b/xcap/sax/writer.py deleted file mode 100644 index ada2544..0000000 --- a/xcap/sax/writer.py +++ /dev/null @@ -1,548 +0,0 @@ -"""SAX document handlers that support output generation of XML, SGML, -and XHTML. - -This module provides three different groups of objects: the actual SAX -document handlers that drive the output, DTD information containers, -and syntax descriptors (of limited public use in most cases). - - -Output Drivers --------------- - -The output drivers conform to the SAX C protocol. -They can be used anywhere a C is used. Two drivers -are provided: a `basic' driver which creates a fairly minimal output -without much intelligence, and a `pretty-printing' driver that -performs pretty-printing with nice indentation and the like. Both can -optionally make use of DTD information and syntax objects. - - -DTD Information Containers --------------------------- - - - -Each DTD information object provides an attribute C which -describes the expected output syntax; an alternate can be provided to -the output drivers if desired. - - -Syntax Descriptors ------------------- - -Syntax descriptor objects provide several attributes which describe -the various lexical components of XML & SGML markup. The attributes -have names that reflect the shorthand notation from the SGML world, -but the values are strings which give the appropriate characters for -the markup language being described. The one addition is the -C attribute which should be used to end the start tag of -elements which have no content. This is needed to properly support -XML and XHTML. - -""" -__version__ = '$Revision: 1.9 $' - -import string - -import xml.parsers.xmlproc.dtdparser -import xml.parsers.xmlproc.xmlapp -from xml.sax.saxutils import escape - - -DEFAULT_LINELENGTH = 74 - - - -class Syntax: - com = "--" # comment start or end - cro = "&#" # character reference open - refc = ";" # reference close - dso = "[" # declaration subset open - dsc = "]" # declaration subset close - ero = "&" # entity reference open - lit = '"' # literal start or end - lit_quoted = '"' # quoted literal - lita = "'" # literal start or end (alternative) - mdo = "" # markup declaration close - msc = "]]" # marked section close - pio = "" # tag close - vi = "=" # value indicator - - def __init__(self): - if self.__class__ is Syntax: - raise RuntimeError("Syntax must be subclassed to be used!") - - -class SGMLSyntax(Syntax): - empty_stagc = ">" - pic = ">" # processing instruction close - net = "/" # null end tag - - -class XMLSyntax(Syntax): - empty_stagc = "/>" - pic = "?>" # processing instruction close - net = None # null end tag not supported - - -class XHTMLSyntax(XMLSyntax): - empty_stagc = " />" - - - -class DoctypeInfo: - syntax = XMLSyntax() - - fpi = None - sysid = None - - def __init__(self): - self.__empties = {} - self.__elements_only = {} - self.__attribs = {} - - def is_empty(self, gi): - return gi in self.__empties - - def get_empties_list(self): - return list(self.__empties.keys()) - - def has_element_content(self, gi): - return gi in self.__elements_only - - def get_element_containers_list(self): - return list(self.__elements_only.keys()) - - def get_attributes_list(self, gi): - return list(self.__attribs.get(gi, {}).keys()) - - def get_attribute_info(self, gi, attr): - return self.__attribs[gi][attr] - - def add_empty(self, gi): - self.__empties[gi] = 1 - - def add_element_container(self, gi): - self.__elements_only[gi] = gi - - def add_attribute_defn(self, gi, attr, type, decl, default): - try: - d = self.__attribs[gi] - except KeyError: - d = self.__attribs[gi] = {} - if attr not in d: - d[attr] = (type, decl, default) - else: - print("<%s> attribute %s already defined" % (gi, attr)) - - def load_pubtext(self, pubtext): - raise NotImplementedError("sublasses must implement load_pubtext()") - - -class _XMLDTDLoader(xml.parsers.xmlproc.xmlapp.DTDConsumer): - def __init__(self, info, parser): - self.info = info - xml.parsers.xmlproc.xmlapp.DTDConsumer.__init__(self, parser) - self.new_attribute = info.add_attribute_defn - - def new_element_type(self, gi, model): - if model[0] == "|" and model[1][0] == ("#PCDATA", ""): - # no action required - pass - elif model == ("", [], ""): - self.info.add_empty(gi) - else: - self.info.add_element_container(gi) - - -class XMLDoctypeInfo(DoctypeInfo): - def load_pubtext(self, sysid): - parser = xml.parsers.xmlproc.dtdparser.DTDParser() - loader = _XMLDTDLoader(self, parser) - parser.set_dtd_consumer(loader) - parser.parse_resource(sysid) - - -class XHTMLDoctypeInfo(XMLDoctypeInfo): - # Bogus W3C cruft requires the extra space when terminating empty elements. - syntax = XHTMLSyntax() - - -class SGMLDoctypeInfo(DoctypeInfo): - syntax = SGMLSyntax() - - import re - __element_prefix_search = re.compile("": - lit = self.__syntax.lit - s = '%sxml version=%s1.0%s encoding%s%s%s%s' % ( - self.__syntax.pio, lit, lit, self.__syntax.vi, lit, - self._encoding, lit) - if self.__standalone: - s = '%s standalone%s%s%s%s' % ( - s, self.__syntax.vi, lit, self.__standalone, lit) - self._write("%s%s\n" % (s, self.__syntax.pic)) - - def endDocument(self): - if self.__stack: - raise RuntimeError("open element stack cannot be empty on close") - - def startElement(self, tag, attrs={}): - if self.__pending_doctype: - self.handle_doctype(tag) - self._check_pending_content() - self.__pushtag(tag) - self.__check_flowing(tag, attrs) - if "xml:lang" in attrs: - self.__lang = attrs["xml:lang"] - del attrs["xml:lang"] - if self._packing: - prefix = "" - elif self._flowing: - prefix = self._prefix[:-self.indentation] - else: - prefix = "" - stag = "%s%s%s" % (prefix, self.__syntax.stago, tag) - prefix = "%s %s" % (prefix, (len(tag) * " ")) - lit = self.__syntax.lit - lita = self.__syntax.lita - vi = self.__syntax.vi - a = '' - if self._flowing != self.__stack[-1][0]: - if self._dtdflowing is not None \ - and self._flowing == self._dtdflowing: - pass - else: - a = ' xml:space%s%s%s%s' \ - % (vi, lit, ["default", "preserve"][self._flowing], lit) - if self.__lang != self.__stack[-1][1]: - a = '%s xml:lang%s%s%s%s' % (a, vi, lit, self.lang, lit) - line = stag + a - self._offset = self._offset + len(line) - a = '' - for k, v in list(attrs.items()): - if v is None: - continue - v = str(v) - if string.find(v, lit) == -1: - a = ' %s%s%s%s%s' % (k, vi, lit, escape(str(v)), lit) - elif string.find(v, lita) == -1: - a = ' %s%s%s%s%s' % (k, vi, lita, escape(str(v)), lita) - else: - a = ' %s%s%s%s%s' % (k, vi, lit, - escape(str(v), {lit:self.__syntax.lit_quoted}), - lita) - if (self._offset + len(a)) > self.lineLength: - self._write(line + "\n") - line = prefix + a - self._offset = len(line) - else: - line = line + a - self._offset = self._offset + len(a) - self._write(line) - self.__pending_content = 1 - if ( self.__dtdinfo and not - (self.__dtdinfo.has_element_content(tag) - or self.__dtdinfo.is_empty(tag))): - self._packing = 1 - - def endElement(self, tag): - if self.__pending_content: - if self._flowing: - self._write(self.__syntax.empty_stagc) - if self._packing: - self._offset = self._offset \ - + len(self.__syntax.empty_stagc) - else: - self._write("\n") - self._offset = 0 - else: - self._write(self.__syntax.empty_stagc) - self._offset = self._offset + len(self.__syntax.empty_stagc) - self.__pending_content = 0 - self.__poptag(tag) - return - depth = len(self.__stack) - if depth == 1 or self._packing or not self._flowing: - prefix = '' - else: - prefix = self._prefix[:-self.indentation] \ - + (" " * self.indentEndTags) - self.__poptag(tag) - self._write("%s%s%s%s" % ( - prefix, self.__syntax.etago, tag, self.__syntax.tagc)) - if self._packing: - self._offset = self._offset + len(tag) + 3 - else: - self._write("\n") - self._offset = 0 - - def characters(self, data, start, length): - data = data[start: start+length] - if data: - self._check_pending_content() - data = escape(data) - if "\n" in data: - p = string.find(data, "\n") - self._offset = len(data) - (p + 1) - else: - self._offset = self._offset + len(data) - self._check_pending_content() - self._write(data) - - def comment(self, data, start, length): - data = data[start: start+length] - self._check_pending_content() - s = "%s%s%s%s%s" % (self.__syntax.mdo, self.__syntax.com, - data, self.__syntax.com, self.__syntax.mdc) - p = string.rfind(s, "\n") - if self._packing: - if p >= 0: - self._offset = len(s) - (p + 1) - else: - self._offset = self._offset + len(s) - else: - self._write("%s%s\n" % (self._prefix, s)) - self._offset = 0 - - def ignorableWhitespace(self, data, start, length): - pass - - def processingInstruction(self, target, data): - self._check_pending_content() - s = "%s%s %s%s" % (self.__syntax.pio, target, data, self.__syntax.pic) - prefix = self._prefix[:-self.indentation] \ - + (" " * self.indentEndTags) - if "\n" in s: - p = string.rfind(s, "\n") - if self._flowing and not self._packing: - self._write(prefix + s + "\n") - self._offset = 0 - else: - self._write(s) - self._offset = len(s) - (p + 1) - elif self._flowing and not self._packing: - self._write(prefix + s + "\n") - self._offset = 0 - else: - self._write(s) - self._offset = self._offset + len(s) - - - # This doesn't actually have a SAX equivalent, so we'll use it as - # an internal helper. - - def handle_doctype(self, root): - self.__pending_doctype = 0 - if self.__dtdinfo: - fpi = self.__dtdinfo.fpi - sysid = self.__dtdinfo.sysid - else: - fpi = sysid = None - lit = self.__syntax.lit - isxml = self.__syntax.pic == "?>" - if isxml and sysid: - s = '%sDOCTYPE %s\n' % (self.__syntax.mdo, root) - if fpi: - s = s + ' PUBLIC %s%s%s\n' % (lit, fpi, lit) - s = s + ' %s%s%s>\n' % (lit, sysid, lit) - else: - s = s + ' SYSTEM %s%s%s>\n' % (lit, sysid, lit) - self._write(s) - self._offset = 0 - elif not isxml: - s = "%sDOCTYPE %s" % (self.__syntax.mdo, root) - if fpi: - s = '%s\n PUBLIC %s%s%s' % (s, lit, fpi, lit) - if sysid: - s = '%s\n SYSTEM %s%s%s' % (s, lit, sysid, lit) - self._write("%s%s\n" % (s, self.__syntax.mdc)) - self._offset = 0 - - def handle_cdata(self, data): - self._check_pending_content() - # There should be a better way to generate '[CDATA[' - start = self.__syntax.mdo + "[CDATA[" - end = self.__syntax.msc + self.__syntax.mdc - s = "%s%s%s" % (start, escape(data), end) - if self._packing: - if "\n" in s: - rpos = string.rfind(s, "\n") - self._offset = len(s) - (rpos + 1) + len(end) - else: - self._offset = self._offset + len(s) + len(start + end) - self._write(s) - else: - self._offset = 0 - self._write(s + "\n") - - - # Internal helper methods. - - def __poptag(self, tag): - state = self.__stack.pop() - self._flowing, self.__lang, expected_tag, \ - self._packing, self._dtdflowing = state - if tag != expected_tag: - raise RuntimeError("expected , got " % (expected_tag, tag)) - self._prefix = self._prefix[:-self.indentation] - - def __pushtag(self, tag): - self.__stack.append((self._flowing, self.__lang, tag, - self._packing, self._dtdflowing)) - self._prefix = self._prefix + " " * self.indentation - - def __check_flowing(self, tag, attrs): - """Check the contents of attrs and the DTD information to determine - whether the following content should be flowed. - - tag -- general identifier of the element being opened - attrs -- attributes dictionary as reported by the parser or - application - - This sets up both the _flowing and _dtdflowing (object) attributes. - """ - docspec = dtdspec = None - if self.__dtdinfo: - try: - info = self.__dtdinfo.get_attribute_info(tag, "xml:space") - except KeyError: - info = None - if info is not None: - self._flowing = info[2] != "preserve" - self._dtdflowing = self._flowing - if "xml:space" in attrs: - self._flowing = attrs["xml:space"] != "preserve" - del attrs["xml:space"] - - def _check_pending_content(self): - if self.__pending_content: - s = self.__syntax.tagc - if self._flowing and not self._packing: - s = s + "\n" - self._offset = 0 - else: - self._offset = self._offset + len(s) - self._write(s) - self.__pending_content = 0 - - -class PrettyPrinter(XmlWriter): - """Pretty-printing XML output handler.""" - - def __init__(self, fp, standalone=None, dtdinfo=None, - syntax=None, linelength=None, - indentation=2, endtagindentation=None): - XmlWriter.__init__(self, fp, standalone=standalone, dtdinfo=dtdinfo, - syntax=syntax, linelength=linelength) - self.indentation = indentation - if endtagindentation is not None: - self.indentEndTags = endtagindentation - else: - self.indentEndTags = indentation - - def characters(self, data, start, length): - data = data[start: start + length] - if not data: - return - self._check_pending_content() - data = escape(data) - if not self._flowing: - self._write(data) - return - words = string.split(data) - begspace = data[0] in string.whitespace - endspace = words and (data[-1] in string.whitespace) - prefix = self._prefix - if len(prefix) > 40: - prefix = " " - offset = self._offset - L = [] - append = L.append - if begspace: - append(" ") - offset = offset + 1 - ws = "" - ws_len = 0 - while words: - w = words[0] - del words[0] - if (offset + ws_len + len(w)) > self.lineLength: - append("\n") - append(prefix) - append(w) - offset = len(prefix) + len(w) - else: - append(ws) - ws, ws_len = " ", 1 - append(w) - offset = offset + 1 + len(w) - if endspace: - append(" ") - offset = offset + 1 - self._offset = offset - self._write(string.join(L, "")) diff --git a/xcap/sax/xmlreader.py b/xcap/sax/xmlreader.py deleted file mode 100644 index 01e9e0c..0000000 --- a/xcap/sax/xmlreader.py +++ /dev/null @@ -1,378 +0,0 @@ -"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers -should be based on this code. """ - -from . import handler - -from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException - - -# ===== XMLREADER ===== - -class XMLReader: - """Interface for reading an XML document using callbacks. - - XMLReader is the interface that an XML parser's SAX2 driver must - implement. This interface allows an application to set and query - features and properties in the parser, to register event handlers - for document processing, and to initiate a document parse. - - All SAX interfaces are assumed to be synchronous: the parse - methods must not return until parsing is complete, and readers - must wait for an event-handler callback to return before reporting - the next event.""" - - def __init__(self): - self._cont_handler = handler.ContentHandler() - self._dtd_handler = handler.DTDHandler() - self._ent_handler = handler.EntityResolver() - self._err_handler = handler.ErrorHandler() - - def parse(self, source): - "Parse an XML document from a system identifier or an InputSource." - raise NotImplementedError("This method must be implemented!") - - def getContentHandler(self): - "Returns the current ContentHandler." - return self._cont_handler - - def setContentHandler(self, handler): - "Registers a new object to receive document content events." - self._cont_handler = handler - - def getDTDHandler(self): - "Returns the current DTD handler." - return self._dtd_handler - - def setDTDHandler(self, handler): - "Register an object to receive basic DTD-related events." - self._dtd_handler = handler - - def getEntityResolver(self): - "Returns the current EntityResolver." - return self._ent_handler - - def setEntityResolver(self, resolver): - "Register an object to resolve external entities." - self._ent_handler = resolver - - def getErrorHandler(self): - "Returns the current ErrorHandler." - return self._err_handler - - def setErrorHandler(self, handler): - "Register an object to receive error-message events." - self._err_handler = handler - - def setLocale(self, locale): - """Allow an application to set the locale for errors and warnings. - - SAX parsers are not required to provide localization for errors - and warnings; if they cannot support the requested locale, - however, they must throw a SAX exception. Applications may - request a locale change in the middle of a parse.""" - raise SAXNotSupportedException("Locale support not implemented") - - def getFeature(self, name): - "Looks up and returns the state of a SAX2 feature." - raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - - def setFeature(self, name, state): - "Sets the state of a SAX2 feature." - raise SAXNotRecognizedException("Feature '%s' not recognized" % name) - - def getProperty(self, name): - "Looks up and returns the value of a SAX2 property." - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - - def setProperty(self, name, value): - "Sets the value of a SAX2 property." - raise SAXNotRecognizedException("Property '%s' not recognized" % name) - -class IncrementalParser(XMLReader): - """This interface adds three extra methods to the XMLReader - interface that allow XML parsers to support incremental - parsing. Support for this interface is optional, since not all - underlying XML parsers support this functionality. - - When the parser is instantiated it is ready to begin accepting - data from the feed method immediately. After parsing has been - finished with a call to close the reset method must be called to - make the parser ready to accept new data, either from feed or - using the parse method. - - Note that these methods must _not_ be called during parsing, that - is, after parse has been called and before it returns. - - By default, the class also implements the parse method of the XMLReader - interface using the feed, close and reset methods of the - IncrementalParser interface as a convenience to SAX 2.0 driver - writers.""" - - def __init__(self, bufsize=2**16): - self._bufsize = bufsize - XMLReader.__init__(self) - - def parse(self, source): - from . import saxutils - source = saxutils.prepare_input_source(source) - - self.prepareParser(source) - file = source.getByteStream() - buffer = file.read(self._bufsize) - while buffer != "": - self.feed(buffer) - buffer = file.read(self._bufsize) - self.close() - - def feed(self, data): - """This method gives the raw XML data in the data parameter to - the parser and makes it parse the data, emitting the - corresponding events. It is allowed for XML constructs to be - split across several calls to feed. - - feed may raise SAXException.""" - raise NotImplementedError("This method must be implemented!") - - def prepareParser(self, source): - """This method is called by the parse implementation to allow - the SAX 2.0 driver to prepare itself for parsing.""" - raise NotImplementedError("prepareParser must be overridden!") - - def close(self): - """This method is called when the entire XML document has been - passed to the parser through the feed method, to notify the - parser that there are no more data. This allows the parser to - do the final checks on the document and empty the internal - data buffer. - - The parser will not be ready to parse another document until - the reset method has been called. - - close may raise SAXException.""" - raise NotImplementedError("This method must be implemented!") - - def reset(self): - """This method is called after close has been called to reset - the parser so that it is ready to parse new documents. The - results of calling parse or feed after close without calling - reset are undefined.""" - raise NotImplementedError("This method must be implemented!") - -# ===== LOCATOR ===== - -class Locator: - """Interface for associating a SAX event with a document - location. A locator object will return valid results only during - calls to DocumentHandler methods; at any other time, the - results are unpredictable.""" - - def getColumnNumber(self): - "Return the column number where the current event ends." - return -1 - - def getLineNumber(self): - "Return the line number where the current event ends." - return -1 - - def getPublicId(self): - "Return the public identifier for the current event." - return None - - def getSystemId(self): - "Return the system identifier for the current event." - return None - -# ===== INPUTSOURCE ===== - -class InputSource: - """Encapsulation of the information needed by the XMLReader to - read entities. - - This class may include information about the public identifier, - system identifier, byte stream (possibly with character encoding - information) and/or the character stream of an entity. - - Applications will create objects of this class for use in the - XMLReader.parse method and for returning from - EntityResolver.resolveEntity. - - An InputSource belongs to the application, the XMLReader is not - allowed to modify InputSource objects passed to it from the - application, although it may make copies and modify those.""" - - def __init__(self, system_id = None): - self.__system_id = system_id - self.__public_id = None - self.__encoding = None - self.__bytefile = None - self.__charfile = None - - def setPublicId(self, public_id): - "Sets the public identifier of this InputSource." - self.__public_id = public_id - - def getPublicId(self): - "Returns the public identifier of this InputSource." - return self.__public_id - - def setSystemId(self, system_id): - "Sets the system identifier of this InputSource." - self.__system_id = system_id - - def getSystemId(self): - "Returns the system identifier of this InputSource." - return self.__system_id - - def setEncoding(self, encoding): - """Sets the character encoding of this InputSource. - - The encoding must be a string acceptable for an XML encoding - declaration (see section 4.3.3 of the XML recommendation). - - The encoding attribute of the InputSource is ignored if the - InputSource also contains a character stream.""" - self.__encoding = encoding - - def getEncoding(self): - "Get the character encoding of this InputSource." - return self.__encoding - - def setByteStream(self, bytefile): - """Set the byte stream (a Python file-like object which does - not perform byte-to-character conversion) for this input - source. - - The SAX parser will ignore this if there is also a character - stream specified, but it will use a byte stream in preference - to opening a URI connection itself. - - If the application knows the character encoding of the byte - stream, it should set it with the setEncoding method.""" - self.__bytefile = bytefile - - def getByteStream(self): - """Get the byte stream for this input source. - - The getEncoding method will return the character encoding for - this byte stream, or None if unknown.""" - return self.__bytefile - - def setCharacterStream(self, charfile): - """Set the character stream for this input source. (The stream - must be a Python 2.0 Unicode-wrapped file-like that performs - conversion to Unicode strings.) - - If there is a character stream specified, the SAX parser will - ignore any byte stream and will not attempt to open a URI - connection to the system identifier.""" - self.__charfile = charfile - - def getCharacterStream(self): - "Get the character stream for this input source." - return self.__charfile - -# ===== ATTRIBUTESIMPL ===== - -class AttributesImpl: - - def __init__(self, attrs): - """Non-NS-aware implementation. - - attrs should be of the form {name : value}.""" - self._attrs = attrs - - def getLength(self): - return len(self._attrs) - - def getType(self, name): - return "CDATA" - - def getValue(self, name): - return self._attrs[name] - - def getValueByQName(self, name): - return self._attrs[name] - - def getNameByQName(self, name): - if name not in self._attrs: - raise KeyError(name) - return name - - def getQNameByName(self, name): - if name not in self._attrs: - raise KeyError(name) - return name - - def getNames(self): - return list(self._attrs.keys()) - - def getQNames(self): - return list(self._attrs.keys()) - - def __len__(self): - return len(self._attrs) - - def __getitem__(self, name): - return self._attrs[name] - - def keys(self): - return list(self._attrs.keys()) - - def has_key(self, name): - return name in self._attrs - - def get(self, name, alternative=None): - return self._attrs.get(name, alternative) - - def copy(self): - return self.__class__(self._attrs) - - def items(self): - return list(self._attrs.items()) - - def values(self): - return list(self._attrs.values()) - -# ===== ATTRIBUTESNSIMPL ===== - -class AttributesNSImpl(AttributesImpl): - - def __init__(self, attrs, qnames): - """NS-aware implementation. - - attrs should be of the form {(ns_uri, lname): value, ...}. - qnames of the form {(ns_uri, lname): qname, ...}.""" - self._attrs = attrs - self._qnames = qnames - - def getValueByQName(self, name): - for (nsname, qname) in list(self._qnames.items()): - if qname == name: - return self._attrs[nsname] - - raise KeyError(name) - - def getNameByQName(self, name): - for (nsname, qname) in list(self._qnames.items()): - if qname == name: - return nsname - - raise KeyError(name) - - def getQNameByName(self, name): - return self._qnames[name] - - def getQNames(self): - return list(self._qnames.values()) - - def copy(self): - return self.__class__(self._attrs, self._qnames) - - -def _test(): - XMLReader() - IncrementalParser() - Locator() - -if __name__ == "__main__": - _test()