| """ | |
| This module contains the core classes of version 2.0 of SAX for Python. | |
| This file provides only default classes with absolutely minimum | |
| functionality, from which drivers and applications can be subclassed. | |
| Many of these classes are empty and are included only as documentation | |
| of the interfaces. | |
| $Id$ | |
| """ | |
| version = '2.0beta' | |
| #============================================================================ | |
| # | |
| # HANDLER INTERFACES | |
| # | |
| #============================================================================ | |
| # ===== ERRORHANDLER ===== | |
| class ErrorHandler: | |
| """Basic interface for SAX error handlers. | |
| If you create an object that implements this interface, then | |
| register the object with your XMLReader, the parser will call the | |
| methods in your object to report all warnings and errors. There | |
| are three levels of errors available: warnings, (possibly) | |
| recoverable errors, and unrecoverable errors. All methods take a | |
| SAXParseException as the only parameter.""" | |
| def error(self, exception): | |
| "Handle a recoverable error." | |
| raise exception | |
| def fatalError(self, exception): | |
| "Handle a non-recoverable error." | |
| raise exception | |
| def warning(self, exception): | |
| "Handle a warning." | |
| print exception | |
| # ===== CONTENTHANDLER ===== | |
| class ContentHandler: | |
| """Interface for receiving logical document content events. | |
| This is the main callback interface in SAX, and the one most | |
| important to applications. The order of events in this interface | |
| mirrors the order of the information in the document.""" | |
| def __init__(self): | |
| self._locator = None | |
| def setDocumentLocator(self, locator): | |
| """Called by the parser to give the application a locator for | |
| locating the origin of document events. | |
| SAX parsers are strongly encouraged (though not absolutely | |
| required) to supply a locator: if it does so, it must supply | |
| the locator to the application by invoking this method before | |
| invoking any of the other methods in the DocumentHandler | |
| interface. | |
| The locator allows the application to determine the end | |
| position of any document-related event, even if the parser is | |
| not reporting an error. Typically, the application will use | |
| this information for reporting its own errors (such as | |
| character content that does not match an application's | |
| business rules). The information returned by the locator is | |
| probably not sufficient for use with a search engine. | |
| Note that the locator will return correct information only | |
| during the invocation of the events in this interface. The | |
| application should not attempt to use it at any other time.""" | |
| self._locator = locator | |
| def startDocument(self): | |
| """Receive notification of the beginning of a document. | |
| The SAX parser will invoke this method only once, before any | |
| other methods in this interface or in DTDHandler (except for | |
| setDocumentLocator).""" | |
| def endDocument(self): | |
| """Receive notification of the end of a document. | |
| The SAX parser will invoke this method only once, and it will | |
| be the last method invoked during the parse. The parser shall | |
| not invoke this method until it has either abandoned parsing | |
| (because of an unrecoverable error) or reached the end of | |
| input.""" | |
| def startPrefixMapping(self, prefix, uri): | |
| """Begin the scope of a prefix-URI Namespace mapping. | |
| The information from this event is not necessary for normal | |
| Namespace processing: the SAX XML reader will automatically | |
| replace prefixes for element and attribute names when the | |
| http://xml.org/sax/features/namespaces feature is true (the | |
| default). | |
| There are cases, however, when applications need to use | |
| prefixes in character data or in attribute values, where they | |
| cannot safely be expanded automatically; the | |
| start/endPrefixMapping event supplies the information to the | |
| application to expand prefixes in those contexts itself, if | |
| necessary. | |
| Note that start/endPrefixMapping events are not guaranteed to | |
| be properly nested relative to each-other: all | |
| startPrefixMapping events will occur before the corresponding | |
| startElement event, and all endPrefixMapping events will occur | |
| after the corresponding endElement event, but their order is | |
| not guaranteed.""" | |
| def endPrefixMapping(self, prefix): | |
| """End the scope of a prefix-URI mapping. | |
| See startPrefixMapping for details. This event will always | |
| occur after the corresponding endElement event, but the order | |
| of endPrefixMapping events is not otherwise guaranteed.""" | |
| def startElement(self, name, attrs): | |
| """Signals the start of an element in non-namespace mode. | |
| The name parameter contains the raw XML 1.0 name of the | |
| element type as a string and the attrs parameter holds an | |
| instance of the Attributes class containing the attributes of | |
| the element.""" | |
| def endElement(self, name): | |
| """Signals the end of an element in non-namespace mode. | |
| The name parameter contains the name of the element type, just | |
| as with the startElement event.""" | |
| def startElementNS(self, name, qname, attrs): | |
| """Signals the start of an element in namespace mode. | |
| The name parameter contains the name of the element type as a | |
| (uri, localname) tuple, the qname parameter the raw XML 1.0 | |
| name used in the source document, and the attrs parameter | |
| holds an instance of the Attributes class containing the | |
| attributes of the element. | |
| The uri part of the name tuple is None for elements which have | |
| no namespace.""" | |
| def endElementNS(self, name, qname): | |
| """Signals the end of an element in namespace mode. | |
| The name parameter contains the name of the element type, just | |
| as with the startElementNS event.""" | |
| def characters(self, content): | |
| """Receive notification of character data. | |
| The Parser will call this method to report each chunk of | |
| character data. SAX parsers may return all contiguous | |
| character data in a single chunk, or they may split it into | |
| several chunks; however, all of the characters in any single | |
| event must come from the same external entity so that the | |
| Locator provides useful information.""" | |
| def ignorableWhitespace(self, whitespace): | |
| """Receive notification of ignorable whitespace in element content. | |
| Validating Parsers must use this method to report each chunk | |
| of ignorable whitespace (see the W3C XML 1.0 recommendation, | |
| section 2.10): non-validating parsers may also use this method | |
| if they are capable of parsing and using content models. | |
| SAX parsers may return all contiguous whitespace in a single | |
| chunk, or they may split it into several chunks; however, all | |
| of the characters in any single event must come from the same | |
| external entity, so that the Locator provides useful | |
| information.""" | |
| def processingInstruction(self, target, data): | |
| """Receive notification of a processing instruction. | |
| The Parser will invoke this method once for each processing | |
| instruction found: note that processing instructions may occur | |
| before or after the main document element. | |
| A SAX parser should never report an XML declaration (XML 1.0, | |
| section 2.8) or a text declaration (XML 1.0, section 4.3.1) | |
| using this method.""" | |
| def skippedEntity(self, name): | |
| """Receive notification of a skipped entity. | |
| The Parser will invoke this method once for each entity | |
| skipped. Non-validating processors may skip entities if they | |
| have not seen the declarations (because, for example, the | |
| entity was declared in an external DTD subset). All processors | |
| may skip external entities, depending on the values of the | |
| http://xml.org/sax/features/external-general-entities and the | |
| http://xml.org/sax/features/external-parameter-entities | |
| properties.""" | |
| # ===== DTDHandler ===== | |
| class DTDHandler: | |
| """Handle DTD events. | |
| This interface specifies only those DTD events required for basic | |
| parsing (unparsed entities and attributes).""" | |
| def notationDecl(self, name, publicId, systemId): | |
| "Handle a notation declaration event." | |
| def unparsedEntityDecl(self, name, publicId, systemId, ndata): | |
| "Handle an unparsed entity declaration event." | |
| # ===== ENTITYRESOLVER ===== | |
| class EntityResolver: | |
| """Basic interface for resolving entities. If you create an object | |
| implementing this interface, then register the object with your | |
| Parser, the parser will call the method in your object to | |
| resolve all external entities. Note that DefaultHandler implements | |
| this interface with the default behaviour.""" | |
| def resolveEntity(self, publicId, systemId): | |
| """Resolve the system identifier of an entity and return either | |
| the system identifier to read from as a string, or an InputSource | |
| to read from.""" | |
| return systemId | |
| #============================================================================ | |
| # | |
| # CORE FEATURES | |
| # | |
| #============================================================================ | |
| feature_namespaces = "http://xml.org/sax/features/namespaces" | |
| # true: Perform Namespace processing (default). | |
| # false: Optionally do not perform Namespace processing | |
| # (implies namespace-prefixes). | |
| # access: (parsing) read-only; (not parsing) read/write | |
| feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes" | |
| # true: Report the original prefixed names and attributes used for Namespace | |
| # declarations. | |
| # false: Do not report attributes used for Namespace declarations, and | |
| # optionally do not report original prefixed names (default). | |
| # access: (parsing) read-only; (not parsing) read/write | |
| feature_string_interning = "http://xml.org/sax/features/string-interning" | |
| # true: All element names, prefixes, attribute names, Namespace URIs, and | |
| # local names are interned using the built-in intern function. | |
| # false: Names are not necessarily interned, although they may be (default). | |
| # access: (parsing) read-only; (not parsing) read/write | |
| feature_validation = "http://xml.org/sax/features/validation" | |
| # true: Report all validation errors (implies external-general-entities and | |
| # external-parameter-entities). | |
| # false: Do not report validation errors. | |
| # access: (parsing) read-only; (not parsing) read/write | |
| feature_external_ges = "http://xml.org/sax/features/external-general-entities" | |
| # true: Include all external general (text) entities. | |
| # false: Do not include external general entities. | |
| # access: (parsing) read-only; (not parsing) read/write | |
| feature_external_pes = "http://xml.org/sax/features/external-parameter-entities" | |
| # true: Include all external parameter entities, including the external | |
| # DTD subset. | |
| # false: Do not include any external parameter entities, even the external | |
| # DTD subset. | |
| # access: (parsing) read-only; (not parsing) read/write | |
| all_features = [feature_namespaces, | |
| feature_namespace_prefixes, | |
| feature_string_interning, | |
| feature_validation, | |
| feature_external_ges, | |
| feature_external_pes] | |
| #============================================================================ | |
| # | |
| # CORE PROPERTIES | |
| # | |
| #============================================================================ | |
| property_lexical_handler = "http://xml.org/sax/properties/lexical-handler" | |
| # data type: xml.sax.sax2lib.LexicalHandler | |
| # description: An optional extension handler for lexical events like comments. | |
| # access: read/write | |
| property_declaration_handler = "http://xml.org/sax/properties/declaration-handler" | |
| # data type: xml.sax.sax2lib.DeclHandler | |
| # description: An optional extension handler for DTD-related events other | |
| # than notations and unparsed entities. | |
| # access: read/write | |
| property_dom_node = "http://xml.org/sax/properties/dom-node" | |
| # data type: org.w3c.dom.Node | |
| # description: When parsing, the current DOM node being visited if this is | |
| # a DOM iterator; when not parsing, the root DOM node for | |
| # iteration. | |
| # access: (parsing) read-only; (not parsing) read/write | |
| property_xml_string = "http://xml.org/sax/properties/xml-string" | |
| # data type: String | |
| # description: The literal string of characters that was the source for | |
| # the current event. | |
| # access: read-only | |
| property_encoding = "http://www.python.org/sax/properties/encoding" | |
| # data type: String | |
| # description: The name of the encoding to assume for input data. | |
| # access: write: set the encoding, e.g. established by a higher-level | |
| # protocol. May change during parsing (e.g. after | |
| # processing a META tag) | |
| # read: return the current encoding (possibly established through | |
| # auto-detection. | |
| # initial value: UTF-8 | |
| # | |
| property_interning_dict = "http://www.python.org/sax/properties/interning-dict" | |
| # data type: Dictionary | |
| # description: The dictionary used to intern common strings in the document | |
| # access: write: Request that the parser uses a specific dictionary, to | |
| # allow interning across different documents | |
| # read: return the current interning dictionary, or None | |
| # | |
| all_properties = [property_lexical_handler, | |
| property_dom_node, | |
| property_declaration_handler, | |
| property_xml_string, | |
| property_encoding, | |
| property_interning_dict] |