|
|
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const ed5 = require("xmlchars/xml/1.0/ed5"); const ed2 = require("xmlchars/xml/1.1/ed2"); const NSed3 = require("xmlchars/xmlns/1.0/ed3"); var isS = ed5.isS; var isChar10 = ed5.isChar; var isNameStartChar = ed5.isNameStartChar; var isNameChar = ed5.isNameChar; var S_LIST = ed5.S_LIST; var NAME_RE = ed5.NAME_RE; var isChar11 = ed2.isChar; var isNCNameStartChar = NSed3.isNCNameStartChar; var isNCNameChar = NSed3.isNCNameChar; var NC_NAME_RE = NSed3.NC_NAME_RE; const XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"; const XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"; const rootNS = { // eslint-disable-next-line @typescript-eslint/no-explicit-any
__proto__: null, xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE, }; const XML_ENTITIES = { // eslint-disable-next-line @typescript-eslint/no-explicit-any
__proto__: null, amp: "&", gt: ">", lt: "<", quot: "\"", apos: "'", }; // EOC: end-of-chunk
// ---------------------------------------------------------------------------
// Sentinel values returned by the character readers. They are negative so
// that they can never be mistaken for a real code point.
// ---------------------------------------------------------------------------

// EOC: end-of-chunk.
const EOC = -1;
// A line terminator that the readers normalize to NL (e.g. CR or CR NL).
const NL_LIKE = -2;

// ---------------------------------------------------------------------------
// Parser states. The numbers are consecutive and start at 0; each one shows,
// in its trailing comment, an example of the input seen so far.
// ---------------------------------------------------------------------------

// Prolog and DOCTYPE.
const S_BEGIN = 0; // Initial state.
const S_BEGIN_WHITESPACE = 1; // leading whitespace
const S_DOCTYPE = 2; // <!DOCTYPE
const S_DOCTYPE_QUOTE = 3; // <!DOCTYPE "//blah
const S_DTD = 4; // <!DOCTYPE "//blah" [ ...
const S_DTD_QUOTED = 5; // <!DOCTYPE "//blah" [ "foo
const S_DTD_OPEN_WAKA = 6;
const S_DTD_OPEN_WAKA_BANG = 7;
const S_DTD_COMMENT = 8; // <!--
const S_DTD_COMMENT_ENDING = 9; // <!-- blah -
const S_DTD_COMMENT_ENDED = 10; // <!-- blah --
const S_DTD_PI = 11; // <?
const S_DTD_PI_ENDING = 12; // <?hi "there" ?

// Character data, entities, comments and CDATA sections.
const S_TEXT = 13; // general stuff
const S_ENTITY = 14; // & and such
const S_OPEN_WAKA = 15; // <
const S_OPEN_WAKA_BANG = 16; // <!...
const S_COMMENT = 17; // <!--
const S_COMMENT_ENDING = 18; // <!-- blah -
const S_COMMENT_ENDED = 19; // <!-- blah --
const S_CDATA = 20; // <![CDATA[ something
const S_CDATA_ENDING = 21; // ]
const S_CDATA_ENDING_2 = 22; // ]]

// Processing instructions.
const S_PI_FIRST_CHAR = 23; // <?hi, first char
const S_PI_REST = 24; // <?hi, rest of the name
const S_PI_BODY = 25; // <?hi there
const S_PI_ENDING = 26; // <?hi "there" ?

// XML declaration.
const S_XML_DECL_NAME_START = 27; // <?xml
const S_XML_DECL_NAME = 28; // <?xml foo
const S_XML_DECL_EQ = 29; // <?xml foo=
const S_XML_DECL_VALUE_START = 30; // <?xml foo=
const S_XML_DECL_VALUE = 31; // <?xml foo="bar"
const S_XML_DECL_SEPARATOR = 32; // <?xml foo="bar"
const S_XML_DECL_ENDING = 33; // <?xml ... ?

// Tags and attributes.
const S_OPEN_TAG = 34; // <strong
const S_OPEN_TAG_SLASH = 35; // <strong /
const S_ATTRIB = 36; // <a
const S_ATTRIB_NAME = 37; // <a foo
const S_ATTRIB_NAME_SAW_WHITE = 38; // <a foo _
const S_ATTRIB_VALUE = 39; // <a foo=
const S_ATTRIB_VALUE_QUOTED = 40; // <a foo="bar
const S_ATTRIB_VALUE_CLOSED = 41; // <a foo="bar"
const S_ATTRIB_VALUE_UNQUOTED = 42; // <a foo=bar
const S_CLOSE_TAG = 43; // </a
const S_CLOSE_TAG_SAW_WHITE = 44; // </a >

// ---------------------------------------------------------------------------
// Code points of the characters the parser tests for.
// ---------------------------------------------------------------------------
const TAB = 0x09;
const NL = 0x0A;
const CR = 0x0D;
const SPACE = 0x20;
const BANG = 0x21; // !
const DQUOTE = 0x22; // "
const AMP = 0x26; // &
const SQUOTE = 0x27; // '
const MINUS = 0x2D; // -
const FORWARD_SLASH = 0x2F; // /
const SEMICOLON = 0x3B; // ;
const LESS = 0x3C; // <
const EQUAL = 0x3D; // =
const GREATER = 0x3E; // >
const QUESTION = 0x3F; // ?
const OPEN_BRACKET = 0x5B; // [
const CLOSE_BRACKET = 0x5D; // ]
const NEL = 0x85; // Next Line
const LS = 0x2028; // Line Separator
const isQuote = (c) => c === DQUOTE || c === SQUOTE; const QUOTES = [DQUOTE, SQUOTE]; const DOCTYPE_TERMINATOR = [...QUOTES, OPEN_BRACKET, GREATER]; const DTD_TERMINATOR = [...QUOTES, LESS, CLOSE_BRACKET]; const XML_DECL_NAME_TERMINATOR = [EQUAL, QUESTION, ...S_LIST]; const ATTRIB_VALUE_UNQUOTED_TERMINATOR = [...S_LIST, GREATER, AMP, LESS]; function nsPairCheck(parser, prefix, uri) { switch (prefix) { case "xml": if (uri !== XML_NAMESPACE) { parser.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`); } break; case "xmlns": if (uri !== XMLNS_NAMESPACE) { parser.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`); } break; default: } switch (uri) { case XMLNS_NAMESPACE: parser.fail(prefix === "" ? `the default namespace may not be set to ${uri}.` : `may not assign a prefix (even "xmlns") to the URI \
${XMLNS_NAMESPACE}.`);
break; case XML_NAMESPACE: switch (prefix) { case "xml": // Assinging the XML namespace to "xml" is fine.
break; case "": parser.fail(`the default namespace may not be set to ${uri}.`); break; default: parser.fail("may not assign the xml namespace to another prefix."); } break; default: } } function nsMappingCheck(parser, mapping) { for (const local of Object.keys(mapping)) { nsPairCheck(parser, local, mapping[local]); } } const isNCName = (name) => NC_NAME_RE.test(name); const isName = (name) => NAME_RE.test(name); const FORBIDDEN_START = 0; const FORBIDDEN_BRACKET = 1; const FORBIDDEN_BRACKET_BRACKET = 2; /** * The list of supported events. */ exports.EVENTS = [ "xmldecl", "text", "processinginstruction", "doctype", "comment", "opentagstart", "attribute", "opentag", "closetag", "cdata", "error", "end", "ready", ]; const EVENT_NAME_TO_HANDLER_NAME = { xmldecl: "xmldeclHandler", text: "textHandler", processinginstruction: "piHandler", doctype: "doctypeHandler", comment: "commentHandler", opentagstart: "openTagStartHandler", attribute: "attributeHandler", opentag: "openTagHandler", closetag: "closeTagHandler", cdata: "cdataHandler", error: "errorHandler", end: "endHandler", ready: "readyHandler", }; class SaxesParser { /** * @param opt The parser options. */ constructor(opt) { this.opt = opt !== null && opt !== void 0 ? opt : {}; this.fragmentOpt = !!this.opt.fragment; const xmlnsOpt = this.xmlnsOpt = !!this.opt.xmlns; this.trackPosition = this.opt.position !== false; this.fileName = this.opt.fileName; if (xmlnsOpt) { // This is the function we use to perform name checks on PIs and entities.
// When namespaces are used, colons are not allowed in PI target names or
// entity names. So the check depends on whether namespaces are used. See:
//
// https://www.w3.org/XML/xml-names-19990114-errata.html
// NE08
//
this.nameStartCheck = isNCNameStartChar; this.nameCheck = isNCNameChar; this.isName = isNCName; // eslint-disable-next-line @typescript-eslint/unbound-method
this.processAttribs = this.processAttribsNS; // eslint-disable-next-line @typescript-eslint/unbound-method
this.pushAttrib = this.pushAttribNS; // eslint-disable-next-line @typescript-eslint/no-explicit-any
this.ns = Object.assign({ __proto__: null }, rootNS); const additional = this.opt.additionalNamespaces; if (additional != null) { nsMappingCheck(this, additional); Object.assign(this.ns, additional); } } else { this.nameStartCheck = isNameStartChar; this.nameCheck = isNameChar; this.isName = isName; // eslint-disable-next-line @typescript-eslint/unbound-method
this.processAttribs = this.processAttribsPlain; // eslint-disable-next-line @typescript-eslint/unbound-method
this.pushAttrib = this.pushAttribPlain; } //
// The order of the members in this table needs to correspond to the state
// numbers given to the states that correspond to the methods being recorded
// here.
//
this.stateTable = [ /* eslint-disable @typescript-eslint/unbound-method */ this.sBegin, this.sBeginWhitespace, this.sDoctype, this.sDoctypeQuote, this.sDTD, this.sDTDQuoted, this.sDTDOpenWaka, this.sDTDOpenWakaBang, this.sDTDComment, this.sDTDCommentEnding, this.sDTDCommentEnded, this.sDTDPI, this.sDTDPIEnding, this.sText, this.sEntity, this.sOpenWaka, this.sOpenWakaBang, this.sComment, this.sCommentEnding, this.sCommentEnded, this.sCData, this.sCDataEnding, this.sCDataEnding2, this.sPIFirstChar, this.sPIRest, this.sPIBody, this.sPIEnding, this.sXMLDeclNameStart, this.sXMLDeclName, this.sXMLDeclEq, this.sXMLDeclValueStart, this.sXMLDeclValue, this.sXMLDeclSeparator, this.sXMLDeclEnding, this.sOpenTag, this.sOpenTagSlash, this.sAttrib, this.sAttribName, this.sAttribNameSawWhite, this.sAttribValue, this.sAttribValueQuoted, this.sAttribValueClosed, this.sAttribValueUnquoted, this.sCloseTag, this.sCloseTagSawWhite, ]; this._init(); } /** * Indicates whether or not the parser is closed. If ``true``, wait for * the ``ready`` event to write again. */ get closed() { return this._closed; } _init() { var _a; this.openWakaBang = ""; this.text = ""; this.name = ""; this.piTarget = ""; this.entity = ""; this.q = null; this.tags = []; this.tag = null; this.topNS = null; this.chunk = ""; this.chunkPosition = 0; this.i = 0; this.prevI = 0; this.carriedFromPrevious = undefined; this.forbiddenState = FORBIDDEN_START; this.attribList = []; // The logic is organized so as to minimize the need to check
// this.opt.fragment while parsing.
const { fragmentOpt } = this; this.state = fragmentOpt ? S_TEXT : S_BEGIN; // We want these to be all true if we are dealing with a fragment.
this.reportedTextBeforeRoot = this.reportedTextAfterRoot = this.closedRoot = this.sawRoot = fragmentOpt; // An XML declaration is intially possible only when parsing whole
// documents.
this.xmlDeclPossible = !fragmentOpt; this.xmlDeclExpects = ["version"]; this.entityReturnState = undefined; let { defaultXMLVersion } = this.opt; if (defaultXMLVersion === undefined) { if (this.opt.forceXMLVersion === true) { throw new Error("forceXMLVersion set but defaultXMLVersion is not set"); } defaultXMLVersion = "1.0"; } this.setXMLVersion(defaultXMLVersion); this.positionAtNewLine = 0; this.doctype = false; this._closed = false; this.xmlDecl = { version: undefined, encoding: undefined, standalone: undefined, }; this.line = 1; this.column = 0; this.ENTITIES = Object.create(XML_ENTITIES); // eslint-disable-next-line no-unused-expressions
(_a = this.readyHandler) === null || _a === void 0 ? void 0 : _a.call(this); } /** * The stream position the parser is currently looking at. This field is * zero-based. * * This field is not based on counting Unicode characters but is to be * interpreted as a plain index into a JavaScript string. */ get position() { return this.chunkPosition + this.i; } /** * The column number of the next character to be read by the parser. * * This field is zero-based. (The first column in a line is 0.) * * This field reports the index at which the next character would be in the * line if the line were represented as a JavaScript string. Note that this * *can* be different to a count based on the number of *Unicode characters* * due to how JavaScript handles astral plane characters. * * See [[column]] for a number that corresponds to a count of Unicode * characters. */ get columnIndex() { return this.position - this.positionAtNewLine; } /** * Set an event listener on an event. The parser supports one handler per * event type. If you try to set an event handler over an existing handler, * the old handler is silently overwritten. * * @param name The event to listen to. * * @param handler The handler to set. */ on(name, handler) { // eslint-disable-next-line @typescript-eslint/no-explicit-any
this[EVENT_NAME_TO_HANDLER_NAME[name]] = handler; } /** * Unset an event handler. * * @parma name The event to stop listening to. */ off(name) { // eslint-disable-next-line @typescript-eslint/no-explicit-any
this[EVENT_NAME_TO_HANDLER_NAME[name]] = undefined; } /** * Make an error object. The error object will have a message that contains * the ``fileName`` option passed at the creation of the parser. If position * tracking was turned on, it will also have line and column number * information. * * @param message The message describing the error to report. * * @returns An error object with a properly formatted message. */ makeError(message) { var _a; let msg = (_a = this.fileName) !== null && _a !== void 0 ? _a : ""; if (this.trackPosition) { if (msg.length > 0) { msg += ":"; } msg += `${this.line}:${this.column}`; } if (msg.length > 0) { msg += ": "; } return new Error(msg + message); } /** * Report a parsing error. This method is made public so that client code may * check for issues that are outside the scope of this project and can report * errors. * * @param message The error to report. * * @returns this */ fail(message) { const err = this.makeError(message); const handler = this.errorHandler; if (handler === undefined) { throw err; } else { handler(err); } return this; } /** * Write a XML data to the parser. * * @param chunk The XML data to write. * * @returns this */ write(chunk) { if (this.closed) { return this.fail("cannot write after close; assign an onready handler."); } let end = false; if (chunk === null) { // We cannot return immediately because carriedFromPrevious may need
// processing.
end = true; chunk = ""; } else if (typeof chunk === "object") { chunk = chunk.toString(); } // We checked if performing a pre-decomposition of the string into an array
// of single complete characters (``Array.from(chunk)``) would be faster
// than the current repeated calls to ``charCodeAt``. As of August 2018, it
// isn't. (There may be Node-specific code that would perform faster than
// ``Array.from`` but don't want to be dependent on Node.)
if (this.carriedFromPrevious !== undefined) { // The previous chunk had char we must carry over.
chunk = `${this.carriedFromPrevious}${chunk}`; this.carriedFromPrevious = undefined; } let limit = chunk.length; const lastCode = chunk.charCodeAt(limit - 1); if (!end && // A trailing CR or surrogate must be carried over to the next
// chunk.
(lastCode === CR || (lastCode >= 0xD800 && lastCode <= 0xDBFF))) { // The chunk ends with a character that must be carried over. We cannot
// know how to handle it until we get the next chunk or the end of the
// stream. So save it for later.
this.carriedFromPrevious = chunk[limit - 1]; limit--; chunk = chunk.slice(0, limit); } const { stateTable } = this; this.chunk = chunk; this.i = 0; while (this.i < limit) { // eslint-disable-next-line @typescript-eslint/no-explicit-any
stateTable[this.state].call(this); } this.chunkPosition += limit; return end ? this.end() : this; } /** * Close the current stream. Perform final well-formedness checks and reset * the parser tstate. * * @returns this */ close() { return this.write(null); } /** * Get a single code point out of the current chunk. This updates the current * position if we do position tracking. * * This is the algorithm to use for XML 1.0. * * @returns The character read. */ getCode10() { const { chunk, i } = this; this.prevI = i; // Yes, we do this instead of doing this.i++. Doing it this way, we do not
// read this.i again, which is a bit faster.
this.i = i + 1; if (i >= chunk.length) { return EOC; } // Using charCodeAt and handling the surrogates ourselves is faster
// than using codePointAt.
const code = chunk.charCodeAt(i); this.column++; if (code < 0xD800) { if (code >= SPACE || code === TAB) { return code; } switch (code) { case NL: this.line++; this.column = 0; this.positionAtNewLine = this.position; return NL; case CR: // We may get NaN if we read past the end of the chunk, which is fine.
if (chunk.charCodeAt(i + 1) === NL) { // A \r\n sequence is converted to \n so we have to skip over the
// next character. We already know it has a size of 1 so ++ is fine
// here.
this.i = i + 2; } // Otherwise, a \r is just converted to \n, so we don't have to skip
// ahead.
// In either case, \r becomes \n.
this.line++; this.column = 0; this.positionAtNewLine = this.position; return NL_LIKE; default: // If we get here, then code < SPACE and it is not NL CR or TAB.
this.fail("disallowed character."); return code; } } if (code > 0xDBFF) { // This is a specialized version of isChar10 that takes into account
// that in this context code > 0xDBFF and code <= 0xFFFF. So it does not
// test cases that don't need testing.
if (!(code >= 0xE000 && code <= 0xFFFD)) { this.fail("disallowed character."); } return code; } const final = 0x10000 + ((code - 0xD800) * 0x400) + (chunk.charCodeAt(i + 1) - 0xDC00); this.i = i + 2; // This is a specialized version of isChar10 that takes into account that in
// this context necessarily final >= 0x10000.
if (final > 0x10FFFF) { this.fail("disallowed character."); } return final; } /** * Get a single code point out of the current chunk. This updates the current * position if we do position tracking. * * This is the algorithm to use for XML 1.1. * * @returns {number} The character read. */ getCode11() { const { chunk, i } = this; this.prevI = i; // Yes, we do this instead of doing this.i++. Doing it this way, we do not
// read this.i again, which is a bit faster.
this.i = i + 1; if (i >= chunk.length) { return EOC; } // Using charCodeAt and handling the surrogates ourselves is faster
// than using codePointAt.
const code = chunk.charCodeAt(i); this.column++; if (code < 0xD800) { if ((code > 0x1F && code < 0x7F) || (code > 0x9F && code !== LS) || code === TAB) { return code; } switch (code) { case NL: // 0xA
this.line++; this.column = 0; this.positionAtNewLine = this.position; return NL; case CR: { // 0xD
// We may get NaN if we read past the end of the chunk, which is
// fine.
const next = chunk.charCodeAt(i + 1); if (next === NL || next === NEL) { // A CR NL or CR NEL sequence is converted to NL so we have to skip
// over the next character. We already know it has a size of 1.
this.i = i + 2; } // Otherwise, a CR is just converted to NL, no skip.
} /* yes, fall through */ case NEL: // 0x85
case LS: // Ox2028
this.line++; this.column = 0; this.positionAtNewLine = this.position; return NL_LIKE; default: this.fail("disallowed character."); return code; } } if (code > 0xDBFF) { // This is a specialized version of isCharAndNotRestricted that takes into
// account that in this context code > 0xDBFF and code <= 0xFFFF. So it
// does not test cases that don't need testing.
if (!(code >= 0xE000 && code <= 0xFFFD)) { this.fail("disallowed character."); } return code; } const final = 0x10000 + ((code - 0xD800) * 0x400) + (chunk.charCodeAt(i + 1) - 0xDC00); this.i = i + 2; // This is a specialized version of isCharAndNotRestricted that takes into
// account that in this context necessarily final >= 0x10000.
if (final > 0x10FFFF) { this.fail("disallowed character."); } return final; } /** * Like ``getCode`` but with the return value normalized so that ``NL`` is * returned for ``NL_LIKE``. */ getCodeNorm() { const c = this.getCode(); return c === NL_LIKE ? NL : c; } unget() { this.i = this.prevI; this.column--; } /** * Capture characters into a buffer until encountering one of a set of * characters. * * @param chars An array of codepoints. Encountering a character in the array * ends the capture. (``chars`` may safely contain ``NL``.) * * @return The character code that made the capture end, or ``EOC`` if we hit * the end of the chunk. The return value cannot be NL_LIKE: NL is returned * instead. */ captureTo(chars) { let { i: start } = this; const { chunk } = this; // eslint-disable-next-line no-constant-condition
while (true) { const c = this.getCode(); const isNLLike = c === NL_LIKE; const final = isNLLike ? NL : c; if (final === EOC || chars.includes(final)) { this.text += chunk.slice(start, this.prevI); return final; } if (isNLLike) { this.text += `${chunk.slice(start, this.prevI)}\n`; start = this.i; } } } /** * Capture characters into a buffer until encountering a character. * * @param char The codepoint that ends the capture. **NOTE ``char`` MAY NOT * CONTAIN ``NL``.** Passing ``NL`` will result in buggy behavior. * * @return ``true`` if we ran into the character. Otherwise, we ran into the * end of the current chunk. */ captureToChar(char) { let { i: start } = this; const { chunk } = this; // eslint-disable-next-line no-constant-condition
while (true) { let c = this.getCode(); switch (c) { case NL_LIKE: this.text += `${chunk.slice(start, this.prevI)}\n`; start = this.i; c = NL; break; case EOC: this.text += chunk.slice(start); return false; default: } if (c === char) { this.text += chunk.slice(start, this.prevI); return true; } } } /** * Capture characters that satisfy ``isNameChar`` into the ``name`` field of * this parser. * * @return The character code that made the test fail, or ``EOC`` if we hit * the end of the chunk. The return value cannot be NL_LIKE: NL is returned * instead. */ captureNameChars() { const { chunk, i: start } = this; // eslint-disable-next-line no-constant-condition
while (true) { const c = this.getCode(); if (c === EOC) { this.name += chunk.slice(start); return EOC; } // NL is not a name char so we don't have to test specifically for it.
if (!isNameChar(c)) { this.name += chunk.slice(start, this.prevI); return c === NL_LIKE ? NL : c; } } } /** * Skip white spaces. * * @return The character that ended the skip, or ``EOC`` if we hit * the end of the chunk. The return value cannot be NL_LIKE: NL is returned * instead. */ skipSpaces() { // eslint-disable-next-line no-constant-condition
while (true) { const c = this.getCodeNorm(); if (c === EOC || !isS(c)) { return c; } } } setXMLVersion(version) { this.currentXMLVersion = version; /* eslint-disable @typescript-eslint/unbound-method */ if (version === "1.0") { this.isChar = isChar10; this.getCode = this.getCode10; } else { this.isChar = isChar11; this.getCode = this.getCode11; } /* eslint-enable @typescript-eslint/unbound-method */ } // STATE ENGINE METHODS
// This needs to be a state separate from S_BEGIN_WHITESPACE because we want
// to be sure never to come back to this state later.
/**
 * Handler for S_BEGIN. Skips a leading byte order mark, if there is one, and
 * moves to S_BEGIN_WHITESPACE.
 */
sBegin() {
  // We are essentially peeking at the first character of the chunk. Since
  // S_BEGIN can be in effect only when we start working on the first chunk,
  // the index at which we must look is necessarily 0. Note also that the
  // following test does not depend on decoding surrogates.
  // If the initial character is 0xFEFF, ignore it.
  if (this.chunk.charCodeAt(0) === 0xFEFF) {
    this.i++;
    this.column++;
  }
  this.state = S_BEGIN_WHITESPACE;
}

/**
 * Handler for S_BEGIN_WHITESPACE. Skips leading white space, then moves to
 * S_OPEN_WAKA on ``<`` or to S_TEXT on anything else.
 */
sBeginWhitespace() {
  // We need to know whether we've encountered spaces or not because as soon
  // as we run into a space, an XML declaration is no longer possible. Rather
  // than slow down skipSpaces even in places where we don't care whether it
  // skipped anything or not, we check whether prevI is equal to the value of
  // i from before we skip spaces.
  const iBefore = this.i;
  const c = this.skipSpaces();
  if (this.prevI !== iBefore) {
    this.xmlDeclPossible = false;
  }
  switch (c) {
    case LESS:
      this.state = S_OPEN_WAKA;
      // We could naively call closeText but in this state, it is not normal
      // to have text be filled with any data.
      if (this.text.length !== 0) {
        throw new Error("no-empty text at start");
      }
      break;
    case EOC:
      // The chunk held nothing but white space: stay in this state.
      break;
    default:
      // Give the character back so that the text handler sees it.
      this.unget();
      this.state = S_TEXT;
      this.xmlDeclPossible = false;
  }
}

/**
 * Handler for S_DOCTYPE. Accumulates the doctype into ``text`` until ``>``,
 * branching off for quoted strings and for the internal subset (``[``).
 */
sDoctype() {
  var _a;
  const c = this.captureTo(DOCTYPE_TERMINATOR);
  switch (c) {
    case GREATER: {
      // eslint-disable-next-line no-unused-expressions
      (_a = this.doctypeHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text);
      this.text = "";
      this.state = S_TEXT;
      this.doctype = true; // just remember that we saw it.
      break;
    }
    case EOC:
      break;
    default:
      // The terminator itself is part of the doctype text.
      this.text += String.fromCodePoint(c);
      if (c === OPEN_BRACKET) {
        this.state = S_DTD;
      }
      else if (isQuote(c)) {
        this.state = S_DOCTYPE_QUOTE;
        this.q = c;
      }
  }
}

/**
 * Handler for S_DOCTYPE_QUOTE. Captures up to the quote that closes the
 * string opened in the doctype, then returns to S_DOCTYPE.
 */
sDoctypeQuote() {
  const q = this.q;
  if (this.captureToChar(q)) {
    this.text += String.fromCodePoint(q);
    this.q = null;
    this.state = S_DOCTYPE;
  }
}

/**
 * Handler for S_DTD. Accumulates the internal subset into ``text`` until
 * ``]``, branching off for quoted strings and for markup starting with ``<``.
 */
sDTD() {
  const c = this.captureTo(DTD_TERMINATOR);
  if (c === EOC) {
    return;
  }
  this.text += String.fromCodePoint(c);
  if (c === CLOSE_BRACKET) {
    this.state = S_DOCTYPE;
  }
  else if (c === LESS) {
    this.state = S_DTD_OPEN_WAKA;
  }
  else if (isQuote(c)) {
    this.state = S_DTD_QUOTED;
    this.q = c;
  }
}

/**
 * Handler for S_DTD_QUOTED. Captures up to the quote that closes the string
 * opened in the internal subset, then returns to S_DTD.
 */
sDTDQuoted() {
  const q = this.q;
  if (this.captureToChar(q)) {
    this.text += String.fromCodePoint(q);
    this.state = S_DTD;
    this.q = null;
  }
}

/**
 * Handler for S_DTD_OPEN_WAKA. Looks at the character after a ``<`` in the
 * internal subset: ``!`` may start a comment, ``?`` starts a processing
 * instruction, anything else goes back to S_DTD.
 */
sDTDOpenWaka() {
  const c = this.getCodeNorm();
  this.text += String.fromCodePoint(c);
  switch (c) {
    case BANG:
      this.state = S_DTD_OPEN_WAKA_BANG;
      this.openWakaBang = "";
      break;
    case QUESTION:
      this.state = S_DTD_PI;
      break;
    default:
      this.state = S_DTD;
  }
}

/**
 * Handler for S_DTD_OPEN_WAKA_BANG. Decides whether ``<!`` in the internal
 * subset is followed by ``--`` (a comment) or by something else.
 */
sDTDOpenWakaBang() {
  const char = String.fromCodePoint(this.getCodeNorm());
  const owb = this.openWakaBang += char;
  this.text += char;
  // A single "-" is not conclusive yet: wait for the next character.
  if (owb !== "-") {
    this.state = owb === "--" ? S_DTD_COMMENT : S_DTD;
    this.openWakaBang = "";
  }
}

/**
 * Handler for S_DTD_COMMENT. Captures comment content up to the next ``-``.
 */
sDTDComment() {
  if (this.captureToChar(MINUS)) {
    this.text += "-";
    this.state = S_DTD_COMMENT_ENDING;
  }
}

/**
 * Handler for S_DTD_COMMENT_ENDING. A second ``-`` means the comment may be
 * ending; anything else resumes the comment.
 */
sDTDCommentEnding() {
  const c = this.getCodeNorm();
  this.text += String.fromCodePoint(c);
  this.state = c === MINUS ? S_DTD_COMMENT_ENDED : S_DTD_COMMENT;
}

/**
 * Handler for S_DTD_COMMENT_ENDED. Expects the ``>`` that closes a comment in
 * the internal subset.
 */
sDTDCommentEnded() {
  const c = this.getCodeNorm();
  this.text += String.fromCodePoint(c);
  if (c === GREATER) {
    this.state = S_DTD;
  }
  else {
    this.fail("malformed comment.");
    // <!-- blah -- bloo --> will be recorded as
    // a comment of " blah -- bloo "
    this.state = S_DTD_COMMENT;
  }
}

/**
 * Handler for S_DTD_PI. Captures processing instruction content up to the
 * next ``?``.
 */
sDTDPI() {
  if (this.captureToChar(QUESTION)) {
    this.text += "?";
    this.state = S_DTD_PI_ENDING;
  }
}

/**
 * Handler for S_DTD_PI_ENDING. A ``>`` right after the ``?`` ends the
 * processing instruction. On any other character the state is left unchanged.
 */
sDTDPIEnding() {
  const c = this.getCodeNorm();
  this.text += String.fromCodePoint(c);
  if (c === GREATER) {
    this.state = S_DTD;
  }
}

/**
 * Handler for S_TEXT. Delegates to the in-root or outside-root text handling
 * depending on whether any tag is currently open.
 */
sText() {
  //
  // We did try a version of saxes where the S_TEXT state was split in two
  // states: one for text inside the root element, and one for text
  // outside. This was avoiding having to test this.tags.length to decide
  // what implementation to actually use.
  //
  // Performance testing on gigabyte-size files did not show any advantage to
  // using the two states solution instead of the current one. Conversely, it
  // made the code a bit more complicated elsewhere. For instance, a comment
  // can appear before the root element so when a comment ended it was
  // necessary to determine whether to return to the S_TEXT state or to the
  // new text-outside-root state.
  //
  if (this.tags.length !== 0) {
    this.handleTextInRoot();
  }
  else {
    this.handleTextOutsideRoot();
  }
}

/**
 * Handler for S_ENTITY. Accumulates an entity name up to ``;``, resolves it
 * with ``parseEntity`` and goes back to the state recorded in
 * ``entityReturnState``.
 */
sEntity() {
  // This is essentially a specialized version of captureToChar(SEMICOLON...)
  let { i: start } = this;
  const { chunk } = this;
  // eslint-disable-next-line no-labels, no-restricted-syntax
  loop:
  // eslint-disable-next-line no-constant-condition
  while (true) {
    switch (this.getCode()) {
      case NL_LIKE:
        this.entity += `${chunk.slice(start, this.prevI)}\n`;
        start = this.i;
        break;
      case SEMICOLON: {
        const { entityReturnState } = this;
        const entity = this.entity + chunk.slice(start, this.prevI);
        this.state = entityReturnState;
        let parsed;
        if (entity === "") {
          this.fail("empty entity name.");
          parsed = "&;";
        }
        else {
          parsed = this.parseEntity(entity);
          this.entity = "";
        }
        // When returning to S_TEXT, the replacement is only accumulated if
        // a text handler is set to receive it.
        if (entityReturnState !== S_TEXT || this.textHandler !== undefined) {
          this.text += parsed;
        }
        // eslint-disable-next-line no-labels
        break loop;
      }
      case EOC:
        // The name continues in the next chunk: save what we have so far.
        this.entity += chunk.slice(start);
        // eslint-disable-next-line no-labels
        break loop;
      default:
    }
  }
}

/**
 * Handler for S_OPEN_WAKA. Looks at the character after ``<`` to tell apart
 * an opening tag, a closing tag, ``<!`` markup and a processing instruction.
 */
sOpenWaka() {
  // Reminder: a state handler is called with at least one character
  // available in the current chunk. So the first call to get code inside of
  // a state handler cannot return ``EOC``. That's why we don't test
  // for it.
  const c = this.getCode();
  // either a /, ?, !, or text is coming next.
  if (isNameStartChar(c)) {
    this.state = S_OPEN_TAG;
    // The character belongs to the tag name: let S_OPEN_TAG read it again.
    this.unget();
    this.xmlDeclPossible = false;
  }
  else {
    switch (c) {
      case FORWARD_SLASH:
        this.state = S_CLOSE_TAG;
        this.xmlDeclPossible = false;
        break;
      case BANG:
        this.state = S_OPEN_WAKA_BANG;
        this.openWakaBang = "";
        this.xmlDeclPossible = false;
        break;
      case QUESTION:
        // xmlDeclPossible is left alone here: this may be the XML
        // declaration itself.
        this.state = S_PI_FIRST_CHAR;
        break;
      default:
        this.fail("disallowed character in tag name");
        this.state = S_TEXT;
        this.xmlDeclPossible = false;
    }
  }
}

/**
 * Handler for S_OPEN_WAKA_BANG. Accumulates the characters after ``<!`` until
 * they spell ``[CDATA[``, ``--`` or ``DOCTYPE``.
 */
sOpenWakaBang() {
  this.openWakaBang += String.fromCodePoint(this.getCodeNorm());
  switch (this.openWakaBang) {
    case "[CDATA[":
      // A CDATA section is text data, so it may not appear outside the root.
      if (!this.sawRoot && !this.reportedTextBeforeRoot) {
        this.fail("text data outside of root node.");
        this.reportedTextBeforeRoot = true;
      }
      if (this.closedRoot && !this.reportedTextAfterRoot) {
        this.fail("text data outside of root node.");
        this.reportedTextAfterRoot = true;
      }
      this.state = S_CDATA;
      this.openWakaBang = "";
      break;
    case "--":
      this.state = S_COMMENT;
      this.openWakaBang = "";
      break;
    case "DOCTYPE":
      this.state = S_DOCTYPE;
      if (this.doctype || this.sawRoot) {
        this.fail("inappropriately located doctype declaration.");
      }
      this.openWakaBang = "";
      break;
    default:
      // 7 happens to be the maximum length of the string that can possibly
      // match one of the cases above.
      if (this.openWakaBang.length >= 7) {
        this.fail("incorrect syntax.");
      }
  }
}

/**
 * Handler for S_COMMENT. Captures comment content up to the next ``-``.
 */
sComment() {
  if (this.captureToChar(MINUS)) {
    this.state = S_COMMENT_ENDING;
  }
}

/**
 * Handler for S_COMMENT_ENDING. On a second ``-`` the comment captured so far
 * is handed to the comment handler; otherwise the ``-`` was ordinary content.
 */
sCommentEnding() {
  var _a;
  const c = this.getCodeNorm();
  if (c === MINUS) {
    this.state = S_COMMENT_ENDED;
    // eslint-disable-next-line no-unused-expressions
    (_a = this.commentHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text);
    this.text = "";
  }
  else {
    this.text += `-${String.fromCodePoint(c)}`;
    this.state = S_COMMENT;
  }
}

/**
 * Handler for S_COMMENT_ENDED. Expects the ``>`` that closes the comment.
 */
sCommentEnded() {
  const c = this.getCodeNorm();
  if (c !== GREATER) {
    this.fail("malformed comment.");
    // The text before the "--" has already been passed to the comment
    // handler by sCommentEnding, so for <!-- blah -- bloo --> what follows
    // is accumulated separately, starting with "--".
    this.text += `--${String.fromCodePoint(c)}`;
    this.state = S_COMMENT;
  }
  else {
    this.state = S_TEXT;
  }
}

/**
 * Handler for S_CDATA. Captures CDATA content up to the next ``]``.
 */
sCData() {
  if (this.captureToChar(CLOSE_BRACKET)) {
    this.state = S_CDATA_ENDING;
  }
}

/**
 * Handler for S_CDATA_ENDING. A second ``]`` means the section may be ending;
 * otherwise the ``]`` was ordinary content.
 */
sCDataEnding() {
  const c = this.getCodeNorm();
  if (c === CLOSE_BRACKET) {
    this.state = S_CDATA_ENDING_2;
  }
  else {
    this.text += `]${String.fromCodePoint(c)}`;
    this.state = S_CDATA;
  }
}

/**
 * Handler for S_CDATA_ENDING_2. After ``]]``, a ``>`` ends the section and
 * hands its content to the cdata handler.
 */
sCDataEnding2() {
  var _a;
  const c = this.getCodeNorm();
  switch (c) {
    case GREATER: {
      // eslint-disable-next-line no-unused-expressions
      (_a = this.cdataHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text);
      this.text = "";
      this.state = S_TEXT;
      break;
    }
    case CLOSE_BRACKET:
      // We have "]]]": the first bracket is content and the last two may
      // still end the section, so we stay in this state.
      this.text += "]";
      break;
    default:
      this.text += `]]${String.fromCodePoint(c)}`;
      this.state = S_CDATA;
  }
}

// We need this separate state to check the first character of the pi target
// with this.nameStartCheck which allows less characters than this.nameCheck.
sPIFirstChar() { const c = this.getCodeNorm(); // This is first because in the case where the file is well-formed this is
// the branch taken. We optimize for well-formedness.
if (this.nameStartCheck(c)) { this.piTarget += String.fromCodePoint(c); this.state = S_PI_REST; } else if (c === QUESTION || isS(c)) { this.fail("processing instruction without a target."); this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY; } else { this.fail("disallowed character in processing instruction name."); this.piTarget += String.fromCodePoint(c); this.state = S_PI_REST; } } sPIRest() { // Capture characters into a piTarget while ``this.nameCheck`` run on the
// character read returns true.
const { chunk, i: start } = this; // eslint-disable-next-line no-constant-condition
while (true) { const c = this.getCodeNorm(); if (c === EOC) { this.piTarget += chunk.slice(start); return; } // NL cannot satisfy this.nameCheck so we don't have to test specifically
// for it.
if (!this.nameCheck(c)) { this.piTarget += chunk.slice(start, this.prevI); const isQuestion = c === QUESTION; if (isQuestion || isS(c)) { if (this.piTarget === "xml") { if (!this.xmlDeclPossible) { this.fail("an XML declaration must be at the start of the document."); } this.state = isQuestion ? S_XML_DECL_ENDING : S_XML_DECL_NAME_START; } else { this.state = isQuestion ? S_PI_ENDING : S_PI_BODY; } } else { this.fail("disallowed character in processing instruction name."); this.piTarget += String.fromCodePoint(c); } break; } } } sPIBody() { if (this.text.length === 0) { const c = this.getCodeNorm(); if (c === QUESTION) { this.state = S_PI_ENDING; } else if (!isS(c)) { this.text = String.fromCodePoint(c); } } // The question mark character is not valid inside any of the XML
// declaration name/value pairs.
else if (this.captureToChar(QUESTION)) { this.state = S_PI_ENDING; } } sPIEnding() { var _a; const c = this.getCodeNorm(); if (c === GREATER) { const { piTarget } = this; if (piTarget.toLowerCase() === "xml") { this.fail("the XML declaration must appear at the start of the document."); } // eslint-disable-next-line no-unused-expressions
(_a = this.piHandler) === null || _a === void 0 ? void 0 : _a.call(this, { target: piTarget, body: this.text, }); this.piTarget = this.text = ""; this.state = S_TEXT; } else if (c === QUESTION) { // We ran into ?? as part of a processing instruction. We initially took
// the first ? as a sign that the PI was ending, but it is not. So we have
// to add it to the body but we take the new ? as a sign that the PI is
// ending.
this.text += "?"; } else { this.text += `?${String.fromCodePoint(c)}`; this.state = S_PI_BODY; } this.xmlDeclPossible = false; } sXMLDeclNameStart() { const c = this.skipSpaces(); // The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) { // It is valid to go to S_XML_DECL_ENDING from this state.
this.state = S_XML_DECL_ENDING; return; } if (c !== EOC) { this.state = S_XML_DECL_NAME; this.name = String.fromCodePoint(c); } } sXMLDeclName() { const c = this.captureTo(XML_DECL_NAME_TERMINATOR); // The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) { this.state = S_XML_DECL_ENDING; this.name += this.text; this.text = ""; this.fail("XML declaration is incomplete."); return; } if (!(isS(c) || c === EQUAL)) { return; } this.name += this.text; this.text = ""; if (!this.xmlDeclExpects.includes(this.name)) { switch (this.name.length) { case 0: this.fail("did not expect any more name/value pairs."); break; case 1: this.fail(`expected the name ${this.xmlDeclExpects[0]}.`); break; default: this.fail(`expected one of ${this.xmlDeclExpects.join(", ")}`); } } this.state = c === EQUAL ? S_XML_DECL_VALUE_START : S_XML_DECL_EQ; } sXMLDeclEq() { const c = this.getCodeNorm(); // The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) { this.state = S_XML_DECL_ENDING; this.fail("XML declaration is incomplete."); return; } if (isS(c)) { return; } if (c !== EQUAL) { this.fail("value required."); } this.state = S_XML_DECL_VALUE_START; } sXMLDeclValueStart() { const c = this.getCodeNorm(); // The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) { this.state = S_XML_DECL_ENDING; this.fail("XML declaration is incomplete."); return; } if (isS(c)) { return; } if (!isQuote(c)) { this.fail("value must be quoted."); this.q = SPACE; } else { this.q = c; } this.state = S_XML_DECL_VALUE; } sXMLDeclValue() { const c = this.captureTo([this.q, QUESTION]); // The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) { this.state = S_XML_DECL_ENDING; this.text = ""; this.fail("XML declaration is incomplete."); return; } if (c === EOC) { return; } const value = this.text; this.text = ""; switch (this.name) { case "version": { this.xmlDeclExpects = ["encoding", "standalone"]; const version = value; this.xmlDecl.version = version; // This is the test specified by XML 1.0 but it is fine for XML 1.1.
if (!/^1\.[0-9]+$/.test(version)) { this.fail("version number must match /^1\\.[0-9]+$/."); } // When forceXMLVersion is set, the XML declaration is ignored.
else if (!this.opt.forceXMLVersion) { this.setXMLVersion(version); } break; } case "encoding": if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(value)) { this.fail("encoding value must match \ /^[A-Za-z0-9][A-Za-z0-9._-]*$/."); } this.xmlDeclExpects = ["standalone"]; this.xmlDecl.encoding = value; break; case "standalone": if (value !== "yes" && value !== "no") { this.fail("standalone value must match \"yes\" or \"no\"."); } this.xmlDeclExpects = []; this.xmlDecl.standalone = value; break; default: // We don't need to raise an error here since we've already raised one
// when checking what name was expected.
} this.name = ""; this.state = S_XML_DECL_SEPARATOR; } sXMLDeclSeparator() { const c = this.getCodeNorm(); // The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) { // It is valid to go to S_XML_DECL_ENDING from this state.
this.state = S_XML_DECL_ENDING; return; } if (!isS(c)) { this.fail("whitespace required."); this.unget(); } this.state = S_XML_DECL_NAME_START; } sXMLDeclEnding() { var _a; const c = this.getCodeNorm(); if (c === GREATER) { if (this.piTarget !== "xml") { this.fail("processing instructions are not allowed before root."); } else if (this.name !== "version" && this.xmlDeclExpects.includes("version")) { this.fail("XML declaration must contain a version."); } // eslint-disable-next-line no-unused-expressions
(_a = this.xmldeclHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.xmlDecl); this.name = ""; this.piTarget = this.text = ""; this.state = S_TEXT; } else { // We got here because the previous character was a ?, but the question
// mark character is not valid inside any of the XML declaration
// name/value pairs.
this.fail("The character ? is disallowed anywhere in XML declarations."); } this.xmlDeclPossible = false; } sOpenTag() { var _a; const c = this.captureNameChars(); if (c === EOC) { return; } const tag = this.tag = { name: this.name, attributes: Object.create(null), }; this.name = ""; if (this.xmlnsOpt) { this.topNS = tag.ns = Object.create(null); } // eslint-disable-next-line no-unused-expressions
(_a = this.openTagStartHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag); this.sawRoot = true; if (!this.fragmentOpt && this.closedRoot) { this.fail("documents may contain only one root."); } switch (c) { case GREATER: this.openTag(); break; case FORWARD_SLASH: this.state = S_OPEN_TAG_SLASH; break; default: if (!isS(c)) { this.fail("disallowed character in tag name."); } this.state = S_ATTRIB; } } sOpenTagSlash() { if (this.getCode() === GREATER) { this.openSelfClosingTag(); } else { this.fail("forward-slash in opening tag not followed by >."); this.state = S_ATTRIB; } } sAttrib() { const c = this.skipSpaces(); if (c === EOC) { return; } if (isNameStartChar(c)) { this.unget(); this.state = S_ATTRIB_NAME; } else if (c === GREATER) { this.openTag(); } else if (c === FORWARD_SLASH) { this.state = S_OPEN_TAG_SLASH; } else { this.fail("disallowed character in attribute name."); } } sAttribName() { const c = this.captureNameChars(); if (c === EQUAL) { this.state = S_ATTRIB_VALUE; } else if (isS(c)) { this.state = S_ATTRIB_NAME_SAW_WHITE; } else if (c === GREATER) { this.fail("attribute without value."); this.pushAttrib(this.name, this.name); this.name = this.text = ""; this.openTag(); } else if (c !== EOC) { this.fail("disallowed character in attribute name."); } } sAttribNameSawWhite() { const c = this.skipSpaces(); switch (c) { case EOC: return; case EQUAL: this.state = S_ATTRIB_VALUE; break; default: this.fail("attribute without value."); // Should we do this???
// this.tag.attributes[this.name] = "";
this.text = ""; this.name = ""; if (c === GREATER) { this.openTag(); } else if (isNameStartChar(c)) { this.unget(); this.state = S_ATTRIB_NAME; } else { this.fail("disallowed character in attribute name."); this.state = S_ATTRIB; } } } sAttribValue() { const c = this.getCodeNorm(); if (isQuote(c)) { this.q = c; this.state = S_ATTRIB_VALUE_QUOTED; } else if (!isS(c)) { this.fail("unquoted attribute value."); this.state = S_ATTRIB_VALUE_UNQUOTED; this.unget(); } } sAttribValueQuoted() { // We deliberately do not use captureTo here. The specialized code we use
// here is faster than using captureTo.
const { q, chunk } = this; let { i: start } = this; // eslint-disable-next-line no-constant-condition
while (true) { switch (this.getCode()) { case q: this.pushAttrib(this.name, this.text + chunk.slice(start, this.prevI)); this.name = this.text = ""; this.q = null; this.state = S_ATTRIB_VALUE_CLOSED; return; case AMP: this.text += chunk.slice(start, this.prevI); this.state = S_ENTITY; this.entityReturnState = S_ATTRIB_VALUE_QUOTED; return; case NL: case NL_LIKE: case TAB: this.text += `${chunk.slice(start, this.prevI)} `; start = this.i; break; case LESS: this.text += chunk.slice(start, this.prevI); this.fail("disallowed character."); return; case EOC: this.text += chunk.slice(start); return; default: } } } sAttribValueClosed() { const c = this.getCodeNorm(); if (isS(c)) { this.state = S_ATTRIB; } else if (c === GREATER) { this.openTag(); } else if (c === FORWARD_SLASH) { this.state = S_OPEN_TAG_SLASH; } else if (isNameStartChar(c)) { this.fail("no whitespace between attributes."); this.unget(); this.state = S_ATTRIB_NAME; } else { this.fail("disallowed character in attribute name."); } } sAttribValueUnquoted() { // We don't do anything regarding EOL or space handling for unquoted
// attributes. We already have failed by the time we get here, and the
// contract that saxes upholds states that upon failure, it is not safe to
// rely on the data passed to event handlers (other than
// ``onerror``). Passing "bad" data is not a problem.
const c = this.captureTo(ATTRIB_VALUE_UNQUOTED_TERMINATOR); switch (c) { case AMP: this.state = S_ENTITY; this.entityReturnState = S_ATTRIB_VALUE_UNQUOTED; break; case LESS: this.fail("disallowed character."); break; case EOC: break; default: if (this.text.includes("]]>")) { this.fail("the string \"]]>\" is disallowed in char data."); } this.pushAttrib(this.name, this.text); this.name = this.text = ""; if (c === GREATER) { this.openTag(); } else { this.state = S_ATTRIB; } } } sCloseTag() { const c = this.captureNameChars(); if (c === GREATER) { this.closeTag(); } else if (isS(c)) { this.state = S_CLOSE_TAG_SAW_WHITE; } else if (c !== EOC) { this.fail("disallowed character in closing tag."); } } sCloseTagSawWhite() { switch (this.skipSpaces()) { case GREATER: this.closeTag(); break; case EOC: break; default: this.fail("disallowed character in closing tag."); } } // END OF STATE ENGINE METHODS
handleTextInRoot() { // This is essentially a specialized version of captureTo which is optimized
// for performing the ]]> check. A previous version of this code, checked
// ``this.text`` for the presence of ]]>. It simplified the code but was
// very costly when character data contained a lot of entities to be parsed.
//
// Since we are using a specialized loop, we also keep track of the presence
// of ]]> in text data. The sequence ]]> is forbidden to appear as-is.
//
let { i: start, forbiddenState } = this; const { chunk, textHandler: handler } = this; // eslint-disable-next-line no-labels, no-restricted-syntax
scanLoop: // eslint-disable-next-line no-constant-condition
while (true) { switch (this.getCode()) { case LESS: { this.state = S_OPEN_WAKA; if (handler !== undefined) { const { text } = this; const slice = chunk.slice(start, this.prevI); if (text.length !== 0) { handler(text + slice); this.text = ""; } else if (slice.length !== 0) { handler(slice); } } forbiddenState = FORBIDDEN_START; // eslint-disable-next-line no-labels
break scanLoop; } case AMP: this.state = S_ENTITY; this.entityReturnState = S_TEXT; if (handler !== undefined) { this.text += chunk.slice(start, this.prevI); } forbiddenState = FORBIDDEN_START; // eslint-disable-next-line no-labels
break scanLoop; case CLOSE_BRACKET: switch (forbiddenState) { case FORBIDDEN_START: forbiddenState = FORBIDDEN_BRACKET; break; case FORBIDDEN_BRACKET: forbiddenState = FORBIDDEN_BRACKET_BRACKET; break; case FORBIDDEN_BRACKET_BRACKET: break; default: throw new Error("impossible state"); } break; case GREATER: if (forbiddenState === FORBIDDEN_BRACKET_BRACKET) { this.fail("the string \"]]>\" is disallowed in char data."); } forbiddenState = FORBIDDEN_START; break; case NL_LIKE: if (handler !== undefined) { this.text += `${chunk.slice(start, this.prevI)}\n`; } start = this.i; forbiddenState = FORBIDDEN_START; break; case EOC: if (handler !== undefined) { this.text += chunk.slice(start); } // eslint-disable-next-line no-labels
break scanLoop; default: forbiddenState = FORBIDDEN_START; } } this.forbiddenState = forbiddenState; } handleTextOutsideRoot() { // This is essentially a specialized version of captureTo which is optimized
// for a specialized task. We keep track of the presence of non-space
// characters in the text since these are errors when appearing outside the
// document root element.
let { i: start } = this; const { chunk, textHandler: handler } = this; let nonSpace = false; // eslint-disable-next-line no-labels, no-restricted-syntax
outRootLoop: // eslint-disable-next-line no-constant-condition
while (true) { const code = this.getCode(); switch (code) { case LESS: { this.state = S_OPEN_WAKA; if (handler !== undefined) { const { text } = this; const slice = chunk.slice(start, this.prevI); if (text.length !== 0) { handler(text + slice); this.text = ""; } else if (slice.length !== 0) { handler(slice); } } // eslint-disable-next-line no-labels
break outRootLoop; } case AMP: this.state = S_ENTITY; this.entityReturnState = S_TEXT; if (handler !== undefined) { this.text += chunk.slice(start, this.prevI); } nonSpace = true; // eslint-disable-next-line no-labels
break outRootLoop; case NL_LIKE: if (handler !== undefined) { this.text += `${chunk.slice(start, this.prevI)}\n`; } start = this.i; break; case EOC: if (handler !== undefined) { this.text += chunk.slice(start); } // eslint-disable-next-line no-labels
break outRootLoop; default: if (!isS(code)) { nonSpace = true; } } } if (!nonSpace) { return; } // We use the reportedTextBeforeRoot and reportedTextAfterRoot flags
// to avoid reporting errors for every single character that is out of
// place.
if (!this.sawRoot && !this.reportedTextBeforeRoot) { this.fail("text data outside of root node."); this.reportedTextBeforeRoot = true; } if (this.closedRoot && !this.reportedTextAfterRoot) { this.fail("text data outside of root node."); this.reportedTextAfterRoot = true; } } pushAttribNS(name, value) { var _a; const { prefix, local } = this.qname(name); const attr = { name, prefix, local, value }; this.attribList.push(attr); // eslint-disable-next-line no-unused-expressions
(_a = this.attributeHandler) === null || _a === void 0 ? void 0 : _a.call(this, attr); if (prefix === "xmlns") { const trimmed = value.trim(); if (this.currentXMLVersion === "1.0" && trimmed === "") { this.fail("invalid attempt to undefine prefix in XML 1.0"); } this.topNS[local] = trimmed; nsPairCheck(this, local, trimmed); } else if (name === "xmlns") { const trimmed = value.trim(); this.topNS[""] = trimmed; nsPairCheck(this, "", trimmed); } } pushAttribPlain(name, value) { var _a; const attr = { name, value }; this.attribList.push(attr); // eslint-disable-next-line no-unused-expressions
(_a = this.attributeHandler) === null || _a === void 0 ? void 0 : _a.call(this, attr); } /** * End parsing. This performs final well-formedness checks and resets the * parser to a clean state. * * @returns this */ end() { var _a, _b; if (!this.sawRoot) { this.fail("document must contain a root element."); } const { tags } = this; while (tags.length > 0) { const tag = tags.pop(); this.fail(`unclosed tag: ${tag.name}`); } if ((this.state !== S_BEGIN) && (this.state !== S_TEXT)) { this.fail("unexpected end."); } const { text } = this; if (text.length !== 0) { // eslint-disable-next-line no-unused-expressions
(_a = this.textHandler) === null || _a === void 0 ? void 0 : _a.call(this, text); this.text = ""; } this._closed = true; // eslint-disable-next-line no-unused-expressions
(_b = this.endHandler) === null || _b === void 0 ? void 0 : _b.call(this); this._init(); return this; } /** * Resolve a namespace prefix. * * @param prefix The prefix to resolve. * * @returns The namespace URI or ``undefined`` if the prefix is not defined. */ resolve(prefix) { var _a, _b; let uri = this.topNS[prefix]; if (uri !== undefined) { return uri; } const { tags } = this; for (let index = tags.length - 1; index >= 0; index--) { uri = tags[index].ns[prefix]; if (uri !== undefined) { return uri; } } uri = this.ns[prefix]; if (uri !== undefined) { return uri; } return (_b = (_a = this.opt).resolvePrefix) === null || _b === void 0 ? void 0 : _b.call(_a, prefix); } /** * Parse a qname into its prefix and local name parts. * * @param name The name to parse * * @returns */ qname(name) { // This is faster than using name.split(":").
const colon = name.indexOf(":"); if (colon === -1) { return { prefix: "", local: name }; } const local = name.slice(colon + 1); const prefix = name.slice(0, colon); if (prefix === "" || local === "" || local.includes(":")) { this.fail(`malformed name: ${name}.`); } return { prefix, local }; } processAttribsNS() { var _a; const { attribList } = this; const tag = this.tag; { // add namespace info to tag
const { prefix, local } = this.qname(tag.name); tag.prefix = prefix; tag.local = local; const uri = tag.uri = (_a = this.resolve(prefix)) !== null && _a !== void 0 ? _a : ""; if (prefix !== "") { if (prefix === "xmlns") { this.fail("tags may not have \"xmlns\" as prefix."); } if (uri === "") { this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`); tag.uri = prefix; } } } if (attribList.length === 0) { return; } const { attributes } = tag; const seen = new Set(); // Note: do not apply default ns to attributes:
// http://www.w3.org/TR/REC-xml-names/#defaulting
for (const attr of attribList) { const { name, prefix, local } = attr; let uri; let eqname; if (prefix === "") { uri = name === "xmlns" ? XMLNS_NAMESPACE : ""; eqname = name; } else { uri = this.resolve(prefix); // if there's any attributes with an undefined namespace,
// then fail on them now.
if (uri === undefined) { this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`); uri = prefix; } eqname = `{${uri}}${local}`; } if (seen.has(eqname)) { this.fail(`duplicate attribute: ${eqname}.`); } seen.add(eqname); attr.uri = uri; attributes[name] = attr; } this.attribList = []; } processAttribsPlain() { const { attribList } = this; // eslint-disable-next-line prefer-destructuring
const attributes = this.tag.attributes; for (const { name, value } of attribList) { if (attributes[name] !== undefined) { this.fail(`duplicate attribute: ${name}.`); } attributes[name] = value; } this.attribList = []; } /** * Handle a complete open tag. This parser code calls this once it has seen * the whole tag. This method checks for well-formeness and then emits * ``onopentag``. */ openTag() { var _a; this.processAttribs(); const { tags } = this; const tag = this.tag; tag.isSelfClosing = false; // There cannot be any pending text here due to the onopentagstart that was
// necessarily emitted before we get here. So we do not check text.
// eslint-disable-next-line no-unused-expressions
(_a = this.openTagHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag); tags.push(tag); this.state = S_TEXT; this.name = ""; } /** * Handle a complete self-closing tag. This parser code calls this once it has * seen the whole tag. This method checks for well-formeness and then emits * ``onopentag`` and ``onclosetag``. */ openSelfClosingTag() { var _a, _b, _c; this.processAttribs(); const { tags } = this; const tag = this.tag; tag.isSelfClosing = true; // There cannot be any pending text here due to the onopentagstart that was
// necessarily emitted before we get here. So we do not check text.
// eslint-disable-next-line no-unused-expressions
(_a = this.openTagHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag); // eslint-disable-next-line no-unused-expressions
(_b = this.closeTagHandler) === null || _b === void 0 ? void 0 : _b.call(this, tag); const top = this.tag = (_c = tags[tags.length - 1]) !== null && _c !== void 0 ? _c : null; if (top === null) { this.closedRoot = true; } this.state = S_TEXT; this.name = ""; } /** * Handle a complete close tag. This parser code calls this once it has seen * the whole tag. This method checks for well-formeness and then emits * ``onclosetag``. */ closeTag() { const { tags, name } = this; // Our state after this will be S_TEXT, no matter what, and we can clear
// tagName now.
this.state = S_TEXT; this.name = ""; if (name === "") { this.fail("weird empty close tag."); this.text += "</>"; return; } const handler = this.closeTagHandler; let l = tags.length; while (l-- > 0) { const tag = this.tag = tags.pop(); this.topNS = tag.ns; // eslint-disable-next-line no-unused-expressions
handler === null || handler === void 0 ? void 0 : handler(tag); if (tag.name === name) { break; } this.fail("unexpected close tag."); } if (l === 0) { this.closedRoot = true; } else if (l < 0) { this.fail(`unmatched closing tag: ${name}.`); this.text += `</${name}>`; } } /** * Resolves an entity. Makes any necessary well-formedness checks. * * @param entity The entity to resolve. * * @returns The parsed entity. */ parseEntity(entity) { // startsWith would be significantly slower for this test.
// eslint-disable-next-line @typescript-eslint/prefer-string-starts-ends-with
if (entity[0] !== "#") { const defined = this.ENTITIES[entity]; if (defined !== undefined) { return defined; } this.fail(this.isName(entity) ? "undefined entity." : "disallowed character in entity name."); return `&${entity};`; } let num = NaN; if (entity[1] === "x" && /^#x[0-9a-f]+$/i.test(entity)) { num = parseInt(entity.slice(2), 16); } else if (/^#[0-9]+$/.test(entity)) { num = parseInt(entity.slice(1), 10); } // The character reference is required to match the CHAR production.
if (!this.isChar(num)) { this.fail("malformed character entity."); return `&${entity};`; } return String.fromCodePoint(num); } } exports.SaxesParser = SaxesParser; //# sourceMappingURL=saxes.js.map
|