public class LagartoParser
extends java.lang.Object
Uses TagVisitor for callbacks.
Works according to the HTML5 specification for tokenization, as described
by WHATWG.
Differences from the specs:
There are two ways in which text is passed back to the visitor.
By default it is passed as CharBuffer, which
gives excellent performance. However, if you need Strings instead,
then enable that option and all text will be passed as strings. This is faster
than first converting to a char buffer and then to strings.
Modifier and Type | Class and Description |
---|---|
protected class |
LagartoParser.ScriptEscape
Since escaping states inside the SCRIPT tag are rare, we want to use them
lazily, only when really needed.
|
protected class |
LagartoParser.XmlDeclaration |
static class |
jodd.lagarto.Scanner.Position
Current position.
|
Constructor and Description |
---|
LagartoParser(char[] charArray)
Creates parser on char array.
|
LagartoParser(java.lang.String string)
Creates parser on a String.
|
Modifier and Type | Method and Description |
---|---|
protected void |
_error(java.lang.String message)
Prepares error message and reports it to the visitor.
|
protected java.lang.CharSequence |
charSequence(int from,
int to)
Creates char sub-sequence from the input.
|
protected void |
consumeCharacterReference() |
protected void |
consumeCharacterReference(char allowedChar) |
protected void |
emitCData(java.lang.CharSequence charSequence) |
protected void |
emitComment(int from,
int to)
Emits a comment.
|
protected void |
emitDoctype() |
protected void |
emitScript(int from,
int to) |
protected void |
emitTag() |
protected void |
emitText()
Emits text if there is some content.
|
protected void |
emitXml() |
protected void |
errorCharReference() |
protected void |
errorEOF() |
protected void |
errorInvalidToken() |
protected int |
find(char[] target,
int from,
int end)
Finds character buffer in some range and returns its index.
|
protected int |
find(char target,
int from,
int end)
Finds a character in some range and returns its index.
|
LagartoParserConfig |
getConfig()
Returns configuration for the parser. |
long |
getParsingTime()
Returns parsing time in milliseconds.
|
protected void |
initialize(char[] input)
Initializes parser.
|
protected boolean |
isEOF()
Returns true if EOF. |
boolean |
match(char[] target)
Performs a case-sensitive match of the char buffer against the content at the current location.
|
protected boolean |
match(char[] target,
int ndx)
Matches the char buffer against the content at the given location.
|
boolean |
matchUpperCase(char[] uppercaseTarget)
Matches a char buffer given in uppercase against the content at the current location, which
is converted to upper case to make the matching case-insensitive.
|
void |
parse(TagVisitor visitor)
Parses content and calls back the provided TagVisitor. |
protected jodd.lagarto.Scanner.Position |
position(int position)
Calculates current position: offset, line and column. |
void |
setConfig(LagartoParserConfig config)
Sets parser configuration.
|
protected void |
textEmitChar(char c)
Emits characters into the local text buffer.
|
protected void |
textEmitChars(char[] buffer) |
protected void |
textEmitChars(int from,
int to) |
protected void |
textStart()
Resets text buffer.
|
protected java.lang.CharSequence |
textWrap() |
protected TagVisitor visitor
protected jodd.lagarto.ParsedTag tag
protected ParsedDoctype doctype
protected long parsingTime
protected LagartoParserConfig config
protected boolean parsing
protected State DATA_STATE
protected State TAG_OPEN
protected State END_TAG_OPEN
protected State TAG_NAME
protected State BEFORE_ATTRIBUTE_NAME
protected State ATTRIBUTE_NAME
protected State AFTER_ATTRIBUTE_NAME
protected State BEFORE_ATTRIBUTE_VALUE
protected State ATTR_VALUE_UNQUOTED
protected State ATTR_VALUE_SINGLE_QUOTED
protected State ATTR_VALUE_DOUBLE_QUOTED
protected State AFTER_ATTRIBUTE_VALUE_QUOTED
protected State SELF_CLOSING_START_TAG
protected State BOGUS_COMMENT
protected State MARKUP_DECLARATION_OPEN
protected int rawTextStart
protected int rawTextEnd
protected char[] rawTagName
protected State RAWTEXT
protected State RAWTEXT_LESS_THAN_SIGN
protected State RAWTEXT_END_TAG_OPEN
protected State RAWTEXT_END_TAG_NAME
protected int rcdataTagStart
protected char[] rcdataTagName
protected State RCDATA
protected State RCDATA_LESS_THAN_SIGN
protected State RCDATA_END_TAG_OPEN
protected State RCDATA_END_TAG_NAME
protected int commentStart
protected State COMMENT_START
protected State COMMENT_START_DASH
protected State COMMENT
protected State COMMENT_END_DASH
protected State COMMENT_END
protected State COMMENT_END_BANG
protected State DOCTYPE
protected State BEFORE_DOCTYPE_NAME
protected State DOCTYPE_NAME
protected State AFTER_DOCUMENT_NAME
protected int doctypeIdNameStart
protected State AFTER_DOCTYPE_PUBLIC_KEYWORD
protected State BEFORE_DOCTYPE_PUBLIC_IDENTIFIER
protected State DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED
protected State DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED
protected State AFTER_DOCTYPE_PUBLIC_IDENTIFIER
protected State BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS
protected State BOGUS_DOCTYPE
protected State AFTER_DOCTYPE_SYSTEM_KEYWORD
protected State BEFORE_DOCTYPE_SYSTEM_IDENTIFIER
protected State DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
protected State DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED
protected State AFTER_DOCTYPE_SYSTEM_IDENTIFIER
protected int scriptStartNdx
protected int scriptEndNdx
protected int scriptEndTagName
protected State SCRIPT_DATA
protected State SCRIPT_DATA_LESS_THAN_SIGN
protected State SCRIPT_DATA_END_TAG_OPEN
protected State SCRIPT_DATA_END_TAG_NAME
protected LagartoParser.ScriptEscape scriptEscape
protected LagartoParser.XmlDeclaration xmlDeclaration
protected char[] text
protected int textLen
protected int attrStartNdx
protected int attrEndNdx
protected State state
protected char[] input
protected int ndx
protected int total
public LagartoParser(char[] charArray)
public LagartoParser(java.lang.String string)
protected void initialize(char[] input)
public LagartoParserConfig getConfig()
Returns configuration for the parser.
public void setConfig(LagartoParserConfig config)
public void parse(TagVisitor visitor)
Parses content and calls back the provided TagVisitor.
public long getParsingTime()
protected void consumeCharacterReference(char allowedChar)
protected void consumeCharacterReference()
protected void textEmitChar(char c)
protected void textStart()
protected void textEmitChars(int from, int to)
protected void textEmitChars(char[] buffer)
protected java.lang.CharSequence textWrap()
protected void emitTag()
protected void emitComment(int from, int to)
protected void emitText()
protected void emitScript(int from, int to)
protected void emitDoctype()
protected void emitXml()
protected void emitCData(java.lang.CharSequence charSequence)
protected void errorEOF()
protected void errorInvalidToken()
protected void errorCharReference()
protected void _error(java.lang.String message)
protected final int find(char target, int from, int end)
Returns -1 if the character is not found.
protected final int find(char[] target, int from, int end)
Returns -1 if the character buffer is not found.
protected final boolean match(char[] target, int ndx)
public final boolean match(char[] target)
public final boolean matchUpperCase(char[] uppercaseTarget)
protected final java.lang.CharSequence charSequence(int from, int to)
protected final boolean isEOF()
Returns true if EOF.
protected jodd.lagarto.Scanner.Position position(int position)
Calculates current position: offset, line and column.
Copyright © 2003-present Jodd Team