C# Class HtmlParserSharp.Core.Tokenizer

Show file Open project: prepare/HTML-Renderer Class Usage Examples

Protected Properties

Property Type Description
ampersandLocation Locator
attributeName AttributeName
confident bool
cstart int
endTag bool
endTagExpectation ElementName
html4 bool
index int
lastCR bool
stateSave TokenizerState
value int

Public Methods

Method Description
BecomeConfident ( ) : void
End ( ) : void
Eof ( ) : void
Err ( string message ) : void
ErrTreeBuilder ( string message ) : void
Fatal ( string message ) : void
InitializeWithoutStarting ( ) : void
InternalEncodingDeclaration ( string internalCharset ) : bool
LoadState ( Tokenizer other ) : void
NotifyAboutMetaBoundary ( ) : void
RequestSuspension ( ) : void
ResetToDataState ( ) : void
SetStateAndEndTagExpectation ( TokenizerState specialTokenizerState, ElementName endTagExpectation ) : void
SetStateAndEndTagExpectation ( TokenizerState specialTokenizerState, string endTagExpectation ) : void
SetTransitionBaseOffset ( int offset ) : void

Sets an offset to be added to the position reported to TransitionHandler.

Start ( ) : void
TokenizeBuffer ( UTF16Buffer buffer ) : bool
Tokenizer ( ITokenHandler tokenHandler ) : System
Tokenizer ( ITokenHandler tokenHandler, bool newAttributesEachTime ) : System
Warn ( string message ) : void

Protected Methods

Method Description
ErrAstralNonCharacter ( int ch ) : void
ErrAttributeValueMissing ( ) : void
ErrBadCharAfterLt ( char c ) : void
ErrBadCharBeforeAttributeNameOrNull ( char c ) : void
ErrBogusComment ( ) : void
ErrBogusDoctype ( ) : void
ErrCharRefLacksSemicolon ( ) : void
ErrConsecutiveHyphens ( ) : void
ErrDuplicateAttribute ( ) : void
ErrEofAfterLt ( ) : void
ErrEofInAttributeName ( ) : void
ErrEofInAttributeValue ( ) : void
ErrEofInComment ( ) : void
ErrEofInDoctype ( ) : void
ErrEofInEndTag ( ) : void
ErrEofInPublicId ( ) : void
ErrEofInSystemId ( ) : void
ErrEofInTagName ( ) : void
ErrEofWithoutGt ( ) : void
ErrEqualsSignBeforeAttributeName ( ) : void
ErrExpectedPublicId ( ) : void
ErrExpectedSystemId ( ) : void
ErrGarbageAfterLtSlash ( ) : void
ErrGtInPublicId ( ) : void
ErrGtInSystemId ( ) : void
ErrHtml4LtSlashInRcdata ( char folded ) : void
ErrHtml4NonNameInUnquotedAttribute ( char c ) : void
ErrHtml4XmlVoidSyntax ( ) : void
ErrHyphenHyphenBang ( ) : void
ErrLtGt ( ) : void
ErrLtOrEqualsOrGraveInUnquotedAttributeOrNull ( char c ) : void
ErrLtSlashGt ( ) : void
ErrMissingSpaceBeforeDoctypeName ( ) : void
ErrNamelessDoctype ( ) : void
ErrNcrControlChar ( char ch ) : char
ErrNcrControlChar ( ) : void
ErrNcrCr ( ) : void
ErrNcrInC1Range ( ) : void
ErrNcrNonCharacter ( char ch ) : char
ErrNcrOutOfRange ( ) : void
ErrNcrSurrogate ( ) : void
ErrNcrUnassigned ( ) : void
ErrNcrZero ( ) : void
ErrNoDigitsInNCR ( ) : void
ErrNoNamedCharacterMatch ( ) : void
ErrNoSpaceBetweenAttributes ( ) : void
ErrNoSpaceBetweenDoctypePublicKeywordAndQuote ( ) : void
ErrNoSpaceBetweenDoctypeSystemKeywordAndQuote ( ) : void
ErrNoSpaceBetweenPublicAndSystemIds ( ) : void
ErrNotSemicolonTerminated ( ) : void
ErrPrematureEndOfComment ( ) : void
ErrProcessingInstruction ( ) : void
ErrQuoteBeforeAttributeName ( char c ) : void
ErrQuoteOrLtInAttributeNameOrNull ( char c ) : void
ErrSlashNotFollowedByGt ( ) : void
ErrUnescapedAmpersandInterpretedAsCharacterReference ( ) : void
ErrUnquotedAttributeValOrNull ( char c ) : void
ErrWarnLtSlashInRcdata ( ) : void
FlushChars ( char[] buf, int pos ) : void

Flushes coalesced character tokens.

MaybeErrAttributesOnEndTag ( HtmlAttributes attrs ) : void
MaybeErrSlashInEndTag ( bool selfClosing ) : void
MaybeWarnPrivateUse ( char ch ) : void
MaybeWarnPrivateUseAstral ( ) : void
NoteAttributeWithoutValue ( ) : void
NoteUnquotedAttributeValue ( ) : void
SilentCarriageReturn ( ) : void
SilentLineFeed ( ) : void
StartErrorReporting ( ) : void

Private Methods

Method Description
AddAttributeWithValue ( ) : void
AddAttributeWithoutValue ( ) : void
AdjustDoubleHyphenAndAppendToLongStrBufAndErr ( char c ) : void
AdjustDoubleHyphenAndAppendToLongStrBufCarriageReturn ( ) : void
AdjustDoubleHyphenAndAppendToLongStrBufLineFeed ( ) : void
AppendLongStrBuf ( char c ) : void
AppendLongStrBuf ( char[] buffer, int offset, int length ) : void
AppendLongStrBufCarriageReturn ( ) : void
AppendLongStrBufLineFeed ( ) : void
AppendSecondHyphenToBogusComment ( ) : void
AppendStrBuf ( char c ) : void
AppendStrBufToLongStrBuf ( ) : void
AttributeNameComplete ( ) : void
BogusDoctype ( ) : void
BogusDoctypeWithoutQuirks ( ) : void
ClearLongStrBuf ( ) : void
ClearLongStrBufAndAppend ( char c ) : void
ClearStrBuf ( ) : void
ClearStrBufAndAppend ( char c ) : void
EmitCarriageReturn ( char buf, int pos ) : void
EmitComment ( int provisionalHyphens, int pos ) : void

Emits the current comment token.

EmitCurrentTagToken ( bool selfClosing, int pos ) : TokenizerState
EmitDoctypeToken ( int pos ) : void
EmitOrAppendOne ( char val, TokenizerState returnState ) : void
EmitOrAppendStrBuf ( TokenizerState returnState ) : void
EmitOrAppendTwo ( char val, TokenizerState returnState ) : void
EmitPlaintextReplacementCharacter ( char buf, int pos ) : void
EmitReplacementCharacter ( char buf, int pos ) : void
EmitStrBuf ( ) : void
EmptyAttributes ( ) : HtmlAttributes
EndTagExpectationToArray ( ) : void
HandleNcrValue ( TokenizerState returnState ) : void
InitDoctypeFields ( ) : void
LongStrBufToString ( ) : string
MaybeAppendSpaceToBogusComment ( ) : void
NewAsciiLowerCaseStringFromString ( String str ) : String
ResetAttributes ( ) : void
SetAdditionalAndRememberAmpersandLocation ( char add ) : void
StateLoop ( TokenizerState state, char c, int pos, char buf, bool reconsume, TokenizerState returnState, int endPos ) : int
StrBufToDoctypeName ( ) : void
StrBufToElementNameString ( ) : void
TurnOnAdditionalHtml4Errors ( ) : void

Method Details

BecomeConfident() public method

public BecomeConfident ( ) : void
return void

End() public method

public End ( ) : void
return void

Eof() public method

public Eof ( ) : void
return void

Err() public method

public Err ( string message ) : void
message string
return void

ErrAstralNonCharacter() protected method

protected ErrAstralNonCharacter ( int ch ) : void
ch int
return void

ErrAttributeValueMissing() protected method

protected ErrAttributeValueMissing ( ) : void
return void

ErrBadCharAfterLt() protected method

protected ErrBadCharAfterLt ( char c ) : void
c char
return void

ErrBadCharBeforeAttributeNameOrNull() protected method

protected ErrBadCharBeforeAttributeNameOrNull ( char c ) : void
c char
return void

ErrBogusComment() protected method

protected ErrBogusComment ( ) : void
return void

ErrBogusDoctype() protected method

protected ErrBogusDoctype ( ) : void
return void

ErrCharRefLacksSemicolon() protected method

protected ErrCharRefLacksSemicolon ( ) : void
return void

ErrConsecutiveHyphens() protected method

protected ErrConsecutiveHyphens ( ) : void
return void

ErrDuplicateAttribute() protected method

protected ErrDuplicateAttribute ( ) : void
return void

ErrEofAfterLt() protected method

protected ErrEofAfterLt ( ) : void
return void

ErrEofInAttributeName() protected method

protected ErrEofInAttributeName ( ) : void
return void

ErrEofInAttributeValue() protected method

protected ErrEofInAttributeValue ( ) : void
return void

ErrEofInComment() protected method

protected ErrEofInComment ( ) : void
return void

ErrEofInDoctype() protected method

protected ErrEofInDoctype ( ) : void
return void

ErrEofInEndTag() protected method

protected ErrEofInEndTag ( ) : void
return void

ErrEofInPublicId() protected method

protected ErrEofInPublicId ( ) : void
return void

ErrEofInSystemId() protected method

protected ErrEofInSystemId ( ) : void
return void

ErrEofInTagName() protected method

protected ErrEofInTagName ( ) : void
return void

ErrEofWithoutGt() protected method

protected ErrEofWithoutGt ( ) : void
return void

ErrEqualsSignBeforeAttributeName() protected method

protected ErrEqualsSignBeforeAttributeName ( ) : void
return void

ErrExpectedPublicId() protected method

protected ErrExpectedPublicId ( ) : void
return void

ErrExpectedSystemId() protected method

protected ErrExpectedSystemId ( ) : void
return void

ErrGarbageAfterLtSlash() protected method

protected ErrGarbageAfterLtSlash ( ) : void
return void

ErrGtInPublicId() protected method

protected ErrGtInPublicId ( ) : void
return void

ErrGtInSystemId() protected method

protected ErrGtInSystemId ( ) : void
return void

ErrHtml4LtSlashInRcdata() protected method

protected ErrHtml4LtSlashInRcdata ( char folded ) : void
folded char
return void

ErrHtml4NonNameInUnquotedAttribute() protected method

protected ErrHtml4NonNameInUnquotedAttribute ( char c ) : void
c char
return void

ErrHtml4XmlVoidSyntax() protected method

protected ErrHtml4XmlVoidSyntax ( ) : void
return void

ErrHyphenHyphenBang() protected method

protected ErrHyphenHyphenBang ( ) : void
return void

ErrLtGt() protected method

protected ErrLtGt ( ) : void
return void

ErrLtOrEqualsOrGraveInUnquotedAttributeOrNull() protected method

protected ErrLtOrEqualsOrGraveInUnquotedAttributeOrNull ( char c ) : void
c char
return void

ErrLtSlashGt() protected method

protected ErrLtSlashGt ( ) : void
return void

ErrMissingSpaceBeforeDoctypeName() protected method

protected ErrMissingSpaceBeforeDoctypeName ( ) : void
return void

ErrNamelessDoctype() protected method

protected ErrNamelessDoctype ( ) : void
return void

ErrNcrControlChar() protected method

protected ErrNcrControlChar ( char ch ) : char
ch char
return char

ErrNcrControlChar() protected method

protected ErrNcrControlChar ( ) : void
return void

ErrNcrCr() protected method

protected ErrNcrCr ( ) : void
return void

ErrNcrInC1Range() protected method

protected ErrNcrInC1Range ( ) : void
return void

ErrNcrNonCharacter() protected method

protected ErrNcrNonCharacter ( char ch ) : char
ch char
return char

ErrNcrOutOfRange() protected method

protected ErrNcrOutOfRange ( ) : void
return void

ErrNcrSurrogate() protected method

protected ErrNcrSurrogate ( ) : void
return void

ErrNcrUnassigned() protected method

protected ErrNcrUnassigned ( ) : void
return void

ErrNcrZero() protected method

protected ErrNcrZero ( ) : void
return void

ErrNoDigitsInNCR() protected method

protected ErrNoDigitsInNCR ( ) : void
return void

ErrNoNamedCharacterMatch() protected method

protected ErrNoNamedCharacterMatch ( ) : void
return void

ErrNoSpaceBetweenAttributes() protected method

protected ErrNoSpaceBetweenAttributes ( ) : void
return void

ErrNoSpaceBetweenDoctypePublicKeywordAndQuote() protected method

protected ErrNoSpaceBetweenDoctypePublicKeywordAndQuote ( ) : void
return void

ErrNoSpaceBetweenDoctypeSystemKeywordAndQuote() protected method

protected ErrNoSpaceBetweenDoctypeSystemKeywordAndQuote ( ) : void
return void

ErrNoSpaceBetweenPublicAndSystemIds() protected method

protected ErrNoSpaceBetweenPublicAndSystemIds ( ) : void
return void

ErrNotSemicolonTerminated() protected method

protected ErrNotSemicolonTerminated ( ) : void
return void

ErrPrematureEndOfComment() protected method

protected ErrPrematureEndOfComment ( ) : void
return void

ErrProcessingInstruction() protected method

protected ErrProcessingInstruction ( ) : void
return void

ErrQuoteBeforeAttributeName() protected method

protected ErrQuoteBeforeAttributeName ( char c ) : void
c char
return void

ErrQuoteOrLtInAttributeNameOrNull() protected method

protected ErrQuoteOrLtInAttributeNameOrNull ( char c ) : void
c char
return void

ErrSlashNotFollowedByGt() protected method

protected ErrSlashNotFollowedByGt ( ) : void
return void

ErrTreeBuilder() public method

public ErrTreeBuilder ( string message ) : void
message string
return void

ErrUnescapedAmpersandInterpretedAsCharacterReference() protected method

protected ErrUnescapedAmpersandInterpretedAsCharacterReference ( ) : void
return void

ErrUnquotedAttributeValOrNull() protected method

protected ErrUnquotedAttributeValOrNull ( char c ) : void
c char
return void

ErrWarnLtSlashInRcdata() protected method

protected ErrWarnLtSlashInRcdata ( ) : void
return void

Fatal() public method

public Fatal ( string message ) : void
message string
return void

FlushChars() protected method

Flushes coalesced character tokens.
protected FlushChars ( char[] buf, int pos ) : void
buf char[] The buffer.
pos int The position.
return void

InitializeWithoutStarting() public method

public InitializeWithoutStarting ( ) : void
return void

InternalEncodingDeclaration() public method

public InternalEncodingDeclaration ( string internalCharset ) : bool
internalCharset string
return bool

LoadState() public method

public LoadState ( Tokenizer other ) : void
other Tokenizer
return void

MaybeErrAttributesOnEndTag() protected method

protected MaybeErrAttributesOnEndTag ( HtmlAttributes attrs ) : void
attrs HtmlAttributes
return void

MaybeErrSlashInEndTag() protected method

protected MaybeErrSlashInEndTag ( bool selfClosing ) : void
selfClosing bool
return void

MaybeWarnPrivateUse() protected method

protected MaybeWarnPrivateUse ( char ch ) : void
ch char
return void

MaybeWarnPrivateUseAstral() protected method

protected MaybeWarnPrivateUseAstral ( ) : void
return void

NoteAttributeWithoutValue() protected method

protected NoteAttributeWithoutValue ( ) : void
return void

NoteUnquotedAttributeValue() protected method

protected NoteUnquotedAttributeValue ( ) : void
return void

NotifyAboutMetaBoundary() public method

public NotifyAboutMetaBoundary ( ) : void
return void

RequestSuspension() public method

public RequestSuspension ( ) : void
return void

ResetToDataState() public method

public ResetToDataState ( ) : void
return void

SetStateAndEndTagExpectation() public method

public SetStateAndEndTagExpectation ( TokenizerState specialTokenizerState, ElementName endTagExpectation ) : void
specialTokenizerState TokenizerState
endTagExpectation ElementName
return void

SetStateAndEndTagExpectation() public method

public SetStateAndEndTagExpectation ( TokenizerState specialTokenizerState, string endTagExpectation ) : void
specialTokenizerState TokenizerState
endTagExpectation string
return void

SetTransitionBaseOffset() public method

Sets an offset to be added to the position reported to TransitionHandler.
public SetTransitionBaseOffset ( int offset ) : void
offset int The offset.
return void

SilentCarriageReturn() protected method

protected SilentCarriageReturn ( ) : void
return void

SilentLineFeed() protected method

protected SilentLineFeed ( ) : void
return void

Start() public method

public Start ( ) : void
return void

StartErrorReporting() protected method

protected StartErrorReporting ( ) : void
return void

TokenizeBuffer() public method

public TokenizeBuffer ( UTF16Buffer buffer ) : bool
buffer UTF16Buffer
return bool

Tokenizer() public method

public Tokenizer ( ITokenHandler tokenHandler ) : System
tokenHandler ITokenHandler
return System

Tokenizer() public method

public Tokenizer ( ITokenHandler tokenHandler, bool newAttributesEachTime ) : System
tokenHandler ITokenHandler
newAttributesEachTime bool
return System

Warn() public method

public Warn ( string message ) : void
message string
return void

Property Details

ampersandLocation protected property

protected Locator ampersandLocation
return Locator

attributeName protected property

protected AttributeName attributeName
return AttributeName

confident protected property

protected bool confident
return bool

cstart protected property

protected int cstart
return int

endTag protected property

protected bool endTag
return bool

endTagExpectation protected property

protected ElementName endTagExpectation
return ElementName

html4 protected property

protected bool html4
return bool

index protected property

protected int index
return int

lastCR protected property

protected bool lastCR
return bool

stateSave protected property

protected TokenizerState stateSave
return TokenizerState

value protected property

protected int value
return int