Property | Type | Description | |
---|---|---|---|
bAutoExtractBetweenTagsOnly | bool | ||
bAutoKeepComments | bool | ||
bAutoKeepScripts | bool | ||
bAutoMarkClosedTagsWithParamsAsOpen | bool | ||
bCompressWhiteSpaceBeforeTag | bool | ||
bKeepRawHTML | bool | ||
bThrowExceptionOnEncodingSetFailure | bool | ||
oEnc | |||
oHE |
Method | Description | |
---|---|---|
CalculateWidth ( string sWidth, int iAvailWidth, bool &bRelative ) : int |
Parses WIDTH param and calculates width
|
|
ChangeToEntities ( string sLine ) : string | ||
ChangeToEntities ( string sLine, bool bChangeDangerousCharsOnly ) : string |
Parses line and changes known entity characters into proper HTML entities
|
|
CleanUp ( ) : void |
Cleans up parser in preparation for next parsing
|
|
Close ( ) : void |
Closes object and releases all allocated resources
|
|
DecodeEntities ( string sData ) : string |
This function will decode any entities found in a string - not fast!
|
|
Dispose ( ) : void | ||
HTMLparser ( string p_oHTML ) : System |
Constructs parser object using provided HTML as source for parsing
|
|
HandleMetaEncoding ( |
Handles META tags that set page encoding
|
|
Init ( byte p_bHTML ) : void |
Initialises parser with HTML to be parsed from provided data buffer: this is best in terms of correctness of parsing of various encodings that can be used in HTML
|
|
Init ( byte p_bHTML, int p_iHtmlLength ) : void |
Inits parsing
|
|
Init ( string p_oHTML ) : void |
Initialises parser with HTML to be parsed from provided string
|
|
InitMiniEntities ( ) : void |
Inits mini-entities mode: only "nbsp" will be converted into space, all other entities will be left as is
|
|
IsBiggerFont ( FontSize oFont1, FontSize oFont2 ) : bool |
Checks if first font is bigger than the second
|
|
IsEqualOrBiggerFont ( FontSize oFont1, FontSize oFont2 ) : bool |
Checks if first font is equal or bigger than the second
|
|
LoadFromFile ( string sFileName ) : void |
Loads HTML from file
|
|
ParseFontSize ( string sSize, FontSize oCurSize ) : FontSize |
Parses font's tag size param
|
|
ParseNext ( ) : |
Parses next chunk and returns it with
|
|
ParseNextTag ( ) : |
Returns next tag or null if end of document, text will be ignored completely
|
|
Reset ( ) : void |
Resets current parsed data to start
|
|
SetChunkHashMode ( bool bHashMode ) : void |
Sets chunk param hash mode
|
|
SetEncoding ( string p_sCharSet ) : bool |
Sets current encoding in format used in HTTP headers and HTML META tags
|
|
SetEncoding ( |
Sets encoding
|
|
SetRawHTML ( |
Sets oHTML variable in a chunk to the raw HTML that was parsed for that chunk.
|
Method | Description | |
---|---|---|
Dispose ( bool bDisposing ) : void | ||
GetCharSet ( string sData ) : string |
Retrieves charset information from format used in HTTP headers and META descriptions
|
|
GetNextTag ( ) : |
Internally parses tag and returns it from point when '<' was found
|
|
HTMLparser ( ) : System | ||
ParseTextWithEntities ( ) : |
public static CalculateWidth ( string sWidth, int iAvailWidth, bool &bRelative ) : int | ||
sWidth | string | WIDTH param from tag |
iAvailWidth | int | Currently available width for relative calculations; if negative, width will be returned as is |
bRelative | bool | Flag that will be set to true if width was relative |
return | int |
public ChangeToEntities ( string sLine ) : string | ||
sLine | string | |
return | string |
public ChangeToEntities ( string sLine, bool bChangeDangerousCharsOnly ) : string | ||
sLine | string | Line of text |
bChangeDangerousCharsOnly | bool | |
return | string |
public static DecodeEntities ( string sData ) : string | ||
sData | string | |
return | string |
public HTMLparser ( string p_oHTML ) : System | ||
p_oHTML | string | |
return | System |
public static HandleMetaEncoding ( |
||
oP | HTML parser object that is used for parsing | |
oChunk | Parsed chunk that should contain tag META | |
bEncodingSet | bool | Your own flag that shows whether encoding was already set or not; if set once then it should not be changed - this is the logic applied by major browsers |
return | bool |
public Init ( byte p_bHTML ) : void | ||
p_bHTML | byte | Data buffer with HTML in it |
return | void |
public Init ( byte p_bHTML, int p_iHtmlLength ) : void | ||
p_bHTML | byte | Data buffer |
p_iHtmlLength | int | Length of data (buffer itself can be longer) - start offset assumed to be 0 |
return | void |
public Init ( string p_oHTML ) : void | ||
p_oHTML | string | String with HTML in it |
return | void |
public static IsBiggerFont ( FontSize oFont1, FontSize oFont2 ) : bool | ||
oFont1 | FontSize | Font #1 |
oFont2 | FontSize | Font #2 |
return | bool |
public static IsEqualOrBiggerFont ( FontSize oFont1, FontSize oFont2 ) : bool | ||
oFont1 | FontSize | Font #1 |
oFont2 | FontSize | Font #2 |
return | bool |
public LoadFromFile ( string sFileName ) : void | ||
sFileName | string | Full filename |
return | void |
public static ParseFontSize ( string sSize, FontSize oCurSize ) : FontSize | ||
sSize | string | String value of the size param |
oCurSize | FontSize | |
return | FontSize |
public SetChunkHashMode ( bool bHashMode ) : void | ||
bHashMode | bool | If true then tag's params will be kept in Chunk's hashtable (slower), otherwise kept in arrays (sParams/sValues) |
return | void |
public SetEncoding ( string p_sCharSet ) : bool | ||
p_sCharSet | string | |
return | bool |
public SetEncoding ( |
||
p_oEnc | Encoding object | |
return | void |
public SetRawHTML ( |
||
oChunk | Chunk returned by ParseNext function, it must belong to the same HTMLparser that was initiated with the same HTML data that this chunk belongs to | |
return | void |
public bool bAutoMarkClosedTagsWithParamsAsOpen | ||
return | bool |
public bool bCompressWhiteSpaceBeforeTag | ||
return | bool |
public bool bThrowExceptionOnEncodingSetFailure | ||
return | bool |