#include "HTMLParser.au3"
; _MemMoveMemory (used by the token loop below) is provided by the standard Memory UDF.
#include <Memory.au3>
; Builds a string made of a single character repeated $nCount times.
;
; Parameters:
;   $sChar  - string whose FIRST character (as returned by Asc) is repeated.
;   $nCount - number of repetitions; values below 1 yield an empty string.
; Returns:
;   The repeated string, or "" when there is nothing to repeat.
;   On an internal failure returns "" and sets @error
;   (1 = buffer allocation failed, 2 = the memset DllCall failed).
Func StringRepeat($sChar, $nCount)
	$nCount = Int($nCount)
	; DllStructCreate("char[0]") fails, which made the old code return the
	; number 0 instead of an empty string - handle that case up front.
	If $nCount < 1 Or StringLen($sChar) = 0 Then Return ""

	Local $tBuffer = DllStructCreate("char[" & $nCount & "]")
	If @error Then Return SetError(1, 0, "")

	; C signature: void *memset(void *dest, int c, size_t count).
	; size_t is pointer-sized, so it is marshalled as ulong_ptr rather than int.
	DllCall("msvcrt.dll", "ptr:cdecl", "memset", "ptr", DllStructGetPtr($tBuffer), "int", Asc($sChar), "ulong_ptr", $nCount)
	If @error Then Return SetError(2, 0, "")

	Return DllStructGetData($tBuffer, 1)
EndFunc
; ---------------------------------------------------------------------------
; Script body: reads prettyhtml.txt, tokenises it with _HTMLParser2 and writes
; one token per line to prettyhtml_output.txt, indented with one TAB per open
; (non-void, non-self-closing) element.
; ---------------------------------------------------------------------------

; Read the input first so a missing/unreadable file does not truncate the output.
$sHTML = FileRead("prettyhtml.txt")
If @error Then Exit MsgBox(0, "Ooops!", "Could not read prettyhtml.txt" & @CRLF & "Closing script...")

; Mode 2 = open for writing and erase previous contents.
$hFile = FileOpen("prettyhtml_output.txt", 2)
If $hFile = -1 Then Exit MsgBox(0, "Ooops!", "Could not open prettyhtml_output.txt for writing" & @CRLF & "Closing script...")

$tTokenList = _HTMLParser2($sHTML)
; @extended of the parser call is captured immediately; it is not used in this
; section - NOTE(review): confirm whether anything else in the file reads it.
$iExtended = @extended

$pItem = $tTokenList.First
$iLevel = 0 ; current nesting depth = number of TABs written in front of a token

While 1
	; A null pointer marks the end of the list (and also covers an empty list,
	; which previously reached _MemMoveMemory with a null source pointer).
	If $pItem = 0 Then ExitLoop

	; Copy the current list node into the shared token struct so its fields can be read.
	_MemMoveMemory($pItem, $__g_pTokenListToken, $__g_iTokenListToken)

	; Raw source text of this token, extracted once instead of per use.
	$sToken = StringMid($sHTML, $__g_tTokenListToken.Start, $__g_tTokenListToken.Length)

	Switch $__g_tTokenListToken.Type
		Case $__HTMLPARSERCONSTANT_TYPE_NONE
			FileClose($hFile)
			Exit MsgBox(0, "Ooops!", "Some unknown element found!"&@CRLF&"Closing script...")

		Case $__HTMLPARSERCONSTANT_TYPE_CDATA, $__HTMLPARSERCONSTANT_TYPE_COMMENT, $__HTMLPARSERCONSTANT_TYPE_DOCTYPE
			; Written verbatim at the current depth; never changes the depth.
			FileWrite($hFile, ($iLevel > 0 ? StringRepeat(@TAB, $iLevel) : "") & $sToken & @CRLF)

		Case $__HTMLPARSERCONSTANT_TYPE_STARTTAG
			FileWrite($hFile, ($iLevel > 0 ? StringRepeat(@TAB, $iLevel) : "") & $sToken & @CRLF)
			; Self-closing tags ("<foo/>") have no matching end tag, so the depth stays put.
			If StringRight($sToken, 2) <> "/>" Then
				$aRet = StringRegExp($sToken, "^[<]([0-9a-zA-Z]+)", 1)
				If @error Or Not IsArray($aRet) Then
					; No tag name could be extracted: treat it as an ordinary
					; element instead of failing on a non-array subscript.
					$iLevel += 1
				Else
					; Lower-cased so upper-case spellings such as <BR> hit the same cases.
					Switch StringLower($aRet[0])
						Case "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", "track", "wbr"
							; void element - no end tag follows, depth unchanged
						Case Else
							$iLevel += 1
					EndSwitch
				EndIf
			EndIf

		Case $__HTMLPARSERCONSTANT_TYPE_ENDTAG
			; Clamp at 0 so a stray end tag cannot push the depth negative and
			; skew the indentation of everything that follows.
			If $iLevel > 0 Then $iLevel -= 1
			FileWrite($hFile, ($iLevel > 0 ? StringRepeat(@TAB, $iLevel) : "") & $sToken & @CRLF)

		Case $__HTMLPARSERCONSTANT_TYPE_TEXT
			; Skip whitespace-only text nodes. \s already covers line breaks and
			; horizontal whitespace; the previous class [\R\h\s] was wrong because
			; \R is not a newline escape inside a character class.
			If Not StringRegExp($sToken, "^\s+$") Then
				$sTabs = ($iLevel > 0 ? StringRepeat(@TAB, $iLevel) : "")
				; Re-indent continuation lines, drop one leading/trailing line
				; break, then strip leading and trailing whitespace (flags 1+2).
				$sText = StringReplace($sToken, @CRLF, @CRLF & $sTabs)
				$sText = StringRegExpReplace($sText, "(^\R|\R$)", "")
				FileWrite($hFile, $sTabs & StringStripWS($sText, 1 + 2) & @CRLF)
			EndIf
	EndSwitch

	; Advance to the next node of the linked token list.
	$pItem = $__g_tTokenListToken.Next
WEnd

FileClose($hFile)
Func _HTMLParser2($sHTML);alternate parser, dealing with the tmp current