Jump to content

[UDF] encode/decode Html reserved Chars


Raik
 Share

Recommended Posts

UDF:

#include-Once
; #INDEX# =======================================================================================================================
; Title .........: HtmlEntities
; AutoIt Version : 3.2.10++
; Language ......: English
; Description ...: Functions to escape Html reserved Characters.
; Author ........: Raik
; ===============================================================================================================================
; #CONSTANTS# ===================================================================================================================
; Lookup table used by the encode/decode functions.
; Each row is [Unicode code point, Html entity name without the leading '&' and trailing ';'].
; Entity names are case-sensitive (e.g. 'Agrave' vs 'agrave'), so lookups must be case-sensitive too.
Global Const $aisEntities[246][2] = [ _
    [34,'quot'],[38,'amp'],[39,'apos'],[60,'lt'],[62,'gt'], _
    [160,'nbsp'],[161,'iexcl'],[162,'cent'],[163,'pound'],[164,'curren'],[165,'yen'],[166,'brvbar'],[167,'sect'], _
    [168,'uml'],[169,'copy'],[170,'ordf'],[171,'laquo'],[172,'not'],[173,'shy'],[174,'reg'],[175,'macr'], _
    [176,'deg'],[177,'plusmn'],[180,'acute'],[181,'micro'],[182,'para'],[183,'middot'],[184,'cedil'],[186,'ordm'],[187,'raquo'],[191,'iquest'], _
    [192,'Agrave'],[193,'Aacute'],[194,'Acirc'],[195,'Atilde'],[196,'Auml'],[197,'Aring'],[198,'AElig'],[199,'Ccedil'], _
    [200,'Egrave'],[201,'Eacute'],[202,'Ecirc'],[203,'Euml'],[204,'Igrave'],[205,'Iacute'],[206,'Icirc'],[207,'Iuml'], _
    [208,'ETH'],[209,'Ntilde'],[210,'Ograve'],[211,'Oacute'],[212,'Ocirc'],[213,'Otilde'],[214,'Ouml'],[215,'times'], _
    [216,'Oslash'],[217,'Ugrave'],[218,'Uacute'],[219,'Ucirc'],[220,'Uuml'],[221,'Yacute'],[222,'THORN'],[223,'szlig'], _
    [224,'agrave'],[225,'aacute'],[226,'acirc'],[227,'atilde'],[228,'auml'],[229,'aring'],[230,'aelig'],[231,'ccedil'], _
    [232,'egrave'],[233,'eacute'],[234,'ecirc'],[235,'euml'],[236,'igrave'],[237,'iacute'],[238,'icirc'],[239,'iuml'], _
    [240,'eth'],[241,'ntilde'],[242,'ograve'],[243,'oacute'],[244,'ocirc'],[245,'otilde'],[246,'ouml'],[247,'divide'], _
    [248,'oslash'],[249,'ugrave'],[250,'uacute'],[251,'ucirc'],[252,'uuml'],[253,'yacute'],[254,'thorn'],[255,'yuml'], _
    [338,'OElig'],[339,'oelig'],[352,'Scaron'],[353,'scaron'],[376,'Yuml'],[402,'fnof'],[710,'circ'],[732,'tilde'], _
    [913,'Alpha'],[914,'Beta'],[915,'Gamma'],[916,'Delta'],[917,'Epsilon'],[918,'Zeta'],[919,'Eta'],[920,'Theta'], _
    [921,'Iota'],[922,'Kappa'],[923,'Lambda'],[924,'Mu'],[925,'Nu'],[926,'Xi'],[927,'Omicron'],[928,'Pi'], _
    [929,'Rho'],[931,'Sigma'],[932,'Tau'],[933,'Upsilon'],[934,'Phi'],[935,'Chi'],[936,'Psi'],[937,'Omega'], _
    [945,'alpha'],[946,'beta'],[947,'gamma'],[948,'delta'],[949,'epsilon'],[950,'zeta'],[951,'eta'],[952,'theta'], _
    [953,'iota'],[954,'kappa'],[955,'lambda'],[956,'mu'],[957,'nu'],[958,'xi'],[959,'omicron'],[960,'pi'], _
    [961,'rho'],[962,'sigmaf'],[963,'sigma'],[964,'tau'],[965,'upsilon'],[966,'phi'],[967,'chi'],[968,'psi'],[969,'omega'], _
    [977,'thetasym'],[978,'upsih'],[982,'piv'], _
    [8194,'ensp'],[8195,'emsp'],[8201,'thinsp'],[8204,'zwnj'],[8205,'zwj'],[8206,'lrm'],[8207,'rlm'], _
    [8211,'ndash'],[8212,'mdash'],[8216,'lsquo'],[8217,'rsquo'],[8218,'sbquo'],[8220,'ldquo'],[8221,'rdquo'],[8222,'bdquo'], _
    [8224,'dagger'],[8225,'Dagger'],[8226,'bull'],[8230,'hellip'],[8240,'permil'],[8242,'prime'],[8243,'Prime'], _
    [8249,'lsaquo'],[8250,'rsaquo'],[8254,'oline'],[8260,'frasl'],[8364,'euro'], _
    [8465,'image'],[8472,'weierp'],[8476,'real'],[8482,'trade'],[8501,'alefsym'], _
    [8592,'larr'],[8593,'uarr'],[8594,'rarr'],[8595,'darr'],[8596,'harr'],[8629,'crarr'], _
    [8656,'lArr'],[8657,'uArr'],[8658,'rArr'],[8659,'dArr'],[8660,'hArr'], _
    [8704,'forall'],[8706,'part'],[8707,'exist'],[8709,'empty'],[8711,'nabla'],[8712,'isin'],[8713,'notin'],[8715,'ni'], _
    [8719,'prod'],[8721,'sum'],[8722,'minus'],[8727,'lowast'],[8730,'radic'],[8733,'prop'],[8734,'infin'],[8736,'ang'], _
    [8743,'and'],[8744,'or'],[8745,'cap'],[8746,'cup'],[8747,'int'],[8764,'sim'],[8773,'cong'],[8776,'asymp'], _
    [8800,'ne'],[8801,'equiv'],[8804,'le'],[8805,'ge'],[8834,'sub'],[8835,'sup'],[8836,'nsub'],[8838,'sube'],[8839,'supe'], _
    [8853,'oplus'],[8855,'otimes'],[8869,'perp'],[8901,'sdot'], _
    [8968,'lceil'],[8969,'rceil'],[8970,'lfloor'],[8971,'rfloor'],[9001,'lang'],[9002,'rang'], _
    [9674,'loz'],[9824,'spades'],[9827,'clubs'],[9829,'hearts'],[9830,'diams']]
; ===============================================================================================================================
; #CURRENT# =====================================================================================================================
;_HtmlEntities_Encode
;_HtmlEntities_Decode
; ===============================================================================================================================
; #FUNCTION# ====================================================================================================================
; Name...........: _HtmlEntities_Encode
; Description ...: Replaces every character listed in $aisEntities (Html reserved chars, accented letters, symbols)
;                  with its named Html entity, e.g. '<' becomes '&lt;'.
; Syntax.........: _HtmlEntities_Encode(ByRef $sTxt)
; Parameters ....: $sTxt - Text to encode; it is modified in place
; Return values .: Always 0 (no value is returned explicitly); the result is written back to $sTxt
; Author ........: Raik
; Modified.......: '&' is now escaped before all other characters
; Remarks .......: The function is not idempotent: encoding already encoded text escapes the '&' of each entity again.
; Related .......: _HtmlEntities_Decode
; Link ..........;
; Example .......; Yes
; ===============================================================================================================================
Func _HtmlEntities_Encode(ByRef $sTxt)
    ; The ampersand has to be escaped first. Every entity produced below starts with '&', so escaping
    ; '&' after any other character would turn e.g. '&quot;' into '&amp;quot;'.
    $sTxt = StringReplace($sTxt, '&', '&amp;', 0, 1)

    For $i = 0 To UBound($aisEntities) - 1
        ; '&' (code 38) has already been handled above, whatever its position in the table.
        If $aisEntities[$i][0] = 38 Then ContinueLoop
        ; Case-sensitive replace (last argument 1): upper and lower case letters map to different entities.
        $sTxt = StringReplace($sTxt, ChrW($aisEntities[$i][0]), '&' & $aisEntities[$i][1] & ';', 0, 1)
    Next
EndFunc   ;==>_HtmlEntities_Encode
; #FUNCTION# ====================================================================================================================
; Name...........: _HtmlEntities_Decode
; Description ...: Replaces every named Html entity listed in $aisEntities with the character it stands for,
;                  e.g. '&lt;' becomes '<'.
; Syntax.........: _HtmlEntities_Decode(ByRef $sTxt)
; Parameters ....: $sTxt - Html source to decode; it is modified in place
; Return values .: Always 0 (no value is returned explicitly); the result is written back to $sTxt
; Author ........: Raik
; Modified.......: '&amp;' is now decoded after all other entities
; Remarks .......: Entity names are matched case-sensitively. Entities not present in $aisEntities are left untouched.
; Related .......: _HtmlEntities_Encode
; Link ..........;
; Example .......; Yes
; ===============================================================================================================================
Func _HtmlEntities_Decode(ByRef $sTxt)
    For $i = 0 To UBound($aisEntities) - 1
        ; '&amp;' (code 38) is postponed until every other entity has been decoded, see below.
        If $aisEntities[$i][0] = 38 Then ContinueLoop
        ; Case-sensitive replace (last argument 1): '&Agrave;' and '&agrave;' are different entities.
        $sTxt = StringReplace($sTxt, '&' & $aisEntities[$i][1] & ';', ChrW($aisEntities[$i][0]), 0, 1)
    Next

    ; Decode the ampersand last. Doing it earlier would turn an escaped entity such as '&amp;lt;'
    ; into '&lt;' and a later iteration would wrongly decode it a second time to '<'.
    $sTxt = StringReplace($sTxt, '&amp;', '&', 0, 1)
EndFunc   ;==>_HtmlEntities_Decode

Example:

#include "EncodeHtmlEntities.au3"

; Round-trip demonstration: encode a text first, then decode the encoded text back again.
$txt="<Äöü>"

; Both functions take the text ByRef and modify it in place.
_HtmlEntities_Encode($txt)
MsgBox(0,"Encode",$txt) ; shows &lt;&Auml;&ouml;&uuml;&gt;

_HtmlEntities_Decode($txt)
MsgBox(0,"Decode",$txt) ; shows the original text again

EncodeHtmlEntities.au3

EncodeHtmlEntities_Example.au3

AutoIt-Syntaxsheme for Proton & Phase5 * Firefox Addons by me (resizable Textarea 0.1d) (docked JS-Console 0.1.1)

Link to comment
Share on other sites

  • 1 year later...
  • 5 weeks later...
  • 2 years later...

Minor bug with the double quote character: it gets translated to &amp;quot; instead of &quot;. Simple fix: just move the entry for "&" to the beginning of the array.

Global Const $aisEntities[246][2]=[[34,'quot'],[38,'amp'],[39,'apos'],

becomes

Global Const $aisEntities[246][2]=[[38,'amp'],[34,'quot'],[39,'apos'],

BTW, as per http://www.w3.org/TR/xhtml1/#C_16 , the entry for the apostrophe should be removed. &apos; is not valid HTML.

 

 

Edited by GPinzone
Gerard J. Pinzone - gpinzone AT yahoo.com
Link to comment
Share on other sites

  • 2 years later...

I needed these functionalities, so I used them changing the code a little:

  • put change suggested by GPinzone about amp
  • added management of tags composed by "&#" + (utf code number) + ";", in decode function
  • added 2 functions, _Html_GetEncoded and _Html_GetDecoded: they return text encoded/decoded, do not change it using ByRef
; Lookup table used by the encode/decode functions.
; Each row is [Unicode code point, Html entity name without the leading '&' and trailing ';'].
; Entity names are case-sensitive (e.g. 'Agrave' vs 'agrave'), so lookups must be case-sensitive too.
; NOTE: the array is declared with 246 rows but only 245 are initialised (the 'apos' row was removed),
; so the last row holds empty values. The declared size is kept because code iterating with a fixed
; upper bound of 245 would otherwise index past the end; code walking the table should skip the empty row.
Global Const $aisEntities[246][2] = [ _
    [34,'quot'],[38,'amp'],[60,'lt'],[62,'gt'], _
    [160,'nbsp'],[161,'iexcl'],[162,'cent'],[163,'pound'],[164,'curren'],[165,'yen'],[166,'brvbar'],[167,'sect'], _
    [168,'uml'],[169,'copy'],[170,'ordf'],[171,'laquo'],[172,'not'],[173,'shy'],[174,'reg'],[175,'macr'], _
    [176,'deg'],[177,'plusmn'],[180,'acute'],[181,'micro'],[182,'para'],[183,'middot'],[184,'cedil'],[186,'ordm'],[187,'raquo'],[191,'iquest'], _
    [192,'Agrave'],[193,'Aacute'],[194,'Acirc'],[195,'Atilde'],[196,'Auml'],[197,'Aring'],[198,'AElig'],[199,'Ccedil'], _
    [200,'Egrave'],[201,'Eacute'],[202,'Ecirc'],[203,'Euml'],[204,'Igrave'],[205,'Iacute'],[206,'Icirc'],[207,'Iuml'], _
    [208,'ETH'],[209,'Ntilde'],[210,'Ograve'],[211,'Oacute'],[212,'Ocirc'],[213,'Otilde'],[214,'Ouml'],[215,'times'], _
    [216,'Oslash'],[217,'Ugrave'],[218,'Uacute'],[219,'Ucirc'],[220,'Uuml'],[221,'Yacute'],[222,'THORN'],[223,'szlig'], _
    [224,'agrave'],[225,'aacute'],[226,'acirc'],[227,'atilde'],[228,'auml'],[229,'aring'],[230,'aelig'],[231,'ccedil'], _
    [232,'egrave'],[233,'eacute'],[234,'ecirc'],[235,'euml'],[236,'igrave'],[237,'iacute'],[238,'icirc'],[239,'iuml'], _
    [240,'eth'],[241,'ntilde'],[242,'ograve'],[243,'oacute'],[244,'ocirc'],[245,'otilde'],[246,'ouml'],[247,'divide'], _
    [248,'oslash'],[249,'ugrave'],[250,'uacute'],[251,'ucirc'],[252,'uuml'],[253,'yacute'],[254,'thorn'],[255,'yuml'], _
    [338,'OElig'],[339,'oelig'],[352,'Scaron'],[353,'scaron'],[376,'Yuml'],[402,'fnof'],[710,'circ'],[732,'tilde'], _
    [913,'Alpha'],[914,'Beta'],[915,'Gamma'],[916,'Delta'],[917,'Epsilon'],[918,'Zeta'],[919,'Eta'],[920,'Theta'], _
    [921,'Iota'],[922,'Kappa'],[923,'Lambda'],[924,'Mu'],[925,'Nu'],[926,'Xi'],[927,'Omicron'],[928,'Pi'], _
    [929,'Rho'],[931,'Sigma'],[932,'Tau'],[933,'Upsilon'],[934,'Phi'],[935,'Chi'],[936,'Psi'],[937,'Omega'], _
    [945,'alpha'],[946,'beta'],[947,'gamma'],[948,'delta'],[949,'epsilon'],[950,'zeta'],[951,'eta'],[952,'theta'], _
    [953,'iota'],[954,'kappa'],[955,'lambda'],[956,'mu'],[957,'nu'],[958,'xi'],[959,'omicron'],[960,'pi'], _
    [961,'rho'],[962,'sigmaf'],[963,'sigma'],[964,'tau'],[965,'upsilon'],[966,'phi'],[967,'chi'],[968,'psi'],[969,'omega'], _
    [977,'thetasym'],[978,'upsih'],[982,'piv'], _
    [8194,'ensp'],[8195,'emsp'],[8201,'thinsp'],[8204,'zwnj'],[8205,'zwj'],[8206,'lrm'],[8207,'rlm'], _
    [8211,'ndash'],[8212,'mdash'],[8216,'lsquo'],[8217,'rsquo'],[8218,'sbquo'],[8220,'ldquo'],[8221,'rdquo'],[8222,'bdquo'], _
    [8224,'dagger'],[8225,'Dagger'],[8226,'bull'],[8230,'hellip'],[8240,'permil'],[8242,'prime'],[8243,'Prime'], _
    [8249,'lsaquo'],[8250,'rsaquo'],[8254,'oline'],[8260,'frasl'],[8364,'euro'], _
    [8465,'image'],[8472,'weierp'],[8476,'real'],[8482,'trade'],[8501,'alefsym'], _
    [8592,'larr'],[8593,'uarr'],[8594,'rarr'],[8595,'darr'],[8596,'harr'],[8629,'crarr'], _
    [8656,'lArr'],[8657,'uArr'],[8658,'rArr'],[8659,'dArr'],[8660,'hArr'], _
    [8704,'forall'],[8706,'part'],[8707,'exist'],[8709,'empty'],[8711,'nabla'],[8712,'isin'],[8713,'notin'],[8715,'ni'], _
    [8719,'prod'],[8721,'sum'],[8722,'minus'],[8727,'lowast'],[8730,'radic'],[8733,'prop'],[8734,'infin'],[8736,'ang'], _
    [8743,'and'],[8744,'or'],[8745,'cap'],[8746,'cup'],[8747,'int'],[8764,'sim'],[8773,'cong'],[8776,'asymp'], _
    [8800,'ne'],[8801,'equiv'],[8804,'le'],[8805,'ge'],[8834,'sub'],[8835,'sup'],[8836,'nsub'],[8838,'sube'],[8839,'supe'], _
    [8853,'oplus'],[8855,'otimes'],[8869,'perp'],[8901,'sdot'], _
    [8968,'lceil'],[8969,'rceil'],[8970,'lfloor'],[8971,'rfloor'],[9001,'lang'],[9002,'rang'], _
    [9674,'loz'],[9824,'spades'],[9827,'clubs'],[9829,'hearts'],[9830,'diams']]

; Output formats accepted by the $decodeType_IN parameter of the encode functions:
Global Const $HE_DECODETYPE_Html  ="HTML"   ; named entities
Global Const $HE_DECODETYPE_Number="Number" ; numeric character references

; #FUNCTION# ====================================================================================================================
; Name...........: _HtmlEntities_Encode
; Description ...: Replaces every character listed in $aisEntities with its Html entity, either in named form
;                  ('<' becomes '&lt;') or as a numeric character reference ('<' becomes '&#60;').
; Syntax.........: _HtmlEntities_Encode(ByRef $sTxt [, $decodeType_IN = $HE_DECODETYPE_Html])
; Parameters ....: $sTxt          - Text to encode; it is modified in place
;                  $decodeType_IN - $HE_DECODETYPE_Html for named entities (default),
;                                   $HE_DECODETYPE_Number for numeric character references.
;                                   Any other value leaves $sTxt unchanged.
; Return values .: Always 0 (no value is returned explicitly); the result is written back to $sTxt
; Author ........: Raik
; Modified.......: '&' is escaped first; matching is case-sensitive; numeric references now start with '&#'
; Remarks .......: The function is not idempotent: encoding already encoded text escapes the '&' of each entity again.
; Related .......: _HtmlEntities_Decode, _Html_GetEncoded
; Link ..........;
; Example .......; Yes
; ===============================================================================================================================
Func _HtmlEntities_Encode(ByRef $sTxt, $decodeType_IN = $HE_DECODETYPE_Html)
    Local $bNumeric
    Switch $decodeType_IN
        Case $HE_DECODETYPE_Html
            $bNumeric = False
        Case $HE_DECODETYPE_Number
            $bNumeric = True
        Case Else
            ; Unknown output type: leave the text untouched.
            Return
    EndSwitch

    ; The ampersand has to be escaped first. Every entity produced below starts with '&', so escaping
    ; '&' after any other character would corrupt the entities already written (e.g. '&amp;quot;').
    If $bNumeric Then
        $sTxt = StringReplace($sTxt, '&', '&#38;', 0, 1)
    Else
        $sTxt = StringReplace($sTxt, '&', '&amp;', 0, 1)
    EndIf

    Local $iCode, $sEntity
    For $i = 0 To UBound($aisEntities) - 1
        $iCode = $aisEntities[$i][0]
        ; Skip uninitialised rows (empty name) and '&' (code 38), which was handled above.
        If $aisEntities[$i][1] = '' Or $iCode = 38 Then ContinueLoop

        If $bNumeric Then
            $sEntity = '&#' & $iCode & ';'
        Else
            $sEntity = '&' & $aisEntities[$i][1] & ';'
        EndIf
        ; Case-sensitive replace (last argument 1): otherwise a lower case letter would be encoded with
        ; the entity of its upper case counterpart, which comes first in the table.
        $sTxt = StringReplace($sTxt, ChrW($iCode), $sEntity, 0, 1)
    Next
EndFunc   ;==>_HtmlEntities_Encode

; #FUNCTION# ====================================================================================================================
; Name...........: _HtmlEntities_Decode
; Description ...: Replaces Html entities with the characters they stand for. Both the named form ('&lt;') and the
;                  decimal numeric form ('&#60;') are decoded for every character listed in $aisEntities.
; Syntax.........: _HtmlEntities_Decode(ByRef $sTxt)
; Parameters ....: $sTxt - Html source to decode; it is modified in place
; Return values .: Always 0 (no value is returned explicitly); the result is written back to $sTxt
; Author ........: Raik
; Modified.......: '&amp;' / '&#38;' are decoded last; matching is case-sensitive; empty table rows are skipped
; Remarks .......: Numeric references are only decoded for code points present in $aisEntities.
;                  Entities not present in the table are left untouched.
; Related .......: _HtmlEntities_Encode, _Html_GetDecoded
; Link ..........;
; Example .......; Yes
; ===============================================================================================================================
Func _HtmlEntities_Decode(ByRef $sTxt)
    Local $iCode, $sChar
    For $i = 0 To UBound($aisEntities) - 1
        $iCode = $aisEntities[$i][0]
        ; Skip uninitialised rows (empty name) and '&' (code 38), which is postponed, see below.
        If $aisEntities[$i][1] = '' Or $iCode = 38 Then ContinueLoop

        $sChar = ChrW($iCode)
        ; Case-sensitive replace (last argument 1): '&Agrave;' and '&agrave;' are different entities.
        $sTxt = StringReplace($sTxt, '&' & $aisEntities[$i][1] & ';', $sChar, 0, 1)
        $sTxt = StringReplace($sTxt, '&#' & $iCode & ';', $sChar, 0, 1)
    Next

    ; Decode the ampersand last. Doing it earlier would turn an escaped entity such as '&amp;lt;'
    ; into '&lt;' and a later iteration would wrongly decode it a second time to '<'.
    ; The numeric form is first rewritten to the named form so that both are resolved by ONE final
    ; pass; StringReplace does not rescan replaced text, so '&amp;#38;' correctly yields '&#38;'.
    $sTxt = StringReplace($sTxt, '&#38;', '&amp;', 0, 1)
    $sTxt = StringReplace($sTxt, '&amp;', '&', 0, 1)
EndFunc   ;==>_HtmlEntities_Decode

; #FUNCTION# ====================================================================================================================
; Name...........: _Html_GetEncoded
; Description ...: Returns an encoded copy of a text; the caller's variable is not modified.
; Syntax.........: _Html_GetEncoded($txt_IN [, $decodeType_IN = $HE_DECODETYPE_Html])
; Parameters ....: $txt_IN        - Text to encode. Taken by value, so literals and expressions are accepted as well.
;                  $decodeType_IN - Output format, passed through unchanged to _HtmlEntities_Encode
; Return values .: The encoded text
; Related .......: _HtmlEntities_Encode
; ===============================================================================================================================
Func _Html_GetEncoded($txt_IN, $decodeType_IN = $HE_DECODETYPE_Html)
    ; $txt_IN is a by-value parameter, i.e. already a private copy; encoding it in place cannot
    ; affect the caller's variable.
    _HtmlEntities_Encode($txt_IN, $decodeType_IN)
    Return $txt_IN
EndFunc   ;==>_Html_GetEncoded


; #FUNCTION# ====================================================================================================================
; Name...........: _Html_GetDecoded
; Description ...: Returns a decoded copy of a text; the caller's variable is not modified.
; Syntax.........: _Html_GetDecoded($txt_IN)
; Parameters ....: $txt_IN - Html source to decode. Taken by value, so literals and expressions are accepted as well.
; Return values .: The decoded text
; Related .......: _HtmlEntities_Decode
; ===============================================================================================================================
Func _Html_GetDecoded($txt_IN)
    ; $txt_IN is a by-value parameter, i.e. already a private copy; decoding it in place cannot
    ; affect the caller's variable.
    _HtmlEntities_Decode($txt_IN)
    Return $txt_IN
EndFunc   ;==>_Html_GetDecoded

 

 

Thank you,

Link to comment
Share on other sites

  • 1 year later...

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now
 Share

  • Recently Browsing   0 members

    • No registered users viewing this page.
×
×
  • Create New...