我如何编码和解码HTML实体使用JavaScript或JQuery?
var varTitle = "Chris' corner";
我希望它是:
var varTitle = "Chris' corner";
我如何编码和解码HTML实体使用JavaScript或JQuery?
var varTitle = "Chris' corner";
我希望它是:
var varTitle = "Chris' corner";
当前回答
以下是完整版本
function htmldecode(s){
window.HTML_ESC_MAP = {
"nbsp":" ","iexcl":"¡","cent":"¢","pound":"£","curren":"¤","yen":"¥","brvbar":"¦","sect":"§","uml":"¨","copy":"©","ordf":"ª","laquo":"«","not":"¬","reg":"®","macr":"¯","deg":"°","plusmn":"±","sup2":"²","sup3":"³","acute":"´","micro":"µ","para":"¶","middot":"·","cedil":"¸","sup1":"¹","ordm":"º","raquo":"»","frac14":"¼","frac12":"½","frac34":"¾","iquest":"¿","Agrave":"À","Aacute":"Á","Acirc":"Â","Atilde":"Ã","Auml":"Ä","Aring":"Å","AElig":"Æ","Ccedil":"Ç","Egrave":"È","Eacute":"É","Ecirc":"Ê","Euml":"Ë","Igrave":"Ì","Iacute":"Í","Icirc":"Î","Iuml":"Ï","ETH":"Ð","Ntilde":"Ñ","Ograve":"Ò","Oacute":"Ó","Ocirc":"Ô","Otilde":"Õ","Ouml":"Ö","times":"×","Oslash":"Ø","Ugrave":"Ù","Uacute":"Ú","Ucirc":"Û","Uuml":"Ü","Yacute":"Ý","THORN":"Þ","szlig":"ß","agrave":"à","aacute":"á","acirc":"â","atilde":"ã","auml":"ä","aring":"å","aelig":"æ","ccedil":"ç","egrave":"è","eacute":"é","ecirc":"ê","euml":"ë","igrave":"ì","iacute":"í","icirc":"î","iuml":"ï","eth":"ð","ntilde":"ñ","ograve":"ò","oacute":"ó","ocirc":"ô","otilde":"õ","ouml":"ö","divide":"÷","oslash":"ø","ugrave":"ù","uacute":"ú","ucirc":"û","uuml":"ü","yacute":"ý","thorn":"þ","yuml":"ÿ","fnof":"ƒ","Alpha":"Α","Beta":"Β","Gamma":"Γ","Delta":"Δ","Epsilon":"Ε","Zeta":"Ζ","Eta":"Η","Theta":"Θ","Iota":"Ι","Kappa":"Κ","Lambda":"Λ","Mu":"Μ","Nu":"Ν","Xi":"Ξ","Omicron":"Ο","Pi":"Π","Rho":"Ρ","Sigma":"Σ","Tau":"Τ","Upsilon":"Υ","Phi":"Φ","Chi":"Χ","Psi":"Ψ","Omega":"Ω","alpha":"α","beta":"β","gamma":"γ","delta":"δ","epsilon":"ε","zeta":"ζ","eta":"η","theta":"θ","iota":"ι","kappa":"κ","lambda":"λ","mu":"μ","nu":"ν","xi":"ξ","omicron":"ο","pi":"π","rho":"ρ","sigmaf":"ς","sigma":"σ","tau":"τ","upsilon":"υ","phi":"φ","chi":"χ","psi":"ψ","omega":"ω","thetasym":"ϑ","upsih":"ϒ","piv":"ϖ","bull":"•","hellip":"…","prime":"′","Prime":"″","oline":"‾","frasl":"⁄","weierp":"℘","image":"ℑ","real":"ℜ","trade":"™","alefsym":"ℵ","larr":"←","uarr":"↑","rarr":"→","darr":"↓","harr":"↔","crarr":"↵","lArr":"⇐","uArr":"⇑","rArr":"⇒","dArr":"⇓","hArr":"⇔","forall":"∀","part":"∂","exist":"∃","empty":"∅","nabla":"∇","isin":"∈","notin":"∉","ni":"∋","prod":"∏","sum":"∑","minus":"−","lowast":"∗","radic":"√","prop":"∝","infin":"∞","ang":"∠","and":"∧","or":"∨","cap":"∩","cup":"∪","int":"∫","there4":"∴","sim":"∼","cong":"≅","asymp":"≈","ne":"≠","equiv":"≡","le":"≤","ge":"≥","sub":"⊂","sup":"⊃","nsub":"⊄","sube":"⊆","supe":"⊇","oplus":"⊕","otimes":"⊗","perp":"⊥","sdot":"⋅","lceil":"⌈","rceil":"⌉","lfloor":"⌊","rfloor":"⌋","lang":"〈","rang":"〉","loz":"◊","spades":"♠","clubs":"♣","hearts":"♥","diams":"♦","\"":"quot","amp":"&","lt":"<","gt":">","OElig":"Œ","oelig":"œ","Scaron":"Š","scaron":"š","Yuml":"Ÿ","circ":"ˆ","tilde":"˜","ndash":"–","mdash":"—","lsquo":"‘","rsquo":"’","sbquo":"‚","ldquo":"“","rdquo":"”","bdquo":"„","dagger":"†","Dagger":"‡","permil":"‰","lsaquo":"‹","rsaquo":"›","euro":"€"};
if(!window.HTML_ESC_MAP_EXP)
window.HTML_ESC_MAP_EXP = new RegExp("&("+Object.keys(HTML_ESC_MAP).join("|")+");","g");
return s?s.replace(window.HTML_ESC_MAP_EXP,function(x){
return HTML_ESC_MAP[x.substring(1,x.length-1)]||x;
}):s;
}
使用
htmldecode("∑ >€");
其他回答
我知道我有点晚了,但我认为我可以提供以下片段作为我如何使用jQuery解码HTML实体的示例:
var varTitleE = "Chris' corner";
var varTitleD = $("<div/>").html(varTitleE).text();
console.log(varTitleE + " vs. " + varTitleD);
不要忘记启动检查器/firebug以查看控制台结果——或者简单地将console.log(…)替换为/alert(…)
也就是说,以下是我的控制台通过谷歌Chrome检查器读取的内容:
Chris' corner vs. Chris' corner
受Robert K的解决方案的启发,这个版本不剥离HTML标记,而且同样安全。
var decode_entities = (function() {
// Remove HTML Entities
var element = document.createElement('div');
function decode_HTML_entities (str) {
if(str && typeof str === 'string') {
// Escape HTML before decoding for HTML Entities
str = escape(str).replace(/%26/g,'&').replace(/%23/g,'#').replace(/%3B/g,';');
element.innerHTML = str;
if(element.innerText){
str = element.innerText;
element.innerText = '';
}else{
// Firefox support
str = element.textContent;
element.textContent = '';
}
}
return unescape(str);
}
return decode_HTML_entities;
})();
原作者的答案在这里。
这是我最喜欢的解码HTML字符的方法。使用此代码的优点是还保留了标记。
function decodeHtml(html) {
var txt = document.createElement("textarea");
txt.innerHTML = html;
return txt.value;
}
例如:http://jsfiddle.net/k65s3/
输入:
Entity: Bad attempt at XSS:<script>alert('new\nline?')</script><br>
输出:
Entity: Bad attempt at XSS:<script>alert('new\nline?')</script><br>
为了在列表中添加另一个“受Robert K的启发”,这里是另一个不剥离HTML标签的安全版本。它不是通过HTML解析器运行整个字符串,而是只提取实体并转换它们。
var decodeEntities = (function() {
// this prevents any overhead from creating the object each time
var element = document.createElement('div');
// regular expression matching HTML entities
var entity = /&(?:#x[a-f0-9]+|#[0-9]+|[a-z0-9]+);?/ig;
return function decodeHTMLEntities(str) {
// find and replace all the html entities
str = str.replace(entity, function(m) {
element.innerHTML = m;
return element.textContent;
});
// reset the value
element.textContent = '';
return str;
}
})();
就像Robert K说的,不要使用jQuery.html().text()来解码html实体,因为这是不安全的,因为用户输入永远不能访问DOM。阅读关于XSS的文章,了解为什么这是不安全的。
相反,尝试使用带有escape和unescape方法的Underscore.js实用带库:
_.escape(string)
转义插入HTML的字符串,替换&,<,>,",',和'字符。
_.escape('Curly, Larry & Moe');
=> "Curly, Larry & Moe"
_.unescape(string)
escape的反义词,代替&, <, >, ", `和& # x27;和他们没有逃脱的同伴。
_.unescape('Curly, Larry & Moe');
=> "Curly, Larry & Moe"
要支持解码更多字符,只需复制下划线unescape方法并向映射添加更多字符。