我如何编码和解码HTML实体使用JavaScript或JQuery?

var varTitle = "Chris' corner";

我希望它是:

var varTitle = "Chris' corner";

当前回答

原作者的答案在这里。

这是我最喜欢的解码HTML字符的方法。使用此代码的优点是还保留了标记。

function decodeHtml(html) {
    var txt = document.createElement("textarea");
    txt.innerHTML = html;
    return txt.value;
}

例如:http://jsfiddle.net/k65s3/

输入:

Entity:&nbsp;Bad attempt at XSS:<script>alert('new\nline?')</script><br>

输出:

Entity: Bad attempt at XSS:<script>alert('new\nline?')</script><br>

其他回答

你可以尝试这样做:

var Title = $('<textarea />').html("Chris&apos;角”)。text (); console.log(标题); < script src = " https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js " > < /脚本>

J.S.小提琴。

更具互动性的版本:

$('form').submit(function() { var theString = $('#string').val(); var varTitle = $('<textarea />').html(theString).text(); $('#output').text(varTitle); return false; }); <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script> <form action="#" method="post"> <fieldset> <label for="string">Enter a html-encoded string to decode</label> <input type="text" name="string" id="string" /> </fieldset> <fieldset> <input type="submit" value="decode" /> </fieldset> </form> <div id="output"></div>

J.S.小提琴。

受Robert K的解决方案的启发,剥离html标签,防止执行脚本和事件处理程序,如:<img src=fake onerror="prompt(1)" > 在最新的Chrome, FF, IE上测试(应该可以在IE9上工作,但还没有测试)。

var decodeEntities = (function () {
        //create a new html document (doesn't execute script tags in child elements)
        var doc = document.implementation.createHTMLDocument("");
        var element = doc.createElement('div');

        function getText(str) {
            element.innerHTML = str;
            str = element.textContent;
            element.textContent = '';
            return str;
        }

        function decodeHTMLEntities(str) {
            if (str && typeof str === 'string') {
                var x = getText(str);
                while (str !== x) {
                    str = x;
                    x = getText(x);
                }
                return x;
            }
        }
        return decodeHTMLEntities;
    })();

简单地调用:

decodeEntities('<img src=fake onerror="prompt(1)">');
decodeEntities("<script>alert('aaa!')</script>");

因为@Robert K和@mattcasey都有很好的代码,我想在这里贡献一个CoffeeScript版本,以防将来有人会使用它:

    String::unescape = (strict = false) ->
      ###
      # Take escaped text, and return the unescaped version
      #
      # @param string str | String to be used
      # @param bool strict | Stict mode will remove all HTML
      #
      # Test it here:
      # https://jsfiddle.net/tigerhawkvok/t9pn1dn5/
      #
      # Code: https://gist.github.com/tigerhawkvok/285b8631ed6ebef4446d
      ###
      # Create a dummy element
      element = document.createElement("div")
      decodeHTMLEntities = (str) ->
        if str? and typeof str is "string"
          unless strict is true
            # escape HTML tags
            str = escape(str).replace(/%26/g,'&').replace(/%23/g,'#').replace(/%3B/g,';')
          else
            str = str.replace(/<script[^>]*>([\S\s]*?)<\/script>/gmi, '')
            str = str.replace(/<\/?\w(?:[^"'>]|"[^"]*"|'[^']*')*>/gmi, '')
          element.innerHTML = str
          if element.innerText
            # Do we support innerText?
            str = element.innerText
            element.innerText = ""
          else
            # Firefox
            str = element.textContent
            element.textContent = ""
        unescape(str)
      # Remove encoded or double-encoded tags
      fixHtmlEncodings = (string) ->
        string = string.replace(/\&amp;#/mg, '&#') # The rest, for double-encodings
        string = string.replace(/\&quot;/mg, '"')
        string = string.replace(/\&quote;/mg, '"')
        string = string.replace(/\&#95;/mg, '_')
        string = string.replace(/\&#39;/mg, "'")
        string = string.replace(/\&#34;/mg, '"')
        string = string.replace(/\&#62;/mg, '>')
        string = string.replace(/\&#60;/mg, '<')
        string
      # Run it
      tmp = fixHtmlEncodings(this)
      decodeHTMLEntities(tmp)

请参阅https://jsfiddle.net/tigerhawkvok/t9pn1dn5/7/或https://gist.github.com/tigerhawkvok/285b8631ed6ebef4446d(包括编译过的JS,可能比这个答案更新了)

为了在列表中添加另一个“受Robert K的启发”,这里是另一个不剥离HTML标签的安全版本。它不是通过HTML解析器运行整个字符串,而是只提取实体并转换它们。

var decodeEntities = (function() {
    // this prevents any overhead from creating the object each time
    var element = document.createElement('div');

    // regular expression matching HTML entities
    var entity = /&(?:#x[a-f0-9]+|#[0-9]+|[a-z0-9]+);?/ig;

    return function decodeHTMLEntities(str) {
        // find and replace all the html entities
        str = str.replace(entity, function(m) {
            element.innerHTML = m;
            return element.textContent;
        });

        // reset the value
        element.textContent = '';

        return str;
    }
})();

我知道我有点晚了,但我认为我可以提供以下片段作为我如何使用jQuery解码HTML实体的示例:

var varTitleE = "Chris&apos; corner";
var varTitleD = $("<div/>").html(varTitleE).text();

console.log(varTitleE + " vs. " + varTitleD);​​​​​​​​​​​

不要忘记启动检查器/firebug以查看控制台结果——或者简单地将console.log(…)替换为/alert(…)

也就是说,以下是我的控制台通过谷歌Chrome检查器读取的内容:

Chris&apos; corner vs. Chris' corner