我如何编码和解码HTML实体使用JavaScript或JQuery?

var varTitle = "Chris' corner";

我希望它是:

var varTitle = "Chris' corner";

当前回答

因为@Robert K和@mattcasey都有很好的代码,我想在这里贡献一个CoffeeScript版本,以防将来有人会使用它:

    String::unescape = (strict = false) ->
      ###
      # Take escaped text, and return the unescaped version
      #
      # @param string str | String to be used
      # @param bool strict | Stict mode will remove all HTML
      #
      # Test it here:
      # https://jsfiddle.net/tigerhawkvok/t9pn1dn5/
      #
      # Code: https://gist.github.com/tigerhawkvok/285b8631ed6ebef4446d
      ###
      # Create a dummy element
      element = document.createElement("div")
      decodeHTMLEntities = (str) ->
        if str? and typeof str is "string"
          unless strict is true
            # escape HTML tags
            str = escape(str).replace(/%26/g,'&').replace(/%23/g,'#').replace(/%3B/g,';')
          else
            str = str.replace(/<script[^>]*>([\S\s]*?)<\/script>/gmi, '')
            str = str.replace(/<\/?\w(?:[^"'>]|"[^"]*"|'[^']*')*>/gmi, '')
          element.innerHTML = str
          if element.innerText
            # Do we support innerText?
            str = element.innerText
            element.innerText = ""
          else
            # Firefox
            str = element.textContent
            element.textContent = ""
        unescape(str)
      # Remove encoded or double-encoded tags
      fixHtmlEncodings = (string) ->
        string = string.replace(/\&amp;#/mg, '&#') # The rest, for double-encodings
        string = string.replace(/\&quot;/mg, '"')
        string = string.replace(/\&quote;/mg, '"')
        string = string.replace(/\&#95;/mg, '_')
        string = string.replace(/\&#39;/mg, "'")
        string = string.replace(/\&#34;/mg, '"')
        string = string.replace(/\&#62;/mg, '>')
        string = string.replace(/\&#60;/mg, '<')
        string
      # Run it
      tmp = fixHtmlEncodings(this)
      decodeHTMLEntities(tmp)

请参阅https://jsfiddle.net/tigerhawkvok/t9pn1dn5/7/或https://gist.github.com/tigerhawkvok/285b8631ed6ebef4446d(包括编译过的JS,可能比这个答案更新了)

其他回答

要做到这一点,在纯javascript没有jquery或预定义一切,你可以循环编码的html字符串通过元素innerHTML和innerText(/textContent)属性的每解码步骤,这是必需的:

<html>
  <head>
    <title>For every decode step, cycle through innerHTML and innerText </title>
    <script>
function decode(str) {
  var d = document.createElement("div");
  d.innerHTML = str; 
  return typeof d.innerText !== 'undefined' ? d.innerText : d.textContent;
}
    </script>
  </head>
  <body>
    <script>
var encodedString = "&lt;p&gt;name&lt;/p&gt;&lt;p&gt;&lt;span style=\"font-size:xx-small;\"&gt;ajde&lt;/span&gt;&lt;/p&gt;&lt;p&gt;&lt;em&gt;da&lt;/em&gt;&lt;/p&gt;";
    </script>
    <input type=button onclick="document.body.innerHTML=decode(encodedString)"/>
  </body>
</html>

因为@Robert K和@mattcasey都有很好的代码,我想在这里贡献一个CoffeeScript版本,以防将来有人会使用它:

    String::unescape = (strict = false) ->
      ###
      # Take escaped text, and return the unescaped version
      #
      # @param string str | String to be used
      # @param bool strict | Stict mode will remove all HTML
      #
      # Test it here:
      # https://jsfiddle.net/tigerhawkvok/t9pn1dn5/
      #
      # Code: https://gist.github.com/tigerhawkvok/285b8631ed6ebef4446d
      ###
      # Create a dummy element
      element = document.createElement("div")
      decodeHTMLEntities = (str) ->
        if str? and typeof str is "string"
          unless strict is true
            # escape HTML tags
            str = escape(str).replace(/%26/g,'&').replace(/%23/g,'#').replace(/%3B/g,';')
          else
            str = str.replace(/<script[^>]*>([\S\s]*?)<\/script>/gmi, '')
            str = str.replace(/<\/?\w(?:[^"'>]|"[^"]*"|'[^']*')*>/gmi, '')
          element.innerHTML = str
          if element.innerText
            # Do we support innerText?
            str = element.innerText
            element.innerText = ""
          else
            # Firefox
            str = element.textContent
            element.textContent = ""
        unescape(str)
      # Remove encoded or double-encoded tags
      fixHtmlEncodings = (string) ->
        string = string.replace(/\&amp;#/mg, '&#') # The rest, for double-encodings
        string = string.replace(/\&quot;/mg, '"')
        string = string.replace(/\&quote;/mg, '"')
        string = string.replace(/\&#95;/mg, '_')
        string = string.replace(/\&#39;/mg, "'")
        string = string.replace(/\&#34;/mg, '"')
        string = string.replace(/\&#62;/mg, '>')
        string = string.replace(/\&#60;/mg, '<')
        string
      # Run it
      tmp = fixHtmlEncodings(this)
      decodeHTMLEntities(tmp)

请参阅https://jsfiddle.net/tigerhawkvok/t9pn1dn5/7/或https://gist.github.com/tigerhawkvok/285b8631ed6ebef4446d(包括编译过的JS,可能比这个答案更新了)

这里有一个不需要创建div的快速方法,并解码“最常见的”HTML转义字符:

function decodeHTMLEntities(text) {
    var entities = [
        ['amp', '&'],
        ['apos', '\''],
        ['#x27', '\''],
        ['#x2F', '/'],
        ['#39', '\''],
        ['#47', '/'],
        ['lt', '<'],
        ['gt', '>'],
        ['nbsp', ' '],
        ['quot', '"']
    ];

    for (var i = 0, max = entities.length; i < max; ++i) 
        text = text.replace(new RegExp('&'+entities[i][0]+';', 'g'), entities[i][1]);

    return text;
}

jQuery提供了一种编码和解码html实体的方法。

如果你使用"<div/>"标签,它会删除所有的html。

function htmlDecode(value) {
    return $("<div/>").html(value).text();
}

function htmlEncode(value) {
    return $('<div/>').text(value).html();
}

如果你使用"<textarea/>"标签,它将保留html标签。

function htmlDecode(value) {
    return $("<textarea/>").html(value).text();
}

function htmlEncode(value) {
    return $('<textarea/>').text(value).html();
}

我知道我有点晚了,但我认为我可以提供以下片段作为我如何使用jQuery解码HTML实体的示例:

var varTitleE = "Chris&apos; corner";
var varTitleD = $("<div/>").html(varTitleE).text();

console.log(varTitleE + " vs. " + varTitleD);​​​​​​​​​​​

不要忘记启动检查器/firebug以查看控制台结果——或者简单地将console.log(…)替换为/alert(…)

也就是说,以下是我的控制台通过谷歌Chrome检查器读取的内容:

Chris&apos; corner vs. Chris' corner