如何从字符串中删除重音字符? 特别是在IE6中,我有这样的东西:

/**
 * Reduces a string to a lowercase key made only of ASCII word characters.
 *
 * Steps, in order: lowercase the input, remove all whitespace, fold the
 * listed accented letters and the ae/oe ligatures to plain ASCII, then drop
 * every remaining character that is not matched by \w ([A-Za-z0-9_]).
 *
 * @param {string} s - Text to normalize.
 * @returns {string} The normalized, accent-free string.
 */
// Declared with `var`: assigning to an undeclared name creates an implicit
// global and throws a ReferenceError in strict mode / ES modules.
var accentsTidy = function(s){
    var r = s.toLowerCase();
    // Plain regex literals are used directly; wrapping a literal in
    // new RegExp(...) only makes a copy of it.
    r = r.replace(/\s/g, "");
    r = r.replace(/[àáâãäå]/g, "a");
    r = r.replace(/æ/g, "ae");
    r = r.replace(/ç/g, "c");
    r = r.replace(/[èéêë]/g, "e");
    r = r.replace(/[ìíîï]/g, "i");
    r = r.replace(/ñ/g, "n");
    r = r.replace(/[òóôõö]/g, "o");
    r = r.replace(/œ/g, "oe");
    r = r.replace(/[ùúûü]/g, "u");
    r = r.replace(/[ýÿ]/g, "y");
    // Must run last: anything still non-ASCII here (e.g. "ø") is removed.
    r = r.replace(/\W/g, "");
    return r;
};

但是IE6让我很烦,它好像不喜欢我的正则表达式。


当前回答

将一个用户定义的函数传递给Array.sort()方法,并在这个用户定义的函数中使用String.localeCompare()

/**
 * Comparator for Array.prototype.sort that orders two strings using
 * String.prototype.localeCompare.
 *
 * @param {string} a - Left-hand string.
 * @param {string} b - Right-hand string.
 * @returns {number} Negative if a sorts before b, positive if after, 0 if equal.
 */
function myCompareFunction(a, b) {
  var ordering = a.localeCompare(b);
  return ordering;
}

// Sample words; two of them contain accented letters.
var values = ["pêches", "épinards", "tomates", "fraises"];

// WRONG: ["fraises", "pêches", "tomates", "épinards"]
// Without a comparator, sort() orders by UTF-16 code unit, so "é" lands after "t".
values.sort();

// **GOOD**: ["épinards", "fraises", "pêches", "tomates"]
// Passing myCompareFunction makes sort() use its result to order the elements.
values.sort(myCompareFunction);

其他回答

我觉得上面这些方法都有点笨拙,而且我不太擅长正则表达式,所以这里给出一个更简单的版本。把它翻译成你喜欢的服务器端语言也很容易,前提是字符串已经是 Unicode 编码:

// Lookup table for stripping accents: the character at index (code - 192)
// is the unaccented replacement for the character with that code, covering
// U+00C0..U+017F in rows of 16. A '.' means "leave the character as it is"
// (e.g. the multiplication and division signs).
var stripstring =
    'AAAAAAACEEEEIIII'+   // U+00C0..U+00CF
    'DNOOOOO.OUUUUY..'+   // U+00D0..U+00DF
    'aaaaaaaceeeeiiii'+   // U+00E0..U+00EF
    'dnooooo.ouuuuy.y'+   // U+00F0..U+00FF
    'AaAaAaCcCcCcCcDd'+   // U+0100..U+010F
    'DdEeEeEeEeEeGgGg'+   // U+0110..U+011F
    'GgGgHhHhIiIiIiIi'+   // U+0120..U+012F
    'IiIiJjKkkLlLlLlL'+   // U+0130..U+013F
    'lLlNnNnNnnNnOoOo'+   // U+0140..U+014F (U+0141 "Ł" maps to 'L')
    'OoOoRrRrRrSsSsSs'+   // U+0150..U+015F
    'SsTtTtTtUuUuUuUu'+   // U+0160..U+016F
    'UuUuWwYyYZzZzZz.';   // U+0170..U+017F

/**
 * Replaces accented characters with the unaccented characters listed in the
 * `stripstring` lookup table.
 *
 * Characters whose code falls outside the table, or whose table entry is
 * '.', are copied to the result unchanged.
 *
 * @param {string} str - Text to process.
 * @returns {string} Copy of str with table-covered accents removed.
 */
function stripaccents(str){
    // Code of the character that table index 0 stands for (U+00C0).
    var TABLE_START = 192;
    var answer = '';
    for (var i = 0; i < str.length; i++) {
        // charAt() instead of str[i]: bracket indexing on strings is not
        // available in old engines such as IE6/IE7.
        var ch = str.charAt(i);
        var chindex = ch.charCodeAt(0) - TABLE_START;
        if (chindex >= 0 && chindex < stripstring.length) {
            // Character is within the table; '.' marks "no replacement".
            var outch = stripstring.charAt(chindex);
            if (outch !== '.') {
                ch = outch;
            }
        }
        answer += ch;
    }
    return answer;
}

new RegExp 的调用格式为

RegExp(something, 'modifiers');

所以你应该这样写:

/**
 * Reduces a string to a lowercase key made only of ASCII word characters.
 *
 * The input is lowercased, then each rule below is applied in order with a
 * global RegExp built from a pattern string via new RegExp(pattern, 'g').
 *
 * @param {string} s - Text to normalize.
 * @returns {string} The normalized, accent-free string.
 */
// Declared with `var`: assigning to an undeclared name creates an implicit
// global and throws a ReferenceError in strict mode / ES modules.
var accentsTidy = function(s){
    // [pattern source, replacement] pairs. Order matters: whitespace goes
    // first, and "\\W" must stay last because it deletes every character
    // that the earlier rules did not fold to ASCII.
    var rules = [
        ["\\s", ""],
        ["[àáâãäå]", "a"],
        ["æ", "ae"],
        ["ç", "c"],
        ["[èéêë]", "e"],
        ["[ìíîï]", "i"],
        ["ñ", "n"],
        ["[òóôõö]", "o"],
        ["œ", "oe"],
        ["[ùúûü]", "u"],
        ["[ýÿ]", "y"],
        ["\\W", ""]
    ];
    var r = s.toLowerCase();
    for (var i = 0; i < rules.length; i++) {
        r = r.replace(new RegExp(rules[i][0], 'g'), rules[i][1]);
    }
    return r;
};
$scope.legal_name = $sanitize($scope.legal_name);
    console.log("Name before function...",$scope.legal_name);

    // Turn decimal numeric character references such as "&#233;" back into
    // the characters they stand for.
    // NOTE(review): presumably needed because $sanitize entity-encodes
    // non-ASCII characters — confirm against the $sanitize in use.
    //
    // The pattern requires at least one digit after "&#", so a bare "&#" is
    // copied through untouched; the ";" terminator is consumed when present.
    var str = $scope.legal_name.replace(/&#(\d+);?/g, function(entity, digits) {
        var code = parseInt(digits, 10);
        if (code > 0x10FFFF) {
            // Not a valid Unicode code point: leave the text as written.
            return entity;
        }
        if (code > 0xFFFF) {
            // String.fromCharCode works on 16-bit code units, so code points
            // beyond the BMP have to be emitted as a surrogate pair.
            code -= 0x10000;
            return String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
        }
        return String.fromCharCode(code);
    });
    $scope.legal_name = str;
    console.log("Name After function...",str);

以上所有方法都无法处理 Mac OS 上使用的分解形式(decomposed)字符,即基本字母后面跟着独立的组合变音符号。在这种情况下,删除变音符号反而更简单:

// Delete every character in the range U+0300..U+036F from r.
r = r.replace(/[\u0300-\u036f]/g, "");

参见 Olivier Miakinen 的评论: https://groups.google.com/d/msg/fr.comp.lang.regexp/6IGJTbedGTM/G0sB2kAsR34J (帖子为法语)

替换变音符的一种更简单的方法。

/**
 * Replaces accented Latin-1 letters with their unaccented base letter,
 * preserving case. Characters not covered by a rule are left unchanged.
 *
 * @param {string} str - Text to process.
 * @returns {string} Copy of str with the covered accented letters replaced.
 */
function replaceDiacritics(str){
  // Each rule pairs a character class of accented letters with the plain
  // letter that replaces them.
  var rules = [
    {pattern: /[\u00C0-\u00C6]/g, replacement: 'A'},
    {pattern: /[\u00E0-\u00E6]/g, replacement: 'a'},
    {pattern: /[\u00C8-\u00CB]/g, replacement: 'E'},
    {pattern: /[\u00E8-\u00EB]/g, replacement: 'e'},
    {pattern: /[\u00CC-\u00CF]/g, replacement: 'I'},
    {pattern: /[\u00EC-\u00EF]/g, replacement: 'i'},
    // U+00D7 (multiplication sign) and U+00F7 (division sign) sit inside
    // the O/o ranges and are deliberately excluded.
    {pattern: /[\u00D2-\u00D6\u00D8]/g, replacement: 'O'},
    {pattern: /[\u00F2-\u00F6\u00F8]/g, replacement: 'o'},
    {pattern: /[\u00D9-\u00DC]/g, replacement: 'U'},
    {pattern: /[\u00F9-\u00FC]/g, replacement: 'u'},
    {pattern: /[\u00D1]/g, replacement: 'N'},
    {pattern: /[\u00F1]/g, replacement: 'n'},
    {pattern: /[\u00C7]/g, replacement: 'C'},
    {pattern: /[\u00E7]/g, replacement: 'c'},
    {pattern: /[\u00DD]/g, replacement: 'Y'},
    {pattern: /[\u00FD\u00FF]/g, replacement: 'y'}
  ];

  var result = str;
  rules.forEach(function(rule){
    result = result.replace(rule.pattern, rule.replacement);
  });

  return result;
}