在Window上执行'btoa'失败:要编码的字符串包含Latin1范围之外的字符。

根据我的测试，标题中的错误只在谷歌Chrome中抛出。我正在base64编码一个大的XML文件，以便它可以下载:

// Build the XML save file as text, Base64-encode it with btoa and hand it to
// the loader as a data: URL. btoa throws when the text holds a character
// above U+00FF, which is the error this question is about.
this.loader.src = `data:application/x-forcedownload;base64,${btoa(
    `<?xml version="1.0" encoding="utf-8"?><${this.gamesave.tagName}>${this.xml.firstChild.innerHTML}</${this.gamesave.tagName}>`
)}`;

this.loader 是一个隐藏的 iframe。

这个错误其实已经算是一种进步，因为以前谷歌Chrome在调用btoa时通常会直接崩溃。Mozilla Firefox在这里没有问题，所以这个问题与浏览器有关。我不知道文件里有什么奇怪的字符。事实上，我相信其中没有非ASCII字符。

问: 我如何找到有问题的字符，并替换他们，让Chrome停止抱怨?

我已经尝试使用Downloadify来启动下载，但它不起作用。它并不可靠，而且不会抛出任何可供调试的错误。

当前回答

作为Stefan Steiger回答的补充:(作为评论看起来不太好)

扩展字符串原型:

// Adds UTF-8 aware Base64 helpers to every string.

// Encode: percent-encode the string as UTF-8, collapse each %XX escape into a
// single character in the 0-255 range, then Base64-encode that byte string.
String.prototype.b64encode = function() {
    var percentEncoded = encodeURIComponent(this);
    var byteString = unescape(percentEncoded);
    return btoa(byteString);
};

// Decode: the exact inverse - Base64-decode to a byte string, turn every byte
// back into a %XX escape, then interpret the escapes as UTF-8.
String.prototype.b64decode = function() {
    var byteString = atob(this);
    var percentEncoded = escape(byteString);
    return decodeURIComponent(percentEncoded);
};

用法:

// Round-trip a string of non-ASCII Latin characters through the
// b64encode / b64decode string helpers and print the decoded result.
var str = "äöüÄÖÜçéèñ";
var encoded = str.b64encode();
var decoded = encoded.b64decode();
console.log( decoded );

注意:

正如评论中所述，不建议使用unescape，因为它将来可能会被删除:

警告:尽管unescape()并没有被严格弃用(就像“从Web标准中删除”一样)，但它在ECMA-262标准的附件B中有定义，其介绍说明: 本附录中规定的所有语言特性和行为都有一个或多个不受欢迎的特性，如果没有遗留用法，将从本规范中删除。注意:不要使用unescape来解码uri，而是使用decodeURI或decodeURIComponent来代替。

2015-11-10 09:26:17

其他回答

我只是觉得我应该分享我是如何解决这个问题的，以及为什么我认为这是正确的解决方案(前提是你不针对旧浏览器进行优化)。

将数据转换为dataURL (data:…)

// Wrap the data in a Blob. The page's innerHTML serves as sample data here;
// the array may hold many long strings, which the Blob joins efficiently.
// The MIME type matters because it becomes part of the resulting data URL.
const blob = new Blob([document.body.innerHTML], { type: 'text/html' });

// FileReader reads asynchronously, so the conversion does not block the page.
const reader = new FileReader();
reader.onload = (event) => {
    // The finished data: URL is available here.
    console.log(event.target.result);
};
reader.readAsDataURL(blob);

允许用户保存数据

除了显而易见的解决方案-打开新窗口，以您的dataURL作为URL，您还可以做其他两件事。

1. 使用fileSaver.js

FileSaver.js可以使用预定义的文件名弹出真正的文件保存对话框。在不支持的环境中，它还可以回退到普通的dataURL方法。

2. 使用(实验性)URL.createObjectURL

这对于重用base64编码的数据非常有用。它为你的dataURL创建一个简短的URL:

// Create a short blob: URL that refers to the Blob's contents, then print it.
const blobUrl = URL.createObjectURL(blob);
console.log(blobUrl);
//Prints: blob:http://stackoverflow.com/7c18953f-f5f8-41d2-abf5-e9cbced9bc42

不要忘记使用包含前导 blob: 前缀的完整URL。这里我再次使用了 document.body:

你可以把这个短URL用作AJAX请求目标、<script> 的 src 或 <a> 的 href。用完之后，你需要自己负责销毁(撤销)这个URL:

// Revoke the object URL created earlier once it is no longer needed; the
// argument is the full URL string, including the leading "blob:" prefix.
URL.revokeObjectURL('blob:http://stackoverflow.com/7c18953f-f5f8-41d2-abf5-e9cbced9bc42')

2015-10-15 07:50:42

使用btoa和unescape和encodeURIComponent不适合我。用XML/HTML实体替换所有特殊字符，然后转换为base64表示是解决这个问题的唯一方法。一些代码:

// Replace every character at or above U+00A0 with a decimal numeric character
// reference (e.g. "€" -> "&#8364;") so that btoa only ever receives ASCII.
// The range runs to U+10FFFF rather than U+2666, so CJK text and emoji are
// covered as well. The `u` flag makes the regex iterate by code point and
// codePointAt reads the full value, so an astral character becomes a single
// reference instead of two surrogate halves.
base64 = btoa(str.replace(/[\u00A0-\u{10FFFF}]/gu, function(c) {
    return '&#' + c.codePointAt(0) + ';';
}));

2015-10-15 04:49:31

btoa()只支持String.fromCodePoint(0)到String.fromCodePoint(255)范围内的字符。对于码点为256或更高的字符，需要在Base64编码之前先对其进行编码，并在Base64解码之后再将其解码还原。

在这一点上，它变得棘手……

每一个可能的符号都被安排在一个unicode表中。Unicode-Table被划分在不同的平面上(语言、数学符号等等)。平面上的每个符号都有一个唯一的代码点号。理论上，这个数字可以变得任意大。

计算机以字节存储数据(8位，十六进制0x00 - 0xff，二进制00000000 - 11111111，十进制0 - 255)。这个范围通常用来保存基本字符(Latin1范围)。

对于码点高于255的字符，存在多种不同的编码方式。JavaScript中每个符号使用16位(UTF-16)，这种字符串称为DOMString。Unicode可以处理最大到0x10FFFF的码点。这意味着必须有一种方法，把这些位分散存储到多个存储单元中。

// A code point above U+FFFF takes two UTF-16 code units, so the length is 2.
String.fromCodePoint(0x10000).length == 2

UTF-16使用代理对在两个16位单元中存储20位。第一个较高的代理以110110xxxxxxxxxx开始，第二个较低的代理以110111xxxxxxxxxx开始。Unicode为此保留了自己的平面:https://unicode-table.com/de/#high-surrogates

以字节(Latin1范围)存储字符的标准化过程使用UTF-8。

很抱歉这么说，但我认为除了自己动手实现这个函数之外，没有别的办法。

/**
 * Encodes a string as UTF-8.
 *
 * The string is walked code point by code point (a surrogate pair counts as
 * one step) and each code point is emitted as 1 to 4 bytes depending on its
 * magnitude.
 *
 * @param {string} str - Text to encode.
 * @returns {number[]} The encoded bytes, each in the range 0-255.
 */
function stringToUTF8(str)
{
    const output = [];

    // Continuation byte: marker bits "10" followed by six payload bits taken
    // from `value` starting at bit position `shift`.
    const tail = (value, shift) => 0x80 | ((value >> shift) & 0x3F);

    for(const symbol of str)
    {
        const value = symbol.codePointAt(0);

        if(value < 0x80)
        {
            // 7 bits: stored as-is.
            output.push(value);
            continue;
        }

        if(value < 0x800)
        {
            // 11 bits: 110xxxxx 10xxxxxx
            output.push(0xC0 | (value >> 6), tail(value, 0));
            continue;
        }

        if(value < 0x10000)
        {
            // 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
            output.push(0xE0 | (value >> 12), tail(value, 6), tail(value, 0));
            continue;
        }

        if(value < 0x200000)
        {
            // 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            output.push(0xF0 | (value >> 18), tail(value, 12), tail(value, 6), tail(value, 0));
        }
    }

    return output;
}

/**
 * Decodes a sequence of UTF-8 bytes into a string.
 *
 * The input counts as malformed when a lead byte matches none of the 1- to
 * 4-byte patterns, when a continuation byte is missing or does not start with
 * the bits "10", or when a decoded value is above U+10FFFF (the largest value
 * String.fromCodePoint accepts). Overlong encodings and surrogate code points
 * are NOT rejected.
 *
 * @param {ArrayLike<number>} bytes - The UTF-8 bytes to decode.
 * @param {boolean} [fallback] - When truthy, malformed input is not an error:
 *     the whole input is returned with each byte mapped to the character of
 *     the same code (Latin1 style) instead.
 * @returns {string} The decoded text.
 * @throws {TypeError} If the input is malformed and `fallback` is falsy.
 */
function utf8ToString(bytes, fallback)
{
    let valid = true;
    let result = "";
    let offset = 0;

    while(valid && offset < bytes.length)
    {
        const lead = bytes[offset++];
        let continuationCount;
        let codePoint;

        // The lead byte announces the sequence length and carries the top bits.
        if((lead & 0x80) === 0x00)
        {
            continuationCount = 0;
            codePoint = lead & 0x7F;
        }
        else if((lead & 0xE0) === 0xC0)
        {
            continuationCount = 1;
            codePoint = lead & 0x1F;
        }
        else if((lead & 0xF0) === 0xE0)
        {
            continuationCount = 2;
            codePoint = lead & 0x0F;
        }
        else if((lead & 0xF8) === 0xF0)
        {
            continuationCount = 3;
            codePoint = lead & 0x07;
        }
        else
        {
            valid = false;
            break;
        }

        // Each continuation byte (10xxxxxx) contributes six more bits.
        for(let remaining = continuationCount; remaining > 0; remaining--)
        {
            if(offset >= bytes.length || (bytes[offset] & 0xC0) !== 0x80)
            {
                valid = false;
                break;
            }

            codePoint = (codePoint << 6) | (bytes[offset++] & 0x3F);
        }

        // A 4-byte sequence can carry up to 0x1FFFFF, but String.fromCodePoint
        // throws a RangeError above 0x10FFFF; treat such values as malformed so
        // the TypeError / fallback contract holds for arbitrary binary input.
        if(!valid || codePoint > 0x10FFFF)
        {
            valid = false;
            break;
        }

        result += String.fromCodePoint(codePoint);
    }

    if(!valid)
    {
        if(!fallback)
        {
            throw new TypeError("Malformed utf-8 encoding.");
        }

        // Discard the partial result and map every byte to one character.
        result = "";

        for(let index = 0; index != bytes.length; index++)
        {
            result += String.fromCharCode(bytes[index] & 0xFF);
        }
    }

    return result;
}

/**
 * Decodes a Base64 string.
 *
 * Trailing "=" padding is optional: the output length is derived from the
 * number of data symbols, so "QQ==" and "QQ" both decode to one byte, and an
 * empty string decodes to zero bytes. A "=" that appears before the end is
 * not rejected; it is treated as six zero bits.
 *
 * @param {string} text - Base64 text using the alphabet A-Z a-z 0-9 + /.
 * @param {boolean} [binary] - When truthy, return the raw bytes.
 * @returns {Uint8Array|string} The bytes when `binary` is truthy; otherwise
 *     the bytes passed through utf8ToString with its fallback enabled.
 * @throws {TypeError} If `text` contains a character outside the Base64
 *     alphabet and "=".
 */
function decodeBase64(text, binary)
{
    if(/[^0-9a-zA-Z\+\/\=]/.test(text)) { throw new TypeError("The string to be decoded contains characters outside of the valid base64 range."); }

    const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    // Six-bit value of every symbol; "=" is not in the alphabet and maps to 0.
    const codes = Array.from(text, character => Math.max(alphabet.indexOf(character), 0));

    // Every data symbol carries 6 bits; only whole bytes are produced.
    const symbolCount = text.replace(/=+$/, "").length;
    const bytes = new Uint8Array(Math.floor(symbolCount * 3 / 4));

    for(let offset = 0, index = 0; offset < bytes.length; index += 4)
    {
        // Symbols past the end of an unpadded input count as zero bits.
        const code1 = codes[index];
        const code2 = index + 1 < codes.length ? codes[index + 1] : 0;
        const code3 = index + 2 < codes.length ? codes[index + 2] : 0;
        const code4 = index + 3 < codes.length ? codes[index + 3] : 0;

        bytes[offset++] = (code1 << 2) | (code2 >> 4);

        // The last group may yield fewer than three bytes.
        if(offset < bytes.length) { bytes[offset++] = ((code2 & 0xf) << 4) | (code3 >> 2); }
        if(offset < bytes.length) { bytes[offset++] = ((code3 & 0x3) << 6) | code4; }
    }

    if(binary) { return bytes; }

    return utf8ToString(bytes, true);
}

/**
 * Encodes bytes, or the UTF-8 form of a string, as padded Base64.
 *
 * @param {Uint8Array|Uint8ClampedArray|number[]|string|null|undefined} bytes
 *     The data to encode. `null`/`undefined` yield an empty string. In a plain
 *     array, entries that are not finite numbers in the range 0-255 are
 *     dropped before encoding. A string is converted to UTF-8 with
 *     TextEncoder, which substitutes U+FFFD for a lone surrogate instead of
 *     throwing.
 * @returns {string} The Base64 text, padded with "=" to a multiple of four.
 * @throws {TypeError} If `bytes` is of any other type.
 */
function encodeBase64(bytes) {
    if (bytes === undefined || bytes === null) {
        return '';
    }

    if (Array.isArray(bytes)) {
        // Silently skip anything that is not a valid byte value.
        bytes = bytes.filter(
            item => Number.isFinite(item) && item >= 0 && item <= 255
        );
    } else if (typeof bytes === 'string') {
        bytes = new TextEncoder().encode(bytes);
    } else if (
        !(bytes instanceof Uint8Array || bytes instanceof Uint8ClampedArray)
    ) {
        throw new TypeError('bytes must be of type Uint8Array or String.');
    }

    const keys =
        'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
    const fillKey = '=';

    let result = '';

    // Every group of up to three bytes becomes four output symbols.
    for (let index = 0; index < bytes.length; index += 3) {
        const available = bytes.length - index;
        const byte1 = bytes[index];
        // Bytes missing from the final group contribute zero bits.
        const byte2 = available > 1 ? bytes[index + 1] : 0;
        const byte3 = available > 2 ? bytes[index + 2] : 0;

        // tslint:disable:no-bitwise
        result += keys[byte1 >> 2];
        result += keys[((byte1 & 0x3) << 4) | (byte2 >> 4)];
        // Symbols that would hold only padding bits are written as "=".
        result += available > 1 ? keys[((byte2 & 0xf) << 2) | (byte3 >> 6)] : fillKey;
        result += available > 2 ? keys[byte3 & 0x3f] : fillKey;
    }

    return result;
}

// Round-trip demo: encode a smiley (a code point above U+FFFF) to Base64,
// decode it again, log both values and show the decoded text on the page.
let base64 = encodeBase64("\u{1F604}"); // unicode code point escapes for smiley
let str = decodeBase64(base64);

console.log("base64", base64);
console.log("str", str);

document.body.innerText = str;

// The original line ended in an unbalanced ")" that made the script unparseable.
encodeBase64("\u{1F604}");

演示:https://jsfiddle.net/qrLadeb8/

2018-01-26 15:57:13

使用库代替

我们没必要白费力气。使用库来节省时间和麻烦。

js-base64

https://github.com/dankogai/js-base64很好，我确认它很好地支持unicode。

// `Base64` is the object provided by the js-base64 library linked above; it is
// not defined in this snippet. Each trailing comment shows the call's result.

// Encoding: encodeURI yields the URL-safe variant ("-" in place of "+").
Base64.encode('dankogai');  // ZGFua29nYWk=
Base64.encode('小飼弾');    // 5bCP6aO85by+
Base64.encodeURI('小飼弾'); // 5bCP6aO85by-

// Decoding: the same decode call handles standard and URL-safe input.
Base64.decode('ZGFua29nYWk=');  // dankogai
Base64.decode('5bCP6aO85by+');  // 小飼弾
// note .decodeURI() is unnecessary since it accepts both flavors
Base64.decode('5bCP6aO85by-');  // 小飼弾

2017-08-17 01:50:17

将字符串转换为utf-8的解决方案，这比许多其他答案建议的utf-16或URLEncoded版本略短。它也与其他语言(如python和PHP)解码字符串的方式更加兼容:

编码

/**
 * Base64-encodes the UTF-8 representation of a string.
 *
 * @param {string} value - Text to encode; any Unicode content is allowed.
 * @returns {string} The Base64 encoding of the UTF-8 bytes of `value`.
 */
function btoa_utf8(value) {
    // TextEncoder always produces UTF-8; its constructor takes no encoding.
    const bytes = new TextEncoder().encode(value);

    // Convert the bytes to a one-character-per-byte string in slices: spreading
    // a very large array into a single call exceeds the engine's argument limit.
    const sliceSize = 0x8000;
    let byteString = '';
    for (let start = 0; start < bytes.length; start += sliceSize) {
        byteString += String.fromCharCode(
            ...bytes.subarray(start, start + sliceSize)
        );
    }

    return btoa(byteString);
}

解码

/**
 * Decodes Base64 text whose payload is UTF-8 back into a string.
 *
 * @param {string} value - Base64 text.
 * @returns {string} The payload bytes interpreted as UTF-8.
 */
function atob_utf8(value) {
    // atob returns one character per decoded byte.
    const byteString = atob(value);

    // Copy each character code into a byte buffer for TextDecoder.
    const buffer = new Uint8Array(byteString.length);
    for (let position = 0; position < byteString.length; position += 1) {
        buffer[position] = byteString.charCodeAt(position);
    }

    const decoder = new TextDecoder('utf-8');
    return decoder.decode(buffer);
}

如果您愿意，可以把传给TextDecoder的'utf-8'换成其他字符编码标签。注意TextEncoder始终输出UTF-8，传给它的编码参数会被忽略，因此编码一侧无法更换编码。

注意:这取决于TextEncoder类。目前大多数浏览器都支持这一功能，但如果您需要针对较老的浏览器，请检查它是否可用。

2023-01-18 07:41:03

在Window上执行'btoa'失败:要编码的字符串包含Latin1范围之外的字符。

推荐文章

最新文章

标签