我使用Javascript window.atob()函数来解码base64编码的字符串(特别是来自GitHub API的base64编码的内容)。问题是我得到ascii编码的字符返回(如âⅱ而不是™)。我如何正确地处理传入的base64编码流,以便将其解码为utf-8?
当前回答
包括上述解决方案,如果仍然面临问题,尝试如下,考虑转义不支持TS的情况。
blob = new Blob(["\ufeff", csv_content]); // this will make symbols to appears in excel
对于csv_content,您可以像下面这样尝试。
function b64DecodeUnicode(str: any) {
return decodeURIComponent(atob(str).split('').map((c: any) => {
return '%' + ('00' + c.charCodeAt(0).toString(16)).slice(-2);
}).join(''));
}
其他回答
如果试图解码节点中utf8编码数据的Base64表示,您可以使用本机Buffer helper
Buffer.from("4pyTIMOgIGxhIG1vZGU=", "base64").toString(); // '✓ à la mode'
Buffer的toString方法默认为utf8,但您可以指定任何所需的编码。例如,相反的操作是这样的
Buffer.from('✓ à la mode', "utf8").toString("base64"); // "4pyTIMOgIGxhIG1vZGU="
I would assume that one might want a solution that produces a widely useable base64 URI. Please visit data:text/plain;charset=utf-8;base64,4pi44pi54pi64pi74pi84pi+4pi/ to see a demonstration (copy the data uri, open a new tab, paste the data URI into the address bar, then press enter to go to the page). Despite the fact that this URI is base64-encoded, the browser is still able to recognize the high code points and decode them properly. The minified encoder+decoder is 1058 bytes (+Gzip→589 bytes)
!function(e){"use strict";function h(b){var a=b.charCodeAt(0);if(55296<=a&&56319>=a)if(b=b.charCodeAt(1),b===b&&56320<=b&&57343>=b){if(a=1024*(a-55296)+b-56320+65536,65535<a)return d(240|a>>>18,128|a>>>12&63,128|a>>>6&63,128|a&63)}else return d(239,191,189);return 127>=a?inputString:2047>=a?d(192|a>>>6,128|a&63):d(224|a>>>12,128|a>>>6&63,128|a&63)}function k(b){var a=b.charCodeAt(0)<<24,f=l(~a),c=0,e=b.length,g="";if(5>f&&e>=f){a=a<<f>>>24+f;for(c=1;c<f;++c)a=a<<6|b.charCodeAt(c)&63;65535>=a?g+=d(a):1114111>=a?(a-=65536,g+=d((a>>10)+55296,(a&1023)+56320)):c=0}for(;c<e;++c)g+="\ufffd";return g}var m=Math.log,n=Math.LN2,l=Math.clz32||function(b){return 31-m(b>>>0)/n|0},d=String.fromCharCode,p=atob,q=btoa;e.btoaUTF8=function(b,a){return q((a?"\u00ef\u00bb\u00bf":"")+b.replace(/[\x80-\uD7ff\uDC00-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]?/g,h))};e.atobUTF8=function(b,a){a||"\u00ef\u00bb\u00bf"!==b.substring(0,3)||(b=b.substring(3));return p(b).replace(/[\xc0-\xff][\x80-\xbf]*/g,k)}}(""+void 0==typeof global?""+void 0==typeof self?this:self:global)
下面是用于生成它的源代码。
var fromCharCode = String.fromCharCode;
var btoaUTF8 = (function(btoa, replacer){"use strict";
return function(inputString, BOMit){
return btoa((BOMit ? "\xEF\xBB\xBF" : "") + inputString.replace(
/[\x80-\uD7ff\uDC00-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]?/g, replacer
));
}
})(btoa, function(nonAsciiChars){"use strict";
// make the UTF string into a binary UTF-8 encoded string
var point = nonAsciiChars.charCodeAt(0);
if (point >= 0xD800 && point <= 0xDBFF) {
var nextcode = nonAsciiChars.charCodeAt(1);
if (nextcode !== nextcode) // NaN because string is 1 code point long
return fromCharCode(0xef/*11101111*/, 0xbf/*10111111*/, 0xbd/*10111101*/);
// https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
if (nextcode >= 0xDC00 && nextcode <= 0xDFFF) {
point = (point - 0xD800) * 0x400 + nextcode - 0xDC00 + 0x10000;
if (point > 0xffff)
return fromCharCode(
(0x1e/*0b11110*/<<3) | (point>>>18),
(0x2/*0b10*/<<6) | ((point>>>12)&0x3f/*0b00111111*/),
(0x2/*0b10*/<<6) | ((point>>>6)&0x3f/*0b00111111*/),
(0x2/*0b10*/<<6) | (point&0x3f/*0b00111111*/)
);
} else return fromCharCode(0xef, 0xbf, 0xbd);
}
if (point <= 0x007f) return nonAsciiChars;
else if (point <= 0x07ff) {
return fromCharCode((0x6<<5)|(point>>>6), (0x2<<6)|(point&0x3f));
} else return fromCharCode(
(0xe/*0b1110*/<<4) | (point>>>12),
(0x2/*0b10*/<<6) | ((point>>>6)&0x3f/*0b00111111*/),
(0x2/*0b10*/<<6) | (point&0x3f/*0b00111111*/)
);
});
然后,要解码base64数据,HTTP可以将数据作为数据URI获取,也可以使用下面的函数。
var clz32 = Math.clz32 || (function(log, LN2){"use strict";
return function(x) {return 31 - log(x >>> 0) / LN2 | 0};
})(Math.log, Math.LN2);
var fromCharCode = String.fromCharCode;
var atobUTF8 = (function(atob, replacer){"use strict";
return function(inputString, keepBOM){
inputString = atob(inputString);
if (!keepBOM && inputString.substring(0,3) === "\xEF\xBB\xBF")
inputString = inputString.substring(3); // eradicate UTF-8 BOM
// 0xc0 => 0b11000000; 0xff => 0b11111111; 0xc0-0xff => 0b11xxxxxx
// 0x80 => 0b10000000; 0xbf => 0b10111111; 0x80-0xbf => 0b10xxxxxx
return inputString.replace(/[\xc0-\xff][\x80-\xbf]*/g, replacer);
}
})(atob, function(encoded){"use strict";
var codePoint = encoded.charCodeAt(0) << 24;
var leadingOnes = clz32(~codePoint);
var endPos = 0, stringLen = encoded.length;
var result = "";
if (leadingOnes < 5 && stringLen >= leadingOnes) {
codePoint = (codePoint<<leadingOnes)>>>(24+leadingOnes);
for (endPos = 1; endPos < leadingOnes; ++endPos)
codePoint = (codePoint<<6) | (encoded.charCodeAt(endPos)&0x3f/*0b00111111*/);
if (codePoint <= 0xFFFF) { // BMP code point
result += fromCharCode(codePoint);
} else if (codePoint <= 0x10FFFF) {
// https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
codePoint -= 0x10000;
result += fromCharCode(
(codePoint >> 10) + 0xD800, // highSurrogate
(codePoint & 0x3ff) + 0xDC00 // lowSurrogate
);
} else endPos = 0; // to fill it in with INVALIDs
}
for (; endPos < stringLen; ++endPos) result += "\ufffd"; // replacement character
return result;
});
更标准的优点是这个编码器和这个解码器更广泛地适用,因为它们可以用作正确显示的有效URL。观察。
(function(window){ "use strict"; var sourceEle = document.getElementById("source"); var urlBarEle = document.getElementById("urlBar"); var mainFrameEle = document.getElementById("mainframe"); var gotoButton = document.getElementById("gotoButton"); var parseInt = window.parseInt; var fromCodePoint = String.fromCodePoint; var parse = JSON.parse; function unescape(str){ return str.replace(/\\u[\da-f]{0,4}|\\x[\da-f]{0,2}|\\u{[^}]*}|\\[bfnrtv"'\\]|\\0[0-7]{1,3}|\\\d{1,3}/g, function(match){ try{ if (match.startsWith("\\u{")) return fromCodePoint(parseInt(match.slice(2,-1),16)); if (match.startsWith("\\u") || match.startsWith("\\x")) return fromCodePoint(parseInt(match.substring(2),16)); if (match.startsWith("\\0") && match.length > 2) return fromCodePoint(parseInt(match.substring(2),8)); if (/^\\\d/.test(match)) return fromCodePoint(+match.slice(1)); }catch(e){return "\ufffd".repeat(match.length)} return parse('"' + match + '"'); }); } function whenChange(){ try{ urlBarEle.value = "data:text/plain;charset=UTF-8;base64," + btoaUTF8(unescape(sourceEle.value), true); } finally{ gotoURL(); } } sourceEle.addEventListener("change",whenChange,{passive:1}); sourceEle.addEventListener("input",whenChange,{passive:1}); // IFrame Setup: function gotoURL(){mainFrameEle.src = urlBarEle.value} gotoButton.addEventListener("click", gotoURL, {passive: 1}); function urlChanged(){urlBarEle.value = mainFrameEle.src} mainFrameEle.addEventListener("load", urlChanged, {passive: 1}); urlBarEle.addEventListener("keypress", function(evt){ if (evt.key === "enter") evt.preventDefault(), urlChanged(); }, {passive: 1}); var fromCharCode = String.fromCharCode; var btoaUTF8 = (function(btoa, replacer){ "use strict"; return function(inputString, BOMit){ return btoa((BOMit?"\xEF\xBB\xBF":"") + inputString.replace( /[\x80-\uD7ff\uDC00-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]?/g, replacer )); } })(btoa, function(nonAsciiChars){ "use strict"; // make the UTF string into a binary UTF-8 encoded string var point = nonAsciiChars.charCodeAt(0); if (point >= 0xD800 && point <= 0xDBFF) { var nextcode = nonAsciiChars.charCodeAt(1); if (nextcode !== nextcode) { // NaN because string is 1code point long return fromCharCode(0xef/*11101111*/, 0xbf/*10111111*/, 0xbd/*10111101*/); } // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae if (nextcode >= 0xDC00 && nextcode <= 0xDFFF) { point = (point - 0xD800) * 0x400 + nextcode - 0xDC00 + 0x10000; if (point > 0xffff) { return fromCharCode( (0x1e/*0b11110*/<<3) | (point>>>18), (0x2/*0b10*/<<6) | ((point>>>12)&0x3f/*0b00111111*/), (0x2/*0b10*/<<6) | ((point>>>6)&0x3f/*0b00111111*/), (0x2/*0b10*/<<6) | (point&0x3f/*0b00111111*/) ); } } else { return fromCharCode(0xef, 0xbf, 0xbd); } } if (point <= 0x007f) { return inputString; } else if (point <= 0x07ff) { return fromCharCode((0x6<<5)|(point>>>6), (0x2<<6)|(point&0x3f/*00111111*/)); } else { return fromCharCode( (0xe/*0b1110*/<<4) | (point>>>12), (0x2/*0b10*/<<6) | ((point>>>6)&0x3f/*0b00111111*/), (0x2/*0b10*/<<6) | (point&0x3f/*0b00111111*/) ); } }); setTimeout(whenChange, 0); })(window); img:active{opacity:0.8} <center> <textarea id="source" style="width:66.7vw">Hello \u1234 W\186\0256ld! Enter text into the top box. Then the URL will update automatically. </textarea><br /> <div style="width:66.7vw;display:inline-block;height:calc(25vw + 1em + 6px);border:2px solid;text-align:left;line-height:1em"> <input id="urlBar" style="width:calc(100% - 1em - 13px)" /><img id="gotoButton" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABsAAAAeCAMAAADqx5XUAAAAclBMVEX///9NczZ8e32ko6fDxsU/fBoSQgdFtwA5pAHVxt+7vLzq5ex23y4SXABLiiTm0+/c2N6DhoQ6WSxSyweVlZVvdG/Uz9aF5kYlbwElkwAggACxs7Jl3hX07/cQbQCar5SU9lRntEWGum+C9zIDHwCGnH5IvZAOAAABmUlEQVQoz7WS25acIBBFkRLkIgKKtOCttbv//xdDmTGZzHv2S63ltuBQQP4rdRiRUP8UK4wh6nVddQwj/NtDQTvac8577zTQb72zj65/876qqt7wykU6/1U6vFEgjE1mt/5LRqrpu7oVsn0sjZejMfxR3W/yLikqAFcUx93YxLmZGOtElmEu6Ufd9xV3ZDTGcEvGLbMk0mHHlUSvS5svCwS+hVL8loQQyfpI1Ay8RF/xlNxcsTchGjGDIuBG3Ik7TMyNxn8m0TSnBAK6Z8UZfp3IbAonmJvmsEACum6aNv7B0CnvpezDcNhw9XWsuAr7qnRg6dABmeM4dTgn/DZdXWs3LMspZ1KDMt1kcPJ6S1icWNp2qaEmjq6myx7jbQK3VKItLJaW5FR+cuYlRhYNKzGa9vF4vM5roLW3OSVjkmiGJrPhUq301/16pVKZRGFYWjTP50spTxBN5Z4EKnSonruk+n4tUokv1aJSEl/MLZU90S3L6/U6o0J142iQVp3HcZxKSo8LfkNRCtJaKYFSRX7iaoAAUDty8wvWYR6HJEepdwAAAABJRU5ErkJggg==" style="width:calc(1em + 4px);line-height:1em;vertical-align:-40%;cursor:pointer" /> <iframe id="mainframe" style="width:66.7vw;height:25vw" frameBorder="0"></iframe> </div> </center>
In addition to being very standardized, the above code snippets are also very fast. Instead of an indirect chain of succession where the data has to be converted several times between various forms (such as in Riccardo Galli's response), the above code snippet is as direct as performantly possible. It uses only one simple fast String.prototype.replace call to process the data when encoding, and only one to decode the data when decoding. Another plus is that (especially for big strings), String.prototype.replace allows the browser to automatically handle the underlying memory management of resizing the string, leading a significant performance boost especially in evergreen browsers like Chrome and Firefox that heavily optimize String.prototype.replace. Finally, the icing on the cake is that for you latin script exclūsīvō users, strings which don't contain any code points above 0x7f are extra fast to process because the string remains unmodified by the replacement algorithm.
我已经在https://github.com/anonyco/BestBase64EncoderDecoder/为这个解决方案创建了一个github存储库
对我有用的完整文章:https://developer.mozilla.org/en-US/docs/Web/JavaScript/Base64_encoding_and_decoding
我们从Unicode/UTF-8编码的部分是
function utf8_to_b64( str ) {
return window.btoa(unescape(encodeURIComponent( str )));
}
function b64_to_utf8( str ) {
return decodeURIComponent(escape(window.atob( str )));
}
// Usage:
utf8_to_b64('✓ à la mode'); // "4pyTIMOgIGxhIG1vZGU="
b64_to_utf8('4pyTIMOgIGxhIG1vZGU='); // "✓ à la mode"
这是当今最常用的方法之一。
下面是一些面向可能缺少escape/unescape()的浏览器的面向未来的代码。请注意,ie9及以上版本不支持atob/btoa(),因此您需要为它们使用自定义base64函数。
// Polyfill for escape/unescape
if( !window.unescape ){
window.unescape = function( s ){
return s.replace( /%([0-9A-F]{2})/g, function( m, p ) {
return String.fromCharCode( '0x' + p );
} );
};
}
if( !window.escape ){
window.escape = function( s ){
var chr, hex, i = 0, l = s.length, out = '';
for( ; i < l; i ++ ){
chr = s.charAt( i );
if( chr.search( /[A-Za-z0-9\@\*\_\+\-\.\/]/ ) > -1 ){
out += chr; continue; }
hex = s.charCodeAt( i ).toString( 16 );
out += '%' + ( hex.length % 2 != 0 ? '0' : '' ) + hex;
}
return out;
};
}
// Base64 encoding of UTF-8 strings
var utf8ToB64 = function( s ){
return btoa( unescape( encodeURIComponent( s ) ) );
};
var b64ToUtf8 = function( s ){
return decodeURIComponent( escape( atob( s ) ) );
};
一个更全面的UTF-8编码和解码的例子可以在这里找到:http://jsfiddle.net/47zwb41o/
以下是Mozilla开发资源中描述的2018年更新的解决方案
从unicode编码到b64
function b64EncodeUnicode(str) {
// first we use encodeURIComponent to get percent-encoded UTF-8,
// then we convert the percent encodings into raw bytes which
// can be fed into btoa.
return btoa(encodeURIComponent(str).replace(/%([0-9A-F]{2})/g,
function toSolidBytes(match, p1) {
return String.fromCharCode('0x' + p1);
}));
}
b64EncodeUnicode('✓ à la mode'); // "4pyTIMOgIGxhIG1vZGU="
b64EncodeUnicode('\n'); // "Cg=="
从b64解码到unicode
function b64DecodeUnicode(str) {
// Going backwards: from bytestream, to percent-encoding, to original string.
return decodeURIComponent(atob(str).split('').map(function(c) {
return '%' + ('00' + c.charCodeAt(0).toString(16)).slice(-2);
}).join(''));
}
b64DecodeUnicode('4pyTIMOgIGxhIG1vZGU='); // "✓ à la mode"
b64DecodeUnicode('Cg=='); // "\n"
推荐文章
- 相当于字符串。jQuery格式
- 如何在vue-cli项目中更改端口号
- Angular 2模板中的标签是什么意思?
- JavaScript .includes()方法的多个条件
- 窗口。亲近与自我。close不关闭Chrome中的窗口
- 为什么PHP的json_encode函数转换UTF-8字符串为十六进制实体?
- 同步和异步编程(在node.js中)的区别是什么?
- 在d3.js中调整窗口大小时调整svg的大小
- 如何将两个字符串相加,就好像它们是数字一样?
- 绑定多个事件到一个监听器(没有JQuery)?
- 在JavaScript中将JSON字符串解析为特定对象原型
- 将字符串“true”/“false”转换为布尔值
- 如何使用JavaScript代码获得浏览器宽度?
- event.preventDefault()函数在IE中无法工作
- indexOf()和search()的区别是什么?