如果你有UTF8,使用这个(实际上与SVG源代码一起工作),像这样:
btoa(unescape(encodeURIComponent(str)))
例子:
var imgsrc = 'data:image/svg+xml;base64,' + btoa(unescape(encodeURIComponent(markup)));
var img = new Image(1, 1); // width, height values are optional params
img.src = imgsrc;
如果你需要解码base64,使用这个:
var str2 = decodeURIComponent(escape(window.atob(b64)));
console.log(str2);
例子:
var str = "äöüÄÖÜçéèñ";
var b64 = window.btoa(unescape(encodeURIComponent(str)))
console.log(b64);
var str2 = decodeURIComponent(escape(window.atob(b64)));
console.log(str2);
注意:如果你需要让它在mobile-safari中工作,你可能需要从base64数据中剥离所有的空白…
function b64_to_utf8( str ) {
str = str.replace(/\s/g, '');
return decodeURIComponent(escape(window.atob( str )));
}
2017年更新
这个问题又困扰我了。
简单的事实是,atob并不真正处理utf8 -string -它只处理ASCII。
另外,我不会使用像js-base64这样的臃肿软件。
但webtoolkit确实有一个小的、漂亮的、非常可维护的实现:
/**
*
* Base64 encode / decode
* http://www.webtoolkit.info
*
**/
var Base64 = {
// private property
_keyStr: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
// public method for encoding
, encode: function (input)
{
var output = "";
var chr1, chr2, chr3, enc1, enc2, enc3, enc4;
var i = 0;
input = Base64._utf8_encode(input);
while (i < input.length)
{
chr1 = input.charCodeAt(i++);
chr2 = input.charCodeAt(i++);
chr3 = input.charCodeAt(i++);
enc1 = chr1 >> 2;
enc2 = ((chr1 & 3) << 4) | (chr2 >> 4);
enc3 = ((chr2 & 15) << 2) | (chr3 >> 6);
enc4 = chr3 & 63;
if (isNaN(chr2))
{
enc3 = enc4 = 64;
}
else if (isNaN(chr3))
{
enc4 = 64;
}
output = output +
this._keyStr.charAt(enc1) + this._keyStr.charAt(enc2) +
this._keyStr.charAt(enc3) + this._keyStr.charAt(enc4);
} // Whend
return output;
} // End Function encode
// public method for decoding
,decode: function (input)
{
var output = "";
var chr1, chr2, chr3;
var enc1, enc2, enc3, enc4;
var i = 0;
input = input.replace(/[^A-Za-z0-9\+\/\=]/g, "");
while (i < input.length)
{
enc1 = this._keyStr.indexOf(input.charAt(i++));
enc2 = this._keyStr.indexOf(input.charAt(i++));
enc3 = this._keyStr.indexOf(input.charAt(i++));
enc4 = this._keyStr.indexOf(input.charAt(i++));
chr1 = (enc1 << 2) | (enc2 >> 4);
chr2 = ((enc2 & 15) << 4) | (enc3 >> 2);
chr3 = ((enc3 & 3) << 6) | enc4;
output = output + String.fromCharCode(chr1);
if (enc3 != 64)
{
output = output + String.fromCharCode(chr2);
}
if (enc4 != 64)
{
output = output + String.fromCharCode(chr3);
}
} // Whend
output = Base64._utf8_decode(output);
return output;
} // End Function decode
// private method for UTF-8 encoding
,_utf8_encode: function (string)
{
var utftext = "";
string = string.replace(/\r\n/g, "\n");
for (var n = 0; n < string.length; n++)
{
var c = string.charCodeAt(n);
if (c < 128)
{
utftext += String.fromCharCode(c);
}
else if ((c > 127) && (c < 2048))
{
utftext += String.fromCharCode((c >> 6) | 192);
utftext += String.fromCharCode((c & 63) | 128);
}
else
{
utftext += String.fromCharCode((c >> 12) | 224);
utftext += String.fromCharCode(((c >> 6) & 63) | 128);
utftext += String.fromCharCode((c & 63) | 128);
}
} // Next n
return utftext;
} // End Function _utf8_encode
// private method for UTF-8 decoding
,_utf8_decode: function (utftext)
{
var string = "";
var i = 0;
var c, c1, c2, c3;
c = c1 = c2 = 0;
while (i < utftext.length)
{
c = utftext.charCodeAt(i);
if (c < 128)
{
string += String.fromCharCode(c);
i++;
}
else if ((c > 191) && (c < 224))
{
c2 = utftext.charCodeAt(i + 1);
string += String.fromCharCode(((c & 31) << 6) | (c2 & 63));
i += 2;
}
else
{
c2 = utftext.charCodeAt(i + 1);
c3 = utftext.charCodeAt(i + 2);
string += String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
i += 3;
}
} // Whend
return string;
} // End Function _utf8_decode
}
https://www.fileformat.info/info/unicode/utf8.htm
For any character equal to or below 127 (hex 0x7F), the UTF-8
representation is one byte. It is just the lowest 7 bits of the full
unicode value. This is also the same as the ASCII value.
For characters equal to or below 2047 (hex 0x07FF), the UTF-8
representation is spread across two bytes. The first byte will have
the two high bits set and the third bit clear (i.e. 0xC2 to 0xDF). The
second byte will have the top bit set and the second bit clear (i.e.
0x80 to 0xBF).
For all characters equal to or greater than 2048 but less than 65535
(0xFFFF), the UTF-8 representation is spread across three bytes.