我有一些UTF-8编码的数据生活在Javascript Uint8Array元素的范围内。是否有一种有效的方法来解码这些到一个常规的javascript字符串(我相信javascript使用16位Unicode)?我不想一次添加一个字符,因为字符串连接会变得CPU密集。
当前回答
做什么@Sudhir说,然后得到一个字符串,逗号分隔的数字列表使用:
for (var i=0; i<unitArr.byteLength; i++) {
myString += String.fromCharCode(unitArr[i])
}
这会给你想要的字符串, 如果还相关的话
其他回答
试试这些函数,
var JsonToArray = function(json)
{
var str = JSON.stringify(json, null, 0);
var ret = new Uint8Array(str.length);
for (var i = 0; i < str.length; i++) {
ret[i] = str.charCodeAt(i);
}
return ret
};
var binArrayToJson = function(binArray)
{
var str = "";
for (var i = 0; i < binArray.length; i++) {
str += String.fromCharCode(parseInt(binArray[i]));
}
return JSON.parse(str)
}
来源:https://gist.github.com/tomfa/706d10fed78c497731ac,向Tomfa致敬
class UTF8{
static encode(str:string){return new UTF8().encode(str)}
static decode(data:Uint8Array){return new UTF8().decode(data)}
private EOF_byte:number = -1;
private EOF_code_point:number = -1;
private encoderError(code_point) {
console.error("UTF8 encoderError",code_point)
}
private decoderError(fatal, opt_code_point?):number {
if (fatal) console.error("UTF8 decoderError",opt_code_point)
return opt_code_point || 0xFFFD;
}
private inRange(a:number, min:number, max:number) {
return min <= a && a <= max;
}
private div(n:number, d:number) {
return Math.floor(n / d);
}
private stringToCodePoints(string:string) {
/** @type {Array.<number>} */
let cps = [];
// Based on http://www.w3.org/TR/WebIDL/#idl-DOMString
let i = 0, n = string.length;
while (i < string.length) {
let c = string.charCodeAt(i);
if (!this.inRange(c, 0xD800, 0xDFFF)) {
cps.push(c);
} else if (this.inRange(c, 0xDC00, 0xDFFF)) {
cps.push(0xFFFD);
} else { // (inRange(c, 0xD800, 0xDBFF))
if (i == n - 1) {
cps.push(0xFFFD);
} else {
let d = string.charCodeAt(i + 1);
if (this.inRange(d, 0xDC00, 0xDFFF)) {
let a = c & 0x3FF;
let b = d & 0x3FF;
i += 1;
cps.push(0x10000 + (a << 10) + b);
} else {
cps.push(0xFFFD);
}
}
}
i += 1;
}
return cps;
}
private encode(str:string):Uint8Array {
let pos:number = 0;
let codePoints = this.stringToCodePoints(str);
let outputBytes = [];
while (codePoints.length > pos) {
let code_point:number = codePoints[pos++];
if (this.inRange(code_point, 0xD800, 0xDFFF)) {
this.encoderError(code_point);
}
else if (this.inRange(code_point, 0x0000, 0x007f)) {
outputBytes.push(code_point);
} else {
let count = 0, offset = 0;
if (this.inRange(code_point, 0x0080, 0x07FF)) {
count = 1;
offset = 0xC0;
} else if (this.inRange(code_point, 0x0800, 0xFFFF)) {
count = 2;
offset = 0xE0;
} else if (this.inRange(code_point, 0x10000, 0x10FFFF)) {
count = 3;
offset = 0xF0;
}
outputBytes.push(this.div(code_point, Math.pow(64, count)) + offset);
while (count > 0) {
let temp = this.div(code_point, Math.pow(64, count - 1));
outputBytes.push(0x80 + (temp % 64));
count -= 1;
}
}
}
return new Uint8Array(outputBytes);
}
private decode(data:Uint8Array):string {
let fatal:boolean = false;
let pos:number = 0;
let result:string = "";
let code_point:number;
let utf8_code_point = 0;
let utf8_bytes_needed = 0;
let utf8_bytes_seen = 0;
let utf8_lower_boundary = 0;
while (data.length > pos) {
let _byte = data[pos++];
if (_byte == this.EOF_byte) {
if (utf8_bytes_needed != 0) {
code_point = this.decoderError(fatal);
} else {
code_point = this.EOF_code_point;
}
} else {
if (utf8_bytes_needed == 0) {
if (this.inRange(_byte, 0x00, 0x7F)) {
code_point = _byte;
} else {
if (this.inRange(_byte, 0xC2, 0xDF)) {
utf8_bytes_needed = 1;
utf8_lower_boundary = 0x80;
utf8_code_point = _byte - 0xC0;
} else if (this.inRange(_byte, 0xE0, 0xEF)) {
utf8_bytes_needed = 2;
utf8_lower_boundary = 0x800;
utf8_code_point = _byte - 0xE0;
} else if (this.inRange(_byte, 0xF0, 0xF4)) {
utf8_bytes_needed = 3;
utf8_lower_boundary = 0x10000;
utf8_code_point = _byte - 0xF0;
} else {
this.decoderError(fatal);
}
utf8_code_point = utf8_code_point * Math.pow(64, utf8_bytes_needed);
code_point = null;
}
} else if (!this.inRange(_byte, 0x80, 0xBF)) {
utf8_code_point = 0;
utf8_bytes_needed = 0;
utf8_bytes_seen = 0;
utf8_lower_boundary = 0;
pos--;
code_point = this.decoderError(fatal, _byte);
} else {
utf8_bytes_seen += 1;
utf8_code_point = utf8_code_point + (_byte - 0x80) * Math.pow(64, utf8_bytes_needed - utf8_bytes_seen);
if (utf8_bytes_seen !== utf8_bytes_needed) {
code_point = null;
} else {
let cp = utf8_code_point;
let lower_boundary = utf8_lower_boundary;
utf8_code_point = 0;
utf8_bytes_needed = 0;
utf8_bytes_seen = 0;
utf8_lower_boundary = 0;
if (this.inRange(cp, lower_boundary, 0x10FFFF) && !this.inRange(cp, 0xD800, 0xDFFF)) {
code_point = cp;
} else {
code_point = this.decoderError(fatal, _byte);
}
}
}
}
//Decode string
if (code_point !== null && code_point !== this.EOF_code_point) {
if (code_point <= 0xFFFF) {
if (code_point > 0)result += String.fromCharCode(code_point);
} else {
code_point -= 0x10000;
result += String.fromCharCode(0xD800 + ((code_point >> 10) & 0x3ff));
result += String.fromCharCode(0xDC00 + (code_point & 0x3ff));
}
}
}
return result;
}
`
我正在使用这个函数,它对我有用:
function uint8ArrayToBase64(data) {
return btoa(Array.from(data).map((c) => String.fromCharCode(c)).join(''));
}
我正在使用这个Typescript代码片段:
function UInt8ArrayToString(uInt8Array: Uint8Array): string
{
var s: string = "[";
for(var i: number = 0; i < uInt8Array.byteLength; i++)
{
if( i > 0 )
s += ", ";
s += uInt8Array[i];
}
s += "]";
return s;
}
如果需要JavaScript版本,则删除类型注释。 希望这能有所帮助!
以下是我使用的方法:
var str = String.fromCharCode.apply(null, uint8Arr);
推荐文章
- 我如何使用Jest模拟JavaScript的“窗口”对象?
- 我如何等待一个承诺完成之前返回一个函数的变量?
- 在JavaScript中根据键值查找和删除数组中的对象
- 使嵌套JavaScript对象平放/不平放的最快方法
- 如何以及为什么'a'['toUpperCase']()在JavaScript工作?
- 有Grunt生成index.html不同的设置
- 文档之间的区别。addEventListener和window。addEventListener?
- 如何检查动态附加的事件监听器是否存在?
- 如何写setTimeout与参数Coffeescript
- 将JavaScript字符串中的多个空格替换为单个空格
- JavaScript: override alert()
- 重置setTimeout
- 如何确保<select>表单字段被禁用时提交?
- jQuery有不聚焦的方法吗?
- 反应钩子-正确的方式清除超时和间隔