我有一些UTF-8编码的数据生活在Javascript Uint8Array元素的范围内。是否有一种有效的方法来解码这些到一个常规的javascript字符串(我相信javascript使用16位Unicode)?我不想一次添加一个字符,因为字符串连接会变得CPU密集。


当前回答

对于ES6和UTF8字符串

decodeURIComponent(escape(String.fromCharCode(...uint8arrData)))

其他回答

使用base64作为编码格式效果很好。这就是它是如何实现通过url在Firefox发送传递秘密。您将需要base64-js包。下面是Send源代码中的函数:

const b64 = require("base64-js")

function arrayToB64(array) {
  return b64.fromByteArray(array).replace(/\+/g, "-").replace(/\//g, "_").replace(/=/g, "")
}

function b64ToArray(str) {
  return b64.toByteArray(str + "===".slice((str.length + 3) % 4))
}
class UTF8{
static encode(str:string){return new UTF8().encode(str)}
static decode(data:Uint8Array){return new UTF8().decode(data)}

private EOF_byte:number = -1;
private EOF_code_point:number = -1;
private encoderError(code_point) {
    console.error("UTF8 encoderError",code_point)
}
private decoderError(fatal, opt_code_point?):number {
    if (fatal) console.error("UTF8 decoderError",opt_code_point)
    return opt_code_point || 0xFFFD;
}
private inRange(a:number, min:number, max:number) {
    return min <= a && a <= max;
}
private div(n:number, d:number) {
    return Math.floor(n / d);
}
private stringToCodePoints(string:string) {
    /** @type {Array.<number>} */
    let cps = [];
    // Based on http://www.w3.org/TR/WebIDL/#idl-DOMString
    let i = 0, n = string.length;
    while (i < string.length) {
        let c = string.charCodeAt(i);
        if (!this.inRange(c, 0xD800, 0xDFFF)) {
            cps.push(c);
        } else if (this.inRange(c, 0xDC00, 0xDFFF)) {
            cps.push(0xFFFD);
        } else { // (inRange(c, 0xD800, 0xDBFF))
            if (i == n - 1) {
                cps.push(0xFFFD);
            } else {
                let d = string.charCodeAt(i + 1);
                if (this.inRange(d, 0xDC00, 0xDFFF)) {
                    let a = c & 0x3FF;
                    let b = d & 0x3FF;
                    i += 1;
                    cps.push(0x10000 + (a << 10) + b);
                } else {
                    cps.push(0xFFFD);
                }
            }
        }
        i += 1;
    }
    return cps;
}

private encode(str:string):Uint8Array {
    let pos:number = 0;
    let codePoints = this.stringToCodePoints(str);
    let outputBytes = [];

    while (codePoints.length > pos) {
        let code_point:number = codePoints[pos++];

        if (this.inRange(code_point, 0xD800, 0xDFFF)) {
            this.encoderError(code_point);
        }
        else if (this.inRange(code_point, 0x0000, 0x007f)) {
            outputBytes.push(code_point);
        } else {
            let count = 0, offset = 0;
            if (this.inRange(code_point, 0x0080, 0x07FF)) {
                count = 1;
                offset = 0xC0;
            } else if (this.inRange(code_point, 0x0800, 0xFFFF)) {
                count = 2;
                offset = 0xE0;
            } else if (this.inRange(code_point, 0x10000, 0x10FFFF)) {
                count = 3;
                offset = 0xF0;
            }

            outputBytes.push(this.div(code_point, Math.pow(64, count)) + offset);

            while (count > 0) {
                let temp = this.div(code_point, Math.pow(64, count - 1));
                outputBytes.push(0x80 + (temp % 64));
                count -= 1;
            }
        }
    }
    return new Uint8Array(outputBytes);
}

private decode(data:Uint8Array):string {
    let fatal:boolean = false;
    let pos:number = 0;
    let result:string = "";
    let code_point:number;
    let utf8_code_point = 0;
    let utf8_bytes_needed = 0;
    let utf8_bytes_seen = 0;
    let utf8_lower_boundary = 0;

    while (data.length > pos) {
        let _byte = data[pos++];

        if (_byte == this.EOF_byte) {
            if (utf8_bytes_needed != 0) {
                code_point = this.decoderError(fatal);
            } else {
                code_point = this.EOF_code_point;
            }
        } else {
            if (utf8_bytes_needed == 0) {
                if (this.inRange(_byte, 0x00, 0x7F)) {
                    code_point = _byte;
                } else {
                    if (this.inRange(_byte, 0xC2, 0xDF)) {
                        utf8_bytes_needed = 1;
                        utf8_lower_boundary = 0x80;
                        utf8_code_point = _byte - 0xC0;
                    } else if (this.inRange(_byte, 0xE0, 0xEF)) {
                        utf8_bytes_needed = 2;
                        utf8_lower_boundary = 0x800;
                        utf8_code_point = _byte - 0xE0;
                    } else if (this.inRange(_byte, 0xF0, 0xF4)) {
                        utf8_bytes_needed = 3;
                        utf8_lower_boundary = 0x10000;
                        utf8_code_point = _byte - 0xF0;
                    } else {
                        this.decoderError(fatal);
                    }
                    utf8_code_point = utf8_code_point * Math.pow(64, utf8_bytes_needed);
                    code_point = null;
                }
            } else if (!this.inRange(_byte, 0x80, 0xBF)) {
                utf8_code_point = 0;
                utf8_bytes_needed = 0;
                utf8_bytes_seen = 0;
                utf8_lower_boundary = 0;
                pos--;
                code_point = this.decoderError(fatal, _byte);
            } else {
                utf8_bytes_seen += 1;
                utf8_code_point = utf8_code_point + (_byte - 0x80) * Math.pow(64, utf8_bytes_needed - utf8_bytes_seen);

                if (utf8_bytes_seen !== utf8_bytes_needed) {
                    code_point = null;
                } else {
                    let cp = utf8_code_point;
                    let lower_boundary = utf8_lower_boundary;
                    utf8_code_point = 0;
                    utf8_bytes_needed = 0;
                    utf8_bytes_seen = 0;
                    utf8_lower_boundary = 0;
                    if (this.inRange(cp, lower_boundary, 0x10FFFF) && !this.inRange(cp, 0xD800, 0xDFFF)) {
                        code_point = cp;
                    } else {
                        code_point = this.decoderError(fatal, _byte);
                    }
                }

            }
        }
        //Decode string
        if (code_point !== null && code_point !== this.EOF_code_point) {
            if (code_point <= 0xFFFF) {
                if (code_point > 0)result += String.fromCharCode(code_point);
            } else {
                code_point -= 0x10000;
                result += String.fromCharCode(0xD800 + ((code_point >> 10) & 0x3ff));
                result += String.fromCharCode(0xDC00 + (code_point & 0x3ff));
            }
        }
    }
    return result;
}

`

var decodedString = decodeURIComponent(escape(String.fromCharCode(...new Uint8Array(err)))); var obj = JSON.parse(decodedString);

我正在使用这个Typescript代码片段:

function UInt8ArrayToString(uInt8Array: Uint8Array): string
{
    var s: string = "[";
    for(var i: number = 0; i < uInt8Array.byteLength; i++)
    {
        if( i > 0 )
            s += ", ";
        s += uInt8Array[i];
    }
    s += "]";
    return s;
}

如果需要JavaScript版本,则删除类型注释。 希望这能有所帮助!

我很沮丧地看到,人们没有显示如何双向或显示事情的工作在不平凡的UTF8字符串。我在codereview.stackexchange.com上找到了一个帖子,其中有一些运行良好的代码。我用它把古老的符文转换成字节,在字节上测试一些加密,然后把东西转换回字符串。工作代码在github这里。为了清晰起见,我重命名了这些方法:

// https://codereview.stackexchange.com/a/3589/75693
function bytesToSring(bytes) {
    var chars = [];
    for(var i = 0, n = bytes.length; i < n;) {
        chars.push(((bytes[i++] & 0xff) << 8) | (bytes[i++] & 0xff));
    }
    return String.fromCharCode.apply(null, chars);
}

// https://codereview.stackexchange.com/a/3589/75693
function stringToBytes(str) {
    var bytes = [];
    for(var i = 0, n = str.length; i < n; i++) {
        var char = str.charCodeAt(i);
        bytes.push(char >>> 8, char & 0xFF);
    }
    return bytes;
}

单元测试使用这个UTF-8字符串:

    // http://kermitproject.org/utf8.html
    // From the Anglo-Saxon Rune Poem (Rune version) 
    const secretUtf8 = `ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ
ᛋᚳᛖᚪᛚ᛫ᚦᛖᚪᚻ᛫ᛗᚪᚾᚾᚪ᛫ᚷᛖᚻᚹᛦᛚᚳ᛫ᛗᛁᚳᛚᚢᚾ᛫ᚻᛦᛏ᛫ᛞᚫᛚᚪᚾ
ᚷᛁᚠ᛫ᚻᛖ᛫ᚹᛁᛚᛖ᛫ᚠᚩᚱ᛫ᛞᚱᛁᚻᛏᚾᛖ᛫ᛞᚩᛗᛖᛋ᛫ᚻᛚᛇᛏᚪᚾ᛬`;

请注意,字符串长度只有117个字符,但编码时字节长度为234。

如果我取消console.log行注释,我可以看到解码的字符串与编码的字符串相同(通过Shamir的秘密共享算法传递的字节!):