我有一些UTF-8编码的数据生活在Javascript Uint8Array元素的范围内。是否有一种有效的方法来解码这些到一个常规的javascript字符串(我相信javascript使用16位Unicode)?我不想一次添加一个字符,因为字符串连接会变得CPU密集。
当前回答
对于ES6和UTF8字符串
decodeURIComponent(escape(String.fromCharCode(...uint8arrData)))
其他回答
使用base64作为编码格式效果很好。这就是它是如何实现通过url在Firefox发送传递秘密。您将需要base64-js包。下面是Send源代码中的函数:
const b64 = require("base64-js")
function arrayToB64(array) {
return b64.fromByteArray(array).replace(/\+/g, "-").replace(/\//g, "_").replace(/=/g, "")
}
function b64ToArray(str) {
return b64.toByteArray(str + "===".slice((str.length + 3) % 4))
}
class UTF8{
static encode(str:string){return new UTF8().encode(str)}
static decode(data:Uint8Array){return new UTF8().decode(data)}
private EOF_byte:number = -1;
private EOF_code_point:number = -1;
private encoderError(code_point) {
console.error("UTF8 encoderError",code_point)
}
private decoderError(fatal, opt_code_point?):number {
if (fatal) console.error("UTF8 decoderError",opt_code_point)
return opt_code_point || 0xFFFD;
}
private inRange(a:number, min:number, max:number) {
return min <= a && a <= max;
}
private div(n:number, d:number) {
return Math.floor(n / d);
}
private stringToCodePoints(string:string) {
/** @type {Array.<number>} */
let cps = [];
// Based on http://www.w3.org/TR/WebIDL/#idl-DOMString
let i = 0, n = string.length;
while (i < string.length) {
let c = string.charCodeAt(i);
if (!this.inRange(c, 0xD800, 0xDFFF)) {
cps.push(c);
} else if (this.inRange(c, 0xDC00, 0xDFFF)) {
cps.push(0xFFFD);
} else { // (inRange(c, 0xD800, 0xDBFF))
if (i == n - 1) {
cps.push(0xFFFD);
} else {
let d = string.charCodeAt(i + 1);
if (this.inRange(d, 0xDC00, 0xDFFF)) {
let a = c & 0x3FF;
let b = d & 0x3FF;
i += 1;
cps.push(0x10000 + (a << 10) + b);
} else {
cps.push(0xFFFD);
}
}
}
i += 1;
}
return cps;
}
private encode(str:string):Uint8Array {
let pos:number = 0;
let codePoints = this.stringToCodePoints(str);
let outputBytes = [];
while (codePoints.length > pos) {
let code_point:number = codePoints[pos++];
if (this.inRange(code_point, 0xD800, 0xDFFF)) {
this.encoderError(code_point);
}
else if (this.inRange(code_point, 0x0000, 0x007f)) {
outputBytes.push(code_point);
} else {
let count = 0, offset = 0;
if (this.inRange(code_point, 0x0080, 0x07FF)) {
count = 1;
offset = 0xC0;
} else if (this.inRange(code_point, 0x0800, 0xFFFF)) {
count = 2;
offset = 0xE0;
} else if (this.inRange(code_point, 0x10000, 0x10FFFF)) {
count = 3;
offset = 0xF0;
}
outputBytes.push(this.div(code_point, Math.pow(64, count)) + offset);
while (count > 0) {
let temp = this.div(code_point, Math.pow(64, count - 1));
outputBytes.push(0x80 + (temp % 64));
count -= 1;
}
}
}
return new Uint8Array(outputBytes);
}
private decode(data:Uint8Array):string {
let fatal:boolean = false;
let pos:number = 0;
let result:string = "";
let code_point:number;
let utf8_code_point = 0;
let utf8_bytes_needed = 0;
let utf8_bytes_seen = 0;
let utf8_lower_boundary = 0;
while (data.length > pos) {
let _byte = data[pos++];
if (_byte == this.EOF_byte) {
if (utf8_bytes_needed != 0) {
code_point = this.decoderError(fatal);
} else {
code_point = this.EOF_code_point;
}
} else {
if (utf8_bytes_needed == 0) {
if (this.inRange(_byte, 0x00, 0x7F)) {
code_point = _byte;
} else {
if (this.inRange(_byte, 0xC2, 0xDF)) {
utf8_bytes_needed = 1;
utf8_lower_boundary = 0x80;
utf8_code_point = _byte - 0xC0;
} else if (this.inRange(_byte, 0xE0, 0xEF)) {
utf8_bytes_needed = 2;
utf8_lower_boundary = 0x800;
utf8_code_point = _byte - 0xE0;
} else if (this.inRange(_byte, 0xF0, 0xF4)) {
utf8_bytes_needed = 3;
utf8_lower_boundary = 0x10000;
utf8_code_point = _byte - 0xF0;
} else {
this.decoderError(fatal);
}
utf8_code_point = utf8_code_point * Math.pow(64, utf8_bytes_needed);
code_point = null;
}
} else if (!this.inRange(_byte, 0x80, 0xBF)) {
utf8_code_point = 0;
utf8_bytes_needed = 0;
utf8_bytes_seen = 0;
utf8_lower_boundary = 0;
pos--;
code_point = this.decoderError(fatal, _byte);
} else {
utf8_bytes_seen += 1;
utf8_code_point = utf8_code_point + (_byte - 0x80) * Math.pow(64, utf8_bytes_needed - utf8_bytes_seen);
if (utf8_bytes_seen !== utf8_bytes_needed) {
code_point = null;
} else {
let cp = utf8_code_point;
let lower_boundary = utf8_lower_boundary;
utf8_code_point = 0;
utf8_bytes_needed = 0;
utf8_bytes_seen = 0;
utf8_lower_boundary = 0;
if (this.inRange(cp, lower_boundary, 0x10FFFF) && !this.inRange(cp, 0xD800, 0xDFFF)) {
code_point = cp;
} else {
code_point = this.decoderError(fatal, _byte);
}
}
}
}
//Decode string
if (code_point !== null && code_point !== this.EOF_code_point) {
if (code_point <= 0xFFFF) {
if (code_point > 0)result += String.fromCharCode(code_point);
} else {
code_point -= 0x10000;
result += String.fromCharCode(0xD800 + ((code_point >> 10) & 0x3ff));
result += String.fromCharCode(0xDC00 + (code_point & 0x3ff));
}
}
}
return result;
}
`
var decodedString = decodeURIComponent(escape(String.fromCharCode(...new Uint8Array(err)))); var obj = JSON.parse(decodedString);
我正在使用这个Typescript代码片段:
function UInt8ArrayToString(uInt8Array: Uint8Array): string
{
var s: string = "[";
for(var i: number = 0; i < uInt8Array.byteLength; i++)
{
if( i > 0 )
s += ", ";
s += uInt8Array[i];
}
s += "]";
return s;
}
如果需要JavaScript版本,则删除类型注释。 希望这能有所帮助!
我很沮丧地看到,人们没有显示如何双向或显示事情的工作在不平凡的UTF8字符串。我在codereview.stackexchange.com上找到了一个帖子,其中有一些运行良好的代码。我用它把古老的符文转换成字节,在字节上测试一些加密,然后把东西转换回字符串。工作代码在github这里。为了清晰起见,我重命名了这些方法:
// https://codereview.stackexchange.com/a/3589/75693
function bytesToSring(bytes) {
var chars = [];
for(var i = 0, n = bytes.length; i < n;) {
chars.push(((bytes[i++] & 0xff) << 8) | (bytes[i++] & 0xff));
}
return String.fromCharCode.apply(null, chars);
}
// https://codereview.stackexchange.com/a/3589/75693
function stringToBytes(str) {
var bytes = [];
for(var i = 0, n = str.length; i < n; i++) {
var char = str.charCodeAt(i);
bytes.push(char >>> 8, char & 0xFF);
}
return bytes;
}
单元测试使用这个UTF-8字符串:
// http://kermitproject.org/utf8.html
// From the Anglo-Saxon Rune Poem (Rune version)
const secretUtf8 = `ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ
ᛋᚳᛖᚪᛚ᛫ᚦᛖᚪᚻ᛫ᛗᚪᚾᚾᚪ᛫ᚷᛖᚻᚹᛦᛚᚳ᛫ᛗᛁᚳᛚᚢᚾ᛫ᚻᛦᛏ᛫ᛞᚫᛚᚪᚾ
ᚷᛁᚠ᛫ᚻᛖ᛫ᚹᛁᛚᛖ᛫ᚠᚩᚱ᛫ᛞᚱᛁᚻᛏᚾᛖ᛫ᛞᚩᛗᛖᛋ᛫ᚻᛚᛇᛏᚪᚾ᛬`;
请注意,字符串长度只有117个字符,但编码时字节长度为234。
如果我取消console.log行注释,我可以看到解码的字符串与编码的字符串相同(通过Shamir的秘密共享算法传递的字节!):
推荐文章
- 使用jQuery改变输入字段的类型
- 在JavaScript中,什么相当于Java的Thread.sleep() ?
- 使用jQuery以像素为整数填充或边距值
- 检查是否选择了jQuery选项,如果没有选择默认值
- Next.js React应用中没有定义Window
- 如何重置笑话模拟函数调用计数之前,每次测试
- 如何强制一个功能React组件渲染?
- 在javascript中从平面数组构建树数组
- 将Dropzone.js与其他字段集成到现有的HTML表单中
- 如何在AngularJS中观察路由变化?
- JavaScript DOM删除元素
- 将dd-mm-yyyy字符串转换为日期
- Javascript复选框onChange
- Javascript函数前导bang !语法
- 如何在页面上遍历所有DOM元素?