我有一些UTF-8编码的数据生活在Javascript Uint8Array元素的范围内。是否有一种有效的方法来解码这些到一个常规的javascript字符串(我相信javascript使用16位Unicode)?我不想一次添加一个字符,因为字符串连接会变得CPU密集。


当前回答

我很沮丧地看到,人们没有显示如何双向或显示事情的工作在不平凡的UTF8字符串。我在codereview.stackexchange.com上找到了一个帖子,其中有一些运行良好的代码。我用它把古老的符文转换成字节,在字节上测试一些加密,然后把东西转换回字符串。工作代码在github这里。为了清晰起见,我重命名了这些方法:

// https://codereview.stackexchange.com/a/3589/75693
function bytesToSring(bytes) {
    var chars = [];
    for(var i = 0, n = bytes.length; i < n;) {
        chars.push(((bytes[i++] & 0xff) << 8) | (bytes[i++] & 0xff));
    }
    return String.fromCharCode.apply(null, chars);
}

// https://codereview.stackexchange.com/a/3589/75693
function stringToBytes(str) {
    var bytes = [];
    for(var i = 0, n = str.length; i < n; i++) {
        var char = str.charCodeAt(i);
        bytes.push(char >>> 8, char & 0xFF);
    }
    return bytes;
}

单元测试使用这个UTF-8字符串:

    // http://kermitproject.org/utf8.html
    // From the Anglo-Saxon Rune Poem (Rune version) 
    const secretUtf8 = `ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ
ᛋᚳᛖᚪᛚ᛫ᚦᛖᚪᚻ᛫ᛗᚪᚾᚾᚪ᛫ᚷᛖᚻᚹᛦᛚᚳ᛫ᛗᛁᚳᛚᚢᚾ᛫ᚻᛦᛏ᛫ᛞᚫᛚᚪᚾ
ᚷᛁᚠ᛫ᚻᛖ᛫ᚹᛁᛚᛖ᛫ᚠᚩᚱ᛫ᛞᚱᛁᚻᛏᚾᛖ᛫ᛞᚩᛗᛖᛋ᛫ᚻᛚᛇᛏᚪᚾ᛬`;

请注意,字符串长度只有117个字符,但编码时字节长度为234。

如果我取消console.log行注释,我可以看到解码的字符串与编码的字符串相同(通过Shamir的秘密共享算法传递的字节!):

其他回答

来自Encoding标准的TextEncoder和TextDecoder,由stringencoding库填充,在字符串和ArrayBuffers之间转换:

var uint8array = new TextEncoder().encode("someString");
var string = new TextDecoder().decode(uint8array);

使用base64作为编码格式效果很好。这就是它是如何实现通过url在Firefox发送传递秘密。您将需要base64-js包。下面是Send源代码中的函数:

const b64 = require("base64-js")

function arrayToB64(array) {
  return b64.fromByteArray(array).replace(/\+/g, "-").replace(/\//g, "_").replace(/=/g, "")
}

function b64ToArray(str) {
  return b64.toByteArray(str + "===".slice((str.length + 3) % 4))
}

以下是我使用的方法:

var str = String.fromCharCode.apply(null, uint8Arr);

对于香草,浏览器端,从麦克风录音,base64函数为我工作(我必须实现一个音频发送功能到聊天)。

      const ui8a =  new Uint8Array(e.target.result);
      const string = btoa(ui8a);
      const ui8a_2 = atob(string).split(',');

现在是完整代码。感谢Bryan Jennings & breakspirit@py4u.net提供的代码。

https://medium.com/@bryanjenningz/how-to-record-and-play-audio-in-javascript-faa1b2b3e49b

https://www.py4u.net/discuss/282499

index . html

<html>
  <head>
    <title>Record Audio Test</title>
    <meta name="encoding" charset="utf-8" />
  </head>
  <body>
    <h1>Audio Recording Test</h1>
    <script src="index.js"></script>
    <button id="action" onclick="start()">Start</button>
    <button id="stop" onclick="stop()">Stop</button>
    <button id="play" onclick="play()">Listen</button>
  </body>
</html>

index.js:

const recordAudio = () =>
  new Promise(async resolve => {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const mediaRecorder = new MediaRecorder(stream);
    const audioChunks = [];

    mediaRecorder.addEventListener("dataavailable", event => {
      audioChunks.push(event.data);
    });

    const start = () => mediaRecorder.start();

    const stop = () =>
      new Promise(resolve => {
        mediaRecorder.addEventListener("stop", () => {
          const audioBlob = new Blob(audioChunks);
          const audioUrl = URL.createObjectURL(audioBlob);
          const audio = new Audio(audioUrl);
          const play = () => audio.play();
          resolve({ audioBlob, audioUrl, play });
        });

        mediaRecorder.stop();
      });

    resolve({ start, stop });
  });

let recorder = null;
let audio = null;
const sleep = time => new Promise(resolve => setTimeout(resolve, time));

const start = async () => {
  recorder = await recordAudio();
  recorder.start();
}

const stop = async () => {
  audio = await recorder.stop();
  read(audio.audioUrl);
}

const play = ()=> {
  audio.play();
}

const read = (blobUrl)=> {

  var xhr = new XMLHttpRequest;
  xhr.responseType = 'blob';
  
  xhr.onload = function() {
      var recoveredBlob = xhr.response;
      const reader = new FileReader();
      // This fires after the blob has been read/loaded.
      reader.addEventListener('loadend', (e) => {

          const ui8a =  new Uint8Array(e.target.result);
          const string = btoa(ui8a);
          const ui8a_2 = atob(string).split(',');
          
          playByteArray(ui8a_2);
      });
      // Start reading the blob as text.
      reader.readAsArrayBuffer(recoveredBlob);
  };
  // get the blob through blob url 
  xhr.open('GET', blobUrl);
  xhr.send();
}

window.onload = init;
var context;    // Audio context
var buf;        // Audio buffer

function init() {
  if (!window.AudioContext) {
      if (!window.webkitAudioContext) {
          alert("Your browser does not support any AudioContext and cannot play back this audio.");
          return;
      }
        window.AudioContext = window.webkitAudioContext;
    }

    context = new AudioContext();
}

function playByteArray(byteArray) {

    var arrayBuffer = new ArrayBuffer(byteArray.length);
    var bufferView = new Uint8Array(arrayBuffer);
    for (i = 0; i < byteArray.length; i++) {
      bufferView[i] = byteArray[i];
    }

    context.decodeAudioData(arrayBuffer, function(buffer) {
        buf = buffer;
        play2();
    });
}

// Play the loaded file
function play2() {
    // Create a source node from the buffer
    var source = context.createBufferSource();
    source.buffer = buf;
    // Connect to the final output node (the speakers)
    source.connect(context.destination);
    // Play immediately
    source.start(0);
}

Albert给出的解决方案,只要不经常调用所提供的函数,并且只用于大小适中的数组,就能很好地工作,否则效率非常低。下面是一个增强的普通JavaScript解决方案,它适用于Node和浏览器,具有以下优点:

•有效工作于所有八字节数组大小

•不生成中间丢弃字符串

•在现代JS引擎上支持4字节字符(否则“?”将被替换)

var utf8ArrayToStr = (function () {
    var charCache = new Array(128);  // Preallocate the cache for the common single byte chars
    var charFromCodePt = String.fromCodePoint || String.fromCharCode;
    var result = [];

    return function (array) {
        var codePt, byte1;
        var buffLen = array.length;

        result.length = 0;

        for (var i = 0; i < buffLen;) {
            byte1 = array[i++];

            if (byte1 <= 0x7F) {
                codePt = byte1;
            } else if (byte1 <= 0xDF) {
                codePt = ((byte1 & 0x1F) << 6) | (array[i++] & 0x3F);
            } else if (byte1 <= 0xEF) {
                codePt = ((byte1 & 0x0F) << 12) | ((array[i++] & 0x3F) << 6) | (array[i++] & 0x3F);
            } else if (String.fromCodePoint) {
                codePt = ((byte1 & 0x07) << 18) | ((array[i++] & 0x3F) << 12) | ((array[i++] & 0x3F) << 6) | (array[i++] & 0x3F);
            } else {
                codePt = 63;    // Cannot convert four byte code points, so use "?" instead
                i += 3;
            }

            result.push(charCache[codePt] || (charCache[codePt] = charFromCodePt(codePt)));
        }

        return result.join('');
    };
})();