// Accumulated per-frame PCM sample copies for each stereo channel.
const leftDataList = [];
const rightDataList = [];

/**
 * ScriptProcessorNode `onaudioprocess` handler: snapshots the current
 * frame's left/right channel PCM data for later merging.
 * @param {AudioProcessingEvent} event - carries this frame's inputBuffer
 */
function onAudioProcess(event) {
  const { inputBuffer } = event;
  // slice(0) copies the samples — the underlying channel buffer is
  // reused by the audio pipeline on the next frame.
  leftDataList.push(inputBuffer.getChannelData(0).slice(0));
  rightDataList.push(inputBuffer.getChannelData(1).slice(0));
}
/**
 * Interleaves two mono channels into one stereo stream: L0 R0 L1 R1 …
 * Assumes both channels have the same length (true for Web Audio frames).
 * @param {Float32Array} left - left-channel samples
 * @param {Float32Array} right - right-channel samples
 * @returns {Float32Array} interleaved stereo samples
 */
function interleaveLeftAndRight(left, right) {
  const interleaved = new Float32Array(left.length + right.length);
  let writeIndex = 0;
  for (let readIndex = 0; readIndex < left.length; readIndex++) {
    interleaved[writeIndex] = left[readIndex];
    interleaved[writeIndex + 1] = right[readIndex];
    writeIndex += 2;
  }
  return interleaved;
}
Float32 转 Int16
// Scale each Float32 sample (-1..1) toward the Int16 range; positive
// samples map to 0x7fff, negative samples to 0x8000, preserving symmetry.
// The intermediate stays a Float32Array (map on a typed array keeps its
// type), matching the original's rounding behavior before Int16 truncation.
const float32 = new Float32Array(1)
const scaled = float32.map(sample => sample * (sample > 0 ? 0x7fff : 0x8000))
const int16 = Int16Array.from(scaled)
/**
 * Concatenates a list of Float32Array chunks into one Float32Array.
 *
 * Fixes over the naive version: the output length is the sum of the
 * actual chunk lengths (the original computed `list.length * list[0].length`,
 * which was wrong for unequal chunks and threw on an empty list).
 *
 * @param {Float32Array[]} list - per-frame PCM chunks
 * @returns {Float32Array} all chunks concatenated in order
 */
function mergeArray(list) {
  const totalLength = list.reduce((sum, chunk) => sum + chunk.length, 0)
  const data = new Float32Array(totalLength)
  let offset = 0
  for (const chunk of list) {
    data.set(chunk, offset)
    offset += chunk.length
  }
  return data
}
/**
 * Writes an ASCII string into a DataView one byte per character.
 * Used for the 4-byte RIFF/WAVE chunk identifiers.
 * @param {DataView} view - target view over the WAV buffer
 * @param {number} offset - byte offset to start writing at
 * @param {string} string - ASCII text such as 'RIFF' or 'data'
 */
function writeUTFBytes(view, offset, string) {
  for (let i = 0; i < string.length; i += 1) {
    view.setUint8(offset + i, string.charCodeAt(i))
  }
}
/**
 * Wraps Float32 PCM samples (nominal range [-1, 1]) in a 16-bit PCM WAV
 * container.
 *
 * Fixes over the original:
 * - RIFF chunk size is file size minus 8 (= 36 + data bytes), not 44 + data.
 * - Format chunk id is the full 4-byte 'fmt ' (trailing space), per the
 *   RIFF/WAVE spec; 'fmt' left the 4th byte as 0x00.
 * - Byte rate accounts for the channel count (sampleRate * channels * 2).
 * - Samples are clamped to [-1, 1] so out-of-range input cannot wrap
 *   around when truncated to Int16.
 *
 * @param {Float32Array|number[]} audioData - (interleaved) PCM samples
 * @param {number} [sampleRate=44100] - samples per second per channel
 * @param {number} [channels=1] - channel count (1 = mono, 2 = stereo)
 * @returns {ArrayBuffer} complete WAV file contents
 */
function createWavBuffer(audioData, sampleRate = 44100, channels = 1) {
  const WAV_HEAD_SIZE = 44
  const dataByteLength = audioData.length * 2 // 2 bytes per Int16 sample
  const buffer = new ArrayBuffer(dataByteLength + WAV_HEAD_SIZE)
  const view = new DataView(buffer)
  // Local ASCII writer keeps this function self-contained.
  const writeString = (offset, text) => {
    for (let i = 0; i < text.length; i++) {
      view.setUint8(offset + i, text.charCodeAt(i))
    }
  }
  // RIFF chunk descriptor
  writeString(0, 'RIFF')
  // chunk size = total file size minus the 8-byte 'RIFF' + size fields
  view.setUint32(4, 36 + dataByteLength, true)
  writeString(8, 'WAVE')
  // format sub-chunk — id must be exactly 4 bytes: 'fmt ' incl. the space
  writeString(12, 'fmt ')
  // format chunk length (16 for PCM)
  view.setUint32(16, 16, true)
  // audio format: 1 = uncompressed linear PCM
  view.setUint16(20, 1, true)
  view.setUint16(22, channels, true)
  view.setUint32(24, sampleRate, true)
  // byte rate = sample rate * block align
  view.setUint32(28, sampleRate * channels * 2, true)
  // block align = channel count * bytes per sample
  view.setUint16(32, channels * 2, true)
  // bits per sample
  view.setUint16(34, 16, true)
  // data sub-chunk
  writeString(36, 'data')
  view.setUint32(40, dataByteLength, true)
  // PCM payload: clamp, scale to Int16, write little-endian
  let index = WAV_HEAD_SIZE
  const volume = 1
  const { length } = audioData
  for (let i = 0; i < length; i++) {
    const sample = Math.max(-1, Math.min(1, audioData[i]))
    view.setInt16(index, sample * (0x7fff * volume), true)
    index += 2
  }
  return buffer
}
// Usage: merge the per-frame buffers collected by onAudioProcess, then wrap as WAV.
// NOTE(review): `audioBuffers` is not defined in this file — presumably
// leftDataList (mono) or the interleaved stereo data; confirm with the caller.
createWavBuffer(mergeArray(audioBuffers))
WAV 基本上是 PCM 加上一些音频信息
简单的短时能量计算
/**
 * Computes per-frame short-time energy: the sum of squared samples over
 * consecutive frames. The trailing partial frame (if any) is included.
 *
 * Generalized over the original: the frame size is a parameter instead of
 * a hard-coded 256 (the default preserves existing behavior).
 *
 * @param {Float32Array|number[]} audioData - PCM samples
 * @param {number} [frameSize=256] - samples per energy frame
 * @returns {number[]} energy value per frame
 */
function shortTimeEnergy(audioData, frameSize = 256) {
  const energy = []
  let sum = 0
  const { length } = audioData
  for (let i = 0; i < length; i++) {
    sum += audioData[i] ** 2
    if ((i + 1) % frameSize === 0) {
      energy.push(sum)
      sum = 0
    } else if (i === length - 1) {
      // flush the final partial frame (the else-if avoids a duplicate
      // push when length is an exact multiple of frameSize)
      energy.push(sum)
    }
  }
  return energy
}
由于计算结果会因设备的录音增益不同而差异较大, 计算出的数值也较大, 所以使用比值简单区分人声和噪音
查看 DEMO
// Empirically chosen peak-to-average energy ratio separating voice from noise.
const NoiseVoiceWatershedWave = 2.3
// Energy of the current frame's first (left) channel.
// NOTE(review): `e` is the onaudioprocess event supplied by the caller — it
// is not defined in this file.
const energy = shortTimeEnergy(e.inputBuffer.getChannelData(0).slice(0))
const avg = energy.reduce((a, b) => a + b) / energy.length
// If the energy peak stands out far enough above the average, classify the
// frame as voice; otherwise treat it as background noise.
const nextState = Math.max(...energy) / avg > NoiseVoiceWatershedWave ? 'voice' : 'noise'
Web Worker 优化性能
音频数据的数据量较大, 所以可以使用 Web Worker 进行优化, 避免阻塞 UI 线程
在 Webpack 项目里 Web Worker 比较简单, 安装 worker-loader 即可