// Source: jiti-meet/react/features/stream-effects/noise-suppression/NoiseSuppressorWorklet.ts
// (172 lines, 7.6 KiB, TypeScript)

// @ts-ignore
import { createRNNWasmModuleSync } from '@jitsi/rnnoise-wasm';
import { leastCommonMultiple } from '../../base/util/math';
import RnnoiseProcessor from '../rnnoise/RnnoiseProcessor';
/**
 * Audio worklet which denoises the targeted audio stream using rnnoise.
 *
 * The worklet receives audio in fixed-size blocks (`_procNodeSampleRate` samples per `process` call) while
 * the denoise processor consumes frames of `_denoiseSampleSize` samples. A circular buffer bridges the two
 * sizes: raw input is appended to it, denoised in place frame by frame, and then copied to the output one
 * block at a time.
 */
class NoiseSuppressorWorklet extends AudioWorkletProcessor {
    /**
     * RnnoiseProcessor instance.
     */
    private _denoiseProcessor: RnnoiseProcessor;

    /**
     * Number of samples per channel handed to `process` on each call. Despite the field name this is a
     * block size, not a sample rate.
     */
    private _procNodeSampleRate = 128;

    /**
     * Number of PCM samples per frame expected by the denoise processor.
     */
    private _denoiseSampleSize: number;

    /**
     * Length (in samples) and backing storage of the circular buffer.
     */
    private _circularBufferLength: number;
    private _circularBuffer: Float32Array;

    /**
     * Write position for raw input. Input data is copied to the circular buffer as it comes in, one block
     * at a time, so this is also the total length of raw audio gathered since the last rollover.
     */
    private _inputBufferLength = 0;

    /**
     * End position of the denoised region. Denoising is done directly on the circular buffer using subarray
     * views, but because the input block size and `_denoiseSampleSize` differ, denoised samples lag behind
     * the gathered raw samples and need their own index.
     */
    private _denoisedBufferLength = 0;

    /**
     * Start index, on the circular buffer, of denoised data that has not been sent to the output yet.
     */
    private _denoisedBufferIndx = 0;

    /**
     * Creates the denoise processor and allocates the circular buffer.
     */
    constructor() {
        super();

        // The wasm module needs to be compiled to load synchronously as the audio worklet `addModule()`
        // initialization process does not wait for the resolution of promises in the AudioWorkletGlobalScope.
        this._denoiseProcessor = new RnnoiseProcessor(createRNNWasmModuleSync());

        this._denoiseSampleSize = this._denoiseProcessor.getSampleLength();

        // A circular buffer is used to avoid unnecessary memory operations. Because the worklet block size
        // does not match the frame size required by rnnoise, two cases can occur:
        // 1. There is not enough data for a frame, in which case it is buffered.
        // 2. There is enough data but some residue remains after denoising, so it is kept for the next call.
        // When the buffer reaches its end and rolls over, the residue could be split between the end and the
        // beginning of the buffer, which would require complicated handling. Using the least common multiple
        // of both sizes as the buffer length guarantees that the input index and the denoise index reach the
        // end of the buffer at the same time, so no residue is ever split.
        this._circularBufferLength = leastCommonMultiple(this._procNodeSampleRate, this._denoiseSampleSize);
        this._circularBuffer = new Float32Array(this._circularBufferLength);
    }

    /**
     * Worklet interface process method. The first channel of the first input is appended to the circular
     * buffer, every complete frame of `_denoiseSampleSize` samples is denoised in place, and one block of
     * denoised audio is copied to the first channel of the first output once enough of it is available.
     *
     * @param inputs - Array of inputs connected to the node, each of them with their associated array of
     * channels. Each channel is an array of PCM samples. An input with nothing connected has no channels.
     * @param outputs - Array of outputs similar to the inputs parameter structure, expected to be filled
     * during the execution of `process`. By default each channel is zero filled.
     * @returns Whether or not the processor should remain active; this implementation always returns true.
     */
    process(inputs: Float32Array[][], outputs: Float32Array[][]): boolean {
        // We expect the incoming track to be mono, if a stereo track is passed only one of its channels will
        // get denoised and sent back.
        // TODO Technically we can denoise both channels however this might require a new rnnoise context,
        // some more investigation is required.
        const inData = inputs[0]?.[0];
        const outData = outputs[0]?.[0];

        // Exit early when there is nothing to read from or write to (e.g. the input node is not connected or
        // has been disconnected). Without this guard the buffer operations below would throw. The buffered
        // state is left untouched so processing resumes seamlessly once data flows again.
        if (!inData || !outData) {
            return true;
        }

        // Append the new raw PCM block.
        this._circularBuffer.set(inData, this._inputBufferLength);
        this._inputBufferLength += inData.length;

        // New raw samples were just added, denoise every complete frame that is now available.
        // `_denoisedBufferLength` is where the previous denoise iteration ended, i.e. it accounts for
        // residue data left over from earlier calls.
        while (this._denoisedBufferLength + this._denoiseSampleSize <= this._inputBufferLength) {
            // A subarray is a view on the circular buffer, so the frame is modified in place without extra
            // copies. NOTE(review): the `true` flag presumably requests in-place denoising - confirm against
            // RnnoiseProcessor.processAudioFrame.
            const denoiseFrame = this._circularBuffer.subarray(
                this._denoisedBufferLength,
                this._denoisedBufferLength + this._denoiseSampleSize
            );

            this._denoiseProcessor.processAudioFrame(denoiseFrame, true);
            this._denoisedBufferLength += this._denoiseSampleSize;
        }

        // Determine how much denoised audio is available. If the start index of the unsent denoised samples
        // is greater than `_denoisedBufferLength`, the input side has already rolled over and the unsent
        // data runs up to the end of the buffer.
        let unsentDenoisedDataLength;

        if (this._denoisedBufferIndx > this._denoisedBufferLength) {
            unsentDenoisedDataLength = this._circularBufferLength - this._denoisedBufferIndx;
        } else {
            unsentDenoisedDataLength = this._denoisedBufferLength - this._denoisedBufferIndx;
        }

        // Only copy denoised data to the output when there is enough of it to fill the whole output block.
        // Otherwise the output is left as provided and the denoised data is sent on a later call.
        if (unsentDenoisedDataLength >= outData.length) {
            const denoisedFrame = this._circularBuffer.subarray(
                this._denoisedBufferIndx,
                this._denoisedBufferIndx + outData.length
            );

            outData.set(denoisedFrame, 0);
            this._denoisedBufferIndx += outData.length;
        }

        // When the end of the circular buffer has been reached, start from the beginning. By the time the
        // index starts over, the data at the beginning is stale (has already been processed) and can be
        // safely overwritten.
        if (this._denoisedBufferIndx === this._circularBufferLength) {
            this._denoisedBufferIndx = 0;
        }

        // Because the circular buffer's length is the lcm of both the input block size and the processor's
        // frame size, by the time the input index reaches the end the denoise index is there as well.
        if (this._inputBufferLength === this._circularBufferLength) {
            this._inputBufferLength = 0;
            this._denoisedBufferLength = 0;
        }

        return true;
    }
}
// Register the processor class with the worklet scope under the name 'NoiseSuppressorWorklet'; this is the
// name code outside the worklet is expected to use when instantiating a node backed by this processor.
registerProcessor('NoiseSuppressorWorklet', NoiseSuppressorWorklet);