172 lines
7.6 KiB
TypeScript
172 lines
7.6 KiB
TypeScript
// @ts-ignore
|
|
import { createRNNWasmModuleSync } from '@jitsi/rnnoise-wasm';
|
|
|
|
import { leastCommonMultiple } from '../../base/util/math';
|
|
import RnnoiseProcessor from '../rnnoise/RnnoiseProcessor';
|
|
|
|
|
|
/**
 * Audio worklet which denoises the targeted audio stream using rnnoise.
 *
 * Incoming audio arrives in small fixed-size blocks while rnnoise consumes larger fixed-size frames, so raw
 * samples are accumulated in a circular buffer, denoised in place one rnnoise frame at a time and then handed
 * back to the audio graph one block at a time.
 */
class NoiseSuppressorWorklet extends AudioWorkletProcessor {
    /**
     * RnnoiseProcessor instance.
     */
    private _denoiseProcessor: RnnoiseProcessor;

    /**
     * Number of PCM samples per channel that `process` receives on every call (128). Despite its name this is
     * a block size, not a sample rate.
     */
    private _procNodeSampleRate = 128;

    /**
     * PCM frame size (in samples) expected by the denoise processor.
     */
    private _denoiseSampleSize: number;

    /**
     * Length of the circular buffer, i.e. the least common multiple of the block size and the rnnoise frame size.
     */
    private _circularBufferLength: number;

    /**
     * Circular buffer holding raw samples that are denoised in place.
     */
    private _circularBuffer: Float32Array;

    /**
     * The circular buffer uses a couple of indexes to track data segments. Input data from the stream is
     * copied to the circular buffer as it comes in, one `_procNodeSampleRate` sized block at a time.
     * `_inputBufferLength` denotes the current length of all gathered raw audio segments.
     */
    private _inputBufferLength = 0;

    /**
     * Denoising is done directly on the circular buffer using subarray views, but because
     * `_procNodeSampleRate` and `_denoiseSampleSize` have different sizes, denoised samples lag behind
     * the currently gathered raw audio samples, so a separate index, `_denoisedBufferLength`, is needed.
     */
    private _denoisedBufferLength = 0;

    /**
     * Once enough data has been denoised (a full output block) it is sent to the output buffer.
     * `_denoisedBufferIndx` is the start index, within the circular buffer, of denoised data not yet sent.
     */
    private _denoisedBufferIndx = 0;

    /**
     * C'tor.
     */
    constructor() {
        super();

        // The wasm module needs to be compiled to load synchronously as the audio worklet `addModule()`
        // initialization process does not wait for the resolution of promises in the AudioWorkletGlobalScope.
        this._denoiseProcessor = new RnnoiseProcessor(createRNNWasmModuleSync());

        // PCM frame size expected by the denoise processor.
        this._denoiseSampleSize = this._denoiseProcessor.getSampleLength();

        // In order to avoid unnecessary memory related operations a circular buffer is used.
        // Because the audio worklet input block does not match the frame size required by rnnoise two cases
        // can occur:
        // 1. There is not enough data, in which case we buffer it.
        // 2. There is enough data but some residue remains after the call to `processAudioFrame`, so it is
        //    buffered for the next call.
        // A problem arises when the circular buffer reaches the end and a rollover is required, namely the
        // residue could be split between the end and the beginning of the buffer, which would require some
        // complicated logic to handle. Using the lcm of both sizes as the buffer length guarantees that the
        // write index and the denoise index reach the end of the buffer together, so the residue is never split.
        this._circularBufferLength = leastCommonMultiple(this._procNodeSampleRate, this._denoiseSampleSize);
        this._circularBuffer = new Float32Array(this._circularBufferLength);
    }

    /**
     * Worklet interface process method. The inputs parameter contains PCM audio that is then sent to rnnoise.
     * Rnnoise only accepts frames of `_denoiseSampleSize` samples whereas `process` handles 128 sample blocks,
     * we take this into account using a circular buffer.
     *
     * @param {Float32Array[][]} inputs - Array of inputs connected to the node, each of them with their
     * associated array of channels. Each channel is an array of 128 pcm samples. An input without an active
     * source has no channels.
     * @param {Float32Array[][]} outputs - Array of outputs similar to the inputs parameter structure, expected
     * to be filled during the execution of `process`. By default each channel is zero filled.
     * @returns {boolean} - Boolean value that returns whether or not the processor should remain active.
     * Returning false will terminate it. This implementation always returns true.
     */
    process(inputs: Float32Array[][], outputs: Float32Array[][]): boolean {
        // We expect the incoming track to be mono, if a stereo track is passed only one of its channels will
        // get denoised and sent back.
        // TODO Technically we can denoise both channels however this might require a new rnnoise context, some
        // more investigation is required.
        const inputChannels = inputs[0];
        const outputChannels = outputs[0];
        const inData = inputChannels ? inputChannels[0] : undefined;
        const outData = outputChannels ? outputChannels[0] : undefined;

        // Exit early when there is no channel data (e.g. the input node is not connected or was disconnected).
        // Without this guard the buffer copy below throws on the missing channel. The buffer state is left
        // untouched and the output keeps its default zero fill.
        if (!inData || !outData) {
            return true;
        }

        // Append the new raw PCM block.
        this._circularBuffer.set(inData, this._inputBufferLength);
        this._inputBufferLength += inData.length;

        // New raw samples were just added, start denoising frames. `_denoisedBufferLength` gives us the
        // position at which the previous denoise iteration ended, i.e. it takes residue data into account.
        for (; this._denoisedBufferLength + this._denoiseSampleSize <= this._inputBufferLength;
            this._denoisedBufferLength += this._denoiseSampleSize) {
            // Create a view of the circular buffer so it can be modified in place, removing the need for
            // extra copies.
            const denoiseFrame = this._circularBuffer.subarray(
                this._denoisedBufferLength,
                this._denoisedBufferLength + this._denoiseSampleSize
            );

            this._denoiseProcessor.processAudioFrame(denoiseFrame, true);
        }

        // Determine how much denoised audio is available. If the start index of the unsent denoised samples is
        // greater than `_denoisedBufferLength`, a rollover occurred and the unsent data runs to the buffer end.
        let unsentDenoisedDataLength;

        if (this._denoisedBufferIndx > this._denoisedBufferLength) {
            unsentDenoisedDataLength = this._circularBufferLength - this._denoisedBufferIndx;
        } else {
            unsentDenoisedDataLength = this._denoisedBufferLength - this._denoisedBufferIndx;
        }

        // Only copy denoised data to the output when there is enough of it to fill the whole output block.
        // Until the first rnnoise frame has been gathered and denoised nothing is copied and the output stays
        // zero filled; afterwards one block is sent per call and the remainder is kept for later calls.
        if (unsentDenoisedDataLength >= outData.length) {
            const denoisedFrame = this._circularBuffer.subarray(
                this._denoisedBufferIndx,
                this._denoisedBufferIndx + outData.length
            );

            outData.set(denoisedFrame, 0);
            this._denoisedBufferIndx += outData.length;
        }

        // When the end of the circular buffer has been reached, start from the beginning. By the time the index
        // starts over, the data at the beginning is stale (has already been processed) and can be safely
        // overwritten.
        if (this._denoisedBufferIndx === this._circularBufferLength) {
            this._denoisedBufferIndx = 0;
        }

        // Because the circular buffer's length is the lcm of both the input block size and the processor's
        // frame size, by the time the input index reaches the end the denoise index will be there as well.
        if (this._inputBufferLength === this._circularBufferLength) {
            this._inputBufferLength = 0;
            this._denoisedBufferLength = 0;
        }

        return true;
    }
}
|
|
|
|
// Register the NoiseSuppressorWorklet class as an audio worklet processor under the name
// 'NoiseSuppressorWorklet'.
registerProcessor('NoiseSuppressorWorklet', NoiseSuppressorWorklet);
|