jiti-meet/react/features/vad-reporter/TrackVADEmitter.js

// @flow

import { createRnnoiseProcessorPromise, getSampleLength } from '../rnnoise/';
import EventEmitter from 'events';
import JitsiMeetJS from '../base/lib-jitsi-meet';
import logger from './logger';
import { VAD_SCORE_PUBLISHED } from './VADEvents';

/**
 * The structure used by TrackVADEmitter to relay a VAD score; an object of this shape is the payload emitted with
 * the VAD_SCORE_PUBLISHED event.
 */
export type VADScore = {

    /**
     * Device ID of the microphone the VAD score was computed for.
     */
    deviceId: string,

    /**
     * The VAD (voice activity detection) score from 0 - 1 i.e. 0.60
     */
    score: number,

    /**
     * Epoch time, as returned by Date.now(), taken when the audio callback that delivered the PCM was handled.
     */
    timestamp: number

};

/**
 * Connects an audio JitsiLocalTrack to a RnnoiseProcessor using WebAudio ScriptProcessorNode.
 * Once an object is created audio from the local track flows through the ScriptProcessorNode as raw PCM.
 * The PCM is processed by the rnnoise module and a VAD (voice activity detection) score is obtained, the
 * score is published to consumers via an EventEmitter.
 * After work is done with this service the destroy method needs to be called for a proper cleanup.
 */
export default class TrackVADEmitter extends EventEmitter {
    /**
     * The AudioContext instance.
     */
    _audioContext: AudioContext;

    /**
     * The MediaStreamAudioSourceNode instance.
     */
    _audioSource: MediaStreamAudioSourceNode;

    /**
     * The ScriptProcessorNode instance.
     */
    _audioProcessingNode: ScriptProcessorNode;

    /**
     * Buffer to hold residue PCM resulting after a ScriptProcessorNode callback
     */
    _bufferResidue: Float32Array;

    /**
     * State flag, check if the instance was destroyed
     */
    _destroyed: boolean = false;

    /**
     * The JitsiLocalTrack instance.
     */
    _localTrack: Object;

    /**
     * Device ID of the target microphone.
     */
    _micDeviceId: string;

    /**
     * Callback function that will be called by the ScriptProcessNode with raw PCM data, depending on the set sample
     * rate.
     */
    _onAudioProcess: (audioEvent: Object) => void;

    /**
     * Sample rate of the ScriptProcessorNode.
     */
    _procNodeSampleRate: number;

    /**
     * Rnnoise adapter that allows us to calculate VAD score for PCM samples
     */
    _rnnoiseProcessor: Object;

    /**
     * PCM Sample size expected by the RnnoiseProcessor instance.
     */
    _rnnoiseSampleSize: number;

    /**
     * Constructor.
     *
     * @param {number} procNodeSampleRate - Sample rate of the ScriptProcessorNode. Possible values  256, 512, 1024,
     *  2048, 4096, 8192, 16384. Passing other values will default to closes neighbor.
     * @param {Object} rnnoiseProcessor - Rnnoise adapter that allows us to calculate VAD score
     * for PCM samples.
     * @param {Object} jitsiLocalTrack - JitsiLocalTrack corresponding to micDeviceId.
     */
    constructor(procNodeSampleRate: number, rnnoiseProcessor: Object, jitsiLocalTrack: Object) {
        super();
        this._procNodeSampleRate = procNodeSampleRate;
        this._rnnoiseProcessor = rnnoiseProcessor;
        this._localTrack = jitsiLocalTrack;
        this._micDeviceId = jitsiLocalTrack.getDeviceId();
        this._bufferResidue = new Float32Array([]);
        this._audioContext = new AudioContext();
        this._rnnoiseSampleSize = getSampleLength();
        this._onAudioProcess = this._onAudioProcess.bind(this);

        this._initializeAudioContext();
        this._connectAudioGraph();

        logger.log(`Constructed VAD emitter for device: ${this._micDeviceId}`);
    }

    /**
     * Factory method that sets up all the necessary components for the creation of the TrackVADEmitter.
     *
     * @param {string} micDeviceId - Target microphone device id.
     * @param {number} procNodeSampleRate - Sample rate of the proc node.
     * @returns {Promise<TrackVADEmitter>} - Promise resolving in a new instance of TrackVADEmitter.
     */
    static async create(micDeviceId: string, procNodeSampleRate: number) {
        let rnnoiseProcessor = null;
        let localTrack = null;

        try {
            logger.log(`Initializing TrackVADEmitter for device: ${micDeviceId}`);

            rnnoiseProcessor = await createRnnoiseProcessorPromise();
            localTrack = await JitsiMeetJS.createLocalTracks({
                devices: [ 'audio' ],
                micDeviceId
            });

            // We only expect one audio track when specifying a device id.
            if (!localTrack[0]) {
                throw new Error(`Failed to create jitsi local track for device id: ${micDeviceId}`);
            }

            return new TrackVADEmitter(procNodeSampleRate, rnnoiseProcessor, localTrack[0]);
        } catch (error) {
            logger.error(`Failed to create TrackVADEmitter for ${micDeviceId} with error: ${error}`);

            if (rnnoiseProcessor) {
                rnnoiseProcessor.destroy();
            }

            if (localTrack) {
                localTrack.stopStream();
            }

            throw error;
        }
    }

    /**
     * Sets up the audio graph in the AudioContext.
     *
     * @returns {Promise<void>}
     */
    _initializeAudioContext() {
        this._audioSource = this._audioContext.createMediaStreamSource(this._localTrack.stream);

        // TODO AudioProcessingNode is deprecated check and replace with alternative.
        // We don't need stereo for determining the VAD score so we create a single chanel processing node.
        this._audioProcessingNode = this._audioContext.createScriptProcessor(this._procNodeSampleRate, 1, 1);
        this._audioProcessingNode.onaudioprocess = this._onAudioProcess;
    }

    /**
     * ScriptProcessorNode callback, the input parameters contains the PCM audio that is then sent to rnnoise.
     * Rnnoise only accepts PCM samples of 480 bytes whereas the webaudio processor node can't sample at a multiple
     * of 480 thus after each _onAudioProcess callback there will remain and PCM buffer residue equal
     * to _procNodeSampleRate / 480 which will be added to the next sample buffer and so on.
     *
     * @param {AudioProcessingEvent} audioEvent - Audio event.
     * @returns {void}
     */
    _onAudioProcess(audioEvent: Object) {
        // Prepend the residue PCM buffer from the previous process callback.
        const inData = audioEvent.inputBuffer.getChannelData(0);
        const completeInData = [ ...this._bufferResidue, ...inData ];
        const sampleTimestamp = Date.now();

        let i = 0;

        for (; i + this._rnnoiseSampleSize < completeInData.length; i += this._rnnoiseSampleSize) {
            const pcmSample = completeInData.slice(i, i + this._rnnoiseSampleSize);
            const vadScore = this._rnnoiseProcessor.calculateAudioFrameVAD(pcmSample);

            this.emit(VAD_SCORE_PUBLISHED, {
                timestamp: sampleTimestamp,
                score: vadScore,
                deviceId: this._micDeviceId
            });
        }

        this._bufferResidue = completeInData.slice(i, completeInData.length);
    }

    /**
     * Connects the nodes in the AudioContext to start the flow of audio data.
     *
     * @returns {void}
     */
    _connectAudioGraph() {
        this._audioSource.connect(this._audioProcessingNode);
        this._audioProcessingNode.connect(this._audioContext.destination);
    }

    /**
     * Disconnects the nodes in the AudioContext.
     *
     * @returns {void}
     */
    _disconnectAudioGraph() {
        // Even thought we disconnect the processing node it seems that some callbacks remain queued,
        // resulting in calls with and uninitialized context.
        // eslint-disable-next-line no-empty-function
        this._audioProcessingNode.onaudioprocess = () => {};
        this._audioProcessingNode.disconnect();
        this._audioSource.disconnect();
    }

    /**
     * Cleanup potentially acquired resources.
     *
     * @returns {void}
     */
    _cleanupResources() {
        logger.debug(`Cleaning up resources for device ${this._micDeviceId}!`);

        this._disconnectAudioGraph();
        this._localTrack.stopStream();
        this._rnnoiseProcessor.destroy();
    }

    /**
     * Destroy TrackVADEmitter instance (release resources and stop callbacks).
     *
     * @returns {void}
     */
    destroy() {
        if (this._destroyed) {
            return;
        }

        logger.log(`Destroying TrackVADEmitter for mic: ${this._micDeviceId}`);
        this._cleanupResources();
        this._destroyed = true;
    }
}
feat: integrate rnnoise based service for voice activity (VAD) detection 2019-10-04 10:55:18 +00:00			`// @flow`

			`import { createRnnoiseProcessorPromise, getSampleLength } from '../rnnoise/';`
			`import EventEmitter from 'events';`
			`import JitsiMeetJS from '../base/lib-jitsi-meet';`
			`import logger from './logger';`
			`import { VAD_SCORE_PUBLISHED } from './VADEvents';`

			`/**`
			`* The structure used by TrackVADEmitter to relay a score`
			`*/`
			`export type VADScore = {`

			`/**`
			`* Device ID associated with the VAD score`
			`*/`
			`deviceId: string,`

			`/**`
			`* The PCM score from 0 - 1 i.e. 0.60`
			`*/`
			`score: number,`

			`/**`
			`* Epoch time at which PCM was recorded`
			`*/`
			`timestamp: number`

			`};`

			`/**`
			`* Connects an audio JitsiLocalTrack to a RnnoiseProcessor using WebAudio ScriptProcessorNode.`
			`* Once an object is created audio from the local track flows through the ScriptProcessorNode as raw PCM.`
			`* The PCM is processed by the rnnoise module and a VAD (voice activity detection) score is obtained, the`
			`* score is published to consumers via an EventEmitter.`
			`* After work is done with this service the destroy method needs to be called for a proper cleanup.`
			`*/`
			`export default class TrackVADEmitter extends EventEmitter {`
			`/**`
			`* The AudioContext instance.`
			`*/`
			`_audioContext: AudioContext;`

			`/**`
			`* The MediaStreamAudioSourceNode instance.`
			`*/`
			`_audioSource: MediaStreamAudioSourceNode;`

			`/**`
			`* The ScriptProcessorNode instance.`
			`*/`
			`_audioProcessingNode: ScriptProcessorNode;`

			`/**`
			`* Buffer to hold residue PCM resulting after a ScriptProcessorNode callback`
			`*/`
			`_bufferResidue: Float32Array;`

			`/**`
			`* State flag, check if the instance was destroyed`
			`*/`
			`_destroyed: boolean = false;`

			`/**`
			`* The JitsiLocalTrack instance.`
			`*/`
			`_localTrack: Object;`

			`/**`
			`* Device ID of the target microphone.`
			`*/`
			`_micDeviceId: string;`

			`/**`
			`* Callback function that will be called by the ScriptProcessNode with raw PCM data, depending on the set sample`
			`* rate.`
			`*/`
			`_onAudioProcess: (audioEvent: Object) => void;`

			`/**`
			`* Sample rate of the ScriptProcessorNode.`
			`*/`
			`_procNodeSampleRate: number;`

			`/**`
			`* Rnnoise adapter that allows us to calculate VAD score for PCM samples`
			`*/`
			`_rnnoiseProcessor: Object;`

			`/**`
			`* PCM Sample size expected by the RnnoiseProcessor instance.`
			`*/`
			`_rnnoiseSampleSize: number;`

			`/**`
			`* Constructor.`
			`*`
			`* @param {number} procNodeSampleRate - Sample rate of the ScriptProcessorNode. Possible values 256, 512, 1024,`
			`* 2048, 4096, 8192, 16384. Passing other values will default to closes neighbor.`
			`* @param {Object} rnnoiseProcessor - Rnnoise adapter that allows us to calculate VAD score`
			`* for PCM samples.`
			`* @param {Object} jitsiLocalTrack - JitsiLocalTrack corresponding to micDeviceId.`
			`*/`
			`constructor(procNodeSampleRate: number, rnnoiseProcessor: Object, jitsiLocalTrack: Object) {`
			`super();`
			`this._procNodeSampleRate = procNodeSampleRate;`
			`this._rnnoiseProcessor = rnnoiseProcessor;`
			`this._localTrack = jitsiLocalTrack;`
			`this._micDeviceId = jitsiLocalTrack.getDeviceId();`
			`this._bufferResidue = new Float32Array([]);`
			`this._audioContext = new AudioContext();`
			`this._rnnoiseSampleSize = getSampleLength();`
			`this._onAudioProcess = this._onAudioProcess.bind(this);`

			`this._initializeAudioContext();`
			`this._connectAudioGraph();`

			logger.log(`Constructed VAD emitter for device: ${this._micDeviceId}`);
			`}`

			`/**`
			`* Factory method that sets up all the necessary components for the creation of the TrackVADEmitter.`
			`*`
			`* @param {string} micDeviceId - Target microphone device id.`
			`* @param {number} procNodeSampleRate - Sample rate of the proc node.`
			`* @returns {Promise<TrackVADEmitter>} - Promise resolving in a new instance of TrackVADEmitter.`
			`*/`
			`static async create(micDeviceId: string, procNodeSampleRate: number) {`
			`let rnnoiseProcessor = null;`
			`let localTrack = null;`

			`try {`
			logger.log(`Initializing TrackVADEmitter for device: ${micDeviceId}`);

			`rnnoiseProcessor = await createRnnoiseProcessorPromise();`
			`localTrack = await JitsiMeetJS.createLocalTracks({`
			`devices: [ 'audio' ],`
			`micDeviceId`
			`});`

			`// We only expect one audio track when specifying a device id.`
			`if (!localTrack[0]) {`
			throw new Error(`Failed to create jitsi local track for device id: ${micDeviceId}`);
			`}`

			`return new TrackVADEmitter(procNodeSampleRate, rnnoiseProcessor, localTrack[0]);`
			`} catch (error) {`
			logger.error(`Failed to create TrackVADEmitter for ${micDeviceId} with error: ${error}`);

			`if (rnnoiseProcessor) {`
			`rnnoiseProcessor.destroy();`
			`}`

			`if (localTrack) {`
			`localTrack.stopStream();`
			`}`

			`throw error;`
			`}`
			`}`

			`/**`
			`* Sets up the audio graph in the AudioContext.`
			`*`
			`* @returns {Promise<void>}`
			`*/`
			`_initializeAudioContext() {`
			`this._audioSource = this._audioContext.createMediaStreamSource(this._localTrack.stream);`

			`// TODO AudioProcessingNode is deprecated check and replace with alternative.`
			`// We don't need stereo for determining the VAD score so we create a single chanel processing node.`
			`this._audioProcessingNode = this._audioContext.createScriptProcessor(this._procNodeSampleRate, 1, 1);`
			`this._audioProcessingNode.onaudioprocess = this._onAudioProcess;`
			`}`

			`/**`
			`* ScriptProcessorNode callback, the input parameters contains the PCM audio that is then sent to rnnoise.`
			`* Rnnoise only accepts PCM samples of 480 bytes whereas the webaudio processor node can't sample at a multiple`
			`* of 480 thus after each _onAudioProcess callback there will remain and PCM buffer residue equal`
			`* to _procNodeSampleRate / 480 which will be added to the next sample buffer and so on.`
			`*`
			`* @param {AudioProcessingEvent} audioEvent - Audio event.`
			`* @returns {void}`
			`*/`
			`_onAudioProcess(audioEvent: Object) {`
			`// Prepend the residue PCM buffer from the previous process callback.`
			`const inData = audioEvent.inputBuffer.getChannelData(0);`
			`const completeInData = [ ...this._bufferResidue, ...inData ];`
			`const sampleTimestamp = Date.now();`

			`let i = 0;`

			`for (; i + this._rnnoiseSampleSize < completeInData.length; i += this._rnnoiseSampleSize) {`
			`const pcmSample = completeInData.slice(i, i + this._rnnoiseSampleSize);`
			`const vadScore = this._rnnoiseProcessor.calculateAudioFrameVAD(pcmSample);`

			`this.emit(VAD_SCORE_PUBLISHED, {`
			`timestamp: sampleTimestamp,`
			`score: vadScore,`
			`deviceId: this._micDeviceId`
			`});`
			`}`

			`this._bufferResidue = completeInData.slice(i, completeInData.length);`
			`}`

			`/**`
			`* Connects the nodes in the AudioContext to start the flow of audio data.`
			`*`
			`* @returns {void}`
			`*/`
			`_connectAudioGraph() {`
			`this._audioSource.connect(this._audioProcessingNode);`
			`this._audioProcessingNode.connect(this._audioContext.destination);`
			`}`

			`/**`
			`* Disconnects the nodes in the AudioContext.`
			`*`
			`* @returns {void}`
			`*/`
			`_disconnectAudioGraph() {`
			`// Even thought we disconnect the processing node it seems that some callbacks remain queued,`
			`// resulting in calls with and uninitialized context.`
			`// eslint-disable-next-line no-empty-function`
			`this._audioProcessingNode.onaudioprocess = () => {};`
			`this._audioProcessingNode.disconnect();`
			`this._audioSource.disconnect();`
			`}`

			`/**`
			`* Cleanup potentially acquired resources.`
			`*`
			`* @returns {void}`
			`*/`
			`_cleanupResources() {`
			logger.debug(`Cleaning up resources for device ${this._micDeviceId}!`);

			`this._disconnectAudioGraph();`
			`this._localTrack.stopStream();`
			`this._rnnoiseProcessor.destroy();`
			`}`

			`/**`
			`* Destroy TrackVADEmitter instance (release resources and stop callbacks).`
			`*`
			`* @returns {void}`
			`*/`
			`destroy() {`
			`if (this._destroyed) {`
			`return;`
			`}`

			logger.log(`Destroying TrackVADEmitter for mic: ${this._micDeviceId}`);
			`this._cleanupResources();`
			`this._destroyed = true;`
			`}`
			`}`