feat: integrate rnnoise based service for voice activity (VAD) detection

2019-10-04 13:55:18 +03:00 · 2019-10-04 13:55:18 +03:00 · 761ac6a730
parent 11d3a343e5
commit 761ac6a730
12 changed files with 888 additions and 5 deletions
--- a/12
+++ b/12
@ -3,6 +3,7 @@ CLEANCSS = ./node_modules/.bin/cleancss
 DEPLOY_DIR = libs
 LIBJITSIMEET_DIR = node_modules/lib-jitsi-meet/
 LIBFLAC_DIR = node_modules/libflacjs/dist/min/
+RNNOISE_WASM_DIR = node_modules/rnnoise-wasm/dist/
 NODE_SASS = ./node_modules/.bin/node-sass
 NPM = npm
 OUTPUT_DIR = .
@ -20,7 +21,7 @@ compile:
 clean:
 	rm -fr $(BUILD_DIR)

-deploy: deploy-init deploy-appbundle deploy-lib-jitsi-meet deploy-libflac deploy-css deploy-local
+deploy: deploy-init deploy-appbundle deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac deploy-css deploy-local

 deploy-init:
 	rm -fr $(DEPLOY_DIR)
@ -47,6 +48,8 @@ deploy-appbundle:
 		$(BUILD_DIR)/analytics-ga.min.map \
 		$(BUILD_DIR)/video-blur-effect.min.js \
 		$(BUILD_DIR)/video-blur-effect.min.map \
+		$(BUILD_DIR)/rnnoise-processor.min.js \
+		$(BUILD_DIR)/rnnoise-processor.min.map \
 		$(DEPLOY_DIR)

 deploy-lib-jitsi-meet:
@ -63,6 +66,11 @@ deploy-libflac:
 		$(LIBFLAC_DIR)/libflac4-1.3.2.min.js.mem \
 		$(DEPLOY_DIR)

+deploy-rnnoise-binary:
+	cp \
+		$(RNNOISE_WASM_DIR)/rnnoise.wasm \
+		$(DEPLOY_DIR)
+
 deploy-css:
 	$(NODE_SASS) $(STYLES_MAIN) $(STYLES_BUNDLE) && \
 	$(CLEANCSS) $(STYLES_BUNDLE) > $(STYLES_DESTINATION) ; \
@ -71,7 +79,7 @@ deploy-css:
 deploy-local:
 	([ ! -x deploy-local.sh ] || ./deploy-local.sh)

-dev: deploy-init deploy-css deploy-lib-jitsi-meet deploy-libflac
+dev: deploy-init deploy-css deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac
 	$(WEBPACK_DEV_SERVER)

 source-package:
--- a/package-lock.json
+++ b/package-lock.json
@ -16413,6 +16413,39 @@
        "inherits": "^2.0.1"
      }
    },
+    "rnnoise-wasm": {
+      "version": "github:jitsi/rnnoise-wasm#db96d11f175a22ef56c7db1ba9550835b716e615",
+      "from": "github:jitsi/rnnoise-wasm#db96d11f175a22ef56c7db1ba9550835b716e615"
+    },
+    "rollup-plugin-visualizer": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/rollup-plugin-visualizer/-/rollup-plugin-visualizer-1.1.1.tgz",
+      "integrity": "sha512-7xkSKp+dyJmSC7jg2LXqViaHuOnF1VvIFCnsZEKjrgT5ZVyiLLSbeszxFcQSfNJILphqgAEmWAUz0Z4xYScrRw==",
+      "optional": true,
+      "requires": {
+        "mkdirp": "^0.5.1",
+        "opn": "^5.4.0",
+        "source-map": "^0.7.3",
+        "typeface-oswald": "0.0.54"
+      },
+      "dependencies": {
+        "opn": {
+          "version": "5.5.0",
+          "resolved": "https://registry.npmjs.org/opn/-/opn-5.5.0.tgz",
+          "integrity": "sha512-PqHpggC9bLV0VeWcdKhkpxY+3JTzetLSqTCWL/z/tFIbI6G8JCjondXklT1JinczLz2Xib62sSp0T/gKT4KksA==",
+          "optional": true,
+          "requires": {
+            "is-wsl": "^1.1.0"
+          }
+        },
+        "source-map": {
+          "version": "0.7.3",
+          "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.7.3.tgz",
+          "integrity": "sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ==",
+          "optional": true
+        }
+      }
+    },
    "rsvp": {
      "version": "4.8.5",
      "resolved": "https://registry.npmjs.org/rsvp/-/rsvp-4.8.5.tgz",
--- a/package.json
+++ b/package.json
@ -87,6 +87,7 @@
    "react-transition-group": "2.4.0",
    "redux": "4.0.4",
    "redux-thunk": "2.2.0",
+    "rnnoise-wasm": "github:jitsi/rnnoise-wasm.git#db96d11f175a22ef56c7db1ba9550835b716e615",
    "styled-components": "3.4.9",
    "util": "0.12.1",
    "uuid": "3.1.0",
--- a/react/features/rnnoise/functions.js
+++ b/react/features/rnnoise/functions.js
@ -0,0 +1,44 @@
+// @flow
+
+import { getJitsiMeetGlobalNS, loadScript } from '../base/util';
+
+let loadRnnoisePromise;
+
+/**
+ * Returns promise that resolves with a RnnoiseProcessor instance.
+ *
+ * @returns {Promise<RnnoiseProcessor>} - Resolves with the blur effect instance.
+ */
+export function createRnnoiseProcessorPromise() {
+    // Subsequent calls should not attempt to load the script multiple times.
+    if (!loadRnnoisePromise) {
+        loadRnnoisePromise = loadScript('libs/rnnoise-processor.min.js');
+    }
+
+    return loadRnnoisePromise.then(() => {
+        const ns = getJitsiMeetGlobalNS();
+
+        if (ns?.effects?.rnnoise?.createRnnoiseProcessor) {
+            return ns.effects.rnnoise.createRnnoiseProcessor();
+        }
+
+        throw new Error('Rnnoise module binding createRnnoiseProcessor not found!');
+    });
+}
+
+/**
+ * Get the accepted sample length for the rnnoise library. We might want to expose it with flow libdefs.
+ *
+ * @returns {number}
+ */
+export function getSampleLength() {
+    const ns = getJitsiMeetGlobalNS();
+
+    const rnnoiseSample = ns?.effects?.rnnoise?.RNNOISE_SAMPLE_LENGTH;
+
+    if (!rnnoiseSample) {
+        throw new Error('Please call createRnnoiseProcessorPromise first or wait for promise to resolve!');
+    }
+
+    return rnnoiseSample;
+}
--- a/react/features/rnnoise/index.js
+++ b/react/features/rnnoise/index.js
@ -0,0 +1,2 @@
+
+export * from './functions';
--- a/react/features/stream-effects/rnnoise/RnnoiseProcessor.js
+++ b/react/features/stream-effects/rnnoise/RnnoiseProcessor.js
@ -0,0 +1,174 @@
+// @flow
+
+/**
+ * Constant. Rnnoise default sample size, samples of different size won't work.
+ */
+export const RNNOISE_SAMPLE_LENGTH: number = 480;
+
+/**
+ *  Constant. Rnnoise only takes inputs of 480 PCM float32 samples thus 480*4.
+ */
+const RNNOISE_BUFFER_SIZE: number = RNNOISE_SAMPLE_LENGTH * 4;
+
+/**
+ * Represents an adaptor for the rnnoise library compiled to webassembly. The class takes care of webassembly
+ * memory management and exposes rnnoise functionality such as PCM audio denoising and VAD (voice activity
+ * detection) scores.
+ */
+export default class RnnoiseProcessor {
+    /**
+     * Rnnoise context object needed to perform the audio processing.
+     */
+    _context: ?Object;
+
+    /**
+     * State flag, check if the instance was destroyed.
+     */
+    _destroyed: boolean = false;
+
+    /**
+     * WASM interface through which calls to rnnoise are made.
+     */
+    _wasmInterface: Object;
+
+    /**
+     * WASM dynamic memory buffer used as input for rnnoise processing method.
+     */
+    _wasmPcmInput: Object;
+
+    /**
+     * The Float32Array index representing the start point in the wasm heap of the _wasmPcmInput buffer.
+     */
+    _wasmPcmInputF32Index: number;
+
+    /**
+     * WASM dynamic memory buffer used as output for rnnoise processing method.
+     */
+    _wasmPcmOutput: Object;
+
+    /**
+     * Constructor.
+     *
+     * @class
+     * @param {Object} wasmInterface - WebAssembly module interface that exposes rnnoise functionality.
+     */
+    constructor(wasmInterface: Object) {
+        // Considering that we deal with dynamic allocated memory employ exception safety strong guarantee
+        // i.e. in case of exception there are no side effects.
+        try {
+            this._wasmInterface = wasmInterface;
+
+            // For VAD score purposes only allocate the buffers once and reuse them
+            this._wasmPcmInput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE);
+
+            if (!this._wasmPcmInput) {
+                throw Error('Failed to create wasm input memory buffer!');
+            }
+
+            this._wasmPcmOutput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE);
+
+            if (!this._wasmPcmOutput) {
+                wasmInterface._free(this._wasmPcmInput);
+                throw Error('Failed to create wasm output memory buffer!');
+            }
+
+            // The HEAPF32.set function requires an index relative to a Float32 array view of the wasm memory model
+            // which is an array of bytes. This means we have to divide it by the size of a float to get the index
+            // relative to a Float32 Array.
+            this._wasmPcmInputF32Index = this._wasmPcmInput / 4;
+
+            this._context = this._wasmInterface._rnnoise_create();
+        } catch (error) {
+            // release can be called even if not all the components were initialized.
+            this._releaseWasmResources();
+            throw error;
+        }
+    }
+
+    /**
+     * Copy the input PCM Audio Sample to the wasm input buffer.
+     *
+     * @param {Float32Array} pcmSample - Array containing 16 bit format PCM sample stored in 32 Floats .
+     * @returns {void}
+     */
+    _copyPCMSampleToWasmBuffer(pcmSample: Float32Array) {
+        this._wasmInterface.HEAPF32.set(pcmSample, this._wasmPcmInputF32Index);
+    }
+
+    /**
+     * Convert 32 bit Float PCM samples to 16 bit Float PCM samples and store them in 32 bit Floats.
+     *
+     * @param {Float32Array} f32Array - Array containing 32 bit PCM samples.
+     * @returns {void}
+     */
+    _convertTo16BitPCM(f32Array: Float32Array) {
+        for (const [ index, value ] of f32Array.entries()) {
+            f32Array[index] = value * 0x7fff;
+        }
+    }
+
+    /**
+     * Release resources associated with the wasm context. If something goes downhill here
+     * i.e. Exception is thrown, there is nothing much we can do.
+     *
+     * @returns {void}
+     */
+    _releaseWasmResources() {
+        // For VAD score purposes only allocate the buffers once and reuse them
+        if (this._wasmPcmInput) {
+            this._wasmInterface._free(this._wasmPcmInput);
+            this._wasmPcmInput = null;
+        }
+
+        if (this._wasmPcmOutput) {
+            this._wasmInterface._free(this._wasmPcmOutput);
+            this._wasmPcmOutput = null;
+        }
+
+        if (this._context) {
+            this._wasmInterface._rnnoise_destroy(this._context);
+            this._context = null;
+        }
+    }
+
+    /**
+     * Release any resources required by the rnnoise context this needs to be called
+     * before destroying any context that uses the processor.
+     *
+     * @returns {void}
+     */
+    destroy() {
+        // Attempting to release a non initialized processor, do nothing.
+        if (this._destroyed) {
+            return;
+        }
+
+        this._releaseWasmResources();
+
+        this._destroyed = true;
+    }
+
+    /**
+     * Calculate the Voice Activity Detection for a raw Float32 PCM sample Array.
+     * The size of the array must be of exactly 480 samples, this constraint comes from the rnnoise library.
+     *
+     * @param {Float32Array} pcmFrame - Array containing 32 bit PCM samples.
+     * @returns {Float} Contains VAD score in the interval 0 - 1 i.e. 0.90 .
+     */
+    calculateAudioFrameVAD(pcmFrame: Float32Array) {
+        if (this._destroyed) {
+            throw new Error('RnnoiseProcessor instance is destroyed, please create another one!');
+        }
+
+        const pcmFrameLength = pcmFrame.length;
+
+        if (pcmFrameLength !== RNNOISE_SAMPLE_LENGTH) {
+            throw new Error(`Rnnoise can only process PCM frames of 480 samples! Input sample was:${pcmFrameLength}`);
+        }
+
+        this._convertTo16BitPCM(pcmFrame);
+        this._copyPCMSampleToWasmBuffer(pcmFrame);
+
+        return this._wasmInterface._rnnoise_process_frame(this._context, this._wasmPcmOutput, this._wasmPcmInput);
+    }
+}
--- a/react/features/stream-effects/rnnoise/index.js
+++ b/react/features/stream-effects/rnnoise/index.js
@ -0,0 +1,36 @@
+// @flow
+
+// Script expects to find rnnoise webassembly binary in the same public path root, otherwise it won't load
+// During the build phase this needs to be taken care of manually
+import rnnoiseWasmInit from 'rnnoise-wasm';
+import RnnoiseProcessor from './RnnoiseProcessor';
+
+export { RNNOISE_SAMPLE_LENGTH } from './RnnoiseProcessor';
+export type { RnnoiseProcessor };
+
+let rnnoiseWasmInterface;
+let initializePromise;
+
+/**
+ * Creates a new instance of RnnoiseProcessor.
+ *
+ * @returns {Promise<RnnoiseProcessor>}
+ */
+export function createRnnoiseProcessor() {
+    if (!initializePromise) {
+        initializePromise = new Promise((resolve, reject) => {
+            rnnoiseWasmInterface = rnnoiseWasmInit({
+                onRuntimeInitialized() {
+                    resolve();
+                },
+                onAbort(reason) {
+                    reject(reason);
+                }
+            });
+        });
+    }
+
+    return initializePromise.then(
+        () => new RnnoiseProcessor(rnnoiseWasmInterface)
+    );
+}
--- a/react/features/vad-reporter/TrackVADEmitter.js
+++ b/react/features/vad-reporter/TrackVADEmitter.js
@ -0,0 +1,258 @@
+// @flow
+
+import { createRnnoiseProcessorPromise, getSampleLength } from '../rnnoise/';
+import EventEmitter from 'events';
+import JitsiMeetJS from '../base/lib-jitsi-meet';
+import logger from './logger';
+import { VAD_SCORE_PUBLISHED } from './VADEvents';
+
+/**
+ * The structure used by TrackVADEmitter to relay a score
+ */
+export type VADScore = {
+
+    /**
+     * Device ID associated with the VAD score
+     */
+    deviceId: string,
+
+    /**
+     * The PCM score from 0 - 1 i.e. 0.60
+     */
+    score: number,
+
+    /**
+     * Epoch time at which PCM was recorded
+     */
+    timestamp: number
+
+};
+
+/**
+ * Connects an audio JitsiLocalTrack to a RnnoiseProcessor using WebAudio ScriptProcessorNode.
+ * Once an object is created audio from the local track flows through the ScriptProcessorNode as raw PCM.
+ * The PCM is processed by the rnnoise module and a VAD (voice activity detection) score is obtained, the
+ * score is published to consumers via an EventEmitter.
+ * After work is done with this service the destroy method needs to be called for a proper cleanup.
+ */
+export default class TrackVADEmitter extends EventEmitter {
+    /**
+     * The AudioContext instance.
+     */
+    _audioContext: AudioContext;
+
+    /**
+     * The MediaStreamAudioSourceNode instance.
+     */
+    _audioSource: MediaStreamAudioSourceNode;
+
+    /**
+     * The ScriptProcessorNode instance.
+     */
+    _audioProcessingNode: ScriptProcessorNode;
+
+    /**
+     * Buffer to hold residue PCM resulting after a ScriptProcessorNode callback
+     */
+    _bufferResidue: Float32Array;
+
+    /**
+     * State flag, check if the instance was destroyed
+     */
+    _destroyed: boolean = false;
+
+    /**
+     * The JitsiLocalTrack instance.
+     */
+    _localTrack: Object;
+
+    /**
+     * Device ID of the target microphone.
+     */
+    _micDeviceId: string;
+
+    /**
+     * Callback function that will be called by the ScriptProcessNode with raw PCM data, depending on the set sample
+     * rate.
+     */
+    _onAudioProcess: (audioEvent: Object) => void;
+
+    /**
+     * Sample rate of the ScriptProcessorNode.
+     */
+    _procNodeSampleRate: number;
+
+    /**
+     * Rnnoise adapter that allows us to calculate VAD score for PCM samples
+     */
+    _rnnoiseProcessor: Object;
+
+    /**
+     * PCM Sample size expected by the RnnoiseProcessor instance.
+     */
+    _rnnoiseSampleSize: number;
+
+    /**
+     * Constructor.
+     *
+     * @param {number} procNodeSampleRate - Sample rate of the ScriptProcessorNode. Possible values  256, 512, 1024,
+     *  2048, 4096, 8192, 16384. Passing other values will default to closes neighbor.
+     * @param {Object} rnnoiseProcessor - Rnnoise adapter that allows us to calculate VAD score
+     * for PCM samples.
+     * @param {Object} jitsiLocalTrack - JitsiLocalTrack corresponding to micDeviceId.
+     */
+    constructor(procNodeSampleRate: number, rnnoiseProcessor: Object, jitsiLocalTrack: Object) {
+        super();
+        this._procNodeSampleRate = procNodeSampleRate;
+        this._rnnoiseProcessor = rnnoiseProcessor;
+        this._localTrack = jitsiLocalTrack;
+        this._micDeviceId = jitsiLocalTrack.getDeviceId();
+        this._bufferResidue = new Float32Array([]);
+        this._audioContext = new AudioContext();
+        this._rnnoiseSampleSize = getSampleLength();
+        this._onAudioProcess = this._onAudioProcess.bind(this);
+
+        this._initializeAudioContext();
+        this._connectAudioGraph();
+
+        logger.log(`Constructed VAD emitter for device: ${this._micDeviceId}`);
+    }
+
+    /**
+     * Factory method that sets up all the necessary components for the creation of the TrackVADEmitter.
+     *
+     * @param {string} micDeviceId - Target microphone device id.
+     * @param {number} procNodeSampleRate - Sample rate of the proc node.
+     * @returns {Promise<TrackVADEmitter>} - Promise resolving in a new instance of TrackVADEmitter.
+     */
+    static async create(micDeviceId: string, procNodeSampleRate: number) {
+        let rnnoiseProcessor = null;
+        let localTrack = null;
+
+        try {
+            logger.log(`Initializing TrackVADEmitter for device: ${micDeviceId}`);
+
+            rnnoiseProcessor = await createRnnoiseProcessorPromise();
+            localTrack = await JitsiMeetJS.createLocalTracks({
+                devices: [ 'audio' ],
+                micDeviceId
+            });
+
+            // We only expect one audio track when specifying a device id.
+            if (!localTrack[0]) {
+                throw new Error(`Failed to create jitsi local track for device id: ${micDeviceId}`);
+            }
+
+            return new TrackVADEmitter(procNodeSampleRate, rnnoiseProcessor, localTrack[0]);
+        } catch (error) {
+            logger.error(`Failed to create TrackVADEmitter for ${micDeviceId} with error: ${error}`);
+
+            if (rnnoiseProcessor) {
+                rnnoiseProcessor.destroy();
+            }
+
+            if (localTrack) {
+                localTrack.stopStream();
+            }
+
+            throw error;
+        }
+    }
+
+    /**
+     * Sets up the audio graph in the AudioContext.
+     *
+     * @returns {Promise<void>}
+     */
+    _initializeAudioContext() {
+        this._audioSource = this._audioContext.createMediaStreamSource(this._localTrack.stream);
+
+        // TODO AudioProcessingNode is deprecated check and replace with alternative.
+        // We don't need stereo for determining the VAD score so we create a single chanel processing node.
+        this._audioProcessingNode = this._audioContext.createScriptProcessor(this._procNodeSampleRate, 1, 1);
+        this._audioProcessingNode.onaudioprocess = this._onAudioProcess;
+    }
+
+    /**
+     * ScriptProcessorNode callback, the input parameters contains the PCM audio that is then sent to rnnoise.
+     * Rnnoise only accepts PCM samples of 480 bytes whereas the webaudio processor node can't sample at a multiple
+     * of 480 thus after each _onAudioProcess callback there will remain and PCM buffer residue equal
+     * to _procNodeSampleRate / 480 which will be added to the next sample buffer and so on.
+     *
+     * @param {AudioProcessingEvent} audioEvent - Audio event.
+     * @returns {void}
+     */
+    _onAudioProcess(audioEvent: Object) {
+        // Prepend the residue PCM buffer from the previous process callback.
+        const inData = audioEvent.inputBuffer.getChannelData(0);
+        const completeInData = [ ...this._bufferResidue, ...inData ];
+        const sampleTimestamp = Date.now();
+
+        let i = 0;
+
+        for (; i + this._rnnoiseSampleSize < completeInData.length; i += this._rnnoiseSampleSize) {
+            const pcmSample = completeInData.slice(i, i + this._rnnoiseSampleSize);
+            const vadScore = this._rnnoiseProcessor.calculateAudioFrameVAD(pcmSample);
+
+            this.emit(VAD_SCORE_PUBLISHED, {
+                timestamp: sampleTimestamp,
+                score: vadScore,
+                deviceId: this._micDeviceId
+            });
+        }
+
+        this._bufferResidue = completeInData.slice(i, completeInData.length);
+    }
+
+    /**
+     * Connects the nodes in the AudioContext to start the flow of audio data.
+     *
+     * @returns {void}
+     */
+    _connectAudioGraph() {
+        this._audioSource.connect(this._audioProcessingNode);
+        this._audioProcessingNode.connect(this._audioContext.destination);
+    }
+
+    /**
+     * Disconnects the nodes in the AudioContext.
+     *
+     * @returns {void}
+     */
+    _disconnectAudioGraph() {
+        // Even thought we disconnect the processing node it seems that some callbacks remain queued,
+        // resulting in calls with and uninitialized context.
+        // eslint-disable-next-line no-empty-function
+        this._audioProcessingNode.onaudioprocess = () => {};
+        this._audioProcessingNode.disconnect();
+        this._audioSource.disconnect();
+    }
+
+    /**
+     * Cleanup potentially acquired resources.
+     *
+     * @returns {void}
+     */
+    _cleanupResources() {
+        logger.debug(`Cleaning up resources for device ${this._micDeviceId}!`);
+
+        this._disconnectAudioGraph();
+        this._localTrack.stopStream();
+        this._rnnoiseProcessor.destroy();
+    }
+
+    /**
+     * Destroy TrackVADEmitter instance (release resources and stop callbacks).
+     *
+     * @returns {void}
+     */
+    destroy() {
+        if (this._destroyed) {
+            return;
+        }
+
+        logger.log(`Destroying TrackVADEmitter for mic: ${this._micDeviceId}`);
+        this._cleanupResources();
+        this._destroyed = true;
+    }
+}
--- a/react/features/vad-reporter/VADEvents.js
+++ b/react/features/vad-reporter/VADEvents.js
@ -0,0 +1,7 @@
+// Event generated by a TrackVADEmitter when it emits a VAD score from rnnoise.
+// The generated objects are of type VADScore
+export const VAD_SCORE_PUBLISHED = 'vad-score-published';
+
+// Event generated by VADReportingService when if finishes creating a VAD report for the monitored devices.
+// The generated objects are of type Array<VADReportScore>, one score for each monitored device.
+export const VAD_REPORT_PUBLISHED = 'vad-report-published';
--- a/react/features/vad-reporter/VADReportingService.js
+++ b/react/features/vad-reporter/VADReportingService.js
@ -0,0 +1,284 @@
+// @flow
+
+import EventEmitter from 'events';
+import logger from './logger';
+import TrackVADEmitter from './TrackVADEmitter';
+import { VAD_SCORE_PUBLISHED, VAD_REPORT_PUBLISHED } from './VADEvents';
+import type { VADScore } from './TrackVADEmitter';
+export type { VADScore };
+
+/**
+ * Sample rate used by TrackVADEmitter, this value determines how often the ScriptProcessorNode is going to call the
+ * process audio function and with what sample size.
+ * Basically lower values mean more callbacks with lower processing times bigger values less callbacks with longer
+ * processing times. This value is somewhere in the middle, so we strike a balance between flooding with callbacks
+ * and processing time. Possible values  256, 512, 1024, 2048, 4096, 8192, 16384. Passing other values will default
+ * to closes neighbor.
+ */
+const SCRIPT_NODE_SAMPLE_RATE = 4096;
+
+/**
+ * Context that contains the emitter and additional information about the device.
+ */
+type VADDeviceContext = {
+
+    /**
+     * MediaDeviceInfo for associated context
+     */
+    deviceInfo: MediaDeviceInfo,
+
+    /**
+     * Array with VAD scores publish from the emitter.
+     */
+    scoreArray: Array<VADScore>,
+
+    /**
+     * TrackVADEmitter associated with media device
+     */
+    vadEmitter: TrackVADEmitter
+};
+
+/**
+ * The structure used by VADReportingService to relay a score report
+ */
+export type VADReportScore = {
+
+    /**
+     * Device ID associated with the VAD score
+     */
+    deviceId: string,
+
+    /**
+     * The PCM score from 0 - 1 i.e. 0.60
+     */
+    score: number,
+
+    /**
+     * Epoch time at which PCM was recorded
+     */
+    timestamp: number
+};
+
+
+/**
+ * Voice activity detection reporting service. The service create TrackVADEmitters for the provided devices and
+ * publishes an average of their VAD score over the specified interval via EventEmitter.
+ * The service is not reusable if destroyed a new one needs to be created, i.e. when a new device is added to the system
+ * a new service needs to be created and the old discarded.
+ */
+export default class VADReportingService extends EventEmitter {
+    /**
+     * Map containing context for devices currently being monitored by the reporting service.
+     */
+    _contextMap: Map<string, VADDeviceContext>;
+
+    /**
+     * State flag, check if the instance was destroyed.
+     */
+    _destroyed: boolean = false;
+
+    /**
+     * Delay at which to publish VAD score for monitored devices.
+     */
+    _intervalDelay: number;
+
+    /**
+     * Identifier for the interval publishing stats on the set interval.
+     */
+    _intervalId: ?IntervalID;
+
+    /**
+     * Constructor.
+     *
+     * @param {number} intervalDelay - Delay at which to publish VAD score for monitored devices.
+     * @param {Function} publishScoreCallBack - Function called on the specific interval with the calculated VAD score.
+     */
+    constructor(intervalDelay: number) {
+        super();
+        this._contextMap = new Map();
+        this._intervalDelay = intervalDelay;
+
+        logger.log(`Constructed VADReportingService with publish interval of: ${intervalDelay}`);
+    }
+
+    /**
+     * Factory methods that creates the TrackVADEmitters for the associated array of devices and instantiates
+     * a VADReportingService.
+     *
+     * @param {Array<MediaDeviceInfo>} micDeviceList - Device list that is monitored inside the service.
+     * @param {number} intervalDelay - Delay at which to publish VAD score for monitored devices.
+     * @param {Function} publishScoreCallBack - Function called on the specific interval with the calculated VAD score.
+     *
+     * @returns {Promise<VADReportingService>}
+     */
+    static create(micDeviceList: Array<MediaDeviceInfo>, intervalDelay: number) {
+        const vadReportingService = new VADReportingService(intervalDelay);
+        const emitterPromiseArray = [];
+
+        // Create a TrackVADEmitter for each provided audioinput device.
+        for (const micDevice of micDeviceList) {
+            if (micDevice.kind !== 'audioinput') {
+                logger.warn(`Provided device ${micDevice.label} -> ${micDevice.deviceId}, is not audioinput ignoring!`);
+
+                return;
+            }
+
+            logger.log(`Initializing VAD context for mic: ${micDevice.label} -> ${micDevice.deviceId}`);
+
+            const emitterPromise = TrackVADEmitter.create(micDevice.deviceId, SCRIPT_NODE_SAMPLE_RATE).then(emitter => {
+                emitter.on(VAD_SCORE_PUBLISHED, vadReportingService._devicePublishVADScore.bind(vadReportingService));
+
+                return {
+                    vadEmitter: emitter,
+                    deviceInfo: micDevice,
+                    scoreArray: []
+                };
+            });
+
+            emitterPromiseArray.push(emitterPromise);
+        }
+
+        // Once all the TrackVADEmitter promises are resolved check if all of them resolved properly if not reject
+        // the promise and clear the already created emitters.
+        // $FlowFixMe - allSettled is not part of flow prototype even though it's a valid Promise function
+        return Promise.allSettled(emitterPromiseArray).then(outcomeArray => {
+            const vadContextArray = [];
+            const rejectedEmitterPromiseArray = [];
+
+            for (const outcome of outcomeArray) {
+                if (outcome.status === 'fulfilled') {
+                    vadContextArray.push(outcome.value);
+                } else {
+                    // Promise was rejected.
+                    logger.error(`Create TrackVADEmitter promise failed with ${outcome.reason}`);
+
+                    rejectedEmitterPromiseArray.push(outcome);
+                }
+            }
+
+            // Check if there were any rejected promises and clear the already created ones list.
+            if (rejectedEmitterPromiseArray.length > 0) {
+                logger.error('Cleaning up remaining VADDeviceContext, due to create fail!');
+
+                for (const context of vadContextArray) {
+                    context.vadEmitter.destroy();
+                }
+
+                // Reject create promise if one emitter failed to instantiate, we might one just ignore it,
+                // leaving it like this for now
+                throw new Error('Create VADReportingService failed due to TrackVADEmitter creation issues!');
+            }
+
+            vadReportingService._setVADContextArray(vadContextArray);
+            vadReportingService._startPublish();
+
+            return vadReportingService;
+        });
+    }
+
+    /**
+     * Destroy TrackVADEmitters and clear the context map.
+     *
+     * @returns {void}
+     */
+    _clearContextMap() {
+        for (const vadContext of this._contextMap.values()) {
+            vadContext.vadEmitter.destroy();
+        }
+        this._contextMap.clear();
+    }
+
+    /**
+     * Set the watched device contexts.
+     *
+     * @param {Array<VADDeviceContext>} vadContextArray - List of mics.
+     * @returns {void}
+     */
+    _setVADContextArray(vadContextArray: Array<VADDeviceContext>): void {
+        for (const vadContext of vadContextArray) {
+            this._contextMap.set(vadContext.deviceInfo.deviceId, vadContext);
+        }
+    }
+
+    /**
+     * Start the setInterval reporting process.
+     *
+     * @returns {void}.
+     */
+    _startPublish() {
+        logger.log('VADReportingService started publishing.');
+        this._intervalId = setInterval(() => {
+            this._reportVadScore();
+        }, this._intervalDelay);
+    }
+
+    /**
+     * Function called at set interval with selected compute. The result will be published on the set callback.
+     *
+     * @returns {void}
+     */
+    _reportVadScore() {
+        const vadComputeScoreArray = [];
+        const computeTimestamp = Date.now();
+
+        // Go through each device and compute cumulated VAD score.
+
+        for (const [ deviceId, vadContext ] of this._contextMap) {
+            const nrOfVADScores = vadContext.scoreArray.length;
+            let vadSum = 0;
+
+            vadContext.scoreArray.forEach(vadScore => {
+                vadSum += vadScore.score;
+            });
+
+            // TODO For now we just calculate the average score for each device, more compute algorithms will be added.
+            const avgVAD = vadSum / nrOfVADScores;
+
+            vadContext.scoreArray = [];
+
+            vadComputeScoreArray.push({
+                timestamp: computeTimestamp,
+                score: avgVAD,
+                deviceId
+            });
+        }
+
+        this.emit(VAD_REPORT_PUBLISHED, vadComputeScoreArray);
+    }
+
+    /**
+     * Callback method passed to vad emitters in order to publish their score.
+     *
+     * @param {VADScore} vadScore - Mic publishing the score.
+     * @returns {void}
+     */
+    _devicePublishVADScore(vadScore: VADScore) {
+        const context = this._contextMap.get(vadScore.deviceId);
+
+        if (context) {
+            context.scoreArray.push(vadScore);
+        }
+    }
+
+    /**
+     * Destroy the VADReportingService, stops the setInterval reporting, destroys the emitters and clears the map.
+     * After this call the instance is no longer usable.
+     *
+     * @returns {void}.
+     */
+    destroy() {
+        if (this._destroyed) {
+            return;
+        }
+
+        logger.log('Destroying VADReportingService.');
+
+        if (this._intervalId) {
+            clearInterval(this._intervalId);
+            this._intervalId = null;
+        }
+        this._clearContextMap();
+        this._destroyed = true;
+    }
+
+}
--- a/react/features/vad-reporter/logger.js
+++ b/react/features/vad-reporter/logger.js
@ -0,0 +1,5 @@
+// @flow
+
+import { getLogger } from '../base/logging/functions';
+
+export default getLogger('features/vad-reporter');
--- a/webpack.config.js
+++ b/webpack.config.js
@ -214,16 +214,46 @@ module.exports = [
        },
        performance: getPerformanceHints(5 * 1024)
    }),
+
+    // Because both video-blur-effect and rnnoise-processor modules are loaded
+    // in a lazy manner using the loadScript function with a hard coded name,
+    // i.e.loadScript('libs/rnnoise-processor.min.js'), webpack dev server
+    // won't know how to properly load them using the default config filename
+    // and sourceMapFilename parameters which target libs without .min in dev
+    // mode. Thus we change these modules to have the same filename in both
+    // prod and dev mode.
    Object.assign({}, config, {
        entry: {
            'video-blur-effect': './react/features/stream-effects/blur/index.js'
        },
        output: Object.assign({}, config.output, {
            library: [ 'JitsiMeetJS', 'app', 'effects' ],
-            libraryTarget: 'window'
+            libraryTarget: 'window',
+            filename: '[name].min.js',
+            sourceMapFilename: '[name].min.map'
        }),
        performance: getPerformanceHints(1 * 1024 * 1024)
    }),
+
+    Object.assign({}, config, {
+        entry: {
+            'rnnoise-processor': './react/features/stream-effects/rnnoise/index.js'
+        },
+        node: {
+            // Emscripten generated glue code "rnnoise.js" expects node fs module,
+            // we need to specify this parameter so webpack knows how to properly
+            // interpret it when encountered.
+            fs: 'empty'
+        },
+        output: Object.assign({}, config.output, {
+            library: [ 'JitsiMeetJS', 'app', 'effects', 'rnnoise' ],
+            libraryTarget: 'window',
+            filename: '[name].min.js',
+            sourceMapFilename: '[name].min.map'
+        }),
+        performance: getPerformanceHints(30 * 1024)
+    }),
+
    Object.assign({}, config, {
        entry: {
            'external_api': './modules/API/external/index.js'
@ -249,7 +279,8 @@ function devServerProxyBypass({ path }) {
    if (path.startsWith('/css/') || path.startsWith('/doc/')
            || path.startsWith('/fonts/') || path.startsWith('/images/')
            || path.startsWith('/sounds/')
-            || path.startsWith('/static/')) {
+            || path.startsWith('/static/')
+            || path.endsWith('.wasm')) {
        return path;
    }