refactor / enable VAD talk while muted

parent c1d261445e
commit b125bff7c7
@@ -1268,7 +1268,7 @@ export default {
         options.applicationName = interfaceConfig.APP_NAME;
         options.getWiFiStatsMethod = this._getWiFiStatsMethod;
         options.confID = `${locationURL.host}${locationURL.pathname}`;
-        options.vadProcessor = createRnnoiseProcessorPromise;
+        options.createVADProcessor = createRnnoiseProcessorPromise;

         return options;
     },
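The option now passes a factory (createRnnoiseProcessorPromise) rather than a ready-made processor, so the receiving side can create a processor on demand. A minimal sketch of that contract; `options` is the object built above, and the consuming function is hypothetical rather than actual lib-jitsi-meet code:

// Hypothetical consumer of the factory handed over via options.createVADProcessor.
async function obtainVADProcessor(options) {
    // createVADProcessor is createRnnoiseProcessorPromise, so calling it yields
    // a Promise that resolves to an RnnoiseProcessor instance.
    const vadProcessor = await options.createVADProcessor();

    // The instance exposes calculateAudioFrameVAD() and destroy(), as seen in
    // the RnnoiseProcessor changes further down.
    return vadProcessor;
}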
@@ -1,5 +1,4 @@
 export * from './device-detect/ActiveDeviceDetector';
 export * from './device-detect/Events';
 export * from './vad-reporter/Events';
-export * from './vad-reporter/TrackVADEmitter';
 export * from './vad-reporter/VADReportingService';
@@ -1,7 +1,10 @@
-// Event generated by a TrackVADEmitter when it emits a VAD score from rnnoise.
-// The generated objects are of type VADScore
-export const VAD_SCORE_PUBLISHED = 'vad-score-published';
-
-// Event generated by VADReportingService when if finishes creating a VAD report for the monitored devices.
-// The generated objects are of type Array<VADReportScore>, one score for each monitored device.
+/**
+ * Event generated by VADReportingService when it finishes creating a VAD report for the monitored devices.
+ * The generated objects are of type Array<Object>, one score for each monitored device.
+ * @event VAD_REPORT_PUBLISHED
+ * @type Array<Object> with the following structure:
+ * @property {Date} timestamp - Timestamp at which the compute took place.
+ * @property {number} avgVAD - Average VAD score over the monitored period of time.
+ * @property {string} deviceId - Associated local audio device ID.
+ */
 export const VAD_REPORT_PUBLISHED = 'vad-report-published';
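A minimal listener sketch for the event documented above, assuming an already created VADReportingService instance (see the service changes below); only the payload shape comes from the JSDoc:

import { VAD_REPORT_PUBLISHED } from './Events';

// Attach a report listener to an existing VADReportingService instance.
function logVADReports(vadReportingService) {
    vadReportingService.on(VAD_REPORT_PUBLISHED, vadScores => {
        // One entry per monitored audio input device.
        for (const { deviceId, avgVAD, timestamp } of vadScores) {
            console.log(`[${timestamp}] device ${deviceId} average VAD: ${avgVAD}`);
        }
    });
}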
@@ -1,258 +0,0 @@
-// @flow
-
-import { createRnnoiseProcessorPromise, getSampleLength } from '../../../../rnnoise';
-import EventEmitter from 'events';
-import JitsiMeetJS from '../../../lib-jitsi-meet';
-import logger from '../../logger';
-import { VAD_SCORE_PUBLISHED } from './Events';
-
-/**
- * The structure used by TrackVADEmitter to relay a score
- */
-export type VADScore = {
-
-    /**
-     * Device ID associated with the VAD score
-     */
-    deviceId: string,
-
-    /**
-     * The PCM score from 0 - 1 i.e. 0.60
-     */
-    score: number,
-
-    /**
-     * Epoch time at which PCM was recorded
-     */
-    timestamp: number
-
-};
-
-/**
- * Connects an audio JitsiLocalTrack to a RnnoiseProcessor using WebAudio ScriptProcessorNode.
- * Once an object is created audio from the local track flows through the ScriptProcessorNode as raw PCM.
- * The PCM is processed by the rnnoise module and a VAD (voice activity detection) score is obtained, the
- * score is published to consumers via an EventEmitter.
- * After work is done with this service the destroy method needs to be called for a proper cleanup.
- */
-export default class TrackVADEmitter extends EventEmitter {
-    /**
-     * The AudioContext instance.
-     */
-    _audioContext: AudioContext;
-
-    /**
-     * The MediaStreamAudioSourceNode instance.
-     */
-    _audioSource: MediaStreamAudioSourceNode;
-
-    /**
-     * The ScriptProcessorNode instance.
-     */
-    _audioProcessingNode: ScriptProcessorNode;
-
-    /**
-     * Buffer to hold residue PCM resulting after a ScriptProcessorNode callback
-     */
-    _bufferResidue: Float32Array;
-
-    /**
-     * State flag, check if the instance was destroyed
-     */
-    _destroyed: boolean = false;
-
-    /**
-     * The JitsiLocalTrack instance.
-     */
-    _localTrack: Object;
-
-    /**
-     * Device ID of the target microphone.
-     */
-    _micDeviceId: string;
-
-    /**
-     * Callback function that will be called by the ScriptProcessNode with raw PCM data, depending on the set sample
-     * rate.
-     */
-    _onAudioProcess: (audioEvent: Object) => void;
-
-    /**
-     * Sample rate of the ScriptProcessorNode.
-     */
-    _procNodeSampleRate: number;
-
-    /**
-     * Rnnoise adapter that allows us to calculate VAD score for PCM samples
-     */
-    _rnnoiseProcessor: Object;
-
-    /**
-     * PCM Sample size expected by the RnnoiseProcessor instance.
-     */
-    _rnnoiseSampleSize: number;
-
-    /**
-     * Constructor.
-     *
-     * @param {number} procNodeSampleRate - Sample rate of the ScriptProcessorNode. Possible values 256, 512, 1024,
-     * 2048, 4096, 8192, 16384. Passing other values will default to closes neighbor.
-     * @param {Object} rnnoiseProcessor - Rnnoise adapter that allows us to calculate VAD score
-     * for PCM samples.
-     * @param {Object} jitsiLocalTrack - JitsiLocalTrack corresponding to micDeviceId.
-     */
-    constructor(procNodeSampleRate: number, rnnoiseProcessor: Object, jitsiLocalTrack: Object) {
-        super();
-        this._procNodeSampleRate = procNodeSampleRate;
-        this._rnnoiseProcessor = rnnoiseProcessor;
-        this._localTrack = jitsiLocalTrack;
-        this._micDeviceId = jitsiLocalTrack.getDeviceId();
-        this._bufferResidue = new Float32Array([]);
-        this._audioContext = new AudioContext();
-        this._rnnoiseSampleSize = getSampleLength();
-        this._onAudioProcess = this._onAudioProcess.bind(this);
-
-        this._initializeAudioContext();
-        this._connectAudioGraph();
-
-        logger.log(`Constructed VAD emitter for device: ${this._micDeviceId}`);
-    }
-
-    /**
-     * Factory method that sets up all the necessary components for the creation of the TrackVADEmitter.
-     *
-     * @param {string} micDeviceId - Target microphone device id.
-     * @param {number} procNodeSampleRate - Sample rate of the proc node.
-     * @returns {Promise<TrackVADEmitter>} - Promise resolving in a new instance of TrackVADEmitter.
-     */
-    static async create(micDeviceId: string, procNodeSampleRate: number) {
-        let rnnoiseProcessor = null;
-        let localTrack = null;
-
-        try {
-            logger.log(`Initializing TrackVADEmitter for device: ${micDeviceId}`);
-
-            rnnoiseProcessor = await createRnnoiseProcessorPromise();
-            localTrack = await JitsiMeetJS.createLocalTracks({
-                devices: [ 'audio' ],
-                micDeviceId
-            });
-
-            // We only expect one audio track when specifying a device id.
-            if (!localTrack[0]) {
-                throw new Error(`Failed to create jitsi local track for device id: ${micDeviceId}`);
-            }
-
-            return new TrackVADEmitter(procNodeSampleRate, rnnoiseProcessor, localTrack[0]);
-        } catch (error) {
-            logger.error(`Failed to create TrackVADEmitter for ${micDeviceId} with error: ${error}`);
-
-            if (rnnoiseProcessor) {
-                rnnoiseProcessor.destroy();
-            }
-
-            if (localTrack) {
-                localTrack.stopStream();
-            }
-
-            throw error;
-        }
-    }
-
-    /**
-     * Sets up the audio graph in the AudioContext.
-     *
-     * @returns {Promise<void>}
-     */
-    _initializeAudioContext() {
-        this._audioSource = this._audioContext.createMediaStreamSource(this._localTrack.stream);
-
-        // TODO AudioProcessingNode is deprecated check and replace with alternative.
-        // We don't need stereo for determining the VAD score so we create a single chanel processing node.
-        this._audioProcessingNode = this._audioContext.createScriptProcessor(this._procNodeSampleRate, 1, 1);
-        this._audioProcessingNode.onaudioprocess = this._onAudioProcess;
-    }
-
-    /**
-     * ScriptProcessorNode callback, the input parameters contains the PCM audio that is then sent to rnnoise.
-     * Rnnoise only accepts PCM samples of 480 bytes whereas the webaudio processor node can't sample at a multiple
-     * of 480 thus after each _onAudioProcess callback there will remain and PCM buffer residue equal
-     * to _procNodeSampleRate / 480 which will be added to the next sample buffer and so on.
-     *
-     * @param {AudioProcessingEvent} audioEvent - Audio event.
-     * @returns {void}
-     */
-    _onAudioProcess(audioEvent: Object) {
-        // Prepend the residue PCM buffer from the previous process callback.
-        const inData = audioEvent.inputBuffer.getChannelData(0);
-        const completeInData = [ ...this._bufferResidue, ...inData ];
-        const sampleTimestamp = Date.now();
-
-        let i = 0;
-
-        for (; i + this._rnnoiseSampleSize < completeInData.length; i += this._rnnoiseSampleSize) {
-            const pcmSample = completeInData.slice(i, i + this._rnnoiseSampleSize);
-            const vadScore = this._rnnoiseProcessor.calculateAudioFrameVAD(pcmSample);
-
-            this.emit(VAD_SCORE_PUBLISHED, {
-                timestamp: sampleTimestamp,
-                score: vadScore,
-                deviceId: this._micDeviceId
-            });
-        }
-
-        this._bufferResidue = completeInData.slice(i, completeInData.length);
-    }
-
-    /**
-     * Connects the nodes in the AudioContext to start the flow of audio data.
-     *
-     * @returns {void}
-     */
-    _connectAudioGraph() {
-        this._audioSource.connect(this._audioProcessingNode);
-        this._audioProcessingNode.connect(this._audioContext.destination);
-    }
-
-    /**
-     * Disconnects the nodes in the AudioContext.
-     *
-     * @returns {void}
-     */
-    _disconnectAudioGraph() {
-        // Even thought we disconnect the processing node it seems that some callbacks remain queued,
-        // resulting in calls with and uninitialized context.
-        // eslint-disable-next-line no-empty-function
-        this._audioProcessingNode.onaudioprocess = () => {};
-        this._audioProcessingNode.disconnect();
-        this._audioSource.disconnect();
-    }
-
-    /**
-     * Cleanup potentially acquired resources.
-     *
-     * @returns {void}
-     */
-    _cleanupResources() {
-        logger.debug(`Cleaning up resources for device ${this._micDeviceId}!`);
-
-        this._disconnectAudioGraph();
-        this._localTrack.stopStream();
-        this._rnnoiseProcessor.destroy();
-    }
-
-    /**
-     * Destroy TrackVADEmitter instance (release resources and stop callbacks).
-     *
-     * @returns {void}
-     */
-    destroy() {
-        if (this._destroyed) {
-            return;
-        }
-
-        logger.log(`Destroying TrackVADEmitter for mic: ${this._micDeviceId}`);
-        this._cleanupResources();
-        this._destroyed = true;
-    }
-}
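The core of the deleted emitter is the framing step in _onAudioProcess: slice the incoming PCM into rnnoise-sized frames and carry the leftover samples over to the next callback. A standalone sketch of just that step, mirroring the deleted code; the function name and the scoreFrame callback are illustrative, with scoreFrame standing in for RnnoiseProcessor's calculateAudioFrameVAD:

// Illustrative helper: frame PCM data for rnnoise and keep the residue.
function frameAndScore(residue, inData, frameSize, scoreFrame) {
    // Prepend whatever was left over from the previous callback.
    const completeInData = [ ...residue, ...inData ];
    const scores = [];

    let i = 0;

    // Score every complete frame of frameSize samples.
    for (; i + frameSize < completeInData.length; i += frameSize) {
        scores.push(scoreFrame(completeInData.slice(i, i + frameSize)));
    }

    // Samples that do not fill a whole frame are carried over to the next call.
    return {
        scores,
        residue: completeInData.slice(i)
    };
}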
@@ -1,11 +1,10 @@
 // @flow

+import { createRnnoiseProcessorPromise } from '../../../../rnnoise';
 import EventEmitter from 'events';
 import logger from '../../logger';
-import TrackVADEmitter from './TrackVADEmitter';
-import { VAD_SCORE_PUBLISHED, VAD_REPORT_PUBLISHED } from './Events';
-import type { VADScore } from './TrackVADEmitter';
-export type { VADScore };
+import JitsiMeetJS, { JitsiDetectionEvents } from '../../../lib-jitsi-meet';
+import { VAD_REPORT_PUBLISHED } from './Events';

 /**
  * Sample rate used by TrackVADEmitter, this value determines how often the ScriptProcessorNode is going to call the
@@ -30,36 +29,14 @@ type VADDeviceContext = {
     /**
      * Array with VAD scores publish from the emitter.
      */
-    scoreArray: Array<VADScore>,
+    scoreArray: Array<Object>,

     /**
      * TrackVADEmitter associated with media device
      */
-    vadEmitter: TrackVADEmitter
+    vadEmitter: Object
 };

-/**
- * The structure used by VADReportingService to relay a score report
- */
-export type VADReportScore = {
-
-    /**
-     * Device ID associated with the VAD score
-     */
-    deviceId: string,
-
-    /**
-     * The PCM score from 0 - 1 i.e. 0.60
-     */
-    score: number,
-
-    /**
-     * Epoch time at which PCM was recorded
-     */
-    timestamp: number
-};
-
-
 /**
  * Voice activity detection reporting service. The service create TrackVADEmitters for the provided devices and
  * publishes an average of their VAD score over the specified interval via EventEmitter.
@@ -111,7 +88,7 @@ export default class VADReportingService extends EventEmitter {
      *
      * @returns {Promise<VADReportingService>}
      */
-    static create(micDeviceList: Array<MediaDeviceInfo>, intervalDelay: number) {
+    static async create(micDeviceList: Array<MediaDeviceInfo>, intervalDelay: number) {
         const vadReportingService = new VADReportingService(intervalDelay);
         const emitterPromiseArray = [];

@@ -125,8 +102,17 @@ export default class VADReportingService extends EventEmitter {

             logger.log(`Initializing VAD context for mic: ${micDevice.label} -> ${micDevice.deviceId}`);

-            const emitterPromise = TrackVADEmitter.create(micDevice.deviceId, SCRIPT_NODE_SAMPLE_RATE).then(emitter => {
-                emitter.on(VAD_SCORE_PUBLISHED, vadReportingService._devicePublishVADScore.bind(vadReportingService));
+            const rnnoiseProcessor = await createRnnoiseProcessorPromise();
+
+            const emitterPromise = JitsiMeetJS.createTrackVADEmitter(
+                micDevice.deviceId,
+                SCRIPT_NODE_SAMPLE_RATE,
+                rnnoiseProcessor
+            ).then(emitter => {
+                emitter.on(
+                    JitsiDetectionEvents.VAD_SCORE_PUBLISHED,
+                    vadReportingService._devicePublishVADScore.bind(vadReportingService)
+                );

                 return {
                     vadEmitter: emitter,
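Taken on its own, the new creation path above reduces to roughly the following sketch. Only createRnnoiseProcessorPromise, JitsiMeetJS.createTrackVADEmitter and JitsiDetectionEvents.VAD_SCORE_PUBLISHED come from this changeset; the sample-rate value and device id are illustrative:

import { createRnnoiseProcessorPromise } from '../../../../rnnoise';
import JitsiMeetJS, { JitsiDetectionEvents } from '../../../lib-jitsi-meet';

// Illustrative values; the real SCRIPT_NODE_SAMPLE_RATE is defined by VADReportingService.
const SCRIPT_NODE_SAMPLE_RATE = 4096;
const micDeviceId = 'default';

async function startVADMonitoring() {
    // The rnnoise processor is now created by the app and handed to lib-jitsi-meet.
    const rnnoiseProcessor = await createRnnoiseProcessorPromise();

    const emitter = await JitsiMeetJS.createTrackVADEmitter(
        micDeviceId,
        SCRIPT_NODE_SAMPLE_RATE,
        rnnoiseProcessor
    );

    // Individual VAD scores are now published through lib-jitsi-meet's detection events.
    emitter.on(JitsiDetectionEvents.VAD_SCORE_PUBLISHED, ({ deviceId, score, timestamp }) => {
        console.log(`VAD score ${score} for ${deviceId} at ${timestamp}`);
    });

    return emitter;
}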
@@ -216,6 +202,7 @@ export default class VADReportingService extends EventEmitter {
      * Function called at set interval with selected compute. The result will be published on the set callback.
      *
      * @returns {void}
+     * @fires VAD_REPORT_PUBLISHED
      */
     _reportVadScore() {
         const vadComputeScoreArray = [];
@@ -243,16 +230,30 @@ export default class VADReportingService extends EventEmitter {
             });
         }

+        /**
+         * Once the computation for all the tracked devices is done, fire an event containing all the necessary
+         * information.
+         *
+         * @event VAD_REPORT_PUBLISHED
+         * @type Array<Object> with the following structure:
+         * @property {Date} timestamp - Timestamp at which the compute took place.
+         * @property {number} avgVAD - Average VAD score over the monitored period of time.
+         * @property {string} deviceId - Associated local audio device ID.
+         */
         this.emit(VAD_REPORT_PUBLISHED, vadComputeScoreArray);
     }

     /**
      * Callback method passed to vad emitters in order to publish their score.
      *
-     * @param {VADScore} vadScore - Mic publishing the score.
+     * @param {Object} vadScore - VAD score emitted by the VAD emitter.
+     * @param {Date} vadScore.timestamp - Exact time at which processed PCM sample was generated.
+     * @param {number} vadScore.score - VAD score on a scale from 0 to 1 (i.e. 0.7).
+     * @param {string} vadScore.deviceId - Device id of the associated track.
      * @returns {void}
      * @listens VAD_SCORE_PUBLISHED
      */
-    _devicePublishVADScore(vadScore: VADScore) {
+    _devicePublishVADScore(vadScore: Object) {
         const context = this._contextMap.get(vadScore.deviceId);

         if (context) {
@@ -280,5 +281,4 @@ export default class VADReportingService extends EventEmitter {
         this._clearContextMap();
         this._destroyed = true;
     }
-
 }
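A usage sketch for the reworked service, assuming it runs in a browser context; navigator.mediaDevices.enumerateDevices() and the 'audioinput' kind are standard Web APIs, and the interval value is illustrative. The resulting reports can then be consumed with a VAD_REPORT_PUBLISHED listener like the one sketched after the Events changes above:

import VADReportingService from './VADReportingService';

// Illustrative reporting interval in milliseconds.
const REPORT_INTERVAL_MS = 3000;

async function startVADReporting() {
    // Pick the audio input devices the service should monitor.
    const devices = await navigator.mediaDevices.enumerateDevices();
    const audioInputs = devices.filter(device => device.kind === 'audioinput');

    // create() is now async, since each monitored device needs its own rnnoise
    // processor before the lib-jitsi-meet VAD emitter can be created.
    return VADReportingService.create(audioInputs, REPORT_INTERVAL_MS);
}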
@@ -14,6 +14,7 @@ export const JitsiConnectionErrors = JitsiMeetJS.errors.connection;
 export const JitsiConnectionEvents = JitsiMeetJS.events.connection;
 export const JitsiConnectionQualityEvents
     = JitsiMeetJS.events.connectionQuality;
+export const JitsiDetectionEvents = JitsiMeetJS.events.detection;
 export const JitsiE2ePingEvents = JitsiMeetJS.events.e2eping;
 export const JitsiMediaDevicesEvents = JitsiMeetJS.events.mediaDevices;
 export const JitsiParticipantConnectionStatus
@@ -25,20 +25,3 @@ export function createRnnoiseProcessorPromise() {
         throw new Error('Rnnoise module binding createRnnoiseProcessor not found!');
     });
 }
-
-/**
- * Get the accepted sample length for the rnnoise library. We might want to expose it with flow libdefs.
- *
- * @returns {number}
- */
-export function getSampleLength() {
-    const ns = getJitsiMeetGlobalNS();
-
-    const rnnoiseSample = ns?.effects?.rnnoise?.RNNOISE_SAMPLE_LENGTH;
-
-    if (!rnnoiseSample) {
-        throw new Error('Please call createRnnoiseProcessorPromise first or wait for promise to resolve!');
-    }
-
-    return rnnoiseSample;
-}
@@ -10,6 +10,11 @@ export const RNNOISE_SAMPLE_LENGTH: number = 480;
  */
 const RNNOISE_BUFFER_SIZE: number = RNNOISE_SAMPLE_LENGTH * 4;

+/**
+ * Constant. Rnnoise only operates on 44.1 kHz float 32 little endian PCM.
+ */
+const PCM_FREQUENCY: number = 44100;
+
 /**
  * Represents an adaptor for the rnnoise library compiled to webassembly. The class takes care of webassembly
  * memory management and exposes rnnoise functionality such as PCM audio denoising and VAD (voice activity

@@ -132,14 +137,23 @@ export default class RnnoiseProcessor {
     }

     /**
-     * Such comment very wow.
+     * Rnnoise can only operate on a certain PCM array size.
      *
-     * @returns {number}
+     * @returns {number} - The PCM sample array size as required by rnnoise.
      */
     getSampleLength() {
         return RNNOISE_SAMPLE_LENGTH;
     }

+    /**
+     * Rnnoise can only operate on a certain format of PCM sample, namely float 32 at 44.1 kHz.
+     *
+     * @returns {number} - PCM sample frequency as required by rnnoise.
+     */
+    getRequiredPCMFrequency() {
+        return PCM_FREQUENCY;
+    }
+
     /**
      * Release any resources required by the rnnoise context this needs to be called
      * before destroying any context that uses the processor.
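With the module-level getSampleLength() helper removed, callers obtain both the frame size and the required sample rate from the processor instance itself. A minimal sketch; the relative import path depends on the caller's location, and the silent test frame is purely illustrative:

import { createRnnoiseProcessorPromise } from '../../../rnnoise';

async function probeRnnoiseRequirements() {
    const processor = await createRnnoiseProcessorPromise();

    // 480 samples per frame at 44.1 kHz float 32 PCM, per the constants above.
    const frameSize = processor.getSampleLength();
    const pcmFrequency = processor.getRequiredPCMFrequency();

    console.log(`rnnoise expects ${frameSize}-sample frames at ${pcmFrequency} Hz`);

    // Feed one correctly sized frame of silence and read back its VAD score.
    const silentFrame = new Array(frameSize).fill(0);
    const vadScore = processor.calculateAudioFrameVAD(silentFrame);

    console.log(`VAD score for silence: ${vadScore}`);

    processor.destroy();
}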
@@ -33,14 +33,12 @@ function getPerformanceHints(size) {
 const config = {
     devServer: {
         https: true,
-        logLevel: 'debug',
         inline: true,
         proxy: {
             '/': {
                 bypass: devServerProxyBypass,
                 secure: false,
-                target: devServerProxyTarget,
-                logLevel: 'debug'
+                target: devServerProxyTarget
             }
         }
     },

@@ -278,13 +276,11 @@ module.exports = [
  * target, undefined; otherwise, the path to the local file to be served.
  */
 function devServerProxyBypass({ path }) {
-    console.log('Fetching path: ', path);
     if (path.startsWith('/css/') || path.startsWith('/doc/')
             || path.startsWith('/fonts/') || path.startsWith('/images/')
             || path.startsWith('/sounds/')
             || path.startsWith('/static/')
-            || path.endsWith('.wasm')
-            || path.startsWith('/libs/lib-jitsi-meet')) {
+            || path.endsWith('.wasm')) {
         return path;
     }
