feat(blur): replace BodyPix with TFLite

Use the Google Meet segmentation model and SIMD-optimized WASM.
Tudor D. Pop 2021-02-17 17:03:33 +02:00 committed by GitHub
parent f71e8a9982
commit 946339a52e
14 changed files with 223 additions and 74 deletions

View File

@ -6,6 +6,8 @@ build/*
flow-typed/*
libs/*
react/features/stream-effects/blur/vendor/*
# ESLint will by default ignore its own configuration file. However, there does
# not seem to be a reason why we would want to risk being inconsistent with our
# remaining JavaScript source code.

View File

@ -5,6 +5,8 @@ LIBJITSIMEET_DIR = node_modules/lib-jitsi-meet/
LIBFLAC_DIR = node_modules/libflacjs/dist/min/
OLM_DIR = node_modules/olm
RNNOISE_WASM_DIR = node_modules/rnnoise-wasm/dist/
TFLITE_WASM = react/features/stream-effects/blur/vendor/tflite
MEET_MODELS_DIR = react/features/stream-effects/blur/vendor/models/
NODE_SASS = ./node_modules/.bin/sass
NPM = npm
OUTPUT_DIR = .
@ -26,7 +28,7 @@ clean:
rm -fr $(BUILD_DIR)
.NOTPARALLEL:
deploy: deploy-init deploy-appbundle deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac deploy-olm deploy-css deploy-local
deploy: deploy-init deploy-appbundle deploy-rnnoise-binary deploy-tflite deploy-meet-models deploy-lib-jitsi-meet deploy-libflac deploy-olm deploy-css deploy-local
deploy-init:
rm -fr $(DEPLOY_DIR)
@ -82,6 +84,16 @@ deploy-rnnoise-binary:
$(RNNOISE_WASM_DIR)/rnnoise.wasm \
$(DEPLOY_DIR)
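# Copy the TFLite WASM binaries and the Meet segmentation models next to the app
# bundle so the blur effect can fetch them from /libs at runtime.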
deploy-tflite:
cp \
$(TFLITE_WASM)/*.wasm \
$(DEPLOY_DIR)
deploy-meet-models:
cp \
$(MEET_MODELS_DIR)/*.tflite \
$(DEPLOY_DIR)
deploy-css:
$(NODE_SASS) $(STYLES_MAIN) $(STYLES_BUNDLE) && \
$(CLEANCSS) --skip-rebase $(STYLES_BUNDLE) > $(STYLES_DESTINATION) ; \
@ -91,7 +103,7 @@ deploy-local:
([ ! -x deploy-local.sh ] || ./deploy-local.sh)
.NOTPARALLEL:
dev: deploy-init deploy-css deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac deploy-olm
dev: deploy-init deploy-css deploy-rnnoise-binary deploy-tflite deploy-meet-models deploy-lib-jitsi-meet deploy-libflac deploy-olm
$(WEBPACK_DEV_SERVER) --detect-circular-deps
source-package:

package-lock.json generated
View File

@ -15491,11 +15491,6 @@
"resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-1.0.2.tgz",
"integrity": "sha512-MTX+MeG5U994cazkjd/9KNAapsHnibjMLnfXodlkXw76JEea0UiNzrqidzo1emMwk7w5Qhc9jd4Bn9TBb1MFwA=="
},
"stackblur-canvas": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/stackblur-canvas/-/stackblur-canvas-2.3.0.tgz",
"integrity": "sha512-3ZHJv+43D8YttgumssIxkfs3hBXW7XaMS5Ux65fOBhKDYMjbG5hF8Ey8a90RiiJ58aQnAhWbGilPzZ9rkIlWgQ=="
},
"stacktrace-parser": {
"version": "0.1.8",
"resolved": "https://registry.npmjs.org/stacktrace-parser/-/stacktrace-parser-0.1.8.tgz",
@ -17071,6 +17066,11 @@
"loose-envify": "^1.0.0"
}
},
"wasm-check": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/wasm-check/-/wasm-check-2.0.1.tgz",
"integrity": "sha512-5otny2JrfRNKIc+zi1YSOrNxXe47trEQbpY6g/MtHrFwLumKSJyAIobGXH1tlEBezE95eIsmDokBbUZtIZTvvA=="
},
"watchpack": {
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/watchpack/-/watchpack-1.7.2.tgz",

View File

@ -95,10 +95,10 @@
"redux-thunk": "2.2.0",
"rnnoise-wasm": "github:jitsi/rnnoise-wasm#566a16885897704d6e6d67a1d5ac5d39781db2af",
"rtcstats": "github:jitsi/rtcstats#v6.2.0",
"stackblur-canvas": "2.3.0",
"styled-components": "3.4.9",
"util": "0.12.1",
"uuid": "3.1.0",
"wasm-check": "2.0.1",
"windows-iana": "^3.1.0",
"xmldom": "0.1.27",
"zxcvbn": "4.4.2"

View File

@ -1,7 +1,4 @@
// @flow
import * as StackBlur from 'stackblur-canvas';
import {
CLEAR_TIMEOUT,
TIMEOUT_TICK,
@ -9,21 +6,27 @@ import {
timerWorkerScript
} from './TimerWorker';
const segmentationWidth = 256;
const segmentationHeight = 144;
const segmentationPixelCount = segmentationWidth * segmentationHeight;
const blurValue = '25px';
/**
* Represents a modified MediaStream that adds a blur effect to the video background.
* <tt>JitsiStreamBlurEffect</tt> does the processing of the original
* video stream.
*/
export default class JitsiStreamBlurEffect {
_bpModel: Object;
_model: Object;
_inputVideoElement: HTMLVideoElement;
_inputVideoCanvasElement: HTMLCanvasElement;
_onMaskFrameTimer: Function;
_maskFrameTimerWorker: Worker;
_maskInProgress: boolean;
_outputCanvasElement: HTMLCanvasElement;
_outputCanvasCtx: Object;
_segmentationMaskCtx: Object;
_segmentationMask: Object;
_segmentationMaskCanvas: Object;
_renderMask: Function;
_segmentationData: Object;
isEnabled: Function;
startEffect: Function;
stopEffect: Function;
@ -35,7 +38,7 @@ export default class JitsiStreamBlurEffect {
* @param {BodyPix} bpModel - BodyPix model.
*/
constructor(bpModel: Object) {
this._bpModel = bpModel;
this._model = bpModel;
// Bind event handler so it is only bound once for every instance.
this._onMaskFrameTimer = this._onMaskFrameTimer.bind(this);
@ -44,7 +47,6 @@ export default class JitsiStreamBlurEffect {
this._outputCanvasElement = document.createElement('canvas');
this._outputCanvasElement.getContext('2d');
this._inputVideoElement = document.createElement('video');
this._inputVideoCanvasElement = document.createElement('canvas');
}
/**
@ -60,62 +62,109 @@ export default class JitsiStreamBlurEffect {
}
}
/**
* Runs the post-processing pass: composites the blurred background with the segmented person on the output canvas.
*
* @returns {void}
*/
runPostProcessing() {
this._outputCanvasCtx.globalCompositeOperation = 'copy';
// Draw segmentation mask.
this._outputCanvasCtx.filter = `blur(${blurValue})`;
this._outputCanvasCtx.drawImage(
this._segmentationMaskCanvas,
0,
0,
segmentationWidth,
segmentationHeight,
0,
0,
this._inputVideoElement.width,
this._inputVideoElement.height
);
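// 'source-in' keeps the original video pixels only where the smoothed mask marks the person.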
this._outputCanvasCtx.globalCompositeOperation = 'source-in';
this._outputCanvasCtx.filter = 'none';
this._outputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);
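// 'destination-over' paints a blurred copy of the frame behind the person pixels.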
this._outputCanvasCtx.globalCompositeOperation = 'destination-over';
this._outputCanvasCtx.filter = `blur(${blurValue})`; // FIXME Does not work on Safari.
this._outputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);
}
/**
* Runs TensorFlow Lite inference on the current frame and converts the output into a segmentation mask.
*
* @returns {void}
*/
runInference() {
this._model._runInference();
const outputMemoryOffset = this._model._getOutputMemoryOffset() / 4;
for (let i = 0; i < segmentationPixelCount; i++) {
const background = this._model.HEAPF32[outputMemoryOffset + (i * 2)];
const person = this._model.HEAPF32[outputMemoryOffset + (i * 2) + 1];
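// Numerically stable two-class softmax over (background, person); the resulting person probability drives the mask alpha.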
const shift = Math.max(background, person);
const backgroundExp = Math.exp(background - shift);
const personExp = Math.exp(person - shift);
// Sets only the alpha component of each pixel.
this._segmentationMask.data[(i * 4) + 3] = (255 * personExp) / (backgroundExp + personExp);
}
this._segmentationMaskCtx.putImageData(this._segmentationMask, 0, 0);
}
/**
* Loop function to render the background mask.
*
* @private
* @returns {void}
*/
async _renderMask() {
if (!this._maskInProgress) {
this._maskInProgress = true;
this._bpModel.segmentPerson(this._inputVideoElement, {
internalResolution: 'low', // resized to 0.5 times of the original resolution before inference
maxDetections: 1, // max. number of person poses to detect per image
segmentationThreshold: 0.7, // represents probability that a pixel belongs to a person
flipHorizontal: false,
scoreThreshold: 0.2
}).then(data => {
this._segmentationData = data;
this._maskInProgress = false;
});
}
const inputCanvasCtx = this._inputVideoCanvasElement.getContext('2d');
_renderMask() {
this.resizeSource();
this.runInference();
this.runPostProcessing();
inputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);
const currentFrame = inputCanvasCtx.getImageData(
0,
0,
this._inputVideoCanvasElement.width,
this._inputVideoCanvasElement.height
);
if (this._segmentationData) {
const blurData = new ImageData(currentFrame.data.slice(), currentFrame.width, currentFrame.height);
StackBlur.imageDataRGB(blurData, 0, 0, currentFrame.width, currentFrame.height, 12);
for (let x = 0; x < this._outputCanvasElement.width; x++) {
for (let y = 0; y < this._outputCanvasElement.height; y++) {
const n = (y * this._outputCanvasElement.width) + x;
if (this._segmentationData.data[n] === 0) {
currentFrame.data[n * 4] = blurData.data[n * 4];
currentFrame.data[(n * 4) + 1] = blurData.data[(n * 4) + 1];
currentFrame.data[(n * 4) + 2] = blurData.data[(n * 4) + 2];
currentFrame.data[(n * 4) + 3] = blurData.data[(n * 4) + 3];
}
}
}
}
this._outputCanvasElement.getContext('2d').putImageData(currentFrame, 0, 0);
this._maskFrameTimerWorker.postMessage({
id: SET_TIMEOUT,
timeMs: 1000 / 30
});
}
/**
* Downscales the source frame to the segmentation resolution and writes it into the model's input tensor.
*
* @returns {void}
*/
resizeSource() {
this._segmentationMaskCtx.drawImage(
this._inputVideoElement,
0,
0,
this._inputVideoElement.width,
this._inputVideoElement.height,
0,
0,
segmentationWidth,
segmentationHeight
);
const imageData = this._segmentationMaskCtx.getImageData(
0,
0,
segmentationWidth,
segmentationHeight
);
const inputMemoryOffset = this._model._getInputMemoryOffset() / 4;
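// Drop the alpha channel and normalize each RGBA byte to a Float32 RGB value in [0, 1] for the model input.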
for (let i = 0; i < segmentationPixelCount; i++) {
this._model.HEAPF32[inputMemoryOffset + (i * 3)] = imageData.data[i * 4] / 255;
this._model.HEAPF32[inputMemoryOffset + (i * 3) + 1] = imageData.data[(i * 4) + 1] / 255;
this._model.HEAPF32[inputMemoryOffset + (i * 3) + 2] = imageData.data[(i * 4) + 2] / 255;
}
}
/**
* Checks if the local track supports this effect.
*
@ -136,15 +185,18 @@ export default class JitsiStreamBlurEffect {
startEffect(stream: MediaStream) {
this._maskFrameTimerWorker = new Worker(timerWorkerScript, { name: 'Blur effect worker' });
this._maskFrameTimerWorker.onmessage = this._onMaskFrameTimer;
const firstVideoTrack = stream.getVideoTracks()[0];
const { height, frameRate, width }
= firstVideoTrack.getSettings ? firstVideoTrack.getSettings() : firstVideoTrack.getConstraints();
this._segmentationMask = new ImageData(segmentationWidth, segmentationHeight);
this._segmentationMaskCanvas = document.createElement('canvas');
this._segmentationMaskCanvas.width = segmentationWidth;
this._segmentationMaskCanvas.height = segmentationHeight;
this._segmentationMaskCtx = this._segmentationMaskCanvas.getContext('2d');
this._outputCanvasElement.width = parseInt(width, 10);
this._outputCanvasElement.height = parseInt(height, 10);
this._inputVideoCanvasElement.width = parseInt(width, 10);
this._inputVideoCanvasElement.height = parseInt(height, 10);
this._outputCanvasCtx = this._outputCanvasElement.getContext('2d');
this._inputVideoElement.width = parseInt(width, 10);
this._inputVideoElement.height = parseInt(height, 10);
this._inputVideoElement.autoplay = true;

View File

@ -1,8 +1,15 @@
// @flow
import * as bodyPix from '@tensorflow-models/body-pix';
import * as wasmCheck from 'wasm-check';
import JitsiStreamBlurEffect from './JitsiStreamBlurEffect';
import createTFLiteModule from './vendor/tflite/tflite';
import createTFLiteSIMDModule from './vendor/tflite/tflite-simd';
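// Google Meet segmentation models, keyed by segmentation height in pixels; they are deployed to /libs by the Makefile.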
const models = {
'96': '/libs/segm_lite_v681.tflite',
'144': '/libs/segm_full_v679.tflite'
};
/**
* Creates a new instance of JitsiStreamBlurEffect. This loads the bodyPix model that is used to
@ -14,15 +21,24 @@ export async function createBlurEffect() {
if (!MediaStreamTrack.prototype.getSettings && !MediaStreamTrack.prototype.getConstraints) {
throw new Error('JitsiStreamBlurEffect not supported!');
}
let tflite;
// An output stride of 16 and a multiplier of 0.5 are used for improved
// performance on a larger range of CPUs.
const bpModel = await bodyPix.load({
architecture: 'MobileNetV1',
outputStride: 16,
multiplier: 0.50,
quantBytes: 2
});
if (wasmCheck.feature.simd) {
tflite = await createTFLiteSIMDModule();
} else {
tflite = await createTFLiteModule();
}
return new JitsiStreamBlurEffect(bpModel);
const modelBufferOffset = tflite._getModelBufferMemoryOffset();
const modelResponse = await fetch(
models['144']
);
const model = await modelResponse.arrayBuffer();
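// Copy the fetched model bytes into the TFLite module's heap, then let the module parse and load it.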
tflite.HEAPU8.set(new Uint8Array(model), modelBufferOffset);
tflite._loadModel(model.byteLength);
return new JitsiStreamBlurEffect(tflite);
}

View File

@ -0,0 +1,24 @@
# Virtual Background on stream effects
> Inspired by https://ai.googleblog.com/2020/10/background-features-in-google-meet.html and https://github.com/Volcomix/virtual-background.git
#### Canvas 2D + CPU
This rendering pipeline is largely the same as the BodyPix one: it relies on Canvas compositing properties to blend rendering layers according to the segmentation mask.
Interactions with the TFLite inference tool run on the CPU: the input frame is converted from UInt8 to Float32 for the model input, and a softmax is applied to the model output.
The frame rate is higher and the quality looks better than with BodyPix.
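A minimal sketch of that compositing sequence, assuming a 2D output context `ctx`, a low-resolution mask canvas `maskCanvas` and the source `video` element (illustrative; the real code lives in `JitsiStreamBlurEffect.runPostProcessing`):
```js
// 1. Draw the low-resolution mask scaled up to the output size; the blur filter feathers its edges.
ctx.globalCompositeOperation = 'copy';
ctx.filter = 'blur(25px)';
ctx.drawImage(maskCanvas, 0, 0, maskCanvas.width, maskCanvas.height, 0, 0, video.width, video.height);

// 2. 'source-in' keeps the original frame only where the mask marks the person.
ctx.globalCompositeOperation = 'source-in';
ctx.filter = 'none';
ctx.drawImage(video, 0, 0);

// 3. 'destination-over' paints a blurred copy of the frame behind the person.
ctx.globalCompositeOperation = 'destination-over';
ctx.filter = 'blur(25px)';
ctx.drawImage(video, 0, 0);
```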
#### SIMD and non-SIMD
How to test with SIMD:
1. Go to chrome://flags/
2. Search for the WebAssembly SIMD flag
3. Enable WebAssembly SIMD support (enables support for the WebAssembly SIMD proposal)
4. Reopen Google Chrome
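Independently of the browser flag, the effect factory detects SIMD support at runtime with the `wasm-check` package and loads the matching TFLite build. A minimal sketch of that selection, mirroring `createBlurEffect`:
```js
import * as wasmCheck from 'wasm-check';

import createTFLiteModule from './vendor/tflite/tflite';
import createTFLiteSIMDModule from './vendor/tflite/tflite-simd';

// Prefer the SIMD-optimized WASM build when the browser supports WebAssembly SIMD.
async function loadTFLite() {
    return wasmCheck.feature.simd
        ? createTFLiteSIMDModule()
        : createTFLiteModule();
}
```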
More details:
- [WebAssembly](https://webassembly.org/)
- [WebAssembly SIMD](https://github.com/WebAssembly/simd)
- [TFLite](https://blog.tensorflow.org/2020/07/accelerating-tensorflow-lite-xnnpack-integration.html)

File diff suppressed because one or more lines are too long

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@ -1,6 +1,7 @@
// @flow
import React, { Component } from 'react';
import * as wasmCheck from 'wasm-check';
import {
ACTION_SHORTCUT_TRIGGERED,
@ -1069,7 +1070,7 @@ class Toolbox extends Component<Props, State> {
&& <VideoBlurButton
key = 'videobackgroundblur'
showLabel = { true }
visible = { !_screensharing } />,
visible = { !_screensharing && wasmCheck.feature.simd } />,
this._shouldShowButton('settings')
&& <SettingsButton
key = 'settings'