feat(blur) replace BodyPix with TFLite

Use the Google Meet model and SIMD optimized WASM.
2021-02-17 17:03:33 +02:00 · 2021-02-17 17:03:33 +02:00 · 946339a52e
parent f71e8a9982
commit 946339a52e
14 changed files with 223 additions and 74 deletions
--- a/.eslintignore
+++ b/.eslintignore
@ -6,6 +6,8 @@ build/*
 flow-typed/*
 libs/*

+react/features/stream-effects/blur/vendor/*
+
 # ESLint will by default ignore its own configuration file. However, there does
 # not seem to be a reason why we will want to risk being inconsistent with our
 # remaining JavaScript source code.
--- a/16
+++ b/16
@ -5,6 +5,8 @@ LIBJITSIMEET_DIR = node_modules/lib-jitsi-meet/
 LIBFLAC_DIR = node_modules/libflacjs/dist/min/
 OLM_DIR = node_modules/olm
 RNNOISE_WASM_DIR = node_modules/rnnoise-wasm/dist/
+TFLITE_WASM = react/features/stream-effects/blur/vendor/tflite
+MEET_MODELS_DIR  = react/features/stream-effects/blur/vendor/models/
 NODE_SASS = ./node_modules/.bin/sass
 NPM = npm
 OUTPUT_DIR = .
@ -26,7 +28,7 @@ clean:
 	rm -fr $(BUILD_DIR)

 .NOTPARALLEL:
-deploy: deploy-init deploy-appbundle deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac deploy-olm deploy-css deploy-local
+deploy: deploy-init deploy-appbundle deploy-rnnoise-binary deploy-tflite deploy-meet-models deploy-lib-jitsi-meet deploy-libflac deploy-olm deploy-css deploy-local

 deploy-init:
 	rm -fr $(DEPLOY_DIR)
@ -82,6 +84,16 @@ deploy-rnnoise-binary:
 		$(RNNOISE_WASM_DIR)/rnnoise.wasm \
 		$(DEPLOY_DIR)

+deploy-tflite:
+	cp \
+		$(TFLITE_WASM)/*.wasm \
+		$(DEPLOY_DIR)		
+
+deploy-meet-models:
+	cp \
+		$(MEET_MODELS_DIR)/*.tflite \
+		$(DEPLOY_DIR)	
+
 deploy-css:
 	$(NODE_SASS) $(STYLES_MAIN) $(STYLES_BUNDLE) && \
 	$(CLEANCSS) --skip-rebase $(STYLES_BUNDLE) > $(STYLES_DESTINATION) ; \
@ -91,7 +103,7 @@ deploy-local:
 	([ ! -x deploy-local.sh ] || ./deploy-local.sh)

 .NOTPARALLEL:
-dev: deploy-init deploy-css deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac deploy-olm
+dev: deploy-init deploy-css deploy-rnnoise-binary deploy-tflite deploy-meet-models deploy-lib-jitsi-meet deploy-libflac deploy-olm
 	$(WEBPACK_DEV_SERVER) --detect-circular-deps

 source-package:
--- a/package-lock.json
+++ b/package-lock.json
@ -15491,11 +15491,6 @@
      "resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-1.0.2.tgz",
      "integrity": "sha512-MTX+MeG5U994cazkjd/9KNAapsHnibjMLnfXodlkXw76JEea0UiNzrqidzo1emMwk7w5Qhc9jd4Bn9TBb1MFwA=="
    },
-    "stackblur-canvas": {
-      "version": "2.3.0",
-      "resolved": "https://registry.npmjs.org/stackblur-canvas/-/stackblur-canvas-2.3.0.tgz",
-      "integrity": "sha512-3ZHJv+43D8YttgumssIxkfs3hBXW7XaMS5Ux65fOBhKDYMjbG5hF8Ey8a90RiiJ58aQnAhWbGilPzZ9rkIlWgQ=="
-    },
    "stacktrace-parser": {
      "version": "0.1.8",
      "resolved": "https://registry.npmjs.org/stacktrace-parser/-/stacktrace-parser-0.1.8.tgz",
@ -17071,6 +17066,11 @@
        "loose-envify": "^1.0.0"
      }
    },
+    "wasm-check": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/wasm-check/-/wasm-check-2.0.1.tgz",
+      "integrity": "sha512-5otny2JrfRNKIc+zi1YSOrNxXe47trEQbpY6g/MtHrFwLumKSJyAIobGXH1tlEBezE95eIsmDokBbUZtIZTvvA=="
+    },
    "watchpack": {
      "version": "1.7.2",
      "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-1.7.2.tgz",
--- a/package.json
+++ b/package.json
@ -95,10 +95,10 @@
    "redux-thunk": "2.2.0",
    "rnnoise-wasm": "github:jitsi/rnnoise-wasm#566a16885897704d6e6d67a1d5ac5d39781db2af",
    "rtcstats": "github:jitsi/rtcstats#v6.2.0",
-    "stackblur-canvas": "2.3.0",
    "styled-components": "3.4.9",
    "util": "0.12.1",
    "uuid": "3.1.0",
+    "wasm-check": "2.0.1",
    "windows-iana": "^3.1.0",
    "xmldom": "0.1.27",
    "zxcvbn": "4.4.2"
--- a/react/features/stream-effects/blur/JitsiStreamBlurEffect.js
+++ b/react/features/stream-effects/blur/JitsiStreamBlurEffect.js
@ -1,7 +1,4 @@
 // @flow
-
-import * as StackBlur from 'stackblur-canvas';
-
 import {
    CLEAR_TIMEOUT,
    TIMEOUT_TICK,
@ -9,21 +6,27 @@ import {
    timerWorkerScript
 } from './TimerWorker';

+const segmentationWidth = 256;
+const segmentationHeight = 144;
+const segmentationPixelCount = segmentationWidth * segmentationHeight;
+const blurValue = '25px';
+
 /**
 * Represents a modified MediaStream that adds blur to video background.
 * <tt>JitsiStreamBlurEffect</tt> does the processing of the original
 * video stream.
 */
 export default class JitsiStreamBlurEffect {
-    _bpModel: Object;
+    _model: Object;
    _inputVideoElement: HTMLVideoElement;
-    _inputVideoCanvasElement: HTMLCanvasElement;
    _onMaskFrameTimer: Function;
    _maskFrameTimerWorker: Worker;
-    _maskInProgress: boolean;
    _outputCanvasElement: HTMLCanvasElement;
+    _outputCanvasCtx: Object;
+    _segmentationMaskCtx: Object;
+    _segmentationMask: Object;
+    _segmentationMaskCanvas: Object;
    _renderMask: Function;
-    _segmentationData: Object;
    isEnabled: Function;
    startEffect: Function;
    stopEffect: Function;
@ -35,7 +38,7 @@ export default class JitsiStreamBlurEffect {
     * @param {BodyPix} bpModel - BodyPix model.
     */
    constructor(bpModel: Object) {
-        this._bpModel = bpModel;
+        this._model = bpModel;

        // Bind event handler so it is only bound once for every instance.
        this._onMaskFrameTimer = this._onMaskFrameTimer.bind(this);
@ -44,7 +47,6 @@ export default class JitsiStreamBlurEffect {
        this._outputCanvasElement = document.createElement('canvas');
        this._outputCanvasElement.getContext('2d');
        this._inputVideoElement = document.createElement('video');
-        this._inputVideoCanvasElement = document.createElement('canvas');
    }

    /**
@ -60,62 +62,109 @@ export default class JitsiStreamBlurEffect {
        }
    }

+    /**
+     * Composites the person, cut out with the segmentation mask, over a blurred copy of the input video.
+     *
+     * @returns {void}
+     */
+    runPostProcessing() {
+        this._outputCanvasCtx.globalCompositeOperation = 'copy';
+
+        // Draw segmentation mask.
+        this._outputCanvasCtx.filter = `blur(${blurValue})`;
+        this._outputCanvasCtx.drawImage(
+            this._segmentationMaskCanvas,
+            0,
+            0,
+            segmentationWidth,
+            segmentationHeight,
+            0,
+            0,
+            this._inputVideoElement.width,
+            this._inputVideoElement.height
+        );
+
+        this._outputCanvasCtx.globalCompositeOperation = 'source-in';
+        this._outputCanvasCtx.filter = 'none';
+        this._outputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);
+
+        this._outputCanvasCtx.globalCompositeOperation = 'destination-over';
+        this._outputCanvasCtx.filter = `blur(${blurValue})`; // FIXME Does not work on Safari.
+        this._outputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);
+    }
+
+    /**
+     * Runs TFLite inference and converts the model output into the alpha channel of the segmentation mask.
+     *
+     * @returns {void}
+     */
+    runInference() {
+        this._model._runInference();
+        const outputMemoryOffset = this._model._getOutputMemoryOffset() / 4;
+
+        for (let i = 0; i < segmentationPixelCount; i++) {
+            const background = this._model.HEAPF32[outputMemoryOffset + (i * 2)];
+            const person = this._model.HEAPF32[outputMemoryOffset + (i * 2) + 1];
+            const shift = Math.max(background, person);
+            const backgroundExp = Math.exp(background - shift);
+            const personExp = Math.exp(person - shift);
+
+            // Sets only the alpha component of each pixel.
+            this._segmentationMask.data[(i * 4) + 3] = (255 * personExp) / (backgroundExp + personExp);
+        }
+        this._segmentationMaskCtx.putImageData(this._segmentationMask, 0, 0);
+    }
+
    /**
     * Loop function to render the background mask.
     *
     * @private
     * @returns {void}
     */
-    async _renderMask() {
-        if (!this._maskInProgress) {
-            this._maskInProgress = true;
-            this._bpModel.segmentPerson(this._inputVideoElement, {
-                internalResolution: 'low', // resized to 0.5 times of the original resolution before inference
-                maxDetections: 1, // max. number of person poses to detect per image
-                segmentationThreshold: 0.7, // represents probability that a pixel belongs to a person
-                flipHorizontal: false,
-                scoreThreshold: 0.2
-            }).then(data => {
-                this._segmentationData = data;
-                this._maskInProgress = false;
-            });
-        }
-        const inputCanvasCtx = this._inputVideoCanvasElement.getContext('2d');
+    _renderMask() {
+        this.resizeSource();
+        this.runInference();
+        this.runPostProcessing();

-        inputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);
-
-        const currentFrame = inputCanvasCtx.getImageData(
-            0,
-            0,
-            this._inputVideoCanvasElement.width,
-            this._inputVideoCanvasElement.height
-        );
-
-        if (this._segmentationData) {
-            const blurData = new ImageData(currentFrame.data.slice(), currentFrame.width, currentFrame.height);
-
-            StackBlur.imageDataRGB(blurData, 0, 0, currentFrame.width, currentFrame.height, 12);
-
-            for (let x = 0; x < this._outputCanvasElement.width; x++) {
-                for (let y = 0; y < this._outputCanvasElement.height; y++) {
-                    const n = (y * this._outputCanvasElement.width) + x;
-
-                    if (this._segmentationData.data[n] === 0) {
-                        currentFrame.data[n * 4] = blurData.data[n * 4];
-                        currentFrame.data[(n * 4) + 1] = blurData.data[(n * 4) + 1];
-                        currentFrame.data[(n * 4) + 2] = blurData.data[(n * 4) + 2];
-                        currentFrame.data[(n * 4) + 3] = blurData.data[(n * 4) + 3];
-                    }
-                }
-            }
-        }
-        this._outputCanvasElement.getContext('2d').putImageData(currentFrame, 0, 0);
        this._maskFrameTimerWorker.postMessage({
            id: SET_TIMEOUT,
            timeMs: 1000 / 30
        });
    }

+    /**
+     * Scales the input video frame to the segmentation size and writes its normalized RGB data to the model input.
+     *
+     * @returns {void}
+     */
+    resizeSource() {
+        this._segmentationMaskCtx.drawImage(
+            this._inputVideoElement,
+            0,
+            0,
+            this._inputVideoElement.width,
+            this._inputVideoElement.height,
+            0,
+            0,
+            segmentationWidth,
+            segmentationHeight
+        );
+
+        const imageData = this._segmentationMaskCtx.getImageData(
+            0,
+            0,
+            segmentationWidth,
+            segmentationHeight
+        );
+        const inputMemoryOffset = this._model._getInputMemoryOffset() / 4;
+
+        for (let i = 0; i < segmentationPixelCount; i++) {
+            this._model.HEAPF32[inputMemoryOffset + (i * 3)] = imageData.data[i * 4] / 255;
+            this._model.HEAPF32[inputMemoryOffset + (i * 3) + 1] = imageData.data[(i * 4) + 1] / 255;
+            this._model.HEAPF32[inputMemoryOffset + (i * 3) + 2] = imageData.data[(i * 4) + 2] / 255;
+        }
+    }
+
    /**
     * Checks if the local track supports this effect.
     *
@ -136,15 +185,18 @@ export default class JitsiStreamBlurEffect {
    startEffect(stream: MediaStream) {
        this._maskFrameTimerWorker = new Worker(timerWorkerScript, { name: 'Blur effect worker' });
        this._maskFrameTimerWorker.onmessage = this._onMaskFrameTimer;
-
        const firstVideoTrack = stream.getVideoTracks()[0];
        const { height, frameRate, width }
            = firstVideoTrack.getSettings ? firstVideoTrack.getSettings() : firstVideoTrack.getConstraints();

+        this._segmentationMask = new ImageData(segmentationWidth, segmentationHeight);
+        this._segmentationMaskCanvas = document.createElement('canvas');
+        this._segmentationMaskCanvas.width = segmentationWidth;
+        this._segmentationMaskCanvas.height = segmentationHeight;
+        this._segmentationMaskCtx = this._segmentationMaskCanvas.getContext('2d');
        this._outputCanvasElement.width = parseInt(width, 10);
        this._outputCanvasElement.height = parseInt(height, 10);
-        this._inputVideoCanvasElement.width = parseInt(width, 10);
-        this._inputVideoCanvasElement.height = parseInt(height, 10);
+        this._outputCanvasCtx = this._outputCanvasElement.getContext('2d');
        this._inputVideoElement.width = parseInt(width, 10);
        this._inputVideoElement.height = parseInt(height, 10);
        this._inputVideoElement.autoplay = true;
--- a/react/features/stream-effects/blur/index.js
+++ b/react/features/stream-effects/blur/index.js
@ -1,8 +1,15 @@
 // @flow

-import * as bodyPix from '@tensorflow-models/body-pix';
+import * as wasmCheck from 'wasm-check';

 import JitsiStreamBlurEffect from './JitsiStreamBlurEffect';
+import createTFLiteModule from './vendor/tflite/tflite';
+import createTFLiteSIMDModule from './vendor/tflite/tflite-simd';
+
+const models = {
+    '96': '/libs/segm_lite_v681.tflite',
+    '144': '/libs/segm_full_v679.tflite'
+};

 /**
 * Creates a new instance of JitsiStreamBlurEffect. This loads the bodyPix model that is used to
@ -14,15 +21,24 @@ export async function createBlurEffect() {
    if (!MediaStreamTrack.prototype.getSettings && !MediaStreamTrack.prototype.getConstraints) {
        throw new Error('JitsiStreamBlurEffect not supported!');
    }
+    let tflite;

-    // An output stride of 16 and a multiplier of 0.5 are used for improved
-    // performance on a larger range of CPUs.
-    const bpModel = await bodyPix.load({
-        architecture: 'MobileNetV1',
-        outputStride: 16,
-        multiplier: 0.50,
-        quantBytes: 2
-    });
+    if (wasmCheck.feature.simd) {
+        tflite = await createTFLiteSIMDModule();
+    } else {
+        tflite = await createTFLiteModule();
+    }

-    return new JitsiStreamBlurEffect(bpModel);
+    const modelBufferOffset = tflite._getModelBufferMemoryOffset();
+    const modelResponse = await fetch(
+        models['144']
+    );
+
+    const model = await modelResponse.arrayBuffer();
+
+    tflite.HEAPU8.set(new Uint8Array(model), modelBufferOffset);
+
+    tflite._loadModel(model.byteLength);
+
+    return new JitsiStreamBlurEffect(tflite);
 }
--- a/react/features/stream-effects/blur/vendor/README.md
+++ b/react/features/stream-effects/blur/vendor/README.md
@ -0,0 +1,24 @@
+# Virtual Background on stream effects
+
+> Inspired by https://ai.googleblog.com/2020/10/background-features-in-google-meet.html and https://github.com/Volcomix/virtual-background.git
+
+#### Canvas 2D + CPU
+
+This rendering pipeline is pretty much the same as for BodyPix. It relies on Canvas compositing properties to blend rendering layers according to the segmentation mask.
+
+Interactions with the TFLite inference tool are executed on the CPU: converting from UInt8 to Float32 for the model input and applying softmax to the model output.
+
+The frame rate is higher and the quality looks better than with BodyPix.
+
+#### SIMD and non-SIMD
+
+How to test on SIMD:
+1. Go to chrome://flags/
+2. Search for SIMD flag
+3. Enable WebAssembly SIMD support (Enables support for the WebAssembly SIMD proposal).
+4. Reopen Google Chrome
+
+More details:
+- [WebAssembly](https://webassembly.org/)
+- [WebAssembly SIMD](https://github.com/WebAssembly/simd)
+- [TFLite](https://blog.tensorflow.org/2020/07/accelerating-tensorflow-lite-xnnpack-integration.html)
--- a/react/features/stream-effects/blur/vendor/models/segm_full_v679.tflite
+++ b/react/features/stream-effects/blur/vendor/models/segm_full_v679.tflite
--- a/react/features/stream-effects/blur/vendor/models/segm_lite_v681.tflite
+++ b/react/features/stream-effects/blur/vendor/models/segm_lite_v681.tflite
--- a/react/features/stream-effects/blur/vendor/tflite/tflite-simd.js
+++ b/react/features/stream-effects/blur/vendor/tflite/tflite-simd.js
--- a/react/features/stream-effects/blur/vendor/tflite/tflite-simd.wasm
+++ b/react/features/stream-effects/blur/vendor/tflite/tflite-simd.wasm
--- a/react/features/stream-effects/blur/vendor/tflite/tflite.js
+++ b/react/features/stream-effects/blur/vendor/tflite/tflite.js
--- a/react/features/stream-effects/blur/vendor/tflite/tflite.wasm
+++ b/react/features/stream-effects/blur/vendor/tflite/tflite.wasm
--- a/react/features/toolbox/components/web/Toolbox.js
+++ b/react/features/toolbox/components/web/Toolbox.js
@ -1,6 +1,7 @@
 // @flow

 import React, { Component } from 'react';
+import * as wasmCheck from 'wasm-check';

 import {
    ACTION_SHORTCUT_TRIGGERED,
@ -1069,7 +1070,7 @@ class Toolbox extends Component<Props, State> {
                && <VideoBlurButton
                    key = 'videobackgroundblur'
                    showLabel = { true }
-                    visible = { !_screensharing } />,
+                    visible = { !_screensharing && wasmCheck.feature.simd } />,
            this._shouldShowButton('settings')
                && <SettingsButton
                    key = 'settings'