feat: Implement a GPU-only pipeline for the virtual background effect.
For the inference part, the old Selfie Segmentation based on tflite has been replaced by the modern MediaPipe vision tasks API, which runs on the GPU if available (and otherwise gracefully falls back to wasm/CPU). For compositing (applying the mask), a new custom WebGL2 compositor performs all work directly on the GPU, completely avoiding CPU-GPU round trips; if WebGL2 is not available, the old canvas compositing is used.

This change brings a significant reduction in CPU usage during video calls that use blurring/backgrounds, since the CPU is now mostly idle and all work is done on the GPU. As a bonus, virtual backgrounds are now also available on Safari (WebGL compositor only).

The old JitsiStreamBackgroundEffect class has been renamed to VideoStreamBackgroundEffect, since the deviation from the original code is significant.

Signed-off-by: Dimitris Kazakos <nemphys@gmail.com>
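To illustrate the fallback behaviour described above, here is a rough sketch of how a caller might choose between the WebGL2 and canvas compositing paths. This is not part of the diff (the actual wiring lives elsewhere in this change); the helper name, the segmentation size and the probing approach are assumptions made only for the example:

	import VideoStreamBackgroundEffect from './VideoStreamBackgroundEffect.js'

	// Hypothetical helper, for illustration only: MediaPipe itself requests the
	// GPU delegate and falls back to wasm/CPU on its own, while the WebGL2
	// compositor is only enabled when a "webgl2" context can actually be
	// created, so the effect keeps the old 2D canvas compositing otherwise.
	function createVirtualBackgroundEffect(virtualBackground) {
		const probeCanvas = document.createElement('canvas')
		const webGL = !!probeCanvas.getContext('webgl2')

		return new VideoStreamBackgroundEffect({
			width: 256, // assumed segmentation size for the example
			height: 144,
			virtualBackground,
			webGL,
		})
	}

With webGL set to false, startEffect() falls back to the 2D canvas compositing path shown further down in VideoStreamBackgroundEffect.js.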
23 changed files with 1525 additions and 949 deletions
  2  REUSE.toml
  6  src/components/AdminSettings/WebServerSetupChecks.vue
513  src/utils/media/effects/virtual-background/JitsiStreamBackgroundEffect.js
 52  src/utils/media/effects/virtual-background/JitsiStreamBackgroundEffect.spec.js
131  src/utils/media/effects/virtual-background/JitsiStreamBackgroundEffect.worker.js
667  src/utils/media/effects/virtual-background/VideoStreamBackgroundEffect.js
681  src/utils/media/effects/virtual-background/WebGLCompositor.js
 70  src/utils/media/effects/virtual-background/index.js
 24  src/utils/media/effects/virtual-background/vendor/README.md
  6  src/utils/media/effects/virtual-background/vendor/mediapipe/vision_bundle.js
  5  src/utils/media/effects/virtual-background/vendor/mediapipe/vision_bundle.js.map
 24  src/utils/media/effects/virtual-background/vendor/mediapipe/vision_wasm_internal.js
BIN  src/utils/media/effects/virtual-background/vendor/mediapipe/vision_wasm_internal.wasm
 24  src/utils/media/effects/virtual-background/vendor/mediapipe/vision_wasm_nosimd_internal.js
BIN  src/utils/media/effects/virtual-background/vendor/mediapipe/vision_wasm_nosimd_internal.wasm
BIN  src/utils/media/effects/virtual-background/vendor/models/selfie_segmentation_landscape.tflite
BIN  src/utils/media/effects/virtual-background/vendor/models/selfie_segmenter.tflite
 24  src/utils/media/effects/virtual-background/vendor/tflite/tflite-simd.js
BIN  src/utils/media/effects/virtual-background/vendor/tflite/tflite-simd.wasm
 24  src/utils/media/effects/virtual-background/vendor/tflite/tflite.js
BIN  src/utils/media/effects/virtual-background/vendor/tflite/tflite.wasm
 85  src/utils/media/pipeline/VirtualBackground.js
136  src/utils/media/pipeline/VirtualBackground.spec.js
@@ -1,513 +0,0 @@
/**
 * SPDX-FileCopyrightText: 2021 Nextcloud GmbH and Nextcloud contributors
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */
// @flow

import { VIRTUAL_BACKGROUND } from '../../../../constants.ts'
import WebWorker from './JitsiStreamBackgroundEffect.worker.js'
import {
	CLEAR_TIMEOUT,
	SET_TIMEOUT,
	TIMEOUT_TICK,
	timerWorkerScript,
} from './TimerWorker.js'

/**
 * Represents a modified MediaStream that adds effects to video background.
 * <tt>JitsiStreamBackgroundEffect</tt> does the processing of the original
 * video stream.
 */
export default class JitsiStreamBackgroundEffect {
	// _model: Object;
	// _options: Object;
	// _stream: Object;
	// _segmentationPixelCount: number;
	// _inputVideoElement: HTMLVideoElement;
	// _onMaskFrameTimer: Function;
	// _maskFrameTimerWorker: Worker;
	// _outputCanvasElement: HTMLCanvasElement;
	// _outputCanvasCtx: Object;
	// _segmentationMaskCtx: Object;
	// _segmentationMask: Object;
	// _segmentationMaskCanvas: Object;
	// _renderMask: Function;
	// _virtualImage: HTMLImageElement;
	// _virtualVideo: HTMLVideoElement;
	// isEnabled: Function;
	// startEffect: Function;
	// stopEffect: Function;

	/**
	 * Represents a modified video MediaStream track.
	 *
	 * @class
	 * @param {object} options object with the parameters.
	 * @param {number} options.width segmentation width.
	 * @param {number} options.height segmentation height.
	 * @param {object} options.virtualBackground see "setVirtualBackground()".
	 */
	constructor(options) {
		const isSimd = options.simd
		this._options = options
		this._loadPromise = new Promise((resolve, reject) => {
			this._loadPromiseResolve = resolve
			this._loadPromiseReject = reject
		})
		this._loaded = false
		this._loadFailed = false

		this.setVirtualBackground(this._options.virtualBackground)

		const segmentationPixelCount = this._options.width * this._options.height
		this._segmentationPixelCount = segmentationPixelCount
		this._model = new WebWorker()
		this._model.postMessage({
			message: 'makeTFLite',
			segmentationPixelCount,
			simd: isSimd,
		})

		this._segmentationPixelCount = segmentationPixelCount

		// Bind event handler so it is only bound once for every instance.
		this._onMaskFrameTimer = this._onMaskFrameTimer.bind(this)
		this._startFx = this._startFx.bind(this)

		this._model.onmessage = this._startFx

		// Workaround for FF issue https://bugzilla.mozilla.org/show_bug.cgi?id=1388974
		this._outputCanvasElement = document.createElement('canvas')
		this._outputCanvasElement.getContext('2d')
		this._inputVideoElement = document.createElement('video')
	}

	/**
	 * EventHandler onmessage for the maskFrameTimerWorker WebWorker.
	 *
	 * @private
	 * @param {object} response - The onmessage EventHandler parameter.
	 * @return {void}
	 */
	_onMaskFrameTimer(response) {
		if (response.data.id === TIMEOUT_TICK) {
			this._renderMask()
		}
	}

	_startFx(e) {
		switch (e.data.message) {
			case 'inferenceRun':
				if (e.data.frameId === this._lastFrameId + 1) {
					this._lastFrameId = e.data.frameId

					this.runInference(e.data.segmentationResult)
					this.runPostProcessing()
				}
				break
			case 'loaded':
				this._loaded = true
				this._loadPromiseResolve()
				break
			case 'loadFailed':
				this._loadFailed = true
				this._loadPromiseReject()
				break
			default:
				console.error('_startFx: Something went wrong.')
				break
		}
	}

	/**
	 * Helper method to know when the model was loaded after creating the
	 * object.
	 *
	 * Note that it is not needed to call this method to actually load the
	 * effect; the load will automatically start as soon as the object is
	 * created, but it can be waited on this method to know once it has finished
	 * (or failed).
	 *
	 * @return {Promise} promise resolved or rejected once the load has finished
	 *         or failed.
	 */
	async load() {
		return this._loadPromise
	}

	/**
	 * Returns whether loading the TFLite model failed or not.
	 *
	 * @return {boolean} true if loading failed, false otherwise
	 */
	didLoadFail() {
		return this._loadFailed
	}

	/**
	 * Returns the virtual background properties.
	 *
	 * @return {object} the virtual background properties.
	 */
	getVirtualBackground() {
		return this._options.virtualBackground
	}

	/**
	 * Sets the virtual background properties to use.
	 *
	 * The virtual background can be modified while the effect is running.
	 *
	 * If an image or video URL is given it can be any URL accepted by the "src"
	 * attribute of HTML image or video elements, so it is possible to set a
	 * "real" URL or, for example, one generated with "URL.createObjectURL()".
	 *
	 * @param {object} virtualBackground an object with the virtual background
	 *        properties.
	 * @param {string} virtualBackground.backgroundType BLUR, IMAGE, VIDEO or
	 *        VIDEO_STREAM.
	 * @param {number} virtualBackground.blurValue the blur to apply on a 720p
	 *        video; it will be automatically scaled as needed.
	 *        Optional, only needed when background type is BLUR.
	 * @param {string|MediaStream} virtualBackground.virtualSource the URL to
	 *        the image or video, or a video stream.
	 *        Optional, only needed when background type is IMAGE, VIDEO or
	 *        VIDEO_STREAM.
	 */
	setVirtualBackground(virtualBackground) {
		// Clear previous elements to allow them to be garbage collected
		this._virtualImage = null
		this._virtualVideo = null

		this._options.virtualBackground = virtualBackground

		if (this._options.virtualBackground.backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.IMAGE) {
			this._virtualImage = document.createElement('img')
			this._virtualImage.crossOrigin = 'anonymous'
			this._virtualImage.src = this._options.virtualBackground.virtualSource

			return
		}

		if (this._options.virtualBackground.backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.VIDEO) {
			this._virtualVideo = document.createElement('video')
			this._virtualVideo.crossOrigin = 'anonymous'
			this._virtualVideo.loop = true
			this._virtualVideo.muted = true
			this._virtualVideo.src = this._options.virtualBackground.virtualSource

			if (this._running) {
				this._virtualVideo.play()
			}

			return
		}

		if (this._options.virtualBackground.backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.VIDEO_STREAM) {
			this._virtualVideo = document.createElement('video')
			this._virtualVideo.srcObject = this._options.virtualBackground.virtualSource

			if (this._running) {
				this._virtualVideo.play()
			}
		}
	}

	/**
	 * Represents the run post processing.
	 *
	 * @return {void}
	 */
	runPostProcessing() {
		const height = this._inputVideoElement.videoHeight
		const width = this._inputVideoElement.videoWidth
		const { backgroundType } = this._options.virtualBackground

		const scaledBlurFactor = width / 720.0
		const backgroundBlurValue = this._options.virtualBackground.blurValue * scaledBlurFactor
		const edgesBlurValue = (backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.IMAGE ? 4 : 8) * scaledBlurFactor

		this._outputCanvasElement.height = height
		this._outputCanvasElement.width = width
		this._outputCanvasCtx.globalCompositeOperation = 'copy'

		// Draw segmentation mask.

		// Smooth out the edges.
		this._outputCanvasCtx.filter = `blur(${edgesBlurValue}px)`
		this._outputCanvasCtx.drawImage(
			this._segmentationMaskCanvas,
			0,
			0,
			this._options.width,
			this._options.height,
			0,
			0,
			this._inputVideoElement.videoWidth,
			this._inputVideoElement.videoHeight,
		)
		this._outputCanvasCtx.globalCompositeOperation = 'source-in'
		this._outputCanvasCtx.filter = 'none'

		// Draw the foreground video.

		this._outputCanvasCtx.drawImage(this._inputVideoElement, 0, 0)

		// Draw the background.

		this._outputCanvasCtx.globalCompositeOperation = 'destination-over'
		if (backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.IMAGE
			|| backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.VIDEO
			|| backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.VIDEO_STREAM) {
			let source
			let sourceWidthOriginal
			let sourceHeightOriginal

			if (backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.IMAGE) {
				source = this._virtualImage
				sourceWidthOriginal = source.naturalWidth
				sourceHeightOriginal = source.naturalHeight
			} else {
				source = this._virtualVideo
				sourceWidthOriginal = source.videoWidth
				sourceHeightOriginal = source.videoHeight
			}

			const destinationWidth = this._outputCanvasElement.width
			const destinationHeight = this._outputCanvasElement.height

			const [sourceX, sourceY, sourceWidth, sourceHeight] = JitsiStreamBackgroundEffect.getSourcePropertiesForDrawingBackgroundImage(sourceWidthOriginal, sourceHeightOriginal, destinationWidth, destinationHeight)

			this._outputCanvasCtx.drawImage(
				source,
				sourceX,
				sourceY,
				sourceWidth,
				sourceHeight,
				0,
				0,
				destinationWidth,
				destinationHeight,
			)
		} else {
			this._outputCanvasCtx.filter = `blur(${backgroundBlurValue}px)`
			this._outputCanvasCtx.drawImage(this._inputVideoElement, 0, 0)
		}
	}

	/**
	 * Returns the coordinates, width and height to draw the background image
	 * onto the canvas.
	 *
	 * The background image is cropped and centered as needed to cover the whole
	 * canvas while maintaining the original aspect ratio of the background.
	 *
	 * @param {number} sourceWidth the width of the source image
	 * @param {number} sourceHeight the height of the source image
	 * @param {number} destinationWidth the width of the destination canvas
	 * @param {number} destinationHeight the height of the destination canvas
	 * @return {Array} the X and Y coordinates, width and height of the source
	 *         image after cropping and centering
	 */
	static getSourcePropertiesForDrawingBackgroundImage(sourceWidth, sourceHeight, destinationWidth, destinationHeight) {
		let croppedSourceX = 0
		let croppedSourceY = 0
		let croppedSourceWidth = sourceWidth
		let croppedSourceHeight = sourceHeight

		if (sourceWidth <= 0 || sourceHeight <= 0 || destinationWidth <= 0 || destinationHeight <= 0) {
			return [croppedSourceX, croppedSourceY, croppedSourceWidth, croppedSourceHeight]
		}

		const sourceAspectRatio = sourceWidth / sourceHeight
		const destinationAspectRatio = destinationWidth / destinationHeight

		if (sourceAspectRatio > destinationAspectRatio) {
			croppedSourceWidth = sourceHeight * destinationAspectRatio
			croppedSourceX = (sourceWidth - croppedSourceWidth) / 2
		} else {
			croppedSourceHeight = sourceWidth / destinationAspectRatio
			croppedSourceY = (sourceHeight - croppedSourceHeight) / 2
		}

		return [croppedSourceX, croppedSourceY, croppedSourceWidth, croppedSourceHeight]
	}

	/**
	 * Represents the run Tensorflow Interference.
	 * Worker partly
	 *
	 * @param {Array} data the segmentation result
	 * @return {void}
	 */
	runInference(data) {
		// All consts in Worker in obj array.
		for (let i = 0; i < this._segmentationPixelCount; i++) {
			this._segmentationMask.data[(i * 4) + 3] = 255 * data[i].person
		}
		this._segmentationMaskCtx.putImageData(this._segmentationMask, 0, 0)
	}

	/**
	 * Loop function to render the background mask.
	 *
	 * @private
	 * @return {void}
	 */
	_renderMask() {
		if (this._frameId < this._lastFrameId) {
			console.debug('Fixing frame id, this should not happen', this._frameId, this._lastFrameId)

			this._frameId = this._lastFrameId
		}

		// Calculate segmentation data only if the previous one finished
		// already.
		if (this._loaded && this._frameId === this._lastFrameId) {
			this._frameId++

			this.resizeSource()
		}

		this._maskFrameTimerWorker.postMessage({
			id: SET_TIMEOUT,
			timeMs: 1000 / this._frameRate,
			message: 'this._maskFrameTimerWorker',
		})
	}

	/**
	 * Represents the resize source process.
	 * Worker partly
	 *
	 * @return {void}
	 */
	resizeSource() {
		this._segmentationMaskCtx.drawImage(
			this._inputVideoElement,
			0,
			0,
			this._inputVideoElement.videoWidth,
			this._inputVideoElement.videoHeight,
			0,
			0,
			this._options.width,
			this._options.height,
		)

		const imageData = this._segmentationMaskCtx.getImageData(
			0,
			0,
			this._options.width,
			this._options.height,
		)

		this._model.postMessage({ message: 'resizeSource', imageData, frameId: this._frameId })
	}

	/**
	 * Checks if the local track supports this effect.
	 *
	 * @param {object} jitsiLocalTrack - Track to apply effect.
	 * @return {boolean} - Returns true if this effect can run on the specified track
	 * false otherwise.
	 */
	isEnabled(jitsiLocalTrack) {
		return jitsiLocalTrack.isVideoTrack() && jitsiLocalTrack.videoType === 'camera'
	}

	/**
	 * Starts loop to capture video frame and render the segmentation mask.
	 *
	 * @param {MediaStream} stream - Stream to be used for processing.
	 * @return {MediaStream} - The stream with the applied effect.
	 */
	startEffect(stream) {
		this._running = true

		this._stream = stream
		this._maskFrameTimerWorker = new Worker(timerWorkerScript, { name: 'Blur effect worker' })
		this._maskFrameTimerWorker.onmessage = this._onMaskFrameTimer
		const firstVideoTrack = this._stream.getVideoTracks()[0]
		const { height, frameRate, width }
			= firstVideoTrack.getSettings ? firstVideoTrack.getSettings() : firstVideoTrack.getConstraints()

		this._frameRate = parseInt(frameRate, 10)

		this._segmentationMask = new ImageData(this._options.width, this._options.height)
		this._segmentationMaskCanvas = document.createElement('canvas')
		this._segmentationMaskCanvas.width = this._options.width
		this._segmentationMaskCanvas.height = this._options.height
		this._segmentationMaskCtx = this._segmentationMaskCanvas.getContext('2d')

		this._outputCanvasElement.width = parseInt(width, 10)
		this._outputCanvasElement.height = parseInt(height, 10)
		this._outputCanvasCtx = this._outputCanvasElement.getContext('2d')
		this._inputVideoElement.autoplay = true
		this._inputVideoElement.srcObject = this._stream
		this._inputVideoElement.onloadeddata = () => {
			this._maskFrameTimerWorker.postMessage({
				id: SET_TIMEOUT,
				timeMs: 1000 / this._frameRate,
				message: 'this._maskFrameTimerWorker',
			})
			this._inputVideoElement.onloadeddata = null
		}

		if (this._virtualVideo) {
			this._virtualVideo.play()
		}

		this._frameId = -1
		this._lastFrameId = -1

		this._outputStream = this._outputCanvasElement.captureStream(this._frameRate)

		return this._outputStream
	}

	updateInputStream() {
		const firstVideoTrack = this._stream.getVideoTracks()[0]
		const { frameRate }
			= firstVideoTrack.getSettings ? firstVideoTrack.getSettings() : firstVideoTrack.getConstraints()

		this._frameRate = parseInt(frameRate, 10)

		this._outputStream.getVideoTracks()[0].applyConstraints({ frameRate: this._frameRate }).catch((error) => {
			console.error('Frame rate could not be adjusted in background effect', error)
		})

		this._frameId = -1
		this._lastFrameId = -1
	}

	/**
	 * Stops the capture and render loop.
	 *
	 * @return {void}
	 */
	stopEffect() {
		this._running = false

		if (this._maskFrameTimerWorker) {
			this._maskFrameTimerWorker.postMessage({
				id: CLEAR_TIMEOUT,
				message: 'stopEffect',
			})
			this._maskFrameTimerWorker.terminate()
		}

		if (this._virtualVideo) {
			this._virtualVideo.pause()
		}
	}

	/**
	 * Destroys the JitsiStreamBackgroundEffect instance and releases all resources.
	 */
	destroy() {
		this.stopEffect()
		this._model.terminate()
		this._model = null
	}
}
@@ -1,52 +0,0 @@
/**
 * SPDX-FileCopyrightText: 2023 Nextcloud GmbH and Nextcloud contributors
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

import { describe, expect, test } from 'vitest'
import JitsiStreamBackgroundEffect from './JitsiStreamBackgroundEffect.js'

describe('JitsiStreamBackgroundEffect', () => {
	describe('getSourcePropertiesForDrawingBackgroundImage', () => {
		test.each([
			['landscape source and landscape destination, wider aspect ratio source, wider and higher source', [1200, 500], [300, 200], [225, 0], [750, 500]],
			['landscape source and landscape destination, wider aspect ratio source, wider source', [450, 150], [300, 200], [112.5, 0], [225, 150]],
			['landscape source and landscape destination, wider aspect ratio source, same width', [300, 100], [300, 200], [75, 0], [150, 100]],
			['landscape source and landscape destination, wider aspect ratio source, narrower source', [200, 50], [300, 200], [62.5, 0], [75, 50]],
			['landscape source and landscape destination, wider aspect ratio destination, wider and higher destination', [300, 200], [1200, 500], [0, 37.5], [300, 125]],
			['landscape source and landscape destination, wider aspect ratio destination, wider destination', [300, 200], [450, 150], [0, 50], [300, 100]],
			['landscape source and landscape destination, wider aspect ratio destination, same width', [300, 200], [300, 100], [0, 50], [300, 100]],
			['landscape source and landscape destination, wider aspect ratio destination, narrower destination', [300, 200], [200, 50], [0, 62.5], [300, 75]],
			['landscape source and portrait destination, wider and higher source', [1200, 500], [201, 300], [432.5, 0], [335, 500]],
			['landscape source and portrait destination, wider source', [450, 150], [200, 300], [175, 0], [100, 150]],
			['landscape source and portrait destination, same width', [200, 100.5], [200, 300], [66.5, 0], [67, 100.5]],
			['landscape source and portrait destination, narrower source', [150, 51], [200, 300], [58, 0], [34, 51]],
			['portrait source and landscape destination, wider and higher source', [501, 1200], [300, 200], [0, 433], [501, 334]],
			['portrait source and landscape destination, higher source', [150, 450], [300, 200], [0, 175], [150, 100]],
			['portrait source and landscape destination, same height', [99, 200], [300, 200], [0, 67], [99, 66]],
			['portrait source and landscape destination, shorter source', [51, 150], [300, 200], [0, 58], [51, 34]],
			['portrait source and portrait destination, higher aspect ratio source, wider and higher source', [500, 1200], [200, 300], [0, 225], [500, 750]],
			['portrait source and portrait destination, higher aspect ratio source, higher source', [150, 450], [200, 300], [0, 112.5], [150, 225]],
			['portrait source and portrait destination, higher aspect ratio source, same height', [100, 300], [200, 300], [0, 75], [100, 150]],
			['portrait source and portrait destination, higher aspect ratio source, shorter source', [50, 200], [200, 300], [0, 62.5], [50, 75]],
			['portrait source and portrait destination, higher aspect ratio destination, wider and higher destination', [200, 300], [500, 1200], [37.5, 0], [125, 300]],
			['portrait source and portrait destination, higher aspect ratio destination, higher destination', [200, 300], [150, 450], [50, 0], [100, 300]],
			['portrait source and portrait destination, higher aspect ratio destination, same height', [200, 300], [100, 300], [50, 0], [100, 300]],
			['portrait source and portrait destination, higher aspect ratio destination, shorter destination', [200, 300], [50, 200], [62.5, 0], [75, 300]],
			['invalid source width', [0, 200], [100, 50], [0, 0], [0, 200]],
			['invalid source height', [200, 0], [100, 50], [0, 0], [200, 0]],
			['invalid destination width', [100, 50], [0, 200], [0, 0], [100, 50]],
			['invalid destination height', [100, 50], [200, 0], [0, 0], [100, 50]],
		])('%s', (name, [sourceWidth, sourceHeight], [destinationWidth, destinationHeight], [expectedSourceX, expectedSourceY], [expectedSourceWidth, expectedSourceHeight]) => {
			let sourceX
			let sourceY

			[sourceX, sourceY, sourceWidth, sourceHeight] = JitsiStreamBackgroundEffect.getSourcePropertiesForDrawingBackgroundImage(sourceWidth, sourceHeight, destinationWidth, destinationHeight)

			expect(sourceX).toBe(expectedSourceX)
			expect(sourceY).toBe(expectedSourceY)
			expect(sourceWidth).toBe(expectedSourceWidth)
			expect(sourceHeight).toBe(expectedSourceHeight)
		})
	})
})
@@ -1,131 +0,0 @@
/**
 * SPDX-FileCopyrightText: 2021 Nextcloud GmbH and Nextcloud contributors
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */
import landscape from './vendor/models/selfie_segmentation_landscape.tflite'
import createTFLiteSIMDModule from './vendor/tflite/tflite-simd.js'
import withSIMD from './vendor/tflite/tflite-simd.wasm'
import createTFLiteModule from './vendor/tflite/tflite.js'
import withoutSIMD from './vendor/tflite/tflite.wasm'

const models = {
	modelLandscape: landscape.split('/').pop(),
}

self.compiled = false

self.onmessage = (e) => {
	const message = e.data.message
	switch (message) {
		case 'makeTFLite':
			self.segmentationPixelCount = e.data.segmentationPixelCount
			makeTFLite(e.data.simd)
			break
		case 'resizeSource':
			if (!self.compiled) {
				return
			}
			resizeSource(e.data.imageData, e.data.frameId)
			break
		case 'runInference':
			runInference()
			break
		default:
			console.error('JitsiStreamBackgroundEffect.worker: Message unknown.')
			console.error(message)
			break
	}
}

/**
 * @param {boolean} isSimd whether WebAssembly SIMD is available or not
 */
async function makeTFLite(isSimd) {
	try {
		switch (isSimd) {
			case true:
				self.wasmUrl = withSIMD.split('/').pop()
				self.tflite = await createTFLiteSIMDModule({
					locateFile: (path) => {
						return self.wasmUrl
					},
				})
				break
			case false:
				self.wasmUrl = withoutSIMD.split('/').pop()
				self.tflite = await createTFLiteModule({
					locateFile: (path) => {
						return self.wasmUrl
					},
				})
				break
			default:
				return
		}
		self.modelBufferOffset = self.tflite._getModelBufferMemoryOffset()
		self.modelResponse = await fetch(models.modelLandscape)

		if (!self.modelResponse.ok) {
			throw new Error('Failed to download tflite model!')
		}
		self.model = await self.modelResponse.arrayBuffer()

		self.tflite.HEAPU8.set(new Uint8Array(self.model), self.modelBufferOffset)

		await self.tflite._loadModel(self.model.byteLength)

		// Even if the wrong tflite file is downloaded (for example, if an HTML
		// error is downloaded instead of the file) loading the model will
		// succeed. However, if the model does not have certain values it could
		// be assumed that the model failed to load.
		if (!self.tflite._getInputWidth() || !self.tflite._getInputHeight()
			|| !self.tflite._getOutputWidth() || !self.tflite._getOutputHeight()) {
			throw new Error('Failed to load tflite model!')
		}

		self.compiled = true

		self.postMessage({ message: 'loaded' })
	} catch (error) {
		console.error(error)
		console.error('JitsiStreamBackgroundEffect.worker: tflite compilation failed. The web server may not be properly configured to send wasm and/or tflite files.')

		self.postMessage({ message: 'loadFailed' })
	}
}

/**
 * @param {ImageData} imageData the image data from the canvas
 * @param {number} frameId the ID of the frame that the image data belongs to
 */
function resizeSource(imageData, frameId) {
	const inputMemoryOffset = self.tflite._getInputMemoryOffset() / 4
	for (let i = 0; i < self.segmentationPixelCount; i++) {
		self.tflite.HEAPF32[inputMemoryOffset + (i * 3)] = imageData.data[i * 4] / 255
		self.tflite.HEAPF32[inputMemoryOffset + (i * 3) + 1] = imageData.data[(i * 4) + 1] / 255
		self.tflite.HEAPF32[inputMemoryOffset + (i * 3) + 2] = imageData.data[(i * 4) + 2] / 255
	}
	runInference(frameId)
}

/**
 * @param {number} frameId the ID of the frame that the image data belongs to
 */
function runInference(frameId) {
	self.tflite._runInference()
	const outputMemoryOffset = self.tflite._getOutputMemoryOffset() / 4
	const segmentationMaskData = []
	// All consts in Worker in obj array.
	for (let i = 0; i < self.segmentationPixelCount; i++) {
		const person = self.tflite.HEAPF32[outputMemoryOffset + i]

		segmentationMaskData.push({
			person,
		})
	}
	self.postMessage({ message: 'inferenceRun', segmentationResult: segmentationMaskData, frameId })
}

// This is needed to make the linter happy, but even if nothing is actually
// exported the worker is loaded as expected.
export default null
@@ -0,0 +1,667 @@
/**
 * SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */
// @flow

import { VIRTUAL_BACKGROUND } from '../../../../constants.ts'
import {
	CLEAR_TIMEOUT,
	SET_TIMEOUT,
	TIMEOUT_TICK,
	timerWorkerScript,
} from './TimerWorker.js'
import { FilesetResolver, ImageSegmenter } from './vendor/mediapipe/vision_bundle.js'
import WebGLCompositor from './WebGLCompositor.js'

new URL('./vendor/mediapipe/vision_wasm_internal.js', import.meta.url)
new URL('./vendor/mediapipe/vision_wasm_nosimd_internal.js', import.meta.url)
new URL('./vendor/mediapipe/vision_wasm_internal.wasm', import.meta.url)
new URL('./vendor/mediapipe/vision_wasm_nosimd_internal.wasm', import.meta.url)
new URL('./vendor/models/selfie_segmenter.tflite', import.meta.url)

/**
 * Represents a modified MediaStream that applies virtual background effects
 * (blur, image, video, or video stream) using MediaPipe segmentation.
 *
 * @class
 */
export default class VideoStreamBackgroundEffect {
	// _options: Object;
	// _stream: MediaStream;
	// _segmentationPixelCount: number;
	// _inputVideoElement: HTMLVideoElement;
	// _onMaskFrameTimer: Function;
	// _maskFrameTimerWorker: Worker;
	// _outputCanvasElement: HTMLCanvasElement;
	// _outputCanvasCtx: CanvasRenderingContext2D;
	// _segmentationMaskCtx: CanvasRenderingContext2D;
	// _segmentationMask: ImageData;
	// _segmentationMaskCanvas: HTMLCanvasElement;
	// _renderMask: Function;
	// _virtualImage: HTMLImageElement;
	// _virtualVideo: HTMLVideoElement;

	/**
	 * Create a new background effect processor.
	 *
	 * @param {object} options - Options for the effect.
	 * @param {number} options.width - Segmentation mask width.
	 * @param {number} options.height - Segmentation mask height.
	 * @param {object} options.virtualBackground - Virtual background properties (see setVirtualBackground()).
	 * @param {boolean} options.webGL - Whether to use WebGL compositor instead of 2D canvas.
	 */
	constructor(options) {
		this._options = options
		this._loadPromise = new Promise((resolve, reject) => {
			this._loadPromiseResolve = resolve
			this._loadPromiseReject = reject
		})
		this._loaded = false
		this._loadFailed = false

		this.setVirtualBackground(this._options.virtualBackground)
		this._useWebGL = this._options.webGL

		this._segmentationPixelCount = this._options.width * this._options.height

		this._initMediaPipe().catch((e) => console.error(e))

		// Bind event handler so it is only bound once for every instance.
		this._onMaskFrameTimer = this._onMaskFrameTimer.bind(this)
		this._renderMask = this._renderMask.bind(this)

		// caches for mask processing
		this._tempImageData = null
		this._maskWidth = 0
		this._maskHeight = 0

		// Create canvas elements
		this._outputCanvasElement = document.createElement('canvas')
		if (!this._useWebGL) {
			this._outputCanvasElement.getContext('2d')
		}
		this._inputVideoElement = document.createElement('video')
		this._videoResizeObserver = null
		this._bgChanged = false
		this._lastVideoW = 0
		this._lastVideoH = 0
	}

	/**
	 * Initialize MediaPipe segmentation model.
	 *
	 * @private
	 * @return {Promise<void>}
	 */
	async _initMediaPipe() {
		try {
			const vision = await FilesetResolver.forVisionTasks('/apps/spreed/js')

			this._imageSegmenter = await ImageSegmenter.createFromOptions(vision, {
				baseOptions: {
					modelAssetPath: '/apps/spreed/js/selfie_segmenter.tflite',
					delegate: 'GPU',
				},
				runningMode: 'VIDEO',
				outputCategoryMask: false,
				outputConfidenceMasks: true,
			})

			this._loaded = true
			this._loadPromiseResolve()
		} catch (error) {
			console.error('MediaPipe Tasks initialization failed:', error)
			this._loadFailed = true
			this._loadPromiseReject(error)
		}
	}

	/**
	 * Run segmentation inference on the current video frame.
	 *
	 * @private
	 * @return {Promise<void>}
	 */
	async _runInference() {
		if (!this._imageSegmenter || !this._loaded) {
			return
		}

		let segmentationResult
		try {
			segmentationResult = await this._imageSegmenter.segmentForVideo(
				this._inputVideoElement,
				performance.now(),
			)

			if (segmentationResult.confidenceMasks && segmentationResult.confidenceMasks.length > 0) {
				this._processSegmentationResult(segmentationResult)
			}

			this.runPostProcessing()
			this._lastFrameId = this._frameId
		} catch (error) {
			console.error('MediaPipe inference failed:', error)
		} finally {
			if (segmentationResult?.categoryMask) {
				segmentationResult.categoryMask.close()
			}

			if (segmentationResult?.confidenceMasks?.length) {
				segmentationResult.confidenceMasks.forEach((mask) => mask.close())
			}
		}
	}

	/**
	 * Process MediaPipe segmentation result and update internal mask.
	 *
	 * @private
	 * @param {object} segmentationResult - The segmentation result from MediaPipe.
	 * @return {void}
	 */
	_processSegmentationResult(segmentationResult) {
		const confidenceMasks = segmentationResult.confidenceMasks
		if (!confidenceMasks || confidenceMasks.length === 0) {
			return
		}

		const mask = confidenceMasks[0]
		const maskData = !this._useWebGL ? mask.getAsFloat32Array() : mask
		const maskWidth = mask.width
		const maskHeight = mask.height

		if (!this._useWebGL) {
			// Prepare backing ImageData
			if (!this._segmentationMask
				|| this._segmentationMask.width !== this._options.width
				|| this._segmentationMask.height !== this._options.height) {
				this._segmentationMask = new ImageData(this._options.width, this._options.height)
			}

			// Convert float32 mask [0..1] → grayscale canvas
			if (this._tempCanvas.width !== maskWidth || this._tempCanvas.height !== maskHeight) {
				this._tempCanvas.width = maskWidth
				this._tempCanvas.height = maskHeight
			}
			const tempCanvas = this._tempCanvas
			const tempCtx = this._tempCanvasCtx

			if (!this._tempImageData
				|| this._maskWidth !== maskWidth
				|| this._maskHeight !== maskHeight) {
				this._tempImageData = new ImageData(maskWidth, maskHeight)
				this._maskWidth = maskWidth
				this._maskHeight = maskHeight
			}
			for (let i = 0; i < maskData.length; i++) {
				const v = Math.min(1.0, Math.max(0.0, maskData[i])) // clamp
				const gray = Math.round(v * 255)
				const idx = i * 4
				this._tempImageData.data[idx] = gray
				this._tempImageData.data[idx + 1] = gray
				this._tempImageData.data[idx + 2] = gray
				this._tempImageData.data[idx + 3] = 255
			}
			tempCtx.putImageData(this._tempImageData, 0, 0)

			// Resize into segmentation canvas
			this._segmentationMaskCtx.drawImage(
				tempCanvas,
				0,
				0,
				maskWidth,
				maskHeight,
				0,
				0,
				this._options.width,
				this._options.height,
			)

			// Extract resized alpha channel into _segmentationMask
			const resized = this._segmentationMaskCtx.getImageData(0, 0, this._options.width, this._options.height)
			for (let i = 0; i < this._segmentationPixelCount; i++) {
				this._segmentationMask.data[i * 4 + 3] = resized.data[i * 4] // R channel
			}

			// Update segmentation mask canvas
			this._segmentationMaskCtx.putImageData(this._segmentationMask, 0, 0)
		} else {
			this._lastMask = maskData
		}
	}

	/**
	 * Loop function to render the background mask and trigger inference.
	 *
	 * @private
	 * @return {void}
	 */
	_renderMask() {
		if (this._frameId < this._lastFrameId) {
			console.debug('Fixing frame id, this should not happen', this._frameId, this._lastFrameId)
			this._frameId = this._lastFrameId
		}

		// Run inference if ready
		if (this._loaded && this._frameId === this._lastFrameId) {
			this._frameId++
			this._runInference().catch((e) => console.error(e))
		}

		// Schedule next frame
		this._maskFrameTimerWorker.postMessage({
			id: SET_TIMEOUT,
			timeMs: 1000 / this._frameRate,
			message: 'this._maskFrameTimerWorker',
		})
	}

	/**
	 * Handle timer worker ticks to schedule mask rendering.
	 *
	 * @private
	 * @param {MessageEvent} response - Message from the worker.
	 * @return {void}
	 */
	_onMaskFrameTimer(response) {
		if (response.data.id === TIMEOUT_TICK) {
			this._renderMask()
		}
	}

	/**
	 * Helper method to know when the model was loaded after creating the
	 * object.
	 *
	 * Note that it is not needed to call this method to actually load the
	 * effect; the load will automatically start as soon as the object is
	 * created, but it can be waited on this method to know once it has finished
	 * (or failed).
	 *
	 * @return {Promise} promise resolved or rejected once the load has finished
	 *         or failed.
	 */
	async load() {
		return this._loadPromise
	}

	/**
	 * Returns whether loading the TFLite model failed or not.
	 *
	 * @return {boolean} true if loading failed, false otherwise
	 */
	didLoadFail() {
		return this._loadFailed
	}

	/**
	 * Returns the virtual background properties.
	 *
	 * @return {object} the virtual background properties.
	 */
	getVirtualBackground() {
		return this._options.virtualBackground
	}

	/**
	 * Sets the virtual background properties to use.
	 *
	 * The virtual background can be modified while the effect is running.
	 *
	 * If an image or video URL is given it can be any URL accepted by the "src"
	 * attribute of HTML image or video elements, so it is possible to set a
	 * "real" URL or, for example, one generated with "URL.createObjectURL()".
	 *
	 * @param {object} virtualBackground an object with the virtual background
	 *        properties.
	 * @param {string} virtualBackground.backgroundType BLUR, IMAGE, VIDEO or
	 *        VIDEO_STREAM.
	 * @param {number} virtualBackground.blurValue the blur to apply on a 720p
	 *        video; it will be automatically scaled as needed.
	 *        Optional, only needed when background type is BLUR.
	 * @param {string|MediaStream} virtualBackground.virtualSource the URL to
	 *        the image or video, or a video stream.
	 *        Optional, only needed when background type is IMAGE, VIDEO or
	 *        VIDEO_STREAM.
	 */
	setVirtualBackground(virtualBackground) {
		// Clear previous elements to allow them to be garbage collected
		this._virtualImage = null
		this._virtualVideo = null
		this._bgChanged = false

		this._options.virtualBackground = virtualBackground

		if (this._options.virtualBackground.backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.IMAGE) {
			this._virtualImage = document.createElement('img')
			this._virtualImage.crossOrigin = 'anonymous'
			this._virtualImage.src = this._options.virtualBackground.virtualSource
			this._virtualImage.onload = () => {
				this._bgChanged = true
			}
			this._bgChanged = false

			return
		}

		if (this._options.virtualBackground.backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.VIDEO) {
			this._virtualVideo = document.createElement('video')
			this._virtualVideo.crossOrigin = 'anonymous'
			this._virtualVideo.loop = true
			this._virtualVideo.muted = true
			this._virtualVideo.src = this._options.virtualBackground.virtualSource

			if (this._running) {
				this._virtualVideo.play()
			}

			return
		}

		if (this._options.virtualBackground.backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.VIDEO_STREAM) {
			this._virtualVideo = document.createElement('video')
			this._virtualVideo.srcObject = this._options.virtualBackground.virtualSource

			if (this._running) {
				this._virtualVideo.play()
			}
		}
	}

	/**
	 * Run background/foreground compositing.
	 *
	 * @return {void}
	 */
	runPostProcessing() {
		const height = this._inputVideoElement.videoHeight
		const width = this._inputVideoElement.videoWidth
		const { backgroundType } = this._options.virtualBackground

		const scaledBlurFactor = width / 720.0
		const backgroundBlurValue = this._options.virtualBackground.blurValue * scaledBlurFactor
		const edgesBlurValue = (backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.IMAGE ? 4 : 8) * scaledBlurFactor

		if (!this._outputCanvasElement.width
			|| !this._outputCanvasElement.height) {
			return
		}

		this._outputCanvasElement.width = width
		this._outputCanvasElement.height = height

		if (this._useWebGL) {
			if (!this._glFx) {
				return
			}

			let mode = 1
			let bgSource = null
			let refreshBg = false

			if (backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.IMAGE
				|| backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.VIDEO
				|| backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.VIDEO_STREAM) {
				mode = 0
				if (backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.IMAGE) {
					bgSource = this._virtualImage
					refreshBg = this._bgChanged && bgSource && bgSource.complete && bgSource.naturalWidth > 0
					if (refreshBg) {
						this._bgChanged = false
					}
				} else {
					bgSource = this._virtualVideo
					refreshBg = true
				}
			}

			this._glFx.render({
				videoEl: this._inputVideoElement,
				mask: this._lastMask,
				bgSource,
				mode,
				outW: width,
				outH: height,
				edgeFeatherPx: edgesBlurValue,
				refreshBg,
			})
		} else {
			this._outputCanvasCtx.globalCompositeOperation = 'copy'

			// Draw segmentation mask.

			// Smooth out the edges.
			this._outputCanvasCtx.filter = `blur(${edgesBlurValue}px)`
			this._outputCanvasCtx.drawImage(
				this._segmentationMaskCanvas,
				0,
				0,
				this._options.width,
				this._options.height,
				0,
				0,
				this._inputVideoElement.videoWidth,
				this._inputVideoElement.videoHeight,
			)
			this._outputCanvasCtx.globalCompositeOperation = 'source-in'
			this._outputCanvasCtx.filter = 'none'

			// Draw the foreground video.

			this._outputCanvasCtx.drawImage(this._inputVideoElement, 0, 0)

			// Draw the background.

			this._outputCanvasCtx.globalCompositeOperation = 'destination-over'
			if (backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.IMAGE
				|| backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.VIDEO
				|| backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.VIDEO_STREAM) {
				let source
				let sourceWidthOriginal
				let sourceHeightOriginal

				if (backgroundType === VIRTUAL_BACKGROUND.BACKGROUND_TYPE.IMAGE) {
					source = this._virtualImage
					sourceWidthOriginal = source.naturalWidth
					sourceHeightOriginal = source.naturalHeight
				} else {
					source = this._virtualVideo
					sourceWidthOriginal = source.videoWidth
					sourceHeightOriginal = source.videoHeight
				}

				const destinationWidth = this._outputCanvasElement.width
				const destinationHeight = this._outputCanvasElement.height

				const [sourceX, sourceY, sourceWidth, sourceHeight] = VideoStreamBackgroundEffect.getSourcePropertiesForDrawingBackgroundImage(sourceWidthOriginal, sourceHeightOriginal, destinationWidth, destinationHeight)

				this._outputCanvasCtx.drawImage(
					source,
					sourceX,
					sourceY,
					sourceWidth,
					sourceHeight,
					0,
					0,
					destinationWidth,
					destinationHeight,
				)
			} else {
				this._outputCanvasCtx.filter = `blur(${backgroundBlurValue}px)`
				this._outputCanvasCtx.drawImage(this._inputVideoElement, 0, 0)
			}
		}
	}

	/**
	 * Returns the coordinates, width and height to draw the background image
	 * onto the canvas.
	 *
	 * The background image is cropped and centered as needed to cover the whole
	 * canvas while maintaining the original aspect ratio of the background.
	 *
	 * @param {number} sourceWidth the width of the source image
	 * @param {number} sourceHeight the height of the source image
	 * @param {number} destinationWidth the width of the destination canvas
	 * @param {number} destinationHeight the height of the destination canvas
	 * @return {Array} the X and Y coordinates, width and height of the source
	 *         image after cropping and centering
	 */
	static getSourcePropertiesForDrawingBackgroundImage(sourceWidth, sourceHeight, destinationWidth, destinationHeight) {
		let croppedSourceX = 0
		let croppedSourceY = 0
		let croppedSourceWidth = sourceWidth
		let croppedSourceHeight = sourceHeight

		if (sourceWidth <= 0 || sourceHeight <= 0 || destinationWidth <= 0 || destinationHeight <= 0) {
			return [croppedSourceX, croppedSourceY, croppedSourceWidth, croppedSourceHeight]
		}

		const sourceAspectRatio = sourceWidth / sourceHeight
		const destinationAspectRatio = destinationWidth / destinationHeight

		if (sourceAspectRatio > destinationAspectRatio) {
			croppedSourceWidth = sourceHeight * destinationAspectRatio
			croppedSourceX = (sourceWidth - croppedSourceWidth) / 2
		} else {
			croppedSourceHeight = sourceWidth / destinationAspectRatio
			croppedSourceY = (sourceHeight - croppedSourceHeight) / 2
		}

		return [croppedSourceX, croppedSourceY, croppedSourceWidth, croppedSourceHeight]
	}

	/**
	 * Checks if the local track supports this effect.
	 *
	 * @param {object} jitsiLocalTrack - Track to apply effect.
	 * @return {boolean} - Returns true if this effect can run on the specified track
	 * false otherwise.
	 */
	isEnabled(jitsiLocalTrack) {
		return jitsiLocalTrack.isVideoTrack() && jitsiLocalTrack.videoType === 'camera'
	}

	/**
	 * Starts loop to capture video frame and render the segmentation mask.
	 *
	 * @param {MediaStream} stream - Stream to be used for processing.
	 * @return {MediaStream} - The stream with the applied effect.
	 */
	startEffect(stream) {
		this._running = true

		this._stream = stream
		this._maskFrameTimerWorker = new Worker(timerWorkerScript, { name: 'Blur effect worker' })
		this._maskFrameTimerWorker.onmessage = this._onMaskFrameTimer
		const firstVideoTrack = this._stream.getVideoTracks()[0]
		const { height, frameRate, width }
			= firstVideoTrack.getSettings ? firstVideoTrack.getSettings() : firstVideoTrack.getConstraints()

		this._frameRate = parseInt(frameRate, 10)

		this._outputCanvasElement.width = parseInt(width, 10)
		this._outputCanvasElement.height = parseInt(height, 10)

		if (this._useWebGL) {
			if (!this._glFx) {
				this._glFx = new WebGLCompositor(this._outputCanvasElement)
			}
		} else {
			this._outputCanvasCtx = this._outputCanvasElement.getContext('2d')
			this._segmentationMask = new ImageData(this._options.width, this._options.height)
			this._segmentationMaskCanvas = document.createElement('canvas')
			this._segmentationMaskCanvas.width = this._options.width
			this._segmentationMaskCanvas.height = this._options.height
			this._segmentationMaskCtx = this._segmentationMaskCanvas.getContext('2d', { willReadFrequently: true })

			this._tempCanvas = document.createElement('canvas')
			this._tempCanvasCtx = this._tempCanvas.getContext('2d', { willReadFrequently: true })
		}

		this._inputVideoElement.autoplay = true
		this._inputVideoElement.srcObject = this._stream
		this._inputVideoElement.onloadeddata = () => {
			this._maskFrameTimerWorker.postMessage({
				id: SET_TIMEOUT,
				timeMs: 1000 / this._frameRate,
				message: 'this._maskFrameTimerWorker',
			})
			this._inputVideoElement.onloadeddata = null
		}

		if (this._virtualVideo) {
			this._virtualVideo.play()
		}

		this._frameId = -1
		this._lastFrameId = -1

		this._outputStream = this._outputCanvasElement.captureStream(this._frameRate)

		return this._outputStream
	}

	/**
	 * Update constraints (e.g. framerate) on the output stream when the input stream changes.
	 *
	 * @return {void}
	 */
	updateInputStream() {
		const firstVideoTrack = this._stream.getVideoTracks()[0]
		const { frameRate }
			= firstVideoTrack.getSettings ? firstVideoTrack.getSettings() : firstVideoTrack.getConstraints()

		this._frameRate = parseInt(frameRate, 10)

		this._outputStream.getVideoTracks()[0].applyConstraints({ frameRate: this._frameRate }).catch((error) => {
			console.error('Frame rate could not be adjusted in background effect', error)
		})

		this._frameId = -1
		this._lastFrameId = -1
	}

	/**
	 * Stop background effect and release resources.
	 *
	 * @return {void}
	 */
	stopEffect() {
		this._running = false

		if (this._maskFrameTimerWorker) {
			this._maskFrameTimerWorker.postMessage({
				id: CLEAR_TIMEOUT,
				message: 'stopEffect',
			})
			this._maskFrameTimerWorker.terminate()
		}

		if (this._virtualVideo) {
			this._virtualVideo.pause()
		}

		if (this._glFx) {
			this._glFx.dispose()
			this._glFx = null
		}

		this._segmentationMask = null
		this._segmentationMaskCanvas = null
		this._segmentationMaskCtx = null
		this._tempCanvas = null
		this._tempCanvasCtx = null
	}

	/**
	 * Destroys the VideoStreamBackgroundEffect instance and releases all resources.
	 */
	destroy() {
		this.stopEffect()
		this._imageSegmenter.close()
		this._imageSegmenter = null
	}
}
@ -0,0 +1,681 @@ |
|||
/** |
|||
* SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors |
|||
* SPDX-License-Identifier: AGPL-3.0-or-later |
|||
*/ |
|||
// @flow
|
|||
|
|||
/** |
|||
* WebGL-based compositor for background effects. |
|||
* Incorporates joint bilateral filtering and multi-pass blur for improved quality. |
|||
* |
|||
* @class |
|||
*/ |
|||
export default class WebGLCompositor2 { |
|||
/** |
|||
* Create a new WebGL compositor bound to a canvas. |
|||
* |
|||
* @param {HTMLCanvasElement} canvas - Canvas element to render into. |
|||
* @throws {Error} If WebGL is not available. |
|||
*/ |
|||
constructor(canvas) { |
|||
this.canvas = canvas |
|||
this.gl = canvas.getContext('webgl2', { premultipliedAlpha: false, alpha: true }) |
|||
if (!this.gl) { |
|||
throw new Error('WebGL2 not available') |
|||
} |
|||
|
|||
const gl = this.gl |
|||
|
|||
// --- Compile Helpers ---
|
|||
this._compileShader = (gl, type, src) => { |
|||
const s = gl.createShader(type) |
|||
gl.shaderSource(s, src) |
|||
gl.compileShader(s) |
|||
if (!gl.getShaderParameter(s, gl.COMPILE_STATUS)) { |
|||
throw new Error(gl.getShaderInfoLog(s)) |
|||
} |
|||
return s |
|||
} |
|||
|
|||
this._linkProgram = (gl, vsSrc, fsSrc) => { |
|||
const prog = gl.createProgram() |
|||
gl.attachShader(prog, this._compileShader(gl, gl.VERTEX_SHADER, vsSrc)) |
|||
gl.attachShader(prog, this._compileShader(gl, gl.FRAGMENT_SHADER, fsSrc)) |
|||
gl.linkProgram(prog) |
|||
if (!gl.getProgramParameter(prog, gl.LINK_STATUS)) { |
|||
throw new Error(gl.getProgramInfoLog(prog)) |
|||
} |
|||
return prog |
|||
} |
|||
|
|||
// --- Main Vertex Shader ---
|
|||
const vs = `#version 300 es
|
|||
in vec2 a_pos; |
|||
in vec2 a_texCoord; |
|||
out vec2 v_texCoord; |
|||
void main() { |
|||
gl_Position = vec4(a_pos, 0.0, 1.0); |
|||
v_texCoord = a_texCoord; |
|||
}`
|
|||
|
|||
// --- Vertex shader for final output (flips Y) ---
|
|||
const vsOutput = `#version 300 es
|
|||
in vec2 a_pos; |
|||
in vec2 a_texCoord; |
|||
out vec2 v_texCoord; |
|||
void main() { |
|||
// Flipping Y is required when rendering to canvas
|
|||
gl_Position = vec4(a_pos * vec2(1.0, -1.0), 0.0, 1.0); |
|||
v_texCoord = a_texCoord; |
|||
}`
|
|||
|
|||
// --- Joint Bilateral Filter Fragment Shader ---
|
|||
const bilateralFS = `#version 300 es
|
|||
precision highp float; |
|||
|
|||
uniform sampler2D u_inputFrame; |
|||
uniform sampler2D u_segmentationMask; |
|||
uniform vec2 u_texelSize; |
|||
uniform float u_step; |
|||
uniform float u_radius; |
|||
uniform float u_offset; |
|||
uniform float u_sigmaTexel; |
|||
uniform float u_sigmaColor; |
|||
|
|||
in vec2 v_texCoord; |
|||
out vec4 outColor; |
|||
|
|||
float gaussian(float x, float sigma) { |
|||
float coeff = -0.5 / (sigma * sigma * 4.0 + 1.0e-6); |
|||
return exp((x * x) * coeff); |
|||
} |
|||
|
|||
void main() { |
|||
vec2 centerCoord = v_texCoord; |
|||
vec3 centerColor = texture(u_inputFrame, centerCoord).rgb; |
|||
float newVal = 0.0; |
|||
|
|||
float spaceWeight = 0.0; |
|||
float colorWeight = 0.0; |
|||
float totalWeight = 0.0; |
|||
|
|||
// Subsample kernel space
|
|||
for (float i = -u_radius + u_offset; i <= u_radius; i += u_step) { |
|||
for (float j = -u_radius + u_offset; j <= u_radius; j += u_step) { |
|||
vec2 shift = vec2(j, i) * u_texelSize; |
|||
vec2 coord = centerCoord + shift; |
|||
vec3 frameColor = texture(u_inputFrame, coord).rgb; |
|||
float outVal = texture(u_segmentationMask, coord).r; |
|||
|
|||
spaceWeight = gaussian(distance(centerCoord, coord), u_sigmaTexel); |
|||
colorWeight = gaussian(distance(centerColor, frameColor), u_sigmaColor); |
|||
totalWeight += spaceWeight * colorWeight; |
|||
|
|||
newVal += spaceWeight * colorWeight * outVal; |
|||
} |
|||
} |
|||
newVal /= totalWeight; |
|||
|
|||
outColor = vec4(vec3(0.0), newVal); |
|||
}`
|
|||
|
		// --- Gaussian Blur Fragment Shader ---

		const blurFS = `#version 300 es
		precision highp float;

		uniform sampler2D u_inputFrame;
		uniform sampler2D u_personMask;
		uniform vec2 u_texelSize;

		in vec2 v_texCoord;
		out vec4 outColor;

		const float offset[5] = float[](0.0, 1.0, 2.0, 3.0, 4.0);
		const float weight[5] = float[](0.2270270270, 0.1945945946, 0.1216216216,
			0.0540540541, 0.0162162162);

		void main() {
			vec4 centerColor = texture(u_inputFrame, v_texCoord);
			float personMask = texture(u_personMask, v_texCoord).a;

			vec4 frameColor = centerColor * weight[0] * (1.0 - personMask);

			for (int i = 1; i < 5; i++) {
				vec2 offsetVec = vec2(offset[i]) * u_texelSize;

				vec2 texCoord = v_texCoord + offsetVec;
				frameColor += texture(u_inputFrame, texCoord) * weight[i] *
					(1.0 - texture(u_personMask, texCoord).a);

				texCoord = v_texCoord - offsetVec;
				frameColor += texture(u_inputFrame, texCoord) * weight[i] *
					(1.0 - texture(u_personMask, texCoord).a);
			}
			outColor = vec4(frameColor.rgb + (1.0 - frameColor.a) * centerColor.rgb, 1.0);
		}`

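		// Note: the blur shader above is a 9-tap Gaussian run separably (the X/Y
		// direction is selected via u_texelSize by _applyMultiPassBlur()). Every
		// sample is attenuated by (1.0 - personMask) so only background pixels
		// contribute, and the weight that was masked out is re-added from the
		// unblurred center color so edges near the person do not darken.
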
		// --- Final Blend Fragment Shader ---

		const blendFS = `#version 300 es
		precision highp float;

		uniform sampler2D u_inputFrame;
		uniform sampler2D u_personMask;
		uniform sampler2D u_blurredFrame;
		uniform sampler2D u_background;
		uniform vec2 u_coverage;
		uniform float u_lightWrapping;
		uniform int u_mode;

		in vec2 v_texCoord;
		out vec4 outColor;

		vec3 screen(vec3 a, vec3 b) {
			return 1.0 - (1.0 - a) * (1.0 - b);
		}

		vec3 linearDodge(vec3 a, vec3 b) {
			return a + b;
		}

		void main() {
			vec3 frameColor = texture(u_inputFrame, v_texCoord).rgb;
			float personMask = texture(u_personMask, v_texCoord).a;

			vec3 bgColor;
			if (u_mode == 1) {
				// Blur mode
				bgColor = texture(u_blurredFrame, v_texCoord).rgb;
			} else {
				// Background image mode
				vec2 bgCoord = v_texCoord;
				bgCoord.x = 1.0 - bgCoord.x; // horizontal flip
				bgColor = texture(u_background, bgCoord).rgb;

				// Apply light wrapping
				float lightWrapMask = 1.0 - max(0.0, personMask - u_coverage.y) / (1.0 - u_coverage.y);
				vec3 lightWrap = u_lightWrapping * lightWrapMask * bgColor;
				frameColor = screen(frameColor, lightWrap);
			}

			// Apply coverage smoothing
			personMask = smoothstep(u_coverage.x, u_coverage.y, personMask);

			outColor = vec4(mix(bgColor, frameColor, personMask), 1.0);
		}`

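		// Note: the blend pass above picks the background color (blurred frame in
		// blur mode, mirrored background texture plus light wrapping in image
		// mode) and mixes it with the camera frame using a smoothstep over
		// u_coverage, which feathers the edge of the person mask.
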
		// --- Link Programs ---

		this.progBilateral = this._linkProgram(gl, vs, bilateralFS)
		this.progBlur = this._linkProgram(gl, vs, blurFS)
		this.progBlend = this._linkProgram(gl, vsOutput, blendFS)

		// --- Setup vertex buffers ---

		this.vertexArray = gl.createVertexArray()
		gl.bindVertexArray(this.vertexArray)

		this.positionBuffer = gl.createBuffer()
		gl.bindBuffer(gl.ARRAY_BUFFER, this.positionBuffer)
		gl.bufferData(
			gl.ARRAY_BUFFER,
			new Float32Array([-1, -1, 1, -1, -1, 1, 1, 1]),
			gl.STATIC_DRAW,
		)

		this.texCoordBuffer = gl.createBuffer()
		gl.bindBuffer(gl.ARRAY_BUFFER, this.texCoordBuffer)
		gl.bufferData(
			gl.ARRAY_BUFFER,
			new Float32Array([0, 0, 1, 0, 0, 1, 1, 1]),
			gl.STATIC_DRAW,
		)

		// --- Textures ---

		this.texFrame = this._makeTex()
		this.texMask = this._makeTex()
		this.texMaskFiltered = this._makeTex()
		this.texBg = this._makeTex()
		this.texBlurred1 = this._makeTex()
		this.texBlurred2 = this._makeTex()

		// --- Framebuffers ---

		this.fboMask = gl.createFramebuffer()
		this.fboBlur1 = gl.createFramebuffer()
		this.fboBlur2 = gl.createFramebuffer()

		// --- Blit variables, lazy loaded ---

		this.progBlit = null
		this.blitBuf = null
		this.blitPosLoc = null
		this.blitSamplerLoc = null

		// --- Default parameters ---

		this.sigmaSpace = 10.0
		this.sigmaColor = 0.15
		this.coverage = [0.45, 0.75]
		this.lightWrapping = 0.3
	}

	/**
	 * Create and initialize a WebGL texture.
	 *
	 * @private
	 * @return {WebGLTexture} Newly created texture.
	 */
	_makeTex() {
		const gl = this.gl
		const t = gl.createTexture()
		gl.bindTexture(gl.TEXTURE_2D, t)
		gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.LINEAR)
		gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.LINEAR)
		gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE)
		gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE)
		return t
	}

	/**
	 * Upload an image/video/canvas frame into a WebGL texture.
	 *
	 * @private
	 * @param {WebGLTexture} tex - Texture to upload into.
	 * @param {HTMLImageElement|HTMLVideoElement|HTMLCanvasElement} source - Source element.
	 * @param {object} [options] - Upload options.
	 * @param {boolean} [options.flipY] - Whether to flip vertically.
	 * @param {number} [options.min] - Minification filter.
	 * @param {number} [options.mag] - Magnification filter.
	 * @return {void}
	 */
	_upload(tex, source, options = {}) {
		const gl = this.gl
		if (!source) {
			return
		}

		// Validation
		if (source instanceof HTMLImageElement) {
			if (!source.complete || source.naturalWidth === 0) {
				return
			}
		}
		if (source instanceof HTMLVideoElement) {
			if (source.videoWidth === 0 || source.videoHeight === 0) {
				return
			}
		}
		if (source instanceof HTMLCanvasElement) {
			if (source.width === 0 || source.height === 0) {
				return
			}
		}

		// Do not flip Y by default, but allow it to be overridden via options.flipY
		const flipY = options.flipY !== undefined ? options.flipY : false

		gl.bindTexture(gl.TEXTURE_2D, tex)
		gl.pixelStorei(gl.UNPACK_FLIP_Y_WEBGL, flipY)

		// Allow custom texture parameters to be set
		if (options.min) {
			gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, options.min)
		}
		if (options.mag) {
			gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, options.mag)
		}

		gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, gl.RGBA, gl.UNSIGNED_BYTE, source)
	}

	/**
	 * Initialize shaders and buffers for blitting textures.
	 *
	 * @private
	 * @param {WebGL2RenderingContext} gl - GL context of the mask canvas.
	 * @return {void}
	 */
	_initBlitResources(gl) {
		if (this.progBlit) {
			return
		}

		const blitVS = `
		attribute vec2 a_pos;
		varying vec2 v_uv;
		void main() {
			v_uv = (a_pos + 1.0) * 0.5;
			gl_Position = vec4(a_pos, 0.0, 1.0);
		}`
		const blitFS = `
		precision mediump float;
		varying vec2 v_uv;
		uniform sampler2D u_tex;
		void main() {
			gl_FragColor = texture2D(u_tex, v_uv);
		}`

		this.progBlit = this._linkProgram(gl, blitVS, blitFS)

		this.blitBuf = gl.createBuffer()
		gl.bindBuffer(gl.ARRAY_BUFFER, this.blitBuf)
		gl.bufferData(gl.ARRAY_BUFFER, new Float32Array([-1, -1, 1, -1, -1, 1, 1, -1, 1, 1, -1, 1]), gl.STATIC_DRAW)

		this.blitPosLoc = gl.getAttribLocation(this.progBlit, 'a_pos')
		this.blitSamplerLoc = gl.getUniformLocation(this.progBlit, 'u_tex')
	}

	/**
	 * Copy a MediaPipe mask texture into a canvas.
	 *
	 * @private
	 * @param {object} mask - MediaPipe mask object with canvas + getAsWebGLTexture().
	 * @return {void}
	 */
	_blitTextureToCanvas(mask) {
		const gl = mask.canvas.getContext('webgl2')
		if (!gl) {
			console.error('Could not get WebGL context from mask canvas.')
			return
		}
		this._initBlitResources(gl)

		const texture = mask.getAsWebGLTexture()
		const { width, height } = mask

		gl.useProgram(this.progBlit)

		gl.bindBuffer(gl.ARRAY_BUFFER, this.blitBuf)
		gl.enableVertexAttribArray(this.blitPosLoc)
		gl.vertexAttribPointer(this.blitPosLoc, 2, gl.FLOAT, false, 0, 0)

		gl.activeTexture(gl.TEXTURE0)
		gl.bindTexture(gl.TEXTURE_2D, texture)
		gl.uniform1i(this.blitSamplerLoc, 0)

		gl.bindFramebuffer(gl.FRAMEBUFFER, null)
		gl.viewport(0, 0, width, height)
		gl.clearColor(0, 0, 0, 0)
		gl.clear(gl.COLOR_BUFFER_BIT)
		gl.drawArrays(gl.TRIANGLES, 0, 6)
	}

	/**
	 * Setup vertex attributes for rendering.
	 *
	 * @private
	 * @param {WebGLProgram} prog - Shader program.
	 * @return {void}
	 */
	_setupVertexAttributes(prog) {
		const gl = this.gl

		const posLoc = gl.getAttribLocation(prog, 'a_pos')
		if (posLoc !== -1) {
			gl.bindBuffer(gl.ARRAY_BUFFER, this.positionBuffer)
			gl.enableVertexAttribArray(posLoc)
			gl.vertexAttribPointer(posLoc, 2, gl.FLOAT, false, 0, 0)
		}

		const texLoc = gl.getAttribLocation(prog, 'a_texCoord')
		if (texLoc !== -1) {
			gl.bindBuffer(gl.ARRAY_BUFFER, this.texCoordBuffer)
			gl.enableVertexAttribArray(texLoc)
			gl.vertexAttribPointer(texLoc, 2, gl.FLOAT, false, 0, 0)
		}
	}

	/**
	 * Apply joint bilateral filter to mask.
	 *
	 * @private
	 * @param {number} width - Output width.
	 * @param {number} height - Output height.
	 * @return {void}
	 */
	_applyBilateralFilter(width, height) {
		const gl = this.gl

		// Bind filtered mask FBO
		gl.bindFramebuffer(gl.FRAMEBUFFER, this.fboMask)
		gl.framebufferTexture2D(
			gl.FRAMEBUFFER,
			gl.COLOR_ATTACHMENT0,
			gl.TEXTURE_2D,
			this.texMaskFiltered,
			0,
		)

		gl.viewport(0, 0, width, height)
		gl.useProgram(this.progBilateral)
		this._setupVertexAttributes(this.progBilateral)

		// Calculate filter parameters
		const texelWidth = 1 / width
		const texelHeight = 1 / height
		const kSparsityFactor = 0.66
		const step = Math.max(1, Math.sqrt(this.sigmaSpace) * kSparsityFactor)
		const radius = this.sigmaSpace
		const offset = step > 1 ? step * 0.5 : 0
		const sigmaTexel = Math.max(texelWidth, texelHeight) * this.sigmaSpace

		// Set uniforms
		gl.uniform1i(gl.getUniformLocation(this.progBilateral, 'u_inputFrame'), 0)
		gl.uniform1i(gl.getUniformLocation(this.progBilateral, 'u_segmentationMask'), 1)
		gl.uniform2f(gl.getUniformLocation(this.progBilateral, 'u_texelSize'), texelWidth, texelHeight)
		gl.uniform1f(gl.getUniformLocation(this.progBilateral, 'u_step'), step)
		gl.uniform1f(gl.getUniformLocation(this.progBilateral, 'u_radius'), radius)
		gl.uniform1f(gl.getUniformLocation(this.progBilateral, 'u_offset'), offset)
		gl.uniform1f(gl.getUniformLocation(this.progBilateral, 'u_sigmaTexel'), sigmaTexel)
		gl.uniform1f(gl.getUniformLocation(this.progBilateral, 'u_sigmaColor'), this.sigmaColor)

		// Bind textures
		gl.activeTexture(gl.TEXTURE0)
		gl.bindTexture(gl.TEXTURE_2D, this.texFrame)
		gl.activeTexture(gl.TEXTURE1)
		gl.bindTexture(gl.TEXTURE_2D, this.texMask)

		gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4)
	}

	/**
	 * Apply multi-pass Gaussian blur.
	 *
	 * @private
	 * @param {number} width - Output width.
	 * @param {number} height - Output height.
	 * @return {void}
	 */
	_applyMultiPassBlur(width, height) {
		const gl = this.gl
		const scale = 0.5
		const blurWidth = width * scale
		const blurHeight = height * scale
		const texelWidth = 1 / blurWidth
		const texelHeight = 1 / blurHeight

		// Allocate blur textures
		gl.bindTexture(gl.TEXTURE_2D, this.texBlurred1)
		gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, blurWidth, blurHeight, 0, gl.RGBA, gl.UNSIGNED_BYTE, null)
		gl.bindTexture(gl.TEXTURE_2D, this.texBlurred2)
		gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, blurWidth, blurHeight, 0, gl.RGBA, gl.UNSIGNED_BYTE, null)

		// Setup FBOs
		gl.bindFramebuffer(gl.FRAMEBUFFER, this.fboBlur1)
		gl.framebufferTexture2D(gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0, gl.TEXTURE_2D, this.texBlurred1, 0)
		gl.bindFramebuffer(gl.FRAMEBUFFER, this.fboBlur2)
		gl.framebufferTexture2D(gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0, gl.TEXTURE_2D, this.texBlurred2, 0)

		gl.viewport(0, 0, blurWidth, blurHeight)
		gl.useProgram(this.progBlur)
		this._setupVertexAttributes(this.progBlur)

		// Set static uniforms
		gl.uniform1i(gl.getUniformLocation(this.progBlur, 'u_inputFrame'), 0)
		gl.uniform1i(gl.getUniformLocation(this.progBlur, 'u_personMask'), 1)

		gl.activeTexture(gl.TEXTURE1)
		gl.bindTexture(gl.TEXTURE_2D, this.texMaskFiltered)

		// Apply 3 blur passes
		for (let i = 0; i < 3; i++) {
			// Vertical pass (offsets along Y)
			gl.uniform2f(gl.getUniformLocation(this.progBlur, 'u_texelSize'), 0, texelHeight)
			gl.bindFramebuffer(gl.FRAMEBUFFER, this.fboBlur1)

			if (i === 0) {
				gl.activeTexture(gl.TEXTURE0)
				gl.bindTexture(gl.TEXTURE_2D, this.texFrame)
			} else {
				gl.activeTexture(gl.TEXTURE0)
				gl.bindTexture(gl.TEXTURE_2D, this.texBlurred2)
			}

			gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4)

			// Horizontal pass (offsets along X)
			gl.uniform2f(gl.getUniformLocation(this.progBlur, 'u_texelSize'), texelWidth, 0)
			gl.bindFramebuffer(gl.FRAMEBUFFER, this.fboBlur2)
			gl.activeTexture(gl.TEXTURE0)
			gl.bindTexture(gl.TEXTURE_2D, this.texBlurred1)
			gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4)
		}
	}

	/**
	 * Run the full compositing pipeline.
	 *
	 * @param {object} opts - Rendering options.
	 * @param {HTMLVideoElement} opts.videoEl - Foreground video element.
	 * @param {object} [opts.mask] - Segmentation mask object.
	 * @param {HTMLImageElement|HTMLVideoElement|HTMLCanvasElement} [opts.bgSource] - Background source.
	 * @param {boolean} [opts.refreshBg] - Whether to re-upload the background source this frame.
	 * @param {number} opts.mode - Mode (0 = background source, 1 = blur).
	 * @param {number} opts.outW - Output width.
	 * @param {number} opts.outH - Output height.
	 * @param {number} [opts.edgeFeatherPx=5] - Edge feather amount in pixels.
	 * @return {void}
	 */
	render(opts) {
		const gl = this.gl
		const {
			videoEl,
			mask,
			bgSource,
			refreshBg,
			mode,
			outW,
			outH,
			edgeFeatherPx = 5,
		} = opts

		// Validate dimensions
		if (!outW || !outH || outW <= 0 || outH <= 0) {
			return
		}

		// Resize canvas if needed
		if (this.canvas.width !== outW || this.canvas.height !== outH) {
			this.canvas.width = outW
			this.canvas.height = outH
		}
		// Allocate mask filtered texture
		gl.bindTexture(gl.TEXTURE_2D, this.texMaskFiltered)
		gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, outW, outH, 0, gl.RGBA, gl.UNSIGNED_BYTE, null)

		// Upload video frame
		this._upload(this.texFrame, videoEl)

		// Upload and process mask
		if (mask) {
			this._blitTextureToCanvas(mask)
			this._upload(this.texMask, mask.canvas, { flipY: true })
		}

		// Upload background if in image mode
		if (mode === 0 && bgSource && refreshBg) {
			this._upload(this.texBg, bgSource)
		}

		gl.bindVertexArray(this.vertexArray)

		// Apply bilateral filter to mask
		if (mask) {
			this._applyBilateralFilter(outW, outH)
		}

		// Apply multi-pass blur if in blur mode
		if (mode === 1) {
			this._applyMultiPassBlur(outW, outH)
		}

		// Final blend pass
		gl.bindFramebuffer(gl.FRAMEBUFFER, null)
		gl.viewport(0, 0, outW, outH)
		gl.useProgram(this.progBlend)
		this._setupVertexAttributes(this.progBlend)

		// Set blend uniforms
		this.coverage = [0.45, 0.7 - (edgeFeatherPx * 0.01)]
		gl.uniform1i(gl.getUniformLocation(this.progBlend, 'u_inputFrame'), 0)
		gl.uniform1i(gl.getUniformLocation(this.progBlend, 'u_personMask'), 1)
		gl.uniform1i(gl.getUniformLocation(this.progBlend, 'u_blurredFrame'), 2)
		gl.uniform1i(gl.getUniformLocation(this.progBlend, 'u_background'), 3)
		gl.uniform2f(gl.getUniformLocation(this.progBlend, 'u_coverage'), this.coverage[0], this.coverage[1])
		gl.uniform1f(gl.getUniformLocation(this.progBlend, 'u_lightWrapping'), this.lightWrapping)
		gl.uniform1i(gl.getUniformLocation(this.progBlend, 'u_mode'), mode)

		// Bind textures for final blend
		gl.activeTexture(gl.TEXTURE0)
		gl.bindTexture(gl.TEXTURE_2D, this.texFrame)
		gl.activeTexture(gl.TEXTURE1)
		gl.bindTexture(gl.TEXTURE_2D, this.texMaskFiltered)
		gl.activeTexture(gl.TEXTURE2)
		gl.bindTexture(gl.TEXTURE_2D, this.texBlurred2)
		gl.activeTexture(gl.TEXTURE3)
		gl.bindTexture(gl.TEXTURE_2D, this.texBg)

		gl.clearColor(0, 0, 0, 1)
		gl.clear(gl.COLOR_BUFFER_BIT)
		gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4)
	}

	/**
	 * Release all GL resources.
	 *
	 * @return {void}
	 */
	dispose() {
		const gl = this.gl
		if (!gl) {
			return
		}

		// Delete textures
		gl.deleteTexture(this.texFrame)
		gl.deleteTexture(this.texMask)
		gl.deleteTexture(this.texMaskFiltered)
		gl.deleteTexture(this.texBg)
		gl.deleteTexture(this.texBlurred1)
		gl.deleteTexture(this.texBlurred2)

		// Delete buffers
		gl.deleteBuffer(this.positionBuffer)
		gl.deleteBuffer(this.texCoordBuffer)

		// Delete programs
		gl.deleteProgram(this.progBilateral)
		gl.deleteProgram(this.progBlur)
		gl.deleteProgram(this.progBlend)

		// Delete framebuffers
		gl.deleteFramebuffer(this.fboMask)
		gl.deleteFramebuffer(this.fboBlur1)
		gl.deleteFramebuffer(this.fboBlur2)

		// Delete vertex array
		gl.deleteVertexArray(this.vertexArray)

		// Clear references
		this.texFrame = this.texMask = this.texMaskFiltered = null
		this.texBg = this.texBlurred1 = this.texBlurred2 = null
		this.positionBuffer = this.texCoordBuffer = this.blitBuf = null
		this.progBilateral = this.progBlur = this.progBlend = this.progBlit = null
		this.fboMask = this.fboBlur1 = this.fboBlur2 = null
		this.vertexArray = null
	}
}
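
For reference, a minimal sketch of how this compositor could be driven per frame. It assumes the class above is the default export of WebGLCompositor.js and that its constructor receives the output canvas (as the uses of this.canvas and this.gl suggest); the onFrame helper and the way the mask is obtained are illustrative only, not the actual VideoStreamBackgroundEffect wiring:

```js
// Hypothetical usage sketch, not the actual effect code.
import WebGLCompositor from './WebGLCompositor.js'

const outputCanvas = document.createElement('canvas')
const compositor = new WebGLCompositor(outputCanvas) // assumed constructor signature

function onFrame(videoEl, mask) {
	// mask is expected to expose canvas, width, height and getAsWebGLTexture(),
	// like the confidence masks returned by the MediaPipe vision tasks API.
	compositor.render({
		videoEl,
		mask,
		mode: 1, // 1 = blur the real background, 0 = use a background image
		outW: videoEl.videoWidth,
		outH: videoEl.videoHeight,
		edgeFeatherPx: 5,
	})
	// outputCanvas now holds the composited frame and can feed
	// outputCanvas.captureStream() to produce the effect's output MediaStream.
}

// When the effect is stopped:
// compositor.dispose()
```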
@@ -1,70 +0,0 @@
/**
 * SPDX-FileCopyrightText: 2021 Nextcloud GmbH and Nextcloud contributors
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */
import * as wasmCheck from 'wasm-check'
import JitsiStreamBackgroundEffect from './JitsiStreamBackgroundEffect.js'
import createTFLiteSIMDModule from './vendor/tflite/tflite-simd.js'
import createTFLiteModule from './vendor/tflite/tflite.js'

const models = {
	modelLandscape: 'libs/selfie_segmentation_landscape.tflite',
}

const segmentationDimensions = {
	modelLandscape: {
		height: 144,
		width: 256,
	},
}

/**
 * Creates a new instance of JitsiStreamBackgroundEffect. This loads the Meet background model that is used to
 * extract person segmentation.
 *
 * @param {object} virtualBackground - The virtual object that contains the background image source and
 * the isVirtualBackground flag that indicates if virtual image is activated.
 * @param {Function} dispatch - The Redux dispatch function.
 * @return {Promise<JitsiStreamBackgroundEffect>}
 */
export async function createVirtualBackgroundEffect(virtualBackground, dispatch) {
	if (!MediaStreamTrack.prototype.getSettings && !MediaStreamTrack.prototype.getConstraints) {
		throw new Error('JitsiStreamBackgroundEffect not supported!')
	}
	let tflite

	// Checks if WebAssembly feature is supported or enabled by/in the browser.
	// Conditional import of wasm-check package is done to prevent
	// the browser from crashing when the user opens the app.
	try {
		if (wasmCheck?.feature?.simd) {
			tflite = await createTFLiteSIMDModule()
		} else {
			tflite = await createTFLiteModule()
		}
	} catch (err) {
		console.error('Looks like WebAssembly is disabled or not supported on this browser')

		return
	}

	const modelBufferOffset = tflite._getModelBufferMemoryOffset()
	const modelResponse = await fetch(models.modelLandscape)

	if (!modelResponse.ok) {
		throw new Error('Failed to download tflite model!')
	}

	const model = await modelResponse.arrayBuffer()

	tflite.HEAPU8.set(new Uint8Array(model), modelBufferOffset)

	tflite._loadModel(model.byteLength)

	const options = {
		...segmentationDimensions.modelLandscape,
		virtualBackground,
	}

	return new JitsiStreamBackgroundEffect(tflite, options)
}
@@ -1,24 +0,0 @@
# Virtual Background on stream effects

> From https://google.github.io/mediapipe/solutions/models.html#selfie-segmentation

#### Canvas 2D + CPU

This rendering pipeline is pretty much the same as for BodyPix. It relies on Canvas compositing properties to blend rendering layers according to the segmentation mask.

Interactions with the TFLite inference tool are executed on the CPU to convert from UInt8 to Float32 for the model input and to apply softmax on the model output.

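The UInt8-to-Float32 input conversion and the softmax over the model output mentioned above look roughly like the following sketch (buffer and function names are illustrative, not the actual tflite bindings):

```js
// Illustrative sketch of the CPU-side conversions; names are hypothetical.
function prepareInput(rgbaPixels, floatInput) {
	// Normalize UInt8 [0, 255] RGB values to Float32 [0, 1] for the model input.
	for (let i = 0, j = 0; i < rgbaPixels.length; i += 4, j += 3) {
		floatInput[j] = rgbaPixels[i] / 255
		floatInput[j + 1] = rgbaPixels[i + 1] / 255
		floatInput[j + 2] = rgbaPixels[i + 2] / 255
	}
}

function personProbability(backgroundScore, personScore) {
	// Two-class softmax over the raw model outputs for one pixel.
	const shift = Math.max(backgroundScore, personScore) // numerical stability
	const expBackground = Math.exp(backgroundScore - shift)
	const expPerson = Math.exp(personScore - shift)
	return expPerson / (expBackground + expPerson)
}
```
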
The framerate is higher and the quality looks better than with BodyPix.

#### SIMD and non-SIMD

How to test on SIMD:

1. Go to chrome://flags/
2. Search for the SIMD flag
3. Enable "WebAssembly SIMD support" (enables support for the WebAssembly SIMD proposal)
4. Reopen Google Chrome

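SIMD availability can also be probed from code; this is the same check the loader in index.js performed with the wasm-check package:

```js
import * as wasmCheck from 'wasm-check'

// true when the browser exposes the WebAssembly SIMD proposal
console.log('WASM SIMD supported:', Boolean(wasmCheck?.feature?.simd))
```
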
More details:
- [WebAssembly](https://webassembly.org/)
- [WebAssembly SIMD](https://github.com/WebAssembly/simd)
- [TFLite](https://blog.tensorflow.org/2020/07/accelerating-tensorflow-lite-xnnpack-integration.html)
6
src/utils/media/effects/virtual-background/vendor/mediapipe/vision_bundle.js
File diff suppressed because it is too large
5
src/utils/media/effects/virtual-background/vendor/mediapipe/vision_bundle.js.map
File diff suppressed because it is too large
24
src/utils/media/effects/virtual-background/vendor/mediapipe/vision_wasm_internal.js
File diff suppressed because it is too large
24
src/utils/media/effects/virtual-background/vendor/mediapipe/vision_wasm_nosimd_internal.js
File diff suppressed because it is too large
24
src/utils/media/effects/virtual-background/vendor/tflite/tflite-simd.js
File diff suppressed because it is too large
24
src/utils/media/effects/virtual-background/vendor/tflite/tflite.js
File diff suppressed because it is too large