Skip to content

Commit

Permalink
More configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
nacmartin committed Mar 12, 2023
1 parent 7755ff4 commit 435643d
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 45 deletions.
7 changes: 3 additions & 4 deletions examples/ui-react/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ const trans = (r: number, s: number, z: number) =>
`perspective(1500px) rotateX(30deg) rotateY(${
r / 10
}deg) rotateZ(${r}deg) scale(${s * z})`;
//const cards = ["./Chev.mp4", "/Voyage.mp4", "/UD_1.mp4"];
const cards = ["./Chev.mp4"]; //, "/Voyage.mp4", "/UD_1.mp4"];
const cards = ["./Chev.mp4", "/Voyage.mp4", "/UD_1.mp4"];

// Who knows the type here TBH
function subscribe(eventName: string, listener: (e: any) => void) {
Expand Down Expand Up @@ -213,7 +212,7 @@ function App() {
};

useEffect(() => {
init();
init({ delegate: "GPU" });
subscribe("gesturestart", gestureStarted);
subscribe("gestureend", gestureEnded);
subscribe("gesturemove", gestureMove);
Expand Down Expand Up @@ -241,7 +240,7 @@ function App() {
id="webcam"
autoPlay
playsInline
// style={{ position: "absolute", filter: "sepia(100%)" }}
style={{ height: "960px", width: "1280px" }}
className={styles.cam}
/>
<div
Expand Down
10 changes: 9 additions & 1 deletion examples/ui-react/src/Instructions.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ export function Instructions() {
<p>You can use some gestures:</p>
<ul>
<li>🤟: To play/stop last selected video.</li>
<li>✋: To zoom in/out.</li>
<li>
✋: To zoom in/out (Incline palm forward/backward in an angle with the
wrist).
</li>
<li>👍: Throw last selected video far away above the window.</li>
<li>👎: Throw last selected video far away below the window.</li>
<li>👎: Throw last selected video far away below the window.</li>
Expand All @@ -46,6 +49,11 @@ https://github.com/nacmartin/manitas
</a>{" "}
with React.{" "}
<a href="https://github.com/nacmartin/manitas">See the code</a>
<h2>Notes</h2>
<p>
We have configured mediapipe to use the GPU. It is possible to make it
use the CPU. So if it doesn't work, that may be the reason.
</p>
</div>
);
}
94 changes: 54 additions & 40 deletions packages/manitas/src/gestures.ts
Original file line number Diff line number Diff line change
@@ -1,34 +1,33 @@
import vision from "@mediapipe/tasks-vision";
import { emitGestures, emitAirfingers } from "./events";
import type { HandsState, HandState } from "./types";
import type { HandsState, HandState, ManitasConfig } from "./types";

const { GestureRecognizer, FilesetResolver } = vision;

// TODO: configure if left or right handed
// Configure GPU
// Size of video
// Configure this?
const GESTURE_THRESHOLD = 0.6;
const HANDEDNESS_THRESHOLD = 0.8;
const ACTIVE_THRESHOLD = -0.1;

const videoHeight = "960px";
const videoWidth = "1280px";
// Default runtime configuration for init(); callers may override any subset
// of these fields via the Partial<ManitasConfig> argument to init().
const defaultConfig: ManitasConfig = {
  // Minimum recognizer score for a gesture category to be reported.
  gestureThreshold: 0.6,
  // Minimum handedness score before a hand is attributed as left/right.
  // NOTE(review): "gandedness" looks like a typo for "handedness" — it is
  // used consistently across the codebase, so renaming must be coordinated
  // with ManitasConfig and all config.gandednessThreshold readers.
  gandednessThreshold: 0.8,
  // z-depth cutoff for the index fingertip; values below it count as "active".
  activeThreshold: -0.1,
  // CSS size applied to the <video> element (strings with units, not numbers).
  videoHeight: "960px",
  videoWidth: "1280px",
  // DOM id of the <video> element used as the webcam feed.
  videoId: "webcam",
  // MediaPipe inference backend; "GPU" by default, "CPU" as fallback.
  delegate: "GPU",
  // Remote gesture-recognizer model bundle loaded by GestureRecognizer.
  modelAssetPath:
    "https://storage.googleapis.com/mediapipe-tasks/gesture_recognizer/gesture_recognizer.task",
  // Location of the MediaPipe tasks-vision WASM assets (CDN by default).
  mediapipeWasmPath:
    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm",
};

// Before we can use GestureRecognizer class we must wait for it to finish
// loading. Machine Learning models can be large and take a moment to
// get everything needed to run.
async function load() {
const vision = await FilesetResolver.forVisionTasks(
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
);
async function load(config: ManitasConfig) {
const vision = await FilesetResolver.forVisionTasks(config.mediapipeWasmPath);

const gestureRecognizer = await GestureRecognizer.createFromOptions(vision, {
baseOptions: {
modelAssetPath:
"https://storage.googleapis.com/mediapipe-tasks/gesture_recognizer/gesture_recognizer.task",
// TODO: Make configurable
delegate: "GPU",
modelAssetPath: config.modelAssetPath,
delegate: config.delegate,
},
runningMode: "VIDEO",
numHands: 2,
Expand All @@ -42,9 +41,10 @@ function hasGetUserMedia() {
return !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia);
}

export async function init() {
const gestureRecognizer: vision.GestureRecognizer = await load();
const { video } = setupElements();
export async function init(userConfig: Partial<ManitasConfig> = {}) {
const config: ManitasConfig = { ...defaultConfig, ...userConfig };
const gestureRecognizer: vision.GestureRecognizer = await load(config);
const { video } = setupElements(config.videoId);
if (!video) {
// TODO: Actually create
console.warn("Unable to create auxiliary elements");
Expand All @@ -55,22 +55,24 @@ export async function init() {
console.warn("getUserMedia() is not supported by your browser");
return;
}
getUserMedia(video, () => run(video, gestureRecognizer));
getUserMedia(video, () => run(video, gestureRecognizer, config));
}

function run(
video: HTMLVideoElement,
gestureRecognizer: vision.GestureRecognizer
gestureRecognizer: vision.GestureRecognizer,
config: ManitasConfig
) {
video.style.height = videoHeight;
video.style.width = videoWidth;
video.style.height = config.videoHeight;
video.style.width = config.videoWidth;
video.style.transform = "scaleX(-1)";
runContinously(video, gestureRecognizer);
runContinously(video, gestureRecognizer, config);
}

function runContinously(
video: HTMLVideoElement,
gestureRecognizer: vision.GestureRecognizer
gestureRecognizer: vision.GestureRecognizer,
config: ManitasConfig
) {
let state: HandsState = {
leftHand: {
Expand All @@ -89,13 +91,21 @@ function runContinously(
let leftHand: HandState = { present: false };
handednesses.forEach((hand, idx) => {
const category: vision.Category = hand[0];
if (category.score > HANDEDNESS_THRESHOLD) {
if (category.score > config.gandednessThreshold) {
// Ugly: we flip hands because we need to flip video
if (category.categoryName === "Left") {
rightHand = assembleHandEstimation(gestures[idx], landmarks[idx]);
rightHand = assembleHandEstimation(
gestures[idx],
landmarks[idx],
config
);
}
if (category.categoryName === "Right") {
leftHand = assembleHandEstimation(gestures[idx], landmarks[idx]);
leftHand = assembleHandEstimation(
gestures[idx],
landmarks[idx],
config
);
}
}
});
Expand Down Expand Up @@ -124,10 +134,11 @@ function compareStatesAndEmitEvents(

function assembleHandEstimation(
gestureCategory: vision.Category[],
landmarks: vision.NormalizedLandmark[]
landmarks: vision.NormalizedLandmark[],
config: ManitasConfig
): HandState {
const gesture = bestGesture(gestureCategory);
const active = isActive(landmarks);
const gesture = bestGesture(gestureCategory, config.gestureThreshold);
const active = isActive(landmarks, config.activeThreshold);
return {
gesture,
present: true,
Expand All @@ -136,15 +147,18 @@ function assembleHandEstimation(
};
}

function isActive(landmark: vision.NormalizedLandmark[]) {
function isActive(
landmark: vision.NormalizedLandmark[],
activeThreshold: number
) {
const indexFinger = landmark[8];
return indexFinger.z < ACTIVE_THRESHOLD;
return indexFinger.z < activeThreshold;
}

function bestGesture(category: vision.Category[]) {
function bestGesture(category: vision.Category[], gestureThreshold: number) {
for (let i = 0; i < category.length; i++) {
const gesture = category[i];
if (gesture.score > GESTURE_THRESHOLD && gesture.categoryName !== "None") {
if (gesture.score > gestureThreshold && gesture.categoryName !== "None") {
return gesture.categoryName;
}
}
Expand All @@ -164,8 +178,8 @@ function getUserMedia(video: HTMLVideoElement, onSuccess: () => void) {
});
}

function setupElements() {
const video = document.getElementById("webcam") as HTMLVideoElement;
function setupElements(videoId: string) {
const video = document.getElementById(videoId) as HTMLVideoElement;
return { video };
}

Expand Down
12 changes: 12 additions & 0 deletions packages/manitas/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,15 @@ export interface GestureEventParams {
hand: Hand;
airpoint: Point3D;
}

/**
 * Full configuration contract for the manitas gesture library.
 * Consumers pass a Partial<ManitasConfig> to init(); missing fields are
 * filled from the library's defaults.
 */
export interface ManitasConfig {
  /** Minimum score for a recognized gesture category to be accepted. */
  gestureThreshold: number;
  /**
   * Minimum handedness score before a hand is classified as left/right.
   * NOTE(review): likely a typo for "handednessThreshold" — renaming is a
   * breaking change for the public config, so coordinate with all usages.
   */
  gandednessThreshold: number;
  /** Index-fingertip z-depth cutoff; below this the hand counts as active. */
  activeThreshold: number;
  /** CSS height applied to the video element (e.g. "960px"). */
  videoHeight: string;
  /** CSS width applied to the video element (e.g. "1280px"). */
  videoWidth: string;
  /** DOM id of the <video> element used as the webcam source. */
  videoId: string;
  /** MediaPipe inference backend to request. */
  delegate: "GPU" | "CPU";
  /** URL of the gesture-recognizer model bundle. */
  modelAssetPath: string;
  /** Base URL of the MediaPipe tasks-vision WASM assets. */
  mediapipeWasmPath: string;
}

0 comments on commit 435643d

Please sign in to comment.