Skip to content

Instantly share code, notes, and snippets.

@FrankSpierings
Created May 9, 2025 07:19
Show Gist options
  • Save FrankSpierings/e0fa38c6f6e1dd747deeb90259784469 to your computer and use it in GitHub Desktop.
Playing with Transformers detection
<!DOCTYPE html>
<!-- Demo page: a 640x480 webcam feed with a transparent canvas stacked
     directly on top of it, used by the script below to draw detection
     boxes over the live video. -->
<html>
<head>
<title>Webcam Detection</title>
<style>
body {
margin: 0;
}
#container {
display: flex;
flex-direction: row;
}
/* Video and canvas container share the same absolute origin so the
   overlay canvas sits exactly on top of the video element. */
#webcam, #canvas-container {
width: 640px;
height: 480px;
/* position: relative; */
position: absolute;
top: 0;
left: 0;
}
#video-canvas, #overlay-canvas {
position: absolute;
top: 0;
left: 0;
}
</style>
</head>
<body>
<!-- The <video> shows the raw webcam stream; bounding boxes are drawn
     on #overlay-canvas by the detection script. -->
<div id="container">
<video
id="webcam"
width="640"
height="480"
autoplay
muted
playsinline
></video>
<div id="canvas-container">
<canvas id="overlay-canvas" width="640" height="480"></canvas>
</div>
</div>
<script type="module">
import { pipeline, RawImage } from "https://cdn.jsdelivr.net/npm/@xenova/[email protected]/dist/transformers.min.js";

// DOM handles: the live webcam <video> and the transparent overlay
// canvas the detection boxes are drawn on.
const video = document.getElementById("webcam");
const overlayCanvas = document.getElementById("overlay-canvas");
const overlayCtx = overlayCanvas.getContext("2d");

// Inference runs against a smaller offscreen canvas to keep the model
// fast; results are scaled back up when drawn on the overlay.
const DETECTION_WIDTH = 320;
const DETECTION_HEIGHT = 240;
const detectorCanvas = new OffscreenCanvas(DETECTION_WIDTH, DETECTION_HEIGHT);
const detectorCtx = detectorCanvas.getContext("2d");

// 1. Start the webcam and wait for playback before sampling frames.
const stream = await navigator.mediaDevices.getUserMedia({ video: true });
video.srcObject = stream;
await video.play();

// 2. Load the DETR object-detection pipeline on the WebGPU backend.
const detector = await pipeline("object-detection", "Xenova/detr-resnet-50", {
  device: "webgpu",
});

// 3. Detection-loop timing state.
let lastTime = 0;
const interval = 100; // minimum ms between inference passes
// Detection loop: at most once every `interval` ms, run the detector on a
// downscaled copy of the current video frame and redraw the bounding
// boxes (scaled back up) on the overlay canvas.
async function detectLoop(timestamp) {
  if (timestamp - lastTime >= interval) {
    // Draw the current frame onto the low-res inference canvas.
    detectorCtx.drawImage(video, 0, 0, DETECTION_WIDTH, DETECTION_HEIGHT);
    const blob = await detectorCanvas.convertToBlob();
    const imageUrl = URL.createObjectURL(blob);
    try {
      const predictions = await detector(imageUrl, {
        threshold: 0.6,
        percentage: false, // box coords in pixels of the detection canvas
      });
      console.log("Predictions:", predictions);
      // Clear the previous frame's boxes.
      overlayCtx.clearRect(0, 0, overlayCanvas.width, overlayCanvas.height);
      // Scale detection-canvas coordinates up to the overlay size.
      const scaleX = overlayCanvas.width / DETECTION_WIDTH;
      const scaleY = overlayCanvas.height / DETECTION_HEIGHT;
      for (const { box, label, score } of predictions) {
        const { xmin, ymin, xmax, ymax } = box;
        const x = xmin * scaleX;
        const y = ymin * scaleY;
        const width = (xmax - xmin) * scaleX;
        const height = (ymax - ymin) * scaleY;
        overlayCtx.strokeStyle = "red";
        overlayCtx.lineWidth = 2;
        overlayCtx.strokeRect(x, y, width, height);
        overlayCtx.fillStyle = "red";
        overlayCtx.font = "16px sans-serif";
        overlayCtx.fillText(
          `${label} (${(score * 100).toFixed(1)}%)`,
          x,
          y > 20 ? y - 5 : y + 15 // keep the label on-canvas near the top edge
        );
      }
    } finally {
      // FIX: the original never revoked the blob URL, leaking one object
      // URL (and its backing blob) per detection pass, ~10x per second.
      URL.revokeObjectURL(imageUrl);
    }
    lastTime = timestamp;
  }
  requestAnimationFrame(detectLoop);
}
// FIX: kick off via requestAnimationFrame so `timestamp` is a real
// DOMHighResTimeStamp on the first call; the original called
// detectLoop() directly, making the first comparison NaN and silently
// skipping a pass.
requestAnimationFrame(detectLoop);
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment