Created
May 9, 2025 07:19
-
-
Save FrankSpierings/e0fa38c6f6e1dd747deeb90259784469 to your computer and use it in GitHub Desktop.
Playing with Transformers detection
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html> | |
<head>
  <title>Webcam Detection</title>
  <style>
    body {
      margin: 0;
    }

    #container {
      display: flex;
      flex-direction: row;
    }

    /* The video and the canvas wrapper share the same size and the same
       top-left origin, so the overlay sits directly on top of the video. */
    #webcam,
    #canvas-container {
      width: 640px;
      height: 480px;
      position: absolute;
      top: 0;
      left: 0;
    }

    /* Pin the overlay canvas to the top-left corner of its wrapper. */
    #overlay-canvas {
      position: absolute;
      top: 0;
      left: 0;
    }
  </style>
</head>
<body> | |
<div id="container">
  <!-- Live webcam feed; the module script assigns the stream to this element. -->
  <video
    id="webcam"
    width="640"
    height="480"
    autoplay
    muted
    playsinline
  ></video>
  <!-- Canvas the module script draws detection boxes and labels onto. -->
  <div id="canvas-container">
    <canvas id="overlay-canvas" width="640" height="480"></canvas>
  </div>
</div>
<script type="module"> | |
import { pipeline, RawImage } from "https://cdn.jsdelivr.net/npm/@xenova/[email protected]/dist/transformers.min.js"; | |
// DOM handles: the webcam <video> element and the canvas that boxes are drawn on.
const video = document.getElementById("webcam");
const overlayCanvas = document.getElementById("overlay-canvas");
const overlayCtx = overlayCanvas.getContext("2d");

// Lower-resolution offscreen canvas that each frame is copied onto before
// inference; results are scaled back up to the overlay size when drawn.
const DETECTION_WIDTH = 320;
const DETECTION_HEIGHT = 240;
const detectorCanvas = new OffscreenCanvas(DETECTION_WIDTH, DETECTION_HEIGHT);
const detectorCtx = detectorCanvas.getContext("2d");

// 1. Set up webcam: request a video-only stream and start playback.
//    A rejected getUserMedia (e.g. permission denied) aborts the module here.
const stream = await navigator.mediaDevices.getUserMedia({ video: true });
video.srcObject = stream;
await video.play();

// 2. Load detection pipeline.
//    NOTE(review): whether the `device` option is honoured depends on the
//    library version imported above — confirm.
const detector = await pipeline(
  "object-detection",
  "Xenova/detr-resnet-50",
  { device: "webgpu" }
);

// 3. Detection loop state: timestamp of the last processed frame and the
//    minimum gap between two detections.
let lastTime = 0;
const interval = 100; // in ms
/**
 * One tick of the throttled detection loop, driven by requestAnimationFrame.
 *
 * When at least `interval` ms have passed since `lastTime`, the current video
 * frame is drawn onto the low-resolution detector canvas, encoded as a blob
 * URL, and handed to `detector`. The returned boxes are then drawn on the
 * overlay canvas, scaled by the overlay/detection size ratio.
 *
 * The temporary blob URL is always revoked, and a failed detection is logged
 * instead of stopping the loop. The function re-schedules itself on every
 * call, whether or not a detection ran.
 *
 * @param {number} [timestamp] - Frame time in ms as passed by
 *   requestAnimationFrame; defaults to `performance.now()` for direct calls.
 * @returns {Promise<void>} Settles once this tick has finished.
 */
async function detectLoop(timestamp = performance.now()) {
  if (timestamp - lastTime >= interval) {
    let imageUrl = null;
    try {
      // Draw to low-res canvas
      detectorCtx.drawImage(video, 0, 0, DETECTION_WIDTH, DETECTION_HEIGHT);
      const blob = await detectorCanvas.convertToBlob();
      imageUrl = URL.createObjectURL(blob);

      const predictions = await detector(imageUrl, {
        threshold: 0.6,
        percentage: false,
      });
      console.log("Predictions;", predictions);

      // Clear overlay
      overlayCtx.clearRect(0, 0, overlayCanvas.width, overlayCanvas.height);

      // Draw boxes scaled to original size
      const scaleX = overlayCanvas.width / DETECTION_WIDTH;
      const scaleY = overlayCanvas.height / DETECTION_HEIGHT;

      // The drawing style is the same for every box, so set it once.
      overlayCtx.strokeStyle = "red";
      overlayCtx.lineWidth = 2;
      overlayCtx.fillStyle = "red";
      overlayCtx.font = "16px sans-serif";

      for (const { box, label, score } of predictions) {
        const { xmin, ymin, xmax, ymax } = box;
        const x = xmin * scaleX;
        const y = ymin * scaleY;
        const width = (xmax - xmin) * scaleX;
        const height = (ymax - ymin) * scaleY;

        overlayCtx.strokeRect(x, y, width, height);
        // Put the label above the box, or just inside it when the box
        // touches the top edge and there is no room above.
        overlayCtx.fillText(
          `${label} (${(score * 100).toFixed(1)}%)`,
          x,
          y > 20 ? y - 5 : y + 15
        );
      }
    } catch (err) {
      // Keep the loop alive: one bad frame should not end detection.
      console.error("Detection failed:", err);
    } finally {
      // Release the per-frame blob; otherwise every tick leaks one.
      if (imageUrl !== null) {
        URL.revokeObjectURL(imageUrl);
      }
      // Also throttle after a failure so errors do not repeat every frame.
      lastTime = timestamp;
    }
  }
  requestAnimationFrame(detectLoop);
}
// Start the loop through requestAnimationFrame so the first tick receives a
// real frame timestamp rather than `undefined`.
requestAnimationFrame(detectLoop);
</script> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment