FrankSpierings · May 9, 2025 07:19
diff --git a/index.html b/index.html
 <!DOCTYPE html>
 <html>
  <head>
    <title>Webcam Detection</title>
    <style>
    /* Remove the browser's default page margin. */
    body {
        margin: 0;
    }

    /* Wrapper for the video and the overlay. Both children are absolutely
       positioned by the rule below, so they do not take part in this flex row
       and stack on top of each other instead. */
    #container {
        display: flex;
        flex-direction: row;
    }

    /* Pin the video and the canvas wrapper to the same 640x480 rectangle at the
       page origin so that overlay drawings line up with the video beneath them. */
    #webcam, #canvas-container {
        width: 640px;
        height: 480px;
        /* position: relative; */
        position: absolute;
        top: 0;
        left: 0;
    }

    /* Anchor the canvas at the top-left corner of #canvas-container.
       NOTE(review): no element with id "video-canvas" appears in this markup;
       that selector looks like a leftover - confirm before removing it. */
    #video-canvas, #overlay-canvas {
        position: absolute;
        top: 0;
        left: 0;
    }
    </style>
  </head>
  <body>
    <div id="container">
      <video
        id="webcam"
        width="640"
        height="480"
        autoplay
        muted
        playsinline
      ></video>

      <div id="canvas-container">
        <canvas id="overlay-canvas" width="640" height="480"></canvas>
      </div>
    </div>
    <script type="module">
      // Module setup: resolve DOM handles, start the webcam and load the detector.
      // The top-level `await`s below require this script to remain type="module".
      //
      // NOTE(review): the version segment of this CDN URL ("[email protected]") looks
      // like an e-mail-obfuscation artifact rather than a real package version;
      // confirm the intended `@xenova/transformers@<version>` specifier.
      // `RawImage` is imported but not referenced anywhere in this script.
      import { pipeline, RawImage } from "https://cdn.jsdelivr.net/npm/@xenova/[email protected]/dist/transformers.min.js";

      // Live camera element and the 2D context of the canvas stacked over it.
      const video = document.getElementById("webcam");
      const overlayCanvas = document.getElementById("overlay-canvas");
      const overlayCtx = overlayCanvas.getContext("2d");

      // Lower-resolution canvas for inference: frames are drawn here at 320x240
      // (half the 640x480 overlay canvas in each dimension) before detection.
      const DETECTION_WIDTH = 320;
      const DETECTION_HEIGHT = 240;
      const detectorCanvas = new OffscreenCanvas(DETECTION_WIDTH, DETECTION_HEIGHT);
      const detectorCtx = detectorCanvas.getContext("2d");

      // 1. Set up webcam. getUserMedia() is not wrapped in try/catch, so a
      //    rejection (e.g. permission denied) stops the rest of this module.
      const stream = await navigator.mediaDevices.getUserMedia({ video: true });
      video.srcObject = stream;
      await video.play();

      // 2. Load detection pipeline ("object-detection" task, DETR ResNet-50 model).
      //    NOTE(review): confirm that the imported library version honours the
      //    `device: "webgpu"` option - TODO verify against its pipeline() docs.
      const detector = await pipeline(
        "object-detection",
        "Xenova/detr-resnet-50",
        { device: "webgpu" }
      );

      // 3. Run detection loop
      // `lastTime` holds the frame timestamp of the most recent detection pass and
      // `interval` is the minimum gap between passes, so detection is attempted at
      // most about ten times per second instead of on every animation frame.
      let lastTime = 0;
      const interval = 100; // in ms

      /**
       * One tick of the detection loop, driven by requestAnimationFrame.
       *
       * At most once per `interval` ms it copies the current video frame onto the
       * low-resolution detector canvas, runs the object detector on it and redraws
       * the overlay with the resulting boxes scaled up to the overlay's size.
       * It always re-schedules itself for the next animation frame, even when a
       * detection pass fails.
       *
       * @param {number} [timestamp] - Frame time in ms as supplied by
       *   requestAnimationFrame. Defaults to `performance.now()` (same clock) so
       *   the initial direct call, which passes nothing, is throttled explicitly
       *   instead of relying on a NaN comparison.
       * @returns {Promise<void>} Settles once this tick's work is finished.
       */
      async function detectLoop(timestamp = performance.now()) {
        if (timestamp - lastTime >= interval) {
          let frameUrl;
          try {
            // Draw to low-res canvas
            detectorCtx.drawImage(video, 0, 0, DETECTION_WIDTH, DETECTION_HEIGHT);
            const frameBlob = await detectorCanvas.convertToBlob();
            frameUrl = URL.createObjectURL(frameBlob);

            // `percentage: false` asks for pixel coordinates, which are relative
            // to the detector canvas and therefore rescaled below.
            const predictions = await detector(frameUrl, {
              threshold: 0.6,
              percentage: false,
            });

            console.log("Predictions;", predictions);

            // Clear overlay
            overlayCtx.clearRect(0, 0, overlayCanvas.width, overlayCanvas.height);

            // Draw boxes scaled to original size
            const scaleX = overlayCanvas.width / DETECTION_WIDTH;
            const scaleY = overlayCanvas.height / DETECTION_HEIGHT;

            // The drawing style is identical for every box, so set it once.
            overlayCtx.strokeStyle = "red";
            overlayCtx.lineWidth = 2;
            overlayCtx.fillStyle = "red";
            overlayCtx.font = "16px sans-serif";

            for (const { box, label, score } of predictions) {
              const { xmin, ymin, xmax, ymax } = box;

              const x = xmin * scaleX;
              const y = ymin * scaleY;
              const width = (xmax - xmin) * scaleX;
              const height = (ymax - ymin) * scaleY;

              overlayCtx.strokeRect(x, y, width, height);

              // Put the caption above the box unless that would clip at the top.
              overlayCtx.fillText(
                `${label} (${(score * 100).toFixed(1)}%)`,
                x,
                y > 20 ? y - 5 : y + 15
              );
            }
          } catch (err) {
            // A failed pass must not stop the loop; report it and try again later.
            console.error("Detection pass failed:", err);
          } finally {
            // Release the blob behind the object URL; without this every pass
            // leaks one encoded frame for the lifetime of the page.
            if (frameUrl !== undefined) {
              URL.revokeObjectURL(frameUrl);
            }
            lastTime = timestamp;
          }
        }
        requestAnimationFrame(detectLoop);
      }

      // Kick off the loop; every later tick is scheduled from inside detectLoop.
      detectLoop();
    </script>
  </body>
 </html>
	<!DOCTYPE html>
	<html>
	<head>
	<title>Webcam Detection</title>
	<style>
	/* Remove the browser's default page margin. */
	body {
	margin: 0;
	}

	/* Wrapper for the video and the overlay. Both children are absolutely
	   positioned by the rule below, so they do not take part in this flex row
	   and stack on top of each other instead. */
	#container {
	display: flex;
	flex-direction: row;
	}

	/* Pin the video and the canvas wrapper to the same 640x480 rectangle at the
	   page origin so that overlay drawings line up with the video beneath them. */
	#webcam, #canvas-container {
	width: 640px;
	height: 480px;
	/* position: relative; */
	position: absolute;
	top: 0;
	left: 0;
	}

	/* Anchor the canvas at the top-left corner of #canvas-container.
	   NOTE(review): no element with id "video-canvas" appears in this markup;
	   that selector looks like a leftover - confirm before removing it. */
	#video-canvas, #overlay-canvas {
	position: absolute;
	top: 0;
	left: 0;
	}
	</style>
	</head>
	<body>
	<div id="container">
	<video
	id="webcam"
	width="640"
	height="480"
	autoplay
	muted
	playsinline
	></video>

	<div id="canvas-container">
	<canvas id="overlay-canvas" width="640" height="480"></canvas>
	</div>
	</div>
	<script type="module">
	// Module setup: resolve DOM handles, start the webcam and load the detector.
	// The top-level `await`s below require this script to remain type="module".
	//
	// NOTE(review): the version segment of this CDN URL ("[email protected]") looks
	// like an e-mail-obfuscation artifact rather than a real package version;
	// confirm the intended `@xenova/transformers@<version>` specifier.
	// `RawImage` is imported but not referenced anywhere in this script.
	import { pipeline, RawImage } from "https://cdn.jsdelivr.net/npm/@xenova/[email protected]/dist/transformers.min.js";

	// Live camera element and the 2D context of the canvas stacked over it.
	const video = document.getElementById("webcam");
	const overlayCanvas = document.getElementById("overlay-canvas");
	const overlayCtx = overlayCanvas.getContext("2d");

	// Lower-resolution canvas for inference: frames are drawn here at 320x240
	// (half the 640x480 overlay canvas in each dimension) before detection.
	const DETECTION_WIDTH = 320;
	const DETECTION_HEIGHT = 240;
	const detectorCanvas = new OffscreenCanvas(DETECTION_WIDTH, DETECTION_HEIGHT);
	const detectorCtx = detectorCanvas.getContext("2d");

	// 1. Set up webcam. getUserMedia() is not wrapped in try/catch, so a
	//    rejection (e.g. permission denied) stops the rest of this module.
	const stream = await navigator.mediaDevices.getUserMedia({ video: true });
	video.srcObject = stream;
	await video.play();

	// 2. Load detection pipeline ("object-detection" task, DETR ResNet-50 model).
	//    NOTE(review): confirm that the imported library version honours the
	//    `device: "webgpu"` option - TODO verify against its pipeline() docs.
	const detector = await pipeline(
	"object-detection",
	"Xenova/detr-resnet-50",
	{ device: "webgpu" }
	);

	// 3. Run detection loop
	// `lastTime` holds the frame timestamp of the most recent detection pass and
	// `interval` is the minimum gap between passes, so detection is attempted at
	// most about ten times per second instead of on every animation frame.
	let lastTime = 0;
	const interval = 100; // in ms

	/**
	 * One tick of the detection loop, driven by requestAnimationFrame.
	 *
	 * At most once per `interval` ms it copies the current video frame onto the
	 * low-resolution detector canvas, runs the object detector on it and redraws
	 * the overlay with the resulting boxes scaled up to the overlay's size.
	 * It always re-schedules itself for the next animation frame, even when a
	 * detection pass fails.
	 *
	 * @param {number} [timestamp] - Frame time in ms as supplied by
	 *   requestAnimationFrame. Defaults to `performance.now()` (same clock) so
	 *   the initial direct call, which passes nothing, is throttled explicitly
	 *   instead of relying on a NaN comparison.
	 * @returns {Promise<void>} Settles once this tick's work is finished.
	 */
	async function detectLoop(timestamp = performance.now()) {
	  if (timestamp - lastTime >= interval) {
	    let frameUrl;
	    try {
	      // Draw to low-res canvas
	      detectorCtx.drawImage(video, 0, 0, DETECTION_WIDTH, DETECTION_HEIGHT);
	      const frameBlob = await detectorCanvas.convertToBlob();
	      frameUrl = URL.createObjectURL(frameBlob);

	      // `percentage: false` asks for pixel coordinates, which are relative
	      // to the detector canvas and therefore rescaled below.
	      const predictions = await detector(frameUrl, {
	        threshold: 0.6,
	        percentage: false,
	      });

	      console.log("Predictions;", predictions);

	      // Clear overlay
	      overlayCtx.clearRect(0, 0, overlayCanvas.width, overlayCanvas.height);

	      // Draw boxes scaled to original size
	      const scaleX = overlayCanvas.width / DETECTION_WIDTH;
	      const scaleY = overlayCanvas.height / DETECTION_HEIGHT;

	      // The drawing style is identical for every box, so set it once.
	      overlayCtx.strokeStyle = "red";
	      overlayCtx.lineWidth = 2;
	      overlayCtx.fillStyle = "red";
	      overlayCtx.font = "16px sans-serif";

	      for (const { box, label, score } of predictions) {
	        const { xmin, ymin, xmax, ymax } = box;

	        const x = xmin * scaleX;
	        const y = ymin * scaleY;
	        const width = (xmax - xmin) * scaleX;
	        const height = (ymax - ymin) * scaleY;

	        overlayCtx.strokeRect(x, y, width, height);

	        // Put the caption above the box unless that would clip at the top.
	        overlayCtx.fillText(
	          `${label} (${(score * 100).toFixed(1)}%)`,
	          x,
	          y > 20 ? y - 5 : y + 15
	        );
	      }
	    } catch (err) {
	      // A failed pass must not stop the loop; report it and try again later.
	      console.error("Detection pass failed:", err);
	    } finally {
	      // Release the blob behind the object URL; without this every pass
	      // leaks one encoded frame for the lifetime of the page.
	      if (frameUrl !== undefined) {
	        URL.revokeObjectURL(frameUrl);
	      }
	      lastTime = timestamp;
	    }
	  }
	  requestAnimationFrame(detectLoop);
	}

	// Kick off the loop; every later tick is scheduled from inside detectLoop.
	detectLoop();
	</script>
	</body>
	</html>