Skip to content

Instantly share code, notes, and snippets.

@digitaldrreamer
Last active April 20, 2025 15:23
Show Gist options
  • Save digitaldrreamer/f597412318a1ad57944184c425458679 to your computer and use it in GitHub Desktop.
Save digitaldrreamer/f597412318a1ad57944184c425458679 to your computer and use it in GitHub Desktop.
Dockerfile for Crawlee (Playwright+Chromium) on ARM64
// chrome_test.js
import { chromium } from 'playwright-chromium';
// Timestamped debug logger: every message is prefixed with an ISO-8601
// timestamp, a severity icon and the CHROME_TEST tag.
const debug = (() => {
  // The prefix is rebuilt on each call so the timestamp reflects the log time.
  const prefix = (icon) => `[${new Date().toISOString()}] ${icon} CHROME_TEST:`;

  return {
    log(...args) {
      console.log(prefix('🔍'), ...args);
    },
    info(...args) {
      console.info(prefix('ℹ️'), ...args);
    },
    error(...args) {
      console.error(prefix('❌'), ...args);
    },
    perf(...args) {
      console.info(prefix('⚡'), ...args);
    },
  };
})();
/**
 * Launches Chromium through Playwright, opens about:blank in a fresh context,
 * logs a few navigator properties, and closes everything again.
 *
 * @param {object} [options]
 * @param {boolean} [options.headless=true] - Passed to the browser launch.
 * @param {string} [options.executablePath] - Chromium binary to launch; falls back to
 *   PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH and then to '/usr/bin/chromium'.
 * @returns {Promise<void>} Resolves after the context and browser are closed.
 * @throws Rethrows whatever error interrupted the test, after logging it and
 *   attempting to close the browser.
 */
async function testChromium(options = {}) {
  const startedAt = Date.now();
  const {
    headless = true,
    executablePath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH || '/usr/bin/chromium',
  } = options;

  // Reads an environment variable for the diagnostic dump below.
  const envOrUnset = (key) => process.env[key] || 'not set';

  debug.info('Testing Chromium configuration:', {
    headless,
    executablePath: executablePath || 'default',
    env: {
      PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH: envOrUnset('PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH'),
      DISPLAY: envOrUnset('DISPLAY'),
      XVFB_DISPLAY: envOrUnset('XVFB_DISPLAY'),
    },
  });

  // Logger handed to the launch call: only errors and warnings are surfaced.
  const browserLogger = {
    isEnabled: () => true,
    log(name, severity, message) {
      if (['error', 'warning'].includes(severity)) {
        debug.error(`Browser ${name}: ${message}`);
      }
    },
  };

  let chromeInstance = null;
  try {
    debug.log('Launching browser...');
    chromeInstance = await chromium.launch({
      headless,
      executablePath,
      args: ['--no-sandbox'],
      logger: browserLogger,
    });

    debug.info('Creating browser context...');
    const browserContext = await chromeInstance.newContext({
      viewport: { width: 1280, height: 720 },
      userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
    });

    debug.log('Creating new page...');
    const blankPage = await browserContext.newPage();

    debug.log('Loading about:blank...');
    await blankPage.goto('about:blank');
    debug.info('✓ Successfully created page and loaded about:blank');

    // Confirm JavaScript runs in the page by reading navigator properties.
    const environment = await blankPage.evaluate(() => ({
      userAgent: navigator.userAgent,
      platform: navigator.platform,
      language: navigator.language,
      webdriver: navigator.webdriver,
    }));
    debug.info('Browser environment:', environment);

    await browserContext.close();
    await chromeInstance.close();

    const elapsedMs = Date.now() - startedAt;
    debug.perf(`✓ Successfully completed browser test in ${elapsedMs}ms`);
    debug.info('─────────────────────────────────────');
  } catch (error) {
    debug.error(`Browser test failed: ${error.message}`, {
      stack: error.stack,
      headless,
      executablePath,
    });
    // Best-effort shutdown: a failure to close must not mask the original error.
    if (chromeInstance) {
      await chromeInstance.close().catch(() => {});
    }
    throw error;
  }
}
export { testChromium };
# Node.js version used in the base image tag below (build argument, default 20).
ARG NODE_VERSION=20
# Base image: Debian bookworm-slim Node image, requested for the linux/arm64 platform.
FROM --platform=linux/arm64 node:${NODE_VERSION}-bookworm-slim
LABEL maintainer="[email protected]" description="Base image for Apify Actors using Chromium (ARM64)"
# Keep package installation from prompting for input.
ENV DEBIAN_FRONTEND=noninteractive
# Install OS-level deps, Chromium, fonts, XVFB; create the unprivileged user
# and the X11 socket directory; then trim the apt caches.
# apt-get is used throughout because plain `apt` is an interactive front end
# whose command-line interface is not guaranteed stable for scripts.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        gnupg \
        curl \
        unzip \
        xvfb \
        xauth \
        git \
        chromium \
        procps \
        fonts-freefont-ttf \
        fonts-kacst \
        fonts-thai-tlwg \
        fonts-wqy-zenhei \
    && groupadd -r myuser && useradd -r -g myuser -G audio,video myuser \
    && mkdir -p /home/myuser/Downloads \
    && chown -R myuser:myuser /home/myuser \
    && mkdir -p /tmp/.X11-unix && chmod 1777 /tmp/.X11-unix \
    && npm config --global set update-notifier false \
    && PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 npm_config_ignore_scripts=1 npx playwright install-deps chromium \
    && apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/*
# Use unprivileged user
USER myuser
ENV HOME=/home/myuser
WORKDIR /home/myuser
# Copy project source files into the working directory, owned by myuser
COPY --chown=myuser:myuser . /home/myuser/
# Set environment variables for browser configuration:
# the executable-path variables all point at /usr/bin/chromium, and the
# skip flags are set so no browser download/install is attempted.
ENV PLAYWRIGHT_BROWSERS_PATH=/usr/bin
ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium
ENV PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
ENV APIFY_CHROME_EXECUTABLE_PATH=/usr/bin/chromium
ENV APIFY_DEFAULT_BROWSER_PATH=/usr/bin/chromium
ENV CRAWLEE_SKIP_BROWSER_INSTALL=1
ENV NODE_ENV=production
# NOTE(review): --max_old_space_size is given in MiB, so 30000 permits a roughly
# 30 GB V8 heap — confirm this matches the memory available on the target host.
ENV NODE_OPTIONS="--max_old_space_size=30000 --max-http-header-size=80000"
# Set XVFB display size and headless display config
ENV DISPLAY=:99
ENV XVFB_WHD=1920x1080x24+32
# Install Node.js dependencies (dev and optional packages omitted, no package-lock),
# add the latest playwright without saving it to package.json, then print the
# installed packages and the Node.js, npm and Chromium versions to the build log.
# The `npm list` step is allowed to fail (|| true) so it cannot break the build.
RUN npm --quiet set progress=false \
&& npm install --omit=dev --omit=optional --no-package-lock --prefer-online \
&& npm install playwright@latest --no-save \
&& echo "Installed NPM packages:" \
&& (npm list --omit=dev --omit=optional || true) \
&& echo "Node.js version:" && node --version \
&& echo "NPM version:" && npm --version \
&& echo "Chromium version:" && chromium --version
# Create cleanup script and make scripts executable.
# The script removes a stale X lock file for display :99, stops any running
# Xvfb, and waits a second. printf writes one line per argument, so the result
# does not depend on whether /bin/sh's echo expands "\n" escapes.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'rm -f /tmp/.X99-lock' \
        'pkill Xvfb || true' \
        'sleep 1' > /home/myuser/cleanup-xvfb.sh \
    && chmod +x /home/myuser/cleanup-xvfb.sh \
    && chmod +x ./new_xvfb_run_cmd.sh
# Set the default command to run cleanup, verify chromium, and start the server.
# The steps are chained with &&, so each one runs only if the previous step
# exited successfully; the verification and the server both run under the Xvfb wrapper.
CMD ["bash", "-c", "./cleanup-xvfb.sh && ./new_xvfb_run_cmd.sh node verify-chromium.js && ./new_xvfb_run_cmd.sh node src/server.js"]
// Placeholder: put your own implementation here.
// Keep this file inside the src folder (or at least make sure its path matches the npm scripts and any other references).
#!/bin/bash
# Starts a background Xvfb server and then replaces this shell with the
# command given as arguments.
#
# Usage: <this-script> <command> [args...]
# Environment:
#   DISPLAY   X display for Xvfb to serve (default :99)
#   XVFB_WHD  screen geometry passed to "-screen 0" (default 1920x1080x24+32)

# Fall back to the values the Dockerfile exports, so the script still works
# when it is run outside that image; export DISPLAY so the command sees it.
: "${DISPLAY:=:99}"
: "${XVFB_WHD:=1920x1080x24+32}"
export DISPLAY

# Kill any existing Xvfb process
echo "Killing any existing Xvfb processes..."
pkill Xvfb || true # The || true ensures the script continues even if no process is found
# Wait a moment for the process to be fully terminated
sleep 1
echo "Starting X virtual framebuffer using: Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp"
# Quoted so each value reaches Xvfb as exactly one argument.
Xvfb "$DISPLAY" -ac -screen 0 "$XVFB_WHD" -nolisten tcp &
echo "Executing main command"
exec "$@"
{
"name": "my-crawler",
"version": "0.0.1",
"type": "module",
"description": "This is an example of a Crawlee project.",
"private": true,
"engines": {
"node": ">=18.0.0"
},
"author": {
"name": "digitaldπeamer",
"email": "[email protected]"
},
"license": "ISC",
"dependencies": {
"apify": "^3.3.2",
"crawlee": "*",
"dotenv": "^16.4.7",
"express": "^4.21.2",
"playwright": "*",
"playwright-chromium": "*"
},
"devDependencies": {
"nodemon": "^3.1.9"
},
"scripts": {
"start": "node src/server.js",
"dev": "nodemon",
"verify-browser": "node --expose-gc verify-chromium.js",
"scrape:jobs": "node --expose-gc src/main.js",
"scrape:companies": "node src/companies.js",
"test": "echo \"No tests specified\" && exit 0"
},
"overrides": {
"apify": {
"@crawlee/core": "*",
"@crawlee/types": "*",
"@crawlee/utils": "*"
}
}
}
// Placeholder: put your own implementation here.
// Keep this file inside the src folder (or at least make sure its path matches the npm scripts and any other references).
#!/bin/bash
# Runs the command given as arguments under xvfb-run, which picks a free
# display (-a) and starts Xvfb with the server arguments passed via -s.
#
# Usage: <this-script> <command> [args...]
# Environment:
#   XVFB_WHD  screen geometry passed to "-screen 0" (default 1920x1080x24+32)

# Without a value the -screen option would be missing its geometry argument.
XVFB_WHD="${XVFB_WHD:-1920x1080x24+32}"

# "$*" joins the arguments into one string for display; "$@" below keeps them separate.
echo "Will run command: xvfb-run -a -s \"-ac -screen 0 $XVFB_WHD -nolisten tcp\" $*"
xvfb-run -a -s "-ac -screen 0 $XVFB_WHD -nolisten tcp" "$@"
// verify-chromium.js
// Purpose: ONLY to check if Playwright's browser can launch.
// DO NOT import server.js, main.js, or companies.js here.
// DO NOT call main() or runCompanies() here.
import { launchPlaywright, getMemoryInfo } from 'crawlee';
import { testChromium } from './chrome_test.js';
/**
 * Verifies that Chromium can be launched: first through crawlee's
 * launchPlaywright, then through testChromium in headless and headed mode,
 * and finally calls getMemoryInfo.
 *
 * Never rejects: the outcome is reported through process.exitCode
 * (0 on success, 1 when any step throws).
 *
 * @returns {Promise<void>}
 */
async function checkBrowser() {
  // Both launch paths resolve the Chromium binary the same way.
  const resolveChromiumPath = () =>
    process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH || '/usr/bin/chromium';

  console.log('Running comprehensive Chromium verification...');

  // Log relevant env vars
  const reportedVars = [
    'PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH',
    'PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD',
    'APIFY_DEFAULT_BROWSER_PATH',
  ];
  for (const varName of reportedVars) {
    console.log(`${varName}:`, process.env[varName]);
  }

  try {
    console.log('Sanity test with Playwright Chromium...');
    const sanityBrowser = await launchPlaywright({
      launchOptions: {
        headless: true,
        args: ['--no-sandbox'],
        executablePath: resolveChromiumPath(),
      },
    });
    await sanityBrowser.close();

    // Run test suite with explicit executable path:
    // headless first, then headed (which needs XVFB).
    const chromiumPath = resolveChromiumPath();
    for (const headless of [true, false]) {
      await testChromium({ headless, executablePath: chromiumPath });
    }

    await getMemoryInfo(); // Validate system ps/memory info
    console.log('All tests completed successfully ✅');
    process.exitCode = 0; // Success
  } catch (error) {
    console.error('❌ Browser verification failed:', error);
    process.exitCode = 1; // Failure
  }
}
// Run the verification when this module executes; the result is reported via process.exitCode.
checkBrowser();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment