Last active
April 20, 2025 15:23
-
-
Save digitaldrreamer/f597412318a1ad57944184c425458679 to your computer and use it in GitHub Desktop.
Dockerfile for Crawlee (Playwright+Chromium) on ARM64
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// chrome_test.js | |
import { chromium } from 'playwright-chromium'; | |
// Debug logger with timestamps
/**
 * Builds a logger that writes to the given console method, prefixing every
 * call with an ISO-8601 timestamp, an icon and the CHROME_TEST tag.
 * The console method is looked up on each call, not captured up front.
 *
 * @param {'log' | 'info' | 'error'} method - Name of the console method to use.
 * @param {string} icon - Marker placed between the timestamp and the tag.
 * @returns {(...args: unknown[]) => void} Logger forwarding all arguments.
 */
const stampedLogger = (method, icon) => (...args) =>
  console[method](`[${new Date().toISOString()}] ${icon} CHROME_TEST:`, ...args);

const debug = {
  log: stampedLogger('log', '🔍'),
  info: stampedLogger('info', 'ℹ️'),
  error: stampedLogger('error', '❌'),
  // Performance messages share the info channel but carry their own icon.
  perf: stampedLogger('info', '⚡'),
};
/**
 * Smoke-tests a Chromium launch through Playwright.
 *
 * Launches the browser, opens a context and a page, loads `about:blank`,
 * reads a few `navigator` properties from the page, then closes the context
 * and the browser. Progress and timing are reported through `debug`.
 *
 * @param {object} [options]
 * @param {boolean} [options.headless=true] - Whether to launch headless.
 * @param {string} [options.executablePath] - Browser binary to launch. Defaults
 *   to `PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH`, then `/usr/bin/chromium`.
 * @param {string[]} [options.args=['--no-sandbox']] - Command-line switches
 *   passed to the browser process.
 * @returns {Promise<void>} Resolves once the browser has been closed.
 * @throws Rethrows any error raised while launching or driving the browser,
 *   after logging it and making a best-effort attempt to close the browser.
 */
async function testChromium(options = {}) {
  const startTime = Date.now();
  const {
    headless = true,
    executablePath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH || '/usr/bin/chromium',
    args = ['--no-sandbox'],
  } = options;

  debug.info(`Testing Chromium configuration:`, {
    headless,
    executablePath: executablePath || 'default',
    args,
    env: {
      PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH: process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH || 'not set',
      DISPLAY: process.env.DISPLAY || 'not set',
      XVFB_DISPLAY: process.env.XVFB_DISPLAY || 'not set',
    },
  });

  let browser = null;
  try {
    debug.log('Launching browser...');
    browser = await chromium.launch({
      headless,
      executablePath,
      args,
      // Forward only browser-side errors and warnings to the error channel.
      logger: {
        isEnabled: () => true,
        log: (name, severity, message) => {
          if (severity === 'error' || severity === 'warning') {
            debug.error(`Browser ${name}: ${message}`);
          }
        },
      },
    });

    debug.info('Creating browser context...');
    const context = await browser.newContext({
      viewport: { width: 1280, height: 720 },
      userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
    });

    debug.log('Creating new page...');
    const page = await context.newPage();

    debug.log('Loading about:blank...');
    await page.goto('about:blank');
    debug.info('✓ Successfully created page and loaded about:blank');

    // Test basic JavaScript execution
    const jsVersion = await page.evaluate(() => ({
      userAgent: navigator.userAgent,
      platform: navigator.platform,
      language: navigator.language,
      webdriver: navigator.webdriver,
    }));
    debug.info('Browser environment:', jsVersion);

    await context.close();
    await browser.close();

    const duration = Date.now() - startTime;
    debug.perf(`✓ Successfully completed browser test in ${duration}ms`);
    debug.info('─────────────────────────────────────');
  } catch (error) {
    debug.error(`Browser test failed: ${error.message}`, {
      stack: error.stack,
      headless,
      executablePath,
    });
    if (browser) {
      // Best-effort teardown: the original failure is what gets rethrown.
      await browser.close().catch(() => {});
    }
    throw error;
  }
}

export { testChromium };
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Image for running a Crawlee/Playwright project against the distribution's
# Chromium package on ARM64, with Xvfb available for headed runs.
ARG NODE_VERSION=20
FROM --platform=linux/arm64 node:${NODE_VERSION}-bookworm-slim

LABEL maintainer="[email protected]" description="Base image for Apify Actors using Chromium (ARM64)"

ENV DEBIAN_FRONTEND=noninteractive

# Install OS-level deps, Chromium, fonts, XVFB
# apt-get is used rather than apt: apt's command-line interface is meant for
# interactive use and is not guaranteed to stay stable for scripts.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        gnupg \
        curl \
        unzip \
        xvfb \
        xauth \
        git \
        chromium \
        procps \
        fonts-freefont-ttf \
        fonts-kacst \
        fonts-thai-tlwg \
        fonts-wqy-zenhei \
    && groupadd -r myuser && useradd -r -g myuser -G audio,video myuser \
    && mkdir -p /home/myuser/Downloads \
    && chown -R myuser:myuser /home/myuser \
    && mkdir -p /tmp/.X11-unix && chmod 1777 /tmp/.X11-unix \
    && npm config --global set update-notifier false \
    && PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 npm_config_ignore_scripts=1 npx playwright install-deps chromium \
    && apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/*

# Use unprivileged user
USER myuser
ENV HOME=/home/myuser
WORKDIR /home/myuser

# Copy project source files
COPY --chown=myuser:myuser . /home/myuser/

# Set environment variables for browser configuration
ENV PLAYWRIGHT_BROWSERS_PATH=/usr/bin
ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium
ENV PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
ENV APIFY_CHROME_EXECUTABLE_PATH=/usr/bin/chromium
ENV APIFY_DEFAULT_BROWSER_PATH=/usr/bin/chromium
ENV CRAWLEE_SKIP_BROWSER_INSTALL=1
ENV NODE_ENV=production
ENV NODE_OPTIONS="--max_old_space_size=30000 --max-http-header-size=80000"

# Set XVFB display size and headless display config
ENV DISPLAY=:99
ENV XVFB_WHD=1920x1080x24+32

# Install Node.js dependencies
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional --no-package-lock --prefer-online \
    && npm install playwright@latest --no-save \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --omit=optional || true) \
    && echo "Node.js version:" && node --version \
    && echo "NPM version:" && npm --version \
    && echo "Chromium version:" && chromium --version

# Create cleanup script and make scripts executable
# printf writes one line per argument, so the result does not depend on
# whether the build shell's echo interprets "\n" escapes.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'rm -f /tmp/.X99-lock' \
        'pkill Xvfb || true' \
        'sleep 1' \
        > /home/myuser/cleanup-xvfb.sh \
    && chmod +x /home/myuser/cleanup-xvfb.sh \
    && chmod +x ./new_xvfb_run_cmd.sh

# Set the default command to run cleanup, verify chromium, and start the server
CMD ["bash", "-c", "./cleanup-xvfb.sh && ./new_xvfb_run_cmd.sh node verify-chromium.js && ./new_xvfb_run_cmd.sh node src/server.js"]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Placeholder: add your own application code in this file.
// Keep it inside the src folder (or at least make sure its path matches the npm scripts and every other reference to it).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Starts Xvfb on $DISPLAY in the background, then replaces this shell with
# the command passed as arguments.
# Reads: DISPLAY (X display to serve), XVFB_WHD (screen geometry for -screen 0).

# Kill any existing Xvfb process
echo "Killing any existing Xvfb processes..."
pkill Xvfb || true # The || true ensures the script continues even if no process is found

# Wait a moment for the process to be fully terminated
sleep 1

echo "Starting X virtual framebuffer using: Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp"
# Quoted so each value reaches Xvfb as exactly one argument (no word splitting or globbing).
Xvfb "$DISPLAY" -ac -screen 0 "$XVFB_WHD" -nolisten tcp &

echo "Executing main command"
exec "$@"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "name": "my-crawler",
  "version": "0.0.1",
  "type": "module",
  "description": "This is an example of a Crawlee project.",
  "private": true,
  "engines": {
    "node": ">=18.0.0"
  },
  "author": {
    "name": "digitaldπeamer",
    "email": "[email protected]"
  },
  "license": "ISC",
  "dependencies": {
    "apify": "^3.3.2",
    "crawlee": "*",
    "dotenv": "^16.4.7",
    "express": "^4.21.2",
    "playwright": "*",
    "playwright-chromium": "*"
  },
  "devDependencies": {
    "nodemon": "^3.1.9"
  },
  "scripts": {
    "start": "node src/server.js",
    "dev": "nodemon",
    "verify-browser": "node --expose-gc verify-chromium.js",
    "scrape:jobs": "node --expose-gc src/main.js",
    "scrape:companies": "node src/companies.js",
    "test": "echo \"No tests specified\" && exit 0"
  },
  "overrides": {
    "apify": {
      "@crawlee/core": "*",
      "@crawlee/types": "*",
      "@crawlee/utils": "*"
    }
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Placeholder: add your own application code in this file.
// Keep it inside the src folder (or at least make sure its path matches the npm scripts and every other reference to it).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Runs the command passed as arguments under xvfb-run, using $XVFB_WHD as the
# geometry of screen 0.

# "$*" joins the arguments into one word for display; "$@" inside a longer
# quoted string would split the message into several words.
echo "Will run command: xvfb-run -a -s \"-ac -screen 0 $XVFB_WHD -nolisten tcp\" $*"

# exec replaces this shell, so signals and the exit status belong to xvfb-run directly.
exec xvfb-run -a -s "-ac -screen 0 $XVFB_WHD -nolisten tcp" "$@"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// verify-chromium.js | |
// Purpose: ONLY to check if Playwright's browser can launch. | |
// DO NOT import server.js, main.js, or companies.js here. | |
// DO NOT call main() or runCompanies() here. | |
import { launchPlaywright, getMemoryInfo } from 'crawlee'; | |
import { testChromium } from './chrome_test.js'; | |
/**
 * Verifies that Chromium can be launched in the current environment.
 *
 * Steps, in order: log the browser-related environment variables, launch and
 * close a browser through Crawlee's `launchPlaywright`, run `testChromium`
 * headless and then headed, and finally call `getMemoryInfo`.
 *
 * The outcome is reported through `process.exitCode` (0 on success, 1 on
 * failure); errors are logged here and not rethrown.
 *
 * @returns {Promise<void>}
 */
async function checkBrowser() {
  console.log('Running comprehensive Chromium verification...');

  // Log relevant env vars
  const reportedVars = [
    'PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH',
    'PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD',
    'APIFY_DEFAULT_BROWSER_PATH',
  ];
  for (const name of reportedVars) {
    console.log(`${name}:`, process.env[name]);
  }

  // Resolved once so the sanity launch and the test runs use the same binary.
  const execPath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH || '/usr/bin/chromium';

  try {
    console.log('Sanity test with Playwright Chromium...');
    const launchContext = {
      launchOptions: {
        headless: true,
        args: ['--no-sandbox'],
        executablePath: execPath,
      },
    };
    const browser = await launchPlaywright(launchContext);
    await browser.close();

    // Run test suite with explicit executable path
    await testChromium({ headless: true, executablePath: execPath }); // Headless test
    await testChromium({ headless: false, executablePath: execPath }); // With XVFB

    await getMemoryInfo(); // Validate system ps/memory info

    console.log('All tests completed successfully ✅');
    process.exitCode = 0; // Success
  } catch (error) {
    console.error('❌ Browser verification failed:', error);
    process.exitCode = 1; // Failure
  }
}

checkBrowser();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment