Created
September 25, 2024 22:18
-
-
Save rickt/3369985f3b04dd152a42cd4f666daa30 to your computer and use it in GitHub Desktop.
Dockerfile for faster-whisper in GCP Cloud Run GPU
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Dockerfile for a simple PoC faster-whisper translation server in GCP GPU Cloud Run
# 20240925 rickt
#
# build:  $ docker build -t gcr.io/<gcp_project_name>/<service_name> .
# push:   $ docker push gcr.io/<gcp_project_name>/<service_name>
# deploy: $ gcloud beta run deploy <service_name> --region us-central1 --image gcr.io/<gcp_project_name>/<service_name> \
#           --port 8080 --cpu 8 --memory 32Gi --gpu 1 --gpu-type nvidia-l4 --max-instances 1 --allow-unauthenticated
#
# pinned NVIDIA CUDA 12.6 runtime base image on ubuntu 24.04
FROM nvidia/cuda:12.6.1-runtime-ubuntu24.04

# expose GPU(s) to the container runtime
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

# update the system pkg lists & install only what we need;
# --no-install-recommends keeps the image lean, and the apt lists are
# removed in the same layer so they never persist in the image
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        git \
        python3-pip \
        python3-venv \
        wget \
    && rm -rf /var/lib/apt/lists/*

# nvidia stuff: add the CUDA apt repo keyring.
# -fsSL makes curl fail the build on an HTTP error instead of silently
# saving an error page as the .deb.
# NOTE(review): the ubuntu2204 repo is used on purpose — the CUDA 11.8
# packages below predate ubuntu 24.04 and only exist there; confirm the
# keyring version (1.0-1) is still downloadable before rebuilding.
RUN curl -fsSL "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb" -o cuda.deb && \
    dpkg -i cuda.deb && \
    rm cuda.deb

# CUDA 11.8 user-space libraries (cudart/nvcc/nvrtc, cuDNN 8, cuBLAS 11).
# The base image only ships CUDA 12; these are needed by the cu118 PyTorch
# wheel installed below. Clean only the apt lists, not /var/lib/apt dpkg state.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
        cuda-cudart-11-8 cuda-nvcc-11-8 cuda-nvrtc-11-8 \
        'libcudnn8=8.9.1.*+cuda11.8' libcublas-11-8 && \
    rm -rf /var/lib/apt/lists/*

# create a venv and upgrade pip inside it (one logical step, one layer)
RUN python3 -m venv /opt/venv && \
    /opt/venv/bin/python -m pip install --no-cache-dir --upgrade pip

# install python modules.
# - the extras spec is quoted so the shell never treats [server] as a glob
# - --no-cache-dir keeps the pip download cache out of the layer
# - the PyPI "uuid" package from the original is dropped: uuid is part of
#   the Python standard library, and the third-party shim (last released
#   in 2006) can shadow and break the stdlib module
RUN /opt/venv/bin/pip3 install --no-cache-dir \
        "faster-whisper[server]" \
        torch --extra-index-url https://download.pytorch.org/whl/cu118 \
        nvidia-cublas-cu11 nvidia-cublas-cu12 \
        google-cloud-logging \
        flask

# download/preload small whisper model into the image so cold starts in
# Cloud Run do not pay the model-download cost
RUN /opt/venv/bin/python3 -c "from faster_whisper import WhisperModel; model = WhisperModel('small', device='cpu')"

# app port (documentation only; Cloud Run routes to the --port given at deploy)
EXPOSE 8080

# application code
WORKDIR /app
COPY . /app

# exec-form entry point so the server runs as PID 1 and receives SIGTERM
CMD ["/opt/venv/bin/python3", "server.py"]
# EOF
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment