Created
September 25, 2024 22:18
-
-
Save rickt/3369985f3b04dd152a42cd4f666daa30 to your computer and use it in GitHub Desktop.
Dockerfile for faster-whisper in GCP Cloud Run GPU
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Dockerfile for a simple PoC faster-whisper translation server in GCP GPU Cloud Run
# 20240925 rickt
#
# build:  $ docker build -t gcr.io/<gcp_project_name>/<service_name> .
# push:   $ docker push gcr.io/<gcp_project_name>/<service_name>
# deploy: $ gcloud beta run deploy <service_name> --region us-central1 --image gcr.io/<gcp_project_name>/<service_name> \
#           --port 8080 --cpu 8 --memory 32Gi --gpu 1 --gpu-type nvidia-l4 --max-instances 1 --allow-unauthenticated
#
# pinned NVIDIA CUDA 12.6 runtime base image on ubuntu 24.04
FROM nvidia/cuda:12.6.1-runtime-ubuntu24.04

# expose GPU(s) to the container runtime
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

# update the system pkg lists & install only what we need;
# --no-install-recommends keeps the image lean, and the apt lists are
# removed in the same layer so they never persist in the image
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        git \
        python3-pip \
        python3-venv \
        wget \
    && rm -rf /var/lib/apt/lists/*

# nvidia stuff: add the CUDA apt repo keyring.
# -fsSL makes curl fail the build on an HTTP error instead of silently
# saving an error page as the .deb.
# NOTE(review): the ubuntu2204 repo is used on purpose — the CUDA 11.8
# packages below predate ubuntu 24.04 and only exist there; confirm the
# keyring version (1.0-1) is still downloadable before rebuilding.
RUN curl -fsSL "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb" -o cuda.deb && \
    dpkg -i cuda.deb && \
    rm cuda.deb

# CUDA 11.8 user-space libraries (cudart/nvcc/nvrtc, cuDNN 8, cuBLAS 11).
# The base image only ships CUDA 12; these are needed by the cu118 PyTorch
# wheel installed below. Clean only the apt lists, not /var/lib/apt dpkg state.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
        cuda-cudart-11-8 cuda-nvcc-11-8 cuda-nvrtc-11-8 \
        'libcudnn8=8.9.1.*+cuda11.8' libcublas-11-8 && \
    rm -rf /var/lib/apt/lists/*

# create a venv and upgrade pip inside it (one logical step, one layer)
RUN python3 -m venv /opt/venv && \
    /opt/venv/bin/python -m pip install --no-cache-dir --upgrade pip

# install python modules.
# - the extras spec is quoted so the shell never treats [server] as a glob
# - --no-cache-dir keeps the pip download cache out of the layer
# - the PyPI "uuid" package from the original is dropped: uuid is part of
#   the Python standard library, and the third-party shim (last released
#   in 2006) can shadow and break the stdlib module
RUN /opt/venv/bin/pip3 install --no-cache-dir \
        "faster-whisper[server]" \
        torch --extra-index-url https://download.pytorch.org/whl/cu118 \
        nvidia-cublas-cu11 nvidia-cublas-cu12 \
        google-cloud-logging \
        flask

# download/preload small whisper model into the image so cold starts in
# Cloud Run do not pay the model-download cost
RUN /opt/venv/bin/python3 -c "from faster_whisper import WhisperModel; model = WhisperModel('small', device='cpu')"

# app port (documentation only; Cloud Run routes to the --port given at deploy)
EXPOSE 8080

# application code
WORKDIR /app
COPY . /app

# exec-form entry point so the server runs as PID 1 and receives SIGTERM
CMD ["/opt/venv/bin/python3", "server.py"]
# EOF
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment