Since `tensorflow` 2.13.0 is installed, if you need to install `tf-agents`, please use version 0.17.0.
Installation:
pip install tensorflow==2.13.0 tensorflow_probability~=0.20.1 tf-agents==0.17.0
Install TensorFlow GPU and PyTorch GPU on a computer with an NVIDIA graphics card.
#!/bin/bash
### stage 1 ####
# Prepare an Ubuntu 22.04 x86_64 host for GPU computing:
#   - verify the system has a cuda-capable gpu
#   - remove any previous nvidia driver / cuda / cudnn installation
#   - download and install the nvidia driver, the cuda toolkit and cudnn
#   - setup environmental variables
#   - reboot so that the freshly installed driver gets loaded
###

### to verify your gpu is cuda enabled, check that this prints your card
lspci | grep -i nvidia

### remove previous installation
sudo apt purge '.*nvidia.*' '.*cuda.*' '.*cudnn.*'
sudo apt remove '.*nvidia.*' '.*cuda.*' '.*cudnn.*'
# -f: stay quiet when no cuda apt source file exists (e.g. on a first run)
sudo rm -f /etc/apt/sources.list.d/cuda*
sudo apt-get autoremove && sudo apt-get autoclean
sudo rm -rf /usr/local/cuda*

### do system upgrade
sudo apt update && sudo apt upgrade -y
sudo apt install -y g++ freeglut3-dev build-essential libx11-dev libxmu-dev libxi-dev libglu1-mesa libglu1-mesa-dev

# install nvidia driver
sudo add-apt-repository ppa:graphics-drivers/ppa
sudo apt update
sudo apt install -y libnvidia-common-535 libnvidia-gl-535 nvidia-driver-535

# install cuda deb(network)
# Reference: https://developer.nvidia.com/cuda-11-8-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=deb_network
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb
sudo dpkg -i cuda-keyring_1.0-1_all.deb
sudo apt-get update
sudo apt install -y cuda-11-8

# Append line $1 to file $2 unless that exact line is already present, so
# that re-running this script does not pile up duplicate exports.
append_once() {
  local line=$1
  local file=$2
  # grep's stderr is silenced on purpose: a missing rc file just means
  # "line not present yet", and the append below creates the file.
  grep -qxF -- "${line}" "${file}" 2> /dev/null || printf '%s\n' "${line}" >> "${file}"
}

# Note: the lines below are added to ~/.bashrc or ~/.zshrc (chosen from $SHELL)
SHELL_RC="${HOME}/.${SHELL##*/}rc"
# Single quotes are intentional: $PATH / $LD_LIBRARY_PATH must be expanded
# when the rc file is read, not now.
# shellcheck disable=SC2016
append_once 'export PATH=/usr/local/cuda/bin:$PATH' "${SHELL_RC}"
# shellcheck disable=SC2016
append_once 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' "${SHELL_RC}"

# install cuDNN v8.7
# Reference: https://developer.nvidia.com/cudnn
CUDNN_TAR_FILE="cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz"
# the archive unpacks into a directory named after the tarball
CUDNN_DIR="${CUDNN_TAR_FILE%.tar.xz}"
sudo wget "https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/${CUDNN_TAR_FILE}"
# extract exactly this archive; a glob could also pick up other cudnn tarballs
sudo tar -xvf "${CUDNN_TAR_FILE}"

# copy the following files into the cuda toolkit directory.
# cudnn.h includes its sibling cudnn_*.h headers, so copy all of them.
sudo cp -P "${CUDNN_DIR}"/include/cudnn*.h /usr/local/cuda/include
sudo cp -P "${CUDNN_DIR}"/lib/libcudnn* /usr/local/cuda/lib64/
sudo chmod a+r /usr/local/cuda/include/cudnn*.h /usr/local/cuda/lib64/libcudnn*

# reboot to solve "Failed to initialize NVML: Driver/library version mismatch"
sudo reboot
#!/usr/bin/env bash
### stage 2 ####
# install mamba (python 3.10) with miniforge
# and create the "pyml" environment if it does not exist yet
###
export PATH="$HOME/miniforge-pypy3/bin:$PATH"

# https://github.com/conda-forge/miniforge/releases/tag/24.3.0-0 is the latest release that supports python 3.10
DOWNLOAD_URL="https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge-pypy3-Linux-x86_64.sh"

if ! command -v mamba &> /dev/null; then
  # private temp directory instead of a predictable file name in /tmp
  DOWNLOAD_TMP_DIR="$(mktemp -d)" || {
    echo "error: could not create a temporary directory" >&2
    exit 1
  }
  DOWNLOAD_TMP_FILE="${DOWNLOAD_TMP_DIR}/${DOWNLOAD_URL##*/}"

  echo "Downloading ${DOWNLOAD_URL} ..."
  if ! curl -# -sSLf "${DOWNLOAD_URL}" -o "${DOWNLOAD_TMP_FILE}"; then
    echo "error: failed to download ${DOWNLOAD_URL}" >&2
    rm -rf -- "${DOWNLOAD_TMP_DIR}"
    exit 1
  fi

  echo "Installing ..."
  # -b: batch mode (no prompts), -f: do not fail if the prefix already exists
  if ! bash "${DOWNLOAD_TMP_FILE}" -b -f; then
    echo "error: the miniforge installer failed" >&2
    rm -rf -- "${DOWNLOAD_TMP_DIR}"
    exit 1
  fi
  rm -rf -- "${DOWNLOAD_TMP_DIR}"
fi

echo "mamba path: $(command -v mamba)"

# Compare the first column (the environment name) exactly; a plain
# `grep pyml` would also match names such as "pyml2" or any path
# that merely contains "pyml".
if ! mamba env list | awk '$1 == "pyml" { found = 1 } END { exit !found }'; then
  # 'python=3.10' (single '=') selects the newest 3.10.x release,
  # whereas '==3.10' is an exact match that pins 3.10.0.
  mamba create --yes -n pyml \
    'python=3.10' \
    'notebook' 'jupyterhub' 'jupyterlab' \
    'scipy' 'numpy' 'pandas' 'matplotlib'
fi
#!/bin/bash
### stage 3 ####
# verify the nvidia driver + cuda + cudnn installation
# install TensorFlow and PyTorch
###

# verify the installation
nvidia-smi
nvcc -V

# activate mamba environment
# A script runs in a non-interactive shell, where `activate` is unavailable
# until the shell hook has been loaded; put miniforge on PATH and load the
# hook first.
export PATH="$HOME/miniforge-pypy3/bin:$PATH"
eval "$(conda shell.bash hook)"
# Stop here if activation fails: otherwise pip below would install into
# whichever python3 happens to be first on PATH.
conda activate pyml || {
  echo "error: could not activate the 'pyml' environment" >&2
  exit 1
}

# install TensorFlow GPU
python3 -m pip install nvidia-cudnn-cu11==8.6.0.163 tensorflow==2.13.0

# install PyTorch GPU
python3 -m pip install torch==2.1.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
#!/bin/bash
### stage 4 ####
# check if framework is actually using GPU
###

# A script runs in a non-interactive shell, where `activate` is unavailable
# until the shell hook has been loaded; put miniforge on PATH and load the
# hook first.
export PATH="$HOME/miniforge-pypy3/bin:$PATH"
eval "$(conda shell.bash hook)"
# Stop here if activation fails: otherwise the checks below would run
# against whichever python3 happens to be first on PATH.
conda activate pyml || {
  echo "error: could not activate the 'pyml' environment" >&2
  exit 1
}

# each command prints the framework version and whether it sees a GPU
python3 -c "import tensorflow as tf; print('TensorFlow: Version: ' + tf.__version__ + ', GPU Available: ', bool(len(tf.config.list_physical_devices('GPU'))))"
python3 -c "import torch; print('PyTorch: Version: ' + torch.__version__ + ', GPU Available: ', torch.cuda.is_available())"
Since `tensorflow` 2.13.0 is installed, if you need to install `tf-agents`, please use version 0.17.0.
Installation:
pip install tensorflow==2.13.0 tensorflow_probability~=0.20.1 tf-agents==0.17.0
# Turn on memory growth for every GPU that TensorFlow can see.
# Assumes `import tensorflow as tf` has already been executed.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
I ... successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
Fix:
# Write 0 into the numa_node entry of every PCI device.
# The path is quoted so that each device path stays a single word.
for a in /sys/bus/pci/devices/*; do echo 0 | sudo tee -a "$a/numa_node"; done