From 0414759c745a08c42641bfedf28a9387f8c9c076 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 17 Mar 2022 14:14:49 +0100 Subject: [PATCH] readme added, additional bcache settings added --- README.md | 50 +++++++++++++++++++++++++++++++++++++++++++ setup_apps.sh | 6 ++++-- setup_bcache.sh | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ setup_cuda.sh | 20 +++++++++--------- setup_drives.sh | 41 ------------------------------------ 5 files changed, 120 insertions(+), 53 deletions(-) create mode 100644 README.md create mode 100644 setup_bcache.sh delete mode 100644 setup_drives.sh diff --git a/README.md b/README.md new file mode 100644 index 0000000..b97ca3a --- /dev/null +++ b/README.md @@ -0,0 +1,50 @@ +# Setup for Tensorflow with GPU + +Tested for ubuntu-20.04.4 + +Steps: + +1. Prepare setup: + + git clone https://repos.nonan.net/nicolas/gpu_server_setup.git + cd gpu_server_setup + + +2. Setup driver/CUDA: + + sudo bash setup_cuda.sh + sudo systemctl reboot + + +3. Setup bcache: + + sudo bash setup_bcache.sh + + +4. Setup apps (Python, JupyterHub (Hub is running as root), Tensorflow etc.): + + sudo bash setup_apps.sh + + +## Notes + +### CUDA + +Check state of NVIDIA devices (electrical power, temperature, memory etc.): + + nvidia-smi + + +### bcache + +Check bcache performance: + + cat /sys/block/bcache0/bcache/state + cat /sys/block/bcache*/bcache/stats_five_minute/cache_hit_ratio + cat /sys/block/bcache*/bcache/stats_hour/cache_hit_ratio + + +Tune bcache (not permanent): + + echo 64M > /sys/block/bcache0/bcache/sequential_cutoff + echo 4096 > /sys/block/bcache0/queue/read_ahead_kb \ No newline at end of file diff --git a/setup_apps.sh b/setup_apps.sh index f1c6185..72cd2d6 100644 --- a/setup_apps.sh +++ b/setup_apps.sh @@ -31,9 +31,11 @@ systemctl enable jupyterhub && \ systemctl start jupyterhub if [ $? -eq 0 ]; then + echo "check nvidia drivers..." 
+ nvidia-smi + echo "\n" echo "Setup successfully finished" fi -#check nvidia drivers -nvidia-smi + + diff --git a/setup_bcache.sh b/setup_bcache.sh new file mode 100644 index 0000000..67d9e77 --- /dev/null +++ b/setup_bcache.sh @@ -0,0 +1,56 @@ +#!/bin/bash +#Creates a bcache device (cache: $ssddev, backing: $hdddev), mounts it on /mnt/bcache and makes /mnt/bcache/home the default home for new users +#sources: +#https://wiki.archlinux.org/title/bcache +#https://stackoverflow.com/questions/20797819/command-to-change-the-default-home-directory-of-a-user + +ssddev=/dev/sdb #cache device (make-bcache -C) +hdddev=/dev/sdc #backing device (make-bcache -B) +sequential_cutoff=24000M #written to bcache0's sequential_cutoff below and re-applied by the udev rule + +if [ "$EUID" -ne 0 ] + then echo "Please run as root" + exit 1 +fi + +if [ -b /dev/bcache0 ] #refuse to run again: the steps below overwrite $ssddev and $hdddev + then echo "Canceld, device /dev/bcache0 is allready present" + exit 1 +fi + +echo "Setup bcache... " #next: zero the first 8 sectors of both devices, wipe signatures, create the bcache set +apt-get update && \ +apt-get install -y bcache-tools && \ +dd if=/dev/zero of=$ssddev bs=512 count=8 && \ +dd if=/dev/zero of=$hdddev bs=512 count=8 && \ +wipefs -a $ssddev && \ +wipefs -a $hdddev && \ +make-bcache -C $ssddev -B $hdddev --writeback && \ +echo $sequential_cutoff > /sys/block/bcache0/bcache/sequential_cutoff && \ +echo "ACTION==\"add\", SUBSYSTEM==\"block\", ENV{MAJOR}==\"252\", ATTR{bcache/sequential_cutoff}=\"$sequential_cutoff\"" \ +> /etc/udev/rules.d/99-bcache_sequential_cutoff.rules && \ +udevadm test /sys/block/bcache0 + +if [ $? -eq 0 ]; then + echo "Wait until device is ready... " + while [ ! -b /dev/bcache0 ]; do sleep 1; done + + echo "Fromat and mount bcache device... " + mkfs.ext4 /dev/bcache0 && \ + mkdir /mnt/bcache && \ + mount /dev/bcache0 /mnt/bcache && \ + echo "/dev/bcache0 /mnt/bcache ext4 rw 0 0" >> /etc/fstab +fi + +if [ $? -eq 0 ]; then + echo "Config home directory for new users on bcache partition... " + mkdir /mnt/bcache/home && \ + echo "#" >> /etc/default/useradd && \ + echo "# Modifications:" >> /etc/default/useradd && \ + echo "HOME=/mnt/bcache/home" >> /etc/default/useradd +fi + +if [ $? 
-eq 0 ]; then + echo "" + echo "Setup successfully finished" +fi diff --git a/setup_cuda.sh b/setup_cuda.sh index 9080bd1..4eb85f8 100644 --- a/setup_cuda.sh +++ b/setup_cuda.sh @@ -4,12 +4,15 @@ #check for NVIDIA device: #apt-get -y install pciutils #lspci | grep VGA -#check if driver works with device: -#nvidia-smi +#check if driver works with device: nvidia-smi #based on https://docs.nvidia.com/cuda/cuda-installation-guide-linux/ #and https://www.tensorflow.org/install/gpu #Versions required: https://www.tensorflow.org/install/source#gpu + +#remove cuda: +#sudo apt-get remove cuda-11-2 +#sudo apt-get autoremove #------------------------------------ if [ "$EUID" -ne 0 ] @@ -47,17 +50,18 @@ if [ $? -eq 0 ]; then #Use the --no-install-recommends option for a lean driver install without any dependencies on X packages apt-get update && \ apt-get install -y --no-install-recommends cuda-11-2 - #apt-get install --no-install-recommends libcudnn8=8.1.0.44-1+cuda11.2 libcudnn8-dev=8.1.0.44-1+cuda11.2 fi if [ $? -eq 0 ]; then echo "\n" echo "install libcudnn8..." - #Latest available version from ubuntu1804 repo: + #Latest available version from nvidia ubuntu1804 repo: wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \ - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb && \ apt-get install -y ./libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \ - apt-get install -y ./libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb + rm ./libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \ + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb && \ + apt-get install -y ./libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb && \ + rm ./libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb fi if [ $? -eq 0 ]; then @@ -67,7 +71,3 @@ if [ $? 
-eq 0 ]; then echo "Setup successfully finished" echo "Type \"systemctl reboot\" to reboot system" fi - - -#sudo apt-get remove cuda-11-2 -#sudo apt-get autoremove \ No newline at end of file diff --git a/setup_drives.sh b/setup_drives.sh deleted file mode 100644 index 5def89e..0000000 --- a/setup_drives.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -#https://stackoverflow.com/questions/20797819/command-to-change-the-default-home-directory-of-a-user - -#move user with files: -#usermod -m -d /newhome/username username - -#Simply open this file using a text editor, type: -#vi /etc/default/useradd -#The default home directory defined by HOME variable, find line that read as follows: -#HOME=/home -#Replace with: -#HOME=/iscsi/user -#Save and close the file. Now you can add user using regular useradd command: -# useradd vivek -# passwd vivek -#Verify user information: -# finger vivek - -#echo "/dev/bcache0 /mnt/bcache ext4 rw 0 0" | tee -a /etc/fstab > /dev/null - - -apt-get update && \ -apt-get install -y bcache-tools && \ -dd if=/dev/zero if=/dev/sdb bs=512 count=8 && \ -dd if=/dev/zero if=/dev/sdc bs=512 count=8 && \ -wipefs -a /dev/sdb && \ -wipefs -a /dev/sdc && \ -make-bcache -C /dev/sdb -B /dev/sdc --discard --writeback && \ -mkfs.ext4 /dev/bcache0 && \ -mkdir /mnt/bcache && \ -mount /dev/bcache0 /mnt/bcache && \ -mkdir /mnt/bcache/home && \ -echo "/dev/bcache0 /mnt/bcache ext4 rw 0 0" >> /etc/fstab - - -if [ $? -eq 0 ]; then - echo "#" >> /etc/default/useradd - echo "# Modifications:" >> /etc/default/useradd - echo "HOME=/mnt/bcache/home" >> /etc/default/useradd -fi \ No newline at end of file