readme added, additional bcache settings added

This commit is contained in:
Nicolas 2022-03-17 14:14:49 +01:00
parent e4ccb843d3
commit 0414759c74
5 changed files with 120 additions and 53 deletions

50
README.md Normal file
View File

@ -0,0 +1,50 @@
# Setup for TensorFlow with GPU
Tested for ubuntu-20.04.4
Steps:
1. Prepare setup:
git clone https://repos.nonan.net/nicolas/gpu_server_setup.git
cd gpu_server_setup
2. Setup driver/CUDA:
sudo bash setup_cuda.sh
sudo systemctl reboot
3. Setup bcache:
sudo bash setup_bcache.sh
4. Setup apps (Python, JupyterHub (Hub is running as root), TensorFlow etc.):
sudo bash setup_apps.sh
## Notes
### CUDA
Check state of NVIDIA devices (electrical power, temperature, memory etc.):
nvidia-smi
### bcache
Check bcache performance:
cat /sys/block/bcache0/bcache/state
cat /sys/block/bcache*/bcache/stats_five_minute/cache_hit_ratio
cat /sys/block/bcache*/bcache/stats_hour/cache_hit_ratio
Tune bcache (not permanent; run in a root shell, since the redirection writes to sysfs):
echo 64M > /sys/block/bcache0/bcache/sequential_cutoff
echo 4096 > /sys/block/bcache0/queue/read_ahead_kb

View File

@ -31,9 +31,11 @@ systemctl enable jupyterhub && \
systemctl start jupyterhub systemctl start jupyterhub
if [ $? -eq 0 ]; then if [ $? -eq 0 ]; then
echo "check nvidia drivers..."
nvidia-smi
echo "\n" echo "\n"
echo "Setup successfully finished" echo "Setup successfully finished"
fi fi
#check nvidia drivers
nvidia-smi

56
setup_bcache.sh Normal file
View File

@ -0,0 +1,56 @@
#!/bin/bash
# Sets up a bcache device with $ssddev as cache and $hdddev as backing device
# (writeback mode), formats it as ext4, mounts it at /mnt/bcache, adds an
# fstab entry and makes /mnt/bcache/home the default HOME base for new users.
#
# WARNING: the first sectors and all filesystem signatures of $ssddev and
# $hdddev are wiped. Check the device names below before running.
#
# Must be run as root. Exits non-zero as soon as any stage fails.
#
#sources:
#https://wiki.archlinux.org/title/bcache
#https://stackoverflow.com/questions/20797819/command-to-change-the-default-home-directory-of-a-user

ssddev=/dev/sdb
hdddev=/dev/sdc
sequential_cutoff=24000M

# Print a message to stderr and abort with a failure status.
die() {
  echo "$*" >&2
  exit 1
}

if [ "$EUID" -ne 0 ]; then
  echo "Please run as root"
  exit 1
fi

# Refuse to run twice: an existing bcache0 means the setup was already done.
if [ -b /dev/bcache0 ]; then
  echo "Canceld, device /dev/bcache0 is allready present"
  exit 1
fi

echo "Setup bcache... "
# Each stage ends in "|| die": testing $? after an "if ... fi" does not work,
# because a skipped "if" without "else" itself returns status 0.
# dd needs of= for the target; with two if= operands nothing is written.
apt-get update && \
  apt-get install -y bcache-tools && \
  dd if=/dev/zero of="$ssddev" bs=512 count=8 && \
  dd if=/dev/zero of="$hdddev" bs=512 count=8 && \
  wipefs -a "$ssddev" && \
  wipefs -a "$hdddev" && \
  make-bcache -C "$ssddev" -B "$hdddev" --writeback \
  || die "Creating the bcache device failed"

echo "Wait until device is ready... "
# Wait before touching /sys/block/bcache0; the device is registered
# asynchronously after make-bcache returns. Bounded so a failed registration
# does not hang the script forever.
waited=0
while [ ! -b /dev/bcache0 ]; do
  if [ "$waited" -ge 60 ]; then
    die "/dev/bcache0 did not appear within 60 seconds"
  fi
  sleep 1
  waited=$((waited + 1))
done

# Apply the sequential cutoff now and persist it across reboots via udev.
echo "$sequential_cutoff" > /sys/block/bcache0/bcache/sequential_cutoff && \
  echo "ACTION==\"add\", SUBSYSTEM==\"block\", ENV{MAJOR}==\"252\", ATTR{bcache/sequential_cutoff}=\"$sequential_cutoff\"" \
    > /etc/udev/rules.d/99-bcache_sequential_cutoff.rules && \
  udevadm test /sys/block/bcache0 \
  || die "Configuring sequential_cutoff failed"

echo "Fromat and mount bcache device... "
mkfs.ext4 /dev/bcache0 && \
  mkdir -p /mnt/bcache && \
  mount /dev/bcache0 /mnt/bcache && \
  echo "/dev/bcache0 /mnt/bcache ext4 rw 0 0" >> /etc/fstab \
  || die "Formatting or mounting /dev/bcache0 failed"

echo "Config home directory for new users on bcache partition... "
# A later HOME= line in /etc/default/useradd is appended rather than editing
# the existing one, as in the original script.
mkdir -p /mnt/bcache/home && \
  echo "#" >> /etc/default/useradd && \
  echo "# Modifications:" >> /etc/default/useradd && \
  echo "HOME=/mnt/bcache/home" >> /etc/default/useradd \
  || die "Configuring the default home directory failed"

echo ""
echo "Setup successfully finished"

View File

@ -4,12 +4,15 @@
#check for NVIDIA device: #check for NVIDIA device:
#apt-get -y install pciutils #apt-get -y install pciutils
#lspci | grep VGA #lspci | grep VGA
#check if driver works with device: #check if driver works with device: nvidia-smi
#nvidia-smi
#based on https://docs.nvidia.com/cuda/cuda-installation-guide-linux/ #based on https://docs.nvidia.com/cuda/cuda-installation-guide-linux/
#and https://www.tensorflow.org/install/gpu #and https://www.tensorflow.org/install/gpu
#Versions required: https://www.tensorflow.org/install/source#gpu #Versions required: https://www.tensorflow.org/install/source#gpu
#remove cuda:
#sudo apt-get remove cuda-11-2
#sudo apt-get autoremove
#------------------------------------ #------------------------------------
if [ "$EUID" -ne 0 ] if [ "$EUID" -ne 0 ]
@ -47,17 +50,18 @@ if [ $? -eq 0 ]; then
#Use the --no-install-recommends option for a lean driver install without any dependencies on X packages #Use the --no-install-recommends option for a lean driver install without any dependencies on X packages
apt-get update && \ apt-get update && \
apt-get install -y --no-install-recommends cuda-11-2 apt-get install -y --no-install-recommends cuda-11-2
#apt-get install --no-install-recommends libcudnn8=8.1.0.44-1+cuda11.2 libcudnn8-dev=8.1.0.44-1+cuda11.2
fi fi
if [ $? -eq 0 ]; then if [ $? -eq 0 ]; then
echo "\n" echo "\n"
echo "install libcudnn8..." echo "install libcudnn8..."
#Latest available version from ubuntu1804 repo: #Latest available version from nvidia ubuntu1804 repo:
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \ wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb && \
apt-get install -y ./libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \ apt-get install -y ./libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \
apt-get install -y ./libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb rm ./libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb && \
apt-get install -y ./libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb && \
rm ./libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb
fi fi
if [ $? -eq 0 ]; then if [ $? -eq 0 ]; then
@ -67,7 +71,3 @@ if [ $? -eq 0 ]; then
echo "Setup successfully finished" echo "Setup successfully finished"
echo "Type \"systemctl reboot\" to reboot system" echo "Type \"systemctl reboot\" to reboot system"
fi fi
#sudo apt-get remove cuda-11-2
#sudo apt-get autoremove

View File

@ -1,41 +0,0 @@
#!/bin/bash
#https://stackoverflow.com/questions/20797819/command-to-change-the-default-home-directory-of-a-user
#move user with files:
#usermod -m -d /newhome/username username
#Simply open this file using a text editor, type:
#vi /etc/default/useradd
#The default home directory defined by HOME variable, find line that read as follows:
#HOME=/home
#Replace with:
#HOME=/iscsi/user
#Save and close the file. Now you can add user using regular useradd command:
# useradd vivek
# passwd vivek
#Verify user information:
# finger vivek
#echo "/dev/bcache0 /mnt/bcache ext4 rw 0 0" | tee -a /etc/fstab > /dev/null
# Create a bcache device from /dev/sdb (cache) and /dev/sdc (backing device),
# format it as ext4, mount it at /mnt/bcache and register it in /etc/fstab.
# WARNING: wipes the first sectors and filesystem signatures of both disks.
# dd must be given of= for the target disk; with a second if= operand the
# zeros are never written to the device.
apt-get update && \
apt-get install -y bcache-tools && \
dd if=/dev/zero of=/dev/sdb bs=512 count=8 && \
dd if=/dev/zero of=/dev/sdc bs=512 count=8 && \
wipefs -a /dev/sdb && \
wipefs -a /dev/sdc && \
make-bcache -C /dev/sdb -B /dev/sdc --discard --writeback && \
mkfs.ext4 /dev/bcache0 && \
mkdir /mnt/bcache && \
mount /dev/bcache0 /mnt/bcache && \
mkdir /mnt/bcache/home && \
echo "/dev/bcache0 /mnt/bcache ext4 rw 0 0" >> /etc/fstab
# Only when the whole chain above succeeded: make /mnt/bcache/home the default
# home base for users created with useradd from now on.
if [ $? -eq 0 ]; then
echo "#" >> /etc/default/useradd
echo "# Modifications:" >> /etc/default/useradd
echo "HOME=/mnt/bcache/home" >> /etc/default/useradd
fi