readme added, additional bcache settings added
This commit is contained in:
parent
e4ccb843d3
commit
0414759c74
|
@ -0,0 +1,50 @@
|
||||||
|
# Setup for TensorFlow with GPU
|
||||||
|
|
||||||
|
Tested on Ubuntu 20.04.4.
|
||||||
|
|
||||||
|
Steps:
|
||||||
|
|
||||||
|
1. Prepare setup:
|
||||||
|
|
||||||
|
git clone https://repos.nonan.net/nicolas/gpu_server_setup.git
|
||||||
|
cd gpu_server_setup
|
||||||
|
|
||||||
|
|
||||||
|
2. Setup driver/CUDA:
|
||||||
|
|
||||||
|
sudo bash setup_cuda.sh
|
||||||
|
sudo systemctl reboot
|
||||||
|
|
||||||
|
|
||||||
|
3. Setup bcache:
|
||||||
|
|
||||||
|
sudo bash setup_bcache.sh
|
||||||
|
|
||||||
|
|
||||||
|
4. Set up apps (Python, JupyterHub (the hub runs as root), TensorFlow, etc.):
|
||||||
|
|
||||||
|
sudo bash setup_apps.sh
|
||||||
|
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
### CUDA
|
||||||
|
|
||||||
|
Check the state of the NVIDIA devices (power draw, temperature, memory usage, etc.):
|
||||||
|
|
||||||
|
nvidia-smi
|
||||||
|
|
||||||
|
|
||||||
|
### bcache
|
||||||
|
|
||||||
|
Check bcache state and cache hit ratios:
|
||||||
|
|
||||||
|
cat /sys/block/bcache0/bcache/state
|
||||||
|
cat /sys/block/bcache*/bcache/stats_five_minute/cache_hit_ratio
|
||||||
|
cat /sys/block/bcache*/bcache/stats_hour/cache_hit_ratio
|
||||||
|
|
||||||
|
|
||||||
|
Tune bcache (not permanent: the values are lost on reboot; run as root):
|
||||||
|
|
||||||
|
echo 64M > /sys/block/bcache0/bcache/sequential_cutoff
|
||||||
|
echo 4096 > /sys/block/bcache0/queue/read_ahead_kb
|
|
@ -31,9 +31,11 @@ systemctl enable jupyterhub && \
|
||||||
systemctl start jupyterhub
|
systemctl start jupyterhub
|
||||||
|
|
||||||
if [ $? -eq 0 ]; then
|
if [ $? -eq 0 ]; then
|
||||||
|
echo "check nvidia drivers..."
|
||||||
|
nvidia-smi
|
||||||
|
|
||||||
echo "\n"
|
echo "\n"
|
||||||
echo "Setup successfully finished"
|
echo "Setup successfully finished"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
#check nvidia drivers
|
|
||||||
nvidia-smi
|
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
#!/bin/bash

# Sets up a bcache device (/dev/bcache0) that uses an SSD as cache for an HDD,
# formats it as ext4, mounts it at /mnt/bcache (persisted via /etc/fstab) and
# makes /mnt/bcache/home the default base directory for the homes of newly
# created users.
#
# WARNING: all data on $ssddev and $hdddev is destroyed.
#
# Usage: sudo bash setup_bcache.sh
# Exit status: 0 on success, 1 on any failure.
#
#sources:
#https://wiki.archlinux.org/title/bcache
#https://stackoverflow.com/questions/20797819/command-to-change-the-default-home-directory-of-a-user

ssddev=/dev/sdb            # caching device
hdddev=/dev/sdc            # backing device
sequential_cutoff=24000M   # value written to bcache's sequential_cutoff
device_timeout=60          # seconds to wait for /dev/bcache0 to show up

# Print an error message to stderr and abort with a non-zero status.
die() {
  echo "Error: $*" >&2
  exit 1
}

# Install bcache-tools, wipe old signatures and create the bcache device.
create_bcache() {
  # dd needs of= to write to the device; the original passed if= twice, so
  # nothing was ever written to the disks.
  apt-get update &&
    apt-get install -y bcache-tools &&
    dd if=/dev/zero of="$ssddev" bs=512 count=8 &&
    dd if=/dev/zero of="$hdddev" bs=512 count=8 &&
    wipefs -a "$ssddev" &&
    wipefs -a "$hdddev" &&
    make-bcache -C "$ssddev" -B "$hdddev" --writeback
}

# Wait until the block device and its sysfs attribute exist.
# Returns 1 if they do not appear within $device_timeout seconds.
wait_for_bcache() {
  local waited=0
  while [ ! -b /dev/bcache0 ] ||
    [ ! -e /sys/block/bcache0/bcache/sequential_cutoff ]; do
    if [ "$waited" -ge "$device_timeout" ]; then
      return 1
    fi
    sleep 1
    waited=$((waited + 1))
  done
}

# Apply sequential_cutoff now and install a udev rule that re-applies it
# whenever the device is added.
apply_sequential_cutoff() {
  # NOTE(review): the rule matches block major 252; this is assumed to be the
  # major number of the bcache devices on the target system - confirm with
  # "ls -l /dev/bcache0".
  echo "$sequential_cutoff" > /sys/block/bcache0/bcache/sequential_cutoff &&
    echo "ACTION==\"add\", SUBSYSTEM==\"block\", ENV{MAJOR}==\"252\", ATTR{bcache/sequential_cutoff}=\"$sequential_cutoff\"" \
      > /etc/udev/rules.d/99-bcache_sequential_cutoff.rules &&
    udevadm test /sys/block/bcache0
}

# Create the file system, mount it and register the mount in /etc/fstab.
format_and_mount() {
  mkfs.ext4 /dev/bcache0 &&
    mkdir -p /mnt/bcache &&
    mount /dev/bcache0 /mnt/bcache &&
    echo "/dev/bcache0 /mnt/bcache ext4 rw 0 0" >> /etc/fstab
}

# Make /mnt/bcache/home the default HOME base used by useradd.
configure_home() {
  mkdir -p /mnt/bcache/home &&
    echo "#" >> /etc/default/useradd &&
    echo "# Modifications:" >> /etc/default/useradd &&
    echo "HOME=/mnt/bcache/home" >> /etc/default/useradd
}

if [ "$EUID" -ne 0 ]; then
  echo "Please run as root" >&2
  exit 1
fi

# The original used "while ... then ... fi" here, which is a syntax error.
if [ -b /dev/bcache0 ]; then
  echo "Canceled, device /dev/bcache0 is already present" >&2
  exit 1
fi

# Refuse to continue when a configured device is missing, before anything
# destructive is executed.
for dev in "$ssddev" "$hdddev"; do
  if [ ! -b "$dev" ]; then
    echo "Canceled, $dev is not a block device" >&2
    exit 1
  fi
done

# Each stage aborts the script on failure. The original tested $? after a
# preceding "if ... fi", which is 0 whenever that "if" was skipped, so a
# failed stage did not reliably stop the following ones.
echo "Setup bcache... "
create_bcache || die "creating the bcache device failed"

# Wait before touching sysfs: the original wrote sequential_cutoff first and
# only afterwards waited for the device to exist.
echo "Wait until device is ready... "
wait_for_bcache ||
  die "/dev/bcache0 did not appear within $device_timeout seconds"

apply_sequential_cutoff || die "configuring sequential_cutoff failed"

echo "Format and mount bcache device... "
format_and_mount || die "formatting or mounting /dev/bcache0 failed"

echo "Config home directory for new users on bcache partition... "
configure_home || die "configuring the default home directory failed"

echo ""
echo "Setup successfully finished"
|
|
@ -4,12 +4,15 @@
|
||||||
#check for NVIDIA device:
|
#check for NVIDIA device:
|
||||||
#apt-get -y install pciutils
|
#apt-get -y install pciutils
|
||||||
#lspci | grep VGA
|
#lspci | grep VGA
|
||||||
#check if driver works with device:
|
#check if driver works with device: nvidia-smi
|
||||||
#nvidia-smi
|
|
||||||
|
|
||||||
#based on https://docs.nvidia.com/cuda/cuda-installation-guide-linux/
|
#based on https://docs.nvidia.com/cuda/cuda-installation-guide-linux/
|
||||||
#and https://www.tensorflow.org/install/gpu
|
#and https://www.tensorflow.org/install/gpu
|
||||||
#Versions required: https://www.tensorflow.org/install/source#gpu
|
#Versions required: https://www.tensorflow.org/install/source#gpu
|
||||||
|
|
||||||
|
#remove cuda:
|
||||||
|
#sudo apt-get remove cuda-11-2
|
||||||
|
#sudo apt-get autoremove
|
||||||
#------------------------------------
|
#------------------------------------
|
||||||
|
|
||||||
if [ "$EUID" -ne 0 ]
|
if [ "$EUID" -ne 0 ]
|
||||||
|
@ -47,17 +50,18 @@ if [ $? -eq 0 ]; then
|
||||||
#Use the --no-install-recommends option for a lean driver install without any dependencies on X packages
|
#Use the --no-install-recommends option for a lean driver install without any dependencies on X packages
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends cuda-11-2
|
apt-get install -y --no-install-recommends cuda-11-2
|
||||||
#apt-get install --no-install-recommends libcudnn8=8.1.0.44-1+cuda11.2 libcudnn8-dev=8.1.0.44-1+cuda11.2
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ $? -eq 0 ]; then
|
if [ $? -eq 0 ]; then
|
||||||
echo "\n"
|
echo "\n"
|
||||||
echo "install libcudnn8..."
|
echo "install libcudnn8..."
|
||||||
#Latest available version from ubuntu1804 repo:
|
#Latest available version from nvidia ubuntu1804 repo:
|
||||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \
|
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \
|
||||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb && \
|
|
||||||
apt-get install -y ./libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \
|
apt-get install -y ./libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \
|
||||||
apt-get install -y ./libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb
|
rm ./libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \
|
||||||
|
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb && \
|
||||||
|
apt-get install -y ./libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb && \
|
||||||
|
rm ./libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ $? -eq 0 ]; then
|
if [ $? -eq 0 ]; then
|
||||||
|
@ -67,7 +71,3 @@ if [ $? -eq 0 ]; then
|
||||||
echo "Setup successfully finished"
|
echo "Setup successfully finished"
|
||||||
echo "Type \"systemctl reboot\" to reboot system"
|
echo "Type \"systemctl reboot\" to reboot system"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
#sudo apt-get remove cuda-11-2
|
|
||||||
#sudo apt-get autoremove
|
|
|
@ -1,41 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
#https://stackoverflow.com/questions/20797819/command-to-change-the-default-home-directory-of-a-user
|
|
||||||
|
|
||||||
#move user with files:
|
|
||||||
#usermod -m -d /newhome/username username
|
|
||||||
|
|
||||||
#Simply open this file using a text editor, type:
|
|
||||||
#vi /etc/default/useradd
|
|
||||||
#The default home directory defined by HOME variable, find line that read as follows:
|
|
||||||
#HOME=/home
|
|
||||||
#Replace with:
|
|
||||||
#HOME=/iscsi/user
|
|
||||||
#Save and close the file. Now you can add user using regular useradd command:
|
|
||||||
# useradd vivek
|
|
||||||
# passwd vivek
|
|
||||||
#Verify user information:
|
|
||||||
# finger vivek
|
|
||||||
|
|
||||||
#echo "/dev/bcache0 /mnt/bcache ext4 rw 0 0" | tee -a /etc/fstab > /dev/null
|
|
||||||
|
|
||||||
|
|
||||||
# Create a bcache device from an SSD (cache) and an HDD (backing device),
# format it as ext4, mount it at /mnt/bcache and make /mnt/bcache/home the
# default base directory for the homes of newly created users.
#
# WARNING: all data on $ssddev and $hdddev is destroyed.
# Exit status: 0 on success, 1 on any failure.

ssddev=/dev/sdb   # caching device
hdddev=/dev/sdc   # backing device

if [ "$EUID" -ne 0 ]; then
  echo "Please run as root" >&2
  exit 1
fi

# Stop before anything destructive runs when a device is missing.
for dev in "$ssddev" "$hdddev"; do
  if [ ! -b "$dev" ]; then
    echo "$dev is not a block device" >&2
    exit 1
  fi
done

# dd needs of= to write to the device; the original passed if= twice, so the
# disks were never overwritten.
# The wait loop makes sure /dev/bcache0 exists before mkfs.ext4 is started;
# the original formatted immediately after make-bcache.
if apt-get update &&
  apt-get install -y bcache-tools &&
  dd if=/dev/zero of="$ssddev" bs=512 count=8 &&
  dd if=/dev/zero of="$hdddev" bs=512 count=8 &&
  wipefs -a "$ssddev" &&
  wipefs -a "$hdddev" &&
  make-bcache -C "$ssddev" -B "$hdddev" --discard --writeback &&
  while [ ! -b /dev/bcache0 ]; do sleep 1; done &&
  mkfs.ext4 /dev/bcache0 &&
  mkdir -p /mnt/bcache &&
  mount /dev/bcache0 /mnt/bcache &&
  mkdir -p /mnt/bcache/home &&
  echo "/dev/bcache0 /mnt/bcache ext4 rw 0 0" >> /etc/fstab; then
  # Only change the useradd defaults once the new home base really exists.
  echo "#" >> /etc/default/useradd
  echo "# Modifications:" >> /etc/default/useradd
  echo "HOME=/mnt/bcache/home" >> /etc/default/useradd
else
  echo "bcache setup failed" >&2
  exit 1
fi
|
|
Loading…
Reference in New Issue