readme added, additional bcache settings added
This commit is contained in:
parent
e4ccb843d3
commit
0414759c74
|
@ -0,0 +1,50 @@
|
|||
# Setup for TensorFlow with GPU
|
||||
|
||||
Tested on Ubuntu 20.04.4
|
||||
|
||||
Steps:
|
||||
|
||||
1. Prepare setup:
|
||||
|
||||
git clone https://repos.nonan.net/nicolas/gpu_server_setup.git
|
||||
cd gpu_server_setup
|
||||
|
||||
|
||||
2. Setup driver/CUDA:
|
||||
|
||||
sudo bash setup_cuda.sh
|
||||
sudo systemctl reboot
|
||||
|
||||
|
||||
3. Setup bcache:
|
||||
|
||||
sudo bash setup_bcache.sh
|
||||
|
||||
|
||||
4. Setup apps (Python, JupyterHub (Hub is running as root), Tensorflow etc.):
|
||||
|
||||
sudo bash setup_apps.sh
|
||||
|
||||
|
||||
## Notes
|
||||
|
||||
### CUDA
|
||||
|
||||
Check state of NVIDIA devices (electrical power, temperature, memory etc.):
|
||||
|
||||
nvidia-smi
|
||||
|
||||
|
||||
### bcache
|
||||
|
||||
Check bcache performance:
|
||||
|
||||
cat /sys/block/bcache0/bcache/state
|
||||
cat /sys/block/bcache*/bcache/stats_five_minute/cache_hit_ratio
|
||||
cat /sys/block/bcache*/bcache/stats_hour/cache_hit_ratio
|
||||
|
||||
|
||||
Tune bcache (not permanent):
|
||||
|
||||
echo 64M | sudo tee /sys/block/bcache0/bcache/sequential_cutoff
|
||||
echo 4096 | sudo tee /sys/block/bcache0/queue/read_ahead_kb
|
|
@ -31,9 +31,11 @@ systemctl enable jupyterhub && \
|
|||
systemctl start jupyterhub
|
||||
|
||||
# Final report: runs only when the preceding command chain (ending with
# "systemctl start jupyterhub") left a zero exit status.
if [ $? -eq 0 ]; then
    echo "check nvidia drivers..."
    nvidia-smi

    # Print an empty separator line. bash's builtin echo does not interpret
    # backslash escapes without -e, so echo "\n" would print a literal \n.
    echo ""
    echo "Setup successfully finished"
fi
|
||||
|
||||
#check nvidia drivers
|
||||
nvidia-smi
|
||||
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
#!/bin/bash
#
# Creates a bcache device from an SSD (cache, make-bcache -C) and an HDD
# (backing device, make-bcache -B), formats it as ext4, mounts it on
# /mnt/bcache, registers it in /etc/fstab and makes /mnt/bcache/home the
# base directory for home directories of newly created users.
#
# WARNING: the first sectors and all signatures of $ssddev and $hdddev are
# wiped. Adjust the device names below before running.
#
# Usage: sudo bash setup_bcache.sh
# Exit status: 0 on success, 1 as soon as any step fails.
#
#sources:
#https://wiki.archlinux.org/title/bcache
#https://stackoverflow.com/questions/20797819/command-to-change-the-default-home-directory-of-a-user

ssddev=/dev/sdb
hdddev=/dev/sdc
sequential_cutoff=24000M

bcachedev=/dev/bcache0
bcachesys=/sys/block/bcache0
mountpoint=/mnt/bcache
wait_timeout=30   # seconds to wait for $bcachedev to show up

# Print the given message to stderr and abort the script with status 1.
die() {
    echo "$*" >&2
    exit 1
}

if [ "$EUID" -ne 0 ]; then
    die "Please run as root"
fi

if [ -b "$bcachedev" ]; then
    die "Canceled, device $bcachedev is already present"
fi

# Refuse to continue when a configured device is missing, before anything
# gets installed or overwritten.
for dev in "$ssddev" "$hdddev"; do
    [ -b "$dev" ] || die "Canceled, $dev is not a block device"
done

echo "Setup bcache... "
# dd must write to the device (of=) to clear its first sectors.
apt-get update && \
apt-get install -y bcache-tools && \
dd if=/dev/zero of="$ssddev" bs=512 count=8 && \
dd if=/dev/zero of="$hdddev" bs=512 count=8 && \
wipefs -a "$ssddev" && \
wipefs -a "$hdddev" && \
make-bcache -C "$ssddev" -B "$hdddev" --writeback \
    || die "Creating the bcache device failed"

# The sysfs attributes below only exist once the device is registered, so
# wait for it first (bounded, to avoid hanging forever).
echo "Wait until device is ready... "
waited=0
while [ ! -b "$bcachedev" ]; do
    [ "$waited" -lt "$wait_timeout" ] \
        || die "Timed out waiting for $bcachedev"
    sleep 1
    waited=$((waited + 1))
done

# Apply the sequential cutoff now and persist it through a udev rule.
# NOTE(review): the rule matches block major number 252 - confirm that this
# is the major number used by bcache on the target system.
echo "$sequential_cutoff" > "$bcachesys/bcache/sequential_cutoff" && \
echo "ACTION==\"add\", SUBSYSTEM==\"block\", ENV{MAJOR}==\"252\", ATTR{bcache/sequential_cutoff}=\"$sequential_cutoff\"" \
    > /etc/udev/rules.d/99-bcache_sequential_cutoff.rules && \
udevadm test "$bcachesys" \
    || die "Configuring sequential_cutoff failed"

echo "Format and mount bcache device... "
mkfs.ext4 "$bcachedev" && \
mkdir -p "$mountpoint" && \
mount "$bcachedev" "$mountpoint" && \
echo "$bcachedev $mountpoint ext4 rw 0 0" >> /etc/fstab \
    || die "Formatting or mounting $bcachedev failed"

echo "Config home directory for new users on bcache partition... "
mkdir -p "$mountpoint/home" && \
echo "#" >> /etc/default/useradd && \
echo "# Modifications:" >> /etc/default/useradd && \
echo "HOME=$mountpoint/home" >> /etc/default/useradd \
    || die "Configuring the default home directory failed"

echo ""
echo "Setup successfully finished"
|
|
@ -4,12 +4,15 @@
|
|||
#check for NVIDIA device:
|
||||
#apt-get -y install pciutils
|
||||
#lspci | grep VGA
|
||||
#check if driver works with device:
|
||||
#nvidia-smi
|
||||
#check if driver works with device: nvidia-smi
|
||||
|
||||
#based on https://docs.nvidia.com/cuda/cuda-installation-guide-linux/
|
||||
#and https://www.tensorflow.org/install/gpu
|
||||
#Versions required: https://www.tensorflow.org/install/source#gpu
|
||||
|
||||
#remove cuda:
|
||||
#sudo apt-get remove cuda-11-2
|
||||
#sudo apt-get autoremove
|
||||
#------------------------------------
|
||||
|
||||
if [ "$EUID" -ne 0 ]
|
||||
|
@ -47,17 +50,18 @@ if [ $? -eq 0 ]; then
|
|||
#Use the --no-install-recommends option for a lean driver install without any dependencies on X packages
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends cuda-11-2
|
||||
#apt-get install --no-install-recommends libcudnn8=8.1.0.44-1+cuda11.2 libcudnn8-dev=8.1.0.44-1+cuda11.2
|
||||
fi
|
||||
|
||||
# Install the cuDNN 8 runtime and development packages. Runs only when the
# previous step left a zero exit status; the exit status of the chain below
# is what the following "$?" check sees.
if [ $? -eq 0 ]; then
    # Print an empty separator line. bash's builtin echo does not interpret
    # backslash escapes without -e, so echo "\n" would print a literal \n.
    echo ""
    echo "install libcudnn8..."
    #Latest available version from nvidia ubuntu1804 repo:
    # Each package is downloaded, installed and its .deb removed again; the
    # chain stops at the first failing command.
    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \
    apt-get install -y ./libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \
    rm ./libcudnn8_8.1.1.33-1+cuda11.2_amd64.deb && \
    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb && \
    apt-get install -y ./libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb && \
    rm ./libcudnn8-dev_8.1.1.33-1+cuda11.2_amd64.deb
fi
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
|
@ -67,7 +71,3 @@ if [ $? -eq 0 ]; then
|
|||
echo "Setup successfully finished"
|
||||
echo "Type \"systemctl reboot\" to reboot system"
|
||||
fi
|
||||
|
||||
|
||||
#sudo apt-get remove cuda-11-2
|
||||
#sudo apt-get autoremove
|
|
@ -1,41 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
#https://stackoverflow.com/questions/20797819/command-to-change-the-default-home-directory-of-a-user
|
||||
|
||||
#move user with files:
|
||||
#usermod -m -d /newhome/username username
|
||||
|
||||
#Simply open this file using a text editor, type:
|
||||
#vi /etc/default/useradd
|
||||
#The default home directory defined by HOME variable, find line that read as follows:
|
||||
#HOME=/home
|
||||
#Replace with:
|
||||
#HOME=/iscsi/user
|
||||
#Save and close the file. Now you can add user using regular useradd command:
|
||||
# useradd vivek
|
||||
# passwd vivek
|
||||
#Verify user information:
|
||||
# finger vivek
|
||||
|
||||
#echo "/dev/bcache0 /mnt/bcache ext4 rw 0 0" | tee -a /etc/fstab > /dev/null
|
||||
|
||||
|
||||
# Create a bcache device from /dev/sdb (cache, -C) and /dev/sdc (backing
# device, -B), format and mount it, and register it in /etc/fstab.
# WARNING: wipes the first sectors and all signatures of both devices.
# dd must write to the device (of=) to clear its first sectors.
apt-get update && \
apt-get install -y bcache-tools && \
dd if=/dev/zero of=/dev/sdb bs=512 count=8 && \
dd if=/dev/zero of=/dev/sdc bs=512 count=8 && \
wipefs -a /dev/sdb && \
wipefs -a /dev/sdc && \
make-bcache -C /dev/sdb -B /dev/sdc --discard --writeback && \
mkfs.ext4 /dev/bcache0 && \
mkdir /mnt/bcache && \
mount /dev/bcache0 /mnt/bcache && \
mkdir /mnt/bcache/home && \
echo "/dev/bcache0 /mnt/bcache ext4 rw 0 0" >> /etc/fstab

# Only when every step above succeeded: make /mnt/bcache/home the base
# directory for home directories of newly created users.
if [ $? -eq 0 ]; then
    echo "#" >> /etc/default/useradd && \
    echo "# Modifications:" >> /etc/default/useradd && \
    echo "HOME=/mnt/bcache/home" >> /etc/default/useradd
fi
|
Loading…
Reference in New Issue