CPU fan control added, fitted to chassis
This commit is contained in:
parent
1dce9232db
commit
601943451c
|
@ -1,8 +1,13 @@
|
||||||
{
|
{
|
||||||
"fan_settings" : {"0": 20,
|
"fan_settings" : {"0": 10,
|
||||||
"60": 25,
|
"30": 15,
|
||||||
"70": 30,
|
"35": 20,
|
||||||
"80": 35,
|
"40": 25,
|
||||||
"87": 40,
|
"50": 30,
|
||||||
"90": 43}
|
"60": 35,
|
||||||
|
"70": 40,
|
||||||
|
"75": 45,
|
||||||
|
"80": 55,
|
||||||
|
"83": 65,
|
||||||
|
"87": 80}
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,10 +51,7 @@ FAN9 ='FAN9'
|
||||||
FAN10 ='FAN10'
|
FAN10 ='FAN10'
|
||||||
|
|
||||||
FAN_ZONES_MEMBERS= {
|
FAN_ZONES_MEMBERS= {
|
||||||
FAN_ZONE_CPU1:[FAN10],
|
FAN_ZONE_CPU1:[FAN1,FAN2,FAN4,FAN6,FAN7,FAN8],
|
||||||
FAN_ZONE_CPU2:[FAN9],
|
|
||||||
FAN_ZONE_SYS2:[FAN1,FAN2,FAN3,FAN4],
|
|
||||||
FAN_ZONE_SYS1:[FAN5,FAN6,FAN7,FAN8],
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# based on observations on SUPERMICRO_4029GP_TRT2 the
|
# based on observations on SUPERMICRO_4029GP_TRT2 the
|
||||||
|
|
|
@ -6,13 +6,13 @@
|
||||||
import time, superfans, subprocess, signal, sys, json
|
import time, superfans, subprocess, signal, sys, json
|
||||||
|
|
||||||
class GracefulKiller:
|
class GracefulKiller:
|
||||||
kill_now = False
|
kill_now = False
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
signal.signal(signal.SIGINT, self.exit_gracefully)
|
signal.signal(signal.SIGINT, self.exit_gracefully)
|
||||||
signal.signal(signal.SIGTERM, self.exit_gracefully)
|
signal.signal(signal.SIGTERM, self.exit_gracefully)
|
||||||
|
|
||||||
def exit_gracefully(self,signum, frame):
|
def exit_gracefully(self,signum, frame):
|
||||||
self.kill_now = True
|
self.kill_now = True
|
||||||
|
|
||||||
def enable_persistance_nvidia():
|
def enable_persistance_nvidia():
|
||||||
cmd = 'nvidia-smi -pm 1'
|
cmd = 'nvidia-smi -pm 1'
|
||||||
|
@ -32,7 +32,7 @@ def retrieve_nvidia_gpu_temperature():
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec=2, gpu_moving_avg_num=5, fan_target_eps=2.0):
|
def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec=2, gpu_moving_avg_num=4, fan_target_eps=2.0):
|
||||||
"""
|
"""
|
||||||
Controller function that monitors GPU temperature in constant loop and adjusts FAN speeds based on provided `fan_settings`.
|
Controller function that monitors GPU temperature in constant loop and adjusts FAN speeds based on provided `fan_settings`.
|
||||||
After the loop the default preset is restored.
|
After the loop the default preset is restored.
|
||||||
|
@ -57,15 +57,18 @@ def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec
|
||||||
print('\t%d C = %d ' % (k, fan_settings[k]) + "%")
|
print('\t%d C = %d ' % (k, fan_settings[k]) + "%")
|
||||||
print('\n')
|
print('\n')
|
||||||
|
|
||||||
|
#open file for cpu temperature
|
||||||
|
cpu_t_f = open("/sys/class/thermal/thermal_zone0/temp", "r")
|
||||||
|
|
||||||
# put GPUs into persistence mode so that nvidia-smi will return immediately
|
# put GPUs into persistence mode so that nvidia-smi will return immediately
|
||||||
enable_persistance_nvidia()
|
enable_persistance_nvidia()
|
||||||
|
|
||||||
try:
|
#try:
|
||||||
FAN_MEMBERS = superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS1] + \
|
for gege in [0]:
|
||||||
superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS2]
|
FAN_MEMBERS = superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_CPU1]
|
||||||
|
|
||||||
# GPU moving average
|
# GPU moving average
|
||||||
previous_target_fan = None
|
previous_target_fan = 0
|
||||||
previous_update_time = None
|
previous_update_time = None
|
||||||
|
|
||||||
prev_GPU_temp = []
|
prev_GPU_temp = []
|
||||||
|
@ -93,16 +96,24 @@ def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec
|
||||||
|
|
||||||
max_gpu_temp = max(mean_GPU_temp)
|
max_gpu_temp = max(mean_GPU_temp)
|
||||||
|
|
||||||
|
#read cpu temperature
|
||||||
|
cpu_t_f.seek(0,0)
|
||||||
|
cpu_temp = int(cpu_t_f.read()) / 1000 #°C
|
||||||
|
|
||||||
|
max_temp = max(max_gpu_temp, cpu_temp)
|
||||||
|
|
||||||
for key_temp in sorted(fan_settings.keys())[::-1]:
|
for key_temp in sorted(fan_settings.keys())[::-1]:
|
||||||
if key_temp <= max_gpu_temp:
|
if key_temp <= max_temp:
|
||||||
target_fan = fan_settings[key_temp]
|
target_fan = fan_settings[key_temp]
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
current_fan_levels = superfans.get_fan(superfan_config, FAN_MEMBERS)
|
current_fan_levels = superfans.get_fan(superfan_config, FAN_MEMBERS)
|
||||||
current_update_time = time.time()
|
current_update_time = time.time()
|
||||||
diff_sys1_fan = [abs(current_fan_levels[FAN] - target_fan) for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS1] if FAN in current_fan_levels and current_fan_levels[FAN] > 0]
|
#diff_cpu1_fan = [abs(current_fan_levels[FAN] - target_fan) for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_CPU1] if FAN in current_fan_levels and current_fan_levels[FAN] > 0]
|
||||||
diff_sys2_fan = [abs(current_fan_levels[FAN] - target_fan) for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS2] if FAN in current_fan_levels and current_fan_levels[FAN] > 0]
|
|
||||||
|
#print(list(superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_CPU1]))
|
||||||
|
|
||||||
|
#print(list(current_fan_levels[FAN] for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_CPU1]))
|
||||||
|
|
||||||
# TODO: ignore outlier FANs in case they are faulty
|
# TODO: ignore outlier FANs in case they are faulty
|
||||||
|
|
||||||
|
@ -115,28 +126,26 @@ def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec
|
||||||
|
|
||||||
if not disbale_update:
|
if not disbale_update:
|
||||||
# Allow for up to fan_target_eps difference from the target before updating
|
# Allow for up to fan_target_eps difference from the target before updating
|
||||||
update_sys1_fan = any([d > fan_target_eps for d in diff_sys1_fan])
|
update_cpu1_fan = abs(previous_target_fan - target_fan) > fan_target_eps
|
||||||
update_sys2_fan = any([d > fan_target_eps for d in diff_sys2_fan])
|
|
||||||
if update_sys1_fan:
|
|
||||||
superfans.set_fan(superfan_config, target_fan, superfans.FAN_ZONE_SYS1)
|
|
||||||
|
|
||||||
if update_sys2_fan:
|
if update_cpu1_fan:
|
||||||
superfans.set_fan(superfan_config, target_fan, superfans.FAN_ZONE_SYS2)
|
superfans.set_fan(superfan_config, target_fan, superfans.FAN_ZONE_CPU1)
|
||||||
|
|
||||||
if update_sys1_fan or update_sys2_fan:
|
if update_cpu1_fan:
|
||||||
print('\tCurrent GPU measurements (in C): %s' % ','.join(map(str,GPU_temp)))
|
print('\tCurrent GPU measurements (in C): %s' % ','.join(map(str,GPU_temp)))
|
||||||
print('\tMoving average GPU measurements (in C): %s (max=%d)' % (','.join(map(str,mean_GPU_temp)),max_gpu_temp))
|
print('\tMoving average GPU measurements (in C): %s (max=%d)' % (','.join(map(str,mean_GPU_temp)),max_gpu_temp))
|
||||||
print('\tTarget FAN speed: %d C => FAN %d %% (difference: SYS1 fan = %.2f; SYS2 fan = %.2f)' % (max_gpu_temp, target_fan, max(diff_sys1_fan), max(diff_sys2_fan)))
|
print('\tCPU measurement (in C): %.1f ' % cpu_temp)
|
||||||
|
print('\tTarget FAN speed: %d C => FAN %d %% (difference: CPU1 fan = %.2f)' % (max_temp, target_fan, -1)) #max(diff_cpu1_fan)
|
||||||
print('\n\n')
|
print('\n\n')
|
||||||
|
|
||||||
previous_target_fan = target_fan
|
previous_target_fan = target_fan
|
||||||
previous_update_time = current_update_time
|
previous_update_time = current_update_time
|
||||||
|
|
||||||
time.sleep(sleep_sec)
|
time.sleep(sleep_sec)
|
||||||
finally:
|
#finally:
|
||||||
# revert back to default preset before finishing
|
# revert back to default preset before finishing
|
||||||
superfans.set_preset(superfan_config, default_preset)
|
# superfans.set_preset(superfan_config, default_preset)
|
||||||
print('Reverted back to system default.')
|
# print('Reverted back to system default.')
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if len(sys.argv) != 2:
|
if len(sys.argv) != 2:
|
||||||
|
|
Loading…
Reference in New Issue