CPU fan control added, fitted to chassis

This commit is contained in:
Nicolas 2022-03-18 23:44:06 +00:00
parent 1dce9232db
commit 601943451c
3 changed files with 47 additions and 36 deletions

View File

@ -1,8 +1,13 @@
{
"fan_settings" : {"0": 20,
"60": 25,
"70": 30,
"80": 35,
"87": 40,
"90": 43}
"fan_settings" : {"0": 10,
"30": 15,
"35": 20,
"40": 25,
"50": 30,
"60": 35,
"70": 40,
"75": 45,
"80": 55,
"83": 65,
"87": 80}
}

View File

@ -51,10 +51,7 @@ FAN9 ='FAN9'
FAN10 ='FAN10'
FAN_ZONES_MEMBERS= {
FAN_ZONE_CPU1:[FAN10],
FAN_ZONE_CPU2:[FAN9],
FAN_ZONE_SYS2:[FAN1,FAN2,FAN3,FAN4],
FAN_ZONE_SYS1:[FAN5,FAN6,FAN7,FAN8],
FAN_ZONE_CPU1:[FAN1,FAN2,FAN4,FAN6,FAN7,FAN8],
}
# based on observations on SUPERMICRO_4029GP_TRT2 the

View File

@ -32,7 +32,7 @@ def retrieve_nvidia_gpu_temperature():
else:
return False
def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec=2, gpu_moving_avg_num=5, fan_target_eps=2.0):
def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec=2, gpu_moving_avg_num=4, fan_target_eps=2.0):
"""
Controller function that monitors GPU temperature in constant loop and adjusts FAN speeds based on provided `fan_settings`.
After the loop the default preset is restored.
@ -57,15 +57,18 @@ def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec
print('\t%d C = %d ' % (k, fan_settings[k]) + "%")
print('\n')
#open file for cpu temperature
cpu_t_f = open("/sys/class/thermal/thermal_zone0/temp", "r")
# put GPUs into persistence mode so that nvidia-smi will return immediately
enable_persistance_nvidia()
try:
FAN_MEMBERS = superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS1] + \
superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS2]
#try:
for gege in [0]:
FAN_MEMBERS = superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_CPU1]
# GPU moving average
previous_target_fan = None
previous_target_fan = 0
previous_update_time = None
prev_GPU_temp = []
@ -93,16 +96,24 @@ def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec
max_gpu_temp = max(mean_GPU_temp)
#read cpu temperature
cpu_t_f.seek(0,0)
cpu_temp = int(cpu_t_f.read()) / 1000 #°C
max_temp = max(max_gpu_temp, cpu_temp)
for key_temp in sorted(fan_settings.keys())[::-1]:
if key_temp <= max_gpu_temp:
if key_temp <= max_temp:
target_fan = fan_settings[key_temp]
break
current_fan_levels = superfans.get_fan(superfan_config, FAN_MEMBERS)
current_update_time = time.time()
diff_sys1_fan = [abs(current_fan_levels[FAN] - target_fan) for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS1] if FAN in current_fan_levels and current_fan_levels[FAN] > 0]
diff_sys2_fan = [abs(current_fan_levels[FAN] - target_fan) for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS2] if FAN in current_fan_levels and current_fan_levels[FAN] > 0]
#diff_cpu1_fan = [abs(current_fan_levels[FAN] - target_fan) for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_CPU1] if FAN in current_fan_levels and current_fan_levels[FAN] > 0]
#print(list(superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_CPU1]))
#print(list(current_fan_levels[FAN] for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_CPU1]))
# TODO: ignore outlier FANs in case they are faulty
@ -115,28 +126,26 @@ def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec
if not disbale_update:
# Allow for 1% difference in target
update_sys1_fan = any([d > fan_target_eps for d in diff_sys1_fan])
update_sys2_fan = any([d > fan_target_eps for d in diff_sys2_fan])
if update_sys1_fan:
superfans.set_fan(superfan_config, target_fan, superfans.FAN_ZONE_SYS1)
update_cpu1_fan = abs(previous_target_fan - target_fan) > fan_target_eps
if update_sys2_fan:
superfans.set_fan(superfan_config, target_fan, superfans.FAN_ZONE_SYS2)
if update_cpu1_fan:
superfans.set_fan(superfan_config, target_fan, superfans.FAN_ZONE_CPU1)
if update_sys1_fan or update_sys2_fan:
if update_cpu1_fan:
print('\tCurrent GPU measurements (in C): %s' % ','.join(map(str,GPU_temp)))
print('\tMoving average GPU measurements (in C): %s (max=%d)' % (','.join(map(str,mean_GPU_temp)),max_gpu_temp))
print('\tTarget FAN speed: %d C => FAN %d %% (difference: SYS1 fan = %.2f; SYS2 fan = %.2f)' % (max_gpu_temp, target_fan, max(diff_sys1_fan), max(diff_sys2_fan)))
print('\tCPU measurement (in C): %.1f ' % cpu_temp)
print('\tTarget FAN speed: %d C => FAN %d %% (difference: CPU1 fan = %.2f)' % (max_temp, target_fan, -1)) #max(diff_cpu1_fan)
print('\n\n')
previous_target_fan = target_fan
previous_update_time = current_update_time
time.sleep(sleep_sec)
finally:
#finally:
# revert back to default preset before finishing
superfans.set_preset(superfan_config, default_preset)
print('Reverted back to system default.')
# superfans.set_preset(superfan_config, default_preset)
# print('Reverted back to system default.')
def main():
if len(sys.argv) != 2: