Several changes and fixes:

- fixed an issue in the GPU to FAN speed controller when faulty FANs are not returned
- putting GPUs into persistence mode before starting the controller (so that nvidia-smi returns immediately)
- reduced GPU to FAN speed mappings by 5% to reduce FAN noise levels
This commit is contained in:
Domen Tabernik 2019-12-10 17:09:49 +01:00
parent 1857fd168e
commit 18effc337a
1 changed files with 22 additions and 13 deletions

View File

@ -14,6 +14,10 @@ class GracefulKiller:
def exit_gracefully(self,signum, frame): def exit_gracefully(self,signum, frame):
self.kill_now = True self.kill_now = True
def enable_persistance_nvidia():
    """Turn on persistence mode for the NVIDIA GPUs via ``nvidia-smi -pm 1``.

    The command's stderr is merged into stdout and the combined output is
    captured and discarded; only the exit status matters to the caller.

    Raises:
        subprocess.CalledProcessError: if nvidia-smi exits with a non-zero
            status.
        OSError: if the nvidia-smi executable cannot be started.
    """
    # Pass an argument list rather than a shell string: no shell is spawned,
    # and stderr=subprocess.STDOUT replaces the former "2>&1" redirection.
    subprocess.check_output(['nvidia-smi', '-pm', '1'], stderr=subprocess.STDOUT)
def retrieve_nvidia_gpu_temperature(): def retrieve_nvidia_gpu_temperature():
cmd = 'nvidia-smi --query-gpu=temperature.gpu --format=csv,noheader' cmd = 'nvidia-smi --query-gpu=temperature.gpu --format=csv,noheader'
@ -49,6 +53,9 @@ def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec
print('\t%d C = %d ' % (k, fan_settings[k]) + "%") print('\t%d C = %d ' % (k, fan_settings[k]) + "%")
print('\n') print('\n')
# put GPUs into persistence mode so that nvidia-smi will return immediately
enable_persistance_nvidia()
try: try:
FAN_MEMBERS = superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS1] + \ FAN_MEMBERS = superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS1] + \
superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS2] superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS2]
@ -90,8 +97,10 @@ def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec
current_fan_levels = superfans.get_fan(superfan_config, FAN_MEMBERS) current_fan_levels = superfans.get_fan(superfan_config, FAN_MEMBERS)
current_update_time = time.time() current_update_time = time.time()
diff_sys1_fan = [abs(current_fan_levels[FAN] - target_fan) for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS1]] diff_sys1_fan = [abs(current_fan_levels[FAN] - target_fan) for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS1] if current_fan_levels.has_key(FAN) and current_fan_levels[FAN] > 0]
diff_sys2_fan = [abs(current_fan_levels[FAN] - target_fan) for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS2]] diff_sys2_fan = [abs(current_fan_levels[FAN] - target_fan) for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS2] if current_fan_levels.has_key(FAN) and current_fan_levels[FAN] > 0]
# TODO: ignore outlier FANs in case they are faulty
disbale_update = False disbale_update = False
@ -111,10 +120,10 @@ def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec
superfans.set_fan(superfan_config, target_fan, superfans.FAN_ZONE_SYS2) superfans.set_fan(superfan_config, target_fan, superfans.FAN_ZONE_SYS2)
if update_sys1_fan or update_sys2_fan: if update_sys1_fan or update_sys2_fan:
print('\tCurrent GPU measurements: %s' % ','.join(map(str,GPU_temp))) print('\tCurrent GPU measurements (in C): %s' % ','.join(map(str,GPU_temp)))
print('\tMoving average GPU measurements: %s' % ','.join(map(str,mean_GPU_temp))) print('\tMoving average GPU measurements (in C): %s (max=%d)' % (','.join(map(str,mean_GPU_temp)),max_gpu_temp))
print('\tTarget difference: SYS1 fan = %f; SYS2 fan = %f' % (max(diff_sys1_fan), max(diff_sys2_fan))) print('\tTarget FAN speed: %d C => FAN %d %% (difference: SYS1 fan = %.2f; SYS2 fan = %.2f)' % (max_gpu_temp, target_fan, max(diff_sys1_fan), max(diff_sys2_fan)))
print('\n') print('\n\n')
previous_target_fan = target_fan previous_target_fan = target_fan
previous_update_time = current_update_time previous_update_time = current_update_time
@ -127,11 +136,11 @@ def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec
if __name__ == "__main__": if __name__ == "__main__":
# fan settings = {[in deg C]: [% fan], ...} # fan settings = {[in deg C]: [% fan], ...}
fan_settings = {0: 25, fan_settings = {0: 20,
60: 30, 60: 25,
70: 36, 70: 30,
80: 40, 80: 35,
85: 45, 87: 40,
90: 50} 90: 43}
superfans_gpu_controller(fan_settings) superfans_gpu_controller(fan_settings)