From 18effc337a98f025583362277bd73f91d5753cdc Mon Sep 17 00:00:00 2001 From: Domen Tabernik Date: Tue, 10 Dec 2019 17:09:49 +0100 Subject: [PATCH] Several changes and fixes: - fixed issue in GPU to FAN speed controller when faulty FANs are not returned - putting GPUs into persistence mode before starting the controller (so that nvidia-smi returns immediately) - reduced GPU to FAN speed mappings by 5% to reduce FAN noise levels --- superfans_gpu_controller.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/superfans_gpu_controller.py b/superfans_gpu_controller.py index 20f0baf..51dcc10 100644 --- a/superfans_gpu_controller.py +++ b/superfans_gpu_controller.py @@ -14,6 +14,10 @@ class GracefulKiller: def exit_gracefully(self,signum, frame): self.kill_now = True +def enable_persistance_nvidia(): + cmd = 'nvidia-smi -pm 1' + s = subprocess.check_output(cmd + " 2>&1", shell=True) + def retrieve_nvidia_gpu_temperature(): cmd = 'nvidia-smi --query-gpu=temperature.gpu --format=csv,noheader' @@ -49,6 +53,9 @@ def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec print('\t%d C = %d ' % (k, fan_settings[k]) + "%") print('\n') + # put GPUs into persistance mode so that nvidia-smi will retern immediately + enable_persistance_nvidia() + try: FAN_MEMBERS = superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS1] + \ superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS2] @@ -90,8 +97,10 @@ def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec current_fan_levels = superfans.get_fan(superfan_config, FAN_MEMBERS) current_update_time = time.time() - diff_sys1_fan = [abs(current_fan_levels[FAN] - target_fan) for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS1]] - diff_sys2_fan = [abs(current_fan_levels[FAN] - target_fan) for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS2]] + diff_sys1_fan = [abs(current_fan_levels[FAN] - target_fan) for FAN in 
superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS1] if current_fan_levels.has_key(FAN) and current_fan_levels[FAN] > 0] + diff_sys2_fan = [abs(current_fan_levels[FAN] - target_fan) for FAN in superfans.FAN_ZONES_MEMBERS[superfans.FAN_ZONE_SYS2] if current_fan_levels.has_key(FAN) and current_fan_levels[FAN] > 0] + + # TODO: ignore outlier FANs in case they are faulty disbale_update = False @@ -111,10 +120,10 @@ def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec superfans.set_fan(superfan_config, target_fan, superfans.FAN_ZONE_SYS2) if update_sys1_fan or update_sys2_fan: - print('\tCurrent GPU measurements: %s' % ','.join(map(str,GPU_temp))) - print('\tMoving average GPU measurements: %s' % ','.join(map(str,mean_GPU_temp))) - print('\tTarget difference: SYS1 fan = %f; SYS2 fan = %f' % (max(diff_sys1_fan), max(diff_sys2_fan))) - print('\n') + print('\tCurrent GPU measurements (in C): %s' % ','.join(map(str,GPU_temp))) + print('\tMoving average GPU measurements (in C): %s (max=%d)' % (','.join(map(str,mean_GPU_temp)),max_gpu_temp)) + print('\tTarget FAN speed: %d C => FAN %d %% (difference: SYS1 fan = %.2f; SYS2 fan = %.2f)' % (max_gpu_temp, target_fan, max(diff_sys1_fan), max(diff_sys2_fan))) + print('\n\n') previous_target_fan = target_fan previous_update_time = current_update_time @@ -127,11 +136,11 @@ def superfans_gpu_controller(fan_settings, FAN_INCREASED_MIN_TIME=120, sleep_sec if __name__ == "__main__": # fan settings = {[in deg C]: [% fan], ...} - fan_settings = {0: 25, - 60: 30, - 70: 36, - 80: 40, - 85: 45, - 90: 50} + fan_settings = {0: 20, + 60: 25, + 70: 30, + 80: 35, + 87: 40, + 90: 43} - superfans_gpu_controller(fan_settings) \ No newline at end of file + superfans_gpu_controller(fan_settings)