Update – you can also remove the , 'limit_low' => 0 entry from the fan.speed discover_sensor call.

 

Chris James

Technical Lead, Vessel Production Systems

4 The Heights, Brooklands

Weybridge, KT13 0NY

Mobile: +44 7768 480 557

chris.james@tgs.com

 

From: Chris James
Sent: 16 April 2026 14:39
To: observium@lists.observium.org
Subject: fix for nvidia GPU load at 0% but reported as critical

 

We run a number of machines with Nvidia GPU cards and have noticed that when the load is reported as 0% – which can happen when the machine is idle – it's flagged as critical in Observium. The fix is as follows:

In the ./includes/polling/unix-agent/nvidia.inc.php file change the utilization.gpu and utilization.memory discover_sensor calls from:

 

['limit_high' => 100, 'limit_low' => 0]

 

To:


['limit_high' => 100]

 

This is what the file looks like after the change:

<?php

/**

* Observium

*

*   This file is part of Observium.

*

* @package        observium

* @subpackage     poller

* @copyright  (C) Adam Armstrong

*

*/

 

// Unix-agent poller section for nvidia-smi output.
//
// Reads the CSV text in $agent_data['nvidia']['smi'], and for every card row
// registers up to five sensors (temperature, power draw, fan load, GPU load,
// memory load) through discover_sensor(), mirrors each current reading into
// the global $agent_sensors array, and prints the reading to the CLI.
global $agent_sensors;

// Proceed only when the agent supplied nvidia-smi data and parse_csv() returned a truthy result.
if (!safe_empty($agent_data['nvidia']['smi']) &&
    $nvidia = parse_csv($agent_data['nvidia']['smi'])) {

    // Placeholder strings that mean "no reading"; a field holding one of these is skipped.
    $invalid = [ '[Not Supported]', 'N/A', '[N/A]' ];
    print_cli_heading("nvidia-smi", 3);

    foreach ($nvidia as $card) {

        // The card index is shown one-based in the description.
        $descr_card = "Nvidia Card " . ((int)$card['index'] + 1) . ": " . $card['name'];
        print_cli_heading($descr_card, 4);

        // NOTE: every in_array() below is strict, so a numeric reading such as 0
        // can never loosely compare equal to one of the placeholder strings.

        // GPU temperature.
        if (!in_array($card['temperature.gpu'], $invalid, true)) {
            $index = 'temperature.gpu.' . $card['index'];
            $descr = $descr_card;
            discover_sensor('temperature', $device, '', $index, 'nvidia-smi', $descr, 1, $card['temperature.gpu'], ['limit_high' => 100], 'agent');
            $agent_sensors['temperature']['nvidia-smi'][$index] = ['description' => $descr, 'current' => $card['temperature.gpu'], 'index' => $index];
            print_cli_data("temperature.gpu", $card['temperature.gpu'] . "C");
        }

        // Power draw; no limits are supplied for this sensor.
        if (!in_array($card['power.draw [W]'], $invalid, true)) {
            $index = 'power.draw.' . $card['index'];
            $descr = $descr_card;
            discover_sensor('power', $device, '', $index, 'nvidia-smi', $descr, 1, $card['power.draw [W]'], [], 'agent');
            $agent_sensors['power']['nvidia-smi'][$index] = ['description' => $descr, 'current' => $card['power.draw [W]'], 'index' => $index];
            print_cli_data("power.draw", $card['power.draw [W]'] . "W");
        }

        // Fan speed, registered as a 'load' sensor.
        // Deliberately no 'limit_low': with a low limit of 0 a legitimate 0% reading
        // was being flagged as critical, the same problem reported for the GPU and
        // memory load sensors below.
        if (!in_array($card['fan.speed [%]'], $invalid, true)) {
            $index = 'fan.speed.' . $card['index'];
            $descr = $descr_card . " Fan Load";
            discover_sensor('load', $device, '', $index, 'nvidia-smi', $descr, 1, $card['fan.speed [%]'], ['limit_high' => 100], 'agent');
            $agent_sensors['load']['nvidia-smi'][$index] = ['description' => $descr, 'current' => $card['fan.speed [%]'], 'index' => $index];
            print_cli_data("fan.speed", $card['fan.speed [%]'] . "");
        }

        // GPU utilisation; only a high limit, because 0% on an idle machine is normal.
        if (!in_array($card['utilization.gpu [%]'], $invalid, true)) {
            $index = 'utilization.gpu.' . $card['index'];
            $descr = $descr_card . " GPU Load";
            discover_sensor('load', $device, '', $index, 'nvidia-smi', $descr, 1, $card['utilization.gpu [%]'], ['limit_high' => 100], 'agent');
            $agent_sensors['load']['nvidia-smi'][$index] = ['description' => $descr, 'current' => $card['utilization.gpu [%]'], 'index' => $index];
            print_cli_data("utilization.gpu", $card['utilization.gpu [%]'] . "");
        }

        // Memory utilisation; only a high limit, for the same reason as GPU utilisation.
        if (!in_array($card['utilization.memory [%]'], $invalid, true)) {
            $index = 'utilization.memory.' . $card['index'];
            $descr = $descr_card . " Memory Load";
            discover_sensor('load', $device, '', $index, 'nvidia-smi', $descr, 1, $card['utilization.memory [%]'], ['limit_high' => 100], 'agent');
            $agent_sensors['load']['nvidia-smi'][$index] = ['description' => $descr, 'current' => $card['utilization.memory [%]'], 'index' => $index];
            print_cli_data("utilization.memory", $card['utilization.memory [%]'] . "");
        }

    }

    echo "\n";
}

 

// EOF

 

 

Thanks

 

Chris