Skip to content

Commit 7e7845a

Browse files
authored
Merge pull request prometheus#1711 from prometheus/superq/cpu_cache
Linux CPU: Cache CPU metrics
2 parents b8847b5 + 3565316 commit 7e7845a

3 files changed

Lines changed: 86 additions & 3 deletions

File tree

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ jobs:
2828
steps:
2929
- checkout
3030
- run: sudo pip install codespell
31-
- run: codespell --skip=".git,./vendor,ttar,go.mod,go.sum,*pem" -L uint,packages\',uptodate
31+
- run: codespell --skip=".git,./vendor,ttar,go.mod,go.sum,*pem,./collector/fixtures" -L uint,packages\',uptodate
3232

3333
build:
3434
machine:

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
* [CHANGE]
44
* [FEATURE]
55
* [ENHANCEMENT]
6-
* [BUGFIX]
6+
* [BUGFIX] Linux CPU: Cache CPU metrics to make them monotonically increasing #1711
77

88
## 1.0.0-rc.1 / 2020-05-14
99

collector/cpu_linux.go

Lines changed: 84 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ import (
1919
"fmt"
2020
"path/filepath"
2121
"strconv"
22+
"sync"
2223

2324
"github.com/go-kit/kit/log"
2425
"github.com/go-kit/kit/log/level"
@@ -35,6 +36,8 @@ type cpuCollector struct {
3536
cpuCoreThrottle *prometheus.Desc
3637
cpuPackageThrottle *prometheus.Desc
3738
logger log.Logger
39+
cpuStats []procfs.CPUStat
40+
cpuStatsMutex sync.Mutex
3841
}
3942

4043
var (
@@ -203,7 +206,12 @@ func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error {
203206
return err
204207
}
205208

206-
for cpuID, cpuStat := range stats.CPU {
209+
c.updateCPUStats(stats.CPU)
210+
211+
// Acquire a lock to read the stats.
212+
c.cpuStatsMutex.Lock()
213+
defer c.cpuStatsMutex.Unlock()
214+
for cpuID, cpuStat := range c.cpuStats {
207215
cpuNum := strconv.Itoa(cpuID)
208216
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user")
209217
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice")
@@ -221,3 +229,78 @@ func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error {
221229

222230
return nil
223231
}
232+
233+
// updateCPUStats updates the internal cache of CPU stats.
234+
func (c *cpuCollector) updateCPUStats(newStats []procfs.CPUStat) {
235+
// Acquire a lock to update the stats.
236+
c.cpuStatsMutex.Lock()
237+
defer c.cpuStatsMutex.Unlock()
238+
239+
// Reset the cache if the list of CPUs has changed.
240+
if len(c.cpuStats) != len(newStats) {
241+
c.cpuStats = make([]procfs.CPUStat, len(newStats))
242+
}
243+
244+
for i, n := range newStats {
245+
// If idle jumps backwards, assume we had a hotplug event and reset the stats for this CPU.
246+
if n.Idle < c.cpuStats[i].Idle {
247+
level.Warn(c.logger).Log("msg", "CPU Idle counter jumped backwards, possible hotplug event, resetting CPU stats", "cpu", i, "old_value", c.cpuStats[i].Idle, "new_value", n.Idle)
248+
c.cpuStats[i] = procfs.CPUStat{}
249+
}
250+
c.cpuStats[i].Idle = n.Idle
251+
252+
if n.User >= c.cpuStats[i].User {
253+
c.cpuStats[i].User = n.User
254+
} else {
255+
level.Warn(c.logger).Log("msg", "CPU User counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].User, "new_value", n.User)
256+
}
257+
258+
if n.Nice >= c.cpuStats[i].Nice {
259+
c.cpuStats[i].Nice = n.Nice
260+
} else {
261+
level.Warn(c.logger).Log("msg", "CPU Nice counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Nice, "new_value", n.Nice)
262+
}
263+
264+
if n.System >= c.cpuStats[i].System {
265+
c.cpuStats[i].System = n.System
266+
} else {
267+
level.Warn(c.logger).Log("msg", "CPU System counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].System, "new_value", n.System)
268+
}
269+
270+
if n.Iowait >= c.cpuStats[i].Iowait {
271+
c.cpuStats[i].Iowait = n.Iowait
272+
} else {
273+
level.Warn(c.logger).Log("msg", "CPU Iowait counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Iowait, "new_value", n.Iowait)
274+
}
275+
276+
if n.IRQ >= c.cpuStats[i].IRQ {
277+
c.cpuStats[i].IRQ = n.IRQ
278+
} else {
279+
level.Warn(c.logger).Log("msg", "CPU IRQ counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].IRQ, "new_value", n.IRQ)
280+
}
281+
282+
if n.SoftIRQ >= c.cpuStats[i].SoftIRQ {
283+
c.cpuStats[i].SoftIRQ = n.SoftIRQ
284+
} else {
285+
level.Warn(c.logger).Log("msg", "CPU SoftIRQ counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].SoftIRQ, "new_value", n.SoftIRQ)
286+
}
287+
288+
if n.Steal >= c.cpuStats[i].Steal {
289+
c.cpuStats[i].Steal = n.Steal
290+
} else {
291+
level.Warn(c.logger).Log("msg", "CPU Steal counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Steal, "new_value", n.Steal)
292+
}
293+
294+
if n.Guest >= c.cpuStats[i].Guest {
295+
c.cpuStats[i].Guest = n.Guest
296+
} else {
297+
level.Warn(c.logger).Log("msg", "CPU Guest counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Guest, "new_value", n.Guest)
298+
}
299+
300+
if n.GuestNice >= c.cpuStats[i].GuestNice {
301+
c.cpuStats[i].GuestNice = n.GuestNice
302+
} else {
303+
level.Warn(c.logger).Log("msg", "CPU GuestNice counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].GuestNice, "new_value", n.GuestNice)
304+
}
305+
}
306+
}

0 commit comments

Comments
 (0)