Skip to content

Commit 903124c

Browse files
authored
Merge pull request #227 from prometheus/fix_with_labels
Fix `with labels` bug reported in issue #225
2 parents efe1ced + 097cb04 commit 903124c

6 files changed

Lines changed: 241 additions & 35 deletions

File tree

lib/prometheus/client/histogram.rb

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,18 @@ def self.exponential_buckets(start:, factor: 2, count:)
4242
end
4343

4444
def with_labels(labels)
45-
self.class.new(name,
46-
docstring: docstring,
47-
labels: @labels,
48-
preset_labels: preset_labels.merge(labels),
49-
buckets: @buckets,
50-
store_settings: @store_settings)
45+
new_metric = self.class.new(name,
46+
docstring: docstring,
47+
labels: @labels,
48+
preset_labels: preset_labels.merge(labels),
49+
buckets: @buckets,
50+
store_settings: @store_settings)
51+
52+
# The new metric needs to use the same store as the "main" declared one, otherwise
53+
# any observations on that copy with the pre-set labels won't actually be exported.
54+
new_metric.replace_internal_store(@store)
55+
56+
new_metric
5157
end
5258

5359
def type

lib/prometheus/client/metric.rb

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,21 +41,34 @@ def initialize(name,
4141
metric_settings: store_settings
4242
)
4343

44+
# WARNING: Our internal store can be replaced later by `with_labels`
45+
# Everything we do after this point needs to still work if @store gets replaced
4446
init_label_set({}) if labels.empty?
4547
end
4648

49+
protected def replace_internal_store(new_store)
50+
@store = new_store
51+
end
52+
53+
4754
# Returns the value for the given label set
4855
def get(labels: {})
4956
label_set = label_set_for(labels)
5057
@store.get(labels: label_set)
5158
end
5259

5360
def with_labels(labels)
54-
self.class.new(name,
55-
docstring: docstring,
56-
labels: @labels,
57-
preset_labels: preset_labels.merge(labels),
58-
store_settings: @store_settings)
61+
new_metric = self.class.new(name,
62+
docstring: docstring,
63+
labels: @labels,
64+
preset_labels: preset_labels.merge(labels),
65+
store_settings: @store_settings)
66+
67+
# The new metric needs to use the same store as the "main" declared one, otherwise
68+
# any observations on that copy with the pre-set labels won't actually be exported.
69+
new_metric.replace_internal_store(@store)
70+
71+
new_metric
5972
end
6073

6174
def init_label_set(labels)

spec/prometheus/client/counter_spec.rb

Lines changed: 146 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
require 'prometheus/client'
44
require 'prometheus/client/counter'
55
require 'examples/metric_example'
6+
require 'prometheus/client/data_stores/direct_file_store'
67

78
describe Prometheus::Client::Counter do
89
# Reset the data store
@@ -45,12 +46,6 @@
4546
end.to change { counter.get(labels: { test: 'label' }) }.by(1.0)
4647
end.to_not change { counter.get(labels: { test: 'other' }) }
4748
end
48-
49-
it 'can pre-set labels using `with_labels`' do
50-
expect { counter.increment }
51-
.to raise_error(Prometheus::Client::LabelSetValidator::InvalidLabelSetError)
52-
expect { counter.with_labels(test: 'label').increment }.not_to raise_error
53-
end
5449
end
5550

5651
it 'increments the counter by a given value' do
@@ -122,4 +117,149 @@
122117
end
123118
end
124119
end
120+
121+
describe '#with_labels' do
122+
let(:expected_labels) { [:foo] }
123+
124+
it 'pre-sets labels for observations' do
125+
expect { counter.increment }
126+
.to raise_error(Prometheus::Client::LabelSetValidator::InvalidLabelSetError)
127+
expect { counter.with_labels(foo: 'label').increment }.not_to raise_error
128+
end
129+
130+
it 'registers `with_labels` observations in the original metric store' do
131+
counter.increment(labels: { foo: 'value1'})
132+
counter_with_labels = counter.with_labels({ foo: 'value2'})
133+
counter_with_labels.increment(by: 2)
134+
135+
expect(counter_with_labels.values).to eql({foo: 'value1'} => 1.0, {foo: 'value2'} => 2.0)
136+
expect(counter.values).to eql({foo: 'value1'} => 1.0, {foo: 'value2'} => 2.0)
137+
end
138+
139+
context 'when using DirectFileStore' do
140+
before do
141+
Dir.glob('/tmp/prometheus_test/*').each { |file| File.delete(file) }
142+
Prometheus::Client.config.data_store = Prometheus::Client::DataStores::DirectFileStore.new(dir: '/tmp/prometheus_test')
143+
end
144+
145+
let(:expected_labels) { [:foo, :bar] }
146+
147+
# Testing for file corruption: this is weird and complicated, so it needs explaining
148+
#
149+
# Files get corrupted when we have two different instances of `FileMappedDict`
150+
# reading and writing the same file. This corruption is expected; we should never have
151+
# two instances of `FileMappedDict` for the same file. If we do, it's a bug in our client.
152+
#
153+
# To clarify, the bug is that *we ended up with two instances for the same file*, not
154+
# that the instances are now corrupting the file.
155+
#
156+
# This is why we're testing this in `with_labels`. It's the only use case we've found
157+
# were we ended up with two instances (before we fixed that bug). `with_labels` is
158+
# incidental, if we find another way to get "duplicate" instances, we should add this
159+
# same exact test, except for the first line, where we need to instead reproduce
160+
# whatever bug gets us that second instance.
161+
#
162+
# The first thing we need to understand is why having two instances of `FileMappedDict`
163+
# corrupts the files:
164+
#
165+
# `FileMappedDict` keeps track, in an internal variable, of how many bytes in the file
166+
# have been used. When adding a new "entry" (observing a new labelset), it serializes
167+
# it and adds it at "the end" (according to its internal byte counter), and it also updates
168+
# the counter at the beginning of the file. However, it never re-reads that counter
169+
# from the file, because there shouldn't be any reason for it to have changed.
170+
#
171+
# If there are two instances pointing to the same file, initially they will both
172+
# share that internal counter, as they do the first read of the file, but if then
173+
# each of them adds an entry, their internal "length" counters will disagree, and
174+
# they'll start overwriting each other's entries.
175+
#
176+
# Importantly, if all of the entries happen to have the same length, it will be "fine".
177+
# Some of the labelsets will effectively disappear, but there will be no corruption,
178+
# because all the important things will fall in the right offsets by pure chance. This
179+
# would be very rare in production, but in a test, it's what normally happens because
180+
# we set all labels to "foo", "bar", etc. This is the reason for "longervalue" below,
181+
# we need to have different labelset lenghts to reproduce the corruption.
182+
#
183+
# With this background about the internals, we can now get to why the specific sequence of
184+
# steps below ends up in corrupted files.
185+
#
186+
# For this to make sense, i'll need to describe the contents of the file at each step.
187+
# I'll represent it like this: `27|labelset1,value1|labelset2,value2|labelset3,value3|`
188+
#
189+
# These are not the bytes we store in the file, but conceptually it's equivalent,
190+
# with two caveats:
191+
# - The counter at the beginning (27 == 3 * 9) here shows the combined length of labelsets.
192+
# It'd normally also include the length of values, but doing that makes this explanation
193+
# much harder to follow.
194+
# - Each entry also starts with a 4-byte int specifying the length of its labelset, so
195+
# we know how much to read. Again, I'm omitting that for readability.
196+
#
197+
#
198+
# Steps to reproduce:
199+
# - We declare `counter` and `counter_with_labels` as a clone. Neither has read the file.
200+
# - We increment `counter`, which creates the file and adds the entry ("labelset1")
201+
# - File: `9|labelset1,value1|`
202+
# - We increment `counter_with_labels`, which reads the file, and adds the new entry
203+
# to it ("muchlongerlabelset2").
204+
# - File: `28|labelset1,value1|muchlongerlabelset2, value2|`
205+
# - `counter` and `counter_with_labels` now disagree about the length of this file
206+
# (`counter` doesn't know the file has grown).
207+
# - We now add a new entry to `counter` ("labelset3"), which thinks the file is shorter
208+
# than it actually is.
209+
# - File: `18|labelset1,value1|labelset3,value3|et2, value2|`
210+
# - The initial counter reflects both labelsets for `counter`; then we have those
211+
# labelsetsp; and finally some "garbage" after the "end" (the garbage is the
212+
# last few bytes of the much longer entry added before by `counter_with_labels`)
213+
# - so far, though, we're still good. If you read the file, all entries are "fine",
214+
# because you're only reading up to the "18" length specified at the beginning.
215+
# - for the problem to manifest itself, we need to increment that counter at the
216+
# beginning, so we'll read the garbage. **BUT**, if we add a new labelset to
217+
# `counter`, it'll overwrite the "garbage" with good data, and the file will
218+
# continue to be fine.
219+
# - We add a new entry to `counter_with_labels`. This updates the length counter at
220+
# the beginning of the file.
221+
# - File: `47|labelset1,value1|labelset3,value3|et2, value2|muchlongerlabelset4, value4|`
222+
#
223+
# - Now the file is properly corrupted. When reading it, `FileMappedDict` sees:
224+
# - labelset1,value1 (cool)
225+
# - labelset3,value3 (cool)
226+
# - et2, value2 (boom)
227+
# |-> the beginning of this entry is garbage because we're actually at the middle
228+
# of an entry, not a beginning.
229+
#
230+
# What actually breaks is that each of these entries is expected to have, at their
231+
# beginning, the length in bytes of its labelset, so we know how much to read.
232+
# Now we have garbage in that position, and `FileMappedDict` will either:
233+
# - Try to interpret those four bytes as a long, get an invalid result.
234+
# - Try to read an invalid amount of data (maybe a negative amount).
235+
# - After reading the labelset, try to read the float and go past the end of the file
236+
# - Actually read what it thinks is a float, try to `unpack` it, and fail because
237+
# it's actually garbage.
238+
# - I'm sure there are other fun ways for it to fail.
239+
it "doesn't corrupt the data files" do
240+
counter_with_labels = counter.with_labels({ foo: 'longervalue'})
241+
242+
# Initialize / read the files for both views of the metric
243+
counter.increment(labels: { foo: 'value1', bar: 'zzz'})
244+
counter_with_labels.increment(by: 2, labels: {bar: 'zzz'})
245+
246+
# After both MetricStores have their files, add a new entry to both
247+
counter.increment(labels: { foo: 'value1', bar: 'aaa'}) # If there's a bug, we partially overwrite { foo: 'longervalue', bar: 'zzz'}
248+
counter_with_labels.increment(by: 2, labels: {bar: 'aaa'}) # Extend the file so we read past that overwrite
249+
250+
expect { counter.values }.not_to raise_error # Check it hasn't corrupted our files
251+
expect { counter_with_labels.values }.not_to raise_error # Check it hasn't corrupted our files
252+
253+
expected_values = {
254+
{foo: 'value1', bar: 'zzz'} => 1.0,
255+
{foo: 'value1', bar: 'aaa'} => 1.0,
256+
{foo: 'longervalue', bar: 'zzz'} => 2.0,
257+
{foo: 'longervalue', bar: 'aaa'} => 2.0,
258+
}
259+
260+
expect(counter.values).to eql(expected_values)
261+
expect(counter_with_labels.values).to eql(expected_values)
262+
end
263+
end
264+
end
125265
end

spec/prometheus/client/gauge_spec.rb

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,6 @@
4545
end.to change { gauge.get(labels: { test: 'value' }) }.from(0).to(42)
4646
end.to_not change { gauge.get(labels: { test: 'other' }) }
4747
end
48-
49-
it 'can pre-set labels using `with_labels`' do
50-
expect { gauge.set(10) }
51-
.to raise_error(Prometheus::Client::LabelSetValidator::InvalidLabelSetError)
52-
expect { gauge.with_labels(test: 'value').set(10) }.not_to raise_error
53-
end
5448
end
5549

5650
context 'given an invalid value' do
@@ -204,4 +198,23 @@
204198
end
205199
end
206200
end
201+
202+
describe '#with_labels' do
203+
let(:expected_labels) { [:foo] }
204+
205+
it 'pre-sets labels for observations' do
206+
expect { gauge.set(10) }
207+
.to raise_error(Prometheus::Client::LabelSetValidator::InvalidLabelSetError)
208+
expect { gauge.with_labels(foo: 'value').set(10) }.not_to raise_error
209+
end
210+
211+
it 'registers `with_labels` observations in the original metric store' do
212+
gauge.set(1, labels: { foo: 'value1'})
213+
gauge_with_labels = gauge.with_labels({ foo: 'value2'})
214+
gauge_with_labels.set(2)
215+
216+
expect(gauge_with_labels.values).to eql({foo: 'value1'} => 1.0, {foo: 'value2'} => 2.0)
217+
expect(gauge.values).to eql({foo: 'value1'} => 1.0, {foo: 'value2'} => 2.0)
218+
end
219+
end
207220
end

spec/prometheus/client/histogram_spec.rb

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,6 @@
8080
end.to change { histogram.get(labels: { test: 'value' }) }
8181
end.to_not change { histogram.get(labels: { test: 'other' }) }
8282
end
83-
84-
it 'can pre-set labels using `with_labels`' do
85-
expect { histogram.observe(2) }
86-
.to raise_error(Prometheus::Client::LabelSetValidator::InvalidLabelSetError)
87-
expect { histogram.with_labels(test: 'value').observe(2) }.not_to raise_error
88-
end
8983
end
9084

9185
context "with non-string label values" do
@@ -189,4 +183,27 @@
189183
end
190184
end
191185
end
186+
187+
describe '#with_labels' do
188+
let(:expected_labels) { [:foo] }
189+
190+
it 'pre-sets labels for observations' do
191+
expect { histogram.observe(2) }
192+
.to raise_error(Prometheus::Client::LabelSetValidator::InvalidLabelSetError)
193+
expect { histogram.with_labels(foo: 'value').observe(2) }.not_to raise_error
194+
end
195+
196+
it 'registers `with_labels` observations in the original metric store' do
197+
histogram.observe(7, labels: { foo: 'value1'})
198+
histogram_with_labels = histogram.with_labels({ foo: 'value2'})
199+
histogram_with_labels.observe(20)
200+
201+
expected_values = {
202+
{foo: 'value1'} => {'2.5' => 0.0, '5' => 0.0, '10' => 1.0, '+Inf' => 1.0, 'sum' => 7.0},
203+
{foo: 'value2'} => {'2.5' => 0.0, '5' => 0.0, '10' => 0.0, '+Inf' => 1.0, 'sum' => 20.0}
204+
}
205+
expect(histogram_with_labels.values).to eql(expected_values)
206+
expect(histogram.values).to eql(expected_values)
207+
end
208+
end
192209
end

spec/prometheus/client/summary_spec.rb

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,6 @@
6161
end.to change { summary.get(labels: { test: 'value' })["count"] }
6262
end.to_not change { summary.get(labels: { test: 'other' })["count"] }
6363
end
64-
65-
it 'can pre-set labels using `with_labels`' do
66-
expect { summary.observe(2) }
67-
.to raise_error(Prometheus::Client::LabelSetValidator::InvalidLabelSetError)
68-
expect { summary.with_labels(test: 'value').observe(2) }.not_to raise_error
69-
end
7064
end
7165

7266
context "with non-string label values" do
@@ -152,4 +146,27 @@
152146
end
153147
end
154148
end
149+
150+
describe '#with_labels' do
151+
let(:expected_labels) { [:foo] }
152+
153+
it 'pre-sets labels for observations' do
154+
expect { summary.observe(2) }
155+
.to raise_error(Prometheus::Client::LabelSetValidator::InvalidLabelSetError)
156+
expect { summary.with_labels(foo: 'value').observe(2) }.not_to raise_error
157+
end
158+
159+
it 'registers `with_labels` observations in the original metric store' do
160+
summary.observe(1, labels: { foo: 'value1'})
161+
summary_with_labels = summary.with_labels({ foo: 'value2'})
162+
summary_with_labels.observe(2)
163+
164+
expected_values = {
165+
{foo: 'value1'} => { 'count' => 1.0, 'sum' => 1.0 },
166+
{foo: 'value2'} => { 'count' => 1.0, 'sum' => 2.0 }
167+
}
168+
expect(summary_with_labels.values).to eql(expected_values)
169+
expect(summary.values).to eql(expected_values)
170+
end
171+
end
155172
end

0 commit comments

Comments
 (0)