|
3 | 3 | require 'prometheus/client' |
4 | 4 | require 'prometheus/client/counter' |
5 | 5 | require 'examples/metric_example' |
| 6 | +require 'prometheus/client/data_stores/direct_file_store' |
6 | 7 |
|
7 | 8 | describe Prometheus::Client::Counter do |
8 | 9 | # Reset the data store |
|
45 | 46 | end.to change { counter.get(labels: { test: 'label' }) }.by(1.0) |
46 | 47 | end.to_not change { counter.get(labels: { test: 'other' }) } |
47 | 48 | end |
48 | | - |
49 | | - it 'can pre-set labels using `with_labels`' do |
50 | | - expect { counter.increment } |
51 | | - .to raise_error(Prometheus::Client::LabelSetValidator::InvalidLabelSetError) |
52 | | - expect { counter.with_labels(test: 'label').increment }.not_to raise_error |
53 | | - end |
54 | 49 | end |
55 | 50 |
|
56 | 51 | it 'increments the counter by a given value' do |
|
122 | 117 | end |
123 | 118 | end |
124 | 119 | end |
| 120 | + |
  # Specs for Metric#with_labels: a "view" of the same metric with some labels
  # pre-set. Observations made through the view must land in the ORIGINAL
  # metric's store (the two objects must share one MetricStore).
  describe '#with_labels' do
    let(:expected_labels) { [:foo] }

    it 'pre-sets labels for observations' do
      expect { counter.increment }
        .to raise_error(Prometheus::Client::LabelSetValidator::InvalidLabelSetError)
      expect { counter.with_labels(foo: 'label').increment }.not_to raise_error
    end

    it 'registers `with_labels` observations in the original metric store' do
      counter.increment(labels: { foo: 'value1'})
      counter_with_labels = counter.with_labels({ foo: 'value2'})
      counter_with_labels.increment(by: 2)

      # Both views must report BOTH observations — proof they share one store.
      expect(counter_with_labels.values).to eql({foo: 'value1'} => 1.0, {foo: 'value2'} => 2.0)
      expect(counter.values).to eql({foo: 'value1'} => 1.0, {foo: 'value2'} => 2.0)
    end

    context 'when using DirectFileStore' do
      before do
        # NOTE(review): hard-coded /tmp path; assumes a POSIX tmpdir exists and is
        # writable — consider Dir.mktmpdir. Also, this swaps the global data store
        # and relies on the suite-level "Reset the data store" hook (out of view
        # above) to restore the default afterwards — confirm that hook exists.
        Dir.glob('/tmp/prometheus_test/*').each { |file| File.delete(file) }
        Prometheus::Client.config.data_store = Prometheus::Client::DataStores::DirectFileStore.new(dir: '/tmp/prometheus_test')
      end

      let(:expected_labels) { [:foo, :bar] }

      # Testing for file corruption: this is weird and complicated, so it needs explaining
      #
      # Files get corrupted when we have two different instances of `FileMappedDict`
      # reading and writing the same file. This corruption is expected; we should never have
      # two instances of `FileMappedDict` for the same file. If we do, it's a bug in our client.
      #
      # To clarify, the bug is that *we ended up with two instances for the same file*, not
      # that the instances are now corrupting the file.
      #
      # This is why we're testing this in `with_labels`. It's the only use case we've found
      # where we ended up with two instances (before we fixed that bug). `with_labels` is
      # incidental; if we find another way to get "duplicate" instances, we should add this
      # same exact test, except for the first line, where we need to instead reproduce
      # whatever bug gets us that second instance.
      #
      # The first thing we need to understand is why having two instances of `FileMappedDict`
      # corrupts the files:
      #
      # `FileMappedDict` keeps track, in an internal variable, of how many bytes in the file
      # have been used. When adding a new "entry" (observing a new labelset), it serializes
      # it and adds it at "the end" (according to its internal byte counter), and it also updates
      # the counter at the beginning of the file. However, it never re-reads that counter
      # from the file, because there shouldn't be any reason for it to have changed.
      #
      # If there are two instances pointing to the same file, initially they will both
      # share that internal counter, as they do the first read of the file, but if then
      # each of them adds an entry, their internal "length" counters will disagree, and
      # they'll start overwriting each other's entries.
      #
      # Importantly, if all of the entries happen to have the same length, it will be "fine".
      # Some of the labelsets will effectively disappear, but there will be no corruption,
      # because all the important things will fall in the right offsets by pure chance. This
      # would be very rare in production, but in a test, it's what normally happens because
      # we set all labels to "foo", "bar", etc. This is the reason for "longervalue" below;
      # we need to have different labelset lengths to reproduce the corruption.
      #
      # With this background about the internals, we can now get to why the specific sequence of
      # steps below ends up in corrupted files.
      #
      # For this to make sense, I'll need to describe the contents of the file at each step.
      # I'll represent it like this: `27|labelset1,value1|labelset2,value2|labelset3,value3|`
      #
      # These are not the bytes we store in the file, but conceptually it's equivalent,
      # with two caveats:
      # - The counter at the beginning (27 == 3 * 9) here shows the combined length of labelsets.
      #   It'd normally also include the length of values, but doing that makes this explanation
      #   much harder to follow.
      # - Each entry also starts with a 4-byte int specifying the length of its labelset, so
      #   we know how much to read. Again, I'm omitting that for readability.
      #
      #
      # Steps to reproduce:
      # - We declare `counter` and `counter_with_labels` as a clone. Neither has read the file.
      # - We increment `counter`, which creates the file and adds the entry ("labelset1")
      #   - File: `9|labelset1,value1|`
      # - We increment `counter_with_labels`, which reads the file, and adds the new entry
      #   to it ("muchlongerlabelset2").
      #   - File: `28|labelset1,value1|muchlongerlabelset2, value2|`
      #   - `counter` and `counter_with_labels` now disagree about the length of this file
      #     (`counter` doesn't know the file has grown).
      # - We now add a new entry to `counter` ("labelset3"), which thinks the file is shorter
      #   than it actually is.
      #   - File: `18|labelset1,value1|labelset3,value3|et2, value2|`
      #   - The initial counter reflects both labelsets for `counter`; then we have those
      #     labelsets; and finally some "garbage" after the "end" (the garbage is the
      #     last few bytes of the much longer entry added before by `counter_with_labels`)
      #   - so far, though, we're still good. If you read the file, all entries are "fine",
      #     because you're only reading up to the "18" length specified at the beginning.
      #   - for the problem to manifest itself, we need to increment that counter at the
      #     beginning, so we'll read the garbage. **BUT**, if we add a new labelset to
      #     `counter`, it'll overwrite the "garbage" with good data, and the file will
      #     continue to be fine.
      # - We add a new entry to `counter_with_labels`. This updates the length counter at
      #   the beginning of the file.
      #   - File: `47|labelset1,value1|labelset3,value3|et2, value2|muchlongerlabelset4, value4|`
      #
      # - Now the file is properly corrupted. When reading it, `FileMappedDict` sees:
      #   - labelset1,value1 (cool)
      #   - labelset3,value3 (cool)
      #   - et2, value2 (boom)
      #     |-> the beginning of this entry is garbage because we're actually at the middle
      #         of an entry, not a beginning.
      #
      # What actually breaks is that each of these entries is expected to have, at their
      # beginning, the length in bytes of its labelset, so we know how much to read.
      # Now we have garbage in that position, and `FileMappedDict` will either:
      # - Try to interpret those four bytes as a long, get an invalid result.
      # - Try to read an invalid amount of data (maybe a negative amount).
      # - After reading the labelset, try to read the float and go past the end of the file
      # - Actually read what it thinks is a float, try to `unpack` it, and fail because
      #   it's actually garbage.
      # - I'm sure there are other fun ways for it to fail.
      it "doesn't corrupt the data files" do
        counter_with_labels = counter.with_labels({ foo: 'longervalue'})

        # Initialize / read the files for both views of the metric
        counter.increment(labels: { foo: 'value1', bar: 'zzz'})
        counter_with_labels.increment(by: 2, labels: {bar: 'zzz'})

        # After both MetricStores have their files, add a new entry to both
        counter.increment(labels: { foo: 'value1', bar: 'aaa'}) # If there's a bug, we partially overwrite { foo: 'longervalue', bar: 'zzz'}
        counter_with_labels.increment(by: 2, labels: {bar: 'aaa'}) # Extend the file so we read past that overwrite

        expect { counter.values }.not_to raise_error # Check it hasn't corrupted our files
        expect { counter_with_labels.values }.not_to raise_error # Check it hasn't corrupted our files

        expected_values = {
          {foo: 'value1', bar: 'zzz'} => 1.0,
          {foo: 'value1', bar: 'aaa'} => 1.0,
          {foo: 'longervalue', bar: 'zzz'} => 2.0,
          {foo: 'longervalue', bar: 'aaa'} => 2.0,
        }

        # Both views must agree on all four series — if the stores had been
        # separate instances, some entries would be missing or unreadable.
        expect(counter.values).to eql(expected_values)
        expect(counter_with_labels.values).to eql(expected_values)
      end
    end
  end
125 | 265 | end |
0 commit comments