|
| 1 | +^:kindly/hide-code |
| 2 | +^{:clay {:title "Elements of Malli" ; page metadata attached to the ns form |
| 3 | + :quarto {:type :post |
| 4 | + :author [:bsless] |
| 5 | + :date "2025-10-18" |
| 6 | + :description "Elements of a high performance schema validation library" |
| 7 | + :category :libs |
| 8 | + :tags [:schema :spec] |
| 9 | + :keywords [:malli]}}} |
| 10 | +(ns malli.elements-of-malli |
| 11 | + (:require |
| 12 | + [clojure.spec.alpha :as s] |
| 13 | + [babashka.http-client.websocket :as ws] |
| 14 | + [malli.core :as m] |
| 15 | + [clojure.edn :as edn] |
| 16 | + [jsonista.core :as json] |
| 17 | + [scicloj.kindly.v4.api :as kindly] |
| 18 | + [scicloj.kindly.v4.kind :as kind]) |
| 19 | + (:import |
| 20 | + (java.util.concurrent ScheduledExecutorService Executors TimeUnit))) |
| 21 | + |
| 22 | +^:kindly/hide-code |
| 23 | +(def ws-uri "wss://stream.bybit.com/v5/public/linear") |
| 24 | + |
| 25 | +^:kindly/hide-code |
| 26 | +(defonce scheduled-executor |
| 27 | + (Executors/newScheduledThreadPool 1)) |
| 28 | + |
| 29 | +^:kindly/hide-code |
| 30 | +(defn schedule! ; run f repeatedly at a fixed rate; returns whatever .scheduleAtFixedRate returns |
| 31 | + ([f period tu] ; 3-arity: use the namespace-level scheduled-executor |
| 32 | + (schedule! scheduled-executor f period tu)) |
| 33 | + ([ex f period tu] ; 4-arity: explicit executor, initial delay of 0 |
| 34 | + (schedule! ex f 0 period tu)) |
| 35 | + ([^ScheduledExecutorService scheduled-executor ^Runnable f delay period ^TimeUnit tu] ; this parameter shadows the global of the same name |
| 36 | + (.scheduleAtFixedRate scheduled-executor f ^long delay ^long period tu))) |
| 37 | + |
| 38 | +^:kindly/hide-code |
| 39 | +(def buff (atom [])) |
| 40 | +^:kindly/hide-code |
| 41 | +(comment |
| 42 | + (spit "out.edn" @buff) |
| 43 | + (reset! buff (clojure.edn/read-string (slurp "out.edn")))) |
| 44 | + |
| 45 | +^:kindly/hide-code |
| 46 | +(defn msg-handler ; :on-message callback: parse the frame as JSON, append it to buff and print it |
| 47 | + [_ws data _last?] ; no type hint: data only needs to be stringifiable; NOTE(review): _last? is ignored, so each frame is assumed to be a complete JSON document |
| 48 | + (let [msg (json/read-value (str data) json/keyword-keys-object-mapper)] ; str avoids depending on the non-public java.nio.HeapCharBuffer class |
| 49 | + (swap! buff conj msg) |
| 50 | + (println msg))) |
| 51 | + |
| 52 | +^:kindly/hide-code |
| 53 | +(comment ; REPL scratchpad: forms inside a comment form are not evaluated when the namespace loads |
| 54 | + (def ws (ws/websocket |
| 55 | + {:uri ws-uri |
| 56 | + :on-open (fn [_ws] (println 'open)) |
| 57 | + :on-close (fn [_ws status reason] (println 'close status reason)) |
| 58 | + :on-ping (fn [_ws data] (println 'ping data)) |
| 59 | + :on-pong (fn [_ws data] (println 'pong data)) |
| 60 | + :on-error (fn [_ws er] (println 'error er)) |
| 61 | + :on-message #'msg-handler ; passed as a var; presumably so a re-evaluated msg-handler is picked up - confirm |
| 62 | + })) |
| 63 | + |
| 64 | + (def ping-task (schedule! (fn [] (ws/ping! ws (.getBytes ""))) 20 TimeUnit/SECONDS)) ; empty-payload ping every 20 seconds on the default executor |
| 65 | + (comment ; stop the periodic ping |
| 66 | + (.cancel ping-task true)) |
| 67 | + |
| 68 | + (ws/send! ws (json/write-value-as-bytes {:op :subscribe :args ["publicTrade.BTCUSDT"]})) ; NOTE(review): payload is a byte array - confirm the server accepts the subscribe request in this form |
| 69 | + (ws/close! ws)) |
| 70 | + |
| 71 | +^:kindly/hide-code |
| 72 | +(defn demo ; pair the sample values with their validation results against schema |
| 73 | + [xs schema] |
| 74 | + (let [valid? (m/validator schema)] ; build the validator once and reuse it for every element |
| 75 | + (assoc {:value xs} ; :value holds the inputs unchanged |
| 76 | + :valid? (map valid? xs)))) ; :valid? holds one boolean per input, in the same order |
| 77 | + |
| 78 | +;; --- |
| 79 | + |
| 80 | +;; ## Abstract |
| 81 | + |
| 82 | +;; This post goes over the elements of `metosin/malli`, a high performance, data driven schema library for Clojure(Script). |
| 83 | +;; Unlike plumatic/schema and clojure.spec, it contains additional |
| 84 | +;; features such as coercion, explanation, generation, extension |
| 85 | +;; mechanisms and more. |
| 86 | +;; It breaks down the elements of Malli, goes over its main features, |
| 87 | +;; demonstrates how to use it effectively and touches on its potential |
| 88 | +;; applications for data exploration. |
| 89 | + |
| 90 | +;; ## Introduction |
| 91 | + |
| 92 | +;; Malli is a high performance library for data driven schemas in Clojure(Script). |
| 93 | + |
| 94 | + |
| 95 | +;; ### Schemas |
| 96 | + |
| 97 | +;; Schemas are a way of specifying facts about data at a certain point. |
| 98 | +;; In Clojure, we usually enforce them at system boundaries. |
| 99 | +;; Additionally, they can be enforced at test time more pervasively |
| 100 | +;; across the code base, and to render metadata that other tools like |
| 101 | +;; clj-kondo can consume. |
| 102 | +;; Malli is an alternative to clojure.spec and plumatic/schema, with |
| 103 | +;; different design goals and considerations. |
| 104 | + |
| 105 | +;; ### Data Driven |
| 106 | + |
| 107 | +;; Malli's schema syntax is Just Data. |
| 108 | +;; Schemas can be serialized, persisted and round tripped. |
| 109 | +;; The main syntax is similar to hiccup |
| 110 | + |
| 111 | +;; For example, validating a value with a schema: |
| 112 | + |
| 113 | +(m/validate |
| 114 | + [:int {:min 1 :max 3}] ; this is the malli schema |
| 115 | + 4) |
| 116 | + |
| 117 | +;; ### High performance |
| 118 | + |
| 119 | +;; Even for a very simple use case |
| 120 | + |
| 121 | +;; ```clojure |
| 122 | +;; (def v (m/validator [:int {:min 1 :max 3}])) |
| 123 | +;; (dotimes [_ 10] (time (dotimes [_ 1e7] (do (v 2) (v 4) (v 5))))) |
| 124 | +;; ;; Elapsed time: 83.082345 msecs |
| 125 | +;; (s/def ::v (s/and int? #(<= % 3) #(<= 1 %))) |
| 126 | +;; (dotimes [_ 10] (time (dotimes [_ 1e7] (do (s/valid? ::v 2) (s/valid? ::v 4) (s/valid? ::v 5))))) |
| 127 | +;; ;; Elapsed time: 1775.427095 msecs |
| 128 | +;; ``` |
| 129 | + |
| 130 | +;; Malli is about 20x faster than clojure.spec |
| 131 | + |
| 132 | +;; ## Mechanics |
| 133 | + |
| 134 | +;; Before we can enjoy High Performance (TM), we need to learn _mechanics_ |
| 135 | + |
| 136 | +;; From a bird's eye view, Malli has several "types" of schemas: |
| 137 | + |
| 138 | +;; - Base values - An integer, boolean, string, etc. |
| 139 | +;; - Boxes - If you like types, all kinds of Kind1<a>, Kind2<a,b>, etc. |
| 140 | +;; Concretely these can be negation (not int), a reference to another schema, or repetition (vector of ints). |
| 141 | +;; - Comparator schemas - equality, disequality, ordering, etc. |
| 142 | +;; - Conjunctions - `and`, map descriptions, concatenation, tuples. |
| 143 | +;; - Disjunctions - `or`, multi schemas (like multi methods), sequence alternations. These backtrack. |
| 144 | + |
| 145 | +;; Schema syntax can be: |
| 146 | +;; - keywords: `:int` |
| 147 | +;; - functions or vars: `int?` |
| 148 | +;; - vector with properties and maybe children: `[:int {:min 1}]`, `[:map {:closed true} [:a :int]]` |
| 149 | + |
| 150 | +;; Schemas themselves are either looked up in a registry or need to implement some protocols. |
| 151 | + |
| 152 | +;; Importantly, malli contains mini "compilers" that derive worker functions from schemas. |
| 153 | +;; We've already seen one above. |
| 154 | +;; These compilers achieve higher performance than all other libraries, and higher than calling `m/validate` directly on every invocation. |
| 155 | +;; Compare: |
| 156 | + |
| 157 | +(m/validate [:int {:min 1 :max 3}] 4) |
| 158 | +;; and |
| 159 | +(def v (m/validator [:int {:min 1 :max 3}])) |
| 160 | +(v 4) |
| 161 | + |
| 162 | +;; The worker functions are: |
| 163 | +;; - validator: Any -> Boolean. |
| 164 | +;; - explainer: Any -> null | Explanation. |
| 165 | +;; - parser/unparser: parser converts disjunctions to tagged tuples. |
| 166 | +;; - encoder/decoder: decoder _tries_ to decode a value according to schema and supplied transformer. Encoder goes the other way. |
| 167 | +;; - coercer: decodes then validates. |
| 168 | + |
| 169 | +;; ### Base Values |
| 170 | + |
| 171 | +;; #### Predicate schemas |
| 172 | + |
| 173 | +;; This is a long list, you don't have to remember all of them |
| 174 | + |
| 175 | +^:kindly/vector |
| 176 | +'[any? some? number? integer? int? pos-int? neg-int? nat-int? pos? neg? float? double? |
| 177 | + boolean? string? ident? simple-ident? qualified-ident? keyword? simple-keyword? |
| 178 | + qualified-keyword? symbol? simple-symbol? qualified-symbol? uuid? uri? inst? seqable? |
| 179 | + indexed? map? vector? list? seq? char? set? nil? false? true? |
| 180 | + zero? coll? associative? sequential? ifn? fn? |
| 181 | + rational? ratio? bytes? decimal?] |
| 182 | + |
| 183 | +;; Predicate schemas don't receive any arguments. |
| 184 | + |
| 185 | +;; #### Type schemas |
| 186 | + |
| 187 | +(keys (m/type-schemas)) |
| 188 | + |
| 189 | +;; string, int, float and double receive min/max properties |
| 190 | +^:kind/table |
| 191 | +(demo |
| 192 | + [(apply str (repeat 3 "a")) |
| 193 | + (apply str (repeat 9 "a")) |
| 194 | + (apply str (repeat 20 "a"))] |
| 195 | + [:string {:min 5 :max 10}]) |
| 196 | + |
| 197 | +;; ### Boxes! |
| 198 | + |
| 199 | +;; > what's in the box?! |
| 200 | + |
| 201 | +;; #### Seqable, every, vector, oh my |
| 202 | + |
| 203 | +^:kind/table |
| 204 | +(let [schemas [:vector :sequential :seqable :every :set] |
| 205 | + values [[1 2 3] (list 1 2 3) #{1 2 3} (range 1 4) (sorted-set 1 2 3)]] |
| 206 | + (into |
| 207 | + {:value values |
| 208 | + :type (map type values)} |
| 209 | + (map vector |
| 210 | + schemas |
| 211 | + (apply map list |
| 212 | + (for [value values] |
| 213 | + (for [schema schemas] |
| 214 | + (m/validate [schema :int] value))))))) |
| 215 | + |
| 216 | +;; Note how schemas can be created programmatically. |
| 217 | +;; The only difference between a seqable and every is that every is _bounded_ |
| 218 | +(m/validate [:every :int] (conj (vec (range 1000)) nil)) |
| 219 | +(m/validate [:every :int] (concat (range 1000) [nil])) |
| 220 | + |
| 221 | +;; We also have map-of, which works like you'd expect: |
| 222 | + |
| 223 | +^:kind/table |
| 224 | +(demo |
| 225 | + [{1 2 3 4} {1 2 3 :a} {3 4}] |
| 226 | + [:map-of {:min 2 :max 4} :int :int]) |
| 227 | + |
| 228 | +;; #### Maybe Not |
| 229 | + |
| 230 | +^:kind/table |
| 231 | +(demo |
| 232 | + [2 nil "nil"] |
| 233 | + [:maybe :int]) |
| 234 | + |
| 235 | +(m/validate [:not :int] 'cthulhu) |
| 236 | + |
| 237 | +;; #### References and schema schemas |
| 238 | + |
| 239 | +;; While this requires getting into registries (LATER), consider this example |
| 240 | + |
| 241 | +(m/validate |
| 242 | + [:schema ;; schema schema |
| 243 | + {:registry ;; registry in properties |
| 244 | + {::cons ;; definition of cons schema |
| 245 | + [:maybe [:tuple pos-int? [:ref ::cons]]]}} ;; self reference |
| 246 | + [:ref ::cons] ;; argument to schema schema is a schema. reference to schema from registry |
| 247 | + ] |
| 248 | + [16 [64 [26 [1 [13 nil]]]]]) |
| 249 | + |
| 250 | +;; ### Comparators |
| 251 | + |
| 252 | +[:> |
| 253 | + :>= |
| 254 | + :< |
| 255 | + :<= |
| 256 | + := |
| 257 | + :not=] |
| 258 | + |
| 259 | +;; #### Egal |
| 260 | + |
| 261 | +;; That's your ground single value. Some(1) |
| 262 | + |
| 263 | +(m/validate [:= 1] 1) |
| 264 | +(m/validate [:= 1] "1") |
| 265 | + |
| 266 | +;; Its counterpart is anything but |
| 267 | +(m/validate [:not= 1] 1) |
| 268 | +(m/validate [:not= 1] "1") |
| 269 | + |
| 270 | +;; #### Everything else |
| 271 | + |
| 272 | +(m/validate [:> 1] 2) |
| 273 | +(m/validate [:> 1] 1) |
| 274 | + |
| 275 | +;; ### Conjunctions |
| 276 | + |
| 277 | +;; #### Tuples |
| 278 | + |
| 279 | +;; We've seen a tuple before, but for completeness |
| 280 | + |
| 281 | +(m/validate [:tuple :int :boolean] [1 true]) |
| 282 | + |
| 283 | +;; #### Maps |
| 284 | + |
| 285 | +;; Maps are the bread and butter of information transfer in the Clojure |
| 286 | +;; world, and frankly, around the web (what are JSON objects?). |
| 287 | + |
| 288 | +;; A map schema consists of type, optional properties, and children. |
| 289 | +;; Each child can be thought of as an entry schema, which is why I think of maps |
| 290 | +;; as a conjunction of multiple entry schemas. |
| 291 | + |
| 292 | +(m/validate |
| 293 | + [:map ; type |
| 294 | + {:closed true |
| 295 | + :registry {::c :boolean} |
| 296 | + } ; properties |
| 297 | + [:a :int] ; entry schema |
| 298 | + [:b :double] ; entry schema |
| 299 | + ::c ; reference to a schema (but not a reference schema) |
| 300 | + ] |
| 301 | + {:a 1 :b 2.3 ::c true}) |
| 302 | + |
| 303 | +;; It may be obvious, but it is important: all collection schemas nest |
| 304 | + |
| 305 | +(def Address |
| 306 | + [:map |
| 307 | + [:id :string] |
| 308 | + [:tags [:set :keyword]] |
| 309 | + [:address |
| 310 | + [:map |
| 311 | + [:street :string] |
| 312 | + [:city :string] |
| 313 | + [:zip :int] |
| 314 | + [:lonlat [:tuple :double :double]]]]]) |
| 315 | + |
| 316 | +(m/validate |
| 317 | + Address |
| 318 | + {:id "Lillan" |
| 319 | + :tags #{:artesan :coffee :hotel} |
| 320 | + :address {:street "Ahlmanintie 29" |
| 321 | + :city "Tampere" |
| 322 | + :zip 33100 |
| 323 | + :lonlat [61.4858322, 23.7854658]}}) |
0 commit comments