Skip to content

Commit 1e76a7e

Browse files
author
Mike Travers
committed
piton, dotplots and box-dotplot kind of working
1 parent bac9732 commit 1e76a7e

1 file changed

Lines changed: 214 additions & 27 deletions

File tree

src/data_visualization/violin.clj

Lines changed: 214 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -12,61 +12,248 @@
1212
(ns scicloj.data-visualization.violin
1313
(:require
1414
[clojure.data.json :as json]
15+
[tablecloth.api :as tc]
1516
[scicloj.kindly.v4.api :as kindly]
1617
[scicloj.kindly.v4.kind :as kind]
1718
))
1819

20+
;;; I can't stand writing long text in ;;; comments, so import it from actual .md files
21+
^{:kind/md true :kindly/hide-code true}
22+
(slurp "src/data_visualization/violin1.md")
23+
24+
;;; # Get some data
25+
26+
;;; ## Movie Dataset
1927

2028
;;; Get the movie dataset
2129
(def movie-data
22-
(json/read-str (slurp "https://vega.github.io/editor/data/movies.json") ))
30+
(json/read-str (slurp "https://vega.github.io/editor/data/movies.json") ))
2331

32+
;;; Here we'll take a look at a sample of the data (selected columns and just a few rows).
33+
(kind/table
34+
{:row-maps (take 10 movie-data)
35+
:column-names ["Title" "IMDB Rating" "US Gross" "Distributor" "Production Budget" "MPAA Rating" "Major Genre"]})
2436

25-
;;; Make a simple boxplot showing US Gross by Genre
26-
(kind/vega-lite
27-
{:mark {:type "boxplot" :tooltip {:content "data"}},
28-
:data {:values movie-data}
37+
38+
;;; ## Penguin dataset
39+
40+
(def penguin-data-url "https://raw.githubusercontent.com/ttimbers/palmerpenguins/refs/heads/file-variants/inst/extdata/penguins.tsv")
41+
42+
(def penguin-data
43+
(tc/dataset penguin-data-url {:key-fn keyword}))
44+
45+
(kind/table (tc/random penguin-data 10))
46+
47+
;;; # Boxplot
48+
49+
50+
;;; A basic boxplot shows the distribution of a single variable. Here we look at the distribution of US gross profits:
51+
^:kind/vega-lite
52+
{:mark {:type "boxplot"}
53+
:data {:values movie-data}
2954
:encoding
30-
{"color" {:field "Major Genre", :type "nominal" :legend false},
31-
"y" {:field "Major Genre",
32-
:type "nominal"},
33-
"x" {:field "US Gross",
55+
{"x" {:field "US Gross"
3456
:type "quantitative"}
35-
:tooltip {:field "Title"
36-
;; :type "quantitative"
37-
}},
57+
:tooltip {:field "Title"}}
3858
:width 800
39-
})
59+
}
60+
61+
62+
63+
64+
65+
66+
;;; But boxplots are more useful when you compare distributions given a second variable. Here we see the different distributions for specific genres of movie.
67+
68+
^:kind/vega-lite
69+
{:mark {:type "boxplot"}
70+
:data {:values movie-data}
71+
:encoding
72+
{"x" {:field "US Gross"
73+
:type "quantitative"}
74+
"y" {:field "Major Genre"
75+
:type "nominal"}
76+
"color" {:field "Major Genre" :type "nominal" :legend false}
77+
:tooltip {:field "Title"}}
78+
:width 800
79+
}
80+
81+
;;; This shows us the median (white line)
82+
83+
;;; # Violins
4084

4185

42-
;; That's nice, but how can we add violins to this?
4386
(kind/vega-lite
44-
{:mark {:type "area"},
87+
{:mark {:type "area"}
4588
:data {:values movie-data}
4689
:transform [{:density "US Gross"
4790
:groupby ["Major Genre"]
48-
:extent [0, 200000000]}]
91+
:extent [0 200000000]}]
92+
:height 50 ;this is the height of each row (facet)
4993
:encoding
50-
{"color" {:field "Major Genre", :type "nominal" :legend false},
51-
"y" {:field "density",
94+
{"color" {:field "Major Genre" :type "nominal" :legend false}
95+
"y" {:field "density"
5296
:type "quantitative"
5397
:stack "center"
5498
:axis false
55-
}, ;this reflect-doubles the area plot to produce the violin shape
56-
"x" {:field "value",
99+
} ;this reflect-doubles the area plot to produce the violin shape
100+
"x" {:field "value"
57101
:type "quantitative"}
58-
"facet" {:field "Major Genre"
59-
:type "nominal"
60-
:columns 1
61-
:spacing 0
62-
:legend :left
63-
}
64-
},
102+
"row" {:field "Major Genre"
103+
:type "nominal"
104+
:columns 1
105+
:spacing 0
106+
:header {:labelAngle 0 :labelAlign "left"}
107+
}
108+
}
65109
:width 800
66110
})
67111

68112

113+
(defn violin-plot
  "Builds a Vega-Lite spec (a plain Clojure map) that draws one horizontal
  violin per group, stacked as facet rows.

  data        - Vega-Lite data map, used verbatim as `:data`
                (e.g. `{:values rows}` or `{:url ... :format ...}`)
  value-field - name of the field whose density is estimated
  group-field - name of the field used for grouping, color and row faceting
  min, max    - bounds handed to the density transform as `:extent`

  Records whose value-field equals the string 'NA' are filtered out before
  the density estimate."
  [data value-field group-field min max]
  {:mark {:type "area"}
   :data data
   :transform [{:filter (format "datum['%s'] != 'NA'" value-field)}
               {:density value-field
                :groupby [group-field]
                ;; :bandwidth 1.0
                :extent [min max]}]
   :height 50                           ;height of each row (facet)
   :width 800
   :encoding
   {:color {:field group-field :type "nominal" :legend false}
    ;; centering the stack mirrors the area plot around its baseline,
    ;; which produces the violin shape
    :y {:field "density"
        :type "quantitative"
        :stack "center"
        :axis false}
    :x {:field "value"
        :type "quantitative"}
    ;; no `:columns` here: that property belongs to the wrappable `facet`
    ;; channel, not to `row`
    :row {:field group-field
          :type "nominal"
          :spacing 0
          :header {:labelAngle 0 :labelAlign "left"}}}})
141+
142+
;;; TODO show box, whiskers, points
143+
;;; TODO need a better dataset, this is boring
144+
145+
;;; # Here's a more scientific example
146+
147+
;; The earlier source was a signed, time-limited Kaggle storage link, which
;; stops working once its signature expires. Load a stable public copy instead.
;; NOTE(review): confirm this copy exposes the "petalWidth" and "species"
;; fields used by the plot below.
(def iris
  (json/read-str (slurp "https://cdn.jsdelivr.net/npm/vega-datasets@1/data/iris.json")))
149+
150+
^:kind/vega-lite
151+
(violin-plot {:values iris} "petalWidth" "species" 0 4)
152+
153+
;; Group by a single column. The previous group field "species island" reads
;; like two column names fused together and would not match any one field.
;; NOTE(review): confirm the column name against the TSV header.
^:kind/vega-lite
(violin-plot {:url penguin-data-url
              :format {:type "tsv"}}
             "flipper_length_mm" "species"
             150 250)
159+
160+
161+
;;; # Dotplot
162+
163+
(defn dot-plot
  "Builds a Vega-Lite spec (a plain Clojure map) that draws a jittered dot
  strip per group, stacked as facet rows.

  data        - Vega-Lite data map, used verbatim as `:data`
  value-field - name of the quantitative field plotted on x
  group-field - name of the field used for color and row faceting
  min, max    - x scale domain

  Records whose value-field equals the string 'NA' are filtered out. Each
  remaining record gets a `jitter` field computed with `random()`, used as
  its y position so that points do not overlap on a single line."
  [data value-field group-field min max]
  {:mark {:type "point" :tooltip {:content :data}}
   :data data
   :transform [{:filter (format "datum['%s'] != 'NA'" value-field)}
               {:calculate "random()" :as "jitter"}]
   :height 50                           ;height of each row (facet)
   :width 800
   :encoding
   {:color {:field group-field :type "nominal" :legend false}
    :x {:field value-field
        :type "quantitative"
        :scale {:domain [min max]}}
    :y {:field "jitter"
        :type "quantitative"
        :axis false}
    ;; no `:columns` here: that property belongs to the wrappable `facet`
    ;; channel, not to `row`
    :row {:field group-field
          :type "nominal"
          :spacing 0
          :header {:labelAngle 0 :labelAlign "left"}}}})
187+
188+
;; Group by a single column. The previous group field "species island" reads
;; like two column names fused together and would not match any one field.
;; NOTE(review): confirm the column name against the TSV header.
^:kind/vega-lite
(dot-plot {:url penguin-data-url
           :format {:type "tsv"}}
          "flipper_length_mm" "species"
          150 250)
195+
196+
;;; TODO vertical violins
197+
;;; TODO controls
198+
;;; TODO layering
199+
;;; TODO more options
200+
;;; TODO merge
201+
202+
203+
;;; # Combining layers
204+
205+
206+
(defn box-dot-plot
  "Builds a faceted, layered Vega-Lite spec (a plain Clojure map): for each
  group, a gray jittered dot strip with a boxplot drawn on top.

  data        - Vega-Lite data map, shared by all facets and layers
  value-field - name of the quantitative field plotted on x
  group-field - name of the field used for row faceting and box color
  min, max    - x scale domain

  Only the point layer filters out records whose value-field equals the
  string 'NA'; the boxplot layer receives the data unfiltered."
  [data value-field group-field min max]
  {;; data is common to every facet and layer
   :data data

   :facet
   {:row {:field group-field
          :type "nominal"
          :header {:labelAngle 0 :labelAlign "left"}}}

   ;; With the facet operator, spacing between sub-views is a property of the
   ;; faceted spec itself; inside the row field definition it had no effect.
   :spacing 0

   :spec
   {:height 50                          ;height of each row (facet)
    :width 800
    ;; x encoding is shared by both layers
    :encoding
    {:x {:field value-field
         :type "quantitative"
         :scale {:domain [min max]}}}

    :layer
    [;; dot layer: random vertical jitter keeps points from overlapping
     {:mark {:type "point" :tooltip {:content :data}}
      :transform [{:filter (format "datum['%s'] != 'NA'" value-field)}
                  {:calculate "random()" :as "jitter"}]
      :encoding
      {:color {:value "gray"}
       :y {:field "jitter"
           :type "quantitative"
           :axis false}}}

     ;; box layer (outliers are hidden; the dot layer already shows them)
     ;; TODO widen box
     {:mark {:type "boxplot" :outliers false}
      :encoding
      {:color {:field group-field :type "nominal" :legend false}}}]}})
70250

71251

72252

253+
;; Group by a single column. The previous group field "species island" reads
;; like two column names fused together and would not match any one field.
;; NOTE(review): confirm the column name against the TSV header.
^:kind/vega-lite
(box-dot-plot {:url penguin-data-url
               :format {:type "tsv"}}
              "flipper_length_mm" "species"
              150 250)

0 commit comments

Comments
 (0)