Skip to content

Commit a09d4c1

Browse files
nateshim-indico and Nathanael Shim authored
Update CreateExport query (#162)
* updating createExport query
* addresses comments; runs sphinx-build to make sure documentation is updated

Co-authored-by: Nathanael Shim <nathanaelshim@Nathanaels-MacBook-Pro.local>
1 parent 8f867a3 commit a09d4c1

7 files changed

Lines changed: 112 additions & 47 deletions

File tree

docs/exports.html

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -141,17 +141,18 @@
141141

142142
<dl class="py class">
143143
<dt class="sig sig-object py" id="indico.queries.export.CreateExport">
144-
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">indico.queries.export.</span></span><span class="sig-name descname"><span class="pre">CreateExport</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_id</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">subset_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">labelset_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">file_info</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">combine_labels</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">anonymous</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">wait</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#indico.queries.export.CreateExport" title="Permalink to this definition"></a></dt>
144+
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">indico.queries.export.</span></span><span class="sig-name descname"><span class="pre">CreateExport</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_id</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">labelset_id</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">model_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">frozen_labelset_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">combine_labels</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'ALL'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">file_info</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">anonymous</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">wait</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#indico.queries.export.CreateExport" title="Permalink to this definition"></a></dt>
145145
<dd><p>Create an export job for a dataset.</p>
146146
<dl class="field-list simple">
147147
<dt class="field-odd">Parameters</dt>
148148
<dd class="field-odd"><ul class="simple">
149149
<li><p><strong>dataset_id</strong> (<em>int</em>) – Dataset to create the export for</p></li>
150-
<li><p><strong>subset_ids</strong> (<code class="xref py py-data docutils literal notranslate"><span class="pre">Optional</span></code>[<code class="xref py py-class docutils literal notranslate"><span class="pre">List</span></code>[<code class="xref py py-class docutils literal notranslate"><span class="pre">int</span></code>]]) – (List(int)): Subset ids to export rows sets</p></li>
150+
<li><p><strong>labelset_id</strong> (<code class="xref py py-class docutils literal notranslate"><span class="pre">int</span></code>) – (int): Labelset column id to export</p></li>
151151
<li><p><strong>column_ids</strong> (<em>List</em><em>(</em><em>int</em><em>)</em>) – Data column ids to export</p></li>
152-
<li><p><strong>labelset_ids</strong> (<code class="xref py py-data docutils literal notranslate"><span class="pre">Optional</span></code>[<code class="xref py py-class docutils literal notranslate"><span class="pre">List</span></code>[<code class="xref py py-class docutils literal notranslate"><span class="pre">int</span></code>]]) – (List(int)): Labelset column ids to export</p></li>
152+
<li><p><strong>model_ids</strong> (<em>List</em><em>(</em><em>int</em><em>)</em>) – Model ids to include predictions from</p></li>
153+
<li><p><strong>frozen_labelset_ids</strong> (<code class="xref py py-data docutils literal notranslate"><span class="pre">Optional</span></code>[<code class="xref py py-class docutils literal notranslate"><span class="pre">List</span></code>[<code class="xref py py-class docutils literal notranslate"><span class="pre">int</span></code>]]) – (List(int)): frozen labelset ids to limit examples by</p></li>
154+
<li><p><strong>combine_labels</strong> (<code class="xref py py-class docutils literal notranslate"><span class="pre">LabelResolutionStrategy</span></code>) – (LabelResolutionStrategy): One row per example, combine labels from multiple labels into a single row</p></li>
153155
<li><p><strong>file_info</strong> (<code class="xref py py-class docutils literal notranslate"><span class="pre">bool</span></code>) – (bool): Include datafile information</p></li>
154-
<li><p><strong>combine_labels</strong> (<code class="xref py py-class docutils literal notranslate"><span class="pre">bool</span></code>) – (bool): One row per example, combine labels from multiple labels into a single row</p></li>
155156
<li><p><strong>anonymous</strong> (<code class="xref py py-class docutils literal notranslate"><span class="pre">bool</span></code>) – (bool): Anonymize user information</p></li>
156157
<li><p><strong>wait</strong> (<code class="xref py py-class docutils literal notranslate"><span class="pre">bool</span></code>) – (bool): Wait for the export to complete. Default is True</p></li>
157158
</ul>

docs/searchindex.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/create-download-export.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
from indico import IndicoClient, IndicoConfig
2+
from indico.queries import GetDataset, CreateExport, DownloadExport
3+
import pandas as pd
4+
5+
6+
"""Example 1: Create export from dataset and download as csv"""
7+
8+
dataset_id = 6826
9+
10+
# Create an Indico API client
11+
my_config = IndicoConfig(
12+
host="app.indico.io", api_token_path="./path/to/indico_api_token.txt"
13+
)
14+
client = IndicoClient(config=my_config)
15+
16+
# Get dataset object
17+
dataset = client.call(GetDataset(id=dataset_id))
18+
19+
# Create export object using dataset's id and labelset id
20+
export = client.call(
21+
CreateExport(
22+
dataset_id=dataset.id,
23+
labelset_id=dataset.labelsets[0].id,
24+
wait=True
25+
)
26+
)
27+
28+
# Download the finished export's CSV data by passing the export's id to DownloadExport
29+
csv = client.call(DownloadExport(export.id))

indico/queries/export.py

Lines changed: 42 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -4,63 +4,68 @@
44

55
from indico.client import GraphQLRequest, RequestChain, Debouncer
66
from indico.errors import IndicoNotFound, IndicoRequestError
7-
from indico.types.export import Export
7+
from indico.types.export import LabelResolutionStrategy, Export
88
from indico.queries.storage import RetrieveStorageObject
99

1010

1111
class _CreateExport(GraphQLRequest):
1212
query = """
1313
mutation CreateExport(
14-
$datasetId: Int!,
14+
$datasetId: Int!,
15+
$labelsetId: Int!,
1516
$columnIds: [Int],
16-
$subsetIds: [Int],
17-
$labelsetIds: [Int],
17+
$modelIds: [Int],
18+
$frozenLabelsetIds: [Int],
19+
$combineLabels: LabelResolutionStrategy,
1820
$fileInfo: Boolean,
19-
$combineLabels: Boolean,
2021
$anonymous: Boolean
2122
) {
2223
createExport(
2324
datasetId: $datasetId,
25+
labelsetId: $labelsetId,
2426
columnIds: $columnIds,
25-
subsetIds: $subsetIds,
27+
modelIds: $modelIds,
28+
frozenLabelsetIds: $frozenLabelsetIds,
29+
combineLabels: $combineLabels,
2630
fileInfo: $fileInfo,
27-
labelsetIds: $labelsetIds,
28-
combineLabels: $combineLabels
2931
anonymous: $anonymous
3032
) {
3133
id,
3234
datasetId,
35+
labelsetId,
3336
name,
3437
status,
35-
downloadUrl,
3638
columnIds,
37-
labelsetIds,
38-
subsetIds,
39+
modelIds,
40+
frozenLabelsetIds,
3941
anonymous
42+
downloadUrl,
4043
}
41-
}
44+
}
4245
4346
"""
4447

4548
def __init__(
4649
self,
4750
dataset_id: int,
51+
labelset_id: int,
4852
column_ids: List[int] = None,
49-
subset_ids: List[int] = None,
50-
labelset_ids: List[int] = None,
53+
model_ids: List[int] = None,
54+
frozen_labelset_ids: List[int] = None,
55+
combine_labels: LabelResolutionStrategy = LabelResolutionStrategy.ALL.name,
5156
file_info: bool = None,
52-
combine_labels: bool = None,
5357
anonymoous: bool = None,
5458
):
5559
super().__init__(
5660
self.query,
5761
variables={
5862
"datasetId": dataset_id,
63+
"labelsetId": labelset_id,
5964
"columnIds": column_ids,
60-
"subsetIds": subset_ids,
61-
"labelsetIds": labelset_ids,
62-
"fileInfo": file_info,
65+
"modelIds": model_ids,
66+
"frozenLabelsetIds": frozen_labelset_ids,
6367
"combineLabels": combine_labels,
68+
"fileInfo": file_info,
6469
"anonymous": anonymoous,
6570
},
6671
)
@@ -91,8 +96,9 @@ class GetExport(GraphQLRequest):
9196
name
9297
status
9398
columnIds
94-
labelsetIds
95-
subsetIds
99+
labelsetId
100+
modelIds
101+
frozenLabelsetIds
96102
numLabels
97103
anonymous
98104
downloadUrl
@@ -160,11 +166,12 @@ class CreateExport(RequestChain):
160166
161167
Args:
162168
dataset_id (int): Dataset to create the export for
163-
subset_ids: (List(int)): Subset ids to export rows sets
169+
labelset_id: (int): Labelset column id to export
164170
column_ids (List(int)): Data column ids to export
165-
labelset_ids: (List(int)): Labelset column ids to export
171+
model_ids (List(int)): Model ids to include predictions from
172+
frozen_labelset_ids: (List(int)): frozen labelset ids to limit examples by
173+
combine_labels: (LabelResolutionStrategy): One row per example, combine labels from multiple labels into a single row
166174
file_info: (bool): Include datafile information
167-
combine_labels: (bool): One row per example, combine labels from multiple labels into a single row
168175
anonymous: (bool): Anonymize user information
169176
wait: (bool): Wait for the export to complete. Default is True
170177
@@ -178,32 +185,35 @@ class CreateExport(RequestChain):
178185
def __init__(
179186
self,
180187
dataset_id: int,
181-
subset_ids: List[int] = None,
188+
labelset_id: int,
182189
column_ids: List[int] = None,
183-
labelset_ids: List[int] = None,
190+
model_ids: List[int] = None,
191+
frozen_labelset_ids: List[int] = None,
192+
combine_labels: LabelResolutionStrategy = LabelResolutionStrategy.ALL.name,
184193
file_info: bool = False,
185-
combine_labels: bool = False,
186194
anonymous: bool = False,
187195
wait: bool = True,
188196
):
189197
self.dataset_id = dataset_id
198+
self.labelset_id = labelset_id
190199
self.column_ids = column_ids
191-
self.subset_ids = subset_ids
192-
self.labelset_ids = labelset_ids
193-
self.file_info = file_info
200+
self.model_ids = model_ids
201+
self.frozen_labelset_ids = frozen_labelset_ids
194202
self.combine_labels = combine_labels
203+
self.file_info = file_info
195204
self.anonymous = anonymous
196205
self.wait = wait
197206
super().__init__()
198207

199208
def requests(self):
200209
yield _CreateExport(
201210
dataset_id=self.dataset_id,
211+
labelset_id=self.labelset_id,
202212
column_ids=self.column_ids,
203-
subset_ids=self.subset_ids,
204-
labelset_ids=self.labelset_ids,
205-
file_info=self.file_info,
213+
model_ids=self.model_ids,
214+
frozen_labelset_ids=self.frozen_labelset_ids,
206215
combine_labels=self.combine_labels,
216+
file_info=self.file_info,
207217
anonymoous=self.anonymous,
208218
)
209219
debouncer = Debouncer()

indico/types/export.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,29 @@
11
from typing import List
2+
from enum import Enum
23

34
from indico.types.base import BaseType
45

56

7+
class LabelResolutionStrategy(Enum):
8+
MAJORITY_VOTE_WITH_TIES = "majority_vote_with_ties"
9+
MAJORITY_VOTE_WITHOUT_TIES = "majority_vote_without_ties"
10+
UNANIMOUS = "unanimous"
11+
ALL = "all"
12+
13+
614
class Export(BaseType):
715
"""
816
An export of a dataset.
917
1018
Attributes:
1119
id (int): Id of the export
1220
dataset_id (int): Dataset id of the export
13-
status (str): Export job status
21+
name (str): Name of the export
22+
status (ExportStatus): Export job status
1423
column_ids (List(int)): Data columns for export
15-
labelset_ids (List(int)): Labelsets columns for export
16-
subset_ids (List(int)): Subsets requested for export rows
24+
labelset_id (int): Labelset column for export
25+
model_ids (List(int)): Models to include predictions from
26+
frozen_labelset_ids (List(int)): Frozen labelsets to select examples
1727
num_labels (int): Number of labels on the dataset
1828
anonymous (bool): Whether to anonymize labelers
1929
download_url (str): Indico url of the export csv
@@ -25,8 +35,9 @@ class Export(BaseType):
2535
name: str
2636
status: str
2737
column_ids: List[int]
28-
labelset_ids: List[int]
29-
subset_ids: List[int]
38+
labelset_id: int
39+
model_ids: List[int]
40+
frozen_labelset_ids: List[int]
3041
num_labels: int
3142
anonymous: bool
3243
download_url: str

tests/integration/queries/test_dataset.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -287,7 +287,12 @@ def test_create_from_csv(indico):
287287

288288
_dataset_complete(dataset)
289289

290-
export = client.call(CreateExport(dataset_id=dataset.id, wait=True))
290+
export = client.call(CreateExport(
291+
dataset_id=dataset.id,
292+
labelset_id=dataset.labelsets[0].id,
293+
wait=True
294+
)
295+
)
291296

292297
exported_data = client.call(DownloadExport(export.id))
293298

tests/integration/queries/test_export.py

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,15 @@
1111

1212
def test_create_and_download_export(airlines_dataset: Dataset):
1313
client = IndicoClient()
14-
export = client.call(CreateExport(dataset_id=airlines_dataset.id, wait=True))
14+
export = client.call(
15+
CreateExport(
16+
dataset_id=airlines_dataset.id,
17+
labelset_id=airlines_dataset.labelsets[0].id,
18+
wait=True
19+
)
20+
)
1521
assert export.status == "COMPLETE"
16-
1722
csv = client.call(DownloadExport(export.id))
18-
1923
assert all(c in csv.columns.to_list() for c in ["ID", "Target_1", "Text"])
2024
assert any(re.match("row_index_[0-9]+", c) for c in csv.columns.to_list())
2125
assert csv["Text"][0] == "Your service is so bad."
@@ -35,5 +39,10 @@ def test_download_incomplete(indico):
3539

3640
def test_create_export_no_wait(airlines_dataset: Dataset):
3741
client = IndicoClient()
38-
export = client.call(CreateExport(dataset_id=airlines_dataset.id, wait=False))
42+
export = client.call(CreateExport(
43+
dataset_id=airlines_dataset.id,
44+
labelset_id=airlines_dataset.labelsets[0].id,
45+
wait=False
46+
)
47+
)
3948
assert export.status == "STARTED"

0 commit comments

Comments
 (0)