Skip to content

Commit 30383c7

Browse files
ADD: ocr options inputs for all ocr engines + fix for model training options and wait submission (#435)
1 parent c63fec4 commit 30383c7

5 files changed

Lines changed: 148 additions & 23 deletions

File tree

indico/queries/datasets.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@
2727
EmailOptions,
2828
OmnipageOcrOptionsInput,
2929
ReadApiOcrOptionsInput,
30+
ReadApiTablesV1OcrOptionsInput,
31+
ReadApiTablesV2OcrOptionsInput,
32+
ReadApiV2OcrOptionsInput,
3033
)
3134
from indico.typing import AnyDict, Payload
3235

@@ -206,9 +209,12 @@ class CreateDataset(RequestChain["Dataset"]):
206209
from_local_images (bool, optional): Flag whether files are local images or not. Defaults to False.
207210
image_filename_col (str, optional): Image filename column. Defaults to 'filename'.
208211
batch_size (int, optional): Size of file batch to upload at a time. Defaults to 20.
209-
ocr_engine (OcrEngine, optional): Specify an OCR engine [OMNIPAGE, READAPI, READAPI_V2, READAPI_TABLES_V1]. Defaults to None.
212+
ocr_engine (OcrEngine, optional): Specify an OCR engine [OMNIPAGE, READAPI, READAPI_V2, READAPI_TABLES_V1, READAPI_TABLES_V2]. Defaults to None.
210213
omnipage_ocr_options (OmnipageOcrOptionsInput, optional): If using Omnipage, specify Omnipage OCR options. Defaults to None.
211214
read_api_ocr_options (ReadApiOcrOptionsInput, optional): If using ReadAPI, specify ReadAPI OCR options. Defaults to None.
215+
read_api_v2_ocr_options (ReadApiV2OcrOptionsInput, optional): If using ReadAPI v2, specify ReadAPI v2 OCR options. Defaults to None.
216+
read_api_tables_v1_ocr_options (ReadApiTablesV1OcrOptionsInput, optional): If using ReadAPI tables v1, specify ReadAPI tables v1 OCR options. Defaults to None.
217+
read_api_tables_v2_ocr_options (ReadApiTablesV2OcrOptionsInput, optional): If using ReadAPI tables v2, specify ReadAPI tables v2 OCR options. Defaults to None.
212218
request_interval (int or float, optional): The maximum time in between retry calls when waiting. Defaults to 5 seconds.
213219
214220
Returns:
@@ -228,6 +234,9 @@ def __init__(
228234
ocr_engine: "Optional[OcrEngine]" = None,
229235
omnipage_ocr_options: "Optional[OmnipageOcrOptionsInput]" = None,
230236
read_api_ocr_options: "Optional[ReadApiOcrOptionsInput]" = None,
237+
read_api_v2_ocr_options: "Optional[ReadApiV2OcrOptionsInput]" = None,
238+
read_api_tables_v1_ocr_options: "Optional[ReadApiTablesV1OcrOptionsInput]" = None,
239+
read_api_tables_v2_ocr_options: "Optional[ReadApiTablesV2OcrOptionsInput]" = None,
231240
request_interval: "Union[int, float]" = 5,
232241
email_options: "Optional[EmailOptions]" = None,
233242
):
@@ -241,11 +250,26 @@ def __init__(
241250
self.ocr_engine = ocr_engine
242251
self.omnipage_ocr_options = omnipage_ocr_options
243252
self.read_api_ocr_options = read_api_ocr_options
253+
self.read_api_v2_ocr_options = read_api_v2_ocr_options
254+
self.read_api_tables_v1_ocr_options = read_api_tables_v1_ocr_options
255+
self.read_api_tables_v2_ocr_options = read_api_tables_v2_ocr_options
244256
self.request_interval = request_interval
245257
self.email_options = email_options
246-
if omnipage_ocr_options is not None and read_api_ocr_options is not None:
258+
if (
259+
sum(
260+
opt is not None
261+
for opt in [
262+
omnipage_ocr_options,
263+
read_api_ocr_options,
264+
read_api_v2_ocr_options,
265+
read_api_tables_v1_ocr_options,
266+
read_api_tables_v2_ocr_options,
267+
]
268+
)
269+
> 1
270+
):
247271
raise IndicoInputError(
248-
"Must supply either omnipage or readapi options but not both."
272+
"Must supply only one of omnipage, readapi, readapi v2, readapi tables v1, or readapi tables v2 options."
249273
)
250274
super().__init__()
251275

@@ -300,6 +324,9 @@ def requests(
300324
name=self.name,
301325
dataset_type=self.dataset_type,
302326
readapi_ocr_options=self.read_api_ocr_options,
327+
readapi_v2_ocr_options=self.read_api_v2_ocr_options,
328+
readapi_tables_v1_ocr_options=self.read_api_tables_v1_ocr_options,
329+
readapi_tables_v2_ocr_options=self.read_api_tables_v2_ocr_options,
303330
omnipage_ocr_options=self.omnipage_ocr_options,
304331
ocr_engine=self.ocr_engine,
305332
email_options=self.email_options,
@@ -401,6 +428,9 @@ def __init__(
401428
ocr_engine: "Optional[OcrEngine]" = None,
402429
omnipage_ocr_options: "Optional[OmnipageOcrOptionsInput]" = None,
403430
readapi_ocr_options: "Optional[ReadApiOcrOptionsInput]" = None,
431+
readapi_v2_ocr_options: "Optional[ReadApiV2OcrOptionsInput]" = None,
432+
readapi_tables_v1_ocr_options: "Optional[ReadApiTablesV1OcrOptionsInput]" = None,
433+
readapi_tables_v2_ocr_options: "Optional[ReadApiTablesV2OcrOptionsInput]" = None,
404434
email_options: "Optional[EmailOptions]" = None,
405435
):
406436
if not dataset_type:
@@ -412,6 +442,9 @@ def __init__(
412442
"ocrEngine": ocr_engine.name,
413443
"omnipageOptions": omnipage_ocr_options,
414444
"readapiOptions": readapi_ocr_options,
445+
"readapiV2Options": readapi_v2_ocr_options,
446+
"readapiTablesV1Options": readapi_tables_v1_ocr_options,
447+
"readapiTablesV2Options": readapi_tables_v2_ocr_options,
415448
},
416449
"emailOptions": email_options,
417450
}

indico/queries/gallery.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from indico.types.component_blueprint import BlueprintPage, BlueprintTags
55

66
if TYPE_CHECKING: # pragma: no cover
7-
from typing import Any, Optional
7+
from typing import Any, Dict, Optional, Union
88

99
from indico.typing import Payload
1010

@@ -53,7 +53,7 @@ class ListGallery(PagedRequestV2[BlueprintPage]):
5353

5454
def __init__(
5555
self,
56-
filters: "Optional[str]" = None,
56+
filters: "Optional[Union[Dict[str, Any], str]]" = None,
5757
limit: int = 100,
5858
order_by: str = "name",
5959
desc: bool = False,

indico/queries/model_groups/model_groups.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -311,9 +311,9 @@ def __init__(
311311
model_training_options_json: "Optional[str]" = None
312312
if model_training_options:
313313
if isinstance(model_training_options, dict):
314-
model_training_options = json.dumps(model_training_options)
314+
model_training_options_json = json.dumps(model_training_options)
315315
else:
316-
model_training_options = model_training_options
316+
model_training_options_json = model_training_options
317317

318318
predict_options_json: "Optional[str]" = None
319319
if predict_options:

indico/queries/submission.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -287,15 +287,22 @@ class WaitForSubmissions(RequestChain["List[Submission]"]):
287287
}
288288
"""
289289

290-
def __init__(self, submission_ids: "List[int]", timeout: "Union[int, float]" = 60):
290+
def __init__(
291+
self, submission_ids: "Union[int, List[int]]", timeout: "Union[int, float]" = 60
292+
):
291293
if not submission_ids:
292294
raise IndicoInputError("Please provide submission ids")
293295

294296
self.submission_ids = submission_ids
295297
self.timeout = timeout
296298
self.status_check = partial(ne, "PROCESSING")
299+
num_submissions = (
300+
1 if isinstance(self.submission_ids, int) else len(self.submission_ids)
301+
)
297302
self.status_getter = partial(
298-
ListSubmissions, submission_ids=self.submission_ids, limit=None
303+
ListSubmissions,
304+
submission_ids=self.submission_ids,
305+
limit=num_submissions,
299306
)
300307

301308
def requests(self) -> "Iterator[ListSubmissions]":

indico/types/dataset.py

Lines changed: 99 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ class TableReadOrder(Enum):
7575
COLUMN = 1
7676

7777

78+
class ExcelTablesType(Enum):
79+
RENDERED = 0
80+
NATIVE = 1
81+
82+
7883
class OcrEngine(Enum):
7984
"""
8085
Enum representing available OCR engines.
@@ -84,17 +89,36 @@ class OcrEngine(Enum):
8489
READAPI = 1
8590
READAPI_V2 = 2
8691
READAPI_TABLES_V1 = 3
92+
READAPI_TABLES_V2 = 4
8793

8894

89-
class OmnipageOcrOptionsInput(BaseType):
95+
class BaseOCROptions(BaseType):
96+
"""
97+
Base OCR options shared across engines.
98+
99+
Args:
100+
auto_rotate(bool): Auto rotate.
101+
upscale_images(bool): Scale up low resolution images.
102+
spreadsheet_converter_version(int): Spreadsheet converter version.
103+
languages(List[str]): List of languages to use.
104+
"""
105+
106+
auto_rotate: bool
107+
upscale_images: bool
108+
spreadsheet_converter_version: int
109+
languages: List[str]
110+
111+
112+
class OmnipageOcrOptionsInput(BaseOCROptions):
90113
"""
91114
Omnipage specific OCR options for dataset creation.
92115
93116
Args:
94-
auto_rotate(bool): auto rotate.
95-
single_colum(bool): Read table as a single column.
96-
upscale_images(bool): Scale up low-resolution images.
97-
languages(List[OmnipageLanguageCode]): List of languages to use in ocr.
117+
auto_rotate(bool): Auto rotate.
118+
upscale_images(bool): Scale up low resolution images.
119+
spreadsheet_converter_version(int): Spreadsheet converter version.
120+
languages(List[str]): List of languages to use.
121+
single_column(bool): Read table as a single column.
98122
cells(bool): Return table information for post-processing rules.
99123
force_render(bool): Force rendering.
100124
native_layout(bool): Native layout.
@@ -103,32 +127,90 @@ class OmnipageOcrOptionsInput(BaseType):
103127
104128
"""
105129

106-
auto_rotate: bool
107130
single_column: bool
108-
upscale_images: bool
109-
languages: List[str]
110131
cells: bool
111132
force_render: bool
112133
native_layout: bool
113134
native_pdf: bool
114135
table_read_order: TableReadOrder
136+
split_version: int
115137

116138

117-
class ReadApiOcrOptionsInput(BaseType):
139+
class ReadApiOcrOptionsInput(BaseOCROptions):
118140
"""
119141
Read API OCR options.
120142
121143
Args:
122-
auto_rotate(bool): Auto rotate
123-
single_column(bool): Read table as a single column.
144+
auto_rotate(bool): Auto rotate.
124145
upscale_images(bool): Scale up low resolution images.
146+
spreadsheet_converter_version(int): Spreadsheet converter version.
125147
languages(List[str]): List of languages to use.
148+
excel_tables(bool): Enable excel tables processing.
149+
excel_tables_type(ExcelTablesType): Excel tables processing type (NATIVE or RENDERED).
150+
single_column(bool): Read table as a single column.
126151
"""
127152

128-
auto_rotate: bool
129153
single_column: bool
130-
upscale_images: bool
131-
languages: List[str]
154+
excel_tables: bool
155+
excel_tables_type: ExcelTablesType
156+
157+
158+
class ReadApiV2OcrOptionsInput(ReadApiOcrOptionsInput):
159+
"""
160+
Read API v2 OCR options.
161+
162+
Args:
163+
auto_rotate(bool): Auto rotate.
164+
upscale_images(bool): Scale up low resolution images.
165+
spreadsheet_converter_version(int): Spreadsheet converter version.
166+
languages(List[str]): List of languages to use.
167+
excel_tables(bool): Enable excel tables processing.
168+
excel_tables_type(ExcelTablesType): Excel tables processing type (NATIVE or RENDERED).
169+
single_column(bool): Read table as a single column.
170+
"""
171+
172+
173+
class ReadApiTablesV1OcrOptionsInput(ReadApiOcrOptionsInput):
174+
"""
175+
Read API tables v1 OCR options.
176+
177+
Args:
178+
auto_rotate(bool): Auto rotate.
179+
upscale_images(bool): Scale up low resolution images.
180+
spreadsheet_converter_version(int): Spreadsheet converter version.
181+
languages(List[str]): List of languages to use.
182+
excel_tables(bool): Enable excel tables processing.
183+
excel_tables_type(ExcelTablesType): Excel tables processing type (NATIVE or RENDERED).
184+
single_column(bool): Read table as a single column.
185+
table_read_order(TableReadOrder): Read table by row or column.
186+
"""
187+
188+
table_read_order: TableReadOrder
189+
190+
191+
class ReadApiTablesV2OcrOptionsInput(BaseOCROptions):
192+
"""
193+
Read API tables v2 OCR options.
194+
195+
Args:
196+
auto_rotate(bool): Auto rotate.
197+
upscale_images(bool): Scale up low resolution images.
198+
spreadsheet_converter_version(int): Spreadsheet converter version.
199+
languages(List[str]): List of languages to use.
200+
excel_tables(bool): Enable excel tables processing.
201+
excel_tables_type(ExcelTablesType): Excel tables processing type (NATIVE or RENDERED).
202+
table_read_order(TableReadOrder): Read table by row or column.
203+
include_markdown(bool): Include formatted text in the output.
204+
include_barcodes(bool): Recognize and extract barcodes.
205+
include_key_value_pairs(bool): Recognize and extract key-value pairs.
206+
"""
207+
208+
excel_tables: bool
209+
excel_tables_type: ExcelTablesType
210+
table_read_order: TableReadOrder
211+
include_markdown: bool
212+
include_barcodes: bool
213+
include_key_value_pairs: bool
132214

133215

134216
class OcrInputLanguage(BaseType):
@@ -165,3 +247,6 @@ class OcrOptionsInput:
165247
ocr_engine: OcrEngine
166248
omnipage_options: OmnipageOcrOptionsInput
167249
readapi_options: ReadApiOcrOptionsInput
250+
readapi_v2_options: ReadApiV2OcrOptionsInput
251+
readapi_tables_v1_options: ReadApiTablesV1OcrOptionsInput
252+
readapi_tables_v2_options: ReadApiTablesV2OcrOptionsInput

0 commit comments

Comments
 (0)