-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathlido2rdf.py
More file actions
82 lines (63 loc) · 3 KB
/
lido2rdf.py
File metadata and controls
82 lines (63 loc) · 3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python
"""Converts LIDO file to RDF """
import re
import argparse
from sys import stdin, stderr, exit
from io import BytesIO
from urllib.error import HTTPError, URLError
from pathlib import Path
from libs.LidoRDFConverter import LidoRDFConverter
# Version of this tool; interpolated into the command line description.
VERSION = "0.1.0"

# Keys are file suffixes without the leading dot, values are the names of
# the RDF serialization formats they select.
SUFFIX_FORMAT_MAP = {
    'ttl': 'turtle',
    'nt': 'nt',
    'json': 'json-ld',
    'xml': 'xml',
}
'''Maps file suffixes to formats'''
def error(msg):
    '''Writes msg to standard error and terminates with exit status 1'''
    print(msg, file=stderr)
    # Equivalent to sys.exit(1): both raise SystemExit with code 1.
    raise SystemExit(1)
def getValidFormat(format_str, file_name) -> str:
    '''Returns a valid format string

    The suffix is taken from format_str when it is non-empty, otherwise
    from the extension of file_name. It is looked up in SUFFIX_FORMAT_MAP
    ignoring case and surrounding dots, so "TTL", ".ttl" and a file named
    "out.TTL" all resolve to "turtle". A missing or unknown suffix falls
    back to "nt".
    '''
    suffix = format_str or Path(file_name).suffix
    # The map keys are lowercase and dot-free; normalize before the lookup so
    # an upper-case extension or a pasted ".ttl" does not silently degrade
    # to the "nt" fallback.
    return SUFFIX_FORMAT_MAP.get(suffix.strip('.').lower(), 'nt')
def isURL(s):
    '''Returns a match object if s starts with "http:", "https:" or "file:", else None'''
    return re.match("^(https?|file):", s)
def lido2rdf(input, mapping_file, **kw) -> LidoRDFConverter.Graph | None:
    '''Converts LIDO input to RDF using the x3ml mapping in mapping_file

    input may be a URL (http, https or file scheme), "-" for standard
    input, or any other value, which is handed to the converter's
    parse_file unchanged. The keyword arguments are forwarded only when
    input is a URL.
    '''
    converter = LidoRDFConverter(mapping_file)

    # URLs are delegated entirely to the converter.
    if isURL(input):
        return converter.process_url(input, **kw)

    # "-" selects standard input: read it completely into an in-memory
    # byte stream positioned at the start.
    source = BytesIO(stdin.buffer.read()) if input == "-" else input

    # Only the first item of parse_file's result is returned.
    return converter.parse_file(source)[0]
def cli_convert():
    '''Command line entry point: parses the arguments and converts LIDO to RDF

    Prints the help text when no source is given and standard input is a
    terminal. Otherwise the source is converted and, if the conversion
    returns a truthy graph, the graph is serialized to the output target.
    An output target of "-" is written to standard output. HTTPError and
    URLError are reported on standard error and end the program with exit
    status 1.
    '''
    def apFormatter(prog):
        # Widen the option column of the generated help text.
        return argparse.HelpFormatter(prog, max_help_position=50)

    parser = argparse.ArgumentParser(
        prog="lido2rdf",
        description=f"Convert LIDO to RDF using X3ML mapping (version={VERSION})",
        formatter_class=apFormatter)
    formats = ",".join(SUFFIX_FORMAT_MAP)
    parser.add_argument('source', metavar='LIDO-XML', nargs="?",
                        default="-", help='LIDO file or URL (default: -)')
    # The default value is kept unchanged because the whole args namespace is
    # passed on to the converter; "-" is handled explicitly further down.
    parser.add_argument("-o", '--output', metavar="FILE", dest="target",
                        default='/dev/stdout', help="RDF output file (default: -)")
    parser.add_argument("-t", '--type', dest="format", default="ttl",
                        help=f"RDF output format ({formats})")
    parser.add_argument('-m', '--mapping', dest="mapping", metavar="X3ML",
                        default='defaultMapping.x3ml',
                        help="X3ML mapping file (default: defaultMapping.x3ml)")
    parser.add_argument('--rdf-folder', metavar="DIR", dest="rdf_folder",
                        default='rdfData',
                        help="RDF output folder for OAI-PMH processing (default: rdfData)")
    parser.add_argument('-of', '--oai-from', dest="oai_from", default='',
                        help="OAI from argument")
    parser.add_argument('-ot', '--oai-to', dest="oai_to", default='',
                        help="OAI to argument")
    args = parser.parse_args()

    # Nothing piped in and no source named: show usage instead of blocking
    # on an interactive terminal.
    if args.source == "-" and stdin.isatty():
        parser.print_help()
        return

    try:
        rdf_format = getValidFormat(args.format, args.target)
        graph = lido2rdf(args.source, args.mapping, suffix=args.format,
                         format=rdf_format, rdf_folder=args.rdf_folder, args=args)
        if graph:
            if args.target == "-":
                # The help text advertises "-" for standard output; without
                # this branch it would create a file literally named "-".
                # NOTE(review): assumes serialize() accepts a binary stream
                # as destination, as rdflib's Graph.serialize does - confirm
                # that LidoRDFConverter.Graph is an rdflib Graph.
                from sys import stdout
                destination = stdout.buffer
            else:
                destination = args.target
            graph.serialize(destination=destination, format=rdf_format, encoding='utf-8')
    except (HTTPError, URLError) as exception:
        error(exception)
# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    cli_convert()