Skip to content

Commit d1adeeb

Browse files
committed
feat(generators): add --exclude-external-imports flag
Add a dedicated --exclude-external-imports / --no-exclude-external-imports CLI flag to control whether external vocabulary terms are included in generated artifacts when --no-mergeimports is set. Previously external terms leaked into JSON-LD contexts even with --no-mergeimports. The new flag explicitly suppresses classes and slots defined in schemas that are imported via an http:// or https:// URL. Tests cover linkml:types built-in import preservation, local file import preservation, and interaction with mergeimports=False. Signed-off-by: jdsika <carlo.van-driesten@bmw.de>
1 parent 0fa6f93 commit d1adeeb

2 files changed

Lines changed: 342 additions & 3 deletions

File tree

packages/linkml/src/linkml/generators/jsonldcontextgen.py

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,22 @@ class ContextGenerator(Generator):
5656
fix_multivalue_containers: bool | None = False
5757
exclude_imports: bool = False
5858
"""If True, elements from imported schemas won't be included in the generated context"""
59+
exclude_external_imports: bool = False
60+
"""If True, elements from URL-based external vocabulary imports are excluded.
61+
62+
Local file imports and linkml standard imports are kept. This is useful
63+
when extending an external ontology (e.g. W3C Verifiable Credentials)
64+
whose terms are ``@protected`` in their own JSON-LD context — redefining
65+
them locally would violate JSON-LD 1.1 §4.1.11.
66+
67+
Note: the filter is applied regardless of ``mergeimports``. With
68+
``mergeimports=False`` non-local elements are expected to be absent from
69+
the visitor iteration already, so the flag is then a safeguard only.
70+
"""
5971
_local_classes: set | None = field(default=None, repr=False)
6072
_local_slots: set | None = field(default=None, repr=False)
73+
_external_classes: set | None = field(default=None, repr=False)
74+
_external_slots: set | None = field(default=None, repr=False)
6175

6276
# Framing (opt-in via CLI flag)
6377
emit_frame: bool = False
@@ -69,16 +83,39 @@ def __post_init__(self) -> None:
6983
super().__post_init__()
7084
if self.namespaces is None:
7185
raise TypeError("Schema text must be supplied to context generator. Preparsed schema will not work")
72-
if self.exclude_imports:
86+
if self.exclude_imports or self.exclude_external_imports:
7387
if self.schemaview:
7488
sv = self.schemaview
7589
else:
7690
source = self.schema.source_file or self.schema
7791
if isinstance(source, str) and self.base_dir and not Path(source).is_absolute():
7892
source = str(Path(self.base_dir) / source)
7993
sv = SchemaView(source, importmap=self.importmap, base_dir=self.base_dir)
80-
self._local_classes = set(sv.all_classes(imports=False).keys())
81-
self._local_slots = set(sv.all_slots(imports=False).keys())
94+
if self.exclude_imports:
95+
self._local_classes = set(sv.all_classes(imports=False).keys())
96+
self._local_slots = set(sv.all_slots(imports=False).keys())
97+
if self.exclude_external_imports:
98+
self._external_classes, self._external_slots = self._collect_external_elements(sv)
99+
100+
@staticmethod
101+
def _collect_external_elements(sv: SchemaView) -> tuple[set[str], set[str]]:
102+
"""Identify classes and slots from URL-based external vocabulary imports.
103+
104+
Walks the SchemaView ``schema_map`` (populated by ``imports_closure``)
105+
and collects element names from schemas whose import key starts with
106+
``http://`` or ``https://``. Local file imports and ``linkml:``
107+
standard imports are left untouched.
108+
"""
109+
sv.imports_closure()
110+
external_classes: set[str] = set()
111+
external_slots: set[str] = set()
112+
for schema_key, schema_def in sv.schema_map.items():
113+
if schema_key == sv.schema.name:
114+
continue
115+
if schema_key.startswith("http://") or schema_key.startswith("https://"):
116+
external_classes.update(schema_def.classes.keys())
117+
external_slots.update(schema_def.slots.keys())
118+
return external_classes, external_slots
82119

83120
def visit_schema(self, base: str | Namespace | None = None, output: str | None = None, **_):
84121
# Add any explicitly declared prefixes
@@ -194,6 +231,8 @@ def end_schema(
194231
def visit_class(self, cls: ClassDefinition) -> bool:
195232
if self.exclude_imports and cls.name not in self._local_classes:
196233
return False
234+
if self.exclude_external_imports and cls.name in self._external_classes:
235+
return False
197236

198237
class_def = {}
199238
cn = camelcase(cls.name)
@@ -246,6 +285,8 @@ def _literal_coercion_for_ranges(self, ranges: list[str]) -> tuple[bool, str | N
246285
def visit_slot(self, aliased_slot_name: str, slot: SlotDefinition) -> None:
247286
if self.exclude_imports and slot.name not in self._local_slots:
248287
return
288+
if self.exclude_external_imports and slot.name in self._external_slots:
289+
return
249290

250291
if slot.identifier:
251292
slot_def = "@id"
@@ -390,6 +431,13 @@ def serialize(
390431
help="Use --exclude-imports to exclude imported elements from the generated JSON-LD context. This is useful when "
391432
"extending an ontology whose terms already have context definitions in their own JSON-LD context file.",
392433
)
434+
@click.option(
435+
"--exclude-external-imports/--no-exclude-external-imports",
436+
default=False,
437+
show_default=True,
438+
help="Exclude elements from URL-based external vocabulary imports while keeping local file imports. "
439+
"Useful when extending ontologies (e.g. W3C VC v2) whose terms are @protected in their own JSON-LD context.",
440+
)
393441
@click.version_option(__version__, "-V", "--version")
394442
def cli(yamlfile, emit_frame, embed_context_in_frame, output, **args):
395443
"""Generate jsonld @context definition from LinkML model"""

tests/linkml/test_generators/test_jsonldcontextgen.py

Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import textwrap
23

34
import pytest
45
from click.testing import CliRunner
@@ -571,3 +572,293 @@ def test_exclude_imports(input_path):
571572
# Imported class and slot must NOT be present
572573
assert "BaseClass" not in ctx, "Imported class 'BaseClass' must not appear in exclude-imports context"
573574
assert "baseProperty" not in ctx, "Imported slot 'baseProperty' must not appear in exclude-imports context"
575+
576+
577+
@pytest.mark.parametrize("mergeimports", [True, False], ids=["merge", "no-merge"])
578+
def test_exclude_external_imports(tmp_path, mergeimports):
579+
"""With --exclude-external-imports, elements from URL-based external
580+
vocabulary imports must not appear in the generated JSON-LD context,
581+
while local file imports and linkml standard imports are kept.
582+
583+
When a schema imports terms from an external vocabulary (e.g. W3C VC
584+
v2), those terms already have context definitions in their own JSON-LD
585+
context file. Re-defining them in the local context can conflict with
586+
@protected term definitions from the external context (JSON-LD 1.1
587+
section 4.1.11).
588+
"""
589+
ext_dir = tmp_path / "ext"
590+
ext_dir.mkdir()
591+
(ext_dir / "external_vocab.yaml").write_text(
592+
textwrap.dedent("""\
593+
id: https://example.org/external-vocab
594+
name: external_vocab
595+
default_prefix: ext
596+
prefixes:
597+
linkml: https://w3id.org/linkml/
598+
ext: https://example.org/external-vocab/
599+
imports:
600+
- linkml:types
601+
slots:
602+
issuer:
603+
slot_uri: ext:issuer
604+
range: string
605+
validFrom:
606+
slot_uri: ext:validFrom
607+
range: date
608+
classes:
609+
ExternalCredential:
610+
class_uri: ext:ExternalCredential
611+
slots:
612+
- issuer
613+
- validFrom
614+
"""),
615+
encoding="utf-8",
616+
)
617+
618+
(tmp_path / "main.yaml").write_text(
619+
textwrap.dedent("""\
620+
id: https://example.org/main
621+
name: main
622+
default_prefix: main
623+
prefixes:
624+
linkml: https://w3id.org/linkml/
625+
main: https://example.org/main/
626+
ext: https://example.org/external-vocab/
627+
imports:
628+
- linkml:types
629+
- https://example.org/external-vocab
630+
slots:
631+
localName:
632+
slot_uri: main:localName
633+
range: string
634+
classes:
635+
LocalThing:
636+
class_uri: main:LocalThing
637+
slots:
638+
- localName
639+
"""),
640+
encoding="utf-8",
641+
)
642+
643+
importmap = {"https://example.org/external-vocab": str(ext_dir / "external_vocab")}
644+
645+
context_text = ContextGenerator(
646+
str(tmp_path / "main.yaml"),
647+
exclude_external_imports=True,
648+
mergeimports=mergeimports,
649+
importmap=importmap,
650+
base_dir=str(tmp_path),
651+
).serialize()
652+
context = json.loads(context_text)
653+
ctx = context["@context"]
654+
655+
# Local terms must be present
656+
assert "localName" in ctx or "local_name" in ctx, (
657+
f"Local slot missing with mergeimports={mergeimports}, got: {list(ctx.keys())}"
658+
)
659+
assert "LocalThing" in ctx, f"Local class missing with mergeimports={mergeimports}, got: {list(ctx.keys())}"
660+
661+
# External vocabulary terms must NOT be present
662+
assert "issuer" not in ctx, f"External slot 'issuer' present with mergeimports={mergeimports}"
663+
assert "validFrom" not in ctx and "valid_from" not in ctx, (
664+
f"External slot 'validFrom' present with mergeimports={mergeimports}"
665+
)
666+
assert "ExternalCredential" not in ctx, (
667+
f"External class 'ExternalCredential' present with mergeimports={mergeimports}"
668+
)
669+
670+
671+
def test_exclude_external_imports_preserves_linkml_types(tmp_path):
672+
"""linkml:types (standard library import) must NOT be treated as external.
673+
674+
The ``linkml:types`` import resolves to a URL internally
675+
(``https://w3id.org/linkml/types``), but it is a standard LinkML import,
676+
not a user-declared external vocabulary. ``_collect_external_elements``
677+
only treats schemas whose import key starts with ``http://`` or ``https://``
678+
as external — this test verifies that a local class and local slots using
built-in ranges (string, integer) are still emitted with the flag enabled.
679+
"""
680+
(tmp_path / "schema.yaml").write_text(
681+
textwrap.dedent("""\
682+
id: https://example.org/test
683+
name: test_linkml_types
684+
default_prefix: ex
685+
prefixes:
686+
linkml: https://w3id.org/linkml/
687+
ex: https://example.org/
688+
imports:
689+
- linkml:types
690+
slots:
691+
name:
692+
slot_uri: ex:name
693+
range: string
694+
age:
695+
slot_uri: ex:age
696+
range: integer
697+
classes:
698+
Person:
699+
class_uri: ex:Person
700+
slots:
701+
- name
702+
- age
703+
"""),
704+
encoding="utf-8",
705+
)
706+
707+
context_text = ContextGenerator(
708+
str(tmp_path / "schema.yaml"),
709+
exclude_external_imports=True,
710+
).serialize()
711+
ctx = json.loads(context_text)["@context"]
712+
713+
# Local classes and slots must be present
714+
assert "Person" in ctx, f"Local class 'Person' missing, got: {list(ctx.keys())}"
715+
assert "name" in ctx, f"Local slot 'name' missing, got: {list(ctx.keys())}"
716+
assert "age" in ctx, f"Local slot 'age' missing, got: {list(ctx.keys())}"
717+
718+
719+
def test_exclude_external_imports_preserves_local_file_imports(tmp_path):
720+
"""Local file imports (non-URL) must be preserved when exclude_external_imports is set.
721+
722+
Only URL-based imports (http:// or https://) are considered external.
723+
File-path imports between local schemas must remain in the context.
724+
"""
725+
local_dir = tmp_path / "local"
726+
local_dir.mkdir()
727+
(local_dir / "base.yaml").write_text(
728+
textwrap.dedent("""\
729+
id: https://example.org/base
730+
name: base
731+
default_prefix: base
732+
prefixes:
733+
linkml: https://w3id.org/linkml/
734+
base: https://example.org/base/
735+
imports:
736+
- linkml:types
737+
slots:
738+
baseField:
739+
slot_uri: base:baseField
740+
range: string
741+
classes:
742+
BaseRecord:
743+
class_uri: base:BaseRecord
744+
slots:
745+
- baseField
746+
"""),
747+
encoding="utf-8",
748+
)
749+
750+
(tmp_path / "main.yaml").write_text(
751+
textwrap.dedent("""\
752+
id: https://example.org/main
753+
name: main
754+
default_prefix: main
755+
prefixes:
756+
linkml: https://w3id.org/linkml/
757+
main: https://example.org/main/
758+
base: https://example.org/base/
759+
imports:
760+
- linkml:types
761+
- local/base
762+
slots:
763+
localField:
764+
slot_uri: main:localField
765+
range: string
766+
classes:
767+
MainRecord:
768+
class_uri: main:MainRecord
769+
slots:
770+
- localField
771+
"""),
772+
encoding="utf-8",
773+
)
774+
775+
context_text = ContextGenerator(
776+
str(tmp_path / "main.yaml"),
777+
exclude_external_imports=True,
778+
mergeimports=True,
779+
base_dir=str(tmp_path),
780+
).serialize()
781+
ctx = json.loads(context_text)["@context"]
782+
783+
# Local file import terms must be present
784+
assert "MainRecord" in ctx, f"Local class 'MainRecord' missing, got: {list(ctx.keys())}"
785+
assert "BaseRecord" in ctx, f"Local-file-imported class 'BaseRecord' missing, got: {list(ctx.keys())}"
786+
assert "baseField" in ctx or "base_field" in ctx, (
787+
f"Local-file-imported slot 'baseField' missing, got: {list(ctx.keys())}"
788+
)
789+
790+
791+
def test_exclude_external_imports_works_with_mergeimports_false(tmp_path):
792+
"""exclude_external_imports is effective even when mergeimports=False.
793+
794+
Although mergeimports=False prevents most imported elements from appearing,
795+
external vocabulary elements can still leak into the context via the
796+
schema_map. The exclude_external_imports flag catches these.
797+
"""
798+
ext_dir = tmp_path / "ext"
799+
ext_dir.mkdir()
800+
(ext_dir / "external_vocab.yaml").write_text(
801+
textwrap.dedent("""\
802+
id: https://example.org/external-vocab
803+
name: external_vocab
804+
default_prefix: ext
805+
prefixes:
806+
linkml: https://w3id.org/linkml/
807+
ext: https://example.org/external-vocab/
808+
imports:
809+
- linkml:types
810+
slots:
811+
issuer:
812+
slot_uri: ext:issuer
813+
range: string
814+
classes:
815+
ExternalCredential:
816+
class_uri: ext:ExternalCredential
817+
slots:
818+
- issuer
819+
"""),
820+
encoding="utf-8",
821+
)
822+
823+
(tmp_path / "main.yaml").write_text(
824+
textwrap.dedent("""\
825+
id: https://example.org/main
826+
name: main
827+
default_prefix: main
828+
prefixes:
829+
linkml: https://w3id.org/linkml/
830+
main: https://example.org/main/
831+
ext: https://example.org/external-vocab/
832+
imports:
833+
- linkml:types
834+
- https://example.org/external-vocab
835+
slots:
836+
localName:
837+
slot_uri: main:localName
838+
range: string
839+
classes:
840+
LocalThing:
841+
class_uri: main:LocalThing
842+
slots:
843+
- localName
844+
"""),
845+
encoding="utf-8",
846+
)
847+
848+
importmap = {"https://example.org/external-vocab": str(ext_dir / "external_vocab")}
849+
850+
ctx_text = ContextGenerator(
851+
str(tmp_path / "main.yaml"),
852+
exclude_external_imports=True,
853+
mergeimports=False,
854+
importmap=importmap,
855+
base_dir=str(tmp_path),
856+
).serialize()
857+
ctx = json.loads(ctx_text)["@context"]
858+
859+
# Local terms must still be present
860+
assert "LocalThing" in ctx, f"Local class missing, got: {list(ctx.keys())}"
861+
862+
# External vocabulary terms must be excluded
863+
assert "issuer" not in ctx, "External slot 'issuer' should be excluded with mergeimports=False"
864+
assert "ExternalCredential" not in ctx, "External class should be excluded with mergeimports=False"

0 commit comments

Comments
 (0)