diff --git a/.zed/rules b/.zed/rules
index 885a1a42..c6b86cec 100644
--- a/.zed/rules
+++ b/.zed/rules
@@ -139,13 +139,13 @@ Examples:
 src/orcapod/
   types.py                    — Schema, ColumnConfig, ContentHash
   system_constants.py         — Column prefixes and separators
-  errors.py                   — InputValidationError, DuplicateTagError, FieldNotResolvableError
+  errors.py                   — InputValidationError, DuplicateKeyError, FieldNotResolvableError
   config.py                   — Config dataclass
   contexts/                   — DataContext (semantic_hasher, arrow_hasher, type_converter)
   protocols/
     hashing_protocols.py      — PipelineElementProtocol, ContentIdentifiableProtocol
     core_protocols/           — StreamProtocol, PodProtocol, SourceProtocol,
-                                DataFunctionProtocol, DatagramProtocol, TagProtocol,
+                                DataFunctionProtocol, DatagramProtocol, KeyProtocol,
                                 DataProtocol, TrackerProtocol
   core/
     base.py                   — ContentIdentifiableBase, PipelineElementBase, TraceableBase
@@ -156,7 +156,7 @@ src/orcapod/
     tracker.py                — Invocation tracking
     datagrams/
       datagram.py             — Datagram (unified dict/Arrow backing, lazy conversion)
-      tag_data.py           — Tag (+ system tags), Data (+ source info)
+      key_data.py           — Key (+ system keys), Data (+ source info)
     sources/
       base.py                 — RootSource (abstract, no upstream)
       arrow_table_source.py   — Core source — all other sources delegate to it
@@ -173,15 +173,15 @@ src/orcapod/
       merge_join.py           — MergeJoin (binary, colliding cols → sorted list[T])
       semijoin.py             — SemiJoin (binary, non-commutative)
       batch.py                — Batch (group rows, types become list[T])
-      column_selection.py     — Select/Drop Tag/Data columns
-      mappers.py              — MapTags, MapData (rename columns)
+      column_selection.py     — Select/Drop Key/Data columns
+      mappers.py              — MapKeys, MapData (rename columns)
       filters.py              — PolarsFilter
   hashing/
     semantic_hashing/         — BaseSemanticHasher, type handlers
   semantic_types/             — Type conversion (Python ↔ Arrow)
   databases/                  — ArrowDatabaseProtocol implementations (Delta Lake, in-memory)
   utils/
-    arrow_data_utils.py       — System tag manipulation, source info, column helpers
+    arrow_data_utils.py       — System key manipulation, source info, column helpers
     arrow_utils.py            — Arrow table utilities
     schema_utils.py           — Schema extraction, union, intersection, compatibility
     lazy_module.py            — LazyModule for deferred heavy imports
@@ -208,26 +208,26 @@ See orcapod-design.md at the project root for the full design specification.
 
   RootSource → ArrowTableStream → [Operator / FunctionPod] → ArrowTableStream → ...
 
-Every stream is an immutable sequence of (Tag, Data) pairs backed by a PyArrow Table.
-Tag columns are join keys and metadata; data columns are the data payload.
+Every stream is an immutable sequence of (Key, Data) pairs backed by a PyArrow Table.
+Key columns are join keys and metadata; data columns are the data payload.
 
 ### Core abstractions
 
 Datagram (core/datagrams/datagram.py) — immutable data container with lazy dict ↔ Arrow
 conversion. Two specializations:
-- Tag — metadata columns + hidden system tag columns for provenance tracking
+- Key — metadata columns + hidden system key columns for provenance tracking
 - Data — data columns + per-column source info provenance tokens
 
-Stream (core/streams/arrow_table_stream.py) — immutable (Tag, Data) sequence.
+Stream (core/streams/arrow_table_stream.py) — immutable (Key, Data) sequence.
 Key methods: output_schema(), keys(), iter_data(), as_table().
 
 Source (core/sources/) — produces a stream from external data. ArrowTableSource is the core
 implementation; CSV/Delta/DataFrame/Dict/List sources all delegate to it internally. Each
-source adds source-info columns and a system tag column. DerivedSource wraps a
+source adds source-info columns and a system key column. DerivedSource wraps a
 FunctionNode/OperatorNode's DB records as a new source.
 
 Function Pod (core/function_pod.py) — wraps a DataFunction that transforms individual
-data. Never inspects tags. Two execution models:
+data. Never inspects keys. Two execution models:
 - FunctionPod → FunctionPodStream: lazy, in-memory
 - FunctionNode: DB-backed, two-phase (yield cached results first, then compute missing)
 
@@ -242,8 +242,8 @@ FunctionNode.
 
 ### Strict operator / function pod boundary
 
-Operators: inspect tags (never data content), can rename columns, cannot synthesize values.
-Function Pods: inspect data content (never tags), synthesize new values, cached by content.
+Operators: inspect keys (never data content), can rename columns, cannot synthesize values.
+Function Pods: inspect data content (never keys), synthesize new values, cached by content.
 
 ### Two identity chains
 
@@ -253,7 +253,7 @@ Every pipeline element has two parallel hashes:
 2. pipeline_hash() — schema + topology only. Ignores data content. Used for DB path scoping
    so that different sources with identical schemas share database tables.
 
-Base case: RootSource.pipeline_identity_structure() returns (tag_schema, data_schema).
+Base case: RootSource.pipeline_identity_structure() returns (key_schema, data_schema).
 Each downstream node's pipeline hash commits to its own identity plus upstream pipeline
 hashes, forming a Merkle chain.
 
@@ -261,17 +261,17 @@ hashes, forming a Merkle chain.
 
   __ prefix       — System metadata (ColumnConfig meta)
   _source_ prefix — Source info provenance (ColumnConfig source)
-  _tag:: prefix   — System tag (ColumnConfig system_tags)
+  _key:: prefix   — System key (ColumnConfig system_keys)
   _context_key    — Data context (ColumnConfig context)
 
 Prefixes are computed from SystemConstant in system_constants.py.
 
-### System tag evolution rules
+### System key evolution rules
 
 1. Name-preserving — single-stream ops. Column name/value pass through unchanged.
-2. Name-extending — multi-input ops. System tag column name gets
+2. Name-extending — multi-input ops. System key column name gets
    ::{pipeline_hash}:{canonical_position} appended. Commutative operators sort by
-   pipeline_hash and sort system tag values per row.
+   pipeline_hash and sort system key values per row.
 3. Type-evolving — aggregation ops. Column type changes from str to list[str].
 
 ### Key patterns
@@ -285,7 +285,7 @@ Prefixes are computed from SystemConstant in system_constants.py.
 
 ### Important implementation details
 
-- ArrowTableSource raises ValueError if any tag_columns are not in the table.
+- ArrowTableSource raises ValueError if any key_columns are not in the table.
 - ArrowTableStream requires at least one data column; raises ValueError otherwise.
 - FunctionNode Phase 1 returns ALL records in the shared pipeline_path DB table.
   Phase 2 skips inputs whose hash is already in the DB.
@@ -293,4 +293,4 @@ Prefixes are computed from SystemConstant in system_constants.py.
 - DerivedSource before run() → raises ValueError (no computed records).
 - Join requires non-overlapping data columns; raises InputValidationError on collision.
 - MergeJoin requires colliding columns to have identical types; merges into sorted list[T].
-- Operators predict output schema (including system tag names) without computation.
+- Operators predict output schema (including system key names) without computation.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e0d699e8..f8eece80 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,37 @@
 
 ### Breaking Changes
 
+#### `tag` → `key` rename (hard break)
+
+All identifiers containing `tag`/`tags`/`Tag` have been renamed to
+`key`/`keys`/`Key`. No deprecation aliases. Pre-v0.1 artifacts will not load.
+
+| Old name | New name |
+|---|---|
+| `Tag` | `Key` |
+| `TagProtocol` | `KeyProtocol` |
+| `TagValue` | `KeyValue` |
+| `DuplicateTagError` | `DuplicateKeyError` |
+| `SelectTagColumns` | `SelectKeyColumns` |
+| `DropTagColumns` | `DropKeyColumns` |
+| `MapTags` | `MapKeys` |
+| `system_tags()` | `system_keys()` |
+| `map_tags()` | `map_keys()` |
+| `select_tag_columns()` | `select_key_columns()` |
+| `drop_tag_columns()` | `drop_key_columns()` |
+| `sort_by_tags` | `sort_by_keys` |
+| `SYSTEM_TAG_PREFIX` | `SYSTEM_KEY_PREFIX` |
+| `SYSTEM_TAG_PREFIX_NAME` (`"tag"`) | `SYSTEM_KEY_PREFIX_NAME` (`"key"`) |
+| `SYSTEM_TAG_SOURCE_ID_PREFIX` | `SYSTEM_KEY_SOURCE_ID_PREFIX` |
+| `SYSTEM_TAG_RECORD_ID_PREFIX` | `SYSTEM_KEY_RECORD_ID_PREFIX` |
+| `SYSTEM_TAG_SOURCE_ID_FIELD` | `SYSTEM_KEY_SOURCE_ID_FIELD` |
+| `SYSTEM_TAG_RECORD_ID_FIELD` | `SYSTEM_KEY_RECORD_ID_FIELD` |
+| `ColumnConfig(system_tags=...)` | `ColumnConfig(system_keys=...)` |
+| Column prefix `_tag_` | `_key_` (e.g. `_tag_source_id` → `_key_source_id`) |
+| Column prefix `_tag::` | `_key::` (e.g. `_tag::source:abc` → `_key::source:abc`) |
+| `src/orcapod/core/datagrams/tag_data.py` | `key_data.py` |
+| `test-objective/unit/test_tag.py` | `test_key.py` |
+
 #### `packets` → `data` rename (hard break)
 
 All identifiers containing `packet`/`packets`/`Packet` have been renamed to
diff --git a/CLAUDE.md b/CLAUDE.md
index bcfdb6b8..ba3711a9 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -149,13 +149,13 @@ Examples:
 src/orcapod/
 ├── types.py                    # Schema, ColumnConfig, ContentHash
 ├── system_constants.py         # Column prefixes and separators
-├── errors.py                   # InputValidationError, DuplicateTagError, FieldNotResolvableError
+├── errors.py                   # InputValidationError, DuplicateKeyError, FieldNotResolvableError
 ├── config.py                   # Config dataclass
 ├── contexts/                   # DataContext (semantic_hasher, arrow_hasher, type_converter)
 ├── protocols/
 │   ├── hashing_protocols.py    # PipelineElementProtocol, ContentIdentifiableProtocol
 │   └── core_protocols/         # StreamProtocol, PodProtocol, SourceProtocol,
-│                               # DataFunctionProtocol, DatagramProtocol, TagProtocol,
+│                               # DataFunctionProtocol, DatagramProtocol, KeyProtocol,
 │                               # DataProtocol, TrackerProtocol
 ├── core/
 │   ├── base.py                 # ContentIdentifiableBase, PipelineElementBase, TraceableBase
@@ -166,7 +166,7 @@ src/orcapod/
 │   ├── tracker.py              # Invocation tracking
 │   ├── datagrams/
 │   │   ├── datagram.py         # Datagram (unified dict/Arrow backing, lazy conversion)
-│   │   └── tag_data.py       # Tag (+ system tags), Data (+ source info)
+│   │   └── key_data.py       # Key (+ system keys), Data (+ source info)
 │   ├── sources/
 │   │   ├── base.py             # RootSource (abstract, no upstream)
 │   │   ├── arrow_table_source.py  # Core source — all other sources delegate to it
@@ -183,15 +183,15 @@ src/orcapod/
 │       ├── merge_join.py       # MergeJoin (binary, colliding cols → sorted list[T])
 │       ├── semijoin.py         # SemiJoin (binary, non-commutative)
 │       ├── batch.py            # Batch (group rows, types become list[T])
-│       ├── column_selection.py # Select/Drop Tag/Data columns
-│       ├── mappers.py          # MapTags, MapData (rename columns)
+│       ├── column_selection.py # Select/Drop Key/Data columns
+│       ├── mappers.py          # MapKeys, MapData (rename columns)
 │       └── filters.py          # PolarsFilter
 ├── hashing/
 │   └── semantic_hashing/       # BaseSemanticHasher, type handlers
 ├── semantic_types/             # Type conversion (Python ↔ Arrow)
 ├── databases/                  # ArrowDatabaseProtocol implementations (Delta Lake, in-memory)
 └── utils/
-    ├── arrow_data_utils.py     # System tag manipulation, source info, column helpers
+    ├── arrow_data_utils.py     # System key manipulation, source info, column helpers
     ├── arrow_utils.py          # Arrow table utilities
     ├── schema_utils.py         # Schema extraction, union, intersection, compatibility
     └── lazy_module.py          # LazyModule for deferred heavy imports
@@ -221,26 +221,26 @@ See `orcapod-design.md` at the project root for the full design specification.
 RootSource → ArrowTableStream → [Operator / FunctionPod] → ArrowTableStream → ...
 ```
 
-Every stream is an immutable sequence of (Tag, Data) pairs backed by a PyArrow Table.
-Tag columns are join keys and metadata; data columns are the data payload.
+Every stream is an immutable sequence of (Key, Data) pairs backed by a PyArrow Table.
+Key columns are join keys and metadata; data columns are the data payload.
 
 ### Core abstractions
 
 **Datagram** (`core/datagrams/datagram.py`) — immutable data container with lazy dict ↔ Arrow
 conversion. Two specializations:
-- **Tag** — metadata columns + hidden system tag columns for provenance tracking
+- **Key** — metadata columns + hidden system key columns for provenance tracking
 - **Data** — data columns + per-column source info provenance tokens
 
-**Stream** (`core/streams/arrow_table_stream.py`) — immutable (Tag, Data) sequence.
+**Stream** (`core/streams/arrow_table_stream.py`) — immutable (Key, Data) sequence.
 Key methods: `output_schema()`, `keys()`, `iter_data()`, `as_table()`.
 
 **Source** (`core/sources/`) — produces a stream from external data. `ArrowTableSource` is the
 core implementation; CSV/Delta/DataFrame/Dict/List sources all delegate to it internally. Each
-source adds source-info columns and a system tag column. `DerivedSource` wraps a
+source adds source-info columns and a system key column. `DerivedSource` wraps a
 FunctionNode/OperatorNode's DB records as a new source.
 
 **Function Pod** (`core/function_pod.py`) — wraps a `DataFunction` that transforms individual
-data. Never inspects tags. Two execution models:
+data. Never inspects keys. Two execution models:
 - `FunctionPod` → `FunctionPodStream`: lazy, in-memory
 - `FunctionNode`: DB-backed, two-phase (yield cached results first, then compute missing)
 
@@ -258,7 +258,7 @@ FunctionNode.
 | | Operator | Function Pod |
 |---|---|---|
 | Inspects data content | Never | Yes |
-| Inspects / uses tags | Yes | No |
+| Inspects / uses keys | Yes | No |
 | Can rename columns | Yes | No |
 | Synthesizes new values | No | Yes |
 | Stream arity | Configurable | Single in, single out |
@@ -272,7 +272,7 @@ Every pipeline element has two parallel hashes:
 2. **`pipeline_hash()`** — schema + topology only. Ignores data content. Used for DB path
    scoping so that different sources with identical schemas share database tables.
 
-Base case: `RootSource.pipeline_identity_structure()` returns `(tag_schema, data_schema)`.
+Base case: `RootSource.pipeline_identity_structure()` returns `(key_schema, data_schema)`.
 Each downstream node's pipeline hash commits to its own identity plus the pipeline hashes of
 its upstreams, forming a Merkle chain.
 
@@ -285,28 +285,28 @@ The pipeline hash uses a **resolver pattern** — `PipelineElementProtocol` obje
 |--------|---------|---------|---------------|
 | `__` | System metadata | `__data_id`, `__pod_version` | `ColumnConfig(meta=True)` |
 | `_source_` | Source info provenance | `_source_age` | `ColumnConfig(source=True)` |
-| `_tag::` | System tag | `_tag::source:abc123` | `ColumnConfig(system_tags=True)` |
+| `_key::` | System key | `_key::source:abc123` | `ColumnConfig(system_keys=True)` |
 | `_context_key` | Data context | `_context_key` | `ColumnConfig(context=True)` |
 
 Prefixes are computed from `SystemConstant` in `system_constants.py`. The `constants` singleton
 (with no global prefix) is used throughout.
 
-### System tag evolution rules
+### System key evolution rules
 
 1. **Name-preserving** — single-stream ops (filter, select, map). Column name and value pass
    through unchanged.
-2. **Name-extending** — multi-input ops (join, merge join). Each input's system tag column
+2. **Name-extending** — multi-input ops (join, merge join). Each input's system key column
    name gets `::{pipeline_hash}:{canonical_position}` appended. Commutative operators
-   canonically order inputs by `pipeline_hash` and sort system tag values per row.
+   canonically order inputs by `pipeline_hash` and sort system key values per row.
 3. **Type-evolving** — aggregation ops (batch). Column type changes from `str` to `list[str]`.
 
 ### Schema types and ColumnConfig
 
 `Schema` (`types.py`) — immutable `Mapping[str, DataType]` with `optional_fields` support.
-`output_schema()` always returns `(tag_schema, data_schema)` as a tuple of Schemas.
+`output_schema()` always returns `(key_schema, data_schema)` as a tuple of Schemas.
 
 `ColumnConfig` (`types.py`) — frozen dataclass controlling which column groups are included.
-Fields: `meta`, `context`, `source`, `system_tags`, `content_hash`, `sort_by_tags`.
+Fields: `meta`, `context`, `source`, `system_keys`, `content_hash`, `sort_by_keys`.
 Normalize via `ColumnConfig.handle_config(columns, all_info)` at the top of `output_schema()`
 and `as_table()` methods. `all_info=True` sets everything to True.
 
@@ -323,7 +323,7 @@ and `as_table()` methods. `all_info=True` sets everything to True.
 
 ### Important implementation details
 
-- `ArrowTableSource.__init__` raises `ValueError` if any `tag_columns` are not in the table.
+- `ArrowTableSource.__init__` raises `ValueError` if any `key_columns` are not in the table.
 - `ArrowTableStream` requires at least one data column; raises `ValueError` otherwise.
 - `FunctionNode.iter_data()` Phase 1 returns ALL records in the shared `pipeline_path`
   DB table (not filtered to current inputs). Phase 2 skips inputs whose hash is already
@@ -333,5 +333,5 @@ and `as_table()` methods. `all_info=True` sets everything to True.
 - Join requires non-overlapping data columns; raises `InputValidationError` on collision.
 - MergeJoin requires colliding data columns to have identical types; merges into sorted
   `list[T]` with source columns reordered to match.
-- Operators predict their output schema (including system tag column names) without
+- Operators predict their output schema (including system key column names) without
   performing the actual computation.
diff --git a/DESIGN_ISSUES.md b/DESIGN_ISSUES.md
index 1fc0ae20..b1c4f839 100644
--- a/DESIGN_ISSUES.md
+++ b/DESIGN_ISSUES.md
@@ -30,7 +30,7 @@ gaps rather than intentional choices:
 
 Note: merging into `TraceableBase` is correct at the *computation-node* level.
 `ContentIdentifiableBase` (which `TraceableBase` builds on) should **not** absorb
-`PipelineElementBase` — data datagrams (`Tag`, `Data`) are legitimately content-identifiable
+`PipelineElementBase` — data datagrams (`Key`, `Data`) are legitimately content-identifiable
 without being pipeline elements.
 
 **Fix:** Added `PipelineElementBase` to `TraceableBase`'s bases. Added
@@ -193,7 +193,7 @@ duplication that diverges silently over time.
 ### F5 — `FunctionPodStream` and `FunctionPodNodeStream` are near-identical copy-pastes
 **Status:** open
 **Severity:** medium
-`iter_data`, `as_table` (including content_hash and sort_by_tags logic), `keys`,
+`iter_data`, `as_table` (including content_hash and sort_by_keys logic), `keys`,
 `output_schema`, `source`, and `upstreams` are duplicated almost line-for-line. The only
 behavioural differences are:
 - `FunctionPodNodeStream` has `refresh_cache()`
@@ -219,7 +219,7 @@ is `self`.
 **Severity:** medium
 The method checks for an existing record with `get_record_by_id` and skips insertion if found.
 But it then calls `add_record(..., skip_duplicates=False)`, which will raise on a duplicate. A
-race between the lookup and the insert (e.g. two concurrent processes handling the same tag+data)
+race between the lookup and the insert (e.g. two concurrent processes handling the same key+data)
 would cause a crash instead of a graceful skip. Should use `skip_duplicates=True` for consistency
 with the intent.
 
@@ -251,22 +251,22 @@ Fix: change `ValueError` to `InputValidationError`.
 
 ---
 
-### F12 — System tag columns excluded from cache entry ID
+### F12 — System key columns excluded from cache entry ID
 **Status:** open
 **Severity:** high
 
-`FunctionPodNode.record_data_for_cache()` (line ~1077) builds a tag table for entry-ID
-computation but excludes system tag columns:
+`FunctionPodNode.record_data_for_cache()` (line ~1077) builds a key table for entry-ID
+computation but excludes system key columns:
 ```python
-# TODO: add system tag columns
+# TODO: add system key columns
 ```
 
-Two data with identical user tags but different provenance (arriving from different
-pipeline branches, thus having different system tags) produce the same cache key. This can
+Two data with identical user keys but different provenance (arriving from different
+pipeline branches, thus having different system keys) produce the same cache key. This can
 cause cache collisions where a result computed for one pipeline branch is returned for
 another.
 
-Fix: include system tag columns in the `tag_with_hash` table before computing the entry ID hash.
+Fix: include system key columns in the `key_with_hash` table before computing the entry ID hash.
 
 ---
 
@@ -289,7 +289,7 @@ incomplete schema, inconsistent with `as_table()` which does include source colu
 **Status:** open
 **Severity:** medium
 
-`as_table()` (line ~568) converts Arrow → Polars → sort → Arrow when sorting by tags:
+`as_table()` (line ~568) converts Arrow → Polars → sort → Arrow when sorting by keys:
 ```python
 # TODO: reimplement using polars natively
 ```
@@ -307,11 +307,11 @@ even when results are already stored in the result/pipeline databases.  This def
 of the two-database design (result DB + pipeline DB) used to cache computed outputs.
 
 **Fix:** Refactored `iter_data` to first call `FunctionPodNode.get_all_records(columns={"meta": True})`
-to load already-computed (tag, output-data) pairs from the databases (mirroring the legacy
+to load already-computed (key, output-data) pairs from the databases (mirroring the legacy
 `PodNodeStream` design), yield those via `TableStream`, then collect the set of already-processed
 `INPUT_PACKET_HASH` values and only call `process_data` for input data not yet in the DB.
 Also added `FunctionPodNode.get_all_records(columns, all_info)` using `ColumnConfig` to control
-which column groups (meta, source, system_tags) are returned.
+which column groups (meta, source, system_keys) are returned.
 
 ---
 
@@ -375,7 +375,7 @@ Delegating sources make this worse:
 - `DeltaTableSource` sets `source_name = resolved.name` but never sets `source_id` → same issue
 
 Additionally, delegating sources all return `self._arrow_source.identity_structure()` which is
-`("ArrowTableSource", tag_columns, table_hash)`. This means the outer source type (CSV, Delta,
+`("ArrowTableSource", key_columns, table_hash)`. This means the outer source type (CSV, Delta,
 etc.) is invisible to the content hash, and `source_id` (defaulting to content hash) will be
 identical for a CSVSource and an ArrowTableSource with the same data.
 
@@ -391,9 +391,9 @@ Added `computed_label()` to `RootSource` returning `_explicit_source_id`.
 **Status:** resolved
 **Severity:** high
 Both `FunctionPodStream.as_table()` and `FunctionPodNodeStream.as_table()` unconditionally call
-`.drop([constants.CONTEXT_KEY])` on the tags table built from the accumulated data. When the
+`.drop([constants.CONTEXT_KEY])` on the keys table built from the accumulated data. When the
 stream is empty (e.g. because the data function is inactive), `iter_data()` yields nothing,
-`tag_schema` stays `None`, and `pa.Table.from_pylist([], schema=None)` produces a zero-column
+`key_schema` stays `None`, and `pa.Table.from_pylist([], schema=None)` produces a zero-column
 table. The subsequent `.drop([constants.CONTEXT_KEY])` then raises `KeyError` because the column
 does not exist.
 
@@ -439,7 +439,7 @@ Relevant for future streaming/chunked processing of large datasets.
 **Status:** open
 **Severity:** medium
 
-`SelectTagColumns`, `SelectDataColumns`, `DropTagColumns`, `DropDataColumns` (in
+`SelectKeyColumns`, `SelectDataColumns`, `DropKeyColumns`, `DropDataColumns` (in
 `column_selection.py:58`, `137`, `214`, `292`) and `PolarsFilterByDataColumns`
 (`filters.py:135`) each have near-identical `validate_unary_input()` implementations. All are
 marked:
@@ -447,7 +447,7 @@ marked:
 # TODO: remove redundant logic
 ```
 
-The only difference between them is which key set (tag vs. data) is checked and the error
+The only difference between them is which column set (key vs. data) is checked and the error
 message text. A shared parameterized validation helper would eliminate the duplication.
 
 ---
@@ -469,14 +469,14 @@ Three categories of improvement are planned:
    independently:
    - ~~`PolarsFilter` — evaluate predicate per row, emit or drop immediately~~ (kept barrier:
      Polars expressions require DataFrame context for evaluation)
-   - `MapTags` / `MapData` — rename columns per row, emit immediately ✅
-   - `SelectTagColumns` / `SelectDataColumns` — project columns per row, emit immediately ✅
-   - `DropTagColumns` / `DropDataColumns` — drop columns per row, emit immediately ✅
+   - `MapKeys` / `MapData` — rename columns per row, emit immediately ✅
+   - `SelectKeyColumns` / `SelectDataColumns` — project columns per row, emit immediately ✅
+   - `DropKeyColumns` / `DropDataColumns` — drop columns per row, emit immediately ✅
 
 2. **Incremental overrides (stateful, eager emit)** — for multi-input operators that can
    produce partial results before all inputs are consumed:
    - `Join` — symmetric hash join for 2 inputs (streaming, with correct
-     system-tag name-extending via `input_pipeline_hashes` passed directly
+     system-key name-extending via `input_pipeline_hashes` passed directly
      to `async_execute`); barrier fallback for N>2 inputs via `static_process`. ✅
    - `MergeJoin` — kept barrier: complex column-merging logic
    - `SemiJoin` — build right, stream left through hash lookup ✅
@@ -487,7 +487,7 @@ Three categories of improvement are planned:
 
 **Remaining:** `PolarsFilter` (barrier), `MergeJoin` (barrier) could receive incremental
 overrides in the future but require careful handling of Polars expression evaluation and
-system-tag evolution respectively.
+system-key evolution respectively.
 
 ---
 
@@ -509,10 +509,10 @@ A naïve decomposition into `FunctionPod + Join` works but has unnecessary overh
 1. **Materialization waste** — FunctionPod produces an intermediate stream that is only created
    to be immediately joined back. AddResult can compute new columns and merge them into the
    original data in a single pass, with no intermediate stream.
-2. **Redundant tag matching** — Join must re-match tags that trivially correspond (they came
-   from the same input row). AddResult already holds the (tag, data) pair and can skip the
+2. **Redundant key matching** — Join must re-match keys that trivially correspond (they came
+   from the same input row). AddResult already holds the (key, data) pair and can skip the
    matching entirely.
-3. **Simpler async path** — streams row-by-row like FunctionPod: read (tag, data), call
+3. **Simpler async path** — streams row-by-row like FunctionPod: read (key, data), call
    the data function, merge original data columns + new columns, emit. No broadcast,
    passthrough channel, or rejoin wiring needed.
 
@@ -561,7 +561,7 @@ await AddResult(grade_pf).async_execute([input_ch], output_ch)
 
 #### Implementation notes
 
-- `output_schema()` returns `(input_tag_schema, input_data_schema | function_output_schema)`
+- `output_schema()` returns `(input_key_schema, input_data_schema | function_output_schema)`
   — the union of original data columns and new computed columns.
 - Must raise `InputValidationError` if function output keys collide with existing data
   column names (same constraint as Join on overlapping data columns).
diff --git a/README.md b/README.md
index 39df8081..8bb5bd19 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@ Orcapod's Python library for developing reproducbile scientific pipelines.
 
 ## Releasing
 
-To cut a release, tag a commit on `main` — `hatch-vcs` derives the version
+To cut a release, tag a commit on `main` — `hatch-vcs` derives the version
 automatically and CI publishes to PyPI. See [RELEASING.md](RELEASING.md) for the
 full workflow.
 
@@ -112,24 +112,24 @@ While the following is subject to change based on future development, it represe
       │  ctx_observer = obs.contextualize(node_hash, node_label)
       │  ctx_observer.on_node_start(node_label, node_hash)
       │
-      │  for each non-cached (tag, data):
+      │  for each non-cached (key, data):
       │
-      │  ctx_observer.on_data_start(node_label, tag, data)
+      │  ctx_observer.on_data_start(node_label, key, data)
       │
-      ├─► pkt_logger = ctx_observer.create_data_logger(tag, data, pipeline_path=...)
+      ├─► pkt_logger = ctx_observer.create_data_logger(key, data, pipeline_path=...)
       │       │
       │       └─► _ContextualizedLoggingObserver creates a DataLogger bound to
-      │           (run_id, node_label, node_hash, tag_data, log_path)
+      │           (run_id, node_label, node_hash, key_data, log_path)
       │
-      ├─► FunctionNode._process_data_internal(tag, data, logger=pkt_logger)
+      ├─► FunctionNode._process_data_internal(key, data, logger=pkt_logger)
       │       │
-      │       ├─► CachedFunctionPod.process_data(tag, data, logger=pkt_logger)
+      │       ├─► CachedFunctionPod.process_data(key, data, logger=pkt_logger)
       │       │       │
       │       │       │  checks pod-level cache (ResultCache.lookup)
-      │       │       │  cache hit? → return (tag, cached_data)
+      │       │       │  cache hit? → return (key, cached_data)
       │       │       │  cache miss ↓
       │       │       │
-      │       │       ├─► _FunctionPodBase.process_data(tag, data, logger=pkt_logger)
+      │       │       ├─► _FunctionPodBase.process_data(key, data, logger=pkt_logger)
       │       │       │       │
       │       │       │       ├─► PythonDataFunction.call(data, logger=pkt_logger)
       │       │       │       │       │
@@ -168,27 +168,27 @@ While the following is subject to change based on future development, it represe
       │       │       │       │       │
       │       │       │       │       └─► returns Data | None (or raises)
       │       │       │       │
-      │       │       │       └─► returns (tag, Data | None)
+      │       │       │       └─► returns (key, Data | None)
       │       │       │
       │       │       │  stores result in pod-level cache (on success)
       │       │       │
-      │       │       └─► returns (tag, Data | None)
+      │       │       └─► returns (key, Data | None)
       │       │
       │       │  writes pipeline provenance record (on success)
       │       │  caches result internally
       │       │
-      │       └─► returns (tag, Data | None)
+      │       └─► returns (key, Data | None)
       │
-      │  ← back in FunctionNode.execute() with (tag_out, result)
+      │  ← back in FunctionNode.execute() with (key_out, result)
       │
       │  (logger.record already called inside the executor — nothing to do here)
       │
       ├─► try/except around _process_data_internal:
       │   on success:
-      │       ctx_observer.on_data_end(node_label, tag, data, result, cached=False)
-      │       emit (tag_out, result) downstream
+      │       ctx_observer.on_data_end(node_label, key, data, result, cached=False)
+      │       emit (key_out, result) downstream
       │   on exception:
-      │       ctx_observer.on_data_crash(node_label, tag, data, exc)
+      │       ctx_observer.on_data_crash(node_label, key, data, exc)
       │       if error_policy == "fail_fast": raise
       │       otherwise: skip this data, continue
       │
@@ -204,7 +204,7 @@ While the following is subject to change based on future development, it represe
           Execution output columns (from **kwargs, prefixed with "`__`"):
             __stdout, __stderr, __python_logs, __traceback, __success
             (or any other fields the executor passes — protocol is generic)
-          Tag columns (unprefixed, from tag_data baked in at creation):
+          Key columns (unprefixed, from key_data baked in at creation):
             e.g. "idx" → "0", "key" → "a"
 
 Writes the row to the database at the mirrored log path.
@@ -222,4 +222,4 @@ Writes the row to the database at the mirrored log path.
   identify the node).
   - No auto-executor in the class — DataFunctionBase.__init__ does not assign a default executor. Pipeline.compile() assigns LocalExecutor to function nodes that have none. Users
    can override per-node (pipeline.node.executor = ...) or globally via pipeline.run(execution_engine=...).
-  - Log columns use __ prefix — fixed columns (__log_id, __stdout, __success, etc.) are prefixed to avoid collision with user-defined tag column names.
\ No newline at end of file
+  - Log columns use __ prefix — fixed columns (__log_id, __stdout, __success, etc.) are prefixed to avoid collision with user-defined key column names.
\ No newline at end of file
diff --git a/RELEASING.md b/RELEASING.md
index 2f6606eb..a6f4c6c0 100644
--- a/RELEASING.md
+++ b/RELEASING.md
@@ -12,18 +12,18 @@ This document describes how to cut a release of `orcapod` to PyPI.
 
 1. **Merge your branch into `main`** — open a PR, get it reviewed, merge it.
 
-2. **Tag the commit on `main`** — the version is derived automatically from the git
-   tag by `hatch-vcs` (`dynamic = ["version"]` in `pyproject.toml`). No manual
+2. **Tag the commit on `main`** — the version is derived automatically from the git
+   tag by `hatch-vcs` (`dynamic = ["version"]` in `pyproject.toml`). No manual
    version bump is needed.
 
    ```bash
    git checkout main
    git pull origin main
-   git tag v0.1.0          # or v0.1.0rc1 for a pre-release
+   git tag v0.1.0          # or v0.1.0rc1 for a pre-release
    git push origin v0.1.0
    ```
 
-3. **CI takes over** — pushing the tag triggers the publish workflow
+3. **CI takes over** — pushing the tag triggers the publish workflow
    (`.github/workflows/publish.yml`):
 
    ```
@@ -40,9 +40,9 @@ the stable vs pre-release distinction natively:
 - `pip install orcapod` — installs the latest **stable** release only
 - `pip install --pre orcapod` — installs the latest release including pre-releases
 
-## Tag Format
+## Tag Format
 
-| Release type | Tag format | Example |
+| Release type | Tag format | Example |
 |-------------|------------|---------|
 | Stable | `vMAJOR.MINOR.PATCH` | `v0.1.0` |
 | Release candidate | `vMAJOR.MINOR.PATCHrcN` | `v0.1.0rc1` |
diff --git a/TESTING_PLAN.md b/TESTING_PLAN.md
index e43b6d90..6d250247 100644
--- a/TESTING_PLAN.md
+++ b/TESTING_PLAN.md
@@ -29,7 +29,7 @@ test-objective/
 │   ├── __init__.py
 │   ├── test_types.py                  # Schema, ColumnConfig, ContentHash
 │   ├── test_datagram.py               # Datagram core behavior
-│   ├── test_tag.py                    # Tag (system tags, ColumnConfig filtering)
+│   ├── test_key.py                    # Key (system keys, ColumnConfig filtering)
 │   ├── test_data.py                 # Data (source info, provenance)
 │   ├── test_stream.py                 # ArrowTableStream construction & iteration
 │   ├── test_sources.py                # All source types + error conditions
@@ -42,7 +42,7 @@ test-objective/
 │   ├── test_databases.py              # InMemory, DeltaLake, NoOp databases
 │   ├── test_schema_utils.py           # Schema extraction, union, intersection
 │   ├── test_arrow_utils.py            # Arrow table/schema utilities
-│   ├── test_arrow_data_utils.py       # System tags, source info, column helpers
+│   ├── test_arrow_data_utils.py       # System keys, source info, column helpers
 │   ├── test_semantic_types.py         # UniversalTypeConverter, SemanticTypeRegistry
 │   ├── test_contexts.py               # DataContext resolution, validation
 │   ├── test_tracker.py                # BasicTrackerManager, GraphTracker
@@ -52,7 +52,7 @@ test-objective/
 │   ├── test_pipeline_flows.py         # End-to-end pipeline scenarios
 │   ├── test_caching_flows.py          # DB-backed caching (FunctionNode, OperatorNode)
 │   ├── test_hash_invariants.py        # Hash stability & Merkle chain properties
-│   ├── test_provenance.py             # System tag lineage through pipelines
+│   ├── test_provenance.py             # System key lineage through pipelines
 │   └── test_column_config_filtering.py # ColumnConfig behavior across all components
 └── property/
     ├── __init__.py
@@ -156,19 +156,19 @@ test-objective/
 - `test_datagram_content_hash_changes_with_data` — different data → different hash
 - `test_datagram_equality_by_content` — equal content → equal datagrams
 
-### 3. `test_tag.py` — Tag
+### 3. `test_key.py` — Key
 
-- `test_tag_construction_with_system_tags` — system tags stored separately from data
-- `test_tag_system_tags_excluded_from_default_keys` — keys() doesn't show system tags
-- `test_tag_system_tags_included_with_column_config` — keys(columns={"system_tags": True}) shows them
-- `test_tag_as_dict_excludes_system_tags_by_default` — as_dict() only has data
-- `test_tag_as_dict_all_info_includes_system_tags` — as_dict(all_info=True) has everything
-- `test_tag_as_table_excludes_system_tags_by_default`
-- `test_tag_as_table_all_info_includes_system_tags`
-- `test_tag_schema_excludes_system_tags_by_default`
-- `test_tag_copy_preserves_system_tags` — copy() includes system tags
-- `test_tag_as_datagram_conversion` — as_datagram() returns Datagram (not Tag)
-- `test_tag_system_tags_method_returns_copy` — system_tags() returns dict copy, not reference
+- `test_key_construction_with_system_keys` — system keys stored separately from data
+- `test_key_system_keys_excluded_from_default_keys` — keys() doesn't show system keys
+- `test_key_system_keys_included_with_column_config` — keys(columns={"system_keys": True}) shows them
+- `test_key_as_dict_excludes_system_keys_by_default` — as_dict() only has data
+- `test_key_as_dict_all_info_includes_system_keys` — as_dict(all_info=True) has everything
+- `test_key_as_table_excludes_system_keys_by_default`
+- `test_key_as_table_all_info_includes_system_keys`
+- `test_key_schema_excludes_system_keys_by_default`
+- `test_key_copy_preserves_system_keys` — copy() includes system keys
+- `test_key_as_datagram_conversion` — as_datagram() returns Datagram (not Key)
+- `test_key_system_keys_method_returns_copy` — system_keys() returns dict copy, not reference
 
 ### 4. `test_data.py` — Data
 
@@ -186,23 +186,23 @@ test-objective/
 ### 5. `test_stream.py` — ArrowTableStream
 
 **Construction:**
-- `test_stream_from_table_with_tag_columns` — tag/data column separation
+- `test_stream_from_table_with_key_columns` — key/data column separation
 - `test_stream_requires_at_least_one_data_column` — ValueError if no data columns
-- `test_stream_with_system_tag_columns` — system tag columns tracked
+- `test_stream_with_system_key_columns` — system key columns tracked
 - `test_stream_with_source_info` — source info attached to data columns
 - `test_stream_with_producer` — producer property set
 - `test_stream_with_upstreams` — upstreams tuple set
 
 **Schema & Keys:**
-- `test_stream_keys_returns_tag_and_data_keys` — tuple of (tag_keys, data_keys)
-- `test_stream_output_schema_returns_two_schemas` — (tag_schema, data_schema)
+- `test_stream_keys_returns_key_and_data_keys` — tuple of (key_keys, data_keys)
+- `test_stream_output_schema_returns_two_schemas` — (key_schema, data_schema)
 - `test_stream_schema_matches_actual_data` — output_schema() types match as_table() types
 - `test_stream_keys_with_column_config` — ColumnConfig filtering works
 
 **Iteration:**
-- `test_stream_iter_data_yields_tag_data_pairs` — each yield is (Tag, Data)
+- `test_stream_iter_data_yields_key_data_pairs` — each yield is (Key, Data)
 - `test_stream_iter_data_count_matches_rows` — number of yields = number of rows
-- `test_stream_iter_data_tag_keys_correct` — tag column names match
+- `test_stream_iter_data_key_keys_correct` — key column names match
 - `test_stream_iter_data_data_keys_correct` — data column names match
 - `test_stream_as_table_matches_iter_data` — table materialization consistent with iteration
 
@@ -219,22 +219,22 @@ test-objective/
 **ArrowTableSource:**
 - `test_arrow_source_from_valid_table` — normal construction succeeds
 - `test_arrow_source_empty_table_raises` — ValueError("Table is empty")
-- `test_arrow_source_missing_tag_column_raises` — ValueError if tag_columns not in table
-- `test_arrow_source_adds_system_tag_column` — system tag column added automatically
+- `test_arrow_source_missing_key_column_raises` — ValueError if key_columns not in table
+- `test_arrow_source_adds_system_key_column` — system key column added automatically
 - `test_arrow_source_adds_source_info_columns` — _source_ columns added
 - `test_arrow_source_source_id_set` — source_id property populated
 - `test_arrow_source_producer_is_none` — root sources have no producer
 - `test_arrow_source_upstreams_empty` — root sources have no upstreams
 - `test_arrow_source_resolve_field_by_record_id` — resolves field value
 - `test_arrow_source_resolve_field_missing_raises` — FieldNotResolvableError
-- `test_arrow_source_pipeline_identity_structure` — returns (tag_schema, data_schema)
+- `test_arrow_source_pipeline_identity_structure` — returns (key_schema, data_schema)
 - `test_arrow_source_iter_data_yields_correct_pairs`
 - `test_arrow_source_as_table_has_all_columns`
 
 **DictSource:**
 - `test_dict_source_from_dict_of_lists` — constructs correctly
 - `test_dict_source_delegates_to_arrow_table_source` — same behavior as ArrowTableSource
-- `test_dict_source_with_tag_columns`
+- `test_dict_source_with_key_columns`
 
 **ListSource:**
 - `test_list_source_from_list_of_dicts` — constructs correctly
@@ -242,7 +242,7 @@ test-objective/
 
 **CSVSource:**
 - `test_csv_source_from_file` — reads CSV correctly
-- `test_csv_source_with_tag_columns`
+- `test_csv_source_with_key_columns`
 
 **DataFrameSource:**
 - `test_dataframe_source_from_polars` — constructs from Polars DataFrame
@@ -304,7 +304,7 @@ test-objective/
 - `test_function_pod_validate_inputs_multiple_raises` — rejects multiple streams
 - `test_function_pod_output_schema_prediction` — output_schema() matches actual output
 - `test_function_pod_callable_alias` — __call__ same as process()
-- `test_function_pod_never_modifies_tags` — tags pass through unchanged
+- `test_function_pod_never_modifies_keys` — keys pass through unchanged
 - `test_function_pod_transforms_data` — data are transformed by function
 
 **FunctionPodStream:**
@@ -322,55 +322,55 @@ test-objective/
 ### 10. `test_operators.py` — All Operators
 
 **Join (N-ary, commutative):**
-- `test_join_two_streams_on_common_tags` — inner join on shared tag columns
+- `test_join_two_streams_on_common_keys` — inner join on shared key columns
 - `test_join_non_overlapping_data_columns_required` — InputValidationError on collision
 - `test_join_commutative` — join(A, B) == join(B, A) (same rows regardless of order)
 - `test_join_three_or_more_streams` — N-ary join works
-- `test_join_empty_result_when_no_matches` — disjoint tags → empty stream
-- `test_join_system_tag_name_extending` — system tag columns get ::pipeline_hash:position suffix
-- `test_join_system_tag_values_sorted_for_commutativity` — canonical ordering of tag values
+- `test_join_empty_result_when_no_matches` — disjoint keys → empty stream
+- `test_join_system_key_name_extending` — system key columns get ::pipeline_hash:position suffix
+- `test_join_system_key_values_sorted_for_commutativity` — canonical ordering of key values
 - `test_join_output_schema_prediction` — output_schema() matches actual output
 
 **MergeJoin (binary):**
 - `test_merge_join_colliding_columns_become_sorted_lists` — same-name data cols → list[T]
 - `test_merge_join_requires_identical_types` — different types raise error
 - `test_merge_join_non_colliding_columns_pass_through` — unmatched columns kept as-is
-- `test_merge_join_system_tag_name_extending`
+- `test_merge_join_system_key_name_extending`
 - `test_merge_join_output_schema_prediction` — predicts list[T] types correctly
 
 **SemiJoin (binary, non-commutative):**
-- `test_semijoin_filters_left_by_right_tags` — keeps left rows matching right tags
+- `test_semijoin_filters_left_by_right_keys` — keeps left rows matching right keys
 - `test_semijoin_non_commutative` — semijoin(A, B) != semijoin(B, A) in general
 - `test_semijoin_preserves_left_data_columns` — right data columns dropped
-- `test_semijoin_system_tag_name_extending`
+- `test_semijoin_system_key_name_extending`
 
 **Batch:**
-- `test_batch_groups_rows` — groups rows by tag, aggregates data
+- `test_batch_groups_rows` — groups rows by key, aggregates data
 - `test_batch_types_become_lists` — data column types become list[T]
-- `test_batch_system_tag_type_evolving` — system tag type becomes list[str]
+- `test_batch_system_key_type_evolving` — system key type becomes list[str]
 - `test_batch_with_batch_size` — batch_size limits group size
 - `test_batch_drop_partial_batch` — drop_partial_batch=True drops incomplete groups
 - `test_batch_output_schema_prediction` — predicts list[T] types
 
-**Column Selection (Select/Drop Tag/Data):**
-- `test_select_tag_columns` — keeps only specified tag columns
-- `test_select_tag_columns_strict_missing_raises` — strict=True raises on missing column
+**Column Selection (Select/Drop Key/Data):**
+- `test_select_key_columns` — keeps only specified key columns
+- `test_select_key_columns_strict_missing_raises` — strict=True raises on missing column
 - `test_select_data_columns` — keeps only specified data columns
-- `test_drop_tag_columns` — removes specified tag columns
+- `test_drop_key_columns` — removes specified key columns
 - `test_drop_data_columns` — removes specified data columns
-- `test_column_selection_system_tag_name_preserving` — system tags unchanged
+- `test_column_selection_system_key_name_preserving` — system keys unchanged
 
-**MapTags/MapData:**
-- `test_map_tags_renames_tag_columns` — renames specified tag columns
-- `test_map_tags_drop_unmapped` — drop_unmapped=True removes unrenamed columns
+**MapKeys/MapData:**
+- `test_map_keys_renames_key_columns` — renames specified key columns
+- `test_map_keys_drop_unmapped` — drop_unmapped=True removes unrenamed columns
 - `test_map_data_renames_data_columns`
-- `test_map_preserves_system_tags` — system tag columns unchanged (name-preserving)
+- `test_map_preserves_system_keys` — system key columns unchanged (name-preserving)
 
 **PolarsFilter:**
 - `test_polars_filter_with_predicate` — filters rows matching predicate
 - `test_polars_filter_with_constraints` — filters by column=value constraints
 - `test_polars_filter_preserves_schema` — output schema same as input
-- `test_polars_filter_system_tag_name_preserving`
+- `test_polars_filter_system_key_name_preserving`
 
 **Operator Base Classes:**
 - `test_unary_operator_rejects_multiple_inputs` — validate_inputs raises for >1 stream
@@ -381,7 +381,7 @@ test-objective/
 
 **FunctionNode:**
 - `test_function_node_iter_data` — iterates and transforms all data
-- `test_function_node_process_data` — transforms single (tag, data) pair
+- `test_function_node_process_data` — transforms single (key, data) pair
 - `test_function_node_producer_is_function_pod`
 - `test_function_node_upstreams`
 - `test_function_node_clear_cache`
@@ -486,13 +486,13 @@ test-objective/
 - `test_check_arrow_schema_compatibility` — compatible schemas pass
 - `test_split_by_column_groups` — splits table into multiple tables
 
-### 16. `test_arrow_data_utils.py` — System Tags & Source Info
+### 16. `test_arrow_data_utils.py` — System Keys & Source Info
 
-- `test_add_system_tag_columns` — adds _tag:: prefixed columns
-- `test_add_system_tag_columns_empty_table_raises` — ValueError
-- `test_add_system_tag_columns_length_mismatch_raises` — ValueError
-- `test_append_to_system_tags` — extends existing system tag values
-- `test_sort_system_tag_values` — canonical sorting for commutativity
+- `test_add_system_key_columns` — adds _key:: prefixed columns
+- `test_add_system_key_columns_empty_table_raises` — ValueError
+- `test_add_system_key_columns_length_mismatch_raises` — ValueError
+- `test_append_to_system_keys` — extends existing system key values
+- `test_sort_system_key_values` — canonical sorting for commutativity
 - `test_add_source_info` — adds _source_ prefixed columns
 - `test_drop_columns_with_prefix` — removes columns matching prefix
 - `test_drop_system_columns` — removes __ and __ prefixed columns
@@ -542,7 +542,7 @@ test-objective/
 - `test_source_to_stream_to_single_operator` — Source → Filter → Stream
 - `test_source_to_function_pod` — Source → FunctionPod → Stream with transformed data
 - `test_multi_source_join` — Two sources → Join → Stream with combined data
-- `test_chained_operators` — Source → Filter → Select → MapTags → Stream
+- `test_chained_operators` — Source → Filter → Select → MapKeys → Stream
 - `test_function_pod_then_operator` — Source → FunctionPod → Filter → Stream
 - `test_join_then_batch` — Two sources → Join → Batch → Stream
 - `test_semijoin_filters_correctly` — Source A semi-joined with Source B
@@ -569,20 +569,20 @@ test-objective/
 - `test_commutative_join_pipeline_hash_order_independent` — join(A,B) pipeline_hash == join(B,A)
 - `test_non_commutative_semijoin_pipeline_hash_order_dependent` — semijoin(A,B) != semijoin(B,A)
 
-### `test_provenance.py` — System Tag Lineage Tracking
+### `test_provenance.py` — System Key Lineage Tracking
 
-- `test_source_creates_system_tag_column` — source adds _tag::source:hash column
-- `test_unary_operator_preserves_system_tags` — filter/select/map: name+value unchanged
-- `test_join_extends_system_tag_names` — multi-input: column names get ::hash:pos suffix
-- `test_join_sorts_system_tag_values` — commutative ops sort tag values
-- `test_batch_evolves_system_tag_type` — batch: str → list[str]
+- `test_source_creates_system_key_column` — source adds _key::source:hash column
+- `test_unary_operator_preserves_system_keys` — filter/select/map: name+value unchanged
+- `test_join_extends_system_key_names` — multi-input: column names get ::hash:pos suffix
+- `test_join_sorts_system_key_values` — commutative ops sort key values
+- `test_batch_evolves_system_key_type` — batch: str → list[str]
 - `test_full_pipeline_provenance_chain` — source → join → filter → batch: all rules applied
 
 ### `test_column_config_filtering.py` — ColumnConfig Across All Components
 
 - `test_datagram_column_config_meta` — meta=True includes __ columns
 - `test_datagram_column_config_data_only` — all False = data columns only
-- `test_tag_column_config_system_tags` — system_tags=True includes _tag:: columns
+- `test_key_column_config_system_keys` — system_keys=True includes _key:: columns
 - `test_data_column_config_source` — source=True includes _source_ columns
 - `test_stream_column_config_all_info` — all_info=True on keys/output_schema/as_table
 - `test_stream_column_config_consistency` — keys(), output_schema(), as_table() all respect same config
@@ -619,7 +619,7 @@ test-objective/
 
 ### Recommended additions (not implemented in this PR, but suggested):
 3. **Mutation testing** with `mutmut` — run `uv run mutmut run --paths-to-mutate=src/orcapod/ --tests-dir=test-objective/` to verify tests catch code mutations. A surviving mutant indicates a test gap
-4. **Metamorphic testing** — "if I add a row to source A that matches source B's tags, the join output should have one more row" — tests relationships between inputs/outputs without knowing exact expected values
+4. **Metamorphic testing** — "if I add a row to source A that matches source B's keys, the join output should have one more row" — tests relationships between inputs/outputs without knowing exact expected values
 5. **Protocol conformance automation** — use `runtime_checkable` protocols and `isinstance` checks to verify every concrete class satisfies its protocol at import time
 6. **Specification oracle** — for each documented behavior in `orcapod-design.md`, create a test that constructs the exact scenario described and verifies the documented outcome
 7. **Fuzz testing** — feed malformed inputs (wrong types, extreme sizes, Unicode edge cases) to constructors and verify graceful error handling
@@ -630,7 +630,7 @@ test-objective/
 
 1. **`conftest.py`** — shared fixtures (reusable sources, streams, data functions, databases)
 2. **`unit/test_types.py`** — foundational types (Schema, ContentHash, ColumnConfig)
-3. **`unit/test_datagram.py`**, **`test_tag.py`**, **`test_data.py`** — data containers
+3. **`unit/test_datagram.py`**, **`test_key.py`**, **`test_data.py`** — data containers
 4. **`unit/test_stream.py`** — stream construction and iteration
 5. **`unit/test_sources.py`** + **`test_source_registry.py`** — all source types
 6. **`unit/test_hashing.py`** — semantic hasher and handlers
diff --git a/design/async-execution-implementation-plan.md b/design/async-execution-implementation-plan.md
index 6b6dba23..390baec0 100644
--- a/design/async-execution-implementation-plan.md
+++ b/design/async-execution-implementation-plan.md
@@ -67,9 +67,9 @@ sync execution — this just makes every node async-capable.
 
 **New file:** `src/orcapod/core/execution/materialization.py`
 
-- `materialize_to_stream(rows: list[tuple[TagProtocol, DataProtocol]]) -> ArrowTableStream`
-  — converts a list of (tag, data) pairs back into an ArrowTableStream
-- `stream_to_rows(stream: StreamProtocol) -> list[tuple[TagProtocol, DataProtocol]]`
+- `materialize_to_stream(rows: list[tuple[KeyProtocol, DataProtocol]]) -> ArrowTableStream`
+  — converts a list of (key, data) pairs back into an ArrowTableStream
+- `stream_to_rows(stream: StreamProtocol) -> list[tuple[KeyProtocol, DataProtocol]]`
   — the inverse (thin wrapper around `iter_data`)
 
 **Tests:** `tests/test_core/test_execution/test_materialization.py`
@@ -166,8 +166,8 @@ Each step is independent — can be done in any order or in parallel.
 
 **Modify:** `src/orcapod/core/operators/column_selection.py`
 
-- Override `async_execute` on `SelectTagColumns`, `SelectDataColumns`,
-  `DropTagColumns`, `DropDataColumns`
+- Override `async_execute` on `SelectKeyColumns`, `SelectDataColumns`,
+  `DropKeyColumns`, `DropDataColumns`
 - Each: iterate input, project/drop columns per row, emit
 
 **Tests:** `tests/test_core/test_execution/test_streaming_operators.py`
@@ -178,7 +178,7 @@ Each step is independent — can be done in any order or in parallel.
 
 **Modify:** `src/orcapod/core/operators/mappers.py`
 
-- Override `async_execute` on `MapTags`, `MapData`
+- Override `async_execute` on `MapKeys`, `MapData`
 - Each: iterate input, rename columns per row, emit
 
 **Tests:** added to `test_streaming_operators.py`
@@ -199,7 +199,7 @@ Each step is independent — can be done in any order or in parallel.
 - Override `async_execute` with symmetric hash join
 - Concurrent consumption of all inputs via TaskGroup
 - Per-row index probing and immediate emission
-- System tag extension logic (reuse existing `_extend_system_tag_columns` logic)
+- System key extension logic (reuse existing `_extend_system_key_columns` logic)
 
 **Tests:** `tests/test_core/test_execution/test_incremental_join.py`
 - Same result set as sync join (order may differ, compare as sets)
@@ -312,7 +312,7 @@ Phase 5 depends on everything above.
 
 | Risk | Mitigation |
 |---|---|
-| Row ordering differs between sync/async | Document clearly; `sort_by_tags` provides determinism |
+| Row ordering differs between sync/async | Document clearly; `sort_by_keys` provides determinism |
 | Incremental Join correctness | Extensive property-based tests comparing to sync |
 | Deadlocks from channel misuse | Strict rule: every node MUST close output channel |
 | Per-row Datagram operations are slow | Benchmark; fall back to barrier if perf regresses |
diff --git a/design/async-execution-system.md b/design/async-execution-system.md
index e5e47470..0f2a3220 100644
--- a/design/async-execution-system.md
+++ b/design/async-execution-system.md
@@ -41,11 +41,11 @@ Every pipeline node — source, operator, or function pod — implements a singl
 class AsyncExecutableProtocol(Protocol):
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
     ) -> None:
         """
-        Consume (tag, data) pairs from input channels, produce to output channel.
+        Consume (key, data) pairs from input channels, produce to output channel.
         MUST close output channel when done (signals completion to downstream).
         """
         ...
@@ -128,31 +128,31 @@ in **when** the node reads, **how much** it buffers, and **when** it emits.
 
 ### 1. Streaming (Row-by-Row)
 
-**Applies to:** Filter, MapTags, MapData, Select/Drop columns, FunctionPod
+**Applies to:** Filter, MapKeys, MapData, Select/Drop columns, FunctionPod
 
 Zero buffering. Each input row is independently transformed and emitted immediately.
 
 ```python
 # Example: PolarsFilter
 async def async_execute(self, inputs, output):
-    async for tag, data in inputs[0]:
-        if self._evaluate_predicate(tag, data):
-            await output.send((tag, data))
+    async for key, data in inputs[0]:
+        if self._evaluate_predicate(key, data):
+            await output.send((key, data))
     await output.close()
 
 # Example: FunctionPod with concurrency control
 async def async_execute(self, inputs, output):
     sem = asyncio.Semaphore(self.node_config.max_concurrency or _INF)
 
-    async def process_one(tag, data):
+    async def process_one(key, data):
         async with sem:
             result = await self.data_function.async_call(data)
             if result is not None:
-                await output.send((tag, result))
+                await output.send((key, result))
 
     async with asyncio.TaskGroup() as tg:
-        async for tag, data in inputs[0]:
-            tg.create_task(process_one(tag, data))
+        async for key, data in inputs[0]:
+            tg.create_task(process_one(key, data))
 
     await output.close()
 ```
@@ -169,14 +169,14 @@ async def async_execute(self, inputs, output):
     indexes: list[dict[JoinKey, list[Row]]] = [{} for _ in inputs]
 
     async def consume(i: int, channel):
-        async for tag, data in channel:
-            key = self._extract_join_key(tag)
-            indexes[i].setdefault(key, []).append((tag, data))
+        async for row_key, data in channel:
+            key = self._extract_join_key(row_key)
+            indexes[i].setdefault(key, []).append((row_key, data))
 
             # Probe all OTHER indexes for matches
             other_lists = [indexes[j].get(key, []) for j in range(len(inputs)) if j != i]
             for combo in itertools.product(*other_lists):
-                joined = self._merge_rows((tag, data), *combo)
+                joined = self._merge_rows((row_key, data), *combo)
                 await output.send(joined)
 
     async with asyncio.TaskGroup() as tg:
@@ -194,15 +194,15 @@ async def async_execute(self, inputs, output):
 
     # Phase 1: Build right-side index
     right_keys = set()
-    async for tag, data in right:
-        key = self._extract_join_key(tag)
+    async for row_key, data in right:
+        key = self._extract_join_key(row_key)
         right_keys.add(key)
 
     # Phase 2: Stream left, emit matches
-    async for tag, data in left:
-        key = self._extract_join_key(tag)
+    async for row_key, data in left:
+        key = self._extract_join_key(row_key)
         if key in right_keys:
-            await output.send((tag, data))
+            await output.send((row_key, data))
 
     await output.close()
 ```
@@ -224,8 +224,8 @@ async def async_execute(self, inputs, output):
     result_stream = self.static_process(*streams)
 
     # Phase 3: Emit results asynchronously
-    for tag, data in result_stream.iter_data():
-        await output.send((tag, data))
+    for key, data in result_stream.iter_data():
+        await output.send((key, data))
 
     await output.close()
 ```
@@ -249,8 +249,8 @@ class UnaryOperator(StaticOutputPod):
         rows = await inputs[0].collect()
         stream = self._materialize_to_stream(rows)
         result = self.static_process(stream)
-        for tag, data in result.iter_data():
-            await output.send((tag, data))
+        for key, data in result.iter_data():
+            await output.send((key, data))
         await output.close()
 
 
@@ -262,8 +262,8 @@ class BinaryOperator(StaticOutputPod):
         left_stream = self._materialize_to_stream(left_rows)
         right_stream = self._materialize_to_stream(right_rows)
         result = self.static_process(left_stream, right_stream)
-        for tag, data in result.iter_data():
-            await output.send((tag, data))
+        for key, data in result.iter_data():
+            await output.send((key, data))
         await output.close()
 
 
@@ -272,8 +272,8 @@ class NonZeroInputOperator(StaticOutputPod):
         all_rows = await asyncio.gather(*(ch.collect() for ch in inputs))
         streams = [self._materialize_to_stream(rows) for rows in all_rows]
         result = self.static_process(*streams)
-        for tag, data in result.iter_data():
-            await output.send((tag, data))
+        for key, data in result.iter_data():
+            await output.send((key, data))
         await output.close()
 ```
 
@@ -290,15 +290,15 @@ class FunctionPod:
     async def async_execute(self, inputs, output):
         sem = asyncio.Semaphore(self.node_config.max_concurrency or _INF)
 
-        async def process_one(tag, data):
+        async def process_one(key, data):
             async with sem:
                 result_data = await self.data_function.async_call(data)
                 if result_data is not None:
-                    await output.send((tag, result_data))
+                    await output.send((key, result_data))
 
         async with asyncio.TaskGroup() as tg:
-            async for tag, data in inputs[0]:
-                tg.create_task(process_one(tag, data))
+            async for key, data in inputs[0]:
+                tg.create_task(process_one(key, data))
 
         await output.close()
 ```
@@ -311,22 +311,22 @@ class FunctionNode:
     async def async_execute(self, inputs, output):
         sem = asyncio.Semaphore(self.node_config.max_concurrency or _INF)
 
-        async def process_one(tag, data):
+        async def process_one(key, data):
             cache_key = self._compute_cache_key(data)
             cached = await self._db_lookup(cache_key)
             if cached is not None:
-                await output.send((tag, cached))
+                await output.send((key, cached))
                 return
 
             async with sem:
                 result = await self.data_function.async_call(data)
                 await self._db_store(cache_key, result)
                 if result is not None:
-                    await output.send((tag, result))
+                    await output.send((key, result))
 
         async with asyncio.TaskGroup() as tg:
-            async for tag, data in inputs[0]:
-                tg.create_task(process_one(tag, data))
+            async for key, data in inputs[0]:
+                tg.create_task(process_one(key, data))
 
         await output.close()
 ```
@@ -429,8 +429,8 @@ Sources have no input channels — they just push their data onto the output cha
 class SourceNode:
     async def async_execute(self, inputs, output):
         # inputs is empty for sources
-        for tag, data in self.stream.iter_data():
-            await output.send((tag, data))
+        for key, data in self.stream.iter_data():
+            await output.send((key, data))
         await output.close()
 ```
 
@@ -447,9 +447,9 @@ while allowing each consumer to read at its own pace.
 | Operator | Default Strategy | Async Override? |
 |---|---|---|
 | PolarsFilter | Barrier (inherited) | **Streaming** — evaluate predicate per row |
-| MapTags / MapData | Barrier (inherited) | **Streaming** — rename per row |
-| SelectTagColumns / SelectDataColumns | Barrier (inherited) | **Streaming** — project per row |
-| DropTagColumns / DropDataColumns | Barrier (inherited) | **Streaming** — project per row |
+| MapKeys / MapData | Barrier (inherited) | **Streaming** — rename per row |
+| SelectKeyColumns / SelectDataColumns | Barrier (inherited) | **Streaming** — project per row |
+| DropKeyColumns / DropDataColumns | Barrier (inherited) | **Streaming** — project per row |
 | FunctionPod | N/A (new) | **Streaming** — transform data per row |
 | FunctionNode | N/A (new) | **Streaming** — cache check + transform per row |
 | Join | Barrier (inherited) | **Incremental** — symmetric hash join |
@@ -503,7 +503,7 @@ Streaming and incremental strategies may change row ordering compared to synchro
   from upstream. The result set is identical but row order may differ.
 - **Barrier**: row order matches synchronous mode exactly.
 
-The `sort_by_tags` option in `ColumnConfig` provides deterministic ordering when needed,
+The `sort_by_keys` option in `ColumnConfig` provides deterministic ordering when needed,
 independent of execution strategy.
 
 ---
diff --git a/docs/api/index.md b/docs/api/index.md
index bb475cf8..06b54b06 100644
--- a/docs/api/index.md
+++ b/docs/api/index.md
@@ -22,5 +22,5 @@ Everything else lives in subpackages:
 | [`orcapod.operators`](operators.md) | Structural stream transformations (join, filter, select, batch, etc.) |
 | [`orcapod.databases`](databases.md) | Persistent storage backends for computation results |
 | [`orcapod.nodes`](nodes.md) | DB-backed pipeline elements that persist their results |
-| [`orcapod.streams`](streams.md) | Immutable (Tag, Data) sequences backed by PyArrow tables |
+| [`orcapod.streams`](streams.md) | Immutable (Key, Data) sequences backed by PyArrow tables |
 | [`orcapod.types`](types.md) | Core type definitions: `Schema`, `ColumnConfig`, `ContentHash` |
diff --git a/docs/api/operators.md b/docs/api/operators.md
index ef1d69dd..d5742e88 100644
--- a/docs/api/operators.md
+++ b/docs/api/operators.md
@@ -10,15 +10,15 @@ Operators perform structural transformations on streams without inspecting or sy
 
 ::: orcapod.core.operators.Batch
 
-::: orcapod.core.operators.SelectTagColumns
+::: orcapod.core.operators.SelectKeyColumns
 
 ::: orcapod.core.operators.SelectDataColumns
 
-::: orcapod.core.operators.DropTagColumns
+::: orcapod.core.operators.DropKeyColumns
 
 ::: orcapod.core.operators.DropDataColumns
 
-::: orcapod.core.operators.MapTags
+::: orcapod.core.operators.MapKeys
 
 ::: orcapod.core.operators.MapData
 
diff --git a/docs/api/sources.md b/docs/api/sources.md
index 7895999d..9db232fd 100644
--- a/docs/api/sources.md
+++ b/docs/api/sources.md
@@ -2,7 +2,7 @@
 
 Source classes provide the entry point for external data into Orcapod pipelines.
 All sources convert their input to a PyArrow Table and use `SourceStreamBuilder` for
-enrichment (provenance columns, system tags, hashing).
+enrichment (provenance columns, system keys, hashing).
 
 ::: orcapod.core.sources.ArrowTableSource
 
diff --git a/docs/api/streams.md b/docs/api/streams.md
index 98a8635e..3ba1a062 100644
--- a/docs/api/streams.md
+++ b/docs/api/streams.md
@@ -1,6 +1,6 @@
 # Streams
 
-Streams are immutable sequences of (Tag, Data) pairs backed by PyArrow tables.
+Streams are immutable sequences of (Key, Data) pairs backed by PyArrow tables.
 
 ::: orcapod.core.streams.ArrowTableStream
 
diff --git a/docs/concepts/function-pods.md b/docs/concepts/function-pods.md
index 37a8af79..6021901b 100644
--- a/docs/concepts/function-pods.md
+++ b/docs/concepts/function-pods.md
@@ -3,7 +3,7 @@
 Function pods are data-level transforms -- they take each data in a
 [stream](streams.md), apply a Python function to its values, and produce a new data with the
 function's outputs. Unlike [operators](operators.md), function pods never inspect or modify
-tags. They are the primary mechanism for adding computation to an Orcapod pipeline: data
+keys. They are the primary mechanism for adding computation to an Orcapod pipeline: data
 cleaning, feature extraction, model inference, or any transformation that produces new values
 from existing ones.
 
@@ -58,27 +58,27 @@ source = DictSource(
         {"subject_id": "mouse_01", "weight": 25.3, "height": 0.12},
         {"subject_id": "mouse_02", "weight": 22.1, "height": 0.10},
     ],
-    tag_columns=["subject_id"],
+    key_columns=["subject_id"],
 )
 
 # Apply the function pod to the source stream
 result = compute_bmi.pod(source)  # shorthand for compute_bmi.pod.process(source)
 
-# Inspect the output schema -- tags pass through, data are replaced
-tag_schema, data_schema = result.output_schema()
-print("Tag schema:", dict(tag_schema))
-# Tag schema: {'subject_id': <class 'str'>}
+# Inspect the output schema -- keys pass through, data are replaced
+key_schema, data_schema = result.output_schema()
+print("Key schema:", dict(key_schema))
+# Key schema: {'subject_id': <class 'str'>}
 print("Data schema:", dict(data_schema))
 # Data schema: {'bmi': <class 'float'>}
 
 # Iterate over results
-for tag, data in result.iter_data():
-    print(f"  {tag.as_dict()} -> {data.as_dict()}")
+for key, data in result.iter_data():
+    print(f"  {key.as_dict()} -> {data.as_dict()}")
 # {'subject_id': 'mouse_01'} -> {'bmi': 1756.9444444444446}
 # {'subject_id': 'mouse_02'} -> {'bmi': 2209.9999999999995}
 ```
 
-The function pod preserves tags and replaces data columns with the function's output. If the
+The function pod preserves keys and replaces data columns with the function's output. If the
 input stream has multiple data columns but the function only needs some of them, Orcapod
 extracts the matching columns by name.
 
@@ -107,7 +107,7 @@ source = DictSource(
         {"subject_id": "mouse_01", "weight": 25.3, "height": 0.12},
         {"subject_id": "mouse_02", "weight": 22.1, "height": 0.10},
     ],
-    tag_columns=["subject_id"],
+    key_columns=["subject_id"],
 )
 
 db = InMemoryArrowDatabase()
@@ -122,8 +122,8 @@ node = FunctionNode(
 node.run()
 
 # Iterate over cached results
-for tag, data in node.iter_data():
-    print(f"  {tag.as_dict()} -> {data.as_dict()}")
+for key, data in node.iter_data():
+    print(f"  {key.as_dict()} -> {data.as_dict()}")
 ```
 
 `FunctionNode` also provides:
@@ -141,7 +141,7 @@ If you pass multiple streams to a function pod, they are automatically joined (u
 result = compute_bmi.pod(weight_stream, height_stream)
 ```
 
-The join happens on shared tag columns, and the merged data columns are fed to the function.
+The join happens on shared key columns, and the merged data columns are fed to the function.
 
 ## DataFunction internals
 
diff --git a/docs/concepts/identity.md b/docs/concepts/identity.md
index abf3cbd1..b98fa2fd 100644
--- a/docs/concepts/identity.md
+++ b/docs/concepts/identity.md
@@ -42,8 +42,8 @@ benefits from results already cached for previous data with the same schema.
 Consider two sources with the same schema but different data:
 
 ```
-source_a = DictSource(data=[{"x": 1, "y": 2}], tag_columns=["x"])
-source_b = DictSource(data=[{"x": 10, "y": 20}], tag_columns=["x"])
+source_a = DictSource(data=[{"x": 1, "y": 2}], key_columns=["x"])
+source_b = DictSource(data=[{"x": 10, "y": 20}], key_columns=["x"])
 ```
 
 - `source_a.content_hash() != source_b.content_hash()` -- different source identity
@@ -59,7 +59,7 @@ identity plus the pipeline hashes of all its upstream elements.
 
 ### Base case: sources
 
-A `RootSource`'s pipeline identity is simply its `(tag_schema, data_schema)`. Sources with
+A `RootSource`'s pipeline identity is simply its `(key_schema, data_schema)`. Sources with
 the same column names and types have the same pipeline hash, regardless of their data.
 
 ### Recursive case: downstream elements
diff --git a/docs/concepts/operators.md b/docs/concepts/operators.md
index 2c45a5fb..5c660a2b 100644
--- a/docs/concepts/operators.md
+++ b/docs/concepts/operators.md
@@ -5,7 +5,7 @@ synthesizing data values. They join, filter, batch, rename, and select columns -
 that affect the *structure* of the data (which rows exist, which columns are present, how
 columns are named) but never compute new values from data content. This is the key
 distinction from [function pods](function-pods.md), which do the opposite: they transform
-data values but never touch tags or stream structure.
+data values but never touch keys or stream structure.
 
 ## The operator / function pod boundary
 
@@ -14,7 +14,7 @@ This separation is a core Orcapod design principle:
 |  | Operator | Function Pod |
 |---|---|---|
 | Inspects data content | Never | Yes |
-| Inspects / uses tags | Yes | No |
+| Inspects / uses keys | Yes | No |
 | Can rename columns | Yes | No |
 | Synthesizes new values | No | Yes |
 | Stream arity | Configurable (1, 2, or N inputs) | Single in, single out |
@@ -42,7 +42,7 @@ Takes one or more streams. Used for `Join`, which performs an N-ary inner join.
 
 ### Join
 
-N-ary inner join on shared tag columns. Requires that input streams have non-overlapping
+N-ary inner join on shared key columns. Requires that input streams have non-overlapping
 data columns (raises `InputValidationError` on collision). Join is **commutative** -- the
 order of input streams does not affect the result.
 
@@ -55,7 +55,7 @@ subjects = DictSource(
         {"subject_id": "mouse_01", "age": 12},
         {"subject_id": "mouse_02", "age": 8},
     ],
-    tag_columns=["subject_id"],
+    key_columns=["subject_id"],
 )
 
 measurements = DictSource(
@@ -63,7 +63,7 @@ measurements = DictSource(
         {"subject_id": "mouse_01", "weight": 25.3},
         {"subject_id": "mouse_02", "weight": 22.1},
     ],
-    tag_columns=["subject_id"],
+    key_columns=["subject_id"],
 )
 
 join = Join()
@@ -82,10 +82,10 @@ is **commutative** -- the order of the two input streams does not affect the res
 
 ### SemiJoin
 
-Binary join that filters the left stream to only include rows whose tags match the right
+Binary join that filters the left stream to only include rows whose keys match the right
 stream. The right stream's data columns are discarded. SemiJoin is **not commutative** --
 the order of inputs matters. The first stream is the one being filtered; the second stream
-provides the set of matching tags.
+provides the set of matching keys.
 
 ### Batch
 
@@ -101,14 +101,14 @@ source = DictSource(
         {"subject_id": "mouse_01", "age": 12},
         {"subject_id": "mouse_02", "age": 8},
     ],
-    tag_columns=["subject_id"],
+    key_columns=["subject_id"],
 )
 
 batch = Batch()
 batched = batch.process(source)
-for tag, data in batched.iter_data():
-    print("Tags:", tag.as_dict())
-    # Tags: {'subject_id': ['mouse_01', 'mouse_02']}
+for key, data in batched.iter_data():
+    print("Keys:", key.as_dict())
+    # Keys: {'subject_id': ['mouse_01', 'mouse_02']}
     print("Data:", data.as_dict())
     # Data: {'age': [12, 8]}
 ```
@@ -123,9 +123,9 @@ batch = Batch(batch_size=10, drop_partial_batch=False)
 
 Four operators for including or excluding columns:
 
-- **`SelectTagColumns(columns=["col1", "col2"])`** -- keep only the specified tag columns
+- **`SelectKeyColumns(columns=["col1", "col2"])`** -- keep only the specified key columns
 - **`SelectDataColumns(columns=["col1", "col2"])`** -- keep only the specified data columns
-- **`DropTagColumns(columns=["col1"])`** -- remove the specified tag columns
+- **`DropKeyColumns(columns=["col1"])`** -- remove the specified key columns
 - **`DropDataColumns(columns=["col1"])`** -- remove the specified data columns
 
 ```python
@@ -138,7 +138,7 @@ print(result.keys()[1])  # ('weight',)
 
 ### Column renaming
 
-- **`MapTags(mapping={"old_name": "new_name"})`** -- rename tag columns
+- **`MapKeys(mapping={"old_name": "new_name"})`** -- rename key columns
 - **`MapData(mapping={"old_name": "new_name"})`** -- rename data columns
 
 ### PolarsFilter
@@ -151,8 +151,8 @@ from orcapod.operators import PolarsFilter
 
 filt = PolarsFilter(predicates=[pl.col("age") > 10])
 filtered = filt.process(source)
-for tag, pkt in filtered.iter_data():
-    print(f"{tag.as_dict()} -> {pkt.as_dict()}")
+for key, data in filtered.iter_data():
+    print(f"{key.as_dict()} -> {data.as_dict()}")
 # {'subject_id': 'mouse_01'} -> {'age': 12, 'weight': 25.3}
 # {'subject_id': 'mouse_03'} -> {'age': 15, 'weight': 27.8}
 ```
diff --git a/docs/concepts/sources.md b/docs/concepts/sources.md
index 4430d04c..f2e3ceed 100644
--- a/docs/concepts/sources.md
+++ b/docs/concepts/sources.md
@@ -3,8 +3,8 @@
 Sources are the entry points for external data into an Orcapod pipeline. Every pipeline begins
 with one or more sources that load raw data -- from Python dicts, lists, CSV files, Delta Lake
 tables, or Pandas DataFrames -- and present it as an immutable
-[stream](streams.md) of (Tag, Data) pairs. Sources also attach provenance metadata
-(source-info columns and system tag columns) so that every downstream value can be traced back
+[stream](streams.md) of (Key, Data) pairs. Sources also attach provenance metadata
+(source-info columns and system key columns) so that every downstream value can be traced back
 to its origin.
 
 ## Key classes
@@ -21,13 +21,13 @@ dependencies -- it sits at the root of the computational graph. Key properties:
 ### Concrete source types
 
 All sources follow the same pattern: convert input data to a PyArrow Table, then pass it
-through `SourceStreamBuilder` which handles enrichment (provenance columns, system tags,
+through `SourceStreamBuilder` which handles enrichment (provenance columns, system keys,
 hashing) and produces the final immutable stream.
 
 | Source | Input type | Notes |
 |---|---|---|
 | `ArrowTableSource` | PyArrow `Table` | Accepts an Arrow table directly |
-| `DictSource` | `list[dict]` | Each dict becomes one (Tag, Data) pair |
+| `DictSource` | `list[dict]` | Each dict becomes one (Key, Data) pair |
 | `ListSource` | `list[Any]` | Each element stored under a named data column |
 | `DataFrameSource` | Pandas `DataFrame` | Converts via Arrow |
 | `CSVSource` | File path (string) | Reads CSV into Arrow |
@@ -48,7 +48,7 @@ data lineage:
 For example, a data column `weight` gets a companion `_source_weight` column. These tokens
 identify which source originally produced each value.
 
-**System tag columns** (prefix `_tag::`) track which source contributed each row. These
+**System key columns** (prefix `_key::`) track which source contributed each row. These
 columns are used internally during [joins](operators.md) to maintain provenance through
 multi-stream operations.
 
@@ -58,8 +58,8 @@ These columns are hidden by default. You can reveal them using `ColumnConfig`:
 # Show source-info columns
 table = source.as_table(columns={"source": True})
 
-# Show system tag columns
-tag_schema, data_schema = source.output_schema(columns={"system_tags": True})
+# Show system key columns
+key_schema, data_schema = source.output_schema(columns={"system_keys": True})
 
 # Show everything
 table = source.as_table(all_info=True)
@@ -78,27 +78,27 @@ source = DictSource(
         {"subject_id": "mouse_02", "age": 8, "weight": 22.1},
         {"subject_id": "mouse_03", "age": 15, "weight": 27.8},
     ],
-    tag_columns=["subject_id"],
+    key_columns=["subject_id"],
 )
 
 # Inspect the schema
-tag_schema, data_schema = source.output_schema()
-print("Tag schema:", dict(tag_schema))
-# Tag schema: {'subject_id': <class 'str'>}
+key_schema, data_schema = source.output_schema()
+print("Key schema:", dict(key_schema))
+# Key schema: {'subject_id': <class 'str'>}
 print("Data schema:", dict(data_schema))
 # Data schema: {'age': <class 'int'>, 'weight': <class 'float'>}
 
 # Get column names
-tag_keys, data_keys = source.keys()
-print("Tag keys:", tag_keys)    # ('subject_id',)
+key_names, data_keys = source.keys()
+print("Key columns:", key_names)  # ('subject_id',)
 print("Data keys:", data_keys)  # ('age', 'weight')
 
-# Iterate over (Tag, Data) pairs
-for tag, data in source.iter_data():
-    print(f"  Tag: {tag.as_dict()}, Data: {data.as_dict()}")
-# Tag: {'subject_id': 'mouse_01'}, Data: {'age': 12, 'weight': 25.3}
-# Tag: {'subject_id': 'mouse_02'}, Data: {'age': 8, 'weight': 22.1}
-# Tag: {'subject_id': 'mouse_03'}, Data: {'age': 15, 'weight': 27.8}
+# Iterate over (Key, Data) pairs
+for key, data in source.iter_data():
+    print(f"  Key: {key.as_dict()}, Data: {data.as_dict()}")
+# Key: {'subject_id': 'mouse_01'}, Data: {'age': 12, 'weight': 25.3}
+# Key: {'subject_id': 'mouse_02'}, Data: {'age': 8, 'weight': 22.1}
+# Key: {'subject_id': 'mouse_03'}, Data: {'age': 15, 'weight': 27.8}
 
 # Convert to a PyArrow table
 table = source.as_table()
@@ -111,7 +111,7 @@ print(table.to_pandas())
 
 ## How it connects to other concepts
 
-- Sources produce [Streams](streams.md) -- immutable sequences of (Tag, Data) pairs
+- Sources produce [Streams](streams.md) -- immutable sequences of (Key, Data) pairs
 - Streams flow into [Operators](operators.md) for structural transforms (joins, filters,
   column selection)
 - Streams flow into [Function Pods](function-pods.md) for value-level transforms
diff --git a/docs/concepts/streams.md b/docs/concepts/streams.md
index ec26615f..d47c6926 100644
--- a/docs/concepts/streams.md
+++ b/docs/concepts/streams.md
@@ -1,19 +1,19 @@
 # Streams
 
-A stream is an immutable sequence of (Tag, Data) pairs backed by a PyArrow Table. Streams
+A stream is an immutable sequence of (Key, Data) pairs backed by a PyArrow Table. Streams
 are the universal data currency in Orcapod -- every [source](sources.md) produces a stream,
 every [operator](operators.md) consumes and produces streams, and every
 [function pod](function-pods.md) transforms data within a stream. Immutability guarantees
 that once a stream is created, its data cannot change, which is essential for reproducible
 pipelines.
 
-## Tag columns vs Data columns
+## Key columns vs Data columns
 
 Every stream divides its columns into two groups:
 
-**Tag columns** are join keys and metadata. They identify *which* record you are looking at
+**Key columns** are join keys and metadata. They identify *which* record you are looking at
 (e.g., `subject_id`, `session_date`). Operators like [Join](operators.md) match rows across
-streams using shared tag columns.
+streams using shared key columns.
 
 **Data columns** are the data payload. They hold the actual values being processed
 (e.g., `age`, `weight`, `spike_count`). [Function pods](function-pods.md) read data
@@ -21,14 +21,14 @@ columns as function inputs and write new data columns as outputs.
 
 This separation is enforced throughout the framework:
 
-- Operators inspect and restructure tags but never look inside data
-- Function pods inspect and transform data but never look at tags
+- Operators inspect and restructure keys but never look inside data
+- Function pods inspect and transform data but never look at keys
 
 ## Key classes
 
 ### `ArrowTableStream`
 
-The primary stream implementation. Wraps a PyArrow Table with designated tag and data
+The primary stream implementation. Wraps a PyArrow Table with designated key and data
 columns. Created internally by sources and operators -- you rarely construct one directly.
 
 ### `StreamBase`
@@ -42,32 +42,32 @@ Every stream exposes four key methods:
 
 ### `output_schema()`
 
-Returns the `(tag_schema, data_schema)` tuple describing column names and their Python types:
+Returns the `(key_schema, data_schema)` tuple describing column names and their Python types:
 
 ```python
-tag_schema, data_schema = stream.output_schema()
-print(dict(tag_schema))    # {'subject_id': <class 'str'>}
+key_schema, data_schema = stream.output_schema()
+print(dict(key_schema))    # {'subject_id': <class 'str'>}
 print(dict(data_schema)) # {'age': <class 'int'>, 'weight': <class 'float'>}
 ```
 
 ### `keys()`
 
-Returns column names as `(tag_keys, data_keys)`:
+Returns column names as `(key_names, data_keys)`:
 
 ```python
-tag_keys, data_keys = stream.keys()
-# tag_keys = ('subject_id',)
+key_names, data_keys = stream.keys()
+# key_names = ('subject_id',)
 # data_keys = ('age', 'weight')
 ```
 
 ### `iter_data()`
 
-Iterates over (Tag, Data) pairs. Each Tag and Data is an immutable datagram that you can
+Iterates over (Key, Data) pairs. Each Key and Data is an immutable datagram that you can
 inspect with `.as_dict()`:
 
 ```python
-for tag, data in stream.iter_data():
-    print(tag.as_dict())    # {'subject_id': 'mouse_01'}
+for key, data in stream.iter_data():
+    print(key.as_dict())    # {'subject_id': 'mouse_01'}
     print(data.as_dict()) # {'age': 12, 'weight': 25.3}
 ```
 
@@ -83,17 +83,17 @@ df = table.to_pandas()
 
 ## Controlling column visibility with `ColumnConfig`
 
-By default, streams only expose user-facing tag and data columns. Orcapod also maintains
+By default, streams only expose user-facing key and data columns. Orcapod also maintains
 hidden columns for provenance tracking and metadata. Use `ColumnConfig` (or the `all_info`
 shortcut) to control which column groups are included.
 
 | Config field | What it reveals | Column prefix |
 |---|---|---|
-| `system_tags` | System tag columns (provenance tracking) | `_tag::` |
+| `system_keys` | System key columns (provenance tracking) | `_key::` |
 | `source` | Source-info columns (per-data provenance tokens) | `_source_` |
 | `context` | Data context column | `_context_key` |
 | `content_hash` | Content hash column | `_content_hash` |
-| `sort_by_tags` | Sort rows by tag columns | (ordering only) |
+| `sort_by_keys` | Sort rows by key columns | (ordering only) |
 
 Pass config as a dict or a `ColumnConfig` object:
 
@@ -105,7 +105,7 @@ source = DictSource(
         {"subject_id": "mouse_01", "age": 12, "weight": 25.3},
         {"subject_id": "mouse_02", "age": 8, "weight": 22.1},
     ],
-    tag_columns=["subject_id"],
+    key_columns=["subject_id"],
 )
 
 # Default: user-facing columns only
@@ -121,7 +121,7 @@ print(table.column_names)
 # Include everything
 table = source.as_table(all_info=True)
 print(table.column_names)
-# ['subject_id', 'age', 'weight', '_tag_source_id::...', '_tag_record_id::...',
+# ['subject_id', 'age', 'weight', '_key_source_id::...', '_key_record_id::...',
 #  '_content_hash', '_context_key', '_source_age', '_source_weight']
 ```
 
@@ -138,19 +138,19 @@ source = DictSource(
         {"subject_id": "mouse_02", "age": 8, "weight": 22.1},
         {"subject_id": "mouse_03", "age": 15, "weight": 27.8},
     ],
-    tag_columns=["subject_id"],
+    key_columns=["subject_id"],
 )
 
 # Schema inspection
-tag_schema, data_schema = source.output_schema()
-print("Tag schema:", dict(tag_schema))
-# Tag schema: {'subject_id': <class 'str'>}
+key_schema, data_schema = source.output_schema()
+print("Key schema:", dict(key_schema))
+# Key schema: {'subject_id': <class 'str'>}
 print("Data schema:", dict(data_schema))
 # Data schema: {'age': <class 'int'>, 'weight': <class 'float'>}
 
-# Iterate over (Tag, Data) pairs
-for tag, data in source.iter_data():
-    print(f"  {tag.as_dict()} -> {data.as_dict()}")
+# Iterate over (Key, Data) pairs
+for key, data in source.iter_data():
+    print(f"  {key.as_dict()} -> {data.as_dict()}")
 # {'subject_id': 'mouse_01'} -> {'age': 12, 'weight': 25.3}
 # {'subject_id': 'mouse_02'} -> {'age': 8, 'weight': 22.1}
 # {'subject_id': 'mouse_03'} -> {'age': 15, 'weight': 27.8}
diff --git a/docs/design/2026-03-27-non-active-node-semantics-design.md b/docs/design/2026-03-27-non-active-node-semantics-design.md
index 1bf21a05..46ed5111 100644
--- a/docs/design/2026-03-27-non-active-node-semantics-design.md
+++ b/docs/design/2026-03-27-non-active-node-semantics-design.md
@@ -67,7 +67,7 @@ All of these will become passive after the fix — empty before `run()`, correct
 
 **`_make_empty_table() -> "pa.Table"`**
 
-Builds a zero-row PyArrow table whose columns match the node's full output schema (tags + data). Uses `self.output_schema()` and `self.data_context.type_converter`. This is a pure, side-effect-free method.
+Builds a zero-row PyArrow table whose columns match the node's full output schema (keys + data). Uses `self.output_schema()` and `self.data_context.type_converter`. This is a pure, side-effect-free method.
 
 - The return type annotation must use a string literal (`"pa.Table"`) because `pa` is imported via `LazyModule` at runtime; the real type is only available under `TYPE_CHECKING`.
 - `output_schema()` is safe on live and read-only deserialized nodes (uses `_stored_schema` when `_operator is None`).
@@ -85,7 +85,7 @@ Guards (return `None` immediately if any apply):
 If all guards pass, call `self.pipeline_path` directly (no try/except). This is safe: by the time we reach this point, `_pipeline_database is not None`, and `pipeline_path` only raises `RuntimeError` when `_pipeline_database is None`. For live nodes `_pipeline_node_hash` is always set in `__init__`; for read-only deserialized nodes `_operator is None` causes `pipeline_path` to return `_stored_pipeline_path`.
 
 Then call `self._pipeline_database.get_all_records(self.pipeline_path)`:
-- If records are non-None (zero or more rows): wrap in `ArrowTableStream(records, tag_columns=self.keys()[0])` and return it.
+- If records are non-None (zero or more rows): wrap in `ArrowTableStream(records, key_columns=self.keys()[0])` and return it.
   Note: the DB stores records with a `_record_hash` column added by `_store_output_stream`. `get_all_records` does not strip this column. `_load_cached_stream_from_db` inherits this behavior — it returns an `ArrowTableStream` that includes `_record_hash`. This matches the existing behavior of `_replay_from_cache`, which also does not strip `_record_hash`.
 - If records are `None` (no prior LOG run has written to this path): build an empty table via `_make_empty_table()`, wrap in `ArrowTableStream`, and return it.
 
@@ -126,7 +126,7 @@ In `src/orcapod/core/streams/base.py`, update the `flow()` docstring:
 
 **Before:** "This will trigger any upstream computation of the stream."
 
-**After:** "Returns the entire collection of (TagProtocol, DataProtocol) as a list. This is a read-only operation — results reflect whatever has been computed by a prior `run()` or `execute()` call. If no computation has been performed, returns an empty list."
+**After:** "Returns the entire collection of (KeyProtocol, DataProtocol) as a list. This is a read-only operation — results reflect whatever has been computed by a prior `run()` or `execute()` call. If no computation has been performed, returns an empty list."
 
 No other changes to `base.py`.
 
@@ -167,7 +167,7 @@ node.run()
        └─ populates self._cached_output_stream
 
 # Read path (this fix): never triggers upstream computation
-for tag, data in operator_node:     # __iter__ → iter_data()
+for key, data in operator_node:     # __iter__ → iter_data()
 node.flow()                           # flow() → iter_data()
 node.iter_data()
 node.as_table()
diff --git a/docs/design/plans/2026-03-26-sqlite-table-source.md b/docs/design/plans/2026-03-26-sqlite-table-source.md
index 3e53ddb8..4bb81ec7 100644
--- a/docs/design/plans/2026-03-26-sqlite-table-source.md
+++ b/docs/design/plans/2026-03-26-sqlite-table-source.md
@@ -2,7 +2,7 @@
 
 > **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
 
-**Goal:** Implement `SQLiteTableSource`, a `RootSource` backed by a SQLite table that uses primary-key columns (or `rowid` for ROWID-only tables) as default tag columns, with a working `from_config` round-trip.
+**Goal:** Implement `SQLiteTableSource`, a `RootSource` backed by a SQLite table that uses primary-key columns (or `rowid` for ROWID-only tables) as default key columns, with a working `from_config` round-trip.
 
 **Architecture:** `SQLiteTableSource` subclasses `DBTableSource`. Two small patches are made to existing files first (`SQLiteConnector.iter_batches` for `rowid` typing; `DBTableSource.__init__` for a `_query` hook), then the new class and its tests are added, followed by wiring into exports and the source registry.
 
@@ -178,8 +178,8 @@ In `src/orcapod/core/sources/db_table_source.py`, update the `__init__` signatur
         self,
         connector: DBConnectorProtocol,
         table_name: str,
-        tag_columns: Collection[str] | None = None,
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] | None = None,
+        system_key_columns: Collection[str] = (),
         record_id_column: str | None = None,
         source_id: str | None = None,
         label: str | None = None,
@@ -195,8 +195,8 @@ to:
         self,
         connector: DBConnectorProtocol,
         table_name: str,
-        tag_columns: Collection[str] | None = None,
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] | None = None,
+        system_key_columns: Collection[str] = (),
         record_id_column: str | None = None,
         source_id: str | None = None,
         label: str | None = None,
@@ -249,7 +249,7 @@ git commit -m "feat(sources): add keyword-only _query parameter to DBTableSource
 
 ---
 
-## Task 3: Implement `SQLiteTableSource` — core class + unit tests (PK, explicit tags, errors, stream, hashing)
+## Task 3: Implement `SQLiteTableSource` — core class + unit tests (PK, explicit keys, errors, stream, hashing)
 
 **Files:**
 - Create: `src/orcapod/core/sources/sqlite_table_source.py`
@@ -265,8 +265,8 @@ Create `tests/test_core/sources/test_sqlite_table_source.py`:
 Test sections:
  1. Import / export sanity
  2. Protocol conformance
- 3. PK as default tag columns (single and composite)
- 4. Explicit tag column override
+ 3. PK as default key columns (single and composite)
+ 4. Explicit key column override
  5. ROWID fallback (no explicit PK)
  6. Error cases (missing table, empty table)
  7. Stream behaviour
@@ -426,7 +426,7 @@ Create `src/orcapod/core/sources/sqlite_table_source.py`:
 """SQLiteTableSource — a read-only RootSource backed by a SQLite table.
 
 Wraps a SQLite table as an OrcaPod Source. Primary-key columns are used
-as tag columns by default. For tables with no explicit primary key
+as key columns by default. For tables with no explicit primary key
 (ROWID-only tables), the implicit ``rowid`` integer column is used
 automatically.
 
@@ -436,7 +436,7 @@ Example::
     source = SQLiteTableSource("/path/to/my.db", "measurements")
 
     # In-memory (for tests / throwaway pipelines; cannot round-trip)
-    source = SQLiteTableSource(":memory:", "events", tag_columns=["session_id"])
+    source = SQLiteTableSource(":memory:", "events", key_columns=["session_id"])
 
 Note:
     ``:memory:`` sources cannot be reconstructed via ``from_config`` because
@@ -464,13 +464,13 @@ class SQLiteTableSource(DBTableSource):
     At construction time the source:
     1. Opens a ``SQLiteConnector`` for *db_path*.
     2. Validates the table exists.
-    3. Resolves tag columns:
-       - If *tag_columns* is provided, uses them as-is.
+    3. Resolves key columns:
+       - If *key_columns* is provided, uses them as-is.
        - Otherwise uses the table's primary-key columns.
        - If the table has no explicit PK (ROWID-only), falls back to the
          implicit ``rowid`` integer column.
     4. Determines the fetch query: injects ``SELECT rowid, *`` when
-       ``"rowid"`` is a resolved tag column and not a normal table column
+       ``"rowid"`` is a resolved key column and not a normal table column
        (handles both auto-detection and ``from_config`` reconstruction).
     5. Delegates to ``DBTableSource.__init__`` for fetching and stream building.
 
@@ -478,10 +478,10 @@ class SQLiteTableSource(DBTableSource):
         db_path: Path to the SQLite database file, or ``":memory:"`` for an
             in-process in-memory database.
         table_name: Name of the table to expose as a source.
-        tag_columns: Columns to use as tag columns. If ``None`` (default),
+        key_columns: Columns to use as key columns. If ``None`` (default),
             the table's primary-key columns are used; ROWID-only tables fall
             back to ``["rowid"]``.
-        system_tag_columns: Additional system-level tag columns.
+        system_key_columns: Additional system-level key columns.
         record_id_column: Column for stable per-row record IDs in provenance.
         source_id: Canonical source name. Defaults to *table_name*.
         label: Human-readable label for this source node.
@@ -497,8 +497,8 @@ class SQLiteTableSource(DBTableSource):
         self,
         db_path: str | os.PathLike,
         table_name: str,
-        tag_columns: Collection[str] | None = None,
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] | None = None,
+        system_key_columns: Collection[str] = (),
         record_id_column: str | None = None,
         source_id: str | None = None,
         label: str | None = None,
@@ -508,19 +508,19 @@ class SQLiteTableSource(DBTableSource):
         self._db_path = db_path
         connector = SQLiteConnector(db_path)
 
-        # Step 3: Resolve tag columns.
-        if tag_columns is None:
+        # Step 3: Resolve key columns.
+        if key_columns is None:
             pk_cols = connector.get_pk_columns(table_name)
-            resolved_tags: list[str] = pk_cols if pk_cols else ["rowid"]
+            resolved_keys: list[str] = pk_cols if pk_cols else ["rowid"]
         else:
-            resolved_tags = list(tag_columns)
+            resolved_keys = list(key_columns)
 
         # Step 4: Determine the fetch query.
-        # If "rowid" is in resolved_tags but not a real column, we need
+        # If "rowid" is in resolved_keys but not a real column, we need
         # SELECT rowid, * to include it.  This also handles from_config
-        # reconstruction where tag_columns=["rowid"] is passed explicitly.
+        # reconstruction where key_columns=["rowid"] is passed explicitly.
         normal_cols = {ci.name for ci in connector.get_column_info(table_name)}
-        if "rowid" in resolved_tags and "rowid" not in normal_cols:
+        if "rowid" in resolved_keys and "rowid" not in normal_cols:
             _query: str | None = f'SELECT rowid, * FROM "{table_name}"'
         else:
             _query = None
@@ -528,8 +528,8 @@ class SQLiteTableSource(DBTableSource):
         super().__init__(
             connector,
             table_name,
-            tag_columns=resolved_tags,
-            system_tag_columns=system_tag_columns,
+            key_columns=resolved_keys,
+            system_key_columns=system_key_columns,
             record_id_column=record_id_column,
             source_id=source_id,
             label=label,
@@ -565,8 +565,8 @@ class SQLiteTableSource(DBTableSource):
         return cls(
             db_path=config["db_path"],
             table_name=config["table_name"],
-            tag_columns=config.get("tag_columns"),
-            system_tag_columns=config.get("system_tag_columns", ()),
+            key_columns=config.get("key_columns"),
+            system_key_columns=config.get("system_key_columns", ()),
             record_id_column=config.get("record_id_column"),
             source_id=config.get("source_id"),
         )
@@ -588,22 +588,22 @@ uv run pytest tests/test_core/sources/test_sqlite_table_source.py::TestProtocolC
 
 Expected: PASS. Revert the temporary import change (back to `from orcapod.core.sources import SQLiteTableSource`) before committing — Task 4 will wire the `__init__.py` export and make the standard import work.
 
-- [ ] **Step 3.5: Add PK, explicit tag, error-case, stream, and hashing test groups to the test file**
+- [ ] **Step 3.5: Add PK, explicit key, error-case, stream, and hashing test groups to the test file**
 
 Append to `tests/test_core/sources/test_sqlite_table_source.py`:
 
 ```python
 # ===========================================================================
-# 3. PK as default tag columns
+# 3. PK as default key columns
 # ===========================================================================
 
 
-class TestPKAsDefaultTags:
-    def test_single_pk_is_tag_column(self, pk_connector):
+class TestPKAsDefaultKeys:
+    def test_single_pk_is_key_column(self, pk_connector):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(pk_connector._db_path, "measurements")
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
 
     def test_pk_not_in_data_schema(self, pk_connector):
         from orcapod.core.sources import SQLiteTableSource
@@ -618,12 +618,12 @@ class TestPKAsDefaultTags:
         assert "trial" in data_schema
         assert "response" in data_schema
 
-    def test_composite_pk_all_columns_are_tags(self, composite_pk_connector):
+    def test_composite_pk_all_columns_are_keys(self, composite_pk_connector):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(composite_pk_connector._db_path, "events")
-        tag_schema, _ = src.output_schema()
-        assert "user_id" in tag_schema
-        assert "event_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "user_id" in key_schema
+        assert "event_id" in key_schema
 
     def test_default_source_id_is_table_name(self, pk_connector):
         from orcapod.core.sources import SQLiteTableSource
@@ -637,30 +637,30 @@ class TestPKAsDefaultTags:
 
 
 # ===========================================================================
-# 4. Explicit tag column override
+# 4. Explicit key column override
 # ===========================================================================
 
 
-class TestExplicitTagOverride:
-    def test_explicit_tag_columns_override_pk(self, pk_connector):
+class TestExplicitKeyOverride:
+    def test_explicit_key_columns_override_pk(self, pk_connector):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(
-            pk_connector._db_path, "measurements", tag_columns=["trial"]
+            pk_connector._db_path, "measurements", key_columns=["trial"]
         )
-        tag_schema, _ = src.output_schema()
-        assert "trial" in tag_schema
-        assert "session_id" not in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "trial" in key_schema
+        assert "session_id" not in key_schema
 
-    def test_multiple_explicit_tag_columns(self, pk_connector):
+    def test_multiple_explicit_key_columns(self, pk_connector):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(
             pk_connector._db_path,
             "measurements",
-            tag_columns=["session_id", "trial"],
+            key_columns=["session_id", "trial"],
         )
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
-        assert "trial" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
+        assert "trial" in key_schema
 
 
 # ===========================================================================
@@ -669,11 +669,11 @@ class TestExplicitTagOverride:
 
 
 class TestRowidFallback:
-    def test_rowid_only_table_uses_rowid_as_tag(self, rowid_connector):
+    def test_rowid_only_table_uses_rowid_as_key(self, rowid_connector):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(rowid_connector._db_path, "logs")
-        tag_schema, _ = src.output_schema()
-        assert "rowid" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "rowid" in key_schema
 
     def test_rowid_is_not_in_data_schema(self, rowid_connector):
         from orcapod.core.sources import SQLiteTableSource
@@ -684,15 +684,15 @@ class TestRowidFallback:
     def test_rowid_values_are_positive_integers(self, rowid_connector):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(rowid_connector._db_path, "logs")
-        for tags, _ in src.iter_data():
-            assert isinstance(tags["rowid"], int)
-            assert tags["rowid"] > 0
+        for keys, _ in src.iter_data():
+            assert isinstance(keys["rowid"], int)
+            assert keys["rowid"] > 0
 
     def test_rowid_type_is_int64(self, rowid_connector):
         """Verify rowid is actually typed as int64, not large_string."""
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(rowid_connector._db_path, "logs")
-        # The raw stream table (before tag/data split) holds all columns.
+        # The raw stream table (before key/data split) holds all columns.
         # We can verify the Arrow type via the internal stream table.
         raw = src._stream._table  # ArrowTableStream stores the enriched table
         assert "rowid" in raw.schema.names
@@ -744,11 +744,11 @@ class TestStreamBehaviour:
         data = list(src.iter_data())
         assert len(data) == 3
 
-    def test_iter_data_tags_contain_pk(self, pk_connector):
+    def test_iter_data_keys_contain_pk(self, pk_connector):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(pk_connector._db_path, "measurements")
-        for tags, _ in src.iter_data():
-            assert "session_id" in tags
+        for keys, _ in src.iter_data():
+            assert "session_id" in keys
 
     def test_output_schema_returns_two_schemas(self, pk_connector):
         from orcapod.core.sources import SQLiteTableSource
@@ -786,11 +786,11 @@ class TestDeterministicHashing:
         src2 = SQLiteTableSource(pk_connector._db_path, "measurements")
         assert src1.content_hash() == src2.content_hash()
 
-    def test_different_tag_columns_yields_different_pipeline_hash(self, pk_connector):
+    def test_different_key_columns_yields_different_pipeline_hash(self, pk_connector):
         from orcapod.core.sources import SQLiteTableSource
         src1 = SQLiteTableSource(pk_connector._db_path, "measurements")
         src2 = SQLiteTableSource(
-            pk_connector._db_path, "measurements", tag_columns=["trial"]
+            pk_connector._db_path, "measurements", key_columns=["trial"]
         )
         assert src1.pipeline_hash() != src2.pipeline_hash()
 ```
@@ -969,10 +969,10 @@ class TestConfigRoundTripPKTable:
         src = SQLiteTableSource(file_db_path, "measurements")
         assert src.to_config()["table_name"] == "measurements"
 
-    def test_to_config_has_tag_columns(self, file_db_path):
+    def test_to_config_has_key_columns(self, file_db_path):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(file_db_path, "measurements")
-        assert "session_id" in src.to_config()["tag_columns"]
+        assert "session_id" in src.to_config()["key_columns"]
 
     def test_to_config_has_identity_fields(self, file_db_path):
         from orcapod.core.sources import SQLiteTableSource
@@ -1019,18 +1019,18 @@ class TestConfigRoundTripRowidTable:
         conn.close()
         return db_path
 
-    def test_to_config_has_rowid_as_tag_column(self, rowid_file_db_path):
+    def test_to_config_has_rowid_as_key_column(self, rowid_file_db_path):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(rowid_file_db_path, "logs")
-        assert src.to_config()["tag_columns"] == ["rowid"]
+        assert src.to_config()["key_columns"] == ["rowid"]
 
     def test_from_config_reconstructs_rowid_table(self, rowid_file_db_path):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(rowid_file_db_path, "logs")
         config = src.to_config()
         src2 = SQLiteTableSource.from_config(config)
-        tag_schema, _ = src2.output_schema()
-        assert "rowid" in tag_schema
+        key_schema, _ = src2.output_schema()
+        assert "rowid" in key_schema
 
     def test_from_config_rowid_hashes_match(self, rowid_file_db_path):
         from orcapod.core.sources import SQLiteTableSource
@@ -1060,7 +1060,7 @@ git commit -m "test(sources): add config round-trip tests for SQLiteTableSource"
 
 ## Task 6: Integration test — `SQLiteTableSource` in a pipeline
 
-**Background:** `Pipeline` wires sources into a node graph. A `FunctionPod` consumes data from the source. We verify end-to-end that tag columns flow through and the pipeline produces the expected results.
+**Background:** `Pipeline` wires sources into a node graph. A `FunctionPod` consumes data from the source. We verify end-to-end that key columns flow through and the pipeline produces the expected results.
 
 **Files:**
 - Modify: `tests/test_core/sources/test_sqlite_table_source.py`
@@ -1108,15 +1108,15 @@ class TestPipelineIntegration:
         assert len(fn_outputs) == 1
         assert len(fn_outputs[0]) == 3
 
-        # Verify tag column (session_id) flows through and results are correct
+        # Verify key column (session_id) flows through and results are correct
         doubled_values = sorted(
             [pkt.as_dict()["doubled"] for _, pkt in fn_outputs[0]]
         )
         assert doubled_values == pytest.approx([0.2, 0.4, 0.6])
 
-        # Verify tag values are present
-        tag_values = sorted([tags["session_id"] for tags, _ in fn_outputs[0]])
-        assert tag_values == ["s1", "s2", "s3"]
+        # Verify key values are present
+        key_values = sorted([keys["session_id"] for keys, _ in fn_outputs[0]])
+        assert key_values == ["s1", "s2", "s3"]
 ```
 
 - [ ] **Step 6.2: Run the integration test**
@@ -1165,8 +1165,8 @@ gh-app-token-generator nauticalab | gh auth login --with-token
 - [ ] **Step 7.3: Create the feature branch and push**
 
 ```bash
-git checkout -b eywalker/plt-1077-implement-source-based-on-sqlite-tables-with-pk-as-default-tag
-git push -u origin eywalker/plt-1077-implement-source-based-on-sqlite-tables-with-pk-as-default-tag
+git checkout -b eywalker/plt-1077-implement-source-based-on-sqlite-tables-with-pk-as-default-key
+git push -u origin eywalker/plt-1077-implement-source-based-on-sqlite-tables-with-pk-as-default-key
 ```
 
 - [ ] **Step 7.4: Open the PR against `dev`**
@@ -1179,7 +1179,7 @@ gh pr create \
 ## Summary
 
 - Implements `SQLiteTableSource` — a `RootSource` backed by a SQLite table
-- Primary-key columns are used as tag columns by default
+- Primary-key columns are used as key columns by default
 - ROWID-only tables (no explicit PK) automatically fall back to the implicit `rowid` integer column
 - `from_config` round-trip works for file-backed databases (unlike `DBTableSource`)
 - Registered as `\"sqlite_table\"` in the source registry
diff --git a/docs/design/specs/2026-03-26-sqlite-table-source-design.md b/docs/design/specs/2026-03-26-sqlite-table-source-design.md
index 6e8868e7..e466115e 100644
--- a/docs/design/specs/2026-03-26-sqlite-table-source-design.md
+++ b/docs/design/specs/2026-03-26-sqlite-table-source-design.md
@@ -8,7 +8,7 @@
 
 ## Summary
 
-Implement `SQLiteTableSource`, a `RootSource` backed by a SQLite table. Primary-key columns serve as the default tag columns. For tables with no explicit primary key (ROWID-only tables), the implicit SQLite `rowid` is used automatically. Provides a working `from_config` round-trip — the gap that `DBTableSource` cannot fill today.
+Implement `SQLiteTableSource`, a `RootSource` backed by a SQLite table. Primary-key columns serve as the default key columns. For tables with no explicit primary key (ROWID-only tables), the implicit SQLite `rowid` is used automatically. Provides a working `from_config` round-trip — the gap that `DBTableSource` cannot fill today.
 
 ---
 
@@ -34,8 +34,8 @@ Three alternatives were evaluated:
 
 ```
 SQLiteTableSource(DBTableSource)
-  __init__(db_path, table_name, tag_columns=None,
-           system_tag_columns=(), record_id_column=None,
+  __init__(db_path, table_name, key_columns=None,
+           system_key_columns=(), record_id_column=None,
            source_id=None, label=None, data_context=None, config=None)
   to_config() → dict        # source_type="sqlite_table", db_path, table_name, …
   from_config(config) → cls # reconstructs via db_path; fully working
@@ -57,35 +57,35 @@ Note: `src/orcapod/sources/__init__.py` already does `from orcapod.core.sources
 ## Data Flow
 
 ```
-SQLiteTableSource.__init__(db_path, table_name, tag_columns, ...)
+SQLiteTableSource.__init__(db_path, table_name, key_columns, ...)
 │
 ├─ 1. SQLiteConnector(db_path)
 │
 ├─ 2. Validate: table_name in connector.get_table_names()
 │      └─ missing → ValueError("Table 'x' not found in database.")
 │
-├─ 3. Resolve tags and query
-│      ├─ tag_columns provided (non-None)
-│      │    → resolved_tags = list(tag_columns)
-│      └─ tag_columns is None
+├─ 3. Resolve keys and query
+│      ├─ key_columns provided (non-None)
+│      │    → resolved_keys = list(key_columns)
+│      └─ key_columns is None
 │           ├─ pk_cols = connector.get_pk_columns(table_name)
-│           ├─ pk_cols non-empty → resolved_tags = pk_cols
-│           └─ pk_cols empty   → resolved_tags = ["rowid"]
+│           ├─ pk_cols non-empty → resolved_keys = pk_cols
+│           └─ pk_cols empty   → resolved_keys = ["rowid"]
 │
 ├─ 4. Determine query (handles both auto-detection AND from_config reconstruction)
 │      normal_cols = {ci.name for ci in connector.get_column_info(table_name)}
-│      ├─ "rowid" in resolved_tags AND "rowid" not in normal_cols
+│      ├─ "rowid" in resolved_keys AND "rowid" not in normal_cols
 │      │    → _query = 'SELECT rowid, * FROM "{table_name}"'
 │      └─ otherwise
 │           → _query = None  (DBTableSource uses default SELECT *)
 │
 └─ 5. super().__init__(connector, table_name,
-                       tag_columns=resolved_tags,   ← always non-None
+                       key_columns=resolved_keys,   ← always non-None
                        _query=_query, ...)
        │
-       │  NOTE: passing tag_columns as a non-None list bypasses
+       │  NOTE: passing key_columns as a non-None list bypasses
        │  DBTableSource's own PK-lookup-and-raise path, which only
-       │  fires when tag_columns is None. This is intentional.
+       │  fires when key_columns is None. This is intentional.
        │
        └─ DBTableSource: fetch batches (using _query) → SourceStreamBuilder → stream
           (rowid column arrives typed as int64 via SQLiteConnector patch)
@@ -93,7 +93,7 @@ SQLiteTableSource.__init__(db_path, table_name, tag_columns, ...)
 store self._db_path
 ```
 
-`from_config` calls `cls(db_path=config["db_path"], table_name=config["table_name"], tag_columns=config["tag_columns"], ...)` — the connector is recreated from `db_path`. Because `tag_columns` is passed explicitly (non-None), step 3 skips PK detection; step 4 then checks whether `"rowid"` is in the resolved tags but not in the table's normal columns and re-injects the rowid query if so. This means **ROWID-only tables also round-trip correctly** from config as long as the backing file exists.
+`from_config` calls `cls(db_path=config["db_path"], table_name=config["table_name"], key_columns=config["key_columns"], ...)` — the connector is recreated from `db_path`. Because `key_columns` is passed explicitly (non-None), step 3 skips PK detection; step 4 then checks whether `"rowid"` is in the resolved keys but not in the table's normal columns and re-injects the rowid query if so. This means **ROWID-only tables also round-trip correctly** from config as long as the backing file exists.
 
 **Known limitation:** `:memory:` sources cannot be reconstructed via `from_config`. The new in-memory database is empty and does not contain the original table, causing `ValueError: Table 'x' not found`. File-backed sources (including ROWID-only tables) round-trip correctly. The config round-trip test (test 9) must use a `tmp_path`-backed SQLite file.
 
@@ -104,14 +104,14 @@ store self._db_path
 | Condition | Behaviour |
 |---|---|
 | Table not found | `ValueError: Table 'x' not found in database.` — raised in step 2, before ROWID logic |
-| Table found, no PK, no explicit tags | ROWID fallback — no error; `"rowid"` used as tag column |
-| Table found, no PK, explicit `tag_columns=[...]` | Works normally — ROWID detection skipped when `tag_columns` is provided |
-| `tag_columns=[]` provided explicitly | Proceeds with empty tag schema — `SourceStreamBuilder` does not guard against empty tag lists; no `ValueError` is raised |
+| Table found, no PK, no explicit keys | ROWID fallback — no error; `"rowid"` used as key column |
+| Table found, no PK, explicit `key_columns=[...]` | Works normally — the PK lookup and ROWID fallback are skipped when `key_columns` is provided |
+| `key_columns=[]` provided explicitly | Proceeds with empty key schema — `SourceStreamBuilder` does not guard against empty key lists; no `ValueError` is raised |
 | Table exists but is empty | `ValueError: Table 'x' is empty.` — raised by `DBTableSource` |
 | `db_path` points to non-existent file | `sqlite3.OperationalError` propagates from `SQLiteConnector.__init__` |
 | `"` in table name | `ValueError` from `SQLiteConnector._validate_table_name` |
 
-The ROWID fallback is silent (no warning log). The resolved `"rowid"` tag column appears in `to_config()` for auditability.
+The ROWID fallback is silent (no warning log). The resolved `"rowid"` key column appears in `to_config()` for auditability.
 
 ---
 
@@ -123,18 +123,18 @@ All use in-memory SQLite (`:memory:`), except the config round-trip test which r
 
 1. **Import / export sanity** — importable from `orcapod.core.sources` and `orcapod.sources`; in `__all__`
 2. **Protocol conformance** — is `SourceProtocol`, `StreamProtocol`, `PipelineElementProtocol`
-3. **PK as default tags** — single-column PK; composite PK; correct tag/data schema split
-4. **Explicit tag override** — `tag_columns=[...]` overrides PK detection entirely
-5. **ROWID fallback** — table with no explicit PK gets `"rowid"` tag; `rowid` column type is `int64`; all rows returned; rowid values are positive integers
+3. **PK as default keys** — single-column PK; composite PK; correct key/data schema split
+4. **Explicit key override** — `key_columns=[...]` overrides PK detection entirely
+5. **ROWID fallback** — table with no explicit PK gets `"rowid"` key; `rowid` column type is `int64`; all rows returned; rowid values are positive integers
 6. **Error cases** — missing table raises `ValueError`; empty table raises `ValueError`
 7. **Stream behaviour** — `iter_data` count, `as_table`, `output_schema`, `producer is None`, `upstreams == ()`
 8. **Deterministic hashing** — `pipeline_hash` and `content_hash` stable across two identical constructions (both in-memory)
-9. **Config round-trip (PK table)** — uses file-backed `tmp_path` SQLite db; `to_config()` has `source_type="sqlite_table"`, `db_path`, `table_name`, `tag_columns`; `from_config(to_config())` reconstructs successfully; content/pipeline hashes match before and after
-10. **Config round-trip (ROWID-only table)** — same as above but with a ROWID-only table; `tag_columns=["rowid"]` in config; `from_config(to_config())` reconstructs correctly and `rowid` remains the tag column
+9. **Config round-trip (PK table)** — uses file-backed `tmp_path` SQLite db; `to_config()` has `source_type="sqlite_table"`, `db_path`, `table_name`, `key_columns`; `from_config(to_config())` reconstructs successfully; content/pipeline hashes match before and after
+10. **Config round-trip (ROWID-only table)** — same as above but with a ROWID-only table; `key_columns=["rowid"]` in config; `from_config(to_config())` reconstructs correctly and `rowid` remains the key column
 
 ### Integration test — same file, marked `@pytest.mark.integration`
 
-Write rows into an in-memory SQLite table via `SQLiteConnector`, wrap with `SQLiteTableSource`, feed through a `FunctionPod`, collect output — verify tag columns flow through and pipeline completes.
+Write rows into an in-memory SQLite table via `SQLiteConnector`, wrap with `SQLiteTableSource`, feed through a `FunctionPod`, collect output — verify key columns flow through and pipeline completes.
 
 ### Regression tests
 
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 8a8bd696..410334f3 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -18,14 +18,14 @@ source = DictSource(
         {"experiment": "exp_002", "temperature": 22.3, "pressure": 0.98},
         {"experiment": "exp_003", "temperature": 19.8, "pressure": 1.05},
     ],
-    tag_columns=["experiment"],
+    key_columns=["experiment"],
     source_id="lab_results",
 )
 ```
 
 There are two important concepts here:
 
-- **Tag columns** (`tag_columns`) are the keys that identify each row -- like primary keys
+- **Key columns** (`key_columns`) are the columns whose values identify each row -- like primary keys
   in a database or independent variables in an experiment. Here, `experiment` uniquely
   identifies each measurement.
 - **Data columns** are everything else -- the actual data payload. In this example,
@@ -45,11 +45,11 @@ conversion step.
 
 ### Schema
 
-Use `output_schema()` to see the tag and data column types:
+Use `output_schema()` to see the key and data column types:
 
 ```python
-tag_schema, data_schema = source.output_schema()
-print(tag_schema)
+key_schema, data_schema = source.output_schema()
+print(key_schema)
 # Schema({'experiment': <class 'str'>})
 print(data_schema)
 # Schema({'temperature': <class 'float'>, 'pressure': <class 'float'>})
@@ -60,8 +60,8 @@ print(data_schema)
 Use `keys()` to get just the column names:
 
 ```python
-tag_keys, data_keys = source.keys()
-print(tag_keys)
+key_keys, data_keys = source.keys()
+print(key_keys)
 # ('experiment',)
 print(data_keys)
 # ('temperature', 'pressure')
@@ -69,14 +69,14 @@ print(data_keys)
 
 ### Iterating over rows
 
-Use `iter_data()` to walk through each (Tag, Data) pair:
+Use `iter_data()` to walk through each (Key, Data) pair:
 
 ```python
-for tag, data in source.iter_data():
-    print(f"Tag: {tag.as_dict()}, Data: {data.as_dict()}")
-# Tag: {'experiment': 'exp_001'}, Data: {'temperature': 20.5, 'pressure': 1.01}
-# Tag: {'experiment': 'exp_002'}, Data: {'temperature': 22.3, 'pressure': 0.98}
-# Tag: {'experiment': 'exp_003'}, Data: {'temperature': 19.8, 'pressure': 1.05}
+for key, data in source.iter_data():
+    print(f"Key: {key.as_dict()}, Data: {data.as_dict()}")
+# Key: {'experiment': 'exp_001'}, Data: {'temperature': 20.5, 'pressure': 1.01}
+# Key: {'experiment': 'exp_002'}, Data: {'temperature': 22.3, 'pressure': 0.98}
+# Key: {'experiment': 'exp_003'}, Data: {'temperature': 19.8, 'pressure': 1.05}
 ```
 
 ### Getting the full table
@@ -128,7 +128,7 @@ result = analyze_conditions.pod(source)
     All standard pods support `__call__` as a shorthand for `.process()`, so
     `pod(stream)` is equivalent to `pod.process(stream)`.
 
-The `result` is a new stream. Tags are preserved from the input; the data columns
+The `result` is a new stream. Keys are preserved from the input; the data columns
 are replaced with the function's outputs.
 
 ## Inspecting the result
@@ -136,24 +136,24 @@ are replaced with the function's outputs.
 The result stream supports the same inspection methods as the source:
 
 ```python
-tag_schema, data_schema = result.output_schema()
-print(tag_schema)
+key_schema, data_schema = result.output_schema()
+print(key_schema)
 # Schema({'experiment': <class 'str'>})
 print(data_schema)
 # Schema({'temp_fahrenheit': <class 'float'>, 'is_high_pressure': <class 'bool'>})
 ```
 
-The tag schema is unchanged -- function pods never modify tags. The data schema
+The key schema is unchanged -- function pods never modify keys. The data schema
 now reflects the function's output types.
 
 Iterate over the results:
 
 ```python
-for tag, data in result.iter_data():
-    print(f"Tag: {tag.as_dict()}, Data: {data.as_dict()}")
-# Tag: {'experiment': 'exp_001'}, Data: {'temp_fahrenheit': 68.9, 'is_high_pressure': True}
-# Tag: {'experiment': 'exp_002'}, Data: {'temp_fahrenheit': 72.14, 'is_high_pressure': False}
-# Tag: {'experiment': 'exp_003'}, Data: {'temp_fahrenheit': 67.64, 'is_high_pressure': True}
+for key, data in result.iter_data():
+    print(f"Key: {key.as_dict()}, Data: {data.as_dict()}")
+# Key: {'experiment': 'exp_001'}, Data: {'temp_fahrenheit': 68.9, 'is_high_pressure': True}
+# Key: {'experiment': 'exp_002'}, Data: {'temp_fahrenheit': 72.14, 'is_high_pressure': False}
+# Key: {'experiment': 'exp_003'}, Data: {'temp_fahrenheit': 67.64, 'is_high_pressure': True}
 ```
 
 Or view it as a table:
@@ -181,7 +181,7 @@ source = DictSource(
         {"experiment": "exp_002", "temperature": 22.3, "pressure": 0.98},
         {"experiment": "exp_003", "temperature": 19.8, "pressure": 1.05},
     ],
-    tag_columns=["experiment"],
+    key_columns=["experiment"],
     source_id="lab_results",
 )
 
@@ -207,10 +207,10 @@ Now that you have the basics, explore these topics:
 
 - [Sources](concepts/sources.md) -- learn about the different source types and how
   provenance tracking works.
-- [Streams](concepts/streams.md) -- understand the immutable (Tag, Data) stream model.
+- [Streams](concepts/streams.md) -- understand the immutable (Key, Data) stream model.
 - [Function Pods](concepts/function-pods.md) -- advanced function pod usage, including
   caching with databases.
 - [Operators](concepts/operators.md) -- structural transforms like Join, Batch, and Filter
-  that work on tags and stream structure without inspecting data content.
+  that work on keys and stream structure without inspecting data content.
 - [Identity & Hashing](concepts/identity.md) -- how Orcapod tracks content identity and
   pipeline structure for reproducibility.
diff --git a/docs/index.md b/docs/index.md
index 20204090..6f5bbea9 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -34,7 +34,7 @@ source = DictSource(
         {"name": "Bob", "age": 25},
         {"name": "Charlie", "age": 35},
     ],
-    tag_columns=["name"],
+    key_columns=["name"],
     source_id="people",
 )
 
@@ -45,8 +45,8 @@ def compute_birth_year(age: int) -> int:
 
 # 3. Apply the function pod and inspect the output
 result = compute_birth_year.pod(source)
-for tag, data in result.iter_data():
-    print(f"{tag.as_dict()} -> {data.as_dict()}")
+for key, data in result.iter_data():
+    print(f"{key.as_dict()} -> {data.as_dict()}")
 # {'name': 'Alice'} -> {'birth_year': 1996}
 # {'name': 'Bob'} -> {'birth_year': 2001}
 # {'name': 'Charlie'} -> {'birth_year': 1991}
diff --git a/docs/plans/2026-03-26-postgresql-table-source.md b/docs/plans/2026-03-26-postgresql-table-source.md
index 5291990b..4e59f9dd 100644
--- a/docs/plans/2026-03-26-postgresql-table-source.md
+++ b/docs/plans/2026-03-26-postgresql-table-source.md
@@ -2,7 +2,7 @@
 
 > **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
 
-**Goal:** Implement `PostgreSQLTableSource`, a read-only OrcaPod `Source` backed by a PostgreSQL table, using PK columns as default tag columns.
+**Goal:** Implement `PostgreSQLTableSource`, a read-only OrcaPod `Source` backed by a PostgreSQL table, using PK columns as default key columns.
 
 **Architecture:** Thin subclass of `DBTableSource` (which handles all source logic). `PostgreSQLTableSource.__init__` stores the DSN, creates a `PostgreSQLConnector`, delegates entirely to `DBTableSource.__init__` (which eagerly loads all data into memory), then closes the connector. No ROWID fallback needed — PostgreSQL PKs are always `NOT NULL`.
 
@@ -81,7 +81,7 @@ Create `src/orcapod/core/sources/postgresql_table_source.py`:
 """PostgreSQLTableSource — a read-only RootSource backed by a PostgreSQL table.
 
 Wraps a PostgreSQL table as an OrcaPod Source. Primary-key columns are used
-as tag columns by default.
+as key columns by default.
 
 Example::
 
@@ -109,13 +109,13 @@ class PostgreSQLTableSource(DBTableSource):
     1. Stores the DSN for serialisation.
     2. Opens a ``PostgreSQLConnector`` for *dsn*.
     3. Delegates to ``DBTableSource.__init__``, which validates the table,
-       resolves tag columns (defaults to PK columns), fetches all rows as
+       resolves key columns (defaults to PK columns), fetches all rows as
        Arrow batches, and builds the stream.
     4. Closes the connector — all data is eagerly loaded into memory, so the
        connection is released immediately.
 
-    PostgreSQL PK columns are always ``NOT NULL``, so NULL tag values can
-    only arise when *tag_columns* is overridden to point at a nullable
+    PostgreSQL PK columns are always ``NOT NULL``, so NULL key values can
+    only arise when *key_columns* is overridden to point at a nullable
     column. Such NULLs are passed through as-is (Arrow supports nulls).
 
     Args:
@@ -123,10 +123,10 @@ class PostgreSQLTableSource(DBTableSource):
             URI form: ``"postgresql://user:pass@host:5432/dbname"``
             Keyword form: ``"host=localhost dbname=mydb user=alice"``
         table_name: Name of the table to expose as a source.
-        tag_columns: Columns to use as tag columns. If ``None`` (default),
+        key_columns: Columns to use as key columns. If ``None`` (default),
             the table's primary-key columns are used. Raises ``ValueError``
             if the table has no primary key and no explicit columns are given.
-        system_tag_columns: Additional system-level tag columns.
+        system_key_columns: Additional system-level key columns.
         record_id_column: Column for stable per-row record IDs in provenance.
         source_id: Canonical source name. Defaults to *table_name*.
         label: Human-readable label for this source node.
@@ -135,7 +135,7 @@ class PostgreSQLTableSource(DBTableSource):
 
     Raises:
         ValueError: If the table is not found, is empty, or has no PK and
-            no *tag_columns* are given.
+            no *key_columns* are given.
         psycopg.OperationalError: If the DSN is invalid or connection fails.
     """
 
@@ -143,8 +143,8 @@ class PostgreSQLTableSource(DBTableSource):
         self,
         dsn: str,
         table_name: str,
-        tag_columns: Collection[str] | None = None,
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] | None = None,
+        system_key_columns: Collection[str] = (),
         record_id_column: str | None = None,
         source_id: str | None = None,
         label: str | None = None,
@@ -194,7 +194,7 @@ git commit -m "feat(sources): stub PostgreSQLTableSource with import/export wiri
 
 ---
 
-## Task 2: Core `__init__` — construction, PK tags, error cases
+## Task 2: Core `__init__` — construction, PK keys, error cases
 
 Implement the actual `__init__` and verify the main behaviours.
 
@@ -297,19 +297,19 @@ class TestProtocolConformance:
 
 
 # ===========================================================================
-# 3. PK as default tag columns
+# 3. PK as default key columns
 # ===========================================================================
 
 
-class TestPKAsDefaultTags:
-    def test_single_pk_is_tag_column(self):
+class TestPKAsDefaultKeys:
+    def test_single_pk_is_key_column(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         with patch(_PATCH) as mock_cls:
             mock_cls.return_value = _make_mock_connector()
             src = PostgreSQLTableSource(DSN, "measurements")
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
 
     def test_pk_not_in_data_schema(self):
         from orcapod.core.sources import PostgreSQLTableSource
@@ -330,7 +330,7 @@ class TestPKAsDefaultTags:
         assert "trial" in data_schema
         assert "response" in data_schema
 
-    def test_composite_pk_all_columns_are_tags(self):
+    def test_composite_pk_all_columns_are_keys(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         schema = pa.schema([
@@ -353,9 +353,9 @@ class TestPKAsDefaultTags:
                 batches=[batch],
             )
             src = PostgreSQLTableSource(DSN, "events")
-        tag_schema, _ = src.output_schema()
-        assert "user_id" in tag_schema
-        assert "event_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "user_id" in key_schema
+        assert "event_id" in key_schema
 
     def test_default_source_id_is_table_name(self):
         from orcapod.core.sources import PostgreSQLTableSource
@@ -375,32 +375,32 @@ class TestPKAsDefaultTags:
 
 
 # ===========================================================================
-# 4. Explicit tag_columns override
+# 4. Explicit key_columns override
 # ===========================================================================
 
 
-class TestExplicitTagOverride:
-    def test_explicit_tag_columns_override_pk(self):
+class TestExplicitKeyOverride:
+    def test_explicit_key_columns_override_pk(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         with patch(_PATCH) as mock_cls:
             mock_cls.return_value = _make_mock_connector()
-            src = PostgreSQLTableSource(DSN, "measurements", tag_columns=["trial"])
-        tag_schema, _ = src.output_schema()
-        assert "trial" in tag_schema
-        assert "session_id" not in tag_schema
+            src = PostgreSQLTableSource(DSN, "measurements", key_columns=["trial"])
+        key_schema, _ = src.output_schema()
+        assert "trial" in key_schema
+        assert "session_id" not in key_schema
 
-    def test_multiple_explicit_tag_columns(self):
+    def test_multiple_explicit_key_columns(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         with patch(_PATCH) as mock_cls:
             mock_cls.return_value = _make_mock_connector()
             src = PostgreSQLTableSource(
-                DSN, "measurements", tag_columns=["session_id", "trial"]
+                DSN, "measurements", key_columns=["session_id", "trial"]
             )
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
-        assert "trial" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
+        assert "trial" in key_schema
 
 
 # ===========================================================================
@@ -409,7 +409,7 @@ class TestExplicitTagOverride:
 
 
 class TestNoPKError:
-    def test_no_pk_and_no_tag_columns_raises(self):
+    def test_no_pk_and_no_key_columns_raises(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         with patch(_PATCH) as mock_cls:
@@ -471,14 +471,14 @@ class TestStreamBehaviour:
             src = PostgreSQLTableSource(DSN, "measurements")
         assert len(list(src.iter_data())) == 3
 
-    def test_iter_data_tags_contain_pk(self):
+    def test_iter_data_keys_contain_pk(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         with patch(_PATCH) as mock_cls:
             mock_cls.return_value = _make_mock_connector()
             src = PostgreSQLTableSource(DSN, "measurements")
-        for tags, _ in src.iter_data():
-            assert "session_id" in tags
+        for keys, _ in src.iter_data():
+            assert "session_id" in keys
 
     def test_output_schema_returns_two_schemas(self):
         from orcapod.core.sources import PostgreSQLTableSource
@@ -533,7 +533,7 @@ class TestDeterministicHashing:
             src2 = PostgreSQLTableSource(DSN, "measurements")
         assert src1.content_hash() == src2.content_hash()
 
-    def test_different_tag_columns_yields_different_pipeline_hash(self):
+    def test_different_key_columns_yields_different_pipeline_hash(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         with patch(_PATCH) as mock_cls:
@@ -541,7 +541,7 @@ class TestDeterministicHashing:
             src1 = PostgreSQLTableSource(DSN, "measurements")
         with patch(_PATCH) as mock_cls:
             mock_cls.return_value = _make_mock_connector()
-            src2 = PostgreSQLTableSource(DSN, "measurements", tag_columns=["trial"])
+            src2 = PostgreSQLTableSource(DSN, "measurements", key_columns=["trial"])
         assert src1.pipeline_hash() != src2.pipeline_hash()
 ```
 
@@ -562,8 +562,8 @@ Replace the `__init__` stub in `src/orcapod/core/sources/postgresql_table_source
         self,
         dsn: str,
         table_name: str,
-        tag_columns: Collection[str] | None = None,
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] | None = None,
+        system_key_columns: Collection[str] = (),
         record_id_column: str | None = None,
         source_id: str | None = None,
         label: str | None = None,
@@ -576,8 +576,8 @@ Replace the `__init__` stub in `src/orcapod/core/sources/postgresql_table_source
             super().__init__(
                 connector,
                 table_name,
-                tag_columns=tag_columns,
-                system_tag_columns=system_tag_columns,
+                key_columns=key_columns,
+                system_key_columns=system_key_columns,
                 record_id_column=record_id_column,
                 source_id=source_id,
                 label=label,
@@ -627,7 +627,7 @@ Expected: PASS.
 ```bash
 git add src/orcapod/core/sources/postgresql_table_source.py \
         tests/test_core/sources/test_postgresql_table_source.py
-git commit -m "feat(sources): implement PostgreSQLTableSource.__init__ with PK tag resolution (PLT-1072)"
+git commit -m "feat(sources): implement PostgreSQLTableSource.__init__ with PK key resolution (PLT-1072)"
 ```
 
 ---
@@ -667,8 +667,8 @@ class TestToConfig:
     def test_has_table_name(self):
         assert self._make_src().to_config()["table_name"] == "measurements"
 
-    def test_has_tag_columns(self):
-        assert "session_id" in self._make_src().to_config()["tag_columns"]
+    def test_has_key_columns(self):
+        assert "session_id" in self._make_src().to_config()["key_columns"]
 
     def test_has_source_id(self):
         assert self._make_src().to_config()["source_id"] == "measurements"
@@ -721,18 +721,18 @@ class TestFromConfig:
         assert src2.content_hash() == src.content_hash()
         assert src2.pipeline_hash() == src.pipeline_hash()
 
-    def test_from_config_with_explicit_tag_columns(self):
+    def test_from_config_with_explicit_key_columns(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         with patch(_PATCH) as mock_cls:
             mock_cls.return_value = _make_mock_connector()
-            src = PostgreSQLTableSource(DSN, "measurements", tag_columns=["trial"])
+            src = PostgreSQLTableSource(DSN, "measurements", key_columns=["trial"])
         config = src.to_config()
         with patch(_PATCH) as mock_cls:
             mock_cls.return_value = _make_mock_connector()
             src2 = PostgreSQLTableSource.from_config(config)
-        tag_schema, _ = src2.output_schema()
-        assert "trial" in tag_schema
+        key_schema, _ = src2.output_schema()
+        assert "trial" in key_schema
 
     def test_from_config_missing_dsn_raises(self):
         from orcapod.core.sources import PostgreSQLTableSource
@@ -774,8 +774,8 @@ Replace the stubs in `src/orcapod/core/sources/postgresql_table_source.py`:
         return cls(
             dsn=config["dsn"],
             table_name=config["table_name"],
-            tag_columns=config.get("tag_columns"),
-            system_tag_columns=config.get("system_tag_columns", ()),
+            key_columns=config.get("key_columns"),
+            system_key_columns=config.get("system_key_columns", ()),
             record_id_column=config.get("record_id_column"),
             source_id=config.get("source_id"),
             label=config.get("label"),
@@ -993,7 +993,7 @@ def schema_dsn(pg_schema: str) -> str:
 class TestSinglePKTable:
     """Source backed by a table with a single-column PK."""
 
-    def test_pk_column_is_tag(self, schema_dsn: str) -> None:
+    def test_pk_column_is_key(self, schema_dsn: str) -> None:
         from orcapod.core.sources import PostgreSQLTableSource
 
         with psycopg.connect(schema_dsn) as conn:
@@ -1009,8 +1009,8 @@ class TestSinglePKTable:
             conn.commit()
 
         src = PostgreSQLTableSource(schema_dsn, "measurements")
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
 
     def test_non_pk_columns_in_data_schema(self, schema_dsn: str) -> None:
         from orcapod.core.sources import PostgreSQLTableSource
@@ -1050,7 +1050,7 @@ class TestSinglePKTable:
         src = PostgreSQLTableSource(schema_dsn, "measurements")
         assert len(list(src.iter_data())) == 3
 
-    def test_tag_values_are_correct(self, schema_dsn: str) -> None:
+    def test_key_values_are_correct(self, schema_dsn: str) -> None:
         from orcapod.core.sources import PostgreSQLTableSource
 
         with psycopg.connect(schema_dsn) as conn:
@@ -1066,15 +1066,15 @@ class TestSinglePKTable:
             conn.commit()
 
         src = PostgreSQLTableSource(schema_dsn, "measurements")
-        tag_values = sorted([tags["session_id"] for tags, _ in src.iter_data()])
-        assert tag_values == ["s1", "s2", "s3"]
+        key_values = sorted([keys["session_id"] for keys, _ in src.iter_data()])
+        assert key_values == ["s1", "s2", "s3"]
 
 
 @pytest.mark.postgres
 class TestCompositePKTable:
     """Source backed by a table with a composite PK."""
 
-    def test_both_pk_columns_are_tags(self, schema_dsn: str) -> None:
+    def test_both_pk_columns_are_keys(self, schema_dsn: str) -> None:
         from orcapod.core.sources import PostgreSQLTableSource
 
         with psycopg.connect(schema_dsn) as conn:
@@ -1091,16 +1091,16 @@ class TestCompositePKTable:
             conn.commit()
 
         src = PostgreSQLTableSource(schema_dsn, "events")
-        tag_schema, _ = src.output_schema()
-        assert "user_id" in tag_schema
-        assert "event_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "user_id" in key_schema
+        assert "event_id" in key_schema
 
 
 @pytest.mark.postgres
-class TestExplicitTagOverride:
-    """tag_columns override overrides the PK."""
+class TestExplicitKeyOverride:
+    """key_columns override overrides the PK."""
 
-    def test_explicit_tag_columns_override_pk(self, schema_dsn: str) -> None:
+    def test_explicit_key_columns_override_pk(self, schema_dsn: str) -> None:
         from orcapod.core.sources import PostgreSQLTableSource
 
         with psycopg.connect(schema_dsn) as conn:
@@ -1116,11 +1116,11 @@ class TestExplicitTagOverride:
             conn.commit()
 
         src = PostgreSQLTableSource(
-            schema_dsn, "measurements", tag_columns=["trial"]
+            schema_dsn, "measurements", key_columns=["trial"]
         )
-        tag_schema, _ = src.output_schema()
-        assert "trial" in tag_schema
-        assert "session_id" not in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "trial" in key_schema
+        assert "session_id" not in key_schema
 
 
 @pytest.mark.postgres
@@ -1173,8 +1173,8 @@ class TestPipelineIntegration:
         doubled_values = sorted([pkt.as_dict()["doubled"] for _, pkt in fn_outputs[0]])
         assert doubled_values == pytest.approx([0.2, 0.4, 0.6])
 
-        tag_values = sorted([tags["session_id"] for tags, _ in fn_outputs[0]])
-        assert tag_values == ["s1", "s2", "s3"]
+        key_values = sorted([keys["session_id"] for keys, _ in fn_outputs[0]])
+        assert key_values == ["s1", "s2", "s3"]
 ```
 
 - [ ] **Step 5.2: Verify the integration test file is syntactically correct (dry run)**
@@ -1235,7 +1235,7 @@ gh pr create \
 ## Summary
 
 - Implements `PostgreSQLTableSource` as a thin subclass of `DBTableSource`
-- PK columns used as default tag columns; explicit `tag_columns` override supported
+- PK columns used as default key columns; explicit `key_columns` override supported
 - Connector opened and closed eagerly at construction time (all data loaded into memory)
 - `to_config` / `from_config` round-trip serialisation
 - Registered in `_build_source_registry()` under `"postgresql_table"`
diff --git a/docs/plans/2026-03-27-non-active-node-semantics.md b/docs/plans/2026-03-27-non-active-node-semantics.md
index d8105eb7..2e5e0ad9 100644
--- a/docs/plans/2026-03-27-non-active-node-semantics.md
+++ b/docs/plans/2026-03-27-non-active-node-semantics.md
@@ -72,7 +72,7 @@ from orcapod.types import CacheMode
 
 @pytest.fixture
 def simple_source() -> ArrowTableStream:
-    """Single-tag stream: id (tag), x (data), 3 rows."""
+    """Single-key stream: id (key), x (data), 3 rows."""
     return ArrowTableStream(
         pa.table(
             {
@@ -80,7 +80,7 @@ def simple_source() -> ArrowTableStream:
                 "x": pa.array([10, 20, 30], type=pa.int64()),
             }
         ),
-        tag_columns=["id"],
+        key_columns=["id"],
     )
 
 
@@ -189,10 +189,10 @@ def _make_empty_table(self) -> "pa.Table":
     Requires ``self._operator is not None`` (pre-existing limitation shared
     with ``_replay_from_cache``).
     """
-    tag_schema, data_schema = self.output_schema()
+    key_schema, data_schema = self.output_schema()
     type_converter = self.data_context.type_converter
     empty_fields: dict = {}
-    for name, py_type in {**tag_schema, **data_schema}.items():
+    for name, py_type in {**key_schema, **data_schema}.items():
         arrow_type = type_converter.python_type_to_arrow_type(py_type)
         empty_fields[name] = pa.array([], type=arrow_type)
     return pa.table(empty_fields)
@@ -213,8 +213,8 @@ def _replay_from_cache(self) -> None:
     if records is None:
         records = self._make_empty_table()
 
-    tag_keys = self.keys()[0]
-    self._cached_output_stream = ArrowTableStream(records, tag_columns=tag_keys)
+    key_keys = self.keys()[0]
+    self._cached_output_stream = ArrowTableStream(records, key_columns=key_keys)
     self._update_modified_time()
 ```
 
@@ -271,8 +271,8 @@ def _load_cached_stream_from_db(self) -> "ArrowTableStream | None":
         records_table = self._make_empty_table()
     else:
         records_table = records
-    tag_keys = self.keys()[0]
-    return ArrowTableStream(records_table, tag_columns=tag_keys)
+    key_keys = self.keys()[0]
+    return ArrowTableStream(records_table, key_columns=key_keys)
 ```
 
 - [ ] **Step 3.2: Run the existing test suite — no regressions**
@@ -306,8 +306,8 @@ This is the core fix. Both methods stop calling `self.run()` and instead do a 3-
 Replace lines 555–558 (current `iter_data()`):
 
 ```python
-def iter_data(self) -> Iterator[tuple[TagProtocol, DataProtocol]]:
-    """Return an iterator over (tag, data) pairs.
+def iter_data(self) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
+    """Return an iterator over (key, data) pairs.
 
     Read-only: never triggers computation. Returns empty before ``run()``
     or ``execute()`` populates the cache. Call ``node.is_stale`` before
@@ -368,7 +368,7 @@ def test_iter_data(self, simple_stream, db):
     node.run()                          # <-- add this line
     data = list(node.iter_data())
     assert len(data) == 3
-    for tag, data in data:
+    for key, data in data:
         assert "renamed_x" in data.keys()
 
 def test_as_table(self, simple_stream, db):
@@ -586,9 +586,9 @@ Replace the body of the `flow()` docstring:
 ```python
 def flow(
     self,
-) -> Collection[tuple[TagProtocol, DataProtocol]]:
+) -> Collection[tuple[KeyProtocol, DataProtocol]]:
     """
-    Returns the entire collection of (TagProtocol, DataProtocol) as a list.
+    Returns the entire collection of (KeyProtocol, DataProtocol) as a list.
     This is a read-only operation — results reflect whatever has been computed
     by a prior ``run()`` or ``execute()`` call. If no computation has been
     performed, returns an empty list.
@@ -634,7 +634,7 @@ from orcapod.core.streams.arrow_table_stream import ArrowTableStream
 
 src = ArrowTableStream(
     pa.table({"id": [1, 2], "x": [10, 20]}),
-    tag_columns=["id"],
+    key_columns=["id"],
 )
 op_a = MapData({"x": "y"})
 op_b = MapData({"y": "z"})
diff --git a/docs/specs/2026-03-26-postgresql-table-source-design.md b/docs/specs/2026-03-26-postgresql-table-source-design.md
index eb9a58c1..0a2fcc78 100644
--- a/docs/specs/2026-03-26-postgresql-table-source-design.md
+++ b/docs/specs/2026-03-26-postgresql-table-source-design.md
@@ -1,14 +1,14 @@
 # Design: PostgreSQLTableSource (PLT-1072)
 
 **Date:** 2026-03-26
-**Issue:** [PLT-1072](https://linear.app/enigma-metamorphic/issue/PLT-1072/implement-source-based-on-postgresql-tables-with-pk-as-default-tag)
+**Issue:** [PLT-1072](https://linear.app/enigma-metamorphic/issue/PLT-1072/implement-source-based-on-postgresql-tables-with-pk-as-default-tag)
 **Status:** Approved
 
 ---
 
 ## Summary
 
-Implement `PostgreSQLTableSource`, a read-only OrcaPod `Source` backed by a PostgreSQL table. The table's primary key columns serve as default tag columns. Follows the same pattern as the already-merged `SQLiteTableSource`.
+Implement `PostgreSQLTableSource`, a read-only OrcaPod `Source` backed by a PostgreSQL table. The table's primary key columns serve as default key columns. Follows the same pattern as the already-merged `SQLiteTableSource`.
 
 ---
 
@@ -24,7 +24,7 @@ PostgreSQL is a primary production database at Metamorphic. Exposing tables as O
 
 `PostgreSQLTableSource` is a minimal subclass of `DBTableSource`. All source logic (PK resolution, eager loading, Arrow conversion, stream building) lives in `DBTableSource`. This class only handles PostgreSQL-specific initialization: accept a DSN string, create a `PostgreSQLConnector`, delegate to the base class, then close the connector.
 
-This mirrors `SQLiteTableSource` exactly — the only meaningful difference is that PostgreSQL has no ROWID fallback, so a table with no PK and no explicit `tag_columns` raises `ValueError` (already the default `DBTableSource` behaviour).
+This mirrors `SQLiteTableSource` exactly — the only meaningful difference is that PostgreSQL has no ROWID fallback, so a table with no PK and no explicit `key_columns` raises `ValueError` (already the default `DBTableSource` behaviour).
 
 ---
 
@@ -42,8 +42,8 @@ class PostgreSQLTableSource(DBTableSource):
         self,
         dsn: str,
         table_name: str,
-        tag_columns: Collection[str] | None = None,
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] | None = None,
+        system_key_columns: Collection[str] = (),
         record_id_column: str | None = None,
         source_id: str | None = None,
         label: str | None = None,
@@ -75,7 +75,7 @@ The `finally` block wraps the `close()` call in a `try/except Exception: pass` t
 self._dsn = dsn
 connector = PostgreSQLConnector(dsn)    # outside try — must succeed before entering try
 try:
-    super().__init__(connector, table_name, tag_columns=tag_columns, ...)
+    super().__init__(connector, table_name, key_columns=key_columns, ...)
 finally:
     try:
         connector.close()
@@ -91,10 +91,10 @@ PostgreSQLTableSource.__init__(dsn, table_name)
       → DBTableSource.__init__(connector, ...)   # full source initialisation
           [inside DBTableSource]:
             → connector.get_table_names()        # validate table exists
-            → connector.get_pk_columns(table)    # resolve default tag columns (if tag_columns=None)
+            → connector.get_pk_columns(table)    # resolve default key columns (if key_columns=None)
             → connector.iter_batches(SELECT * FROM "table")
             → pa.Table.from_batches(...)         # assemble Arrow table
-            → SourceStreamBuilder.build(...)     # attach tags, source-info, schema hash
+            → SourceStreamBuilder.build(...)     # attach keys, source-info, schema hash
   → [finally] try: connector.close() except Exception: pass
 ```
 
@@ -111,13 +111,13 @@ After construction the source holds all data in-memory as an `ArrowTableStream`.
     "source_type": "postgresql_table",
     "dsn": "<connection string>",
     "table_name": "<name>",
-    "tag_columns": [...],
-    "system_tag_columns": [...],
+    "key_columns": [...],
+    "system_key_columns": [...],
     "record_id_column": ...,
     "source_id": ...,
     "content_hash": ...,
     "pipeline_hash": ...,
-    "tag_schema": {...},
+    "key_schema": {...},
     "data_schema": {...},
 }
 ```
@@ -143,8 +143,8 @@ Note: stripping `"connector"` is both a schema concern (callers should not see t
 |---|---|
 | `dsn` | `"dsn"` (required) |
 | `table_name` | `"table_name"` (required) |
-| `tag_columns` | `"tag_columns"` |
-| `system_tag_columns` | `"system_tag_columns"` (default `()`) |
+| `key_columns` | `"key_columns"` |
+| `system_key_columns` | `"system_key_columns"` (default `()`) |
 | `record_id_column` | `"record_id_column"` |
 | `source_id` | `"source_id"` |
 | `label` | `"label"` |
@@ -170,8 +170,8 @@ Three places to update, identical to the `SQLiteTableSource` rollout:
 |---|---|
 | Table does not exist | `ValueError: Table 'x' not found in database.` (from `DBTableSource`) |
 | Table is empty | `ValueError: Table 'x' is empty.` (from `DBTableSource`) |
-| No PK and no `tag_columns` given | `ValueError: Table 'x' has no primary key columns. Provide explicit tag_columns.` (from `DBTableSource`) |
-| NULL values in tag columns | Passed through as-is — Arrow supports nulls natively; PostgreSQL PK columns are always `NOT NULL` so this can only arise with an explicit `tag_columns` override |
+| No PK and no `key_columns` given | `ValueError: Table 'x' has no primary key columns. Provide explicit key_columns.` (from `DBTableSource`) |
+| NULL values in key columns | Passed through as-is — Arrow supports nulls natively; PostgreSQL PK columns are always `NOT NULL` so this can only arise with an explicit `key_columns` override |
 | Connection failure | `psycopg` exception propagates naturally |
 | Missing `"dsn"` key in `from_config` | `KeyError` from the `from_config` body |
 
@@ -187,13 +187,13 @@ Uses `unittest.mock.patch("psycopg.connect")` throughout, with mock cursors retu
 
 1. Import / export sanity (`from orcapod.core.sources import PostgreSQLTableSource`, present in `__all__`, importable from `orcapod.sources`)
 2. Protocol conformance (`SourceProtocol`, `StreamProtocol`, `PipelineElementProtocol`)
-3. PK as default tag columns — single PK, composite PK
-4. Explicit `tag_columns` override
+3. PK as default key columns — single PK, composite PK
+4. Explicit `key_columns` override
 5. No-PK table raises `ValueError`
 6. Missing / empty table raises `ValueError`
 7. Stream behaviour (`iter_data`, `output_schema`, `as_table`, `producer`, `upstreams`)
 8. Deterministic hashing (`pipeline_hash`, `content_hash`)
-9. `to_config` shape — has `source_type`, `dsn`, `table_name`, `tag_columns`, `source_id`, `content_hash`, `pipeline_hash`; does **not** have `connector` key or `label` key
+9. `to_config` shape — has `source_type`, `dsn`, `table_name`, `key_columns`, `source_id`, `content_hash`, `pipeline_hash`; does **not** have `connector` key or `label` key
 10. `from_config` round-trip (reconstructs with matching hashes)
 11. `resolve_source_from_config` dispatches to `PostgreSQLTableSource`
 
@@ -203,10 +203,10 @@ Uses `unittest.mock.patch("psycopg.connect")` throughout, with mock cursors retu
 **Marker:** `@pytest.mark.postgres`
 **Fixture:** per-test schema isolation (reuse pattern from `test_postgresql_connector_integration.py`)
 
-- Single-PK table: source yields correct data, tag column in tag schema
-- Composite-PK table: both PK columns in tag schema
-- Explicit `tag_columns` override: overrides PKs correctly
-- Pipeline integration: `PostgreSQLTableSource` drives a full pipeline end-to-end, tag values and data values are correct
+- Single-PK table: source yields correct data, key column in key schema
+- Composite-PK table: both PK columns in key schema
+- Explicit `key_columns` override: overrides PKs correctly
+- Pipeline integration: `PostgreSQLTableSource` drives a full pipeline end-to-end, key values and data values are correct
 
 ---
 
diff --git a/examples/async_vs_sync_pipeline.py b/examples/async_vs_sync_pipeline.py
index 28839baa..98bdbc73 100644
--- a/examples/async_vs_sync_pipeline.py
+++ b/examples/async_vs_sync_pipeline.py
@@ -101,7 +101,7 @@ def build_pipeline(use_async_fn: bool) -> Pipeline:
 
     fn = async_slow_double if use_async_fn else sync_slow_double
     with pipeline:
-        source = ArrowTableSource(SOURCE_TABLE, tag_columns=["id"])
+        source = ArrowTableSource(SOURCE_TABLE, key_columns=["id"])
         pf_a = PythonDataFunction(fn, output_keys="result", function_name="branch_a")
         pf_b = PythonDataFunction(fn, output_keys="result", function_name="branch_b")
         FunctionPod(
diff --git a/examples/save_and_load_pipelines.py b/examples/save_and_load_pipelines.py
index 66051d27..0d47e4ef 100644
--- a/examples/save_and_load_pipelines.py
+++ b/examples/save_and_load_pipelines.py
@@ -4,13 +4,13 @@
 database = databases.DeltaTableDatabase("./local_database")
 source1 = sources.DictSource(
     [{"id": 0, "x": 5}, {"id": 1, "x": 10}, {"id": 2, "x": 15}],
-    tag_columns=["id"],
+    key_columns=["id"],
     label="source1",
 )
 source1 = source1.cached(database)
 source2 = sources.DictSource(
     [{"id": 0, "y": 3}, {"id": 2, "y": 6}, {"id": 4, "y": 9}],
-    tag_columns=["id"],
+    key_columns=["id"],
     label="source2",
 )
 source2 = source2.cached(database)
diff --git a/function-execution-improvements-plan.md b/function-execution-improvements-plan.md
index 9137f357..69a727c4 100644
--- a/function-execution-improvements-plan.md
+++ b/function-execution-improvements-plan.md
@@ -77,23 +77,23 @@ executor directly on the data function. The node never sees raw option dicts.
 ### Current state
 
 Caching exists only at the data-function level (`CachedDataFunction`), which wraps
-`call()` / `async_call()` with DB lookup/insert. This works but cannot leverage tag
+`call()` / `async_call()` with DB lookup/insert. This works but cannot leverage key
 information (which is invisible to data functions).
 
 ### Design decision
 
 Add a **`CachedFunctionPod`** that wraps a `FunctionPod` and intercepts at the
-`process_data(tag, data)` level. This complements `CachedDataFunction`:
+`process_data(key, data)` level. This complements `CachedDataFunction`:
 
 | Layer | `CachedDataFunction` | `CachedFunctionPod` |
 |-------|------------------------|---------------------|
-| Intercepts at | `call(data)` | `process_data(tag, data)` |
-| Has tag access | No | Yes |
-| Cache key includes | Data content hash | Tag + data content hash |
+| Intercepts at | `call(data)` | `process_data(key, data)` |
+| Has key access | No | Yes |
+| Cache key includes | Data content hash | Key + data content hash |
 | Delegates to | Wrapped `DataFunction.call()` | Inner `FunctionPod.process_data()` |
 
 Both are useful: `CachedDataFunction` deduplicates purely on data content;
-`CachedFunctionPod` can incorporate tag metadata into cache decisions.
+`CachedFunctionPod` can incorporate key metadata into cache decisions.
 
 ### Implementation sketch
 
@@ -113,17 +113,17 @@ class CachedFunctionPod(WrappedFunctionPod):
         self._record_path_prefix = record_path_prefix
 
     def process_data(
-        self, tag: TagProtocol, data: DataProtocol
-    ) -> tuple[TagProtocol, DataProtocol | None]:
-        # Cache key incorporates both tag and data content
-        cache_key = self._compute_cache_key(tag, data)
+        self, key: KeyProtocol, data: DataProtocol
+    ) -> tuple[KeyProtocol, DataProtocol | None]:
+        # Cache key incorporates both key and data content
+        cache_key = self._compute_cache_key(key, data)
         cached = self._lookup(cache_key)
         if cached is not None:
-            return tag, cached
-        tag, output = self._function_pod.process_data(tag, data)
+            return key, cached
+        key, output = self._function_pod.process_data(key, data)
         if output is not None:
-            self._store(cache_key, tag, output)
-        return tag, output
+            self._store(cache_key, key, output)
+        return key, output
 ```
 
 ### Changes
@@ -276,7 +276,7 @@ No code changes needed — this is a documentation/convention clarification.
 ### Phase 3: `CachedFunctionPod`
 
 1. Create `src/orcapod/core/cached_function_pod.py`.
-2. Implement `CachedFunctionPod(WrappedFunctionPod)` with tag-aware cache key computation.
+2. Implement `CachedFunctionPod(WrappedFunctionPod)` with key-aware cache key computation.
 3. Add `pod_cache_database` parameter to `function_pod` decorator.
 4. Add tests for pod-level vs data-level caching interaction.
 
@@ -296,5 +296,5 @@ No code changes needed — this is a documentation/convention clarification.
   rather than `execute(pf, data)`), the `Generic[E]` mechanism already supports this —
   just parameterize with the narrower protocol.
 - **`CachedFunctionPod` cache key design**: The exact composition of the cache key (which
-  tag columns to include, whether to include system tags) needs detailed design during
-  implementation. A reasonable default is tag content hash + data content hash.
+  key columns to include, whether to include system keys) needs detailed design during
+  implementation. A reasonable default is key content hash + data content hash.
diff --git a/notebooks/old_tutorials/01_orcapod_core_concepts copy.ipynb b/notebooks/old_tutorials/01_orcapod_core_concepts copy.ipynb
index 01badab7..9cbe8183 100644
--- a/notebooks/old_tutorials/01_orcapod_core_concepts copy.ipynb	
+++ b/notebooks/old_tutorials/01_orcapod_core_concepts copy.ipynb	
@@ -26,10 +26,10 @@
                         "* `Stream` -- a series of one or more `data` flowing from a `data producer` to a `data consumer`. In a directed acyclic graph represneing an Orcapod `pipeline`, a `stream` corresponds to a *directed* edge connecting from a data source into a `data consumer` (e.g., `pod`)\n",
                         "* `Data producer` and `data consumer` -- in the Orcapod data pipeline, data (in form of `data` of data flowing inside a `stream`) flows from a `data producer` to a `data consumer`. Consequentially, a `data consumer` may in turn act as a `data producer` downstream\n",
                         "* `Data source` -- Root level `data producer` (that is, the data originates from this `data producer` and it is not a `data consumer` of any stream). Typically `data source` is tied to a data storage, although you could have *procedural* `data source` where data data are produced programatically.\n",
-                        "* `Tag` -- each `Data` in a stream *may be* associated with a `tag` that helps to assign semantic identity to the particular `data`. For example, a data `data` for an experimental data may be associated with a `tag` of session ID. Note that while `tag` provides a convenient and often meaningful ways of identifying and referring to specific data within a stream, it should **not** be considered to be the defining identity of the `data`. Identity of the `data` is strictly determined by the exact data content of the `data`, and not by how you refer to it. Consequently, it may be that two data with an identical content (and thus shared identity) are associated with distinct `tags` in a `stream`. Conversely, an identical `tag` may be associated with two distinct `data` in a stream.  Typically, one would associate a unique `tag` for each data in the stream.\n",
+                        "* `Key` -- each `Data` in a stream *may be* associated with a `key` that helps to assign semantic identity to the particular `data`. For example, a data `data` for an experimental data may be associated with a `key` of session ID. Note that while `key` provides a convenient and often meaningful ways of identifying and referring to specific data within a stream, it should **not** be considered to be the defining identity of the `data`. Identity of the `data` is strictly determined by the exact data content of the `data`, and not by how you refer to it. Consequently, it may be that two data with an identical content (and thus shared identity) are associated with distinct `keys` in a `stream`. Conversely, an identical `key` may be associated with two distinct `data` in a stream.  Typically, one would associate a unique `key` for each data in the stream.\n",
                         "* `Operation` -- A *node* in the directed acyclic graph representing an Orcapod `pipeline`, corresponding to a step of data processing/transformation/computation. An `Operation` receives can be classified into either a `mapper` or a `pod` based on their role in `data provenance`.\n",
-                        "* `Mapper` -- A class of `operation` that does **not** result in creation/alteration of a new data -- that is, `operation` does **not** every create or modify a file *content*. More specifically, `Mapper` operation can not produce a path that was not already present in the input streams to the `mapper`. This feature ensures that a `mapper` is fundamentally not involved in the reproducibility of computation. Consequently, `mapper` information is not necessary for the maintenance of proper `data provenance` in a tree of computation. However, `mapper` plays critical role in the actual execution of a data pipeline, determining which data `data` will be fed into operations in the pipeline directed acyclic graph (DAG). Note that as long as it doesn't modify the content of any file, a `mapper` may inspect the content of any file in a `data` it receives and alter its behavior based on the content of the file. In other words, `mapper` may alter what data file(s) gets passed around without changing/creating any file based on a rule that depends on `tag`, `data` key (`argument` name) and/or file content.\n",
-                        "* `Pod` (e.g. FunctionPod) -- fundamental unit of computation in Orcapod. `Pod` is the only class of `operation` that may create a new file. Critically, when operating within an Orcapod `pipeline`, a `pod` will **not** receive the `tag` information. Rather, `pod` must strictly operate on a single `data`. An ideal `pod` will have completely deterministic behavior that only depends on the `data` identity (that is, data keys and `pathset` contents)."
+                        "* `Mapper` -- A class of `operation` that does **not** result in creation/alteration of a new data -- that is, `operation` does **not** ever create or modify a file *content*. More specifically, `Mapper` operation can not produce a path that was not already present in the input streams to the `mapper`. This feature ensures that a `mapper` is fundamentally not involved in the reproducibility of computation. Consequently, `mapper` information is not necessary for the maintenance of proper `data provenance` in a tree of computation. However, `mapper` plays a critical role in the actual execution of a data pipeline, determining which data `data` will be fed into operations in the pipeline directed acyclic graph (DAG). Note that as long as it doesn't modify the content of any file, a `mapper` may inspect the content of any file in a `data` it receives and alter its behavior based on the content of the file. In other words, `mapper` may alter what data file(s) gets passed around without changing/creating any file based on a rule that depends on the `key` associated with the data, the key names within the `data` (`argument` names), and/or file content.\n",
+                        "* `Pod` (e.g. FunctionPod) -- fundamental unit of computation in Orcapod. `Pod` is the only class of `operation` that may create a new file. Critically, when operating within an Orcapod `pipeline`, a `pod` will **not** receive the `key` information. Rather, `pod` must strictly operate on a single `data`. An ideal `pod` will have completely deterministic behavior that only depends on the `data` identity (that is, data keys and `pathset` contents)."
                   ]
             },
             {
@@ -43,7 +43,7 @@
                   "cell_type": "markdown",
                   "metadata": {},
                   "source": [
-                        "`Orcabridge` provide prototypal implementation of the above-defined key concepts in `orcapod`, with particular focus given to `stream`, `data`, `tag`, `operation` (`pod` and `mapper`). This package provides the reference implementation of both synchronous and asynchronous `streams` as a sequence of `data` associated with a `tag`. "
+                        "`Orcabridge` provide prototypal implementation of the above-defined key concepts in `orcapod`, with particular focus given to `stream`, `data`, `key`, `operation` (`pod` and `mapper`). This package provides the reference implementation of both synchronous and asynchronous `streams` as a sequence of `data` associated with a `key`. "
                   ]
             },
             {
diff --git a/notebooks/old_tutorials/02_orcapod_basic_usage copy.ipynb b/notebooks/old_tutorials/02_orcapod_basic_usage copy.ipynb
index 97af83dc..f3d30f1b 100644
--- a/notebooks/old_tutorials/02_orcapod_basic_usage copy.ipynb	
+++ b/notebooks/old_tutorials/02_orcapod_basic_usage copy.ipynb	
@@ -680,15 +680,15 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'id': 0}, Data: {'result': 12.0}\n",
-      "Tag: {'id': 1}, Data: {'result': 30.0}\n",
-      "Tag: {'id': 2}, Data: {'result': 56.0}\n"
+      "Key: {'id': 0}, Data: {'result': 12.0}\n",
+      "Key: {'id': 1}, Data: {'result': 30.0}\n",
+      "Key: {'id': 2}, Data: {'result': 56.0}\n"
      ]
     }
    ],
    "source": [
-    "for tag, data in pod(stream):\n",
-    "    print((f\"Tag: {tag}, Data: {data}\"))"
+    "for key, data in pod(stream):\n",
+    "    print((f\"Key: {key}, Data: {data}\"))"
    ]
   },
   {
@@ -722,7 +722,7 @@
     "    object_hasher=object_hasher,\n",
     "    arrow_hasher=ArrowDataHasher(),\n",
     "    result_store=MyArrowDataStore(),\n",
-    "    tag_store=MyArrowDataStore(),\n",
+    "    key_store=MyArrowDataStore(),\n",
     ")"
    ]
   },
@@ -803,7 +803,7 @@
       "result: double\n",
       "----\n",
       "result: [[12]]\n",
-      "Tag: {'id': 0}, Data: {'result': 12.0}\n",
+      "Key: {'id': 0}, Data: {'result': 12.0}\n",
       "Requested to hash arrow data pyarrow.Table\n",
       "x: double\n",
       "y: int64\n",
@@ -832,7 +832,7 @@
       "result: double\n",
       "----\n",
       "result: [[30]]\n",
-      "Tag: {'id': 1}, Data: {'result': 30.0}\n",
+      "Key: {'id': 1}, Data: {'result': 30.0}\n",
       "Requested to hash arrow data pyarrow.Table\n",
       "x: double\n",
       "y: int64\n",
@@ -861,13 +861,13 @@
       "result: double\n",
       "----\n",
       "result: [[56]]\n",
-      "Tag: {'id': 2}, Data: {'result': 56.0}\n"
+      "Key: {'id': 2}, Data: {'result': 56.0}\n"
      ]
     }
    ],
    "source": [
-    "for tag, data in cached_pod(stream):\n",
-    "    print((f\"Tag: {tag}, Data: {data}\"))"
+    "for key, data in cached_pod(stream):\n",
+    "    print((f\"Key: {key}, Data: {data}\"))"
    ]
   },
   {
@@ -981,7 +981,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tag = {\"name\": [\"Edgar\", \"Names\"], \"age\": 37}"
+    "key = {\"name\": [\"Edgar\", \"Names\"], \"age\": 37}"
    ]
   },
   {
@@ -990,7 +990,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tag[\"__data_key\"] = \"some_unique_key\"\n"
+    "key[\"__data_key\"] = \"some_unique_key\"\n"
    ]
   },
   {
@@ -1012,7 +1012,7 @@
    "source": [
     "from orcabridge.hashing.defaults import LegacyObjectHasher\n",
     "\n",
-    "LegacyObjectHasher().hash_to_hex(tag)\n"
+    "LegacyObjectHasher().hash_to_hex(key)\n"
    ]
   },
   {
@@ -1047,7 +1047,7 @@
     }
    ],
    "source": [
-    "pa.Table.from_pylist([tag])"
+    "pa.Table.from_pylist([key])"
    ]
   },
   {
@@ -1748,7 +1748,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We can then obtain `stream` from a `source` by invoking the source with `Source()`. The return `stream` acts as an iterator over the `data` and its `tag`.\n",
+    "We can then obtain `stream` from a `source` by invoking the source with `Source()`. The return `stream` acts as an iterator over the `data` and its `key`.\n",
     "For convenience, `source` can be treated synonymously with a `stream`, allowing you to directly iterate over the content."
    ]
   },
@@ -1761,17 +1761,17 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Data {'txt_file': PosixPath('../examples/dataset1/day1.txt')} with tag {'file_name': 'day1'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day2.txt')} with tag {'file_name': 'day2'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day3.txt')} with tag {'file_name': 'day3'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day4.txt')} with tag {'file_name': 'day4'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day6.txt')} with tag {'file_name': 'day6'}\n"
+      "Data {'txt_file': PosixPath('../examples/dataset1/day1.txt')} with key {'file_name': 'day1'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day2.txt')} with key {'file_name': 'day2'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day3.txt')} with key {'file_name': 'day3'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day4.txt')} with key {'file_name': 'day4'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day6.txt')} with key {'file_name': 'day6'}\n"
      ]
     }
    ],
    "source": [
-    "for tag, data in dataset1():\n",
-    "    print(f\"Data {data} with tag {tag}\")"
+    "for key, data in dataset1():\n",
+    "    print(f\"Data {data} with key {key}\")"
    ]
   },
   {
@@ -1783,25 +1783,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Data {'txt_file': PosixPath('../examples/dataset1/day1.txt')} with tag {'file_name': 'day1'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day2.txt')} with tag {'file_name': 'day2'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day3.txt')} with tag {'file_name': 'day3'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day4.txt')} with tag {'file_name': 'day4'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day6.txt')} with tag {'file_name': 'day6'}\n"
+      "Data {'txt_file': PosixPath('../examples/dataset1/day1.txt')} with key {'file_name': 'day1'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day2.txt')} with key {'file_name': 'day2'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day3.txt')} with key {'file_name': 'day3'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day4.txt')} with key {'file_name': 'day4'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day6.txt')} with key {'file_name': 'day6'}\n"
      ]
     }
    ],
    "source": [
     "# equivalent to above but more natural without the need to call `dataset1()`\n",
-    "for tag, data in dataset1:\n",
-    "    print(f\"Data {data} with tag {tag}\")"
+    "for key, data in dataset1:\n",
+    "    print(f\"Data {data} with key {key}\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "A few things to note. When creating the `GlobSource` we pass in the argument name to be associated with the `pathset` matching our glob pattern (`*.txt` in this case). By default, the `GlobSource` tags each data with a key of `file_name` and value of the name of the file that was matched (minus the file extension). This behavior can be easily changed by passing in a custom function for tag generation at the time of `GlobSource` creation."
+    "A few things to note. When creating the `GlobSource` we pass in the argument name to be associated with the `pathset` matching our glob pattern (`*.txt` in this case). By default, the `GlobSource` assigns each data a key with the field `file_name` whose value is the name of the file that was matched (minus the file extension). This behavior can be easily changed by passing in a custom function for key generation at the time of `GlobSource` creation."
    ]
   },
   {
@@ -1816,7 +1816,7 @@
     "    \"data\",\n",
     "    \"../examples/dataset1\",\n",
     "    \"*.txt\",\n",
-    "    tag_function=lambda x: {\"date\": Path(x).stem},\n",
+    "    key_function=lambda x: {\"date\": Path(x).stem},\n",
     ")"
    ]
   },
@@ -1829,24 +1829,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Data {'data': PosixPath('../examples/dataset1/day1.txt')} with tag {'date': 'day1'}\n",
-      "Data {'data': PosixPath('../examples/dataset1/day2.txt')} with tag {'date': 'day2'}\n",
-      "Data {'data': PosixPath('../examples/dataset1/day3.txt')} with tag {'date': 'day3'}\n",
-      "Data {'data': PosixPath('../examples/dataset1/day4.txt')} with tag {'date': 'day4'}\n",
-      "Data {'data': PosixPath('../examples/dataset1/day6.txt')} with tag {'date': 'day6'}\n"
+      "Data {'data': PosixPath('../examples/dataset1/day1.txt')} with key {'date': 'day1'}\n",
+      "Data {'data': PosixPath('../examples/dataset1/day2.txt')} with key {'date': 'day2'}\n",
+      "Data {'data': PosixPath('../examples/dataset1/day3.txt')} with key {'date': 'day3'}\n",
+      "Data {'data': PosixPath('../examples/dataset1/day4.txt')} with key {'date': 'day4'}\n",
+      "Data {'data': PosixPath('../examples/dataset1/day6.txt')} with key {'date': 'day6'}\n"
      ]
     }
    ],
    "source": [
-    "for tag, data in dataset1_custom:\n",
-    "    print(f\"Data {data} with tag {tag}\")"
+    "for key, data in dataset1_custom:\n",
+    "    print(f\"Data {data} with key {key}\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Custom tag function would allow one to extract information useful in controlling the flow of the data pipeline from the file path or even the file content. We will return to this a bit later."
+    "Custom key function would allow one to extract information useful in controlling the flow of the data pipeline from the file path or even the file content. We will return to this a bit later."
    ]
   },
   {
@@ -1872,18 +1872,18 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Data {'bin_data': PosixPath('../examples/dataset2/session_day1.bin')} with tag {'file_name': 'session_day1'}\n",
-      "Data {'bin_data': PosixPath('../examples/dataset2/session_day3.bin')} with tag {'file_name': 'session_day3'}\n",
-      "Data {'bin_data': PosixPath('../examples/dataset2/session_day4.bin')} with tag {'file_name': 'session_day4'}\n",
-      "Data {'bin_data': PosixPath('../examples/dataset2/session_day5.bin')} with tag {'file_name': 'session_day5'}\n"
+      "Data {'bin_data': PosixPath('../examples/dataset2/session_day1.bin')} with key {'file_name': 'session_day1'}\n",
+      "Data {'bin_data': PosixPath('../examples/dataset2/session_day3.bin')} with key {'file_name': 'session_day3'}\n",
+      "Data {'bin_data': PosixPath('../examples/dataset2/session_day4.bin')} with key {'file_name': 'session_day4'}\n",
+      "Data {'bin_data': PosixPath('../examples/dataset2/session_day5.bin')} with key {'file_name': 'session_day5'}\n"
      ]
     }
    ],
    "source": [
     "dataset2 = ob.GlobSource(\"bin_data\", \"../examples/dataset2\", \"*.bin\")\n",
     "\n",
-    "for tag, data in dataset2:\n",
-    "    print(f\"Data {data} with tag {tag}\")"
+    "for key, data in dataset2:\n",
+    "    print(f\"Data {data} with key {key}\")"
    ]
   },
   {
@@ -1919,7 +1919,7 @@
    "metadata": {},
    "source": [
     "\n",
-    "`Mappers` are `operations` that controls and alter the streams but *without generating or modifying new data files*. As we will see shortly, `mappers` work to alter the stream by alterning data tags and/or data content, but critically will never create or modify new files that were not already present somewhere in the stream feeding into the `mapper` node. While this might sound like an unnecessary restriction on what `mappers` can do, we will see that this property guarantees that *mappers can not ever alter the reproducibility of computational chains*."
+    "`Mappers` are `operations` that control and alter the streams but *without generating or modifying new data files*. As we will see shortly, `mappers` work to alter the stream by altering keys and/or data content, but critically will never create or modify new files that were not already present somewhere in the stream feeding into the `mapper` node. While this might sound like an unnecessary restriction on what `mappers` can do, we will see that this property guarantees that *mappers can not ever alter the reproducibility of computational chains*."
    ]
   },
   {
@@ -1942,7 +1942,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Once you have created a `source` from which streams can be formed, you can alter the stream by applying various `mappers`. More precisely, a `mapper` can work on tags and/or data."
+    "Once you have created a `source` from which streams can be formed, you can alter the stream by applying various `mappers`. More precisely, a `mapper` can work on keys and/or data."
    ]
   },
   {
@@ -1963,47 +1963,47 @@
      "output_type": "stream",
      "text": [
       "Before mapping:\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day1.txt')} with tag {'file_name': 'day1'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day2.txt')} with tag {'file_name': 'day2'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day3.txt')} with tag {'file_name': 'day3'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day4.txt')} with tag {'file_name': 'day4'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day6.txt')} with tag {'file_name': 'day6'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day1.txt')} with key {'file_name': 'day1'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day2.txt')} with key {'file_name': 'day2'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day3.txt')} with key {'file_name': 'day3'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day4.txt')} with key {'file_name': 'day4'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day6.txt')} with key {'file_name': 'day6'}\n",
       "After mapping:\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day1.txt')} with tag {'file_name': 'day1'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day2.txt')} with tag {'file_name': 'day2'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day3.txt')} with tag {'file_name': 'day3'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day4.txt')} with tag {'file_name': 'day4'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day6.txt')} with tag {'file_name': 'day6'}\n"
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day1.txt')} with key {'file_name': 'day1'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day2.txt')} with key {'file_name': 'day2'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day3.txt')} with key {'file_name': 'day3'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day4.txt')} with key {'file_name': 'day4'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day6.txt')} with key {'file_name': 'day6'}\n"
      ]
     }
    ],
    "source": [
     "print(\"Before mapping:\")\n",
-    "for tag, data in dataset1:\n",
-    "    print(f\"Data {data} with tag {tag}\")\n",
+    "for key, data in dataset1:\n",
+    "    print(f\"Data {data} with key {key}\")\n",
     "\n",
     "\n",
     "# create a new stream mapping data keys 'txt_file' to 'content'\n",
     "data_mapper = ob.MapData(key_map={\"txt_file\": \"content\"})\n",
     "\n",
     "print(\"After mapping:\")\n",
-    "for tag, data in data_mapper(dataset1):\n",
-    "    print(f\"Mapped Data {data} with tag {tag}\")"
+    "for key, data in data_mapper(dataset1):\n",
+    "    print(f\"Mapped Data {data} with key {key}\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "You'd notice that for each data, the key `txt_file` was replaced with `content` without altering the pointed `path` or the associated tag. As the keys of the data will be used as the name of arguments when invoking pods on a stream, we will see that `MapData` are commonly used to *map* the correct path to the argument."
+    "You'd notice that for each data, the data key `txt_file` was replaced with `content` without altering the pointed `path` or the key associated with the data. As the data keys will be used as the names of arguments when invoking pods on a stream, we will see that `MapData` is commonly used to *map* the correct path to the argument."
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Map tags\n",
-    "As we have already seen, each data in the stream is associated with a tag, often derived from the data source. In the case of `GlobFileSource`, the tags are by default the name of the file that formed the data. These tags are used to *transiently* identify the data and will be used when matching data across multiple streams (as we will see shortly in `Join` operation). You can manipulate the tags using `MapTags` operation, much like `MapKeys` but operating on the tags for each packaet under a uniform renaming rule."
+    "### Map keys\n",
+    "As we have already seen, each data in the stream is associated with a key, often derived from the data source. In the case of `GlobFileSource`, the keys are by default the name of the file that formed the data. These keys are used to *transiently* identify the data and will be used when matching data across multiple streams (as we will see shortly in `Join` operation). You can manipulate the keys using `MapKeys` operation, much like `MapData` but operating on the keys for each data under a uniform renaming rule."
    ]
   },
   {
@@ -2024,10 +2024,10 @@
     }
    ],
    "source": [
-    "tag_mapper = ob.MapTags(key_map={\"file_name\": \"day\"})\n",
+    "key_mapper = ob.MapKeys(key_map={\"file_name\": \"day\"})\n",
     "\n",
-    "for tag, data in tag_mapper(dataset1):\n",
-    "    print(tag, data)"
+    "for key, data in key_mapper(dataset1):\n",
+    "    print(key, data)"
    ]
   },
   {
@@ -2041,7 +2041,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "As you might expect, you can chain multiple operations one after another to construct a more complex stream. Below, we first apply the key mapping and then map tags."
+    "As you might expect, you can chain multiple operations one after another to construct a more complex stream. Below, we first rename the data keys with `MapData` and then rename the keys with `MapKeys`."
    ]
   },
   {
@@ -2053,11 +2053,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day1.txt')} with tag {'day': 'day1'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day2.txt')} with tag {'day': 'day2'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day3.txt')} with tag {'day': 'day3'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day4.txt')} with tag {'day': 'day4'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day6.txt')} with tag {'day': 'day6'}\n"
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day1.txt')} with key {'day': 'day1'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day2.txt')} with key {'day': 'day2'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day3.txt')} with key {'day': 'day3'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day4.txt')} with key {'day': 'day4'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day6.txt')} with key {'day': 'day6'}\n"
      ]
     }
    ],
@@ -2065,18 +2065,18 @@
     "data_mapper = ob.MapData(key_map={\"txt_file\": \"content\"})\n",
     "key_mapped_stream = data_mapper(dataset1)\n",
     "\n",
-    "tag_mapper = ob.MapTags(key_map={\"file_name\": \"day\"})\n",
-    "tag_and_data_mapped = tag_mapper(key_mapped_stream)\n",
+    "key_mapper = ob.MapKeys(key_map={\"file_name\": \"day\"})\n",
+    "key_and_data_mapped = key_mapper(key_mapped_stream)\n",
     "\n",
-    "for tag, data in tag_and_data_mapped:\n",
-    "    print(f\"Mapped Data {data} with tag {tag}\")"
+    "for key, data in key_and_data_mapped:\n",
+    "    print(f\"Mapped Data {data} with key {key}\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "It's worth emphasizing again that all computations are triggered only when you iterate through the final stream `tag_and_key_mapped`"
+    "It's worth emphasizing again that all computations are triggered only when you iterate through the final stream `key_and_data_mapped`"
    ]
   },
   {
@@ -2095,20 +2095,20 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day1.txt')} with tag {'day': 'day1'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day2.txt')} with tag {'day': 'day2'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day3.txt')} with tag {'day': 'day3'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day4.txt')} with tag {'day': 'day4'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day6.txt')} with tag {'day': 'day6'}\n"
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day1.txt')} with key {'day': 'day1'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day2.txt')} with key {'day': 'day2'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day3.txt')} with key {'day': 'day3'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day4.txt')} with key {'day': 'day4'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day6.txt')} with key {'day': 'day6'}\n"
      ]
     }
    ],
    "source": [
     "# totally valid, but difficult to read and thus not recommended\n",
-    "for tag, data in ob.MapTags(key_map={\"file_name\": \"day\"})(\n",
+    "for key, data in ob.MapKeys(key_map={\"file_name\": \"day\"})(\n",
     "    ob.MapData(key_map={\"txt_file\": \"content\"})(dataset1)\n",
     "):\n",
-    "    print(f\"Mapped Data {data} with tag {tag}\")"
+    "    print(f\"Mapped Data {data} with key {key}\")"
    ]
   },
   {
@@ -2119,7 +2119,7 @@
     "Now that we have looked at how you can manipulate a single stream, let's turn our eyes to how you can work with more than one streams together.\n",
     "\n",
     "By the far the most common multi-stream operations will be to join two (or more) streams into a single, bigger stream. \n",
-    "You can combine multiple streams into one by using `Join` operation, matching data from each stream based on the matching tags. If tags from two streams have shared key, the value must be identical for all shared keys for the two data to be matched. The matched data are then merged into a one (typically larger) data and shipped to the output stream."
+    "You can combine multiple streams into one by using the `Join` operation, matching data from each stream based on their keys. If the keys from two streams have shared fields, the values must be identical for all shared fields for the two data to be matched. The matched data are then merged into one (typically larger) data and shipped to the output stream."
    ]
   },
   {
@@ -2139,30 +2139,30 @@
      "output_type": "stream",
      "text": [
       "Dataset 1:\n",
-      "Tag: {'file_name': 'day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt')}\n",
-      "Tag: {'file_name': 'day2'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt')}\n",
-      "Tag: {'file_name': 'day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt')}\n",
-      "Tag: {'file_name': 'day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt')}\n",
-      "Tag: {'file_name': 'day6'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt')}\n",
+      "Key: {'file_name': 'day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt')}\n",
+      "Key: {'file_name': 'day2'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt')}\n",
+      "Key: {'file_name': 'day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt')}\n",
+      "Key: {'file_name': 'day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt')}\n",
+      "Key: {'file_name': 'day6'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt')}\n",
       "\n",
       "Dataset 2:\n",
-      "Tag: {'file_name': 'session_day1'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "Tag: {'file_name': 'session_day3'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "Tag: {'file_name': 'session_day4'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
-      "Tag: {'file_name': 'session_day5'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n"
+      "Key: {'file_name': 'session_day1'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "Key: {'file_name': 'session_day3'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "Key: {'file_name': 'session_day4'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
+      "Key: {'file_name': 'session_day5'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n"
      ]
     }
    ],
    "source": [
     "# dataset 1\n",
     "print(\"Dataset 1:\")\n",
-    "for tag, data in dataset1:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")\n",
+    "for key, data in dataset1:\n",
+    "    print(f\"Key: {key}, Data: {data}\")\n",
     "\n",
     "# dataset 2\n",
     "print(\"\\nDataset 2:\")\n",
-    "for tag, data in dataset2:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in dataset2:\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -2180,8 +2180,8 @@
    "source": [
     "join_op = ob.Join()\n",
     "\n",
-    "for tag, data in join_op(dataset1, dataset2):\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in join_op(dataset1, dataset2):\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -2202,7 +2202,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "First, let's completely rename the tag key for one of the streams and see what would happen."
+    "First, let's completely rename the key field for one of the streams and see what would happen."
    ]
   },
   {
@@ -2214,34 +2214,34 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "01 Tag: {'day': 'day1', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "02 Tag: {'day': 'day1', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "03 Tag: {'day': 'day1', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
-      "04 Tag: {'day': 'day1', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
-      "05 Tag: {'day': 'day2', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "06 Tag: {'day': 'day2', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "07 Tag: {'day': 'day2', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
-      "08 Tag: {'day': 'day2', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
-      "09 Tag: {'day': 'day3', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "10 Tag: {'day': 'day3', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "11 Tag: {'day': 'day3', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
-      "12 Tag: {'day': 'day3', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
-      "13 Tag: {'day': 'day4', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "14 Tag: {'day': 'day4', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "15 Tag: {'day': 'day4', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
-      "16 Tag: {'day': 'day4', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
-      "17 Tag: {'day': 'day6', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "18 Tag: {'day': 'day6', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "19 Tag: {'day': 'day6', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
-      "20 Tag: {'day': 'day6', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n"
+      "01 Key: {'day': 'day1', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "02 Key: {'day': 'day1', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "03 Key: {'day': 'day1', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
+      "04 Key: {'day': 'day1', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
+      "05 Key: {'day': 'day2', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "06 Key: {'day': 'day2', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "07 Key: {'day': 'day2', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
+      "08 Key: {'day': 'day2', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
+      "09 Key: {'day': 'day3', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "10 Key: {'day': 'day3', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "11 Key: {'day': 'day3', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
+      "12 Key: {'day': 'day3', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
+      "13 Key: {'day': 'day4', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "14 Key: {'day': 'day4', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "15 Key: {'day': 'day4', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
+      "16 Key: {'day': 'day4', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
+      "17 Key: {'day': 'day6', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "18 Key: {'day': 'day6', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "19 Key: {'day': 'day6', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
+      "20 Key: {'day': 'day6', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n"
      ]
     }
    ],
    "source": [
-    "dataset1_retagged = ob.MapTags(key_map={\"file_name\": \"day\"})(dataset1)\n",
+    "dataset1_retagged = ob.MapKeys(key_map={\"file_name\": \"day\"})(dataset1)\n",
     "\n",
-    "for i, (tag, data) in enumerate(join_op(dataset1_retagged, dataset2)):\n",
-    "    print(f\"{i + 1:02d} Tag: {tag}, Data: {data}\")"
+    "for i, (key, data) in enumerate(join_op(dataset1_retagged, dataset2)):\n",
+    "    print(f\"{i + 1:02d} Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -2250,9 +2250,9 @@
    "source": [
     "We are now getting something -- in fact, quite a few things. If you look carefully at the `data`, you'll notice that it now contains two keys/arguments -- `txt_file` and `bin_data`, combining the data from the two datasets. \n",
     "\n",
-    "The `tags` also now contain two keys `day` from the re-tagged dataset1 stream and `file_name` from unchanged dataset2 stream.\n",
+    "The `keys` also now contain two fields: `day` from the re-tagged dataset1 stream and `file_name` from the unchanged dataset2 stream.\n",
     "\n",
-    "Since the two streams share no common tags, the `Join` operation results in *full-multiplexing* of two streams. With the streams from dataset1 and dataset2 containing 5 data and 4 data, respectively, you get $5 \\times 4 = 20$ data"
+    "Since the two streams share no common keys, the `Join` operation results in *full-multiplexing* of two streams. With the streams from dataset1 and dataset2 containing 5 data and 4 data, respectively, you get $5 \\times 4 = 20$ data"
    ]
   },
   {
@@ -2268,7 +2268,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Although we could achieve the desired effect by changing how we load the source, passing in custom `tag_function` into `GlobSource`, let's achieve the same by using another `mapper` called `Transform`. `Transform` effectively combines `MapKey` and `MapTag` but further allows you to provide a function that will receive the tag and data, one at a time, and return a (potentially modified) tag and/or data, achieving the desired transformation."
+    "Although we could achieve the desired effect by changing how we load the source, passing in custom `key_function` into `GlobSource`, let's achieve the same by using another `mapper` called `Transform`. `Transform` effectively combines `MapData` and `MapKeys` but further allows you to provide a function that will receive the key and data, one at a time, and return a (potentially modified) key and/or data, achieving the desired transformation."
    ]
   },
   {
@@ -2280,18 +2280,18 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'day': 'day1'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "Tag: {'day': 'day3'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "Tag: {'day': 'day4'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
-      "Tag: {'day': 'day5'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n"
+      "Key: {'day': 'day1'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "Key: {'day': 'day3'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "Key: {'day': 'day4'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
+      "Key: {'day': 'day5'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n"
      ]
     }
    ],
    "source": [
-    "def transform_dataset2(tag, data):\n",
+    "def transform_dataset2(key, data):\n",
     "    # Extract the second half of the filename containing day\n",
-    "    new_tag = {\"day\": tag[\"file_name\"].split(\"_\")[1]}\n",
-    "    return new_tag, data\n",
+    "    new_key = {\"day\": key[\"file_name\"].split(\"_\")[1]}\n",
+    "    return new_key, data\n",
     "\n",
     "\n",
     "# Speical mappers like transform can be found in the orcabridge.mapper module\n",
@@ -2299,8 +2299,8 @@
     "\n",
     "retagged_dataset2 = dataset2_transformer(dataset2)\n",
     "\n",
-    "for tag, data in retagged_dataset2:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in retagged_dataset2:\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -2319,22 +2319,22 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'day': 'day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "Tag: {'day': 'day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "Tag: {'day': 'day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n"
+      "Key: {'day': 'day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "Key: {'day': 'day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "Key: {'day': 'day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n"
      ]
     }
    ],
    "source": [
     "# change filename to day for dataset1\n",
-    "tag_mapper = ob.MapTags(key_map={\"file_name\": \"day\"})\n",
-    "retagged_dataset1 = tag_mapper(dataset1)\n",
+    "key_mapper = ob.MapKeys(key_map={\"file_name\": \"day\"})\n",
+    "retagged_dataset1 = key_mapper(dataset1)\n",
     "\n",
     "join_op = ob.Join()\n",
     "joined_stream = join_op(retagged_dataset1, retagged_dataset2)\n",
     "\n",
-    "for tag, data in joined_stream:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in joined_stream:\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -2362,7 +2362,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "While `mapper` operations are useful in altering tags, data, and in combining multiple streams, a data pipeline is not really useful if it cannot produce new resultsin the form of new data -- that is, introduce new files into the stream. This is precisely where `Pod` operations come in!\n",
+    "While `mapper` operations are useful in altering keys, data, and in combining multiple streams, a data pipeline is not really useful if it cannot produce new results in the form of new data -- that is, introduce new files into the stream. This is precisely where `Pod` operations come in!\n",
     "\n",
     "In fact, we have already been working with a `pod` all along -- `sources`. If you think about it, `sources` also introduce files into the stream. It is just special in that it takes no input streams (hence the name, `source`).\n",
     "\n",
@@ -2434,15 +2434,15 @@
      "output_type": "stream",
      "text": [
       "File ../examples/dataset1/day1.txt has 24 lines.\n",
-      "Tag: {'file_name': 'day1'}, Data: {}\n",
+      "Key: {'file_name': 'day1'}, Data: {}\n",
       "File ../examples/dataset1/day2.txt has 15 lines.\n",
-      "Tag: {'file_name': 'day2'}, Data: {}\n",
+      "Key: {'file_name': 'day2'}, Data: {}\n",
       "File ../examples/dataset1/day3.txt has 27 lines.\n",
-      "Tag: {'file_name': 'day3'}, Data: {}\n",
+      "Key: {'file_name': 'day3'}, Data: {}\n",
       "File ../examples/dataset1/day4.txt has 22 lines.\n",
-      "Tag: {'file_name': 'day4'}, Data: {}\n",
+      "Key: {'file_name': 'day4'}, Data: {}\n",
       "File ../examples/dataset1/day6.txt has 22 lines.\n",
-      "Tag: {'file_name': 'day6'}, Data: {}\n"
+      "Key: {'file_name': 'day6'}, Data: {}\n"
      ]
     }
    ],
@@ -2450,8 +2450,8 @@
     "# apply the function pod on a stream\n",
     "processed_stream = function_pod(dataset1)\n",
     "\n",
-    "for tag, data in processed_stream:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in processed_stream:\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -2515,7 +2515,7 @@
       " -0.08364667 -0.45551653  0.70752188  1.02283734 -0.18612795  0.8767394\n",
       " -1.542636    1.04685484 -2.1311672  -1.34874222  0.61977577 -0.33880262\n",
       "  0.6624482   0.60257325 -3.04901544 -0.20685843 -0.08997232  0.88932232]\n",
-      "Tag: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpm2wka6il/statistics.json')}\n",
+      "Key: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpm2wka6il/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day3.bin\n",
       "[ 0.56114059 -1.34902274  1.0665563   0.71890802  0.65244834  1.04369548\n",
       "  0.54872876  2.19365207  0.53864286 -1.44108823 -0.55651539  0.1603561\n",
@@ -2523,21 +2523,21 @@
       "  0.38400938 -1.23004316  1.34426647 -0.07620065 -0.91983972  0.23537101\n",
       "  0.91515395  0.8064348   0.81470895 -1.04466683 -0.25893558 -1.46253167\n",
       "  1.39972807 -0.13940519]\n",
-      "Tag: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmplkmx65ll/statistics.json')}\n",
+      "Key: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmplkmx65ll/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day4.bin\n",
       "[ 0.70078854  1.18137906 -0.44361437 -0.389409    0.29719038  0.2523247\n",
       " -0.97418716  0.49301127  0.07900351 -0.29965042 -0.25810762 -2.78777445\n",
       " -1.24321702  0.13011593  1.07826637 -0.33177479 -0.78337033 -1.30075356\n",
       " -0.15710138  0.51927589  0.08671884  0.02058063  0.20778149 -1.40382559\n",
       " -0.69978105 -1.10525753  0.1945444   0.82623748  0.17467868]\n",
-      "Tag: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmpxajrzctd/statistics.json')}\n",
+      "Key: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmpxajrzctd/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day5.bin\n",
       "[ 1.9125739  -0.05252076  0.33347618  0.31627214  0.47141153 -0.71088615\n",
       " -0.74745805  0.53959117 -0.14395142 -0.28713782 -0.29422236 -1.00231383\n",
       "  0.69566576 -0.25895608 -0.9660761  -0.78504297 -1.91668262  0.89452296\n",
       " -0.82748688 -0.19792482  0.07305616  0.36133414  1.7164791   0.64364619\n",
       " -0.73146429  0.96324864 -1.05981222 -0.59502066  0.15084192]\n",
-      "Tag: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmp67rthfe1/statistics.json')}\n"
+      "Key: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmp67rthfe1/statistics.json')}\n"
      ]
     }
    ],
@@ -2547,8 +2547,8 @@
     "# change the key from 'bin_data' to 'bin_file', matching the function's input\n",
     "mapped_dataset2 = ob.MapData(key_map={\"bin_data\": \"bin_file\"})(dataset2)\n",
     "\n",
-    "for tag, data in fp_stats(mapped_dataset2):\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in fp_stats(mapped_dataset2):\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -2575,7 +2575,7 @@
       " -0.08364667 -0.45551653  0.70752188  1.02283734 -0.18612795  0.8767394\n",
       " -1.542636    1.04685484 -2.1311672  -1.34874222  0.61977577 -0.33880262\n",
       "  0.6624482   0.60257325 -3.04901544 -0.20685843 -0.08997232  0.88932232]\n",
-      "Tag: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpciwa2xl_/statistics.json')}\n",
+      "Key: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpciwa2xl_/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day3.bin\n",
       "[ 0.56114059 -1.34902274  1.0665563   0.71890802  0.65244834  1.04369548\n",
       "  0.54872876  2.19365207  0.53864286 -1.44108823 -0.55651539  0.1603561\n",
@@ -2583,29 +2583,29 @@
       "  0.38400938 -1.23004316  1.34426647 -0.07620065 -0.91983972  0.23537101\n",
       "  0.91515395  0.8064348   0.81470895 -1.04466683 -0.25893558 -1.46253167\n",
       "  1.39972807 -0.13940519]\n",
-      "Tag: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmpkq824j5b/statistics.json')}\n",
+      "Key: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmpkq824j5b/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day4.bin\n",
       "[ 0.70078854  1.18137906 -0.44361437 -0.389409    0.29719038  0.2523247\n",
       " -0.97418716  0.49301127  0.07900351 -0.29965042 -0.25810762 -2.78777445\n",
       " -1.24321702  0.13011593  1.07826637 -0.33177479 -0.78337033 -1.30075356\n",
       " -0.15710138  0.51927589  0.08671884  0.02058063  0.20778149 -1.40382559\n",
       " -0.69978105 -1.10525753  0.1945444   0.82623748  0.17467868]\n",
-      "Tag: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmp7ii2nd6e/statistics.json')}\n",
+      "Key: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmp7ii2nd6e/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day5.bin\n",
       "[ 1.9125739  -0.05252076  0.33347618  0.31627214  0.47141153 -0.71088615\n",
       " -0.74745805  0.53959117 -0.14395142 -0.28713782 -0.29422236 -1.00231383\n",
       "  0.69566576 -0.25895608 -0.9660761  -0.78504297 -1.91668262  0.89452296\n",
       " -0.82748688 -0.19792482  0.07305616  0.36133414  1.7164791   0.64364619\n",
       " -0.73146429  0.96324864 -1.05981222 -0.59502066  0.15084192]\n",
-      "Tag: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmpz23q61gg/statistics.json')}\n"
+      "Key: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmpz23q61gg/statistics.json')}\n"
      ]
     }
    ],
    "source": [
     "# everytime you run the following loop, new computations are performed and\n",
     "# saved in a different set of temporary files\n",
-    "for tag, data in fp_stats(mapped_dataset2):\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in fp_stats(mapped_dataset2):\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -2653,7 +2653,7 @@
       " -0.08364667 -0.45551653  0.70752188  1.02283734 -0.18612795  0.8767394\n",
       " -1.542636    1.04685484 -2.1311672  -1.34874222  0.61977577 -0.33880262\n",
       "  0.6624482   0.60257325 -3.04901544 -0.20685843 -0.08997232  0.88932232]\n",
-      "Tag: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpukvddhuv/statistics.json')}\n",
+      "Key: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpukvddhuv/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day3.bin\n",
       "[ 0.56114059 -1.34902274  1.0665563   0.71890802  0.65244834  1.04369548\n",
       "  0.54872876  2.19365207  0.53864286 -1.44108823 -0.55651539  0.1603561\n",
@@ -2661,21 +2661,21 @@
       "  0.38400938 -1.23004316  1.34426647 -0.07620065 -0.91983972  0.23537101\n",
       "  0.91515395  0.8064348   0.81470895 -1.04466683 -0.25893558 -1.46253167\n",
       "  1.39972807 -0.13940519]\n",
-      "Tag: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmpat3rm4dk/statistics.json')}\n",
+      "Key: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmpat3rm4dk/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day4.bin\n",
       "[ 0.70078854  1.18137906 -0.44361437 -0.389409    0.29719038  0.2523247\n",
       " -0.97418716  0.49301127  0.07900351 -0.29965042 -0.25810762 -2.78777445\n",
       " -1.24321702  0.13011593  1.07826637 -0.33177479 -0.78337033 -1.30075356\n",
       " -0.15710138  0.51927589  0.08671884  0.02058063  0.20778149 -1.40382559\n",
       " -0.69978105 -1.10525753  0.1945444   0.82623748  0.17467868]\n",
-      "Tag: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmpuj3tiu8k/statistics.json')}\n",
+      "Key: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmpuj3tiu8k/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day5.bin\n",
       "[ 1.9125739  -0.05252076  0.33347618  0.31627214  0.47141153 -0.71088615\n",
       " -0.74745805  0.53959117 -0.14395142 -0.28713782 -0.29422236 -1.00231383\n",
       "  0.69566576 -0.25895608 -0.9660761  -0.78504297 -1.91668262  0.89452296\n",
       " -0.82748688 -0.19792482  0.07305616  0.36133414  1.7164791   0.64364619\n",
       " -0.73146429  0.96324864 -1.05981222 -0.59502066  0.15084192]\n",
-      "Tag: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmp6yohu0pw/statistics.json')}\n"
+      "Key: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmp6yohu0pw/statistics.json')}\n"
      ]
     }
    ],
@@ -2690,8 +2690,8 @@
     "cached_stream = cache_stream(stats_stream)\n",
     "\n",
     "# iterate over the cached stream\n",
-    "for tag, data in cached_stream:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in cached_stream:\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -2710,16 +2710,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpukvddhuv/statistics.json')}\n",
-      "Tag: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmpat3rm4dk/statistics.json')}\n",
-      "Tag: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmpuj3tiu8k/statistics.json')}\n",
-      "Tag: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmp6yohu0pw/statistics.json')}\n"
+      "Key: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpukvddhuv/statistics.json')}\n",
+      "Key: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmpat3rm4dk/statistics.json')}\n",
+      "Key: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmpuj3tiu8k/statistics.json')}\n",
+      "Key: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmp6yohu0pw/statistics.json')}\n"
      ]
     }
    ],
    "source": [
-    "for tag, data in cached_stream:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in cached_stream:\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -2784,16 +2784,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'session_day1'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-c63c34c5fefaa0f2e9bba3edcf6c861c/statistics.json'}\n",
-      "Tag: {'file_name': 'session_day3'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-9dfef842f88463f5145ab0d4c06e3938/statistics.json'}\n",
-      "Tag: {'file_name': 'session_day4'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-4f3dfa71356fe8f226be66aa8dffbc55/statistics.json'}\n",
-      "Tag: {'file_name': 'session_day5'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-26bffc293c82e14cde904274e0c63afd/statistics.json'}\n"
+      "Key: {'file_name': 'session_day1'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-c63c34c5fefaa0f2e9bba3edcf6c861c/statistics.json'}\n",
+      "Key: {'file_name': 'session_day3'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-9dfef842f88463f5145ab0d4c06e3938/statistics.json'}\n",
+      "Key: {'file_name': 'session_day4'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-4f3dfa71356fe8f226be66aa8dffbc55/statistics.json'}\n",
+      "Key: {'file_name': 'session_day5'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-26bffc293c82e14cde904274e0c63afd/statistics.json'}\n"
      ]
     }
    ],
    "source": [
-    "for tag, data in fp_stats_stored(mapped_dataset2):\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in fp_stats_stored(mapped_dataset2):\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -2812,16 +2812,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'session_day1'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-c63c34c5fefaa0f2e9bba3edcf6c861c/statistics.json'}\n",
-      "Tag: {'file_name': 'session_day3'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-9dfef842f88463f5145ab0d4c06e3938/statistics.json'}\n",
-      "Tag: {'file_name': 'session_day4'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-4f3dfa71356fe8f226be66aa8dffbc55/statistics.json'}\n",
-      "Tag: {'file_name': 'session_day5'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-26bffc293c82e14cde904274e0c63afd/statistics.json'}\n"
+      "Key: {'file_name': 'session_day1'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-c63c34c5fefaa0f2e9bba3edcf6c861c/statistics.json'}\n",
+      "Key: {'file_name': 'session_day3'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-9dfef842f88463f5145ab0d4c06e3938/statistics.json'}\n",
+      "Key: {'file_name': 'session_day4'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-4f3dfa71356fe8f226be66aa8dffbc55/statistics.json'}\n",
+      "Key: {'file_name': 'session_day5'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-26bffc293c82e14cde904274e0c63afd/statistics.json'}\n"
      ]
     }
    ],
    "source": [
-    "for tag, data in fp_stats_stored(mapped_dataset2):\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in fp_stats_stored(mapped_dataset2):\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
diff --git a/notebooks/old_tutorials/02_orcapod_basic_usage.ipynb b/notebooks/old_tutorials/02_orcapod_basic_usage.ipynb
index bb8b302b..767ee36a 100644
--- a/notebooks/old_tutorials/02_orcapod_basic_usage.ipynb
+++ b/notebooks/old_tutorials/02_orcapod_basic_usage.ipynb
@@ -82,7 +82,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We can then obtain `stream` from a `source` by invoking the source with `Source()`. The return `stream` acts as an iterator over the `data` and its `tag`.\n",
+    "We can then obtain `stream` from a `source` by invoking the source with `Source()`. The return `stream` acts as an iterator over the `data` and its `key`.\n",
     "For convenience, `source` can be treated synonymously with a `stream`, allowing you to directly iterate over the content."
    ]
   },
@@ -95,17 +95,17 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Data {'txt_file': PosixPath('../examples/dataset1/day1.txt')} with tag {'file_name': 'day1'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day2.txt')} with tag {'file_name': 'day2'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day3.txt')} with tag {'file_name': 'day3'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day4.txt')} with tag {'file_name': 'day4'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day6.txt')} with tag {'file_name': 'day6'}\n"
+      "Data {'txt_file': PosixPath('../examples/dataset1/day1.txt')} with key {'file_name': 'day1'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day2.txt')} with key {'file_name': 'day2'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day3.txt')} with key {'file_name': 'day3'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day4.txt')} with key {'file_name': 'day4'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day6.txt')} with key {'file_name': 'day6'}\n"
      ]
     }
    ],
    "source": [
-    "for tag, data in dataset1():\n",
-    "    print(f\"Data {data} with tag {tag}\")"
+    "for key, data in dataset1():\n",
+    "    print(f\"Data {data} with key {key}\")"
    ]
   },
   {
@@ -117,25 +117,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Data {'txt_file': PosixPath('../examples/dataset1/day1.txt')} with tag {'file_name': 'day1'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day2.txt')} with tag {'file_name': 'day2'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day3.txt')} with tag {'file_name': 'day3'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day4.txt')} with tag {'file_name': 'day4'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day6.txt')} with tag {'file_name': 'day6'}\n"
+      "Data {'txt_file': PosixPath('../examples/dataset1/day1.txt')} with key {'file_name': 'day1'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day2.txt')} with key {'file_name': 'day2'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day3.txt')} with key {'file_name': 'day3'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day4.txt')} with key {'file_name': 'day4'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day6.txt')} with key {'file_name': 'day6'}\n"
      ]
     }
    ],
    "source": [
     "# equivalent to above but more natural without the need to call `dataset1()`\n",
-    "for tag, data in dataset1:\n",
-    "    print(f\"Data {data} with tag {tag}\")"
+    "for key, data in dataset1:\n",
+    "    print(f\"Data {data} with key {key}\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "A few things to note. When creating the `GlobSource` we pass in the argument name to be associated with the `pathset` matching our glob pattern (`*.txt` in this case). By default, the `GlobSource` tags each data with a key of `file_name` and value of the name of the file that was matched (minus the file extension). This behavior can be easily changed by passing in a custom function for tag generation at the time of `GlobSource` creation."
+    "A few things to note. When creating the `GlobSource` we pass in the argument name to be associated with the `pathset` matching our glob pattern (`*.txt` in this case). By default, the `GlobSource` assigns each data a key with the field `file_name` whose value is the name of the file that was matched (minus the file extension). This behavior can be easily changed by passing in a custom function for key generation at the time of `GlobSource` creation."
    ]
   },
   {
@@ -150,7 +150,7 @@
     "    \"data\",\n",
     "    \"../examples/dataset1\",\n",
     "    \"*.txt\",\n",
-    "    tag_function=lambda x: {\"date\": Path(x).stem},\n",
+    "    key_function=lambda x: {\"date\": Path(x).stem},\n",
     ")"
    ]
   },
@@ -163,24 +163,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Data {'data': PosixPath('../examples/dataset1/day1.txt')} with tag {'date': 'day1'}\n",
-      "Data {'data': PosixPath('../examples/dataset1/day2.txt')} with tag {'date': 'day2'}\n",
-      "Data {'data': PosixPath('../examples/dataset1/day3.txt')} with tag {'date': 'day3'}\n",
-      "Data {'data': PosixPath('../examples/dataset1/day4.txt')} with tag {'date': 'day4'}\n",
-      "Data {'data': PosixPath('../examples/dataset1/day6.txt')} with tag {'date': 'day6'}\n"
+      "Data {'data': PosixPath('../examples/dataset1/day1.txt')} with key {'date': 'day1'}\n",
+      "Data {'data': PosixPath('../examples/dataset1/day2.txt')} with key {'date': 'day2'}\n",
+      "Data {'data': PosixPath('../examples/dataset1/day3.txt')} with key {'date': 'day3'}\n",
+      "Data {'data': PosixPath('../examples/dataset1/day4.txt')} with key {'date': 'day4'}\n",
+      "Data {'data': PosixPath('../examples/dataset1/day6.txt')} with key {'date': 'day6'}\n"
      ]
     }
    ],
    "source": [
-    "for tag, data in dataset1_custom:\n",
-    "    print(f\"Data {data} with tag {tag}\")"
+    "for key, data in dataset1_custom:\n",
+    "    print(f\"Data {data} with key {key}\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Custom tag function would allow one to extract information useful in controlling the flow of the data pipeline from the file path or even the file content. We will return to this a bit later."
+    "Custom key function would allow one to extract information useful in controlling the flow of the data pipeline from the file path or even the file content. We will return to this a bit later."
    ]
   },
   {
@@ -206,18 +206,18 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Data {'bin_data': PosixPath('../examples/dataset2/session_day1.bin')} with tag {'file_name': 'session_day1'}\n",
-      "Data {'bin_data': PosixPath('../examples/dataset2/session_day3.bin')} with tag {'file_name': 'session_day3'}\n",
-      "Data {'bin_data': PosixPath('../examples/dataset2/session_day4.bin')} with tag {'file_name': 'session_day4'}\n",
-      "Data {'bin_data': PosixPath('../examples/dataset2/session_day5.bin')} with tag {'file_name': 'session_day5'}\n"
+      "Data {'bin_data': PosixPath('../examples/dataset2/session_day1.bin')} with key {'file_name': 'session_day1'}\n",
+      "Data {'bin_data': PosixPath('../examples/dataset2/session_day3.bin')} with key {'file_name': 'session_day3'}\n",
+      "Data {'bin_data': PosixPath('../examples/dataset2/session_day4.bin')} with key {'file_name': 'session_day4'}\n",
+      "Data {'bin_data': PosixPath('../examples/dataset2/session_day5.bin')} with key {'file_name': 'session_day5'}\n"
      ]
     }
    ],
    "source": [
     "dataset2 = ob.GlobSource(\"bin_data\", \"../examples/dataset2\", \"*.bin\")\n",
     "\n",
-    "for tag, data in dataset2:\n",
-    "    print(f\"Data {data} with tag {tag}\")"
+    "for key, data in dataset2:\n",
+    "    print(f\"Data {data} with key {key}\")"
    ]
   },
   {
@@ -253,7 +253,7 @@
    "metadata": {},
    "source": [
     "\n",
-    "`Mappers` are `operations` that controls and alter the streams but *without generating or modifying new data files*. As we will see shortly, `mappers` work to alter the stream by alterning data tags and/or data content, but critically will never create or modify new files that were not already present somewhere in the stream feeding into the `mapper` node. While this might sound like an unnecessary restriction on what `mappers` can do, we will see that this property guarantees that *mappers can not ever alter the reproducibility of computational chains*."
+    "`Mappers` are `operations` that control and alter the streams but *without generating or modifying new data files*. As we will see shortly, `mappers` work to alter the stream by altering data keys and/or data content, but critically will never create or modify new files that were not already present somewhere in the stream feeding into the `mapper` node. While this might sound like an unnecessary restriction on what `mappers` can do, we will see that this property guarantees that *mappers cannot ever alter the reproducibility of computational chains*."
    ]
   },
   {
@@ -276,7 +276,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Once you have created a `source` from which streams can be formed, you can alter the stream by applying various `mappers`. More precisely, a `mapper` can work on tags and/or data."
+    "Once you have created a `source` from which streams can be formed, you can alter the stream by applying various `mappers`. More precisely, a `mapper` can work on keys and/or data."
    ]
   },
   {
@@ -297,47 +297,47 @@
      "output_type": "stream",
      "text": [
       "Before mapping:\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day1.txt')} with tag {'file_name': 'day1'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day2.txt')} with tag {'file_name': 'day2'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day3.txt')} with tag {'file_name': 'day3'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day4.txt')} with tag {'file_name': 'day4'}\n",
-      "Data {'txt_file': PosixPath('../examples/dataset1/day6.txt')} with tag {'file_name': 'day6'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day1.txt')} with key {'file_name': 'day1'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day2.txt')} with key {'file_name': 'day2'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day3.txt')} with key {'file_name': 'day3'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day4.txt')} with key {'file_name': 'day4'}\n",
+      "Data {'txt_file': PosixPath('../examples/dataset1/day6.txt')} with key {'file_name': 'day6'}\n",
       "After mapping:\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day1.txt')} with tag {'file_name': 'day1'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day2.txt')} with tag {'file_name': 'day2'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day3.txt')} with tag {'file_name': 'day3'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day4.txt')} with tag {'file_name': 'day4'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day6.txt')} with tag {'file_name': 'day6'}\n"
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day1.txt')} with key {'file_name': 'day1'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day2.txt')} with key {'file_name': 'day2'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day3.txt')} with key {'file_name': 'day3'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day4.txt')} with key {'file_name': 'day4'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day6.txt')} with key {'file_name': 'day6'}\n"
      ]
     }
    ],
    "source": [
     "print(\"Before mapping:\")\n",
-    "for tag, data in dataset1:\n",
-    "    print(f\"Data {data} with tag {tag}\")\n",
+    "for key, data in dataset1:\n",
+    "    print(f\"Data {data} with key {key}\")\n",
     "\n",
     "\n",
     "# create a new stream mapping data keys 'txt_file' to 'content'\n",
     "data_mapper = ob.MapData(key_map={\"txt_file\": \"content\"})\n",
     "\n",
     "print(\"After mapping:\")\n",
-    "for tag, data in data_mapper(dataset1):\n",
-    "    print(f\"Mapped Data {data} with tag {tag}\")"
+    "for key, data in data_mapper(dataset1):\n",
+    "    print(f\"Mapped Data {data} with key {key}\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "You'd notice that for each data, the key `txt_file` was replaced with `content` without altering the pointed `path` or the associated tag. As the keys of the data will be used as the name of arguments when invoking pods on a stream, we will see that `MapData` are commonly used to *map* the correct path to the argument."
+    "You'd notice that for each data, the field `txt_file` was replaced with `content` without altering the pointed `path` or the associated key. As the field names of the data will be used as the name of arguments when invoking pods on a stream, we will see that `MapData` is commonly used to *map* the correct path to the argument."
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Map tags\n",
-    "As we have already seen, each data in the stream is associated with a tag, often derived from the data source. In the case of `GlobFileSource`, the tags are by default the name of the file that formed the data. These tags are used to *transiently* identify the data and will be used when matching data across multiple streams (as we will see shortly in `Join` operation). You can manipulate the tags using `MapTags` operation, much like `MapKeys` but operating on the tags for each packaet under a uniform renaming rule."
+    "### Map keys\n",
+    "As we have already seen, each data in the stream is associated with a key, often derived from the data source. In the case of `GlobSource`, the keys are by default the name of the file that formed the data. These keys are used to *transiently* identify the data and will be used when matching data across multiple streams (as we will see shortly in the `Join` operation). You can manipulate the keys using the `MapKeys` operation, much like `MapData` but operating on the keys of each data under a uniform renaming rule."
    ]
   },
   {
@@ -358,10 +358,10 @@
     }
    ],
    "source": [
-    "tag_mapper = ob.MapTags(key_map={\"file_name\": \"day\"})\n",
+    "key_mapper = ob.MapKeys(key_map={\"file_name\": \"day\"})\n",
     "\n",
-    "for tag, data in tag_mapper(dataset1):\n",
-    "    print(tag, data)"
+    "for key, data in key_mapper(dataset1):\n",
+    "    print(key, data)"
    ]
   },
   {
@@ -375,7 +375,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "As you might expect, you can chain multiple operations one after another to construct a more complex stream. Below, we first apply the key mapping and then map tags."
+    "As you might expect, you can chain multiple operations one after another to construct a more complex stream. Below, we first rename the data fields with `MapData` and then rename the keys with `MapKeys`."
    ]
   },
   {
@@ -387,11 +387,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day1.txt')} with tag {'day': 'day1'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day2.txt')} with tag {'day': 'day2'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day3.txt')} with tag {'day': 'day3'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day4.txt')} with tag {'day': 'day4'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day6.txt')} with tag {'day': 'day6'}\n"
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day1.txt')} with key {'day': 'day1'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day2.txt')} with key {'day': 'day2'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day3.txt')} with key {'day': 'day3'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day4.txt')} with key {'day': 'day4'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day6.txt')} with key {'day': 'day6'}\n"
      ]
     }
    ],
@@ -399,18 +399,18 @@
     "data_mapper = ob.MapData(key_map={\"txt_file\": \"content\"})\n",
     "key_mapped_stream = data_mapper(dataset1)\n",
     "\n",
-    "tag_mapper = ob.MapTags(key_map={\"file_name\": \"day\"})\n",
-    "tag_and_data_mapped = tag_mapper(key_mapped_stream)\n",
+    "key_mapper = ob.MapKeys(key_map={\"file_name\": \"day\"})\n",
+    "key_and_data_mapped = key_mapper(key_mapped_stream)\n",
     "\n",
-    "for tag, data in tag_and_data_mapped:\n",
-    "    print(f\"Mapped Data {data} with tag {tag}\")"
+    "for key, data in key_and_data_mapped:\n",
+    "    print(f\"Mapped Data {data} with key {key}\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "It's worth emphasizing again that all computations are triggered only when you iterate through the final stream `tag_and_key_mapped`"
+    "It's worth emphasizing again that all computations are triggered only when you iterate through the final stream `key_and_data_mapped`"
    ]
   },
   {
@@ -429,20 +429,20 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day1.txt')} with tag {'day': 'day1'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day2.txt')} with tag {'day': 'day2'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day3.txt')} with tag {'day': 'day3'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day4.txt')} with tag {'day': 'day4'}\n",
-      "Mapped Data {'content': PosixPath('../examples/dataset1/day6.txt')} with tag {'day': 'day6'}\n"
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day1.txt')} with key {'day': 'day1'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day2.txt')} with key {'day': 'day2'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day3.txt')} with key {'day': 'day3'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day4.txt')} with key {'day': 'day4'}\n",
+      "Mapped Data {'content': PosixPath('../examples/dataset1/day6.txt')} with key {'day': 'day6'}\n"
      ]
     }
    ],
    "source": [
     "# totally valid, but difficult to read and thus not recommended\n",
-    "for tag, data in ob.MapTags(key_map={\"file_name\": \"day\"})(\n",
+    "for key, data in ob.MapKeys(key_map={\"file_name\": \"day\"})(\n",
     "    ob.MapData(key_map={\"txt_file\": \"content\"})(dataset1)\n",
     "):\n",
-    "    print(f\"Mapped Data {data} with tag {tag}\")"
+    "    print(f\"Mapped Data {data} with key {key}\")"
    ]
   },
   {
@@ -453,7 +453,7 @@
     "Now that we have looked at how you can manipulate a single stream, let's turn our eyes to how you can work with more than one streams together.\n",
     "\n",
     "By the far the most common multi-stream operations will be to join two (or more) streams into a single, bigger stream. \n",
-    "You can combine multiple streams into one by using `Join` operation, matching data from each stream based on the matching tags. If tags from two streams have shared key, the value must be identical for all shared keys for the two data to be matched. The matched data are then merged into a one (typically larger) data and shipped to the output stream."
+    "You can combine multiple streams into one by using the `Join` operation, matching data from each stream based on their keys. If the keys from two streams share any fields, the values must be identical for all shared fields for the two data to be matched. The matched data are then merged into one (typically larger) data and shipped to the output stream."
    ]
   },
   {
@@ -473,30 +473,30 @@
      "output_type": "stream",
      "text": [
       "Dataset 1:\n",
-      "Tag: {'file_name': 'day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt')}\n",
-      "Tag: {'file_name': 'day2'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt')}\n",
-      "Tag: {'file_name': 'day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt')}\n",
-      "Tag: {'file_name': 'day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt')}\n",
-      "Tag: {'file_name': 'day6'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt')}\n",
+      "Key: {'file_name': 'day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt')}\n",
+      "Key: {'file_name': 'day2'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt')}\n",
+      "Key: {'file_name': 'day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt')}\n",
+      "Key: {'file_name': 'day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt')}\n",
+      "Key: {'file_name': 'day6'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt')}\n",
       "\n",
       "Dataset 2:\n",
-      "Tag: {'file_name': 'session_day1'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "Tag: {'file_name': 'session_day3'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "Tag: {'file_name': 'session_day4'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
-      "Tag: {'file_name': 'session_day5'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n"
+      "Key: {'file_name': 'session_day1'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "Key: {'file_name': 'session_day3'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "Key: {'file_name': 'session_day4'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
+      "Key: {'file_name': 'session_day5'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n"
      ]
     }
    ],
    "source": [
     "# dataset 1\n",
     "print(\"Dataset 1:\")\n",
-    "for tag, data in dataset1:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")\n",
+    "for key, data in dataset1:\n",
+    "    print(f\"Key: {key}, Data: {data}\")\n",
     "\n",
     "# dataset 2\n",
     "print(\"\\nDataset 2:\")\n",
-    "for tag, data in dataset2:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in dataset2:\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -514,8 +514,8 @@
    "source": [
     "join_op = ob.Join()\n",
     "\n",
-    "for tag, data in join_op(dataset1, dataset2):\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in join_op(dataset1, dataset2):\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -536,7 +536,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "First, let's completely rename the tag key for one of the streams and see what would happen."
+    "First, let's completely rename the key field for one of the streams and see what would happen."
    ]
   },
   {
@@ -548,34 +548,34 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "01 Tag: {'day': 'day1', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "02 Tag: {'day': 'day1', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "03 Tag: {'day': 'day1', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
-      "04 Tag: {'day': 'day1', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
-      "05 Tag: {'day': 'day2', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "06 Tag: {'day': 'day2', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "07 Tag: {'day': 'day2', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
-      "08 Tag: {'day': 'day2', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
-      "09 Tag: {'day': 'day3', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "10 Tag: {'day': 'day3', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "11 Tag: {'day': 'day3', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
-      "12 Tag: {'day': 'day3', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
-      "13 Tag: {'day': 'day4', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "14 Tag: {'day': 'day4', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "15 Tag: {'day': 'day4', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
-      "16 Tag: {'day': 'day4', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
-      "17 Tag: {'day': 'day6', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "18 Tag: {'day': 'day6', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "19 Tag: {'day': 'day6', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
-      "20 Tag: {'day': 'day6', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n"
+      "01 Key: {'day': 'day1', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "02 Key: {'day': 'day1', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "03 Key: {'day': 'day1', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
+      "04 Key: {'day': 'day1', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
+      "05 Key: {'day': 'day2', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "06 Key: {'day': 'day2', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "07 Key: {'day': 'day2', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
+      "08 Key: {'day': 'day2', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day2.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
+      "09 Key: {'day': 'day3', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "10 Key: {'day': 'day3', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "11 Key: {'day': 'day3', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
+      "12 Key: {'day': 'day3', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
+      "13 Key: {'day': 'day4', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "14 Key: {'day': 'day4', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "15 Key: {'day': 'day4', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
+      "16 Key: {'day': 'day4', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n",
+      "17 Key: {'day': 'day6', 'file_name': 'session_day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "18 Key: {'day': 'day6', 'file_name': 'session_day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "19 Key: {'day': 'day6', 'file_name': 'session_day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
+      "20 Key: {'day': 'day6', 'file_name': 'session_day5'}, Data: {'txt_file': PosixPath('../examples/dataset1/day6.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n"
      ]
     }
    ],
    "source": [
-    "dataset1_retagged = ob.MapTags(key_map={\"file_name\": \"day\"})(dataset1)\n",
+    "dataset1_retagged = ob.MapKeys(key_map={\"file_name\": \"day\"})(dataset1)\n",
     "\n",
-    "for i, (tag, data) in enumerate(join_op(dataset1_retagged, dataset2)):\n",
-    "    print(f\"{i + 1:02d} Tag: {tag}, Data: {data}\")"
+    "for i, (key, data) in enumerate(join_op(dataset1_retagged, dataset2)):\n",
+    "    print(f\"{i + 1:02d} Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -584,9 +584,9 @@
    "source": [
     "We are now getting something -- in fact, quite a few things. If you look carefully at the `data`, you'll notice that it now contains two keys/arguments -- `txt_file` and `bin_data`, combining the data from the two datasets. \n",
     "\n",
-    "The `tags` also now contain two keys `day` from the re-tagged dataset1 stream and `file_name` from unchanged dataset2 stream.\n",
+    "The `keys` also now contain two entries: `day` from the re-tagged dataset1 stream and `file_name` from the unchanged dataset2 stream.\n",
     "\n",
-    "Since the two streams share no common tags, the `Join` operation results in *full-multiplexing* of two streams. With the streams from dataset1 and dataset2 containing 5 data and 4 data, respectively, you get $5 \\times 4 = 20$ data"
+    "Since the two streams share no common keys, the `Join` operation results in *full-multiplexing* of two streams. With the streams from dataset1 and dataset2 containing 5 data and 4 data, respectively, you get $5 \\times 4 = 20$ data"
    ]
   },
   {
@@ -602,7 +602,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Although we could achieve the desired effect by changing how we load the source, passing in custom `tag_function` into `GlobSource`, let's achieve the same by using another `mapper` called `Transform`. `Transform` effectively combines `MapKey` and `MapTag` but further allows you to provide a function that will receive the tag and data, one at a time, and return a (potentially modified) tag and/or data, achieving the desired transformation."
+    "Although we could achieve the desired effect by changing how we load the source, passing a custom `key_function` into `GlobSource`, let's achieve the same by using another `mapper` called `Transform`. `Transform` effectively combines `MapKeys` and `MapData` but further allows you to provide a function that will receive the key and data, one at a time, and return a (potentially modified) key and/or data, achieving the desired transformation."
    ]
   },
   {
@@ -614,18 +614,18 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'day': 'day1'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "Tag: {'day': 'day3'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "Tag: {'day': 'day4'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
-      "Tag: {'day': 'day5'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n"
+      "Key: {'day': 'day1'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "Key: {'day': 'day3'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "Key: {'day': 'day4'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n",
+      "Key: {'day': 'day5'}, Data: {'bin_data': PosixPath('../examples/dataset2/session_day5.bin')}\n"
      ]
     }
    ],
    "source": [
-    "def transform_dataset2(tag, data):\n",
+    "def transform_dataset2(key, data):\n",
     "    # Extract the second half of the filename containing day\n",
-    "    new_tag = {\"day\": tag[\"file_name\"].split(\"_\")[1]}\n",
-    "    return new_tag, data\n",
+    "    new_key = {\"day\": key[\"file_name\"].split(\"_\")[1]}\n",
+    "    return new_key, data\n",
     "\n",
     "\n",
     "# Speical mappers like transform can be found in the orcabridge.mapper module\n",
@@ -633,8 +633,8 @@
     "\n",
     "retagged_dataset2 = dataset2_transformer(dataset2)\n",
     "\n",
-    "for tag, data in retagged_dataset2:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in retagged_dataset2:\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -653,22 +653,22 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'day': 'day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
-      "Tag: {'day': 'day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
-      "Tag: {'day': 'day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n"
+      "Key: {'day': 'day1'}, Data: {'txt_file': PosixPath('../examples/dataset1/day1.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day1.bin')}\n",
+      "Key: {'day': 'day3'}, Data: {'txt_file': PosixPath('../examples/dataset1/day3.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day3.bin')}\n",
+      "Key: {'day': 'day4'}, Data: {'txt_file': PosixPath('../examples/dataset1/day4.txt'), 'bin_data': PosixPath('../examples/dataset2/session_day4.bin')}\n"
      ]
     }
    ],
    "source": [
     "# change filename to day for dataset1\n",
-    "tag_mapper = ob.MapTags(key_map={\"file_name\": \"day\"})\n",
-    "retagged_dataset1 = tag_mapper(dataset1)\n",
+    "key_mapper = ob.MapKeys(key_map={\"file_name\": \"day\"})\n",
+    "retagged_dataset1 = key_mapper(dataset1)\n",
     "\n",
     "join_op = ob.Join()\n",
     "joined_stream = join_op(retagged_dataset1, retagged_dataset2)\n",
     "\n",
-    "for tag, data in joined_stream:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in joined_stream:\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -696,7 +696,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "While `mapper` operations are useful in altering tags, data, and in combining multiple streams, a data pipeline is not really useful if it cannot produce new resultsin the form of new data -- that is, introduce new files into the stream. This is precisely where `Pod` operations come in!\n",
+    "While `mapper` operations are useful in altering keys, data, and in combining multiple streams, a data pipeline is not really useful if it cannot produce new results in the form of new data -- that is, introduce new files into the stream. This is precisely where `Pod` operations come in!\n",
     "\n",
     "In fact, we have already been working with a `pod` all along -- `sources`. If you think about it, `sources` also introduce files into the stream. It is just special in that it takes no input streams (hence the name, `source`).\n",
     "\n",
@@ -768,15 +768,15 @@
      "output_type": "stream",
      "text": [
       "File ../examples/dataset1/day1.txt has 24 lines.\n",
-      "Tag: {'file_name': 'day1'}, Data: {}\n",
+      "Key: {'file_name': 'day1'}, Data: {}\n",
       "File ../examples/dataset1/day2.txt has 15 lines.\n",
-      "Tag: {'file_name': 'day2'}, Data: {}\n",
+      "Key: {'file_name': 'day2'}, Data: {}\n",
       "File ../examples/dataset1/day3.txt has 27 lines.\n",
-      "Tag: {'file_name': 'day3'}, Data: {}\n",
+      "Key: {'file_name': 'day3'}, Data: {}\n",
       "File ../examples/dataset1/day4.txt has 22 lines.\n",
-      "Tag: {'file_name': 'day4'}, Data: {}\n",
+      "Key: {'file_name': 'day4'}, Data: {}\n",
       "File ../examples/dataset1/day6.txt has 22 lines.\n",
-      "Tag: {'file_name': 'day6'}, Data: {}\n"
+      "Key: {'file_name': 'day6'}, Data: {}\n"
      ]
     }
    ],
@@ -784,8 +784,8 @@
     "# apply the function pod on a stream\n",
     "processed_stream = function_pod(dataset1)\n",
     "\n",
-    "for tag, data in processed_stream:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in processed_stream:\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -849,7 +849,7 @@
       " -0.08364667 -0.45551653  0.70752188  1.02283734 -0.18612795  0.8767394\n",
       " -1.542636    1.04685484 -2.1311672  -1.34874222  0.61977577 -0.33880262\n",
       "  0.6624482   0.60257325 -3.04901544 -0.20685843 -0.08997232  0.88932232]\n",
-      "Tag: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpm2wka6il/statistics.json')}\n",
+      "Key: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpm2wka6il/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day3.bin\n",
       "[ 0.56114059 -1.34902274  1.0665563   0.71890802  0.65244834  1.04369548\n",
       "  0.54872876  2.19365207  0.53864286 -1.44108823 -0.55651539  0.1603561\n",
@@ -857,21 +857,21 @@
       "  0.38400938 -1.23004316  1.34426647 -0.07620065 -0.91983972  0.23537101\n",
       "  0.91515395  0.8064348   0.81470895 -1.04466683 -0.25893558 -1.46253167\n",
       "  1.39972807 -0.13940519]\n",
-      "Tag: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmplkmx65ll/statistics.json')}\n",
+      "Key: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmplkmx65ll/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day4.bin\n",
       "[ 0.70078854  1.18137906 -0.44361437 -0.389409    0.29719038  0.2523247\n",
       " -0.97418716  0.49301127  0.07900351 -0.29965042 -0.25810762 -2.78777445\n",
       " -1.24321702  0.13011593  1.07826637 -0.33177479 -0.78337033 -1.30075356\n",
       " -0.15710138  0.51927589  0.08671884  0.02058063  0.20778149 -1.40382559\n",
       " -0.69978105 -1.10525753  0.1945444   0.82623748  0.17467868]\n",
-      "Tag: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmpxajrzctd/statistics.json')}\n",
+      "Key: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmpxajrzctd/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day5.bin\n",
       "[ 1.9125739  -0.05252076  0.33347618  0.31627214  0.47141153 -0.71088615\n",
       " -0.74745805  0.53959117 -0.14395142 -0.28713782 -0.29422236 -1.00231383\n",
       "  0.69566576 -0.25895608 -0.9660761  -0.78504297 -1.91668262  0.89452296\n",
       " -0.82748688 -0.19792482  0.07305616  0.36133414  1.7164791   0.64364619\n",
       " -0.73146429  0.96324864 -1.05981222 -0.59502066  0.15084192]\n",
-      "Tag: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmp67rthfe1/statistics.json')}\n"
+      "Key: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmp67rthfe1/statistics.json')}\n"
      ]
     }
    ],
@@ -881,8 +881,8 @@
     "# change the key from 'bin_data' to 'bin_file', matching the function's input\n",
     "mapped_dataset2 = ob.MapData(key_map={\"bin_data\": \"bin_file\"})(dataset2)\n",
     "\n",
-    "for tag, data in fp_stats(mapped_dataset2):\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in fp_stats(mapped_dataset2):\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -909,7 +909,7 @@
       " -0.08364667 -0.45551653  0.70752188  1.02283734 -0.18612795  0.8767394\n",
       " -1.542636    1.04685484 -2.1311672  -1.34874222  0.61977577 -0.33880262\n",
       "  0.6624482   0.60257325 -3.04901544 -0.20685843 -0.08997232  0.88932232]\n",
-      "Tag: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpciwa2xl_/statistics.json')}\n",
+      "Key: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpciwa2xl_/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day3.bin\n",
       "[ 0.56114059 -1.34902274  1.0665563   0.71890802  0.65244834  1.04369548\n",
       "  0.54872876  2.19365207  0.53864286 -1.44108823 -0.55651539  0.1603561\n",
@@ -917,29 +917,29 @@
       "  0.38400938 -1.23004316  1.34426647 -0.07620065 -0.91983972  0.23537101\n",
       "  0.91515395  0.8064348   0.81470895 -1.04466683 -0.25893558 -1.46253167\n",
       "  1.39972807 -0.13940519]\n",
-      "Tag: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmpkq824j5b/statistics.json')}\n",
+      "Key: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmpkq824j5b/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day4.bin\n",
       "[ 0.70078854  1.18137906 -0.44361437 -0.389409    0.29719038  0.2523247\n",
       " -0.97418716  0.49301127  0.07900351 -0.29965042 -0.25810762 -2.78777445\n",
       " -1.24321702  0.13011593  1.07826637 -0.33177479 -0.78337033 -1.30075356\n",
       " -0.15710138  0.51927589  0.08671884  0.02058063  0.20778149 -1.40382559\n",
       " -0.69978105 -1.10525753  0.1945444   0.82623748  0.17467868]\n",
-      "Tag: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmp7ii2nd6e/statistics.json')}\n",
+      "Key: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmp7ii2nd6e/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day5.bin\n",
       "[ 1.9125739  -0.05252076  0.33347618  0.31627214  0.47141153 -0.71088615\n",
       " -0.74745805  0.53959117 -0.14395142 -0.28713782 -0.29422236 -1.00231383\n",
       "  0.69566576 -0.25895608 -0.9660761  -0.78504297 -1.91668262  0.89452296\n",
       " -0.82748688 -0.19792482  0.07305616  0.36133414  1.7164791   0.64364619\n",
       " -0.73146429  0.96324864 -1.05981222 -0.59502066  0.15084192]\n",
-      "Tag: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmpz23q61gg/statistics.json')}\n"
+      "Key: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmpz23q61gg/statistics.json')}\n"
      ]
     }
    ],
    "source": [
     "# everytime you run the following loop, new computations are performed and\n",
     "# saved in a different set of temporary files\n",
-    "for tag, data in fp_stats(mapped_dataset2):\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in fp_stats(mapped_dataset2):\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -987,7 +987,7 @@
       " -0.08364667 -0.45551653  0.70752188  1.02283734 -0.18612795  0.8767394\n",
       " -1.542636    1.04685484 -2.1311672  -1.34874222  0.61977577 -0.33880262\n",
       "  0.6624482   0.60257325 -3.04901544 -0.20685843 -0.08997232  0.88932232]\n",
-      "Tag: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpukvddhuv/statistics.json')}\n",
+      "Key: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpukvddhuv/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day3.bin\n",
       "[ 0.56114059 -1.34902274  1.0665563   0.71890802  0.65244834  1.04369548\n",
       "  0.54872876  2.19365207  0.53864286 -1.44108823 -0.55651539  0.1603561\n",
@@ -995,21 +995,21 @@
       "  0.38400938 -1.23004316  1.34426647 -0.07620065 -0.91983972  0.23537101\n",
       "  0.91515395  0.8064348   0.81470895 -1.04466683 -0.25893558 -1.46253167\n",
       "  1.39972807 -0.13940519]\n",
-      "Tag: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmpat3rm4dk/statistics.json')}\n",
+      "Key: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmpat3rm4dk/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day4.bin\n",
       "[ 0.70078854  1.18137906 -0.44361437 -0.389409    0.29719038  0.2523247\n",
       " -0.97418716  0.49301127  0.07900351 -0.29965042 -0.25810762 -2.78777445\n",
       " -1.24321702  0.13011593  1.07826637 -0.33177479 -0.78337033 -1.30075356\n",
       " -0.15710138  0.51927589  0.08671884  0.02058063  0.20778149 -1.40382559\n",
       " -0.69978105 -1.10525753  0.1945444   0.82623748  0.17467868]\n",
-      "Tag: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmpuj3tiu8k/statistics.json')}\n",
+      "Key: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmpuj3tiu8k/statistics.json')}\n",
       "Computing stats for file: ../examples/dataset2/session_day5.bin\n",
       "[ 1.9125739  -0.05252076  0.33347618  0.31627214  0.47141153 -0.71088615\n",
       " -0.74745805  0.53959117 -0.14395142 -0.28713782 -0.29422236 -1.00231383\n",
       "  0.69566576 -0.25895608 -0.9660761  -0.78504297 -1.91668262  0.89452296\n",
       " -0.82748688 -0.19792482  0.07305616  0.36133414  1.7164791   0.64364619\n",
       " -0.73146429  0.96324864 -1.05981222 -0.59502066  0.15084192]\n",
-      "Tag: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmp6yohu0pw/statistics.json')}\n"
+      "Key: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmp6yohu0pw/statistics.json')}\n"
      ]
     }
    ],
@@ -1024,8 +1024,8 @@
     "cached_stream = cache_stream(stats_stream)\n",
     "\n",
     "# iterate over the cached stream\n",
-    "for tag, data in cached_stream:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in cached_stream:\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -1044,16 +1044,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpukvddhuv/statistics.json')}\n",
-      "Tag: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmpat3rm4dk/statistics.json')}\n",
-      "Tag: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmpuj3tiu8k/statistics.json')}\n",
-      "Tag: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmp6yohu0pw/statistics.json')}\n"
+      "Key: {'file_name': 'session_day1'}, Data: {'stats': PosixPath('/tmp/tmpukvddhuv/statistics.json')}\n",
+      "Key: {'file_name': 'session_day3'}, Data: {'stats': PosixPath('/tmp/tmpat3rm4dk/statistics.json')}\n",
+      "Key: {'file_name': 'session_day4'}, Data: {'stats': PosixPath('/tmp/tmpuj3tiu8k/statistics.json')}\n",
+      "Key: {'file_name': 'session_day5'}, Data: {'stats': PosixPath('/tmp/tmp6yohu0pw/statistics.json')}\n"
      ]
     }
    ],
    "source": [
-    "for tag, data in cached_stream:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in cached_stream:\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -1118,16 +1118,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'session_day1'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-c63c34c5fefaa0f2e9bba3edcf6c861c/statistics.json'}\n",
-      "Tag: {'file_name': 'session_day3'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-9dfef842f88463f5145ab0d4c06e3938/statistics.json'}\n",
-      "Tag: {'file_name': 'session_day4'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-4f3dfa71356fe8f226be66aa8dffbc55/statistics.json'}\n",
-      "Tag: {'file_name': 'session_day5'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-26bffc293c82e14cde904274e0c63afd/statistics.json'}\n"
+      "Key: {'file_name': 'session_day1'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-c63c34c5fefaa0f2e9bba3edcf6c861c/statistics.json'}\n",
+      "Key: {'file_name': 'session_day3'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-9dfef842f88463f5145ab0d4c06e3938/statistics.json'}\n",
+      "Key: {'file_name': 'session_day4'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-4f3dfa71356fe8f226be66aa8dffbc55/statistics.json'}\n",
+      "Key: {'file_name': 'session_day5'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-26bffc293c82e14cde904274e0c63afd/statistics.json'}\n"
      ]
     }
    ],
    "source": [
-    "for tag, data in fp_stats_stored(mapped_dataset2):\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in fp_stats_stored(mapped_dataset2):\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -1146,16 +1146,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'session_day1'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-c63c34c5fefaa0f2e9bba3edcf6c861c/statistics.json'}\n",
-      "Tag: {'file_name': 'session_day3'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-9dfef842f88463f5145ab0d4c06e3938/statistics.json'}\n",
-      "Tag: {'file_name': 'session_day4'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-4f3dfa71356fe8f226be66aa8dffbc55/statistics.json'}\n",
-      "Tag: {'file_name': 'session_day5'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-26bffc293c82e14cde904274e0c63afd/statistics.json'}\n"
+      "Key: {'file_name': 'session_day1'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-c63c34c5fefaa0f2e9bba3edcf6c861c/statistics.json'}\n",
+      "Key: {'file_name': 'session_day3'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-9dfef842f88463f5145ab0d4c06e3938/statistics.json'}\n",
+      "Key: {'file_name': 'session_day4'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-4f3dfa71356fe8f226be66aa8dffbc55/statistics.json'}\n",
+      "Key: {'file_name': 'session_day5'}, Data: {'stats': 'pod_data/compute_stats/15da3b08791f51d9/sha256-26bffc293c82e14cde904274e0c63afd/statistics.json'}\n"
      ]
     }
    ],
    "source": [
-    "for tag, data in fp_stats_stored(mapped_dataset2):\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in fp_stats_stored(mapped_dataset2):\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
diff --git a/notebooks/old_tutorials/03_orcacapod_qol_features.ipynb b/notebooks/old_tutorials/03_orcacapod_qol_features.ipynb
index d83b8da9..4406232c 100644
--- a/notebooks/old_tutorials/03_orcacapod_qol_features.ipynb
+++ b/notebooks/old_tutorials/03_orcacapod_qol_features.ipynb
@@ -34,11 +34,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'day1'}, Data: {'output_data': 'path/to/result/file'}\n",
-      "Tag: {'file_name': 'day2'}, Data: {'output_data': 'path/to/result/file'}\n",
-      "Tag: {'file_name': 'day3'}, Data: {'output_data': 'path/to/result/file'}\n",
-      "Tag: {'file_name': 'day4'}, Data: {'output_data': 'path/to/result/file'}\n",
-      "Tag: {'file_name': 'day6'}, Data: {'output_data': 'path/to/result/file'}\n"
+      "Key: {'file_name': 'day1'}, Data: {'output_data': 'path/to/result/file'}\n",
+      "Key: {'file_name': 'day2'}, Data: {'output_data': 'path/to/result/file'}\n",
+      "Key: {'file_name': 'day3'}, Data: {'output_data': 'path/to/result/file'}\n",
+      "Key: {'file_name': 'day4'}, Data: {'output_data': 'path/to/result/file'}\n",
+      "Key: {'file_name': 'day6'}, Data: {'output_data': 'path/to/result/file'}\n"
      ]
     }
    ],
@@ -142,11 +142,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'info_day1'}, Data: {'output_data': PosixPath('/tmp/tmpb0q3mj9m/output.json')}\n",
-      "Tag: {'file_name': 'info_day2'}, Data: {'output_data': PosixPath('/tmp/tmpt79_hpoe/output.json')}\n",
-      "Tag: {'file_name': 'info_day3'}, Data: {'output_data': PosixPath('/tmp/tmp_rq1b2rq/output.json')}\n",
-      "Tag: {'file_name': 'info_day4'}, Data: {'output_data': PosixPath('/tmp/tmp4dyoqbix/output.json')}\n",
-      "Tag: {'file_name': 'info_day5'}, Data: {'output_data': PosixPath('/tmp/tmpc9a1bxx4/output.json')}\n"
+      "Key: {'file_name': 'info_day1'}, Data: {'output_data': PosixPath('/tmp/tmpb0q3mj9m/output.json')}\n",
+      "Key: {'file_name': 'info_day2'}, Data: {'output_data': PosixPath('/tmp/tmpt79_hpoe/output.json')}\n",
+      "Key: {'file_name': 'info_day3'}, Data: {'output_data': PosixPath('/tmp/tmp_rq1b2rq/output.json')}\n",
+      "Key: {'file_name': 'info_day4'}, Data: {'output_data': PosixPath('/tmp/tmp4dyoqbix/output.json')}\n",
+      "Key: {'file_name': 'info_day5'}, Data: {'output_data': PosixPath('/tmp/tmpc9a1bxx4/output.json')}\n"
      ]
     }
    ],
@@ -189,7 +189,7 @@
    "id": "bea0880a",
    "metadata": {},
    "source": [
-    "## Mapping tags and data with `>>` operator"
+    "## Mapping keys and data with `>>` operator"
    ]
   },
   {
@@ -267,11 +267,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'info_day1'}, Data: {'line_count': PosixPath('/tmp/tmpl9kxw4yn/line_count.json')}\n",
-      "Tag: {'file_name': 'info_day2'}, Data: {'line_count': PosixPath('/tmp/tmpa0d08oym/line_count.json')}\n",
-      "Tag: {'file_name': 'info_day3'}, Data: {'line_count': PosixPath('/tmp/tmp_9r0cryr/line_count.json')}\n",
-      "Tag: {'file_name': 'info_day4'}, Data: {'line_count': PosixPath('/tmp/tmpygwfjha9/line_count.json')}\n",
-      "Tag: {'file_name': 'info_day5'}, Data: {'line_count': PosixPath('/tmp/tmph_5zgk6j/line_count.json')}\n"
+      "Key: {'file_name': 'info_day1'}, Data: {'line_count': PosixPath('/tmp/tmpl9kxw4yn/line_count.json')}\n",
+      "Key: {'file_name': 'info_day2'}, Data: {'line_count': PosixPath('/tmp/tmpa0d08oym/line_count.json')}\n",
+      "Key: {'file_name': 'info_day3'}, Data: {'line_count': PosixPath('/tmp/tmp_9r0cryr/line_count.json')}\n",
+      "Key: {'file_name': 'info_day4'}, Data: {'line_count': PosixPath('/tmp/tmpygwfjha9/line_count.json')}\n",
+      "Key: {'file_name': 'info_day5'}, Data: {'line_count': PosixPath('/tmp/tmph_5zgk6j/line_count.json')}\n"
      ]
     }
    ],
@@ -307,16 +307,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'info_day1'}, Data: {'text_file': PosixPath('../examples/dataset2/info_day1.json')}\n",
-      "Tag: {'file_name': 'info_day2'}, Data: {'text_file': PosixPath('../examples/dataset2/info_day2.json')}\n",
-      "Tag: {'file_name': 'info_day3'}, Data: {'text_file': PosixPath('../examples/dataset2/info_day3.json')}\n",
-      "Tag: {'file_name': 'info_day4'}, Data: {'text_file': PosixPath('../examples/dataset2/info_day4.json')}\n",
-      "Tag: {'file_name': 'info_day5'}, Data: {'text_file': PosixPath('../examples/dataset2/info_day5.json')}\n",
-      "Tag: {'file_name': 'info_day1'}, Data: {'line_count': PosixPath('/tmp/tmpskhgsexk/line_count.json')}\n",
-      "Tag: {'file_name': 'info_day2'}, Data: {'line_count': PosixPath('/tmp/tmp7oto9nav/line_count.json')}\n",
-      "Tag: {'file_name': 'info_day3'}, Data: {'line_count': PosixPath('/tmp/tmpushxubr1/line_count.json')}\n",
-      "Tag: {'file_name': 'info_day4'}, Data: {'line_count': PosixPath('/tmp/tmpb2fhgner/line_count.json')}\n",
-      "Tag: {'file_name': 'info_day5'}, Data: {'line_count': PosixPath('/tmp/tmpnujrqytb/line_count.json')}\n"
+      "Key: {'file_name': 'info_day1'}, Data: {'text_file': PosixPath('../examples/dataset2/info_day1.json')}\n",
+      "Key: {'file_name': 'info_day2'}, Data: {'text_file': PosixPath('../examples/dataset2/info_day2.json')}\n",
+      "Key: {'file_name': 'info_day3'}, Data: {'text_file': PosixPath('../examples/dataset2/info_day3.json')}\n",
+      "Key: {'file_name': 'info_day4'}, Data: {'text_file': PosixPath('../examples/dataset2/info_day4.json')}\n",
+      "Key: {'file_name': 'info_day5'}, Data: {'text_file': PosixPath('../examples/dataset2/info_day5.json')}\n",
+      "Key: {'file_name': 'info_day1'}, Data: {'line_count': PosixPath('/tmp/tmpskhgsexk/line_count.json')}\n",
+      "Key: {'file_name': 'info_day2'}, Data: {'line_count': PosixPath('/tmp/tmp7oto9nav/line_count.json')}\n",
+      "Key: {'file_name': 'info_day3'}, Data: {'line_count': PosixPath('/tmp/tmpushxubr1/line_count.json')}\n",
+      "Key: {'file_name': 'info_day4'}, Data: {'line_count': PosixPath('/tmp/tmpb2fhgner/line_count.json')}\n",
+      "Key: {'file_name': 'info_day5'}, Data: {'line_count': PosixPath('/tmp/tmpnujrqytb/line_count.json')}\n"
      ]
     }
    ],
@@ -346,11 +346,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'info_day1'}, Data: {'line_count': PosixPath('/tmp/tmp4ny0gm34/line_count.json')}\n",
-      "Tag: {'file_name': 'info_day2'}, Data: {'line_count': PosixPath('/tmp/tmpegxyuceg/line_count.json')}\n",
-      "Tag: {'file_name': 'info_day3'}, Data: {'line_count': PosixPath('/tmp/tmpwzvjhte9/line_count.json')}\n",
-      "Tag: {'file_name': 'info_day4'}, Data: {'line_count': PosixPath('/tmp/tmpf0loiyqs/line_count.json')}\n",
-      "Tag: {'file_name': 'info_day5'}, Data: {'line_count': PosixPath('/tmp/tmp9vatjy_m/line_count.json')}\n"
+      "Key: {'file_name': 'info_day1'}, Data: {'line_count': PosixPath('/tmp/tmp4ny0gm34/line_count.json')}\n",
+      "Key: {'file_name': 'info_day2'}, Data: {'line_count': PosixPath('/tmp/tmpegxyuceg/line_count.json')}\n",
+      "Key: {'file_name': 'info_day3'}, Data: {'line_count': PosixPath('/tmp/tmpwzvjhte9/line_count.json')}\n",
+      "Key: {'file_name': 'info_day4'}, Data: {'line_count': PosixPath('/tmp/tmpf0loiyqs/line_count.json')}\n",
+      "Key: {'file_name': 'info_day5'}, Data: {'line_count': PosixPath('/tmp/tmp9vatjy_m/line_count.json')}\n"
      ]
     }
    ],
@@ -372,7 +372,7 @@
    "id": "18ec64f3",
    "metadata": {},
    "source": [
-    "### Mapping tags and advanced mapping"
+    "### Mapping keys and advanced mapping"
    ]
   },
   {
@@ -380,7 +380,7 @@
    "id": "05e8ff25",
    "metadata": {},
    "source": [
-    "We just saw how the rightshift operator can be used to simplify the `MapData` operation creation. How about `MapTags`? We can get `MapTags` equivalent operation also by using the rightshift (`>>`) operator, but with the help of an additional function: `tag()`."
+    "We just saw how the rightshift operator can be used to simplify the `MapData` operation creation. How about `MapKeys`? We can get `MapKeys` equivalent operation also by using the rightshift (`>>`) operator, but with the help of an additional function: `key()`."
    ]
   },
   {
@@ -393,17 +393,17 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'experiment_day': 'info_day1'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day1.json')}\n",
-      "Tag: {'experiment_day': 'info_day2'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day2.json')}\n",
-      "Tag: {'experiment_day': 'info_day3'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day3.json')}\n",
-      "Tag: {'experiment_day': 'info_day4'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day4.json')}\n",
-      "Tag: {'experiment_day': 'info_day5'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day5.json')}\n"
+      "Key: {'experiment_day': 'info_day1'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day1.json')}\n",
+      "Key: {'experiment_day': 'info_day2'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day2.json')}\n",
+      "Key: {'experiment_day': 'info_day3'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day3.json')}\n",
+      "Key: {'experiment_day': 'info_day4'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day4.json')}\n",
+      "Key: {'experiment_day': 'info_day5'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day5.json')}\n"
      ]
     }
    ],
    "source": [
-    "# use ob.tag to specifically map the tag key\n",
-    "(json_files >> ob.tag({\"file_name\": \"experiment_day\"})).head()"
+    "# use ob.key to specifically map a field of the key\n",
+    "(json_files >> ob.key({\"file_name\": \"experiment_day\"})).head()"
    ]
   },
   {
@@ -411,7 +411,7 @@
    "id": "ac34eed4",
    "metadata": {},
    "source": [
-    "Now if you were to closely inspect `MapData` and `MapData`, you would know that it is capable of taking in some additional arguments such as `drop_unmapped`. Using `tag()` and `data()` helper functions would let you specify those arguments as well while using the `>>` operator."
+    "Now if you were to closely inspect `MapKeys` and `MapData`, you would know that they are capable of taking in some additional arguments such as `drop_unmapped`. Using `key()` and `data()` helper functions would let you specify those arguments as well while using the `>>` operator."
    ]
   },
   {
@@ -424,11 +424,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'info_day1'}, Data: {}\n",
-      "Tag: {'file_name': 'info_day2'}, Data: {}\n",
-      "Tag: {'file_name': 'info_day3'}, Data: {}\n",
-      "Tag: {'file_name': 'info_day4'}, Data: {}\n",
-      "Tag: {'file_name': 'info_day5'}, Data: {}\n"
+      "Key: {'file_name': 'info_day1'}, Data: {}\n",
+      "Key: {'file_name': 'info_day2'}, Data: {}\n",
+      "Key: {'file_name': 'info_day3'}, Data: {}\n",
+      "Key: {'file_name': 'info_day4'}, Data: {}\n",
+      "Key: {'file_name': 'info_day5'}, Data: {}\n"
      ]
     }
    ],
@@ -447,11 +447,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'info_day1'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day1.json')}\n",
-      "Tag: {'file_name': 'info_day2'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day2.json')}\n",
-      "Tag: {'file_name': 'info_day3'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day3.json')}\n",
-      "Tag: {'file_name': 'info_day4'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day4.json')}\n",
-      "Tag: {'file_name': 'info_day5'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day5.json')}\n"
+      "Key: {'file_name': 'info_day1'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day1.json')}\n",
+      "Key: {'file_name': 'info_day2'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day2.json')}\n",
+      "Key: {'file_name': 'info_day3'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day3.json')}\n",
+      "Key: {'file_name': 'info_day4'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day4.json')}\n",
+      "Key: {'file_name': 'info_day5'}, Data: {'json_file': PosixPath('../examples/dataset2/info_day5.json')}\n"
      ]
     }
    ],
@@ -494,8 +494,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "mapped_dataset1 = dataset1 >> ob.tag({\"file_name\": \"txt_file\"})\n",
-    "mapped_dataset2 = dataset2 >> ob.tag({\"file_name\": \"json_file\"})"
+    "mapped_dataset1 = dataset1 >> ob.key({\"file_name\": \"txt_file\"})\n",
+    "mapped_dataset2 = dataset2 >> ob.key({\"file_name\": \"json_file\"})"
    ]
   },
   {
@@ -508,11 +508,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'txt_file': 'day1', 'json_file': 'info_day1'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day1.json')}\n",
-      "Tag: {'txt_file': 'day1', 'json_file': 'info_day2'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day2.json')}\n",
-      "Tag: {'txt_file': 'day1', 'json_file': 'info_day3'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day3.json')}\n",
-      "Tag: {'txt_file': 'day1', 'json_file': 'info_day4'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day4.json')}\n",
-      "Tag: {'txt_file': 'day1', 'json_file': 'info_day5'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day5.json')}\n"
+      "Key: {'txt_file': 'day1', 'json_file': 'info_day1'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day1.json')}\n",
+      "Key: {'txt_file': 'day1', 'json_file': 'info_day2'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day2.json')}\n",
+      "Key: {'txt_file': 'day1', 'json_file': 'info_day3'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day3.json')}\n",
+      "Key: {'txt_file': 'day1', 'json_file': 'info_day4'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day4.json')}\n",
+      "Key: {'txt_file': 'day1', 'json_file': 'info_day5'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day5.json')}\n"
      ]
     }
    ],
@@ -541,11 +541,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'txt_file': 'day1', 'json_file': 'info_day1'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day1.json')}\n",
-      "Tag: {'txt_file': 'day1', 'json_file': 'info_day2'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day2.json')}\n",
-      "Tag: {'txt_file': 'day1', 'json_file': 'info_day3'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day3.json')}\n",
-      "Tag: {'txt_file': 'day1', 'json_file': 'info_day4'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day4.json')}\n",
-      "Tag: {'txt_file': 'day1', 'json_file': 'info_day5'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day5.json')}\n"
+      "Key: {'txt_file': 'day1', 'json_file': 'info_day1'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day1.json')}\n",
+      "Key: {'txt_file': 'day1', 'json_file': 'info_day2'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day2.json')}\n",
+      "Key: {'txt_file': 'day1', 'json_file': 'info_day3'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day3.json')}\n",
+      "Key: {'txt_file': 'day1', 'json_file': 'info_day4'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day4.json')}\n",
+      "Key: {'txt_file': 'day1', 'json_file': 'info_day5'}, Data: {'text_file': PosixPath('../examples/dataset1/day1.txt'), 'json_file': PosixPath('../examples/dataset2/info_day5.json')}\n"
      ]
     }
    ],
diff --git a/notebooks/old_tutorials/04_orcapod_tracker.ipynb b/notebooks/old_tutorials/04_orcapod_tracker.ipynb
index fd96408c..d58f55cf 100644
--- a/notebooks/old_tutorials/04_orcapod_tracker.ipynb
+++ b/notebooks/old_tutorials/04_orcapod_tracker.ipynb
@@ -139,10 +139,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'day1'}, Data: {'key_info': 'pod_data/extract_keys/cf978f9c23318c91/c4c3939c-aced-18ac-8a7c-330f07780bbd/keys.json'}\n",
-      "Tag: {'file_name': 'day2'}, Data: {'key_info': 'pod_data/extract_keys/cf978f9c23318c91/61e78af0-346a-00f7-df3d-f2ec1693a84e/keys.json'}\n",
-      "Tag: {'file_name': 'day3'}, Data: {'key_info': 'pod_data/extract_keys/cf978f9c23318c91/92fbcba4-a642-4105-8be2-c01ce9c3e12e/keys.json'}\n",
-      "Tag: {'file_name': 'day4'}, Data: {'key_info': 'pod_data/extract_keys/cf978f9c23318c91/9e7da977-1ed3-03a2-ffd5-a3d626c286d8/keys.json'}\n"
+      "Key: {'file_name': 'day1'}, Data: {'key_info': 'pod_data/extract_keys/cf978f9c23318c91/c4c3939c-aced-18ac-8a7c-330f07780bbd/keys.json'}\n",
+      "Key: {'file_name': 'day2'}, Data: {'key_info': 'pod_data/extract_keys/cf978f9c23318c91/61e78af0-346a-00f7-df3d-f2ec1693a84e/keys.json'}\n",
+      "Key: {'file_name': 'day3'}, Data: {'key_info': 'pod_data/extract_keys/cf978f9c23318c91/92fbcba4-a642-4105-8be2-c01ce9c3e12e/keys.json'}\n",
+      "Key: {'file_name': 'day4'}, Data: {'key_info': 'pod_data/extract_keys/cf978f9c23318c91/9e7da977-1ed3-03a2-ffd5-a3d626c286d8/keys.json'}\n"
      ]
     }
    ],
@@ -160,10 +160,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'day1'}, Data: {'yaml_file': 'pod_data/json_to_yaml/0a2282eda2b641e6/c4c3939c-aced-18ac-8a7c-330f07780bbd/data.yaml'}\n",
-      "Tag: {'file_name': 'day2'}, Data: {'yaml_file': 'pod_data/json_to_yaml/0a2282eda2b641e6/61e78af0-346a-00f7-df3d-f2ec1693a84e/data.yaml'}\n",
-      "Tag: {'file_name': 'day3'}, Data: {'yaml_file': 'pod_data/json_to_yaml/0a2282eda2b641e6/92fbcba4-a642-4105-8be2-c01ce9c3e12e/data.yaml'}\n",
-      "Tag: {'file_name': 'day4'}, Data: {'yaml_file': 'pod_data/json_to_yaml/0a2282eda2b641e6/9e7da977-1ed3-03a2-ffd5-a3d626c286d8/data.yaml'}\n"
+      "Key: {'file_name': 'day1'}, Data: {'yaml_file': 'pod_data/json_to_yaml/0a2282eda2b641e6/c4c3939c-aced-18ac-8a7c-330f07780bbd/data.yaml'}\n",
+      "Key: {'file_name': 'day2'}, Data: {'yaml_file': 'pod_data/json_to_yaml/0a2282eda2b641e6/61e78af0-346a-00f7-df3d-f2ec1693a84e/data.yaml'}\n",
+      "Key: {'file_name': 'day3'}, Data: {'yaml_file': 'pod_data/json_to_yaml/0a2282eda2b641e6/92fbcba4-a642-4105-8be2-c01ce9c3e12e/data.yaml'}\n",
+      "Key: {'file_name': 'day4'}, Data: {'yaml_file': 'pod_data/json_to_yaml/0a2282eda2b641e6/9e7da977-1ed3-03a2-ffd5-a3d626c286d8/data.yaml'}\n"
      ]
     }
    ],
@@ -249,7 +249,7 @@
    "id": "3b8fd7d0",
    "metadata": {},
    "source": [
-    "Does that match what you thought of the pipeline thus far? You might notice that while we used the convenience operator `>>` to map data keys, the corresponding `MapData` mapper actually shows up in the graph. Remember that `>>` is just for convenience, making the creation of `MapData` and `MapTags` more accesible."
+    "Does that match what you thought of the pipeline thus far? You might notice that while we used the convenience operator `>>` to map data keys, the corresponding `MapData` mapper actually shows up in the graph. Remember that `>>` is just for convenience, making the creation of `MapData` and `MapKeys` more accessible."
    ]
   },
   {
diff --git a/notebooks/old_tutorials/05_orcabridge_dj_integration.ipynb b/notebooks/old_tutorials/05_orcabridge_dj_integration.ipynb
index 4f038d7e..ee911150 100644
--- a/notebooks/old_tutorials/05_orcabridge_dj_integration.ipynb
+++ b/notebooks/old_tutorials/05_orcabridge_dj_integration.ipynb
@@ -502,11 +502,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'file_name': 'day1'}, Data: {'data_file': PosixPath('../examples/dataset1/day1.txt')}\n",
-      "Tag: {'file_name': 'day2'}, Data: {'data_file': PosixPath('../examples/dataset1/day2.txt')}\n",
-      "Tag: {'file_name': 'day3'}, Data: {'data_file': PosixPath('../examples/dataset1/day3.txt')}\n",
-      "Tag: {'file_name': 'day4'}, Data: {'data_file': PosixPath('../examples/dataset1/day4.txt')}\n",
-      "Tag: {'file_name': 'day6'}, Data: {'data_file': PosixPath('../examples/dataset1/day6.txt')}\n"
+      "Key: {'file_name': 'day1'}, Data: {'data_file': PosixPath('../examples/dataset1/day1.txt')}\n",
+      "Key: {'file_name': 'day2'}, Data: {'data_file': PosixPath('../examples/dataset1/day2.txt')}\n",
+      "Key: {'file_name': 'day3'}, Data: {'data_file': PosixPath('../examples/dataset1/day3.txt')}\n",
+      "Key: {'file_name': 'day4'}, Data: {'data_file': PosixPath('../examples/dataset1/day4.txt')}\n",
+      "Key: {'file_name': 'day6'}, Data: {'data_file': PosixPath('../examples/dataset1/day6.txt')}\n"
      ]
     }
    ],
diff --git a/notebooks/tutorials/01_introduction_to_orcapod.ipynb b/notebooks/tutorials/01_introduction_to_orcapod.ipynb
index 8a28472e..5929297e 100644
--- a/notebooks/tutorials/01_introduction_to_orcapod.ipynb
+++ b/notebooks/tutorials/01_introduction_to_orcapod.ipynb
@@ -93,7 +93,7 @@
    "id": "c2ac8f32",
    "metadata": {},
    "source": [
-    "Use `op.streams.TableStream` to turn table into a stream. You will also have to specify which columns are the tags."
+    "Use `op.streams.TableStream` to turn a table into a stream. You will also have to specify which columns are the keys."
    ]
   },
   {
@@ -103,7 +103,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "stream = op.sources.DataFrameSource(table, tag_columns=[\"a\", \"b\"])"
+    "stream = op.sources.DataFrameSource(table, key_columns=[\"a\", \"b\"])"
    ]
   },
   {
@@ -119,7 +119,7 @@
    "id": "08a854e7",
    "metadata": {},
    "source": [
-    "Once you have a stream, you can iterate through tag, data pair:"
+    "Once you have a stream, you can iterate through its (key, data) pairs:"
    ]
   },
   {
@@ -132,15 +132,15 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Tag: {'a': 1, 'b': 'x'}, Data: {'c': True, 'd': 1.1}\n",
-      "Tag: {'a': 2, 'b': 'y'}, Data: {'c': False, 'd': 2.2}\n",
-      "Tag: {'a': 3, 'b': 'z'}, Data: {'c': True, 'd': 3.3}\n"
+      "Key: {'a': 1, 'b': 'x'}, Data: {'c': True, 'd': 1.1}\n",
+      "Key: {'a': 2, 'b': 'y'}, Data: {'c': False, 'd': 2.2}\n",
+      "Key: {'a': 3, 'b': 'z'}, Data: {'c': True, 'd': 3.3}\n"
      ]
     }
    ],
    "source": [
-    "for tag, data in stream:\n",
-    "    print(f\"Tag: {tag}, Data: {data}\")"
+    "for key, data in stream:\n",
+    "    print(f\"Key: {key}, Data: {data}\")"
    ]
   },
   {
@@ -148,7 +148,7 @@
    "id": "41c7876b",
    "metadata": {},
    "source": [
-    "You can also get all tag data pairs as a list of tuples by calling `.flow()`"
+    "You can also get all key data pairs as a list of tuples by calling `.flow()`"
    ]
   },
   {
@@ -259,7 +259,7 @@
    "id": "49b297f6",
    "metadata": {},
    "source": [
-    "`include_source` adds `source` column for each data (non-tag) column patterned like `_source_{column}` and will contain information about where that particular value orginated from."
+    "`include_source` adds a `source` column for each data (non-key) column, named like `_source_{column}`, which contains information about where that particular value originated from."
    ]
   },
   {
@@ -459,7 +459,7 @@
    "id": "7ce05b68",
    "metadata": {},
    "source": [
-    "### Tags and Datas"
+    "### Keys and Datas"
    ]
   },
   {
@@ -467,7 +467,7 @@
    "id": "20783626",
    "metadata": {},
    "source": [
-    "The tags and data returned by the streams can be thought of as special dictionary."
+    "The keys and data returned by the streams can be thought of as special dictionaries."
    ]
   },
   {
@@ -517,7 +517,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "all_tags_and_data = stream.flow()"
+    "all_keys_and_data = stream.flow()"
    ]
   },
   {
@@ -527,7 +527,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tag, data = all_tags_and_data[0]"
+    "key, data = all_keys_and_data[0]"
    ]
   },
   {
@@ -548,7 +548,7 @@
     }
    ],
    "source": [
-    "tag"
+    "key"
    ]
   },
   {
@@ -577,7 +577,7 @@
    "id": "17be117a",
    "metadata": {},
    "source": [
-    "The element of tag/data can be accessed just like dictionary:"
+    "Elements of a key/data can be accessed just like those of a dictionary:"
    ]
   },
   {
@@ -598,7 +598,7 @@
     }
    ],
    "source": [
-    "tag[\"a\"]"
+    "key[\"a\"]"
    ]
   },
   {
@@ -619,7 +619,7 @@
     }
    ],
    "source": [
-    "tag[\"b\"]"
+    "key[\"b\"]"
    ]
   },
   {
@@ -806,7 +806,7 @@
    "id": "37ad91d0",
    "metadata": {},
    "source": [
-    "You can also get a plain dictionary from tag/data with `as_dict`"
+    "You can also get a plain dictionary from key/data with `as_dict`"
    ]
   },
   {
@@ -827,7 +827,7 @@
     }
    ],
    "source": [
-    "tag.as_dict()"
+    "key.as_dict()"
    ]
   },
   {
@@ -927,7 +927,7 @@
    "id": "98ab6fc7",
    "metadata": {},
    "source": [
-    "The hash of tag/data can be computed with `content_hash()` method. The result will be cached so that it won't be computed again unnecessarily."
+    "The hash of key/data can be computed with `content_hash()` method. The result will be cached so that it won't be computed again unnecessarily."
    ]
   },
   {
@@ -948,7 +948,7 @@
     }
    ],
    "source": [
-    "tag.content_hash()"
+    "key.content_hash()"
    ]
   },
   {
@@ -998,8 +998,8 @@
     "    }\n",
     ")\n",
     "\n",
-    "stream1 = op.streams.ArrowTableStream(table1, tag_columns=[\"id\"])\n",
-    "stream2 = op.streams.ArrowTableStream(table2, tag_columns=[\"id\"])"
+    "stream1 = op.streams.ArrowTableStream(table1, key_columns=[\"id\"])\n",
+    "stream2 = op.streams.ArrowTableStream(table2, key_columns=[\"id\"])"
    ]
   },
   {
@@ -1022,7 +1022,7 @@
        "<small>shape: (3, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>*id</th><th>a</th><th>b</th></tr><tr><td>i64</td><td>i64</td><td>str</td></tr></thead><tbody><tr><td>0</td><td>1</td><td>&quot;x&quot;</td></tr><tr><td>1</td><td>2</td><td>&quot;y&quot;</td></tr><tr><td>4</td><td>3</td><td>&quot;z&quot;</td></tr></tbody></table></div>"
       ],
       "text/plain": [
-       "ArrowTableStream(table=['id', 'a', 'b'], tag_columns=('id',))"
+       "ArrowTableStream(table=['id', 'a', 'b'], key_columns=('id',))"
       ]
      },
      "execution_count": 35,
@@ -1054,7 +1054,7 @@
        "<small>shape: (3, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>*id</th><th>c</th><th>d</th></tr><tr><td>i64</td><td>bool</td><td>f64</td></tr></thead><tbody><tr><td>0</td><td>true</td><td>1.1</td></tr><tr><td>1</td><td>false</td><td>2.2</td></tr><tr><td>2</td><td>true</td><td>3.3</td></tr></tbody></table></div>"
       ],
       "text/plain": [
-       "ArrowTableStream(table=['id', 'c', 'd'], tag_columns=('id',))"
+       "ArrowTableStream(table=['id', 'c', 'd'], key_columns=('id',))"
       ]
      },
      "execution_count": 36,
@@ -1130,7 +1130,7 @@
        "<small>shape: (2, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>*id</th><th>a</th><th>b</th><th>c</th><th>d</th></tr><tr><td>i64</td><td>i64</td><td>str</td><td>bool</td><td>f64</td></tr></thead><tbody><tr><td>0</td><td>1</td><td>&quot;x&quot;</td><td>true</td><td>1.1</td></tr><tr><td>1</td><td>2</td><td>&quot;y&quot;</td><td>false</td><td>2.2</td></tr></tbody></table></div>"
       ],
       "text/plain": [
-       "DynamicPodStream(kernel=Join, upstreams=(ArrowTableStream(table=['id', 'a', 'b'], tag_columns=('id',)), ArrowTableStream(table=['id', 'c', 'd'], tag_columns=('id',))))"
+       "DynamicPodStream(kernel=Join, upstreams=(ArrowTableStream(table=['id', 'a', 'b'], key_columns=('id',)), ArrowTableStream(table=['id', 'c', 'd'], key_columns=('id',))))"
       ]
      },
      "execution_count": 39,
@@ -1178,7 +1178,7 @@
        "<small>shape: (2, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>*id</th><th>a</th><th>b</th><th>c</th><th>d</th></tr><tr><td>i64</td><td>i64</td><td>str</td><td>bool</td><td>f64</td></tr></thead><tbody><tr><td>0</td><td>1</td><td>&quot;x&quot;</td><td>true</td><td>1.1</td></tr><tr><td>1</td><td>2</td><td>&quot;y&quot;</td><td>false</td><td>2.2</td></tr></tbody></table></div>"
       ],
       "text/plain": [
-       "DynamicPodStream(kernel=Join, upstreams=(ArrowTableStream(table=['id', 'a', 'b'], tag_columns=('id',)), ArrowTableStream(table=['id', 'c', 'd'], tag_columns=('id',))))"
+       "DynamicPodStream(kernel=Join, upstreams=(ArrowTableStream(table=['id', 'a', 'b'], key_columns=('id',)), ArrowTableStream(table=['id', 'c', 'd'], key_columns=('id',))))"
       ]
      },
      "execution_count": 40,
@@ -1210,7 +1210,7 @@
        "<small>shape: (2, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>*id</th><th>a</th><th>b</th></tr><tr><td>i64</td><td>i64</td><td>str</td></tr></thead><tbody><tr><td>0</td><td>1</td><td>&quot;x&quot;</td></tr><tr><td>1</td><td>2</td><td>&quot;y&quot;</td></tr></tbody></table></div>"
       ],
       "text/plain": [
-       "DynamicPodStream(kernel=SemiJoin, upstreams=(ArrowTableStream(table=['id', 'a', 'b'], tag_columns=('id',)), ArrowTableStream(table=['id', 'c', 'd'], tag_columns=('id',))))"
+       "DynamicPodStream(kernel=SemiJoin, upstreams=(ArrowTableStream(table=['id', 'a', 'b'], key_columns=('id',)), ArrowTableStream(table=['id', 'c', 'd'], key_columns=('id',))))"
       ]
      },
      "execution_count": 41,
@@ -1242,7 +1242,7 @@
        "<small>shape: (3, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>*id</th><th>a_mapped</th></tr><tr><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>0</td><td>1</td></tr><tr><td>1</td><td>2</td></tr><tr><td>4</td><td>3</td></tr></tbody></table></div>"
       ],
       "text/plain": [
-       "DynamicPodStream(kernel=MapData, upstreams=(ArrowTableStream(table=['id', 'a', 'b'], tag_columns=('id',)),))"
+       "DynamicPodStream(kernel=MapData, upstreams=(ArrowTableStream(table=['id', 'a', 'b'], key_columns=('id',)),))"
       ]
      },
      "execution_count": 42,
@@ -1263,7 +1263,7 @@
     {
      "data": {
       "text/html": [
-       "DynamicPodStream[MapTags]\n",
+       "DynamicPodStream[MapKeys]\n",
        "<div><style>\n",
        ".dataframe > thead > tr,\n",
        ".dataframe > tbody > tr {\n",
@@ -1274,7 +1274,7 @@
        "<small>shape: (3, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>*name</th><th>a</th><th>b</th></tr><tr><td>i64</td><td>i64</td><td>str</td></tr></thead><tbody><tr><td>0</td><td>1</td><td>&quot;x&quot;</td></tr><tr><td>1</td><td>2</td><td>&quot;y&quot;</td></tr><tr><td>4</td><td>3</td><td>&quot;z&quot;</td></tr></tbody></table></div>"
       ],
       "text/plain": [
-       "DynamicPodStream(kernel=MapTags, upstreams=(ArrowTableStream(table=['id', 'a', 'b'], tag_columns=('id',)),))"
+       "DynamicPodStream(kernel=MapKeys, upstreams=(ArrowTableStream(table=['id', 'a', 'b'], key_columns=('id',)),))"
       ]
      },
      "execution_count": 43,
@@ -1283,7 +1283,7 @@
     }
    ],
    "source": [
-    "stream1.map_tags({\"id\": \"name\"})"
+    "stream1.map_keys({\"id\": \"name\"})"
    ]
   },
   {
@@ -1299,7 +1299,7 @@
    "id": "e4b7991a",
    "metadata": {},
    "source": [
-    "Now we have explored the basics of streams, tags, data, and operators (i.e. Join), it's time to explore the meat of `orcapod` -- `FunctionPod`s! Let's start by defining a very simple function pod that takes in two numbers and return the sum."
+    "Now that we have explored the basics of streams, keys, data, and operators (e.g. Join), it's time to explore the meat of `orcapod` -- `FunctionPod`s! Let's start by defining a very simple function pod that takes in two numbers and returns the sum."
    ]
   },
   {
@@ -1388,7 +1388,7 @@
     "    }\n",
     ")\n",
     "\n",
-    "input_stream = op.sources.ArrowTableSource(input_table, tag_columns=[\"id\"])"
+    "input_stream = op.sources.ArrowTableSource(input_table, key_columns=[\"id\"])"
    ]
   },
   {
diff --git a/notebooks/tutorials/02_parallel_execution_on_ray.ipynb b/notebooks/tutorials/02_parallel_execution_on_ray.ipynb
index 7a948559..98684e22 100644
--- a/notebooks/tutorials/02_parallel_execution_on_ray.ipynb
+++ b/notebooks/tutorials/02_parallel_execution_on_ray.ipynb
@@ -48,7 +48,7 @@
    "source": [
     "input_stream = op.streams.TableStream(\n",
     "    pa.Table.from_pylist([{\"id\": i, \"x\": i * 2, \"y\": i * 3} for i in range(50)]),\n",
-    "    tag_columns=[\"id\"],\n",
+    "    key_columns=[\"id\"],\n",
     ")"
    ]
   },
diff --git a/orcapod-design.md b/orcapod-design.md
index 8a55fda7..b0ad92ab 100644
--- a/orcapod-design.md
+++ b/orcapod-design.md
@@ -8,7 +8,7 @@
 
 The **datagram** is the universal immutable data container in OrcaPod. A datagram holds named columns with explicit type information and supports lazy conversion between Python dict and Apache Arrow representations. Datagrams come in two specialized forms:
 
-- **Tag** — metadata columns attached to a data for routing, filtering, and annotation. Tags carry additional **system tags** — framework-managed hidden provenance columns that are excluded from content identity by default.
+- **Key** — metadata columns attached to a data for routing, filtering, and annotation. Keys carry additional **system keys** — framework-managed hidden provenance columns that are excluded from content identity by default.
 
 - **Data** — data columns carrying the computational payload. Datas carry additional **source info** — per-column provenance tokens tracing each value back to its originating source and record.
 
@@ -16,13 +16,13 @@ Datagrams are always constructed from either a Python dict or an Arrow table/rec
 
 ### Stream
 
-A **stream** is a sequence of (Tag, Data) pairs over a shared schema. Streams define two column groups — tag columns and data columns — and provide lazy iteration, table materialization, and schema introspection. Streams are the fundamental data-flow abstraction: every source emits one, every operator consumes and produces them, and every function pod iterates over them.
+A **stream** is a sequence of (Key, Data) pairs over a shared schema. Streams define two column groups — key columns and data columns — and provide lazy iteration, table materialization, and schema introspection. Streams are the fundamental data-flow abstraction: every source emits one, every operator consumes and produces them, and every function pod iterates over them.
 
-The concrete implementation is `ArrowTableStream`, backed by an immutable PyArrow Table with explicit tag/data column assignment.
+The concrete implementation is `ArrowTableStream`, backed by an immutable PyArrow Table with explicit key/data column assignment.
 
 ### Source
 
-A **source** acts as a stream from external data with no upstream dependencies, forming the base case of the pipeline graph. Sources establish provenance: each row gets a source-info token and system tag columns encoding the source's identity.
+A **source** acts as a stream from external data with no upstream dependencies, forming the base case of the pipeline graph. Sources establish provenance: each row gets a source-info token and system key columns encoding the source's identity.
 
 - **Root source** — loads data from the external world (file, database, in-memory table). All root sources delegate to `ArrowTableSource`, which wraps the data in an `ArrowTableStream` with provenance annotations. Concrete subclasses include `CSVSource`, `DeltaTableSource`, `DataFrameSource`, `DictSource`, and `ListSource`.
 
@@ -32,13 +32,13 @@ Every source has a `source_id` — a canonical registry name used to register th
 
 ### Function Pod
 
-A **function pod** wraps a **data function** — a stateless computation that consumes a single data and produces an output data. Function pods never inspect tags or stream structure; they operate purely on data content. When given multiple input streams, a function pod joins them via a configurable multi-stream handler (defaulting to `Join`) before iterating.
+A **function pod** wraps a **data function** — a stateless computation that consumes a single data and produces an output data. Function pods never inspect keys or stream structure; they operate purely on data content. When given multiple input streams, a function pod joins them via a configurable multi-stream handler (defaulting to `Join`) before iterating.
 
 Data functions support pluggable executors (see **Data Function Executor System**). When an executor is set, `call()` routes through `executor.execute()` and `async_call()` routes through `executor.async_execute()`. When no executor is set, the function's native `direct_call()` / `direct_async_call()` is invoked directly. For `PythonDataFunction`, `direct_async_call` runs the synchronous function in a thread pool via `asyncio.run_in_executor`.
 
 Two execution models exist:
 
-- **FunctionPod + FunctionPodStream** — lazy, in-memory evaluation. The function pod processes each (tag, data) pair from the input stream on demand, caching results by index. When the attached executor declares `supports_concurrent_execution = True`, `iter_data()` materializes all remaining inputs and dispatches them concurrently via `asyncio.gather` over `async_call`, yielding results in order.
+- **FunctionPod + FunctionPodStream** — lazy, in-memory evaluation. The function pod processes each (key, data) pair from the input stream on demand, caching results by index. When the attached executor declares `supports_concurrent_execution = True`, `iter_data()` materializes all remaining inputs and dispatches them concurrently via `asyncio.gather` over `async_call`, yielding results in order.
 
 - **FunctionNode** — database-backed evaluation with incremental computation. Execution proceeds in two phases:
   1. **Phase 1**: yield cached results from the pipeline database for inputs whose hashes are already stored.
@@ -48,13 +48,13 @@ Two execution models exist:
 
 ### Operator
 
-An **operator** is a structural pod that transforms streams without synthesizing new data values. Every data value in an operator's output must be traceable to a concrete value already present in the input data — operators perform joins, merges, splits, selections, column renames, batching, and tag operations within this constraint.
+An **operator** is a structural pod that transforms streams without synthesizing new data values. Every data value in an operator's output must be traceable to a concrete value already present in the input data — operators perform joins, merges, splits, selections, column renames, batching, and key operations within this constraint.
 
 Operators are subclasses of `StaticOutputPod` organized by input arity:
 
 | Base Class | Arity | Examples |
 |---|---|---|
-| `UnaryOperator` | Exactly 1 input | Batch, SelectTagColumns, DropDataColumns, MapTags, MapData, PolarsFilter |
+| `UnaryOperator` | Exactly 1 input | Batch, SelectKeyColumns, DropDataColumns, MapKeys, MapData, PolarsFilter |
 | `BinaryOperator` | Exactly 2 inputs | MergeJoin, SemiJoin |
 | `NonZeroInputOperator` | 1 or more inputs | Join |
 
@@ -69,7 +69,7 @@ Every operator inherits a default barrier-mode `async_execute` from its base cla
 ## Operator Catalog
 
 ### Join
-Variable-arity inner join on shared tag columns. Non-overlapping data columns are required — colliding data columns raise `InputValidationError`. Tag schema is the union of all input tag schemas; data schema is the union. Inputs are canonically ordered by `pipeline_hash` for deterministic system tag column naming. Commutative (declared via `frozenset` argument symmetry).
+Variable-arity inner join on shared key columns. Non-overlapping data columns are required — colliding data columns raise `InputValidationError`. Key schema is the union of all input key schemas; data schema is the union. Inputs are canonically ordered by `pipeline_hash` for deterministic system key column naming. Commutative (declared via `frozenset` argument symmetry).
 
 ### MergeJoin
 Binary inner join that handles colliding data columns by merging their values into sorted `list[T]`. Colliding columns must have identical types. Non-colliding columns are kept as scalars. Corresponding source-info columns are reordered to match the sort order of their data column. Commutative — commutativity comes from sorting merged values, not from ordering input streams.
@@ -80,14 +80,14 @@ Binary semi-join: returns entries from the left stream that match on overlapping
 ### Batch
 Groups rows into batches of a configurable size. All column types become `list[T]`. Optionally drops incomplete final batches.
 
-### SelectTagColumns / SelectDataColumns
-Keep only specified tag or data columns. Optional `strict` mode raises on missing columns.
+### SelectKeyColumns / SelectDataColumns
+Keep only specified key or data columns. Optional `strict` mode raises on missing columns.
 
-### DropTagColumns / DropDataColumns
-Remove specified tag or data columns. `DropDataColumns` also removes associated source-info columns.
+### DropKeyColumns / DropDataColumns
+Remove specified key or data columns. `DropDataColumns` also removes associated source-info columns.
 
-### MapTags / MapData
-Rename tag or data columns via a name mapping. `MapData` automatically renames associated source-info columns. Optional `drop_unmapped` mode removes columns not in the mapping.
+### MapKeys / MapData
+Rename key or data columns via a name mapping. `MapData` automatically renames associated source-info columns. Optional `drop_unmapped` mode removes columns not in the mapping.
 
 ### PolarsFilter
 Applies Polars filtering predicates to rows. Output schema is unchanged from input.
@@ -96,7 +96,7 @@ Applies Polars filtering predicates to rows. Output schema is unchanged from inp
 
 ## Schema as a First-Class Citizen
 
-Every stream exposes `output_schema()` returning `(tag_schema, data_schema)` as `Schema` objects — immutable mappings from field names to Python types with support for optional fields. Schema is embedded explicitly at every level rather than resolved against a central registry, making streams fully self-describing.
+Every stream exposes `output_schema()` returning `(key_schema, data_schema)` as `Schema` objects — immutable mappings from field names to Python types with support for optional fields. Schema is embedded explicitly at every level rather than resolved against a central registry, making streams fully self-describing.
 
 The `ColumnConfig` dataclass controls what metadata columns are included in schema and data output:
 
@@ -105,25 +105,25 @@ The `ColumnConfig` dataclass controls what metadata columns are included in sche
 | `meta` | System metadata columns (`__` prefix) |
 | `context` | Data context column |
 | `source` | Source-info provenance columns (`_source_` prefix) |
-| `system_tags` | System tag columns (`_tag_` prefix) |
+| `system_keys` | System key columns (`_key_` prefix) |
 | `content_hash` | Per-row content hash column |
-| `sort_by_tags` | Whether to sort output by tag columns |
+| `sort_by_keys` | Whether to sort output by key columns |
 
-Operators predict their output schema — including system tag column names — without performing the actual computation.
+Operators predict their output schema — including system key column names — without performing the actual computation.
 
 ---
 
-## Tags
+## Keys
 
-Tags are key-value pairs attached to every data providing human-friendly metadata for navigation, filtering, and annotation. They are:
+Keys are named column values (name → value pairs) attached to every data, providing human-friendly metadata for navigation, filtering, and annotation. They are:
 
 - **Non-authoritative** — never used for cache lookup or pod identity computation
-- **Auto-propagated** — tags flow forward through the pipeline automatically
-- **The basis for joins** — operator pods join streams by matching tag keys, never by inspecting data content
+- **Auto-propagated** — keys flow forward through the pipeline automatically
+- **The basis for joins** — operator pods join streams by matching key columns, never by inspecting data content
 
-**Tag merging in joins:**
-- **Shared tag keys** — act as the join predicate; values must match for data to be joined
-- **Non-shared tag keys** — propagate freely into the joined output's tags
+**Key merging in joins:**
+- **Shared key columns** — act as the join predicate; values must match for data to be joined
+- **Non-shared key columns** — propagate freely into the joined output's keys
 
 ---
 
@@ -134,7 +134,7 @@ This is a strict and critical separation:
 | | Operator | Function Pod |
 |---|---|---|
 | Inspects data content | Never | Yes |
-| Inspects / uses tags | Yes | No |
+| Inspects / uses keys | Yes | No |
 | Can rename columns | Yes | No |
 | Stream arity | Configurable (unary/binary/N-ary) | Single stream in, single stream out |
 | Cached by content hash | No | Yes |
@@ -154,7 +154,7 @@ Data-inclusive identity capturing the precise semantic content of an object:
 
 | Component | What Gets Hashed |
 |---|---|
-| RootSource | Class name + tag columns + table content hash |
+| RootSource | Class name + key columns + table content hash |
 | DataFunction | URI (canonical name + output schema hash + version + type ID) |
 | FunctionPodStream | Function pod + argument symmetry of inputs |
 | Operator | Operator class + identity structure |
@@ -170,12 +170,12 @@ Schema-and-topology-only identity used for database path scoping. Excludes data
 
 | Component | What Gets Hashed |
 |---|---|
-| RootSource | `(tag_schema, data_schema)` — base case |
+| RootSource | `(key_schema, data_schema)` — base case |
 | DataFunction | Raw data function object (via content hash) |
 | FunctionPodStream | Function pod + input stream pipeline hashes |
 | Operator | Operator class + argument symmetry (pipeline hashes of inputs) |
 | ArrowTableStream | Producer + upstreams pipeline hashes (or schema if no producer) |
-| DerivedSource | Inherited from RootSource: `(tag_schema, data_schema)` |
+| DerivedSource | Inherited from RootSource: `(key_schema, data_schema)` |
 
 Pipeline hash uses a **resolver pattern** — a callback that routes `PipelineElementProtocol` objects through `pipeline_hash()` and other `ContentIdentifiable` objects through `content_hash()` — ensuring the correct identity chain is used for nested objects within a single hash computation.
 
@@ -224,70 +224,70 @@ Source info is **immutable through the pipeline** — set once when a source cre
 
 ---
 
-## System Tags
+## System Keys
 
-System tags are **framework-managed, hidden provenance columns** automatically attached to every data. Unlike user tags, they are authoritative and guaranteed to maintain perfect traceability from any result row back to its original source rows.
+System keys are **framework-managed, hidden provenance columns** automatically attached to every data. Unlike user keys, they are authoritative and guaranteed to maintain perfect traceability from any result row back to its original source rows.
 
 ### Flat Column Design
 
-System tags store `source_id` and `record_id` as **separate flat columns** rather than a combined string value. This is a deliberate design choice driven by the caching strategy (see **Caching Strategy** section below).
+System keys store `source_id` and `record_id` as **separate flat columns** rather than a combined string value. This is a deliberate design choice driven by the caching strategy (see **Caching Strategy** section below).
 
-In function pod cache tables, which are scoped to a structural pipeline hash and thus shared across different source combinations, filtering by source identity is a first-class operation. Storing `source_id` and `record_id` as separate columns makes this a straightforward equality predicate (`WHERE _tag_source_id::schema1 = 'X'`) with clean standard indexing, rather than a prefix match or string parse against a combined value.
+In function pod cache tables, which are scoped to a structural pipeline hash and thus shared across different source combinations, filtering by source identity is a first-class operation. Storing `source_id` and `record_id` as separate columns makes this a straightforward equality predicate (`WHERE _key_source_id::schema1 = 'X'`) with clean standard indexing, rather than a prefix match or string parse against a combined value.
 
-This is safe because within any given cache table, the system tag schema is fixed — every row has the same set of system tag fields, determined by the pipeline structure. The column count grows with pipeline depth (more join stages produce more system tag column pairs), but this growth is per-table-schema, not within a table. Different pipeline structures produce different tables with different column layouts, which is the expected and correct behavior.
+This is safe because within any given cache table, the system key schema is fixed — every row has the same set of system key fields, determined by the pipeline structure. The column count grows with pipeline depth (more join stages produce more system key column pairs), but this growth is per-table-schema, not within a table. Different pipeline structures produce different tables with different column layouts, which is the expected and correct behavior.
 
-### Source System Tags
+### Source System Keys
 
-Each source automatically adds a pair of system tag columns using the `_tag_` prefix convention:
+Each source automatically adds a pair of system key columns using the `_key_` prefix convention:
 
 ```
-_tag_source_id::{schema_hash}    — the source's canonical source_id
-_tag_record_id::{schema_hash}    — the row identifier within that source
+_key_source_id::{schema_hash}    — the source's canonical source_id
+_key_record_id::{schema_hash}    — the row identifier within that source
 ```
 
-Where `schema_hash` is derived from the source's `(tag_schema, data_schema)`. The `::` delimiter separates segments of the system tag column name, maintaining consistency with the extension pattern used downstream.
+Where `schema_hash` is derived from the source's `(key_schema, data_schema)`. The `::` delimiter separates segments of the system key column name, maintaining consistency with the extension pattern used downstream.
 
 Example at the root level:
 
 ```
-_tag_source_id::schema1   (e.g., value: "customers_2024")
-_tag_record_id::schema1   (e.g., value: "row_42" or "user_id=abc123")
+_key_source_id::schema1   (e.g., value: "customers_2024")
+_key_record_id::schema1   (e.g., value: "row_42" or "user_id=abc123")
 ```
 
 ### Three Evolution Rules
 
 **1. Name-Preserving (~90% of operations)**
-Single-stream operations (filter, select, rename, batch, map). System tag column names and values pass through unchanged.
+Single-stream operations (filter, select, rename, batch, map). System key column names and values pass through unchanged.
 
 **2. Name-Extending (multi-input operations)**
-Joins and merges. Each incoming system tag column name is extended by appending `::node_pipeline_hash:canonical_position`. The `::` delimiter separates each extension segment, and `:` separates the pipeline hash from the canonical position within a segment. Canonical position assignment respects commutativity — for commutative operations, inputs are sorted by `pipeline_hash` to ensure identical column names regardless of wiring order.
+Joins and merges. Each incoming system key column name is extended by appending `::node_pipeline_hash:canonical_position`. The `::` delimiter separates each extension segment, and `:` separates the pipeline hash from the canonical position within a segment. Canonical position assignment respects commutativity — for commutative operations, inputs are sorted by `pipeline_hash` to ensure identical column names regardless of wiring order.
 
-For example, joining two streams that each carry `_tag_source_id::schema1` / `_tag_record_id::schema1`, through a join with pipeline hash `abc123`:
+For example, joining two streams that each carry `_key_source_id::schema1` / `_key_record_id::schema1`, through a join with pipeline hash `abc123`:
 
 ```
-_tag_source_id::schema1::abc123:0    _tag_record_id::schema1::abc123:0    (first stream by canonical position)
-_tag_source_id::schema1::abc123:1    _tag_record_id::schema1::abc123:1    (second stream by canonical position)
+_key_source_id::schema1::abc123:0    _key_record_id::schema1::abc123:0    (first stream by canonical position)
+_key_source_id::schema1::abc123:1    _key_record_id::schema1::abc123:1    (second stream by canonical position)
 ```
 
 A subsequent join (pipeline hash `def456`) over those results would further extend:
 
 ```
-_tag_source_id::schema1::abc123:0::def456:0
-_tag_record_id::schema1::abc123:0::def456:0
+_key_source_id::schema1::abc123:0::def456:0
+_key_record_id::schema1::abc123:0::def456:0
 ```
 
-The full column name is a chain of `::` delimited segments tracing the provenance path: `_tag_{field}::{source_schema_hash}::{join1_hash}:{position}::{join2_hash}:{position}::...`
+The full column name is a chain of `::` delimited segments tracing the provenance path: `_key_{field}::{source_schema_hash}::{join1_hash}:{position}::{join2_hash}:{position}::...`
 
 **3. Type-Evolving (aggregation operations)**
 Batch and similar grouping operations. Column names are unchanged but types evolve: `str → list[str]` as values collect all contributing source row IDs. Both `source_id` and `record_id` columns evolve independently.
 
-### System Tag Value Sorting
+### System Key Value Sorting
 
-For commutative operators (Join, MergeJoin), system tag values from same-`pipeline_hash` streams are sorted per row after the join. This ensures `Op(A, B)` and `Op(B, A)` produce identical system tag columns and values.
+For commutative operators (Join, MergeJoin), system key values from same-`pipeline_hash` streams are sorted per row after the join. This ensures `Op(A, B)` and `Op(B, A)` produce identical system key columns and values.
 
 ### Schema Prediction
 
-Operators predict output system tag column names at schema time — without performing the actual computation — by computing `pipeline_hash` values and canonical positions. This is exposed via `output_schema(columns={"system_tags": True})`.
+Operators predict output system key column names at schema time — without performing the actual computation — by computing `pipeline_hash` values and canonical positions. This is exposed via `output_schema(columns={"system_keys": True})`.
 
 ---
 
@@ -299,7 +299,7 @@ OrcaPod uses a differentiated caching strategy across its three pod types — so
 
 **Cache table identity:** Canonical source identity (content hash).
 
-Each source gets its own dedicated cache table. Sources are provenance roots — there is no upstream system tag mechanism to disambiguate rows from different sources within a shared table. A cached source table represents a cumulative record of all data ever observed from that specific source.
+Each source gets its own dedicated cache table. Sources are provenance roots — there is no upstream system key mechanism to disambiguate rows from different sources within a shared table. A cached source table represents a cumulative record of all data ever observed from that specific source.
 
 **Behavior:**
 - Cache is **always on** by default.
@@ -314,23 +314,23 @@ Each source gets its own dedicated cache table. Sources are provenance roots —
 Function pod caching is split into two tiers:
 
 1. **Data-level cache (global):** Maps input data hash → output data. Shared globally across all pipelines, enabling identical function calls to reuse results regardless of context.
-2. **Tag-level cache (per structural pipeline):** Maps tag → input data hash. Scoped to the structural pipeline hash.
+2. **Key-level cache (per structural pipeline):** Maps key → input data hash. Scoped to the structural pipeline hash.
 
-**Tag-level cache table identity:** Structural pipeline hash (`pipeline_hash()`).
+**Key-level cache table identity:** Structural pipeline hash (`pipeline_hash()`).
 
-A single cache table is used for all runs of structurally identical pipelines (same tag and data schemas at source, followed by the same sequence of operator and function pods), regardless of which specific source combinations were involved. This is safe because function pods operate on individual data independently — each cached mapping is self-contained and valid regardless of what other rows exist in the table.
+A single cache table is used for all runs of structurally identical pipelines (same key and data schemas at source, followed by the same sequence of operator and function pods), regardless of which specific source combinations were involved. This is safe because function pods operate on individual data independently — each cached mapping is self-contained and valid regardless of what other rows exist in the table.
 
 **Why structural hash, not content hash:**
-- System tags already carry full provenance, including source identity as separate queryable columns. Rows from different source combinations are distinguishable within a shared table via equality predicates on `source_id` columns (e.g., `WHERE _tag_source_id::schema1 = 'X'`).
+- System keys already carry full provenance, including source identity as separate queryable columns. Rows from different source combinations are distinguishable within a shared table via equality predicates on `source_id` columns (e.g., `WHERE _key_source_id::schema1 = 'X'`).
 - A shared table provides a natural **cross-source view** — comparing how the same analytical pipeline behaves across different source populations without needing cross-table joins.
-- Content-hash scoping would duplicate disambiguation that system tags already provide, violating the principle against redundant mechanisms.
+- Content-hash scoping would duplicate disambiguation that system keys already provide, violating the principle against redundant mechanisms.
 
 **Behavior:**
 - Cache is **always on** by default.
 - On a pipeline run, incoming data are scoped to the current source combination (determined by upstream source pods).
-- The function pod checks the tag-level cache for existing mappings among the incoming tag-data.
+- The function pod checks the key-level cache for existing mappings among the incoming `(key, data)` pairs.
 - **Cache hits** (from this or any prior run over the same structural pipeline) are yielded directly. Cross-source sharing falls out naturally because data-level computation is source-independent.
-- **Cache misses** trigger computation; results are stored in both the data-level and tag-level caches.
+- **Cache misses** trigger computation; results are stored in both the data-level and key-level caches.
 
 **Semantic guarantee:** The cache is a **correct reusable lookup**. Every entry is independently valid. The table as a whole is a historical record of all computations processed through this function within this structural pipeline context.
 
@@ -343,7 +343,7 @@ A single cache table is used for all runs of structurally identical pipelines (s
 Each unique combination of pipeline structure and source identities gets its own cache table. This reflects the fact that operator results are holistic — they depend on the entire input stream, not individual data.
 
 **Why content hash, not structural hash:**
-Operators compute over the stream (joins, aggregations, window functions). Their outputs are meaningful only as a complete set given a specific input. Unlike function pods, operator results cannot be safely mixed across source combinations within a shared table because the distributive property does not hold for most operators. For example, with a join: `(X ⋈ Y) ∪ (X' ⋈ Y') ≠ (X ∪ X') ⋈ (Y ∪ Y')`. The shared table would miss cross-terms `X ⋈ Y'` and `X' ⋈ Y`. Cache invalidation is also cleaner per-table (drop/mark stale) rather than selectively purging rows by system tag.
+Operators compute over the stream (joins, aggregations, window functions). Their outputs are meaningful only as a complete set given a specific input. Unlike function pods, operator results cannot be safely mixed across source combinations within a shared table because the distributive property does not hold for most operators. For example, with a join: `(X ⋈ Y) ∪ (X' ⋈ Y') ≠ (X ∪ X') ⋈ (Y ∪ Y')`. The shared table would miss cross-terms `X ⋈ Y'` and `X' ⋈ Y`. Cache invalidation is also cleaner per-table (drop/mark stale) rather than selectively purging rows by system key.
 
 **Critical correctness caveat:**
 Even scoped to content hash, operator caches are **not guaranteed to be complete** with respect to the full picture of all data ever yielded by the sources. Because sources may use canonical identity for their content hash, the same source identity may yield different data sets over time. The cache accumulates result rows across runs:
@@ -352,7 +352,7 @@ Even scoped to content hash, operator caches are **not guaranteed to be complete
 - Run 2: Sources yield `X'` and `Y'`. The operator computes `X' ⋈ Y'` and appends new rows to cache.
 - The cache now contains `(X ⋈ Y) ∪ (X' ⋈ Y')`, which is **not** equivalent to `(X ∪ X') ⋈ (Y ∪ Y')`.
 
-The operator cache is strictly an **append-only historical record**, not a cumulative materialization. Identical output rows across runs naturally deduplicate (keyed by `hash(tag + data + system_tag)`). Run-level grouping and tracking is managed separately outside the cache mechanism.
+The operator cache is strictly an **append-only historical record**, not a cumulative materialization. Identical output rows across runs naturally deduplicate (keyed by `hash(key + data + system_key)`). Run-level grouping and tracking is managed separately outside the cache mechanism.
 
 **Behavior:**
 - Cache is **off by default**. Operator computation is always triggered fresh in a typical run.
@@ -377,7 +377,7 @@ The operator cache is strictly an **append-only historical record**, not a cumul
 | Default state | Always on | Always on | Off |
 | Semantic role | Cumulative record | Reusable lookup | Historical record |
 | Correctness | Always correct | Always correct | Per-run snapshots only |
-| Cross-source sharing | N/A (one source per table) | Yes, via system tag columns | No (separate tables) |
+| Cross-source sharing | N/A (one source per table) | Yes, via system key columns | No (separate tables) |
 | Computation on cache hit | Dedup and merge | Skip (use cached result) | Recompute by default |
 
 The overall gradient: sources are always cached and always correct, function pods are always cached and always reusable, operators are optionally logged and never silently substituted. Each level directly follows from whether the computation is cumulative, independent, or holistic.
@@ -402,7 +402,7 @@ Function pods and operators use `pipeline_hash()` to scope their database tables
 
 ### Multi-Source Table Sharing
 
-Sources with identical schemas produce identical `pipeline_hash` values. When processed through the same pipeline structure, they share database tables automatically. Different source instances (e.g., `customers_2023`, `customers_2024`) coexist in the same table, differentiated by system tag values and record hashes. This enables natural cross-source analytics without separate table management.
+Sources with identical schemas produce identical `pipeline_hash` values. When processed through the same pipeline structure, they share database tables automatically. Different source instances (e.g., `customers_2023`, `customers_2024`) coexist in the same table, differentiated by system key values and record hashes. This enables natural cross-source analytics without separate table management.
 
 ---
 
@@ -424,7 +424,7 @@ Derived sources serve two purposes:
 
 Data provenance focuses on **data-generating entities only** — sources and function pods. Since operators never synthesize new data values, they leave no computational footprint on the data itself.
 
-The provenance graph is a **bipartite graph of sources and function pods**, with edges encoded as source info pointers per output field. Operator pod topology is captured implicitly in system tag column names and the pipeline Merkle chain but operators do not appear as nodes in the provenance graph.
+The provenance graph is a **bipartite graph of sources and function pods**, with edges encoded as source info pointers per output field. Operator pod topology is captured implicitly in system key column names and the pipeline Merkle chain but operators do not appear as nodes in the provenance graph.
 
 This means:
 - **Operators can be refactored** without invalidating data provenance
@@ -461,14 +461,14 @@ Every pipeline node — source, operator, or function pod — implements the `As
 
 ```python
 async def async_execute(
-    inputs: Sequence[ReadableChannel[tuple[Tag, Data]]],
-    output: WritableChannel[tuple[Tag, Data]],
+    inputs: Sequence[ReadableChannel[tuple[Key, Data]]],
+    output: WritableChannel[tuple[Key, Data]],
 ) -> None
 ```
 
-Nodes consume `(Tag, Data)` pairs from input channels and produce them to an output channel. This enables push-based, streaming execution where data flows through the pipeline as soon as it's available, with backpressure propagated via bounded channel buffers.
+Nodes consume `(Key, Data)` pairs from input channels and produce them to an output channel. This enables push-based, streaming execution where data flows through the pipeline as soon as it's available, with backpressure propagated via bounded channel buffers.
 
-**FunctionPod async strategy:** Streaming mode — each input `(tag, data)` is processed independently with semaphore-controlled concurrency. Uses `asyncio.TaskGroup` for structured concurrency.
+**FunctionPod async strategy:** Streaming mode — each input `(key, data)` is processed independently with semaphore-controlled concurrency. Uses `asyncio.TaskGroup` for structured concurrency.
 
 #### Operator Async Strategies
 
@@ -476,7 +476,7 @@ Each operator overrides `async_execute` with the most efficient streaming patter
 
 | Strategy | Description | Operators |
 |---|---|---|
-| **Per-row streaming** | Transform each `(Tag, Data)` independently as it arrives; zero buffering beyond the current row | SelectTagColumns, SelectDataColumns, DropTagColumns, DropDataColumns, MapTags, MapData |
+| **Per-row streaming** | Transform each `(Key, Data)` independently as it arrives; zero buffering beyond the current row | SelectKeyColumns, SelectDataColumns, DropKeyColumns, DropDataColumns, MapKeys, MapData |
 | **Accumulate-and-emit** | Buffer rows up to `batch_size`, emit full batches immediately, flush partial at end | Batch (`batch_size > 0`) |
 | **Build-probe** | Collect one side fully (build), then stream the other through a hash lookup (probe) | SemiJoin |
 | **Symmetric hash join** | Read both sides concurrently, buffer + index both, emit matches as they're found | Join (2 inputs) |
@@ -484,7 +484,7 @@ Each operator overrides `async_execute` with the most efficient streaming patter
 
 #### Per-Row Streaming (Unary Column/Map Operators)
 
-For operators that transform each row independently (column selection, column dropping, column renaming), the async path iterates `async for tag, data in inputs[0]` and applies the transformation per row. Column metadata (which columns to drop, the rename map, etc.) is computed lazily on the first row and cached for subsequent rows. This avoids materializing the entire input into an Arrow table, enabling true pipeline-level streaming where upstream producers and downstream consumers run concurrently.
+For operators that transform each row independently (column selection, column dropping, column renaming), the async path iterates `async for key, data in inputs[0]` and applies the transformation per row. Column metadata (which columns to drop, the rename map, etc.) is computed lazily on the first row and cached for subsequent rows. This avoids materializing the entire input into an Arrow table, enabling true pipeline-level streaming where upstream producers and downstream consumers run concurrently.
 
 #### Accumulate-and-Emit (Batch)
 
@@ -502,7 +502,7 @@ The 2-input Join uses a symmetric hash join — the same algorithm used by Apach
 2. Probe the opposite side's index for matching keys.
 3. Emit all matches immediately.
 
-When the first rows from both sides have arrived, the shared key columns are determined (intersection of tag column names). Any rows that arrived before shared keys were known are re-indexed and cross-matched in a one-time reconciliation step.
+When the first rows from both sides have arrived, the shared key columns are determined (intersection of key column names). Any rows that arrived before shared keys were known are re-indexed and cross-matched in a one-time reconciliation step.
 
 **Comparison with industry stream processors:**
 
@@ -651,7 +651,7 @@ Pipelines can be composed across boundaries:
 
 The strict operator / function pod boundary is central to OrcaPod's provenance guarantees: operators never synthesize values (provenance transparent), function pods always synthesize values (provenance tracked). This two-category model keeps provenance tracking simple and robust.
 
-However, certain common patterns require combining both behaviors in a single logical operation. The most common is **enrichment** — running a function on a data and appending the computed columns to the original data rather than replacing it. The naïve decomposition into `FunctionPod + Join` works but incurs unnecessary overhead: an intermediate stream is materialized only to be immediately joined back, and the join must re-match tags that trivially correspond because they came from the same input row.
+However, certain common patterns require combining both behaviors in a single logical operation. The most common is **enrichment** — running a function on a data and appending the computed columns to the original data rather than replacing it. The naïve decomposition into `FunctionPod + Join` works but incurs unnecessary overhead: an intermediate stream is materialized only to be immediately joined back, and the join must re-match keys that trivially correspond because they came from the same input row.
 
 ### Fused Pods as Optimization, Not Extension
 
@@ -678,10 +678,10 @@ enriched = AddResult(grade_pf).process(stream)
 
 Equivalent decomposition: `FunctionPod(pf).process(stream)` → `Join()(stream, computed)`.
 
-Efficiency gains: no intermediate stream materialization, no redundant tag matching, no broadcast/rejoin wiring. The async path streams row-by-row like FunctionPod.
+Efficiency gains: no intermediate stream materialization, no redundant key matching, no broadcast/rejoin wiring. The async path streams row-by-row like FunctionPod.
 
 Implementation constraints:
-- `output_schema()` returns `(input_tag_schema, input_data_schema | function_output_schema)`.
+- `output_schema()` returns `(input_key_schema, input_data_schema | function_output_schema)`.
 - Raises `InputValidationError` if function output keys collide with existing data column names.
 - `pipeline_hash` commits to the wrapped DataFunction's identity plus the upstream's pipeline hash (as if the decomposition were performed).
 - Source-info on computed columns references the DataFunction. Source-info on preserved columns passes through unchanged.
diff --git a/plan.md b/plan.md
index d2a42082..7e172fe3 100644
--- a/plan.md
+++ b/plan.md
@@ -16,13 +16,13 @@ Add `async_execute` to all four Node classes. Add cache-aware `async_call` to
 
 ```
 _FunctionPodBase (TraceableBase)
-  ├── process_data(tag, data)       → calls data_function.call(data)
+  ├── process_data(key, data)       → calls data_function.call(data)
   ├── FunctionPod
   │     ├── process() → FunctionPodStream
   │     └── async_execute()             → calls data_function.async_call(data) DIRECTLY
   │
   FunctionPodStream (StreamBase)
-  │   ├── _iter_data_sequential()    → calls _function_pod.process_data(tag, data) ✓
+  │   ├── _iter_data_sequential()    → calls _function_pod.process_data(key, data) ✓
   │   └── _iter_data_concurrent()    → calls _execute_concurrent(data_function, ...) DIRECTLY
   │
   FunctionNode (StreamBase)
@@ -31,10 +31,10 @@ _FunctionPodBase (TraceableBase)
   │   └── (no async_execute)
   │
   PersistentFunctionNode (FunctionNode)
-      ├── process_data(tag, data)   → calls _data_function.call(data, skip_cache_*=...)
+      ├── process_data(key, data)   → calls _data_function.call(data, skip_cache_*=...)
       │                                   then add_pipeline_record(...)
       ├── iter_data()                → Phase 1: replay from DB
-      │                                   Phase 2: calls self.process_data(tag, data) ✓
+      │                                   Phase 2: calls self.process_data(key, data) ✓
       └── (no async_execute)
 
 OperatorNode (StreamBase)
@@ -86,14 +86,14 @@ execution paths go through them — sequential, concurrent, and async. No direct
 `data_function.call()` or `data_function.async_call()` calls outside of these methods.
 
 ```
-_FunctionPodBase.process_data(tag, pkt)         → data_function.call(pkt)
-_FunctionPodBase.async_process_data(tag, pkt)    → await data_function.async_call(pkt)
+_FunctionPodBase.process_data(key, pkt)         → data_function.call(pkt)
+_FunctionPodBase.async_process_data(key, pkt)    → await data_function.async_call(pkt)
 
-FunctionNode.process_data(tag, pkt)              → self._function_pod.process_data(tag, pkt)
-FunctionNode.async_process_data(tag, pkt)        → await self._function_pod.async_process_data(tag, pkt)
+FunctionNode.process_data(key, pkt)              → self._function_pod.process_data(key, pkt)
+FunctionNode.async_process_data(key, pkt)        → await self._function_pod.async_process_data(key, pkt)
 
-PersistentFunctionNode.process_data(tag, pkt)    → cache check → self._function_pod.process_data → pipeline record
-PersistentFunctionNode.async_process_data(tag, pkt) → cache check → await self._function_pod.async_process_data → pipeline record
+PersistentFunctionNode.process_data(key, pkt)    → cache check → self._function_pod.process_data → pipeline record
+PersistentFunctionNode.async_process_data(key, pkt) → cache check → await self._function_pod.async_process_data → pipeline record
 ```
 
 Wait — there's a subtlety with PersistentFunctionNode. Today its `process_data` calls
@@ -105,11 +105,11 @@ needs to pass `skip_cache_*` kwargs that the base `process_data` doesn't accept.
 The cleanest structure:
 
 ```
-PersistentFunctionNode.process_data(tag, pkt)
+PersistentFunctionNode.process_data(key, pkt)
   → self._data_function.call(pkt, skip_cache_*=...)    # CachedDataFunction (sync)
   → self.add_pipeline_record(...)                         # pipeline DB (sync)
 
-PersistentFunctionNode.async_process_data(tag, pkt)
+PersistentFunctionNode.async_process_data(key, pkt)
   → await self._data_function.async_call(pkt, skip_cache_*=...)  # CachedDataFunction (async)
   → self.add_pipeline_record(...)                                   # pipeline DB (sync)
 ```
@@ -128,17 +128,17 @@ through `process_data` (sync).
 For **FunctionPodStream**, the target is the pod:
 ```python
 # concurrent
-await self._function_pod.async_process_data(tag, pkt)
+await self._function_pod.async_process_data(key, pkt)
 # fallback
-self._function_pod.process_data(tag, pkt)
+self._function_pod.process_data(key, pkt)
 ```
 
 For **FunctionNode**, the target is `self` — so overrides (PersistentFunctionNode) kick in:
 ```python
 # concurrent
-await self.async_process_data(tag, pkt)
+await self.async_process_data(key, pkt)
 # fallback
-self.process_data(tag, pkt)
+self.process_data(key, pkt)
 ```
 
 This means PersistentFunctionNode's concurrent path **automatically** gets cache checks +
@@ -184,10 +184,10 @@ Add alongside existing `process_data` (after line 180):
 
 ```python
 async def async_process_data(
-    self, tag: TagProtocol, data: DataProtocol
-) -> tuple[TagProtocol, DataProtocol | None]:
+    self, key: KeyProtocol, data: DataProtocol
+) -> tuple[KeyProtocol, DataProtocol | None]:
     """Async counterpart of ``process_data``."""
-    return tag, await self.data_function.async_call(data)
+    return key, await self.data_function.async_call(data)
 ```
 
 ### Step 2: Fix `FunctionPod.async_execute` to use `async_process_data`
@@ -197,11 +197,11 @@ async def async_process_data(
 Change the `process_one` inner function (lines 315-322):
 
 ```python
-async def process_one(tag: TagProtocol, data: DataProtocol) -> None:
+async def process_one(key: KeyProtocol, data: DataProtocol) -> None:
     try:
-        tag, result_data = await self.async_process_data(tag, data)
+        key, result_data = await self.async_process_data(key, data)
         if result_data is not None:
-            await output.send((tag, result_data))
+            await output.send((key, result_data))
     finally:
         if sem is not None:
             sem.release()
@@ -217,16 +217,16 @@ routing:
 ```python
 def _iter_data_concurrent(
     self,
-) -> Iterator[tuple[TagProtocol, DataProtocol]]:
+) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
     """Collect remaining inputs, execute concurrently, and yield results in order."""
     input_iter = self._cached_input_iterator
 
-    all_inputs: list[tuple[int, TagProtocol, DataProtocol]] = []
-    to_compute: list[tuple[int, TagProtocol, DataProtocol]] = []
-    for i, (tag, data) in enumerate(input_iter):
-        all_inputs.append((i, tag, data))
+    all_inputs: list[tuple[int, KeyProtocol, DataProtocol]] = []
+    to_compute: list[tuple[int, KeyProtocol, DataProtocol]] = []
+    for i, (key, data) in enumerate(input_iter):
+        all_inputs.append((i, key, data))
         if i not in self._cached_output_datas:
-            to_compute.append((i, tag, data))
+            to_compute.append((i, key, data))
     self._cached_input_iterator = None
 
     if to_compute:
@@ -238,30 +238,30 @@ def _iter_data_concurrent(
         if loop is not None:
             # Already in event loop — fall back to sequential sync
             results = [
-                self._function_pod.process_data(tag, pkt)
-                for _, tag, pkt in to_compute
+                self._function_pod.process_data(key, pkt)
+                for _, key, pkt in to_compute
             ]
         else:
             # No event loop — run concurrently via asyncio.run
-            async def _gather() -> list[tuple[TagProtocol, DataProtocol | None]]:
+            async def _gather() -> list[tuple[KeyProtocol, DataProtocol | None]]:
                 return list(
                     await asyncio.gather(
                         *[
-                            self._function_pod.async_process_data(tag, pkt)
-                            for _, tag, pkt in to_compute
+                            self._function_pod.async_process_data(key, pkt)
+                            for _, key, pkt in to_compute
                         ]
                     )
                 )
 
             results = asyncio.run(_gather())
 
-        for (i, _, _), (tag, output_data) in zip(to_compute, results):
-            self._cached_output_datas[i] = (tag, output_data)
+        for (i, _, _), (key, output_data) in zip(to_compute, results):
+            self._cached_output_datas[i] = (key, output_data)
 
     for i, *_ in all_inputs:
-        tag, data = self._cached_output_datas[i]
+        key, data = self._cached_output_datas[i]
         if data is not None:
-            yield tag, data
+            yield key, data
 ```
 
 **Note:** The method signature drops the `data_function` parameter — it no longer needs
@@ -270,7 +270,7 @@ it since it routes through `self._function_pod`.
 The `iter_data` method that calls this also needs updating — remove the `pf` argument:
 
 ```python
-def iter_data(self) -> Iterator[tuple[TagProtocol, DataProtocol]]:
+def iter_data(self) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
     if self.is_stale:
         self.clear_cache()
     if self._cached_input_iterator is not None:
@@ -280,9 +280,9 @@ def iter_data(self) -> Iterator[tuple[TagProtocol, DataProtocol]]:
             yield from self._iter_data_sequential()
     else:
         for i in range(len(self._cached_output_datas)):
-            tag, data = self._cached_output_datas[i]
+            key, data = self._cached_output_datas[i]
             if data is not None:
-                yield tag, data
+                yield key, data
 ```
 
 ### Step 4: Fix `FunctionNode._iter_data_sequential` to use `process_data`
@@ -292,12 +292,12 @@ def iter_data(self) -> Iterator[tuple[TagProtocol, DataProtocol]]:
 Change line 831 from:
 ```python
 output_data = self._data_function.call(data)
-self._cached_output_datas[i] = (tag, output_data)
+self._cached_output_datas[i] = (key, output_data)
 ```
 to:
 ```python
-tag, output_data = self.process_data(tag, data)
-self._cached_output_datas[i] = (tag, output_data)
+key, output_data = self.process_data(key, data)
+self._cached_output_datas[i] = (key, output_data)
 ```
 
 ### Step 5: Fix `FunctionNode._iter_data_concurrent` to use `async_process_data`
@@ -309,16 +309,16 @@ Same transformation as Step 3, but routing through `self` instead of `self._func
 ```python
 def _iter_data_concurrent(
     self,
-) -> Iterator[tuple[TagProtocol, DataProtocol]]:
+) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
     """Collect remaining inputs, execute concurrently, and yield results in order."""
     input_iter = self._cached_input_iterator
 
-    all_inputs: list[tuple[int, TagProtocol, DataProtocol]] = []
-    to_compute: list[tuple[int, TagProtocol, DataProtocol]] = []
-    for i, (tag, data) in enumerate(input_iter):
-        all_inputs.append((i, tag, data))
+    all_inputs: list[tuple[int, KeyProtocol, DataProtocol]] = []
+    to_compute: list[tuple[int, KeyProtocol, DataProtocol]] = []
+    for i, (key, data) in enumerate(input_iter):
+        all_inputs.append((i, key, data))
         if i not in self._cached_output_datas:
-            to_compute.append((i, tag, data))
+            to_compute.append((i, key, data))
     self._cached_input_iterator = None
 
     if to_compute:
@@ -330,30 +330,30 @@ def _iter_data_concurrent(
         if loop is not None:
             # Already in event loop — fall back to sequential sync
             results = [
-                self.process_data(tag, pkt)
-                for _, tag, pkt in to_compute
+                self.process_data(key, pkt)
+                for _, key, pkt in to_compute
             ]
         else:
             # No event loop — run concurrently via asyncio.run
-            async def _gather() -> list[tuple[TagProtocol, DataProtocol | None]]:
+            async def _gather() -> list[tuple[KeyProtocol, DataProtocol | None]]:
                 return list(
                     await asyncio.gather(
                         *[
-                            self.async_process_data(tag, pkt)
-                            for _, tag, pkt in to_compute
+                            self.async_process_data(key, pkt)
+                            for _, key, pkt in to_compute
                         ]
                     )
                 )
 
             results = asyncio.run(_gather())
 
-        for (i, _, _), (tag, output_data) in zip(to_compute, results):
-            self._cached_output_datas[i] = (tag, output_data)
+        for (i, _, _), (key, output_data) in zip(to_compute, results):
+            self._cached_output_datas[i] = (key, output_data)
 
     for i, *_ in all_inputs:
-        tag, data = self._cached_output_datas[i]
+        key, data = self._cached_output_datas[i]
         if data is not None:
-            yield tag, data
+            yield key, data
 ```
 
 **Critical difference from Step 3:** Uses `self.process_data` / `self.async_process_data`
@@ -376,16 +376,16 @@ FunctionNode currently has no `process_data`. Add delegation to the function pod
 
 ```python
 def process_data(
-    self, tag: TagProtocol, data: DataProtocol
-) -> tuple[TagProtocol, DataProtocol | None]:
+    self, key: KeyProtocol, data: DataProtocol
+) -> tuple[KeyProtocol, DataProtocol | None]:
     """Process a single data by delegating to the function pod."""
-    return self._function_pod.process_data(tag, data)
+    return self._function_pod.process_data(key, data)
 
 async def async_process_data(
-    self, tag: TagProtocol, data: DataProtocol
-) -> tuple[TagProtocol, DataProtocol | None]:
+    self, key: KeyProtocol, data: DataProtocol
+) -> tuple[KeyProtocol, DataProtocol | None]:
     """Async counterpart of ``process_data``."""
-    return await self._function_pod.async_process_data(tag, data)
+    return await self._function_pod.async_process_data(key, data)
 ```
 
 ### Step 8: Add `FunctionNode.async_execute`
@@ -397,15 +397,15 @@ Sequential streaming through `async_process_data`:
 ```python
 async def async_execute(
     self,
-    inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-    output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+    inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+    output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
 ) -> None:
     """Streaming async execution — process each data via async_process_data."""
     try:
-        async for tag, data in inputs[0]:
-            tag, result_data = await self.async_process_data(tag, data)
+        async for key, data in inputs[0]:
+            key, result_data = await self.async_process_data(key, data)
             if result_data is not None:
-                await output.send((tag, result_data))
+                await output.send((key, result_data))
     finally:
         await output.close()
 ```
@@ -453,11 +453,11 @@ PersistentFunctionNode already has `process_data` (line 1027-1066) which calls
 ```python
 async def async_process_data(
     self,
-    tag: TagProtocol,
+    key: KeyProtocol,
     data: DataProtocol,
     skip_cache_lookup: bool = False,
     skip_cache_insert: bool = False,
-) -> tuple[TagProtocol, DataProtocol | None]:
+) -> tuple[KeyProtocol, DataProtocol | None]:
     """Async counterpart of ``process_data``.
 
     Uses the CachedDataFunction's async_call for computation + result caching.
@@ -476,13 +476,13 @@ async def async_process_data(
             )
         )
         self.add_pipeline_record(
-            tag,
+            key,
             data,
             data_record_id=output_data.datagram_id,
             computed=result_computed,
         )
 
-    return tag, output_data
+    return key, output_data
 ```
 
 ### Step 11: Add `PersistentFunctionNode.async_execute` (two-phase)
@@ -494,8 +494,8 @@ Overrides `FunctionNode.async_execute`:
 ```python
 async def async_execute(
     self,
-    inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-    output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+    inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+    output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
 ) -> None:
     """Two-phase async execution: replay cached, then compute missing."""
     try:
@@ -503,24 +503,24 @@ async def async_execute(
         existing = self.get_all_records(columns={"meta": True})
         computed_hashes: set[str] = set()
         if existing is not None and existing.num_rows > 0:
-            tag_keys = self._input_stream.keys()[0]
+            key_keys = self._input_stream.keys()[0]
             hash_col = constants.INPUT_DATA_HASH_COL
             computed_hashes = set(
                 cast(list[str], existing.column(hash_col).to_pylist())
             )
             data_table = existing.drop([hash_col])
-            existing_stream = ArrowTableStream(data_table, tag_columns=tag_keys)
-            for tag, data in existing_stream.iter_data():
-                await output.send((tag, data))
+            existing_stream = ArrowTableStream(data_table, key_columns=key_keys)
+            for key, data in existing_stream.iter_data():
+                await output.send((key, data))
 
         # Phase 2: process data not already in the DB
-        async for tag, data in inputs[0]:
+        async for key, data in inputs[0]:
             input_hash = data.content_hash().to_string()
             if input_hash in computed_hashes:
                 continue
-            tag, output_data = await self.async_process_data(tag, data)
+            key, output_data = await self.async_process_data(key, data)
             if output_data is not None:
-                await output.send((tag, output_data))
+                await output.send((key, output_data))
     finally:
         await output.close()
 ```
@@ -534,8 +534,8 @@ Direct pass-through:
 ```python
 async def async_execute(
     self,
-    inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-    output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+    inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+    output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
 ) -> None:
     """Delegate to operator's async_execute."""
     await self._operator.async_execute(inputs, output)
@@ -549,7 +549,7 @@ async def async_execute(
 def _store_output_stream(self, stream: StreamProtocol) -> None:
     """Materialize stream and store in the pipeline database with per-row dedup."""
     output_table = stream.as_table(
-        columns={"source": True, "system_tags": True},
+        columns={"source": True, "system_keys": True},
     )
 
     arrow_hasher = self.data_context.arrow_hasher
@@ -595,8 +595,8 @@ def _compute_and_store(self) -> None:
 ```python
 async def async_execute(
     self,
-    inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-    output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+    inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+    output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
 ) -> None:
     """Async execution with cache mode handling.
 
@@ -608,13 +608,13 @@ async def async_execute(
         if self._cache_mode == CacheMode.REPLAY:
             self._replay_from_cache()
             assert self._cached_output_stream is not None
-            for tag, data in self._cached_output_stream.iter_data():
-                await output.send((tag, data))
+            for key, data in self._cached_output_stream.iter_data():
+                await output.send((key, data))
             return  # finally block closes output
 
         # OFF or LOG: delegate to operator, forward results downstream
-        intermediate = Channel[tuple[TagProtocol, DataProtocol]]()
-        collected: list[tuple[TagProtocol, DataProtocol]] = []
+        intermediate = Channel[tuple[KeyProtocol, DataProtocol]]()
+        collected: list[tuple[KeyProtocol, DataProtocol]] = []
 
         async def forward() -> None:
             async for item in intermediate.reader:
@@ -737,16 +737,16 @@ uv run pytest tests/ -x
 **Sync sequential path:**
 ```
 FunctionPodStream._iter_data_sequential
-  → self._function_pod.process_data(tag, pkt)       # already correct
+  → self._function_pod.process_data(key, pkt)       # already correct
     → data_function.call(pkt)
 
 FunctionNode._iter_data_sequential
-  → self.process_data(tag, pkt)                      # CHANGED: was _data_function.call(pkt)
-    → self._function_pod.process_data(tag, pkt)
+  → self.process_data(key, pkt)                      # CHANGED: was _data_function.call(pkt)
+    → self._function_pod.process_data(key, pkt)
       → data_function.call(pkt)
 
 PersistentFunctionNode._iter_data_sequential (inherited from FunctionNode)
-  → self.process_data(tag, pkt)                      # polymorphism kicks in
+  → self.process_data(key, pkt)                      # polymorphism kicks in
     → CachedDataFunction.call(pkt, skip_cache_*=...) # cache check + compute + record
     → self.add_pipeline_record(...)                     # pipeline DB
 ```
@@ -755,21 +755,21 @@ PersistentFunctionNode._iter_data_sequential (inherited from FunctionNode)
 ```
 FunctionPodStream._iter_data_concurrent
   → asyncio.run(gather(
-        self._function_pod.async_process_data(tag, pkt) ...   # CHANGED: was _execute_concurrent
+        self._function_pod.async_process_data(key, pkt) ...   # CHANGED: was _execute_concurrent
     ))
   OR (if event loop running):
-    self._function_pod.process_data(tag, pkt) ...             # fallback
+    self._function_pod.process_data(key, pkt) ...             # fallback
 
 FunctionNode._iter_data_concurrent
   → asyncio.run(gather(
-        self.async_process_data(tag, pkt) ...                 # CHANGED: was _execute_concurrent
+        self.async_process_data(key, pkt) ...                 # CHANGED: was _execute_concurrent
     ))
   OR (if event loop running):
-    self.process_data(tag, pkt) ...                           # fallback
+    self.process_data(key, pkt) ...                           # fallback
 
 PersistentFunctionNode._iter_data_concurrent (inherited from FunctionNode)
   → asyncio.run(gather(
-        self.async_process_data(tag, pkt) ...                 # polymorphism kicks in
+        self.async_process_data(key, pkt) ...                 # polymorphism kicks in
           → await CachedDataFunction.async_call(pkt)          # cache + compute
           → self.add_pipeline_record(...)                       # pipeline DB
     ))
@@ -778,18 +778,18 @@ PersistentFunctionNode._iter_data_concurrent (inherited from FunctionNode)
 **Async execution path:**
 ```
 FunctionPod.async_execute
-  → await self.async_process_data(tag, pkt)          # CHANGED: was data_function.async_call
+  → await self.async_process_data(key, pkt)          # CHANGED: was data_function.async_call
     → await data_function.async_call(pkt)
 
 FunctionNode.async_execute                              # NEW
-  → await self.async_process_data(tag, pkt)
-    → await self._function_pod.async_process_data(tag, pkt)
+  → await self.async_process_data(key, pkt)
+    → await self._function_pod.async_process_data(key, pkt)
       → await data_function.async_call(pkt)
 
 PersistentFunctionNode.async_execute                    # NEW (two-phase)
   Phase 1: emit from DB
   Phase 2:
-    → await self.async_process_data(tag, pkt)         # polymorphic override
+    → await self.async_process_data(key, pkt)         # polymorphic override
       → await CachedDataFunction.async_call(pkt)      # cache + compute
       → self.add_pipeline_record(...)                   # pipeline DB (sync)
 
diff --git a/src/orcapod/config.py b/src/orcapod/config.py
index f36a514a..34a25f85 100644
--- a/src/orcapod/config.py
+++ b/src/orcapod/config.py
@@ -7,7 +7,7 @@
 class Config:
     """Immutable configuration object."""
 
-    system_tag_hash_n_char: int = 12
+    system_key_hash_n_char: int = 12
     schema_hash_n_char: int = 12
     path_hash_n_char: int = 20
 
diff --git a/src/orcapod/core/base.py b/src/orcapod/core/base.py
index ddd178a1..c64d33fb 100644
--- a/src/orcapod/core/base.py
+++ b/src/orcapod/core/base.py
@@ -217,7 +217,7 @@ class PipelineElementBase(DataContextMixin, ABC):
     which provides self.data_context used by pipeline_hash().
 
     The only class that needs to override pipeline_identity_structure() in a
-    non-trivial way is RootSource, which returns (tag_schema, data_schema)
+    non-trivial way is RootSource, which returns (key_schema, data_schema)
     as the base case of the recursion. All other pipeline elements return
     structures built from the pipeline_hash() values of their upstream
     components — ContentHash objects are terminal in the semantic hasher, so
diff --git a/src/orcapod/core/cached_function_pod.py b/src/orcapod/core/cached_function_pod.py
index 81f1a1a7..52498809 100644
--- a/src/orcapod/core/cached_function_pod.py
+++ b/src/orcapod/core/cached_function_pod.py
@@ -12,7 +12,7 @@
     FunctionPodProtocol,
     DataProtocol,
     StreamProtocol,
-    TagProtocol,
+    KeyProtocol,
 )
 from orcapod.protocols.database_protocols import ArrowDatabaseProtocol
 from orcapod.protocols.observability_protocols import DataExecutionLoggerProtocol
@@ -26,11 +26,11 @@
 class CachedFunctionPod(WrappedFunctionPod):
     """Pod-level caching wrapper that intercepts ``process_data()``.
 
-    Caches at the ``process_data(tag, data)`` level using only the
+    Caches at the ``process_data(key, data)`` level using only the
     **input data content hash** as the cache key — the output of a
-    data function depends solely on the data, not the tag.
+    data function depends solely on the data, not the ``key`` argument.
 
-    Tag-level provenance tracking (tag + system tags + data hash) is
+    Key-level provenance tracking (key + system keys + data hash) is
     handled separately by ``FunctionNode.add_pipeline_record``.
 
     Uses a shared ``ResultCache`` for lookup/store/conflict-resolution
@@ -66,34 +66,34 @@ def record_path(self) -> tuple[str, ...]:
 
     def process_data(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
         *,
         logger: DataExecutionLoggerProtocol | None = None,
-    ) -> tuple[TagProtocol, DataProtocol | None]:
+    ) -> tuple[KeyProtocol, DataProtocol | None]:
         """Process a data with pod-level caching.
 
         The cache key is the input data content hash only — the function
-        output depends solely on the data, not the tag.  The output
+        output depends solely on the data, not the ``key`` argument.  The output
         data carries a ``RESULT_COMPUTED_FLAG`` meta value: ``True`` if
         freshly computed, ``False`` if retrieved from cache.
 
         Args:
-            tag: The tag associated with the data.
+            key: The key associated with the data.
             data: The input data to process.
             logger: Optional data execution logger.
 
         Returns:
-            A ``(tag, output_data)`` tuple; output_data is ``None``
+            A ``(key, output_data)`` tuple; output_data is ``None``
             if the inner function filters the data out.
         """
         cached = self._cache.lookup(data)
         if cached is not None:
             module_logger.info("Pod-level cache hit")
             cached = cached.with_meta_columns(**{self.RESULT_COMPUTED_FLAG: False})
-            return tag, cached
+            return key, cached
 
-        tag, output = self._function_pod.process_data(tag, data, logger=logger)
+        key, output = self._function_pod.process_data(key, data, logger=logger)
         if output is not None:
             pf = self._function_pod.data_function
             var_dg = Datagram(
@@ -108,15 +108,15 @@ def process_data(
             )
             self._cache.store(data, output, var_dg, exec_dg)
             output = output.with_meta_columns(**{self.RESULT_COMPUTED_FLAG: True})
-        return tag, output
+        return key, output
 
     async def async_process_data(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
         *,
         logger: DataExecutionLoggerProtocol | None = None,
-    ) -> tuple[TagProtocol, DataProtocol | None]:
+    ) -> tuple[KeyProtocol, DataProtocol | None]:
         """Async counterpart of ``process_data``.
 
         DB lookup and store are synchronous (DB protocol is sync), but the
@@ -127,10 +127,10 @@ async def async_process_data(
         if cached is not None:
             module_logger.info("Pod-level cache hit")
             cached = cached.with_meta_columns(**{self.RESULT_COMPUTED_FLAG: False})
-            return tag, cached
+            return key, cached
 
-        tag, output = await self._function_pod.async_process_data(
-            tag, data, logger=logger
+        key, output = await self._function_pod.async_process_data(
+            key, data, logger=logger
         )
         if output is not None:
             pf = self._function_pod.data_function
@@ -146,7 +146,7 @@ async def async_process_data(
             )
             self._cache.store(data, output, var_dg, exec_dg)
             output = output.with_meta_columns(**{self.RESULT_COMPUTED_FLAG: True})
-        return tag, output
+        return key, output
 
     def get_all_cached_outputs(
         self, include_system_columns: bool = False
diff --git a/src/orcapod/core/datagrams/__init__.py b/src/orcapod/core/datagrams/__init__.py
index 7065f27c..2aa41663 100644
--- a/src/orcapod/core/datagrams/__init__.py
+++ b/src/orcapod/core/datagrams/__init__.py
@@ -1,8 +1,8 @@
 from .datagram import Datagram
-from .tag_data import Data, Tag
+from .key_data import Data, Key
 
 __all__ = [
     "Datagram",
-    "Tag",
+    "Key",
     "Data",
 ]
diff --git a/src/orcapod/core/datagrams/tag_data.py b/src/orcapod/core/datagrams/key_data.py
similarity index 85%
rename from src/orcapod/core/datagrams/tag_data.py
rename to src/orcapod/core/datagrams/key_data.py
index f6d0c4c5..afef2d64 100644
--- a/src/orcapod/core/datagrams/tag_data.py
+++ b/src/orcapod/core/datagrams/key_data.py
@@ -1,11 +1,11 @@
 """
-Tag and Data — datagram subclasses with system-tags and source-info support.
+Key and Data — datagram subclasses with system-keys and source-info support.
 
-``Tag``
-    Extends ``Datagram`` with *system tags*: metadata fields whose names start with
-    ``constants.SYSTEM_TAG_PREFIX``.  System tags travel alongside the primary data
+``Key``
+    Extends ``Datagram`` with *system keys*: metadata fields whose names start with
+    ``constants.SYSTEM_KEY_PREFIX``.  System keys travel alongside the primary data
     but are excluded from content hashing and structural operations unless explicitly
-    requested via ``ColumnConfig(system_tags=True)``.
+    requested via ``ColumnConfig(system_keys=True)``.
 
 ``Data``
     Extends ``Datagram`` with *source information*: provenance tokens (strings or None)
@@ -37,27 +37,27 @@
 
 
 # ---------------------------------------------------------------------------
-# Tag
+# Key
 # ---------------------------------------------------------------------------
 
 
-class Tag(Datagram):
+class Key(Datagram):
     """
-    Datagram with system-tags support.
+    Datagram with system-keys support.
 
-    System tags are metadata fields whose names begin with
-    ``constants.SYSTEM_TAG_PREFIX``.  They are excluded from the primary data
+    System keys are metadata fields whose names begin with
+    ``constants.SYSTEM_KEY_PREFIX``.  They are excluded from the primary data
     representation (and therefore from content hashing) unless the caller requests
-    them via ``ColumnConfig(system_tags=True)``.
+    them via ``ColumnConfig(system_keys=True)``.
 
     Accepts the same inputs as ``Datagram`` (dict or Arrow table/batch).
-    System-tag fields found in the input are automatically extracted.
+    System-key fields found in the input are automatically extracted.
     """
 
     def __init__(
         self,
         data: "Mapping[str, DataValue] | pa.Table | pa.RecordBatch",
-        system_tags: "Mapping[str, DataValue] | None" = None,
+        system_keys: "Mapping[str, DataValue] | None" = None,
         meta_info: "Mapping[str, DataValue] | None" = None,
         python_schema: "SchemaLike | None" = None,
         data_context: "str | contexts.DataContext | None" = None,
@@ -69,11 +69,11 @@ def __init__(
         if isinstance(data, _pa.RecordBatch):
             data = _pa.Table.from_batches([data])
 
-        extracted_sys_tags: dict[str, DataValue]
+        extracted_sys_keys: dict[str, DataValue]
 
         if isinstance(data, _pa.Table):
-            # Arrow path: call super() first, then extract system-tag columns from
-            # self._data_table (same pattern as the legacy ArrowTag).
+            # Arrow path: call super() first, then extract system-key columns from
+            # self._data_table (same pattern as the legacy ArrowKey, formerly ArrowTag).
             super().__init__(
                 data,
                 meta_info=meta_info,
@@ -81,33 +81,33 @@ def __init__(
                 record_id=record_id,
                 **kwargs,
             )
-            sys_tag_cols = [
+            sys_key_cols = [
                 c
                 for c in self._data_table.column_names  # type: ignore[union-attr]
-                if c.startswith(constants.SYSTEM_TAG_PREFIX)
+                if c.startswith(constants.SYSTEM_KEY_PREFIX)
             ]
-            if sys_tag_cols:
-                extracted_sys_tags = (
+            if sys_key_cols:
+                extracted_sys_keys = (
                     self._data_context.type_converter.arrow_table_to_python_dicts(
-                        self._data_table.select(sys_tag_cols)  # type: ignore[union-attr]
+                        self._data_table.select(sys_key_cols)  # type: ignore[union-attr]
                     )[0]
                 )
-                self._data_table = self._data_table.drop_columns(sys_tag_cols)  # type: ignore[union-attr]
+                self._data_table = self._data_table.drop_columns(sys_key_cols)  # type: ignore[union-attr]
                 # Invalidate derived caches
                 self._data_arrow_schema = None
             else:
-                extracted_sys_tags = {}
+                extracted_sys_keys = {}
         else:
-            # Dict path: extract system-tag keys before calling super()
+            # Dict path: extract system-key columns before calling super()
             data_only = {
                 k: v
                 for k, v in data.items()
-                if not k.startswith(constants.SYSTEM_TAG_PREFIX)
+                if not k.startswith(constants.SYSTEM_KEY_PREFIX)
             }
-            extracted_sys_tags = {
+            extracted_sys_keys = {
                 k: v
                 for k, v in data.items()
-                if k.startswith(constants.SYSTEM_TAG_PREFIX)
+                if k.startswith(constants.SYSTEM_KEY_PREFIX)
             }
             super().__init__(
                 data_only,
@@ -118,28 +118,28 @@ def __init__(
                 **kwargs,
             )
 
-        self._system_tags: dict[str, DataValue] = {
-            **extracted_sys_tags,
-            **(system_tags or {}),
+        self._system_keys: dict[str, DataValue] = {
+            **extracted_sys_keys,
+            **(system_keys or {}),
         }
-        self._system_tags_python_schema: Schema = infer_python_schema_from_pylist_data(
-            [self._system_tags], default_type=str
+        self._system_keys_python_schema: Schema = infer_python_schema_from_pylist_data(
+            [self._system_keys], default_type=str
         )
-        self._system_tags_table: "pa.Table | None" = None
+        self._system_keys_table: "pa.Table | None" = None
 
     # ------------------------------------------------------------------
     # Internal helper
     # ------------------------------------------------------------------
 
-    def _ensure_system_tags_table(self) -> "pa.Table":
-        if self._system_tags_table is None:
-            self._system_tags_table = (
+    def _ensure_system_keys_table(self) -> "pa.Table":
+        if self._system_keys_table is None:
+            self._system_keys_table = (
                 self._data_context.type_converter.python_dicts_to_arrow_table(
-                    [self._system_tags],
-                    python_schema=self._system_tags_python_schema,
+                    [self._system_keys],
+                    python_schema=self._system_keys_python_schema,
                 )
             )
-        return self._system_tags_table
+        return self._system_keys_table
 
     # ------------------------------------------------------------------
     # Overrides
@@ -153,8 +153,8 @@ def keys(
     ) -> tuple[str, ...]:
         keys = super().keys(columns=columns, all_info=all_info)
         column_config = ColumnConfig.handle_config(columns, all_info=all_info)
-        if column_config.system_tags:
-            keys += tuple(self._system_tags.keys())
+        if column_config.system_keys:
+            keys += tuple(self._system_keys.keys())
         return keys
 
     def schema(
@@ -165,8 +165,8 @@ def schema(
     ) -> Schema:
         schema = super().schema(columns=columns, all_info=all_info)
         column_config = ColumnConfig.handle_config(columns, all_info=all_info)
-        if column_config.system_tags:
-            return Schema({**schema, **self._system_tags_python_schema})
+        if column_config.system_keys:
+            return Schema({**schema, **self._system_keys_python_schema})
         return schema
 
     def arrow_schema(
@@ -177,9 +177,9 @@ def arrow_schema(
     ) -> "pa.Schema":
         schema = super().arrow_schema(columns=columns, all_info=all_info)
         column_config = ColumnConfig.handle_config(columns, all_info=all_info)
-        if column_config.system_tags and self._system_tags:
+        if column_config.system_keys and self._system_keys:
             return arrow_utils.join_arrow_schemas(
-                schema, self._ensure_system_tags_table().schema
+                schema, self._ensure_system_keys_table().schema
             )
         return schema
 
@@ -191,8 +191,8 @@ def as_dict(
     ) -> "dict[str, DataValue]":
         result = super().as_dict(columns=columns, all_info=all_info)
         column_config = ColumnConfig.handle_config(columns, all_info=all_info)
-        if column_config.system_tags:
-            result.update(self._system_tags)
+        if column_config.system_keys:
+            result.update(self._system_keys)
         return result
 
     def as_table(
@@ -203,13 +203,13 @@ def as_table(
     ) -> "pa.Table":
         table = super().as_table(columns=columns, all_info=all_info)
         column_config = ColumnConfig.handle_config(columns, all_info=all_info)
-        if column_config.system_tags and self._system_tags:
-            table = arrow_utils.hstack_tables(table, self._ensure_system_tags_table())
+        if column_config.system_keys and self._system_keys:
+            table = arrow_utils.hstack_tables(table, self._ensure_system_keys_table())
         return table
 
-    def system_tags(self) -> "dict[str, DataValue]":
-        """Return a copy of the system-tags dict."""
-        return dict(self._system_tags)
+    def system_keys(self) -> "dict[str, DataValue]":
+        """Return a copy of the system-keys dict."""
+        return dict(self._system_keys)
 
     def as_datagram(
         self,
@@ -224,11 +224,11 @@ def as_datagram(
         )
 
     def copy(self, include_cache: bool = True, preserve_id: bool = False) -> Self:
-        new_tag = super().copy(include_cache=include_cache, preserve_id=preserve_id)
-        new_tag._system_tags = dict(self._system_tags)
-        new_tag._system_tags_python_schema = self._system_tags_python_schema
-        new_tag._system_tags_table = self._system_tags_table if include_cache else None
-        return new_tag
+        new_key = super().copy(include_cache=include_cache, preserve_id=preserve_id)
+        new_key._system_keys = dict(self._system_keys)
+        new_key._system_keys_python_schema = self._system_keys_python_schema
+        new_key._system_keys_table = self._system_keys_table if include_cache else None
+        return new_key
 
 
 # ---------------------------------------------------------------------------
diff --git a/src/orcapod/core/function_pod.py b/src/orcapod/core/function_pod.py
index dc0ea836..4d4166ed 100644
--- a/src/orcapod/core/function_pod.py
+++ b/src/orcapod/core/function_pod.py
@@ -22,7 +22,7 @@
     DataProtocol,
     PodProtocol,
     StreamProtocol,
-    TagProtocol,
+    KeyProtocol,
     TrackerManagerProtocol,
 )
 from orcapod.protocols.database_protocols import ArrowDatabaseProtocol
@@ -144,36 +144,36 @@ def _validate_input_schema(self, input_schema: Schema) -> None:
 
     def process_data(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
         *,
         logger: DataExecutionLoggerProtocol | None = None,
-    ) -> tuple[TagProtocol, DataProtocol | None]:
+    ) -> tuple[KeyProtocol, DataProtocol | None]:
         """Process a single data using the pod's data function.
 
         Args:
-            tag: The tag associated with the data.
+            key: The key associated with the data.
             data: The input data to process.
             logger: Optional DataExecutionLoggerProtocol for
                 recording captured I/O.
 
         Returns:
-            A ``(tag, output_data)`` tuple; output_data is ``None`` if
+            A ``(key, output_data)`` tuple; output_data is ``None`` if
             the function filters the data out.
         """
         result = self.data_function.call(data, logger=logger)
-        return tag, result
+        return key, result
 
     async def async_process_data(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
         *,
         logger: DataExecutionLoggerProtocol | None = None,
-    ) -> tuple[TagProtocol, DataProtocol | None]:
+    ) -> tuple[KeyProtocol, DataProtocol | None]:
         """Async counterpart of ``process_data``."""
         result = await self.data_function.async_call(data, logger=logger)
-        return tag, result
+        return key, result
 
     def handle_input_streams(self, *streams: StreamProtocol) -> StreamProtocol:
         """Handle multiple input streams by joining them if necessary.
@@ -225,7 +225,7 @@ def output_schema(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> tuple[Schema, Schema]:
-        tag_schema, incoming_data_schema = self.multi_stream_handler().output_schema(
+        key_schema, incoming_data_schema = self.multi_stream_handler().output_schema(
             *streams, columns=columns, all_info=all_info
         )
         # validate that incoming_data_schema is valid
@@ -233,7 +233,7 @@ def output_schema(
         # The output schema of the FunctionPodProtocol is determined by the data function
         # TODO: handle and extend to include additional columns
         # Namely, the source columns
-        return tag_schema, self.data_function.output_data_schema
+        return key_schema, self.data_function.output_data_schema
 
 
 class FunctionPod(_FunctionPodBase):
@@ -343,13 +343,13 @@ def from_config(
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         pipeline_config: PipelineConfig | None = None,
     ) -> None:
         """Streaming async execution with per-data concurrency control.
 
-        Each input (tag, data) is processed independently. A semaphore
+        Each input (key, data) is processed independently. A semaphore
         controls how many data are in-flight concurrently.
         """
         try:
@@ -362,11 +362,11 @@ async def async_execute(
                 else None
             )
 
-            async def process_one(tag: TagProtocol, data: DataProtocol) -> None:
+            async def process_one(key: KeyProtocol, data: DataProtocol) -> None:
                 try:
-                    tag, result_data = await self.async_process_data(tag, data)
+                    key, result_data = await self.async_process_data(key, data)
                     if result_data is not None:
-                        await output.send((tag, result_data))
+                        await output.send((key, result_data))
                 except Exception as e:
                     # Swallow data-level errors so remaining data continue.
                     logger.debug("Data processing failed, skipping: %s", e, exc_info=True)
@@ -375,10 +375,10 @@ async def process_one(tag: TagProtocol, data: DataProtocol) -> None:
                         sem.release()
 
             async with asyncio.TaskGroup() as tg:
-                async for tag, data in inputs[0]:
+                async for key, data in inputs[0]:
                     if sem is not None:
                         await sem.acquire()
-                    tg.create_task(process_one(tag, data))
+                    tg.create_task(process_one(key, data))
         finally:
             await output.close()
 
@@ -396,13 +396,13 @@ def __init__(
         # Iterator acquired lazily on first use to avoid triggering upstream
         # computation during construction.
         self._cached_input_iterator: (
-            Iterator[tuple[TagProtocol, DataProtocol]] | None
+            Iterator[tuple[KeyProtocol, DataProtocol]] | None
         ) = None
         self._needs_iterator = True
 
         # DataProtocol-level caching (for the output data)
         self._cached_output_datas: dict[
-            int, tuple[TagProtocol, DataProtocol | None]
+            int, tuple[KeyProtocol, DataProtocol | None]
         ] = {}
         self._cached_output_table: pa.Table | None = None
         self._cached_content_hash_column: pa.Array | None = None
@@ -440,11 +440,11 @@ def keys(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> tuple[tuple[str, ...], tuple[str, ...]]:
-        tag_schema, data_schema = self.output_schema(
+        key_schema, data_schema = self.output_schema(
             columns=columns, all_info=all_info
         )
 
-        return tuple(tag_schema.keys()), tuple(data_schema.keys())
+        return tuple(key_schema.keys()), tuple(data_schema.keys())
 
     def output_schema(
         self,
@@ -472,10 +472,10 @@ def clear_cache(self) -> None:
         self._cached_content_hash_column = None
         self._update_modified_time()
 
-    def __iter__(self) -> Iterator[tuple[TagProtocol, DataProtocol]]:
+    def __iter__(self) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
         return self.iter_data()
 
-    def iter_data(self) -> Iterator[tuple[TagProtocol, DataProtocol]]:
+    def iter_data(self) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
         if self.is_stale:
             self.clear_cache()
         self._ensure_iterator()
@@ -487,45 +487,45 @@ def iter_data(self) -> Iterator[tuple[TagProtocol, DataProtocol]]:
         else:
             # Yield from snapshot of complete cache
             for i in range(len(self._cached_output_datas)):
-                tag, data = self._cached_output_datas[i]
+                key, data = self._cached_output_datas[i]
                 if data is not None:
-                    yield tag, data
+                    yield key, data
 
     def _iter_data_sequential(
         self,
-    ) -> Iterator[tuple[TagProtocol, DataProtocol]]:
+    ) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
         input_iter = self._cached_input_iterator
         assert input_iter is not None
-        for i, (tag, data) in enumerate(input_iter):
+        for i, (key, data) in enumerate(input_iter):
             if i in self._cached_output_datas:
                 # Use cached result
-                tag, data = self._cached_output_datas[i]
+                key, data = self._cached_output_datas[i]
                 if data is not None:
-                    yield tag, data
+                    yield key, data
             else:
                 # Process data
-                tag, output_data = self._function_pod.process_data(tag, data)
-                self._cached_output_datas[i] = (tag, output_data)
+                key, output_data = self._function_pod.process_data(key, data)
+                self._cached_output_datas[i] = (key, output_data)
                 if output_data is not None:
-                    yield tag, output_data
+                    yield key, output_data
 
         # Mark completion by releasing the iterator
         self._cached_input_iterator = None
 
     def _iter_data_concurrent(
         self,
-    ) -> Iterator[tuple[TagProtocol, DataProtocol]]:
+    ) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
         """Collect remaining inputs, execute concurrently, and yield results in order."""
         input_iter = self._cached_input_iterator
         assert input_iter is not None
 
         # Materialise remaining inputs and separate cached from uncached.
-        all_inputs: list[tuple[int, TagProtocol, DataProtocol]] = []
-        to_compute: list[tuple[int, TagProtocol, DataProtocol]] = []
-        for i, (tag, data) in enumerate(input_iter):
-            all_inputs.append((i, tag, data))
+        all_inputs: list[tuple[int, KeyProtocol, DataProtocol]] = []
+        to_compute: list[tuple[int, KeyProtocol, DataProtocol]] = []
+        for i, (key, data) in enumerate(input_iter):
+            all_inputs.append((i, key, data))
             if i not in self._cached_output_datas:
-                to_compute.append((i, tag, data))
+                to_compute.append((i, key, data))
         self._cached_input_iterator = None
 
         # Submit uncached data concurrently via async_process_data.
@@ -538,31 +538,31 @@ def _iter_data_concurrent(
             if loop is not None:
                 # Already in event loop — fall back to sequential sync
                 results = [
-                    self._function_pod.process_data(tag, pkt)
-                    for _, tag, pkt in to_compute
+                    self._function_pod.process_data(key, pkt)
+                    for _, key, pkt in to_compute
                 ]
             else:
 
-                async def _gather() -> list[tuple[TagProtocol, DataProtocol | None]]:
+                async def _gather() -> list[tuple[KeyProtocol, DataProtocol | None]]:
                     return list(
                         await asyncio.gather(
                             *[
-                                self._function_pod.async_process_data(tag, pkt)
-                                for _, tag, pkt in to_compute
+                                self._function_pod.async_process_data(key, pkt)
+                                for _, key, pkt in to_compute
                             ]
                         )
                     )
 
                 results = asyncio.run(_gather())
 
-            for (i, _, _), (tag, output_data) in zip(to_compute, results):
-                self._cached_output_datas[i] = (tag, output_data)
+            for (i, _, _), (key, output_data) in zip(to_compute, results):
+                self._cached_output_datas[i] = (key, output_data)
 
         # Yield everything in original order.
         for i, *_ in all_inputs:
-            tag, data = self._cached_output_datas[i]
+            key, data = self._cached_output_datas[i]
             if data is not None:
-                yield tag, data
+                yield key, data
 
     def as_table(
         self,
@@ -571,34 +571,34 @@ def as_table(
         all_info: bool = False,
     ) -> pa.Table:
         if self._cached_output_table is None:
-            all_tags = []
+            all_keys = []
             all_data = []
-            tag_schema, data_schema = None, None
-            for tag, data in self.iter_data():
-                if tag_schema is None:
-                    tag_schema = tag.arrow_schema(all_info=True)
+            key_schema, data_schema = None, None
+            for key, data in self.iter_data():
+                if key_schema is None:
+                    key_schema = key.arrow_schema(all_info=True)
                 if data_schema is None:
                     data_schema = data.arrow_schema(all_info=True)
                 # TODO: make use of arrow_compat dict
-                all_tags.append(tag.as_dict(all_info=True))
+                all_keys.append(key.as_dict(all_info=True))
                 all_data.append(data.as_dict(all_info=True))
 
             # TODO: re-verify the implemetation of this conversion
             converter = self.data_context.type_converter
 
             struct_data = converter.python_dicts_to_struct_dicts(all_data)
-            all_tags_as_tables: pa.Table = pa.Table.from_pylist(
-                all_tags, schema=tag_schema
+            all_keys_as_tables: pa.Table = pa.Table.from_pylist(
+                all_keys, schema=key_schema
             )
-            # drop context key column from tags table (guard: column absent on empty stream)
-            if constants.CONTEXT_KEY in all_tags_as_tables.column_names:
-                all_tags_as_tables = all_tags_as_tables.drop([constants.CONTEXT_KEY])
+            # drop context key column from keys table (guard: column absent on empty stream)
+            if constants.CONTEXT_KEY in all_keys_as_tables.column_names:
+                all_keys_as_tables = all_keys_as_tables.drop([constants.CONTEXT_KEY])
             all_data_as_tables: pa.Table = pa.Table.from_pylist(
                 struct_data, schema=data_schema
             )
 
             self._cached_output_table = arrow_utils.hstack_tables(
-                all_tags_as_tables, all_data_as_tables
+                all_keys_as_tables, all_data_as_tables
             )
         assert self._cached_output_table is not None, (
             "_cached_output_table should not be None here."
@@ -607,13 +607,13 @@ def as_table(
         column_config = ColumnConfig.handle_config(columns, all_info=all_info)
 
         drop_columns = []
-        if not column_config.system_tags:
-            # TODO: get system tags more effiicently
+        if not column_config.system_keys:
+            # TODO: get system keys more efficiently
             drop_columns.extend(
                 [
                     c
                     for c in self._cached_output_table.column_names
-                    if c.startswith(constants.SYSTEM_TAG_PREFIX)
+                    if c.startswith(constants.SYSTEM_KEY_PREFIX)
                 ]
             )
         if not column_config.source:
@@ -630,7 +630,7 @@ def as_table(
             if self._cached_content_hash_column is None:
                 content_hashes = []
                 # TODO: verify that order will be preserved
-                for tag, data in self.iter_data():
+                for key, data in self.iter_data():
                     content_hashes.append(data.content_hash().to_string())
                 self._cached_content_hash_column = pa.array(
                     content_hashes, type=pa.large_string()
@@ -647,7 +647,7 @@ def as_table(
                 hash_column_name, self._cached_content_hash_column
             )
 
-        if column_config.sort_by_tags:
+        if column_config.sort_by_keys:
             # TODO: reimplement using polars natively
             output_table_schema = output_table.schema
             output_table = (
diff --git a/src/orcapod/core/nodes/function_node.py b/src/orcapod/core/nodes/function_node.py
index 3d2bc830..bd766ba5 100644
--- a/src/orcapod/core/nodes/function_node.py
+++ b/src/orcapod/core/nodes/function_node.py
@@ -20,7 +20,7 @@
     DataFunctionProtocol,
     DataProtocol,
     StreamProtocol,
-    TagProtocol,
+    KeyProtocol,
     TrackerManagerProtocol,
 )
 from orcapod.protocols.database_protocols import ArrowDatabaseProtocol
@@ -119,7 +119,7 @@ def __init__(
 
         # stream-level caching state
         self._cached_output_datas: dict[
-            str, tuple[TagProtocol, DataProtocol | None]
+            str, tuple[KeyProtocol, DataProtocol | None]
         ] = {}
         self._cached_output_table: pa.Table | None = None
         self._cached_content_hash_column: pa.Array | None = None
@@ -168,7 +168,7 @@ def attach_databases(
 
         Creates a ``CachedFunctionPod`` wrapping the original function pod
         for result caching.  The pipeline database is used separately for
-        pipeline-level provenance records (tag + data hash).
+        pipeline-level provenance records (key + data hash).
 
         The databases are expected to be pre-scoped by the pipeline (via
         ``db.at(*pipeline_name).at("_result")`` etc.) so no additional path
@@ -459,13 +459,13 @@ def output_schema(
     ) -> tuple[Schema, Schema]:
         """Return output schema, using stored value in read-only mode."""
         if self._function_pod is None:
-            tag = Schema(self._stored_schema.get("tag", {}))
+            key = Schema(self._stored_schema.get("key", {}))
             data = Schema(self._stored_schema.get("data", {}))
-            return tag, data
-        tag_schema = self._input_stream.output_schema(
+            return key, data
+        key_schema = self._input_stream.output_schema(
             columns=columns, all_info=all_info
         )[0]
-        return tag_schema, self._data_function.output_data_schema
+        return key_schema, self._data_function.output_data_schema
 
     def keys(
         self,
@@ -474,13 +474,13 @@ def keys(
         all_info: bool = False,
     ) -> tuple[tuple[str, ...], tuple[str, ...]]:
         if self._function_pod is None:
-            tag_keys = tuple(self._stored_schema.get("tag", {}).keys())
+            key_keys = tuple(self._stored_schema.get("key", {}).keys())
             data_keys = tuple(self._stored_schema.get("data", {}).keys())
-            return tag_keys, data_keys
-        tag_schema, data_schema = self.output_schema(
+            return key_keys, data_keys
+        key_schema, data_schema = self.output_schema(
             columns=columns, all_info=all_info
         )
-        return tuple(tag_schema.keys()), tuple(data_schema.keys())
+        return tuple(key_schema.keys()), tuple(data_schema.keys())
 
     # ------------------------------------------------------------------
     # Pipeline path
@@ -540,24 +540,24 @@ def clear_cache(self) -> None:
 
     def execute_data(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
-    ) -> tuple[TagProtocol, DataProtocol | None]:
+    ) -> tuple[KeyProtocol, DataProtocol | None]:
         """Execute a single data: compute, persist, and cache.
 
         Internal method for orchestrators. The caller must guarantee that
-        the tag and data conform to the expected input schema (matching
+        the key and data conform to the expected input schema (matching
         ``self._input_stream``). No validation is performed.
 
         Args:
-            tag: The tag associated with the data.
+            key: The key associated with the data.
             data: The input data to process.
 
         Returns:
-            A ``(tag, output_data)`` tuple.
+            A ``(key, output_data)`` tuple.
         """
-        tag_out, result = self._process_data_internal(tag, data)
-        return tag_out, result
+        key_out, result = self._process_data_internal(key, data)
+        return key_out, result
 
     def execute(
         self,
@@ -565,7 +565,7 @@ def execute(
         *,
         observer: ExecutionObserverProtocol | None = None,
         error_policy: Literal["continue", "fail_fast"] = "continue",
-    ) -> list[tuple[TagProtocol, DataProtocol]]:
+    ) -> list[tuple[KeyProtocol, DataProtocol]]:
         """Execute all data from a stream: compute, persist, and cache.
 
         For each data: fire ``on_data_start``, check the in-memory cache
@@ -579,7 +579,7 @@ def execute(
                 ``"fail_fast"`` re-raises on the first failure.
 
         Returns:
-            Materialized list of (tag, output_data) pairs, excluding
+            Materialized list of (key, output_data) pairs, excluding
             ``None`` outputs and failed data.
         """
         from orcapod.pipeline.observer import NoOpObserver
@@ -590,13 +590,13 @@ def execute(
         obs = observer if observer is not None else NoOpObserver()
         ctx_obs = obs.contextualize(*self.node_identity_path)
 
-        tag_schema = input_stream.output_schema(columns={"system_tags": True})[0]
-        ctx_obs.on_node_start(node_label, node_hash, tag_schema=tag_schema)
+        key_schema = input_stream.output_schema(columns={"system_keys": True})[0]
+        ctx_obs.on_node_start(node_label, node_hash, key_schema=key_schema)
 
         # Collect upstream entries and resolve entry_ids
-        upstream_entries: list[tuple[TagProtocol, DataProtocol, str]] = [
-            (tag, data, self.compute_pipeline_entry_id(tag, data))
-            for tag, data in input_stream.iter_data()
+        upstream_entries: list[tuple[KeyProtocol, DataProtocol, str]] = [
+            (key, data, self.compute_pipeline_entry_id(key, data))
+            for key, data in input_stream.iter_data()
         ]
         entry_ids = [eid for _, _, eid in upstream_entries]
 
@@ -608,20 +608,20 @@ def execute(
         # and prevents spurious recomputation of already-processed data.
         self.get_cached_results(entry_ids=entry_ids)
 
-        output: list[tuple[TagProtocol, DataProtocol]] = []
-        for tag, data, entry_id in upstream_entries:
-            ctx_obs.on_data_start(node_label, tag, data)
+        output: list[tuple[KeyProtocol, DataProtocol]] = []
+        for key, data, entry_id in upstream_entries:
+            ctx_obs.on_data_start(node_label, key, data)
 
             if entry_id in self._cached_output_datas:
-                tag_out, result = self._cached_output_datas[entry_id]
-                ctx_obs.on_data_end(node_label, tag, data, result, cached=True)
+                key_out, result = self._cached_output_datas[entry_id]
+                ctx_obs.on_data_end(node_label, key, data, result, cached=True)
                 if result is not None:
-                    output.append((tag_out, result))
+                    output.append((key_out, result))
             else:
-                pkt_logger = ctx_obs.create_data_logger(tag, data)
+                pkt_logger = ctx_obs.create_data_logger(key, data)
                 try:
-                    tag_out, result = self._process_data_internal(
-                        tag, data, logger=pkt_logger
+                    key_out, result = self._process_data_internal(
+                        key, data, logger=pkt_logger
                     )
                 except Exception as exc:
                     logger.warning(
@@ -630,16 +630,16 @@ def execute(
                         exc,
                         exc_info=True,
                     )
-                    ctx_obs.on_data_crash(node_label, tag, data, exc)
+                    ctx_obs.on_data_crash(node_label, key, data, exc)
                     if error_policy == "fail_fast":
                         ctx_obs.on_node_end(node_label, node_hash)
                         raise
                 else:
                     ctx_obs.on_data_end(
-                        node_label, tag, data, result, cached=False
+                        node_label, key, data, result, cached=False
                     )
                     if result is not None:
-                        output.append((tag_out, result))
+                        output.append((key_out, result))
 
         ctx_obs.on_node_end(node_label, node_hash)
         # Mark this node as freshly computed so subsequent iter_data() calls
@@ -649,11 +649,11 @@ def execute(
 
     def _process_data_internal(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
         *,
         logger: DataExecutionLoggerProtocol | None = None,
-    ) -> tuple[TagProtocol, DataProtocol | None]:
+    ) -> tuple[KeyProtocol, DataProtocol | None]:
         """Core compute + persist + cache.
 
         Used by ``execute_data`` and ``execute``.
@@ -661,11 +661,11 @@ def _process_data_internal(
         Exceptions propagate to the caller — no error handling here.
 
         Returns:
-            A ``(tag, output_data)`` 2-tuple.
+            A ``(key, output_data)`` 2-tuple.
         """
         if self._cached_function_pod is not None:
-            tag_out, output_data = self._cached_function_pod.process_data(
-                tag, data, logger=logger
+            key_out, output_data = self._cached_function_pod.process_data(
+                key, data, logger=logger
             )
 
             if output_data is not None:
@@ -675,27 +675,27 @@ def _process_data_internal(
                     )
                 )
                 self.add_pipeline_record(
-                    tag,
+                    key,
                     data,
                     data_record_id=output_data.datagram_id,
                     computed=result_computed,
                 )
         else:
-            tag_out, output_data = self._function_pod.process_data(
-                tag, data, logger=logger
+            key_out, output_data = self._function_pod.process_data(
+                key, data, logger=logger
             )
 
         # Store by entry_id and invalidate derived caches
-        entry_id = self.compute_pipeline_entry_id(tag, data)
-        self._cached_output_datas[entry_id] = (tag_out, output_data)
+        entry_id = self.compute_pipeline_entry_id(key, data)
+        self._cached_output_datas[entry_id] = (key_out, output_data)
         self._cached_output_table = None
         self._cached_content_hash_column = None
 
-        return tag_out, output_data
+        return key_out, output_data
 
     def get_cached_results(
         self, entry_ids: list[str]
-    ) -> dict[str, tuple[TagProtocol, DataProtocol]]:
+    ) -> dict[str, tuple[KeyProtocol, DataProtocol]]:
         """Retrieve cached results for specific pipeline entry IDs.
 
         Checks in-memory cache first. Loads only truly missing entries from DB.
@@ -707,7 +707,7 @@ def get_cached_results(
             entry_ids: Pipeline entry IDs to look up.
 
         Returns:
-            Mapping from entry_id to ``(tag, output_data)`` for found entries.
+            Mapping from entry_id to ``(key, output_data)`` for found entries.
             Empty dict if no DB is attached or no matches found.
         """
         if self._cached_function_pod is None or not entry_ids:
@@ -730,23 +730,23 @@ def get_cached_results(
 
     async def _async_process_data_internal(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
         *,
         logger: DataExecutionLoggerProtocol | None = None,
-    ) -> tuple[TagProtocol, DataProtocol | None]:
+    ) -> tuple[KeyProtocol, DataProtocol | None]:
         """Async counterpart of ``_process_data_internal``.
 
         Computes via async path, writes pipeline provenance, caches by entry_id.
         Exceptions propagate.
 
         Returns:
-            A ``(tag, output_data)`` 2-tuple.
+            A ``(key, output_data)`` 2-tuple.
         """
         if self._cached_function_pod is not None:
-            tag_out, output_data = (
+            key_out, output_data = (
                 await self._cached_function_pod.async_process_data(
-                    tag, data, logger=logger
+                    key, data, logger=logger
                 )
             )
 
@@ -757,30 +757,30 @@ async def _async_process_data_internal(
                     )
                 )
                 self.add_pipeline_record(
-                    tag,
+                    key,
                     data,
                     data_record_id=output_data.datagram_id,
                     computed=result_computed,
                 )
         else:
-            tag_out, output_data = (
+            key_out, output_data = (
                 await self._function_pod.async_process_data(
-                    tag, data, logger=logger
+                    key, data, logger=logger
                 )
             )
 
         # Store by entry_id and invalidate derived caches
-        entry_id = self.compute_pipeline_entry_id(tag, data)
-        self._cached_output_datas[entry_id] = (tag_out, output_data)
+        entry_id = self.compute_pipeline_entry_id(key, data)
+        self._cached_output_datas[entry_id] = (key_out, output_data)
         self._cached_output_table = None
         self._cached_content_hash_column = None
 
-        return tag_out, output_data
+        return key_out, output_data
 
     def compute_pipeline_entry_id(
-        self, tag: TagProtocol, input_data: DataProtocol
+        self, key: KeyProtocol, input_data: DataProtocol
     ) -> str:
-        """Compute a unique pipeline entry ID from tag + system tags + input data hash.
+        """Compute a unique pipeline entry ID from key + system keys + input data hash.
 
         ``NODE_CONTENT_HASH_COL`` is always included so that two runs processing
         identical inputs each get a distinct entry ID, regardless of table scope.
@@ -788,15 +788,15 @@ def compute_pipeline_entry_id(
         by the duplicate entry_id check.
 
         Args:
-            tag: The tag (including system tags).
+            key: The key (including system keys).
             input_data: The input data.
 
         Returns:
-            A hash string uniquely identifying this (tag, input_data, node run)
+            A hash string uniquely identifying this (key, input_data, node run)
             combination.
         """
-        tag_with_hash = (
-            tag.as_table(columns={"system_tags": True})
+        key_with_hash = (
+            key.as_table(columns={"system_keys": True})
             .append_column(
                 constants.INPUT_DATA_HASH_COL,
                 pa.array([input_data.content_hash().to_string()], type=pa.large_string()),
@@ -806,11 +806,11 @@ def compute_pipeline_entry_id(
                 pa.array([self.content_hash().to_string()], type=pa.large_string()),
             )
         )
-        return self.data_context.arrow_hasher.hash_table(tag_with_hash).to_string()
+        return self.data_context.arrow_hasher.hash_table(key_with_hash).to_string()
 
     def add_pipeline_record(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         input_data: DataProtocol,
         data_record_id: str,
         computed: bool,
@@ -819,14 +819,14 @@ def add_pipeline_record(
         """Add a pipeline record to the database for a processed data.
 
         The pipeline record stores:
-        - Tag columns (including system tags)
+        - Key columns (including system keys)
         - All source columns of the input data (provenance, not data)
         - Output data record ID (for joining with result records)
         - Input data data context key
         - Whether the result was freshly computed or cached
         """
         self._require_pipeline_database()
-        entry_id = self.compute_pipeline_entry_id(tag, input_data)
+        entry_id = self.compute_pipeline_entry_id(key, input_data)
 
         # Check for existing entry
         existing_record = None
@@ -869,9 +869,9 @@ def add_pipeline_record(
             }
         )
 
-        # Combine: tag (with system tags) + input source columns + meta columns
+        # Combine: key (with system keys) + input source columns + meta columns
         combined_record = arrow_utils.hstack_tables(
-            tag.as_table(columns={"system_tags": True}),
+            key.as_table(columns={"system_keys": True}),
             input_source_table,
             meta_table,
         )
@@ -892,7 +892,7 @@ def get_all_records(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> pa.Table | None:
-        """Return all computed results joined with their pipeline tag records.
+        """Return all computed results joined with their pipeline key records.
 
         Args:
             columns: Column configuration controlling which groups are included.
@@ -938,11 +938,11 @@ def get_all_records(
             drop_columns.extend(
                 c for c in joined.column_names if c.startswith(constants.SOURCE_PREFIX)
             )
-        if not column_config.system_tags and not column_config.all_info:
+        if not column_config.system_keys and not column_config.all_info:
             drop_columns.extend(
                 c
                 for c in joined.column_names
-                if c.startswith(constants.SYSTEM_TAG_PREFIX)
+                if c.startswith(constants.SYSTEM_KEY_PREFIX)
             )
         if drop_columns:
             joined = joined.drop([c for c in drop_columns if c in joined.column_names])
@@ -977,15 +977,15 @@ def as_source(self):
     def _load_cached_entries(
         self,
         entry_ids: list[str] | None = None,
-    ) -> "dict[str, tuple[TagProtocol, DataProtocol]]":
-        """Load (tag, data) pairs from pipeline DB + result DB.
+    ) -> "dict[str, tuple[KeyProtocol, DataProtocol]]":
+        """Load (key, data) pairs from pipeline DB + result DB.
 
         Args:
             entry_ids: If provided, load only these specific entry IDs.
                 If ``None``, load all records for this node.
 
         Returns:
-            dict mapping entry_id → (tag, data). Empty dict when either
+            dict mapping entry_id → (key, data). Empty dict when either
             database is None, records are empty, or no rows match.
 
         Does NOT mutate ``_cached_output_datas``.
@@ -1029,17 +1029,17 @@ def _load_cached_entries(
         if joined.num_rows == 0:
             return {}
 
-        # Derive tag keys: prefer input_stream when available; fall back to
+        # Derive key-schema columns: prefer input_stream when available; fall back to
         # taginfo column exclusion for CACHE_ONLY / deserialized nodes.
         if self._input_stream is not None:
-            tag_keys = self._input_stream.keys()[0]
+            key_keys = self._input_stream.keys()[0]
         else:
-            tag_keys = tuple(
+            key_keys = tuple(
                 c
                 for c in taginfo.column_names
                 if not c.startswith(constants.META_PREFIX)
                 and not c.startswith(constants.SOURCE_PREFIX)
-                and not c.startswith(constants.SYSTEM_TAG_PREFIX)
+                and not c.startswith(constants.SYSTEM_KEY_PREFIX)
                 and c != PIPELINE_ENTRY_ID_COL
                 and c != constants.NODE_CONTENT_HASH_COL
             )
@@ -1054,20 +1054,20 @@ def _load_cached_entries(
             or c == constants.NODE_CONTENT_HASH_COL
         ]
         data_table = joined.drop([c for c in drop_cols if c in joined.column_names])
-        stream = ArrowTableStream(data_table, tag_columns=tag_keys)
+        stream = ArrowTableStream(data_table, key_columns=key_keys)
 
-        loaded: dict[str, tuple[TagProtocol, DataProtocol]] = {}
-        for eid, (tag, data) in zip(entry_ids_col, stream.iter_data()):
-            loaded[eid] = (tag, data)
+        loaded: dict[str, tuple[KeyProtocol, DataProtocol]] = {}
+        for eid, (key, data) in zip(entry_ids_col, stream.iter_data()):
+            loaded[eid] = (key, data)
         return loaded
 
     async def _async_execute_cache_only(
         self,
-        output: "WritableChannel[tuple[TagProtocol, DataProtocol]]",
+        output: "WritableChannel[tuple[KeyProtocol, DataProtocol]]",
         *,
         observer: Any | None = None,
     ) -> None:
-        """Send all DB-cached (tag, data) pairs to *output*.
+        """Send all DB-cached (key, data) pairs to *output*.
 
         Used in ``CACHE_ONLY`` mode when the upstream is unavailable.
         Does not access ``_input_stream``.
@@ -1079,7 +1079,7 @@ async def _async_execute_cache_only(
         node_hash = self.content_hash().to_string()
         ctx_obs = obs.contextualize(*self.node_identity_path)
 
-        ctx_obs.on_node_start(node_label, node_hash, tag_schema=None)
+        ctx_obs.on_node_start(node_label, node_hash, key_schema=None)
         try:
             loaded = self._load_cached_entries()
             self._cached_output_datas.update(loaded)
@@ -1087,11 +1087,11 @@ async def _async_execute_cache_only(
                 self._cached_output_table = None
                 self._cached_content_hash_column = None
 
-            for tag, data in self._cached_output_datas.values():
+            for key, data in self._cached_output_datas.values():
                 if data is not None:
-                    ctx_obs.on_data_start(node_label, tag, data)
-                    ctx_obs.on_data_end(node_label, tag, data, data, cached=True)
-                    await output.send((tag, data))
+                    ctx_obs.on_data_start(node_label, key, data)
+                    ctx_obs.on_data_end(node_label, key, data, data, cached=True)
+                    await output.send((key, data))
             ctx_obs.on_node_end(node_label, node_hash)
         finally:
             await output.close()
@@ -1100,8 +1100,8 @@ async def _async_execute_cache_only(
     # Iteration
     # ------------------------------------------------------------------
 
-    def iter_data(self) -> Iterator[tuple[TagProtocol, DataProtocol]]:
-        """Yield all computed (tag, data) pairs for this node.
+    def iter_data(self) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
+        """Yield all computed (key, data) pairs for this node.
 
         Strictly read-only — never triggers computation. Callers must call
         ``run()`` or ``execute()`` first if they want results computed.
@@ -1130,8 +1130,8 @@ def iter_data(self) -> Iterator[tuple[TagProtocol, DataProtocol]]:
                     self._cached_output_table = None
                     self._cached_content_hash_column = None
             yield from (
-                (tag, pkt)
-                for tag, pkt in self._cached_output_datas.values()
+                (key, pkt)
+                for key, pkt in self._cached_output_datas.values()
                 if pkt is not None
             )
             return
@@ -1150,8 +1150,8 @@ def iter_data(self) -> Iterator[tuple[TagProtocol, DataProtocol]]:
                 self._cached_content_hash_column = None
 
         yield from (
-            (tag, pkt)
-            for tag, pkt in self._cached_output_datas.values()
+            (key, pkt)
+            for key, pkt in self._cached_output_datas.values()
             if pkt is not None
         )
 
@@ -1190,18 +1190,18 @@ def as_table(
         all_info: bool = False,
     ) -> pa.Table:
         if self._cached_output_table is None:
-            all_tags = []
+            all_keys = []
             all_data = []
-            tag_schema, data_schema = None, None
-            for tag, data in self.iter_data():
-                if tag_schema is None:
-                    tag_schema = tag.arrow_schema(all_info=True)
+            key_schema, data_schema = None, None
+            for key, data in self.iter_data():
+                if key_schema is None:
+                    key_schema = key.arrow_schema(all_info=True)
                 if data_schema is None:
                     data_schema = data.arrow_schema(all_info=True)
-                all_tags.append(tag.as_dict(all_info=True))
+                all_keys.append(key.as_dict(all_info=True))
                 all_data.append(data.as_dict(all_info=True))
 
-            if not all_tags:
+            if not all_keys:
                 self._cached_output_table = pa.table({})
 
             converter = self.data_context.type_converter
@@ -1218,17 +1218,17 @@ def as_table(
             struct_data = converter.python_dicts_to_struct_dicts(
                 all_data, python_schema=data_python_schema
             )
-            all_tags_as_tables: pa.Table = pa.Table.from_pylist(
-                all_tags, schema=tag_schema
+            all_keys_as_tables: pa.Table = pa.Table.from_pylist(
+                all_keys, schema=key_schema
             )
-            if constants.CONTEXT_KEY in all_tags_as_tables.column_names:
-                all_tags_as_tables = all_tags_as_tables.drop([constants.CONTEXT_KEY])
+            if constants.CONTEXT_KEY in all_keys_as_tables.column_names:
+                all_keys_as_tables = all_keys_as_tables.drop([constants.CONTEXT_KEY])
             all_data_as_tables: pa.Table = pa.Table.from_pylist(
                 struct_data, schema=data_schema
             )
 
             self._cached_output_table = arrow_utils.hstack_tables(
-                all_tags_as_tables, all_data_as_tables
+                all_keys_as_tables, all_data_as_tables
             )
         if self._cached_output_table is None:
             self._cached_output_table = pa.table({})
@@ -1236,12 +1236,12 @@ def as_table(
         column_config = ColumnConfig.handle_config(columns, all_info=all_info)
 
         drop_columns = []
-        if not column_config.system_tags:
+        if not column_config.system_keys:
             drop_columns.extend(
                 [
                     c
                     for c in self._cached_output_table.column_names
-                    if c.startswith(constants.SYSTEM_TAG_PREFIX)
+                    if c.startswith(constants.SYSTEM_KEY_PREFIX)
                 ]
             )
         if not column_config.source:
@@ -1269,7 +1269,7 @@ def as_table(
         if column_config.content_hash:
             if self._cached_content_hash_column is None:
                 content_hashes = []
-                for tag, data in self.iter_data():
+                for key, data in self.iter_data():
                     content_hashes.append(data.content_hash().to_string())
                 self._cached_content_hash_column = pa.array(
                     content_hashes, type=pa.large_string()
@@ -1286,7 +1286,7 @@ def as_table(
                 hash_column_name, self._cached_content_hash_column
             )
 
-        if column_config.sort_by_tags:
+        if column_config.sort_by_keys:
             output_table_schema = output_table.schema
             output_table = (
                 pl.DataFrame(output_table)
@@ -1302,8 +1302,8 @@ def as_table(
 
     async def async_execute(
         self,
-        input_channel: ReadableChannel[tuple[TagProtocol, DataProtocol]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        input_channel: ReadableChannel[tuple[KeyProtocol, DataProtocol]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         *,
         observer: ExecutionObserverProtocol | None = None,
     ) -> None:
@@ -1318,8 +1318,8 @@ async def async_execute(
         channel.
 
         Args:
-            input_channel: Single readable channel of (tag, data) pairs.
-            output: Writable channel for output (tag, data) pairs.
+            input_channel: Single readable channel of (key, data) pairs.
+            output: Writable channel for output (key, data) pairs.
             observer: Optional execution observer for hooks.
         """
         from orcapod.pipeline.serialization import LoadStatus
@@ -1356,8 +1356,8 @@ async def async_execute(
                 else None
             )
 
-            tag_schema = self._input_stream.output_schema(columns={"system_tags": True})[0]
-            ctx_obs.on_node_start(node_label, node_hash, tag_schema=tag_schema)
+            key_schema = self._input_stream.output_schema(columns={"system_keys": True})[0]
+            ctx_obs.on_node_start(node_label, node_hash, key_schema=key_schema)
 
             if self._cached_function_pod is not None:
                 # Phase 1: build cache lookup from pipeline DB
@@ -1366,32 +1366,32 @@ async def async_execute(
                 if loaded:
                     self._cached_output_table = None
                     self._cached_content_hash_column = None
-                cached_by_entry_id: dict[str, tuple[TagProtocol, DataProtocol]] = dict(loaded)
+                cached_by_entry_id: dict[str, tuple[KeyProtocol, DataProtocol]] = dict(loaded)
 
                 # Phase 2: drive output from input channel — cached or compute
                 async def _process_one_db(
-                    tag: TagProtocol, data: DataProtocol
+                    key: KeyProtocol, data: DataProtocol
                 ) -> None:
-                    entry_id = self.compute_pipeline_entry_id(tag, data)
+                    entry_id = self.compute_pipeline_entry_id(key, data)
                     if entry_id in cached_by_entry_id:
-                        tag_out, result_data = cached_by_entry_id[entry_id]
-                        ctx_obs.on_data_start(node_label, tag, data)
+                        key_out, result_data = cached_by_entry_id[entry_id]
+                        ctx_obs.on_data_start(node_label, key, data)
                         ctx_obs.on_data_end(
-                            node_label, tag, data, result_data, cached=True
+                            node_label, key, data, result_data, cached=True
                         )
-                        await output.send((tag_out, result_data))
+                        await output.send((key_out, result_data))
                     else:
                         await self._async_execute_one_data(
-                            tag, data, output,
+                            key, data, output,
                             observer=ctx_obs,
                             node_label=node_label,
                             node_hash=node_hash,
                         )
 
                 async with asyncio.TaskGroup() as tg:
-                    async for tag, data in input_channel:
+                    async for key, data in input_channel:
                         async def _guarded_db(
-                            t: TagProtocol = tag, p: DataProtocol = data
+                            t: KeyProtocol = key, p: DataProtocol = data
                         ) -> None:
                             try:
                                 await _process_one_db(t, p)
@@ -1405,9 +1405,9 @@ async def _guarded_db(
             else:
                 # Simple async execution without DB
                 async with asyncio.TaskGroup() as tg:
-                    async for tag, data in input_channel:
+                    async for key, data in input_channel:
                         async def _guarded_simple(
-                            t: TagProtocol = tag, p: DataProtocol = data
+                            t: KeyProtocol = key, p: DataProtocol = data
                         ) -> None:
                             try:
                                 await self._async_execute_one_data(
@@ -1430,34 +1430,34 @@ async def _guarded_simple(
 
     async def _async_execute_one_data(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         *,
         observer: ExecutionObserverProtocol,
         node_label: str,
         node_hash: str,
     ) -> None:
         """Process one non-cached data in the async execute path."""
-        observer.on_data_start(node_label, tag, data)
-        pkt_logger = observer.create_data_logger(tag, data)
+        observer.on_data_start(node_label, key, data)
+        pkt_logger = observer.create_data_logger(key, data)
 
         try:
-            tag_out, result_data = await self._async_process_data_internal(
-                tag, data, logger=pkt_logger
+            key_out, result_data = await self._async_process_data_internal(
+                key, data, logger=pkt_logger
             )
         except Exception as exc:
             logger.warning(
                 "Data execution failed in %s: %s", node_label, exc,
                 exc_info=True,
             )
-            observer.on_data_crash(node_label, tag, data, exc)
+            observer.on_data_crash(node_label, key, data, exc)
         else:
             observer.on_data_end(
-                node_label, tag, data, result_data, cached=False
+                node_label, key, data, result_data, cached=False
             )
             if result_data is not None:
-                await output.send((tag_out, result_data))
+                await output.send((key_out, result_data))
 
     def __repr__(self) -> str:
         return (
diff --git a/src/orcapod/core/nodes/operator_node.py b/src/orcapod/core/nodes/operator_node.py
index 3cf44c7f..7d656280 100644
--- a/src/orcapod/core/nodes/operator_node.py
+++ b/src/orcapod/core/nodes/operator_node.py
@@ -17,7 +17,7 @@
 from orcapod.protocols.core_protocols import (
     DataProtocol,
     StreamProtocol,
-    TagProtocol,
+    KeyProtocol,
     TrackerManagerProtocol,
 )
 from orcapod.protocols.core_protocols.operator_pod import OperatorPodProtocol
@@ -349,13 +349,13 @@ def keys(
         all_info: bool = False,
     ) -> tuple[tuple[str, ...], tuple[str, ...]]:
         if self._operator is None:
-            tag_keys = tuple(self._stored_schema.get("tag", {}).keys())
+            key_keys = tuple(self._stored_schema.get("key", {}).keys())
             data_keys = tuple(self._stored_schema.get("data", {}).keys())
-            return tag_keys, data_keys
-        tag_schema, data_schema = self.output_schema(
+            return key_keys, data_keys
+        key_schema, data_schema = self.output_schema(
             columns=columns, all_info=all_info
         )
-        return tuple(tag_schema.keys()), tuple(data_schema.keys())
+        return tuple(key_schema.keys()), tuple(data_schema.keys())
 
     def output_schema(
         self,
@@ -365,9 +365,9 @@ def output_schema(
     ) -> tuple[Schema, Schema]:
         """Return output schema, using stored value in read-only mode."""
         if self._operator is None:
-            tag = Schema(self._stored_schema.get("tag", {}))
+            key = Schema(self._stored_schema.get("key", {}))
             data = Schema(self._stored_schema.get("data", {}))
-            return tag, data
+            return key, data
         return self._operator.output_schema(
             *self._input_streams,
             columns=columns,
@@ -448,7 +448,7 @@ def clear_cache(self) -> None:
     def _store_output_stream(self, stream: StreamProtocol) -> None:
         """Materialize stream and store in the pipeline database with per-row dedup."""
         output_table = stream.as_table(
-            columns={"source": True, "system_tags": True},
+            columns={"source": True, "system_keys": True},
         )
 
         # Always append the node content hash column first so that it is included
@@ -466,7 +466,7 @@ def _store_output_stream(self, stream: StreamProtocol) -> None:
             pa.repeat(self.content_hash().to_string(), n_rows).cast(pa.large_string()),
         )
 
-        # Per-row record hashes for dedup: hash(tag + data + system_tags + node_content_hash).
+        # Per-row record hashes for dedup: hash(key + data + system_keys + node_content_hash).
         arrow_hasher = self.data_context.arrow_hasher
         record_hashes = []
         for batch in output_table.to_batches():
@@ -502,10 +502,10 @@ def _make_empty_table(self) -> "pa.Table":
         Requires ``self._operator is not None`` (pre-existing limitation shared
         with ``_replay_from_cache``).
         """
-        tag_schema, data_schema = self.output_schema()
+        key_schema, data_schema = self.output_schema()
         type_converter = self.data_context.type_converter
         empty_fields: dict = {}
-        for name, py_type in {**tag_schema, **data_schema}.items():
+        for name, py_type in {**key_schema, **data_schema}.items():
             arrow_type = type_converter.python_type_to_arrow_type(py_type)
             empty_fields[name] = pa.array([], type=arrow_type)
         return pa.table(empty_fields)
@@ -549,8 +549,8 @@ def _load_cached_stream_from_db(self) -> "ArrowTableStream | None":
             ]
             if cols_to_drop:
                 records_table = records_table.drop(cols_to_drop)
-        tag_keys = self.keys()[0]
-        return ArrowTableStream(records_table, tag_columns=tag_keys)
+        key_keys = self.keys()[0]
+        return ArrowTableStream(records_table, key_columns=key_keys)
 
     def get_cached_output(self) -> StreamProtocol | None:
         """Return cached output stream in REPLAY mode, else None.
@@ -570,7 +570,7 @@ def execute(
         self,
         *input_streams: StreamProtocol,
         observer: ExecutionObserverProtocol | None = None,
-    ) -> list[tuple[TagProtocol, DataProtocol]]:
+    ) -> list[tuple[KeyProtocol, DataProtocol]]:
         """Execute input streams: compute, persist, and cache.
 
         Args:
@@ -578,7 +578,7 @@ def execute(
             observer: Optional execution observer for hooks.
 
         Returns:
-            Materialized list of (tag, data) pairs.
+            Materialized list of (key, data) pairs.
         """
         from orcapod.pipeline.observer import NoOpObserver
 
@@ -657,8 +657,8 @@ def _replay_from_cache(self) -> None:
             if cols_to_drop:
                 records = records.drop(cols_to_drop)
 
-        tag_keys = self.keys()[0]
-        self._cached_output_stream = ArrowTableStream(records, tag_columns=tag_keys)
+        key_keys = self.keys()[0]
+        self._cached_output_stream = ArrowTableStream(records, key_columns=key_keys)
         self._update_modified_time()
 
     def run(self) -> None:
@@ -689,8 +689,8 @@ def run(self) -> None:
             )
             self._update_modified_time()
 
-    def iter_data(self) -> Iterator[tuple[TagProtocol, DataProtocol]]:
-        """Return an iterator over (tag, data) pairs.
+    def iter_data(self) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
+        """Return an iterator over (key, data) pairs.
 
         Read-only: never triggers computation. Returns empty before ``run()``
         or ``execute()`` populates the cache. Call ``node.is_stale`` before
@@ -726,8 +726,8 @@ def as_table(
         if self._operator is None:
             return pa.table({})
         empty_records = self._make_empty_table()
-        tag_keys = self.keys()[0]
-        empty_stream = ArrowTableStream(empty_records, tag_columns=tag_keys)
+        key_keys = self.keys()[0]
+        empty_stream = ArrowTableStream(empty_records, key_columns=key_keys)
         return empty_stream.as_table(columns=columns, all_info=all_info)
 
     # ------------------------------------------------------------------
@@ -770,11 +770,11 @@ def get_all_records(
             drop_columns.extend(
                 c for c in results.column_names if c.startswith(constants.SOURCE_PREFIX)
             )
-        if not column_config.system_tags and not column_config.all_info:
+        if not column_config.system_keys and not column_config.all_info:
             drop_columns.extend(
                 c
                 for c in results.column_names
-                if c.startswith(constants.SYSTEM_TAG_PREFIX)
+                if c.startswith(constants.SYSTEM_KEY_PREFIX)
             )
         if drop_columns:
             results = results.drop(
@@ -814,8 +814,8 @@ def as_source(self):
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         *,
         observer: ExecutionObserverProtocol | None = None,
     ) -> None:
@@ -831,7 +831,7 @@ async def async_execute(
 
         Args:
             inputs: Sequence of readable channels from upstream nodes.
-            output: Writable channel for output (tag, data) pairs.
+            output: Writable channel for output (key, data) pairs.
             observer: Optional execution observer for hooks.
         """
         from orcapod.pipeline.observer import NoOpObserver
@@ -857,15 +857,15 @@ async def async_execute(
             if self._cache_mode == CacheMode.REPLAY:
                 self._replay_from_cache()
                 assert self._cached_output_stream is not None
-                for tag, data in self._cached_output_stream.iter_data():
-                    await output.send((tag, data))
+                for key, data in self._cached_output_stream.iter_data():
+                    await output.send((key, data))
                 ctx_obs.on_node_end(node_label, node_hash)
                 return  # finally block closes output
 
             # OFF or LOG: delegate to operator, forward results downstream
-            intermediate: Channel[tuple[TagProtocol, DataProtocol]] = Channel()
+            intermediate: Channel[tuple[KeyProtocol, DataProtocol]] = Channel()
             should_collect = self._cache_mode == CacheMode.LOG
-            collected: list[tuple[TagProtocol, DataProtocol]] = []
+            collected: list[tuple[KeyProtocol, DataProtocol]] = []
 
             async def forward() -> None:
                 async for item in intermediate.reader:
diff --git a/src/orcapod/core/nodes/source_node.py b/src/orcapod/core/nodes/source_node.py
index c189083c..ec4bc63a 100644
--- a/src/orcapod/core/nodes/source_node.py
+++ b/src/orcapod/core/nodes/source_node.py
@@ -34,7 +34,7 @@ def __init__(
     ):
         super().__init__(label=label, config=config)
         self.stream = stream
-        self._cached_results: list[tuple[cp.TagProtocol, cp.DataProtocol]] | None = (
+        self._cached_results: list[tuple[cp.KeyProtocol, cp.DataProtocol]] | None = (
             None
         )
 
@@ -212,9 +212,9 @@ def keys(
     ) -> tuple[tuple[str, ...], tuple[str, ...]]:
         if self.stream is None:
             stored = getattr(self, "_stored_schema", {})
-            tag_keys = tuple(stored.get("tag", {}).keys())
+            key_keys = tuple(stored.get("key", {}).keys())
             data_keys = tuple(stored.get("data", {}).keys())
-            return tag_keys, data_keys
+            return key_keys, data_keys
         return self.stream.keys(columns=columns, all_info=all_info)
 
     def output_schema(
@@ -225,9 +225,9 @@ def output_schema(
     ) -> tuple[Schema, Schema]:
         if self.stream is None:
             stored = getattr(self, "_stored_schema", {})
-            tag = Schema(stored.get("tag", {}))
+            key = Schema(stored.get("key", {}))
             data = Schema(stored.get("data", {}))
-            return tag, data
+            return key, data
         return self.stream.output_schema(columns=columns, all_info=all_info)
 
     @property
@@ -258,7 +258,7 @@ def as_table(
             )
         return self.stream.as_table(columns=columns, all_info=all_info)
 
-    def iter_data(self) -> Iterator[tuple[cp.TagProtocol, cp.DataProtocol]]:
+    def iter_data(self) -> Iterator[tuple[cp.KeyProtocol, cp.DataProtocol]]:
         if self.stream is None:
             raise RuntimeError(
                 "SourceNode in read-only mode has no stream data available"
@@ -271,14 +271,14 @@ def execute(
         self,
         *,
         observer: ExecutionObserverProtocol | None = None,
-    ) -> list[tuple[cp.TagProtocol, cp.DataProtocol]]:
+    ) -> list[tuple[cp.KeyProtocol, cp.DataProtocol]]:
         """Execute this source: materialize data and return.
 
         Args:
             observer: Optional execution observer for hooks.
 
         Returns:
-            List of (tag, data) tuples.
+            List of (key, data) tuples.
         """
         if self.stream is None:
             raise RuntimeError(
@@ -299,11 +299,11 @@ def run(self) -> None:
 
     async def async_execute(
         self,
-        output: WritableChannel[tuple[cp.TagProtocol, cp.DataProtocol]],
+        output: WritableChannel[tuple[cp.KeyProtocol, cp.DataProtocol]],
         *,
         observer: ExecutionObserverProtocol | None = None,
     ) -> None:
-        """Push all (tag, data) pairs from the wrapped stream to the output channel.
+        """Push all (key, data) pairs from the wrapped stream to the output channel.
 
         Args:
             output: Channel to write results to.
@@ -318,8 +318,8 @@ async def async_execute(
         try:
             if observer is not None:
                 observer.on_node_start(node_label, node_hash)
-            for tag, data in self.stream.iter_data():
-                await output.send((tag, data))
+            for key, data in self.stream.iter_data():
+                await output.send((key, data))
             if observer is not None:
                 observer.on_node_end(node_label, node_hash)
         finally:
diff --git a/src/orcapod/core/operators/__init__.py b/src/orcapod/core/operators/__init__.py
index bcc53910..d2784587 100644
--- a/src/orcapod/core/operators/__init__.py
+++ b/src/orcapod/core/operators/__init__.py
@@ -1,13 +1,13 @@
 from .batch import Batch
 from .column_selection import (
     DropDataColumns,
-    DropTagColumns,
+    DropKeyColumns,
     SelectDataColumns,
-    SelectTagColumns,
+    SelectKeyColumns,
 )
 from .filters import PolarsFilter
 from .join import Join
-from .mappers import MapData, MapTags
+from .mappers import MapData, MapKeys
 from .merge_join import MergeJoin
 from .semijoin import SemiJoin
 
@@ -15,12 +15,12 @@
     "Join",
     "MergeJoin",
     "SemiJoin",
-    "MapTags",
+    "MapKeys",
     "MapData",
     "Batch",
-    "SelectTagColumns",
+    "SelectKeyColumns",
     "SelectDataColumns",
-    "DropTagColumns",
+    "DropKeyColumns",
     "DropDataColumns",
     "PolarsFilter",
 ]
diff --git a/src/orcapod/core/operators/base.py b/src/orcapod/core/operators/base.py
index 6f035f25..74847267 100644
--- a/src/orcapod/core/operators/base.py
+++ b/src/orcapod/core/operators/base.py
@@ -11,7 +11,7 @@
     ArgumentGroup,
     DataProtocol,
     StreamProtocol,
-    TagProtocol,
+    KeyProtocol,
 )
 from orcapod.types import ColumnConfig, ContentHash, Schema
 
@@ -41,7 +41,7 @@ def unary_output_schema(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> tuple[Schema, Schema]:
-        """Return the (tag, data) output schemas for the given input stream."""
+        """Return the (key, data) output schemas for the given input stream."""
         ...
 
     def validate_inputs(self, *streams: StreamProtocol) -> None:
@@ -70,8 +70,8 @@ def argument_symmetry(self, streams: Collection[StreamProtocol]) -> ArgumentGrou
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         *,
         input_pipeline_hashes: Sequence[ContentHash] | None = None,
     ) -> None:
@@ -80,8 +80,8 @@ async def async_execute(
             rows = await inputs[0].collect()
             stream = self._materialize_to_stream(rows)
             result = self.static_process(stream)
-            for tag, data in result.iter_data():
-                await output.send((tag, data))
+            for key, data in result.iter_data():
+                await output.send((key, data))
         finally:
             await output.close()
 
@@ -154,8 +154,8 @@ def argument_symmetry(self, streams: Collection[StreamProtocol]) -> ArgumentGrou
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         *,
         input_pipeline_hashes: Sequence[ContentHash] | None = None,
     ) -> None:
@@ -167,8 +167,8 @@ async def async_execute(
             left_stream = self._materialize_to_stream(left_rows)
             right_stream = self._materialize_to_stream(right_rows)
             result = self.static_process(left_stream, right_stream)
-            for tag, data in result.iter_data():
-                await output.send((tag, data))
+            for key, data in result.iter_data():
+                await output.send((key, data))
         finally:
             await output.close()
 
diff --git a/src/orcapod/core/operators/batch.py b/src/orcapod/core/operators/batch.py
index 0795ede7..c33ce60e 100644
--- a/src/orcapod/core/operators/batch.py
+++ b/src/orcapod/core/operators/batch.py
@@ -6,7 +6,7 @@
 from orcapod.channels import ReadableChannel, WritableChannel
 from orcapod.core.operators.base import UnaryOperator
 from orcapod.core.streams import ArrowTableStream
-from orcapod.protocols.core_protocols import DataProtocol, StreamProtocol, TagProtocol
+from orcapod.protocols.core_protocols import DataProtocol, StreamProtocol, KeyProtocol
 from orcapod.types import ColumnConfig
 from orcapod.utils.lazy_module import LazyModule
 
@@ -45,9 +45,9 @@ def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
         This method should be implemented by subclasses to define the specific behavior of the binary operator.
         It takes two streams as input and returns a new stream as output.
         """
-        table = stream.as_table(columns={"source": True, "system_tags": True})
+        table = stream.as_table(columns={"source": True, "system_keys": True})
 
-        tag_columns, data_columns = stream.keys()
+        key_columns, data_columns = stream.keys()
 
         data_list = table.to_pylist()
 
@@ -78,7 +78,7 @@ def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
         batched_table = pa.Table.from_pylist(batched_data, schema=batched_schema)
         return ArrowTableStream(
             batched_table,
-            tag_columns=tag_columns,
+            key_columns=key_columns,
         )
 
     def unary_output_schema(
@@ -92,19 +92,19 @@ def unary_output_schema(
         This method should be implemented by subclasses to return the schemas of the input and output streams.
         It takes two streams as input and returns a tuple of schemas.
         """
-        tag_types, data_types = stream.output_schema(
+        key_types, data_types = stream.output_schema(
             columns=columns, all_info=all_info
         )
-        batched_tag_types = {k: list[v] for k, v in tag_types.items()}
+        batched_key_types = {k: list[v] for k, v in key_types.items()}
         batched_data_types = {k: list[v] for k, v in data_types.items()}
 
         # TODO: check if this is really necessary
-        return Schema(batched_tag_types), Schema(batched_data_types)
+        return Schema(batched_key_types), Schema(batched_data_types)
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         **kwargs: Any,
     ) -> None:
         """Streaming batch: emit full batches as they accumulate.
@@ -121,26 +121,26 @@ async def async_execute(
                 if rows:
                     stream = self._materialize_to_stream(rows)
                     result = self.unary_static_process(stream)
-                    for tag, data in result.iter_data():
-                        await output.send((tag, data))
+                    for key, data in result.iter_data():
+                        await output.send((key, data))
                 return
 
-            batch: list[tuple[TagProtocol, DataProtocol]] = []
-            async for tag, data in inputs[0]:
-                batch.append((tag, data))
+            batch: list[tuple[KeyProtocol, DataProtocol]] = []
+            async for key, data in inputs[0]:
+                batch.append((key, data))
                 if len(batch) >= self.batch_size:
                     stream = self._materialize_to_stream(batch)
                     result = self.unary_static_process(stream)
-                    for out_tag, out_data in result.iter_data():
-                        await output.send((out_tag, out_data))
+                    for out_key, out_data in result.iter_data():
+                        await output.send((out_key, out_data))
                     batch = []
 
             # Flush partial batch
             if batch and not self.drop_partial_batch:
                 stream = self._materialize_to_stream(batch)
                 result = self.unary_static_process(stream)
-                for out_tag, out_data in result.iter_data():
-                    await output.send((out_tag, out_data))
+                for out_key, out_data in result.iter_data():
+                    await output.send((out_key, out_data))
         finally:
             await output.close()
 
diff --git a/src/orcapod/core/operators/column_selection.py b/src/orcapod/core/operators/column_selection.py
index 58dab5f9..69174a08 100644
--- a/src/orcapod/core/operators/column_selection.py
+++ b/src/orcapod/core/operators/column_selection.py
@@ -6,7 +6,7 @@
 from orcapod.core.operators.base import UnaryOperator
 from orcapod.core.streams import ArrowTableStream
 from orcapod.errors import InputValidationError
-from orcapod.protocols.core_protocols import DataProtocol, StreamProtocol, TagProtocol
+from orcapod.protocols.core_protocols import DataProtocol, StreamProtocol, KeyProtocol
 from orcapod.system_constants import constants
 from orcapod.types import ColumnConfig, Schema
 from orcapod.utils.lazy_module import LazyModule
@@ -19,7 +19,7 @@
 logger = logging.getLogger(__name__)
 
 
-class SelectTagColumns(UnaryOperator):
+class SelectKeyColumns(UnaryOperator):
     """
     Operator that selects specified columns from a stream.
     """
@@ -32,7 +32,7 @@ def __init__(self, columns: str | Collection[str], strict: bool = True, **kwargs
         super().__init__(**kwargs)
 
     def to_config(self) -> dict[str, Any]:
-        """Serialize this SelectTagColumns operator to a config dict.
+        """Serialize this SelectKeyColumns operator to a config dict.
 
         Returns:
             A dict with ``class_name``, ``module_path``, and ``config`` keys,
@@ -46,23 +46,23 @@ def to_config(self) -> dict[str, Any]:
         return config
 
     def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
-        tag_columns, data_columns = stream.keys()
-        tags_to_drop = [c for c in tag_columns if c not in self.columns]
-        new_tag_columns = [c for c in tag_columns if c not in tags_to_drop]
+        key_columns, data_columns = stream.keys()
+        keys_to_drop = [c for c in key_columns if c not in self.columns]
+        new_key_columns = [c for c in key_columns if c not in keys_to_drop]
 
-        if len(new_tag_columns) == len(tag_columns):
-            logger.info("All tag columns are selected. Returning stream unaltered.")
+        if len(new_key_columns) == len(key_columns):
+            logger.info("All key columns are selected. Returning stream unaltered.")
             return stream
 
         table = stream.as_table(
-            columns={"source": True, "system_tags": True, "sort_by_tags": False}
+            columns={"source": True, "system_keys": True, "sort_by_keys": False}
         )
 
-        modified_table = table.drop_columns(list(tags_to_drop))
+        modified_table = table.drop_columns(list(keys_to_drop))
 
         return ArrowTableStream(
             modified_table,
-            tag_columns=new_tag_columns,
+            key_columns=new_key_columns,
         )
 
     def validate_unary_input(self, stream: StreamProtocol) -> None:
@@ -71,12 +71,12 @@ def validate_unary_input(self, stream: StreamProtocol) -> None:
         It takes two streams as input and raises an error if the inputs are not valid.
         """
         # TODO: remove redundant logic
-        tag_columns, data_columns = stream.keys()
+        key_columns, data_columns = stream.keys()
         columns_to_select = self.columns
-        missing_columns = set(columns_to_select) - set(tag_columns)
+        missing_columns = set(columns_to_select) - set(key_columns)
         if missing_columns and self.strict:
             raise InputValidationError(
-                f"Missing tag columns: {missing_columns}. Make sure all specified columns to select are present or use strict=False to ignore missing columns"
+                f"Missing key columns: {missing_columns}. Make sure all specified columns to select are present or use strict=False to ignore missing columns"
             )
 
     def unary_output_schema(
@@ -86,42 +86,42 @@ def unary_output_schema(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> tuple[Schema, Schema]:
-        tag_schema, data_schema = stream.output_schema(
+        key_schema, data_schema = stream.output_schema(
             columns=columns, all_info=all_info
         )
-        tag_columns, _ = stream.keys()
-        tags_to_drop = [tc for tc in tag_columns if tc not in self.columns]
+        key_columns, _ = stream.keys()
+        keys_to_drop = [tc for tc in key_columns if tc not in self.columns]
 
-        # this ensures all system tag columns are preserved
-        new_tag_schema = {k: v for k, v in tag_schema.items() if k not in tags_to_drop}
+        # this ensures all system key columns are preserved
+        new_key_schema = {k: v for k, v in key_schema.items() if k not in keys_to_drop}
 
-        return Schema(new_tag_schema), data_schema
+        return Schema(new_key_schema), data_schema
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         **kwargs: Any,
     ) -> None:
-        """Streaming: select tag columns per row without materializing."""
+        """Streaming: select key columns per row without materializing."""
         try:
-            tags_to_drop: list[str] | None = None
-            async for tag, data in inputs[0]:
-                if tags_to_drop is None:
-                    tag_keys = tag.keys()
+            keys_to_drop: list[str] | None = None
+            async for key, data in inputs[0]:
+                if keys_to_drop is None:
+                    key_keys = key.keys()
                     if self.strict:
-                        missing = set(self.columns) - set(tag_keys)
+                        missing = set(self.columns) - set(key_keys)
                         if missing:
                             raise InputValidationError(
-                                f"Missing tag columns: {missing}. Make sure all "
+                                f"Missing key columns: {missing}. Make sure all "
                                 f"specified columns to select are present or use "
                                 f"strict=False to ignore missing columns"
                             )
-                    tags_to_drop = [c for c in tag_keys if c not in self.columns]
-                if not tags_to_drop:
-                    await output.send((tag, data))
+                    keys_to_drop = [c for c in key_keys if c not in self.columns]
+                if not keys_to_drop:
+                    await output.send((key, data))
                 else:
-                    await output.send((tag.drop(*tags_to_drop), data))
+                    await output.send((key.drop(*keys_to_drop), data))
         finally:
             await output.close()
 
@@ -160,7 +160,7 @@ def to_config(self) -> dict[str, Any]:
         return config
 
     def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
-        tag_columns, data_columns = stream.keys()
+        key_columns, data_columns = stream.keys()
         data_columns_to_drop = [c for c in data_columns if c not in self.columns]
         new_data_columns = [
             c for c in data_columns if c not in data_columns_to_drop
@@ -171,7 +171,7 @@ def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
             return stream
 
         table = stream.as_table(
-            columns={"source": True, "system_tags": True, "sort_by_tags": False},
+            columns={"source": True, "system_keys": True, "sort_by_keys": False},
         )
         # make sure to drop associated source fields
         associated_source_fields = [
@@ -183,7 +183,7 @@ def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
 
         return ArrowTableStream(
             modified_table,
-            tag_columns=tag_columns,
+            key_columns=key_columns,
         )
 
     def validate_unary_input(self, stream: StreamProtocol) -> None:
@@ -192,7 +192,7 @@ def validate_unary_input(self, stream: StreamProtocol) -> None:
         It takes two streams as input and raises an error if the inputs are not valid.
         """
         # TODO: remove redundant logic
-        tag_columns, data_columns = stream.keys()
+        key_columns, data_columns = stream.keys()
         columns_to_select = self.columns
         missing_columns = set(columns_to_select) - set(data_columns)
         if missing_columns and self.strict:
@@ -207,29 +207,29 @@ def unary_output_schema(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> tuple[Schema, Schema]:
-        tag_schema, data_schema = stream.output_schema(
+        key_schema, data_schema = stream.output_schema(
             columns=columns, all_info=all_info
         )
         _, data_columns = stream.keys()
         data_to_drop = [pc for pc in data_columns if pc not in self.columns]
 
-        # this ensures all system tag columns are preserved
+        # only unselected data columns are dropped; other schema entries are preserved
         new_data_schema = {
             k: v for k, v in data_schema.items() if k not in data_to_drop
         }
 
-        return tag_schema, Schema(new_data_schema)
+        return key_schema, Schema(new_data_schema)
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         **kwargs: Any,
     ) -> None:
         """Streaming: select data columns per row without materializing."""
         try:
             pkts_to_drop: list[str] | None = None
-            async for tag, data in inputs[0]:
+            async for key, data in inputs[0]:
                 if pkts_to_drop is None:
                     pkt_keys = data.keys()
                     if self.strict:
@@ -242,9 +242,9 @@ async def async_execute(
                             )
                     pkts_to_drop = [c for c in pkt_keys if c not in self.columns]
                 if not pkts_to_drop:
-                    await output.send((tag, data))
+                    await output.send((key, data))
                 else:
-                    await output.send((tag, data.drop(*pkts_to_drop)))
+                    await output.send((key, data.drop(*pkts_to_drop)))
         finally:
             await output.close()
 
@@ -256,7 +256,7 @@ def identity_structure(self) -> Any:
         )
 
 
-class DropTagColumns(UnaryOperator):
+class DropKeyColumns(UnaryOperator):
     """
     Operator that drops specified columns from a stream.
     """
@@ -269,7 +269,7 @@ def __init__(self, columns: str | Collection[str], strict: bool = True, **kwargs
         super().__init__(**kwargs)
 
     def to_config(self) -> dict[str, Any]:
-        """Serialize this DropTagColumns operator to a config dict.
+        """Serialize this DropKeyColumns operator to a config dict.
 
         Returns:
             A dict with ``class_name``, ``module_path``, and ``config`` keys,
@@ -283,26 +283,26 @@ def to_config(self) -> dict[str, Any]:
         return config
 
     def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
-        tag_columns, data_columns = stream.keys()
+        key_columns, data_columns = stream.keys()
         columns_to_drop = self.columns
         if not self.strict:
-            columns_to_drop = [c for c in columns_to_drop if c in tag_columns]
+            columns_to_drop = [c for c in columns_to_drop if c in key_columns]
 
-        new_tag_columns = [c for c in tag_columns if c not in columns_to_drop]
+        new_key_columns = [c for c in key_columns if c not in columns_to_drop]
 
         if len(columns_to_drop) == 0:
-            logger.info("No tag columns to drop. Returning stream unaltered.")
+            logger.info("No key columns to drop. Returning stream unaltered.")
             return stream
 
         table = stream.as_table(
-            columns={"source": True, "system_tags": True, "sort_by_tags": False}
+            columns={"source": True, "system_keys": True, "sort_by_keys": False}
         )
 
         modified_table = table.drop_columns(list(columns_to_drop))
 
         return ArrowTableStream(
             modified_table,
-            tag_columns=new_tag_columns,
+            key_columns=new_key_columns,
         )
 
     def validate_unary_input(self, stream: StreamProtocol) -> None:
@@ -311,12 +311,12 @@ def validate_unary_input(self, stream: StreamProtocol) -> None:
         It takes two streams as input and raises an error if the inputs are not valid.
         """
         # TODO: remove redundant logic
-        tag_columns, data_columns = stream.keys()
+        key_columns, data_columns = stream.keys()
         columns_to_drop = self.columns
-        missing_columns = set(columns_to_drop) - set(tag_columns)
+        missing_columns = set(columns_to_drop) - set(key_columns)
         if missing_columns and self.strict:
             raise InputValidationError(
-                f"Missing tag columns: {missing_columns}. Make sure all specified columns to drop are present or use strict=False to ignore missing columns"
+                f"Missing key columns: {missing_columns}. Make sure all specified columns to drop are present or use strict=False to ignore missing columns"
             )
 
     def unary_output_schema(
@@ -326,45 +326,45 @@ def unary_output_schema(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> tuple[Schema, Schema]:
-        tag_schema, data_schema = stream.output_schema(
+        key_schema, data_schema = stream.output_schema(
             columns=columns, all_info=all_info
         )
-        tag_columns, _ = stream.keys()
-        new_tag_columns = [c for c in tag_columns if c not in self.columns]
+        key_columns, _ = stream.keys()
+        new_key_columns = [c for c in key_columns if c not in self.columns]
 
-        new_tag_schema = {k: v for k, v in tag_schema.items() if k in new_tag_columns}
+        new_key_schema = {k: v for k, v in key_schema.items() if k in new_key_columns}
 
-        return Schema(new_tag_schema), data_schema
+        return Schema(new_key_schema), data_schema
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         **kwargs: Any,
     ) -> None:
-        """Streaming: drop tag columns per row without materializing."""
+        """Streaming: drop key columns per row without materializing."""
         try:
             effective_drops: list[str] | None = None
-            async for tag, data in inputs[0]:
+            async for key, data in inputs[0]:
                 if effective_drops is None:
-                    tag_keys = tag.keys()
+                    key_keys = key.keys()
                     if self.strict:
-                        missing = set(self.columns) - set(tag_keys)
+                        missing = set(self.columns) - set(key_keys)
                         if missing:
                             raise InputValidationError(
-                                f"Missing tag columns: {missing}. Make sure all "
+                                f"Missing key columns: {missing}. Make sure all "
                                 f"specified columns to drop are present or use "
                                 f"strict=False to ignore missing columns"
                             )
                     effective_drops = (
                         list(self.columns)
                         if self.strict
-                        else [c for c in self.columns if c in tag_keys]
+                        else [c for c in self.columns if c in key_keys]
                     )
                 if not effective_drops:
-                    await output.send((tag, data))
+                    await output.send((key, data))
                 else:
-                    await output.send((tag.drop(*effective_drops), data))
+                    await output.send((key.drop(*effective_drops), data))
         finally:
             await output.close()
 
@@ -403,7 +403,7 @@ def to_config(self) -> dict[str, Any]:
         return config
 
     def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
-        tag_columns, data_columns = stream.keys()
+        key_columns, data_columns = stream.keys()
         columns_to_drop = list(self.columns)
         if not self.strict:
             columns_to_drop = [c for c in columns_to_drop if c in data_columns]
@@ -419,14 +419,14 @@ def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
         columns_to_drop.extend(associated_source_columns)
 
         table = stream.as_table(
-            columns={"source": True, "system_tags": True, "sort_by_tags": False}
+            columns={"source": True, "system_keys": True, "sort_by_keys": False}
         )
 
         modified_table = table.drop_columns(columns_to_drop)
 
         return ArrowTableStream(
             modified_table,
-            tag_columns=tag_columns,
+            key_columns=key_columns,
         )
 
     def validate_unary_input(self, stream: StreamProtocol) -> None:
@@ -449,7 +449,7 @@ def unary_output_schema(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> tuple[Schema, Schema]:
-        tag_schema, data_schema = stream.output_schema(
+        key_schema, data_schema = stream.output_schema(
             columns=columns, all_info=all_info
         )
 
@@ -457,18 +457,18 @@ def unary_output_schema(
             k: v for k, v in data_schema.items() if k not in self.columns
         }
 
-        return tag_schema, Schema(new_data_schema)
+        return key_schema, Schema(new_data_schema)
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         **kwargs: Any,
     ) -> None:
         """Streaming: drop data columns per row without materializing."""
         try:
             effective_drops: list[str] | None = None
-            async for tag, data in inputs[0]:
+            async for key, data in inputs[0]:
                 if effective_drops is None:
                     pkt_keys = data.keys()
                     if self.strict:
@@ -485,9 +485,9 @@ async def async_execute(
                         else [c for c in self.columns if c in pkt_keys]
                     )
                 if not effective_drops:
-                    await output.send((tag, data))
+                    await output.send((key, data))
                 else:
-                    await output.send((tag, data.drop(*effective_drops)))
+                    await output.send((key, data.drop(*effective_drops)))
         finally:
             await output.close()
 
@@ -499,10 +499,10 @@ def identity_structure(self) -> Any:
         )
 
 
-class MapTags(UnaryOperator):
+class MapKeys(UnaryOperator):
     """
-    Operator that maps tags in a stream using a user-defined function.
-    The function is applied to each tag in the stream, and the resulting tags
+    Operator that renames key columns in a stream according to a name map.
+    Columns listed in the map are renamed, and the resulting keys
     are returned as a new stream.
     """
 
@@ -514,31 +514,31 @@ def __init__(
         super().__init__(**kwargs)
 
     def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
-        tag_columns, data_columns = stream.keys()
-        missing_tags = set(tag_columns) - set(self.name_map.keys())
+        key_columns, data_columns = stream.keys()
+        missing_keys = set(key_columns) - set(self.name_map.keys())
 
-        if not any(n in tag_columns for n in self.name_map):
-            # nothing to rename in the tags, return stream as is
+        if not any(n in key_columns for n in self.name_map):
+            # nothing to rename in the keys, return stream as is
             return stream
 
-        table = stream.as_table(columns={"source": True, "system_tags": True})
+        table = stream.as_table(columns={"source": True, "system_keys": True})
 
         name_map = {
-            tc: self.name_map.get(tc, tc) for tc in tag_columns
-        }  # rename the tag as necessary
-        new_tag_columns = [name_map[tc] for tc in tag_columns]
+            tc: self.name_map.get(tc, tc) for tc in key_columns
+        }  # rename the key as necessary
+        new_key_columns = [name_map[tc] for tc in key_columns]
         for c in data_columns:
             name_map[c] = c  # no renaming on data columns
 
         renamed_table = table.rename_columns(name_map)
 
-        if missing_tags and self.drop_unmapped:
-            # drop any tags that are not in the name map
-            renamed_table = renamed_table.drop_columns(list(missing_tags))
+        if missing_keys and self.drop_unmapped:
+            # drop any keys that are not in the name map
+            renamed_table = renamed_table.drop_columns(list(missing_keys))
 
         return ArrowTableStream(
             renamed_table,
-            tag_columns=new_tag_columns,
+            key_columns=new_key_columns,
         )
 
     def validate_unary_input(self, stream: StreamProtocol) -> None:
@@ -547,21 +547,21 @@ def validate_unary_input(self, stream: StreamProtocol) -> None:
         It takes two streams as input and raises an error if the inputs are not valid.
         """
         # verify that renamed value does NOT collide with other columns
-        tag_columns, data_columns = stream.keys()
+        key_columns, data_columns = stream.keys()
         relevant_source = []
         relevant_target = []
         for source, target in self.name_map.items():
-            if source in tag_columns:
+            if source in key_columns:
                 relevant_source.append(source)
                 relevant_target.append(target)
-        remaining_tag_columns = set(tag_columns) - set(relevant_source)
-        overlapping_tag_columns = remaining_tag_columns.intersection(relevant_target)
+        remaining_key_columns = set(key_columns) - set(relevant_source)
+        overlapping_key_columns = remaining_key_columns.intersection(relevant_target)
         overlapping_data_columns = set(data_columns).intersection(relevant_target)
 
-        if overlapping_tag_columns or overlapping_data_columns:
+        if overlapping_key_columns or overlapping_data_columns:
             message = f"Renaming {self.name_map} would cause collisions with existing columns: "
-            if overlapping_tag_columns:
-                message += f"overlapping tag columns: {overlapping_tag_columns}."
+            if overlapping_key_columns:
+                message += f"overlapping key columns: {overlapping_key_columns}."
             if overlapping_data_columns:
                 message += f"overlapping data columns: {overlapping_data_columns}."
             raise InputValidationError(message)
@@ -573,14 +573,14 @@ def unary_output_schema(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> tuple[Schema, Schema]:
-        tag_schema, data_schema = stream.output_schema(
+        key_schema, data_schema = stream.output_schema(
             columns=columns, all_info=all_info
         )
 
         # Create new data schema with renamed keys
-        new_tag_schema = {self.name_map.get(k, k): v for k, v in tag_schema.items()}
+        new_key_schema = {self.name_map.get(k, k): v for k, v in key_schema.items()}
 
-        return Schema(new_tag_schema), data_schema
+        return Schema(new_key_schema), data_schema
 
     def identity_structure(self) -> Any:
         return (
diff --git a/src/orcapod/core/operators/filters.py b/src/orcapod/core/operators/filters.py
index 6c583d18..381794a3 100644
--- a/src/orcapod/core/operators/filters.py
+++ b/src/orcapod/core/operators/filters.py
@@ -51,14 +51,14 @@ def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
 
         # TODO: improve efficiency here...
         table = stream.as_table(
-            columns={"source": True, "system_tags": True, "sort_by_tags": False}
+            columns={"source": True, "system_keys": True, "sort_by_keys": False}
         )
         df = pl.DataFrame(table)
         filtered_table = df.filter(*self.predicates, **self.constraints).to_arrow()
 
         return ArrowTableStream(
             filtered_table,
-            tag_columns=stream.keys()[0],
+            key_columns=stream.keys()[0],
         )
 
     def validate_unary_input(self, stream: StreamProtocol) -> None:
@@ -75,7 +75,7 @@ def unary_output_schema(
         *,
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
-        include_system_tags: bool = False,
+        include_system_keys: bool = False,
     ) -> tuple[Schema, Schema]:
         # data types are not modified
         return stream.output_schema(columns=columns, all_info=all_info)
@@ -163,7 +163,7 @@ def __init__(self, columns: str | Collection[str], strict: bool = True, **kwargs
         super().__init__(**kwargs)
 
     def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
-        tag_columns, data_columns = stream.keys()
+        key_columns, data_columns = stream.keys()
         data_columns_to_drop = [c for c in data_columns if c not in self.columns]
         new_data_columns = [
             c for c in data_columns if c not in data_columns_to_drop
@@ -174,7 +174,7 @@ def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
             return stream
 
         table = stream.as_table(
-            columns={"source": True, "system_tags": True, "sort_by_tags": False}
+            columns={"source": True, "system_keys": True, "sort_by_keys": False}
         )
         # make sure to drop associated source fields
         associated_source_fields = [
@@ -186,7 +186,7 @@ def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
 
         return ArrowTableStream(
             modified_table,
-            tag_columns=tag_columns,
+            key_columns=key_columns,
         )
 
     def validate_unary_input(self, stream: StreamProtocol) -> None:
@@ -209,20 +209,20 @@ def unary_output_schema(
         *,
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
-        include_system_tags: bool = False,
+        include_system_keys: bool = False,
     ) -> tuple[Schema, Schema]:
-        tag_schema, data_schema = stream.output_schema(
+        key_schema, data_schema = stream.output_schema(
             columns=columns, all_info=all_info
         )
         _, data_columns = stream.keys()
         data_to_drop = [pc for pc in data_columns if pc not in self.columns]
 
-        # this ensures all system tag columns are preserved
+        # this ensures all system key columns are preserved
         new_data_schema = {
             k: v for k, v in data_schema.items() if k not in data_to_drop
         }
 
-        return tag_schema, new_data_schema
+        return key_schema, new_data_schema
 
     def identity_structure(self) -> Any:
         return (
diff --git a/src/orcapod/core/operators/join.py b/src/orcapod/core/operators/join.py
index 4619427a..6ed5f0e9 100644
--- a/src/orcapod/core/operators/join.py
+++ b/src/orcapod/core/operators/join.py
@@ -10,7 +10,7 @@
     ArgumentGroup,
     DataProtocol,
     StreamProtocol,
-    TagProtocol,
+    KeyProtocol,
 )
 from orcapod.system_constants import constants
 from orcapod.types import ColumnConfig, ContentHash, Schema
@@ -63,13 +63,13 @@ def output_schema(
             # if single stream, simply return the output schema of the single input stream
             return streams[0].output_schema(columns=columns, all_info=all_info)
 
-        # Always get input schemas WITHOUT system tags for the base computation.
-        # System tags are computed separately because the join renames them.
+        # Always get input schemas WITHOUT system keys for the base computation.
+        # System keys are computed separately because the join renames them.
         stream = streams[0]
-        tag_schema, data_schema = stream.output_schema()
+        key_schema, data_schema = stream.output_schema()
         for other_stream in streams[1:]:
-            other_tag_schema, other_data_schema = other_stream.output_schema()
-            tag_schema = schema_utils.union_schemas(tag_schema, other_tag_schema)
+            other_key_schema, other_data_schema = other_stream.output_schema()
+            key_schema = schema_utils.union_schemas(key_schema, other_key_schema)
             intersection_data_schema = schema_utils.intersection_schemas(
                 data_schema, other_data_schema
             )
@@ -81,70 +81,70 @@ def output_schema(
                     f"Datas should not have overlapping keys, but {data_schema.keys()} found in {stream} and {other_stream}."
                 )
 
-        # Add system tag columns if requested
-        if columns_config.system_tags:
-            system_tag_schema = self._predict_system_tag_schema(*streams)
-            tag_schema = schema_utils.union_schemas(tag_schema, system_tag_schema)
+        # Add system key columns if requested
+        if columns_config.system_keys:
+            system_key_schema = self._predict_system_key_schema(*streams)
+            key_schema = schema_utils.union_schemas(key_schema, system_key_schema)
 
-        return tag_schema, data_schema
+        return key_schema, data_schema
 
-    def _predict_system_tag_schema(self, *streams: StreamProtocol) -> Schema:
-        """Predict the system tag columns that the join would produce.
+    def _predict_system_key_schema(self, *streams: StreamProtocol) -> Schema:
+        """Predict the system key columns that the join would produce.
 
-        Each input stream's existing system tag columns get renamed by
+        Each input stream's existing system key columns get renamed by
         appending ::{pipeline_hash}:{canonical_position}. This method
         computes those output column names without performing the join.
         """
-        n_char = self.orcapod_config.system_tag_hash_n_char
+        n_char = self.orcapod_config.system_key_hash_n_char
         ordered_streams = self.order_input_streams(*streams)
 
-        system_tag_fields: dict[str, type] = {}
+        system_key_fields: dict[str, type] = {}
         for idx, stream in enumerate(ordered_streams):
-            stream_tag_schema, _ = stream.output_schema(columns={"system_tags": True})
-            for col_name in stream_tag_schema:
-                if col_name.startswith(constants.SYSTEM_TAG_PREFIX):
+            stream_key_schema, _ = stream.output_schema(columns={"system_keys": True})
+            for col_name in stream_key_schema:
+                if col_name.startswith(constants.SYSTEM_KEY_PREFIX):
                     new_name = (
                         f"{col_name}{constants.BLOCK_SEPARATOR}"
                         f"{stream.pipeline_hash().to_hex(n_char)}:{idx}"
                     )
-                    system_tag_fields[new_name] = str
-        return Schema(system_tag_fields)
+                    system_key_fields[new_name] = str
+        return Schema(system_key_fields)
 
     def static_process(self, *streams: StreamProtocol) -> StreamProtocol:
         """
-        Joins two streams together based on their tags.
-        The resulting stream will contain all the tags from both streams.
+        Joins the input streams together based on their shared key columns.
+        The resulting stream will contain all the keys from both streams.
         """
         if len(streams) == 1:
             return streams[0]
 
         # Canonically order streams by pipeline_hash for deterministic
-        # system tag column names regardless of input order (Join is commutative)
+        # system key column names regardless of input order (Join is commutative)
         ordered_streams = self.order_input_streams(*streams)
 
         COMMON_JOIN_KEY = "_common"
 
-        n_char = self.orcapod_config.system_tag_hash_n_char
+        n_char = self.orcapod_config.system_key_hash_n_char
 
         stream = ordered_streams[0]
 
-        tag_keys, _ = [set(k) for k in stream.keys()]
+        key_keys, _ = [set(k) for k in stream.keys()]
         table = stream.as_table(
-            columns={"source": True, "system_tags": True, "meta": True}
+            columns={"source": True, "system_keys": True, "meta": True}
         )
         # trick to get cartesian product
         table = table.add_column(0, COMMON_JOIN_KEY, pa.array([0] * len(table)))
-        table = arrow_utils.append_to_system_tags(
+        table = arrow_utils.append_to_system_keys(
             table,
             f"{stream.pipeline_hash().to_hex(n_char)}:0",
         )
 
         for idx, next_stream in enumerate(ordered_streams[1:], start=1):
-            next_tag_keys, _ = next_stream.keys()
+            next_key_keys, _ = next_stream.keys()
             next_table = next_stream.as_table(
-                columns={"source": True, "system_tags": True, "meta": True}
+                columns={"source": True, "system_keys": True, "meta": True}
             )
-            next_table = arrow_utils.append_to_system_tags(
+            next_table = arrow_utils.append_to_system_keys(
                 next_table,
                 f"{next_stream.pipeline_hash().to_hex(n_char)}:{idx}",
             )
@@ -158,9 +158,9 @@ def static_process(self, *streams: StreamProtocol) -> StreamProtocol:
             # the accumulated table, using stream-index-based suffixes instead of
             # Polars' default ``_right`` suffix which causes cascading collisions
             # on 3+ stream joins.  The only legitimately shared column names are
-            # the tag join keys; everything else (meta columns, their derived
+            # the join-key columns; everything else (meta columns, their derived
             # source-info columns, etc.) must be unique.
-            join_key_set = tag_keys.intersection(next_tag_keys) | {COMMON_JOIN_KEY}
+            join_key_set = key_keys.intersection(next_key_keys) | {COMMON_JOIN_KEY}
             existing_names = set(table.column_names)
             rename_map = {}
             for col in next_table.column_names:
@@ -186,46 +186,46 @@ def static_process(self, *streams: StreamProtocol) -> StreamProtocol:
                     [rename_map.get(name, name) for name in next_table.column_names]
                 )
 
-            common_tag_keys = tag_keys.intersection(next_tag_keys)
-            common_tag_keys.add(COMMON_JOIN_KEY)
+            common_key_keys = key_keys.intersection(next_key_keys)
+            common_key_keys.add(COMMON_JOIN_KEY)
 
             # Capture the left-side schema before the Polars join, which sets all
             # fields to nullable=True regardless of the original schema.
             table_ref_schema = table.schema
             table = (
                 pl.DataFrame(table)
-                .join(pl.DataFrame(next_table), on=list(common_tag_keys), how="inner")
+                .join(pl.DataFrame(next_table), on=list(common_key_keys), how="inner")
                 .to_arrow()
             )
             table = arrow_utils.restore_schema_nullability(table, table_ref_schema, next_ref_schema)
 
-            tag_keys.update(next_tag_keys)
+            key_keys.update(next_key_keys)
 
-        # reorder columns to bring tag columns to the front
+        # reorder columns to bring key columns to the front
         # TODO: come up with a better algorithm
         table = table.drop(COMMON_JOIN_KEY)
 
-        # Sort system tag values for same-pipeline-hash streams to ensure commutativity
-        table = arrow_utils.sort_system_tag_values(table)
+        # Sort system key values for same-pipeline-hash streams to ensure commutativity
+        table = arrow_utils.sort_system_key_values(table)
 
-        reordered_columns = [col for col in table.column_names if col in tag_keys]
-        reordered_columns += [col for col in table.column_names if col not in tag_keys]
+        reordered_columns = [col for col in table.column_names if col in key_keys]
+        reordered_columns += [col for col in table.column_names if col not in key_keys]
 
         result_table = table.select(reordered_columns)
         return ArrowTableStream(
             result_table,
-            tag_columns=tuple(tag_keys),
+            key_columns=tuple(key_keys),
         )
 
     # ------------------------------------------------------------------
     # Async execution
     # ------------------------------------------------------------------
 
-    def _compute_system_tag_suffixes(
+    def _compute_system_key_suffixes(
         self,
         input_pipeline_hashes: Sequence[ContentHash],
     ) -> list[str]:
-        """Compute per-input system-tag suffixes from pipeline hashes.
+        """Compute per-input system-key suffixes from pipeline hashes.
 
         Each suffix is ``{truncated_hash}:{canonical_position}`` where
         canonical position is determined by sorting the hashes (matching
@@ -238,7 +238,7 @@ def _compute_system_tag_suffixes(
         Returns:
             List of suffix strings, one per input position.
         """
-        n_char = self.orcapod_config.system_tag_hash_n_char
+        n_char = self.orcapod_config.system_key_hash_n_char
         hex_strings = [h.to_hex() for h in input_pipeline_hashes]
 
         # Canonical order: sorted by full hex (same as order_input_streams).
@@ -258,8 +258,8 @@ def _compute_system_tag_suffixes(
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         *,
         input_pipeline_hashes: Sequence[ContentHash] | None = None,
     ) -> None:
@@ -274,20 +274,20 @@ async def async_execute(
         Three or more inputs: staggered pairwise binary joins in
         canonical order — ``join(join(x, y), z)`` — matching
         ``static_process``'s iterative accumulation.  Each binary join
-        uses the per-pair intersection of tag keys, so partially
-        overlapping tag schemas are handled correctly.
+        uses the per-pair intersection of key-schema columns, so partially
+        overlapping key schemas are handled correctly.
 
         Args:
             inputs: Readable channels, one per upstream.
             output: Writable channel for downstream.
             input_pipeline_hashes: Pipeline hash for each input,
                 positionally matching ``inputs``.  Required for
-                correct system-tag renaming with 2+ inputs.
+                correct system-key renaming with 2+ inputs.
         """
         try:
             if len(inputs) == 1:
-                async for tag, data in inputs[0]:
-                    await output.send((tag, data))
+                async for key, data in inputs[0]:
+                    await output.send((key, data))
                 return
 
             n = len(inputs)
@@ -297,7 +297,7 @@ async def async_execute(
                     f"must match inputs length ({n})"
                 )
             suffixes = (
-                self._compute_system_tag_suffixes(input_pipeline_hashes)
+                self._compute_system_key_suffixes(input_pipeline_hashes)
                 if input_pipeline_hashes is not None
                 else [str(i) for i in range(n)]
             )
@@ -307,8 +307,8 @@ async def async_execute(
 
     async def _streaming_join(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         suffixes: list[str],
     ) -> None:
         """Dispatch between binary join (N=2) and staggered chain (N>=3).
@@ -316,7 +316,7 @@ async def _streaming_join(
         Args:
             inputs: Readable channels, one per upstream.
             output: Output channel for matched rows.
-            suffixes: Per-input system-tag suffixes (positional).
+            suffixes: Per-input system-key suffixes (positional).
         """
         n = len(inputs)
         block_sep = constants.BLOCK_SEPARATOR
@@ -324,11 +324,11 @@ async def _streaming_join(
         if n == 2:
 
             def merge_fn(
-                lt: TagProtocol,
+                lt: KeyProtocol,
                 lp: DataProtocol,
-                rt: TagProtocol,
+                rt: KeyProtocol,
                 rp: DataProtocol,
-            ) -> tuple[TagProtocol, DataProtocol]:
+            ) -> tuple[KeyProtocol, DataProtocol]:
                 return self._merge_pair_rename(lt, lp, rt, rp, suffixes, block_sep)
 
             await self._binary_streaming_join(inputs[0], inputs[1], output, merge_fn)
@@ -339,17 +339,17 @@ def merge_fn(
 
     async def _staggered_join(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         suffixes: list[str],
     ) -> None:
         """Staggered pairwise binary joins: ``join(join(x, y), z)``.
 
         Matches ``static_process``'s iterative pairwise join semantics.
-        Each input's system tags are pre-renamed, then binary joins are
+        Each input's system keys are pre-renamed, then binary joins are
         chained in canonical order.  Per-pair join keys are computed
         naturally by each binary join (intersection of its two inputs'
-        tag keys), so partially overlapping tag schemas produce the
+        key-schema columns), so partially overlapping key schemas produce the
         same results as the sync path.
 
         Intermediate results flow through channels, so downstream joins
@@ -359,13 +359,13 @@ async def _staggered_join(
         Args:
             inputs: Readable channels, one per upstream.
             output: Output channel for matched rows.
-            suffixes: Per-input system-tag suffixes (positional).
+            suffixes: Per-input system-key suffixes (positional).
         """
         from orcapod.channels import Channel
 
         n = len(inputs)
         block_sep = constants.BLOCK_SEPARATOR
-        sys_prefix = constants.SYSTEM_TAG_PREFIX
+        sys_prefix = constants.SYSTEM_KEY_PREFIX
 
         # Canonical order: sorted by canonical position encoded in suffixes.
         # Suffixes are "hash:position" when pipeline hashes are provided,
@@ -377,17 +377,17 @@ def _canon_pos(i: int) -> int:
         canon_order = sorted(range(n), key=_canon_pos)
 
         async with asyncio.TaskGroup() as tg:
-            # Pre-rename system tags for each input so binary joins
+            # Pre-rename system keys for each input so binary joins
             # can pass them through without modification
             renamed_readers: list[
-                ReadableChannel[tuple[TagProtocol, DataProtocol]]
+                ReadableChannel[tuple[KeyProtocol, DataProtocol]]
             ] = []
             for orig_idx in canon_order:
-                ch: Channel[tuple[TagProtocol, DataProtocol]] = Channel(
+                ch: Channel[tuple[KeyProtocol, DataProtocol]] = Channel(
                     buffer_size=64
                 )
                 tg.create_task(
-                    self._rename_sys_tags(
+                    self._rename_sys_keys(
                         inputs[orig_idx],
                         ch.writer,
                         suffixes[orig_idx],
@@ -405,7 +405,7 @@ def _canon_pos(i: int) -> int:
                     target_writer = output
                 else:
                     intermediate: Channel[
-                        tuple[TagProtocol, DataProtocol]
+                        tuple[KeyProtocol, DataProtocol]
                     ] = Channel(buffer_size=64)
                     target_writer = intermediate.writer
 
@@ -423,12 +423,12 @@ def _canon_pos(i: int) -> int:
 
     async def _binary_streaming_join(
         self,
-        left: ReadableChannel[tuple[TagProtocol, DataProtocol]],
-        right: ReadableChannel[tuple[TagProtocol, DataProtocol]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        left: ReadableChannel[tuple[KeyProtocol, DataProtocol]],
+        right: ReadableChannel[tuple[KeyProtocol, DataProtocol]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         merge_fn: Callable[
-            [TagProtocol, DataProtocol, TagProtocol, DataProtocol],
-            tuple[TagProtocol, DataProtocol],
+            [KeyProtocol, DataProtocol, KeyProtocol, DataProtocol],
+            tuple[KeyProtocol, DataProtocol],
         ],
     ) -> None:
         """Binary symmetric hash join.
@@ -443,14 +443,14 @@ async def _binary_streaming_join(
             left: Left input channel.
             right: Right input channel.
             output: Output channel for matched rows.
-            merge_fn: Callable(left_tag, left_pkt, right_tag, right_pkt)
-                that produces the merged (Tag, Data) pair.
+            merge_fn: Callable(left_key, left_pkt, right_key, right_pkt)
+                that produces the merged (Key, Data) pair.
         """
         _SENTINEL = object()
         queue: asyncio.Queue = asyncio.Queue(maxsize=64)
 
         async def _drain(
-            ch: ReadableChannel[tuple[TagProtocol, DataProtocol]],
+            ch: ReadableChannel[tuple[KeyProtocol, DataProtocol]],
             side: int,
         ) -> None:
             async for item in ch:
@@ -462,7 +462,7 @@ async def _drain(
                 tg.create_task(_drain(left, 0))
                 tg.create_task(_drain(right, 1))
 
-                buffers: list[list[tuple[TagProtocol, DataProtocol]]] = [
+                buffers: list[list[tuple[KeyProtocol, DataProtocol]]] = [
                     [],
                     [],
                 ]
@@ -479,15 +479,15 @@ async def _drain(
                         closed_count += 1
                         continue
 
-                    tag, pkt = item
+                    key, pkt = item
                     other = 1 - side
 
-                    # Determine shared tag keys once we have rows from both sides
+                    # Determine shared key-schema columns once we have rows from both sides
                     if shared_keys is None:
                         if not buffers[other]:
-                            buffers[side].append((tag, pkt))
+                            buffers[side].append((key, pkt))
                             continue
-                        this_keys = set(tag.keys())
+                        this_keys = set(key.keys())
                         other_keys = set(buffers[other][0][0].keys())
                         shared_keys = tuple(sorted(this_keys & other_keys))
                         needs_reindex = True
@@ -506,89 +506,89 @@ async def _drain(
                                 indexes[buf_side].setdefault(k, []).append(j)
 
                     # Index the new row
-                    td = tag.as_dict()
-                    key = (
+                    td = key.as_dict()
+                    join_key = (
                         tuple(td[sk] for sk in shared_keys)
                         if shared_keys
                         else (0,)
                     )
                     row_idx = len(buffers[side])
-                    buffers[side].append((tag, pkt))
-                    indexes[side].setdefault(key, []).append(row_idx)
+                    buffers[side].append((key, pkt))
+                    indexes[side].setdefault(join_key, []).append(row_idx)
 
                     # Probe the opposite side for matches
-                    for mi in indexes[other].get(key, []):
+                    for mi in indexes[other].get(join_key, []):
                         ot, op = buffers[other][mi]
                         if side == 0:
-                            await output.send(merge_fn(tag, pkt, ot, op))
+                            await output.send(merge_fn(key, pkt, ot, op))
                         else:
-                            await output.send(merge_fn(ot, op, tag, pkt))
+                            await output.send(merge_fn(ot, op, key, pkt))
         finally:
             await output.close()
 
     @staticmethod
-    async def _rename_sys_tags(
-        ch_in: ReadableChannel[tuple[TagProtocol, DataProtocol]],
-        ch_out: WritableChannel[tuple[TagProtocol, DataProtocol]],
+    async def _rename_sys_keys(
+        ch_in: ReadableChannel[tuple[KeyProtocol, DataProtocol]],
+        ch_out: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         suffix: str,
         block_sep: str,
         sys_prefix: str,
     ) -> None:
-        """Read rows and rename system-tag keys by appending the per-input suffix.
+        """Read rows and rename system-key columns by appending the per-input suffix.
 
         Used as a pre-processing step in ``_staggered_join`` so that
-        downstream binary joins can pass system tags through without
+        downstream binary joins can pass system keys through without
         modification.
         """
-        from orcapod.core.datagrams import Tag
+        from orcapod.core.datagrams import Key
 
         try:
-            async for tag, pkt in ch_in:
-                sys_tags = tag.system_tags()
-                if sys_tags:
+            async for key, pkt in ch_in:
+                sys_keys = key.system_keys()
+                if sys_keys:
                     renamed: dict = {}
-                    for k, v in sys_tags.items():
+                    for k, v in sys_keys.items():
                         new_key = (
                             f"{k}{block_sep}{suffix}"
                             if k.startswith(sys_prefix)
                             else k
                         )
                         renamed[new_key] = v
-                    tag = Tag(tag.as_dict(), system_tags=renamed)
-                await ch_out.send((tag, pkt))
+                    key = Key(key.as_dict(), system_keys=renamed)
+                await ch_out.send((key, pkt))
         finally:
             await ch_out.close()
 
     @staticmethod
     def _merge_pair_rename(
-        left_tag: TagProtocol,
+        left_key: KeyProtocol,
         left_pkt: DataProtocol,
-        right_tag: TagProtocol,
+        right_key: KeyProtocol,
         right_pkt: DataProtocol,
         suffixes: list[str],
         block_sep: str,
-    ) -> tuple[TagProtocol, DataProtocol]:
-        """Merge a matched pair, renaming system tags with per-side suffixes.
+    ) -> tuple[KeyProtocol, DataProtocol]:
+        """Merge a matched pair, renaming system keys with per-side suffixes.
 
-        Used for direct 2-input joins where system tags are renamed
+        Used for direct 2-input joins where system keys are renamed
         during the merge (not pre-renamed).
         """
-        from orcapod.core.datagrams import Data, Tag
+        from orcapod.core.datagrams import Data, Key
 
-        sys_prefix = constants.SYSTEM_TAG_PREFIX
+        sys_prefix = constants.SYSTEM_KEY_PREFIX
 
-        # Merge tag dicts — shared keys come from left
-        merged_tag_d: dict = {}
-        for k, v in left_tag.as_dict().items():
-            merged_tag_d[k] = v
-        for k, v in right_tag.as_dict().items():
-            if k not in merged_tag_d:
-                merged_tag_d[k] = v
+        # Merge key dicts — for columns present on both sides the left value wins
+        merged_key_d: dict = {}
+        for k, v in left_key.as_dict().items():
+            merged_key_d[k] = v
+        for k, v in right_key.as_dict().items():
+            if k not in merged_key_d:
+                merged_key_d[k] = v
 
-        # Rename and merge system tags
+        # Rename and merge system keys
         merged_sys: dict = {}
-        for i, tag in enumerate((left_tag, right_tag)):
-            for k, v in tag.system_tags().items():
+        for i, key in enumerate((left_key, right_key)):
+            for k, v in key.system_keys().items():
                 new_key = (
                     f"{k}{block_sep}{suffixes[i]}"
                     if k.startswith(sys_prefix)
@@ -596,8 +596,8 @@ def _merge_pair_rename(
                 )
                 merged_sys[new_key] = v
 
-        merged_sys = Join._sort_merged_system_tags(merged_sys)
-        merged_tag = Tag(merged_tag_d, system_tags=merged_sys)
+        merged_sys = Join._sort_merged_system_keys(merged_sys)
+        merged_key = Key(merged_key_d, system_keys=merged_sys)
 
         # Merge data dicts (non-overlapping by Join's validation)
         merged_pkt_d: dict = {}
@@ -608,38 +608,38 @@ def _merge_pair_rename(
         merged_si.update(right_pkt.source_info())
 
         merged_pkt = Data(merged_pkt_d, source_info=merged_si)
-        return merged_tag, merged_pkt
+        return merged_key, merged_pkt
 
     @staticmethod
     def _merge_pair_passthrough(
-        left_tag: TagProtocol,
+        left_key: KeyProtocol,
         left_pkt: DataProtocol,
-        right_tag: TagProtocol,
+        right_key: KeyProtocol,
         right_pkt: DataProtocol,
-    ) -> tuple[TagProtocol, DataProtocol]:
-        """Merge a matched pair, passing system tags through without renaming.
+    ) -> tuple[KeyProtocol, DataProtocol]:
+        """Merge a matched pair, passing system keys through without renaming.
 
-        Used in the staggered chain where system tags have already been
-        pre-renamed by ``_rename_sys_tags``.
+        Used in the staggered chain where system keys have already been
+        pre-renamed by ``_rename_sys_keys``.
         """
-        from orcapod.core.datagrams import Data, Tag
+        from orcapod.core.datagrams import Data, Key
 
-        # Merge tag dicts — shared keys come from left
-        merged_tag_d: dict = {}
-        for k, v in left_tag.as_dict().items():
-            merged_tag_d[k] = v
-        for k, v in right_tag.as_dict().items():
-            if k not in merged_tag_d:
-                merged_tag_d[k] = v
+        # Merge key dicts — shared keys come from left
+        merged_key_d: dict = {}
+        for k, v in left_key.as_dict().items():
+            merged_key_d[k] = v
+        for k, v in right_key.as_dict().items():
+            if k not in merged_key_d:
+                merged_key_d[k] = v
 
-        # Combine system tags (already renamed)
+        # Combine system keys (already renamed)
         merged_sys: dict = {}
-        merged_sys.update(left_tag.system_tags())
-        merged_sys.update(right_tag.system_tags())
+        merged_sys.update(left_key.system_keys())
+        merged_sys.update(right_key.system_keys())
 
         # Sort within same-provenance-path groups for commutativity
-        merged_sys = Join._sort_merged_system_tags(merged_sys)
-        merged_tag = Tag(merged_tag_d, system_tags=merged_sys)
+        merged_sys = Join._sort_merged_system_keys(merged_sys)
+        merged_key = Key(merged_key_d, system_keys=merged_sys)
 
         # Merge data dicts (non-overlapping by Join's validation)
         merged_pkt_d: dict = {}
@@ -650,20 +650,20 @@ def _merge_pair_passthrough(
         merged_si.update(right_pkt.source_info())
 
         merged_pkt = Data(merged_pkt_d, source_info=merged_si)
-        return merged_tag, merged_pkt
+        return merged_key, merged_pkt
 
     @staticmethod
-    def _sort_merged_system_tags(merged_sys: dict) -> dict:
-        """Sort system tag values within same-provenance-path groups.
+    def _sort_merged_system_keys(merged_sys: dict) -> dict:
+        """Sort system key values within same-provenance-path groups.
 
-        When two joined inputs share a pipeline_hash, their system tag
+        When two joined inputs share a pipeline_hash, their system key
         columns share a provenance path but occupy different canonical
         positions.  Sorting the paired (source_id, record_id) values
         across positions ensures commutativity — mirroring what
-        ``sort_system_tag_values`` does on Arrow tables in
+        ``sort_system_key_values`` does on Arrow tables in
         ``static_process``.
         """
-        sys_prefix = constants.SYSTEM_TAG_PREFIX
+        sys_prefix = constants.SYSTEM_KEY_PREFIX
         block_sep = constants.BLOCK_SEPARATOR
         field_sep = constants.FIELD_SEPARATOR
 
@@ -683,8 +683,8 @@ def _sort_merged_system_tags(merged_sys: dict) -> dict:
                 field_type
             ] = key
 
-        sid_field = constants.SYSTEM_TAG_SOURCE_ID_PREFIX[len(sys_prefix) :]
-        rid_field = constants.SYSTEM_TAG_RECORD_ID_PREFIX[len(sys_prefix) :]
+        sid_field = constants.SYSTEM_KEY_SOURCE_ID_PREFIX[len(sys_prefix) :]
+        rid_field = constants.SYSTEM_KEY_RECORD_ID_PREFIX[len(sys_prefix) :]
 
         for _prov_path, positions in groups.items():
             if len(positions) <= 1:
diff --git a/src/orcapod/core/operators/mappers.py b/src/orcapod/core/operators/mappers.py
index 4c69dd9c..b1d21e68 100644
--- a/src/orcapod/core/operators/mappers.py
+++ b/src/orcapod/core/operators/mappers.py
@@ -5,7 +5,7 @@
 from orcapod.core.operators.base import UnaryOperator
 from orcapod.core.streams import ArrowTableStream
 from orcapod.errors import InputValidationError
-from orcapod.protocols.core_protocols import DataProtocol, StreamProtocol, TagProtocol
+from orcapod.protocols.core_protocols import DataProtocol, StreamProtocol, KeyProtocol
 from orcapod.system_constants import constants
 from orcapod.types import ColumnConfig, Schema
 from orcapod.utils.lazy_module import LazyModule
@@ -45,7 +45,7 @@ def to_config(self) -> dict[str, Any]:
         return config
 
     def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
-        tag_columns, data_columns = stream.keys()
+        key_columns, data_columns = stream.keys()
         unmapped_columns = set(data_columns) - set(self.name_map.keys())
 
         if not any(n in data_columns for n in self.name_map):
@@ -53,7 +53,7 @@ def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
             return stream
 
         table = stream.as_table(
-            columns={"source": True, "system_tags": True, "sort_by_tags": False}
+            columns={"source": True, "system_keys": True, "sort_by_keys": False}
         )
 
         name_map = {
@@ -65,7 +65,7 @@ def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
             tc: tc
             for tc in table.column_names
             if tc not in data_columns and not tc.startswith(constants.SOURCE_PREFIX)
-        }  # no renaming on tag columns
+        }  # no renaming on key columns
         for c in data_columns:
             if c in self.name_map:
                 name_map[c] = self.name_map[c]
@@ -80,11 +80,11 @@ def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
         if self.drop_unmapped and unmapped_columns:
             renamed_table = renamed_table.drop_columns(list(unmapped_columns))
 
-        return ArrowTableStream(renamed_table, tag_columns=tag_columns)
+        return ArrowTableStream(renamed_table, key_columns=key_columns)
 
     def validate_unary_input(self, stream: StreamProtocol) -> None:
         # verify that renamed value does NOT collide with other columns
-        tag_columns, data_columns = stream.keys()
+        key_columns, data_columns = stream.keys()
         relevant_source = []
         relevant_target = []
         for source, target in self.name_map.items():
@@ -95,14 +95,14 @@ def validate_unary_input(self, stream: StreamProtocol) -> None:
         overlapping_data_columns = remaining_data_columns.intersection(
             relevant_target
         )
-        overlapping_tag_columns = set(tag_columns).intersection(relevant_target)
+        overlapping_key_columns = set(key_columns).intersection(relevant_target)
 
-        if overlapping_data_columns or overlapping_tag_columns:
+        if overlapping_data_columns or overlapping_key_columns:
             message = f"Renaming {self.name_map} would cause collisions with existing columns: "
             if overlapping_data_columns:
                 message += f"overlapping data columns: {overlapping_data_columns}, "
-            if overlapping_tag_columns:
-                message += f"overlapping tag columns: {overlapping_tag_columns}."
+            if overlapping_key_columns:
+                message += f"overlapping key columns: {overlapping_key_columns}."
             raise InputValidationError(message)
 
     def unary_output_schema(
@@ -112,7 +112,7 @@ def unary_output_schema(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> tuple[Schema, Schema]:
-        tag_schema, data_schema = stream.output_schema(
+        key_schema, data_schema = stream.output_schema(
             columns=columns, all_info=all_info
         )
 
@@ -123,19 +123,19 @@ def unary_output_schema(
             if k in self.name_map or not self.drop_unmapped
         }
 
-        return tag_schema, Schema(new_data_schema)
+        return key_schema, Schema(new_data_schema)
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         **kwargs: Any,
     ) -> None:
         """Streaming: rename data columns per row without materializing."""
         try:
             rename_map: dict[str, str] | None = None
             unmapped: list[str] | None = None
-            async for tag, data in inputs[0]:
+            async for key, data in inputs[0]:
                 if rename_map is None:
                     pkt_keys = data.keys()
                     rename_map = {
@@ -144,12 +144,12 @@ async def async_execute(
                     if self.drop_unmapped:
                         unmapped = [k for k in pkt_keys if k not in self.name_map]
                 if not rename_map:
-                    await output.send((tag, data))
+                    await output.send((key, data))
                 else:
                     new_pkt = data.rename(rename_map)
                     if unmapped:
                         new_pkt = new_pkt.drop(*unmapped)
-                    await output.send((tag, new_pkt))
+                    await output.send((key, new_pkt))
         finally:
             await output.close()
 
@@ -161,10 +161,10 @@ def identity_structure(self) -> Any:
         )
 
 
-class MapTags(UnaryOperator):
+class MapKeys(UnaryOperator):
     """
-    Operator that maps tags in a stream using a user-defined function.
-    The function is applied to each tag in the stream, and the resulting tags
+    Operator that maps keys in a stream using a user-defined function.
+    The function is applied to each key in the stream, and the resulting keys
     are returned as a new stream.
     """
 
@@ -176,7 +176,7 @@ def __init__(
         super().__init__(**kwargs)
 
     def to_config(self) -> dict[str, Any]:
-        """Serialize this MapTags operator to a config dict.
+        """Serialize this MapKeys operator to a config dict.
 
         Returns:
             A dict with ``class_name``, ``module_path``, and ``config`` keys,
@@ -190,35 +190,35 @@ def to_config(self) -> dict[str, Any]:
         return config
 
     def unary_static_process(self, stream: StreamProtocol) -> StreamProtocol:
-        tag_columns, data_columns = stream.keys()
-        missing_tags = set(tag_columns) - set(self.name_map.keys())
+        key_columns, data_columns = stream.keys()
+        missing_keys = set(key_columns) - set(self.name_map.keys())
 
-        if not any(n in tag_columns for n in self.name_map):
-            # nothing to rename in the tags, return stream as is
+        if not any(n in key_columns for n in self.name_map):
+            # nothing to rename in the keys, return stream as is
             return stream
 
         table = stream.as_table(
-            columns={"source": True, "system_tags": True, "sort_by_tags": False}
+            columns={"source": True, "system_keys": True, "sort_by_keys": False}
         )
 
         name_map = {
             tc: self.name_map.get(tc, tc)
-            for tc in tag_columns
+            for tc in key_columns
             if tc in self.name_map or not self.drop_unmapped
-        }  # rename the tag as necessary
-        new_tag_columns = list(name_map.values())
+        }  # rename the key as necessary
+        new_key_columns = list(name_map.values())
         for c in data_columns:
             name_map[c] = c  # no renaming on data columns
 
         renamed_table = table.rename_columns(name_map)
 
-        if missing_tags and self.drop_unmapped:
-            # drop any tags that are not in the name map
-            renamed_table = renamed_table.drop_columns(list(missing_tags))
+        if missing_keys and self.drop_unmapped:
+            # drop any keys that are not in the name map
+            renamed_table = renamed_table.drop_columns(list(missing_keys))
 
         return ArrowTableStream(
             renamed_table,
-            tag_columns=new_tag_columns,
+            key_columns=new_key_columns,
         )
 
     def validate_unary_input(self, stream: StreamProtocol) -> None:
@@ -227,21 +227,21 @@ def validate_unary_input(self, stream: StreamProtocol) -> None:
         It takes two streams as input and raises an error if the inputs are not valid.
         """
         # verify that renamed value does NOT collide with other columns
-        tag_columns, data_columns = stream.keys()
+        key_columns, data_columns = stream.keys()
         relevant_source = []
         relevant_target = []
         for source, target in self.name_map.items():
-            if source in tag_columns:
+            if source in key_columns:
                 relevant_source.append(source)
                 relevant_target.append(target)
-        remaining_tag_columns = set(tag_columns) - set(relevant_source)
-        overlapping_tag_columns = remaining_tag_columns.intersection(relevant_target)
+        remaining_key_columns = set(key_columns) - set(relevant_source)
+        overlapping_key_columns = remaining_key_columns.intersection(relevant_target)
         overlapping_data_columns = set(data_columns).intersection(relevant_target)
 
-        if overlapping_tag_columns or overlapping_data_columns:
+        if overlapping_key_columns or overlapping_data_columns:
             message = f"Renaming {self.name_map} would cause collisions with existing columns: "
-            if overlapping_tag_columns:
-                message += f"overlapping tag columns: {overlapping_tag_columns}."
+            if overlapping_key_columns:
+                message += f"overlapping key columns: {overlapping_key_columns}."
             if overlapping_data_columns:
                 message += f"overlapping data columns: {overlapping_data_columns}."
             raise InputValidationError(message)
@@ -253,43 +253,43 @@ def unary_output_schema(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> tuple[Schema, Schema]:
-        tag_schema, data_schema = stream.output_schema(
+        key_schema, data_schema = stream.output_schema(
             columns=columns, all_info=all_info
         )
 
-        new_tag_schema = {
+        new_key_schema = {
             self.name_map.get(k, k): v
-            for k, v in tag_schema.items()
+            for k, v in key_schema.items()
             if k in self.name_map or not self.drop_unmapped
         }
 
-        return Schema(new_tag_schema), data_schema
+        return Schema(new_key_schema), data_schema
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         **kwargs: Any,
     ) -> None:
-        """Streaming: rename tag columns per row without materializing."""
+        """Streaming: rename key columns per row without materializing."""
         try:
             rename_map: dict[str, str] | None = None
             unmapped: list[str] | None = None
-            async for tag, data in inputs[0]:
+            async for key, data in inputs[0]:
                 if rename_map is None:
-                    tag_keys = tag.keys()
+                    key_keys = key.keys()
                     rename_map = {
-                        k: self.name_map[k] for k in tag_keys if k in self.name_map
+                        k: self.name_map[k] for k in key_keys if k in self.name_map
                     }
                     if self.drop_unmapped:
-                        unmapped = [k for k in tag_keys if k not in self.name_map]
+                        unmapped = [k for k in key_keys if k not in self.name_map]
                 if not rename_map:
-                    await output.send((tag, data))
+                    await output.send((key, data))
                 else:
-                    new_tag = tag.rename(rename_map)
+                    new_key = key.rename(rename_map)
                     if unmapped:
-                        new_tag = new_tag.drop(*unmapped)
-                    await output.send((new_tag, data))
+                        new_key = new_key.drop(*unmapped)
+                    await output.send((new_key, data))
         finally:
             await output.close()
 
diff --git a/src/orcapod/core/operators/merge_join.py b/src/orcapod/core/operators/merge_join.py
index 871b0070..43cf55bf 100644
--- a/src/orcapod/core/operators/merge_join.py
+++ b/src/orcapod/core/operators/merge_join.py
@@ -29,10 +29,10 @@ class MergeJoin(BinaryOperator):
 
     For non-colliding columns, values are kept as scalars (same as regular Join).
 
-    Tag columns use inner join on shared tags, with union of tag schemas.
+    Key columns use inner join on shared keys, with union of key schemas.
 
     MergeJoin is commutative: MergeJoin(A, B) produces the same result as
-    MergeJoin(B, A), achieved by sorting merged values and system tag values.
+    MergeJoin(B, A), achieved by sorting merged values and system key values.
     """
 
     @property
@@ -82,13 +82,13 @@ def binary_output_schema(
     ) -> tuple[Schema, Schema]:
         columns_config = ColumnConfig.handle_config(columns, all_info=all_info)
 
-        # Always get input schemas WITHOUT system tags for the base computation.
-        # System tags are computed separately because the join renames them.
-        left_tag_schema, left_data_schema = left_stream.output_schema()
-        right_tag_schema, right_data_schema = right_stream.output_schema()
+        # Always get input schemas WITHOUT system keys for the base computation.
+        # System keys are computed separately because the join renames them.
+        left_key_schema, left_data_schema = left_stream.output_schema()
+        right_key_schema, right_data_schema = right_stream.output_schema()
 
-        # Tag schema: union of both tag schemas
-        tag_schema = schema_utils.union_schemas(left_tag_schema, right_tag_schema)
+        # Key schema: union of both key schemas
+        key_schema = schema_utils.union_schemas(left_key_schema, right_key_schema)
 
         # Data schema: colliding columns become list[T], non-colliding stay scalar
         colliding_schema = schema_utils.intersection_schemas(
@@ -107,14 +107,14 @@ def binary_output_schema(
             else:
                 merged_data_schema[key] = right_data_schema[key]
 
-        # Add system tag columns if requested
-        if columns_config.system_tags:
-            system_tag_schema = self._predict_system_tag_schema(
+        # Add system key columns if requested
+        if columns_config.system_keys:
+            system_key_schema = self._predict_system_key_schema(
                 left_stream, right_stream
             )
-            tag_schema = schema_utils.union_schemas(tag_schema, system_tag_schema)
+            key_schema = schema_utils.union_schemas(key_schema, system_key_schema)
 
-        return tag_schema, Schema(merged_data_schema)
+        return key_schema, Schema(merged_data_schema)
 
     def _canonical_order(
         self, left_stream: StreamProtocol, right_stream: StreamProtocol
@@ -128,45 +128,45 @@ def _canonical_order(
         # Python's sorted is stable, so equal pipeline_hashes preserve input order
         return sorted(streams_with_idx, key=lambda s: s[0].pipeline_hash().to_hex())
 
-    def _predict_system_tag_schema(
+    def _predict_system_key_schema(
         self, left_stream: StreamProtocol, right_stream: StreamProtocol
     ) -> Schema:
-        """Predict the system tag columns that the join would produce.
+        """Predict the system key columns that the join would produce.
 
-        Each input stream's existing system tag columns get renamed by
+        Each input stream's existing system key columns get renamed by
         appending ::{pipeline_hash}:{canonical_position}. This method
         computes those output column names without performing the join.
         """
-        n_char = self.orcapod_config.system_tag_hash_n_char
+        n_char = self.orcapod_config.system_key_hash_n_char
         canonical = self._canonical_order(left_stream, right_stream)
 
-        system_tag_fields: dict[str, type] = {}
+        system_key_fields: dict[str, type] = {}
         for stream, orig_idx in canonical:
             canon_pos = canonical.index((stream, orig_idx))
-            stream_tag_schema, _ = stream.output_schema(columns={"system_tags": True})
-            for col_name in stream_tag_schema:
-                if col_name.startswith(constants.SYSTEM_TAG_PREFIX):
+            stream_key_schema, _ = stream.output_schema(columns={"system_keys": True})
+            for col_name in stream_key_schema:
+                if col_name.startswith(constants.SYSTEM_KEY_PREFIX):
                     new_name = (
                         f"{col_name}{constants.BLOCK_SEPARATOR}"
                         f"{stream.pipeline_hash().to_hex(n_char)}:{canon_pos}"
                     )
-                    system_tag_fields[new_name] = str
-        return Schema(system_tag_fields)
+                    system_key_fields[new_name] = str
+        return Schema(system_key_fields)
 
     def binary_static_process(
         self, left_stream: StreamProtocol, right_stream: StreamProtocol
     ) -> StreamProtocol:
-        n_char = self.orcapod_config.system_tag_hash_n_char
+        n_char = self.orcapod_config.system_key_hash_n_char
 
-        # Determine canonical ordering for system tag positions
+        # Determine canonical ordering for system key positions
         canonical = self._canonical_order(left_stream, right_stream)
 
-        # Get tables with source + system_tags, append system tag blocks
+        # Get tables with source + system_keys, append system key blocks
         tables = {}
         for stream, orig_idx in canonical:
             canon_pos = canonical.index((stream, orig_idx))
-            table = stream.as_table(columns={"source": True, "system_tags": True})
-            table = arrow_utils.append_to_system_tags(
+            table = stream.as_table(columns={"source": True, "system_keys": True})
+            table = arrow_utils.append_to_system_keys(
                 table, f"{stream.pipeline_hash().to_hex(n_char)}:{canon_pos}"
             )
             tables[orig_idx] = table
@@ -174,10 +174,10 @@ def binary_static_process(
         left_table = tables[0]
         right_table = tables[1]
 
-        # Determine shared tag keys for inner join
-        left_tag_keys, left_data_keys = left_stream.keys()
-        right_tag_keys, right_data_keys = right_stream.keys()
-        shared_tag_keys = set(left_tag_keys) & set(right_tag_keys)
+        # Determine shared key-schema columns for inner join
+        left_key_keys, left_data_keys = left_stream.keys()
+        right_key_keys, right_data_keys = right_stream.keys()
+        shared_key_keys = set(left_key_keys) & set(right_key_keys)
 
         # Find colliding data columns
         colliding_keys = set(left_data_keys) & set(right_data_keys)
@@ -185,10 +185,10 @@ def binary_static_process(
         # Capture nullable flags from input schemas BEFORE Polars conversion.
         # Polars' join discards nullable info (defaults all to True); we derive
         # the output schema from the inputs instead of from data null counts.
-        # Only capture tag and data columns — system tag and source columns
+        # Only capture key and data columns — system key and source columns
         # are internally created with Arrow's all-nullable default; those fall
         # through to null-count inference below.
-        captured_cols = set(left_tag_keys) | set(left_data_keys) | set(right_tag_keys) | set(right_data_keys)
+        captured_cols = set(left_key_keys) | set(left_data_keys) | set(right_key_keys) | set(right_data_keys)
         left_nullable = {f.name: f.nullable for f in left_table.schema if f.name in captured_cols}
         right_nullable = {f.name: f.nullable for f in right_table.schema if f.name in captured_cols}
         # Build expected output nullable map
@@ -204,8 +204,8 @@ def binary_static_process(
             if source_col in output_nullable:
                 output_nullable[source_col] = False
 
-        # Perform inner join via Polars on shared tag keys
-        # Use a common key trick to ensure cartesian product if no shared tags
+        # Perform inner join via Polars on shared key-schema columns
+        # Use a common key trick to ensure cartesian product if no shared keys
         COMMON_JOIN_KEY = "_common"
         # COMMON_JOIN_KEY is a temporary column that gets dropped after the join
         output_nullable.pop(COMMON_JOIN_KEY, None)
@@ -216,7 +216,7 @@ def binary_static_process(
             0, COMMON_JOIN_KEY, pa.array([0] * len(right_table))
         )
 
-        join_keys = list(shared_tag_keys | {COMMON_JOIN_KEY})
+        join_keys = list(shared_key_keys | {COMMON_JOIN_KEY})
 
         # Track which columns Polars will auto-suffix with _right
         # (right-table columns that collide with left, excluding join keys)
@@ -299,14 +299,14 @@ def binary_static_process(
                 # Both versions exist, drop the right one
                 joined = joined.drop(suffixed_name)
 
-        # Sort system tag values for same-pipeline-hash streams to ensure commutativity
-        joined = arrow_utils.sort_system_tag_values(joined)
+        # Sort system key values for same-pipeline-hash streams to ensure commutativity
+        joined = arrow_utils.sort_system_key_values(joined)
 
-        # Reorder: tag columns first, then data columns
-        all_tag_keys = set(left_tag_keys) | set(right_tag_keys)
-        tag_cols = [c for c in joined.column_names if c in all_tag_keys]
-        other_cols = [c for c in joined.column_names if c not in all_tag_keys]
-        joined = joined.select(tag_cols + other_cols)
+        # Reorder: key columns first, then data columns
+        all_key_keys = set(left_key_keys) | set(right_key_keys)
+        key_cols = [c for c in joined.column_names if c in all_key_keys]
+        other_cols = [c for c in joined.column_names if c not in all_key_keys]
+        joined = joined.select(key_cols + other_cols)
 
         # Reconstruct schema from captured input nullable flags.
         # Fall back to null-count inference for any unexpected columns.
@@ -324,7 +324,7 @@ def binary_static_process(
         )
         return ArrowTableStream(
             joined,
-            tag_columns=tuple(all_tag_keys),
+            key_columns=tuple(all_key_keys),
         )
 
     def identity_structure(self) -> Any:
diff --git a/src/orcapod/core/operators/semijoin.py b/src/orcapod/core/operators/semijoin.py
index 120002ff..7b5c810e 100644
--- a/src/orcapod/core/operators/semijoin.py
+++ b/src/orcapod/core/operators/semijoin.py
@@ -5,7 +5,7 @@
 from orcapod.core.operators.base import BinaryOperator
 from orcapod.core.streams import ArrowTableStream
 from orcapod.errors import InputValidationError
-from orcapod.protocols.core_protocols import DataProtocol, StreamProtocol, TagProtocol
+from orcapod.protocols.core_protocols import DataProtocol, StreamProtocol, KeyProtocol
 from orcapod.types import ColumnConfig, Schema
 from orcapod.utils import schema_utils
 from orcapod.utils.lazy_module import LazyModule
@@ -37,15 +37,15 @@ def binary_static_process(
         Performs a semi-join between left and right streams.
         Returns entries from left stream that have matching entries in right stream.
         """
-        left_tag_schema, left_data_schema = left_stream.output_schema()
-        right_tag_schema, right_data_schema = right_stream.output_schema()
+        left_key_schema, left_data_schema = left_stream.output_schema()
+        right_key_schema, right_data_schema = right_stream.output_schema()
 
-        # Find overlapping columns across all columns (tags + data)
+        # Find overlapping columns across all columns (keys + data)
         left_all_schema = schema_utils.union_schemas(
-            left_tag_schema, left_data_schema
+            left_key_schema, left_data_schema
         )
         right_all_schema = schema_utils.union_schemas(
-            right_tag_schema, right_data_schema
+            right_key_schema, right_data_schema
         )
 
         common_keys = tuple(
@@ -56,8 +56,8 @@ def binary_static_process(
         if not common_keys:
             return left_stream
 
-        # include source info and system tags for left stream
-        left_table = left_stream.as_table(columns={"source": True, "system_tags": True})
+        # include source info and system keys for left stream
+        left_table = left_stream.as_table(columns={"source": True, "system_keys": True})
 
         # Get the right table for matching
         right_table = right_stream.as_table()
@@ -73,7 +73,7 @@ def binary_static_process(
 
         return ArrowTableStream(
             semi_joined_table,
-            tag_columns=tuple(left_tag_schema.keys()),
+            key_columns=tuple(left_key_schema.keys()),
         )
 
     def binary_output_schema(
@@ -102,15 +102,15 @@ def validate_binary_inputs(
         to determine the correct empty-right behavior without data.
         """
         try:
-            left_tag_schema, left_data_schema = left_stream.output_schema()
-            right_tag_schema, right_data_schema = right_stream.output_schema()
+            left_key_schema, left_data_schema = left_stream.output_schema()
+            right_key_schema, right_data_schema = right_stream.output_schema()
 
             # Check that overlapping columns have compatible types across all columns
             left_all_schema = schema_utils.union_schemas(
-                left_tag_schema, left_data_schema
+                left_key_schema, left_data_schema
             )
             right_all_schema = schema_utils.union_schemas(
-                right_tag_schema, right_data_schema
+                right_key_schema, right_data_schema
             )
 
             # intersection_schemas will raise an error if types are incompatible
@@ -137,8 +137,8 @@ def _common_keys_from_schema(self) -> tuple[str, ...]:
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         **kwargs: Any,
     ) -> None:
         """Build-probe: collect right input, then stream left through a hash lookup.
@@ -167,30 +167,30 @@ async def async_execute(
                     await left_ch.collect()
                     return
                 # No common keys — pass all left rows through unchanged
-                async for tag, data in left_ch:
-                    await output.send((tag, data))
+                async for key, data in left_ch:
+                    await output.send((key, data))
                 return
 
             # Determine right-side keys from first row
-            right_tag_keys = set(right_rows[0][0].keys())
+            right_key_keys = set(right_rows[0][0].keys())
             right_pkt_keys = set(right_rows[0][1].keys())
-            right_all_keys = right_tag_keys | right_pkt_keys
+            right_all_keys = right_key_keys | right_pkt_keys
 
             # Phase 2: Probe — stream left rows
             common_keys: tuple[str, ...] | None = None
             right_lookup: set[tuple] | None = None
 
-            async for tag, data in left_ch:
+            async for key, data in left_ch:
                 if common_keys is None:
                     # First left row — determine common keys and build index
-                    left_tag_keys = set(tag.keys())
+                    left_key_keys = set(key.keys())
                     left_pkt_keys = set(data.keys())
-                    left_all_keys = left_tag_keys | left_pkt_keys
+                    left_all_keys = left_key_keys | left_pkt_keys
                     common_keys = tuple(sorted(left_all_keys & right_all_keys))
 
                     if not common_keys:
                         # No common keys — pass all left rows through
-                        await output.send((tag, data))
+                        await output.send((key, data))
                         async for t, p in left_ch:
                             await output.send((t, p))
                         return
@@ -203,10 +203,10 @@ async def async_execute(
                         right_lookup.add(tuple(rd[k] for k in common_keys))
 
                 # Probe
-                ld = tag.as_dict()
+                ld = key.as_dict()
                 ld.update(data.as_dict())
                 if tuple(ld[k] for k in common_keys) in right_lookup:  # type: ignore[arg-type]
-                    await output.send((tag, data))
+                    await output.send((key, data))
         finally:
             await output.close()
 
diff --git a/src/orcapod/core/operators/static_output_pod.py b/src/orcapod/core/operators/static_output_pod.py
index a3bcff69..3b708460 100644
--- a/src/orcapod/core/operators/static_output_pod.py
+++ b/src/orcapod/core/operators/static_output_pod.py
@@ -18,7 +18,7 @@
     DataProtocol,
     PodProtocol,
     StreamProtocol,
-    TagProtocol,
+    KeyProtocol,
     TrackerManagerProtocol,
 )
 from orcapod.types import ColumnConfig, ContentHash, Schema
@@ -101,7 +101,7 @@ def output_schema(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> tuple[Schema, Schema]:
-        """Determine output (tag, data) schemas without triggering computation.
+        """Determine output (key, data) schemas without triggering computation.
 
         Args:
             *streams: Input streams to analyze.
@@ -109,7 +109,7 @@ def output_schema(
             all_info: If True, include all info columns.
 
         Returns:
-            A ``(tag_schema, data_schema)`` tuple.
+            A ``(key_schema, data_schema)`` tuple.
 
         Raises:
             ValidationError: If input types are incompatible.
@@ -193,9 +193,9 @@ def __call__(self, *streams: StreamProtocol, **kwargs) -> DynamicPodStream:
 
     @staticmethod
     def _materialize_to_stream(
-        rows: Sequence[tuple[TagProtocol, DataProtocol]],
+        rows: Sequence[tuple[KeyProtocol, DataProtocol]],
     ) -> StreamProtocol:
-        """Materialize a list of (Tag, Data) pairs into an ArrowTableStream.
+        """Materialize a list of (Key, Data) pairs into an ArrowTableStream.
 
         Used by the barrier-mode ``async_execute`` to convert collected
         channel items back into a stream suitable for ``static_process``.
@@ -206,34 +206,34 @@ def _materialize_to_stream(
         if not rows:
             raise ValueError("Cannot materialize an empty list of rows into a stream")
 
-        tag_tables = []
+        key_tables = []
         data_tables = []
 
-        for tag, data in rows:
-            tag_tables.append(tag.as_table(columns={"system_tags": True}))
+        for key, data in rows:
+            key_tables.append(key.as_table(columns={"system_keys": True}))
             data_tables.append(data.as_table(columns={"source": True}))
 
-        combined_tags = pa.concat_tables(tag_tables)
+        combined_keys = pa.concat_tables(key_tables)
         combined_data = pa.concat_tables(data_tables)
 
-        user_tag_keys = tuple(rows[0][0].keys())
+        user_key_keys = tuple(rows[0][0].keys())
 
         # Preserve actual source_info provenance from the first row
         # (all rows share the same data columns and source tokens).
         source_info = rows[0][1].source_info()
 
-        full_table = arrow_utils.hstack_tables(combined_tags, combined_data)
+        full_table = arrow_utils.hstack_tables(combined_keys, combined_data)
 
         return ArrowTableStream(
             full_table,
-            tag_columns=user_tag_keys,
+            key_columns=user_key_keys,
             source_info=source_info,
         )
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         *,
         input_pipeline_hashes: Sequence[ContentHash] | None = None,
     ) -> None:
@@ -247,15 +247,15 @@ async def async_execute(
             output: Writable channel for downstream consumption.
             input_pipeline_hashes: Pipeline hash for each input stream,
                 positionally matching ``inputs``.  Multi-input operators
-                (e.g. Join) use these to compute canonical system-tag
+                (e.g. Join) use these to compute canonical system-key
                 column names.  Ignored by single-input operators.
         """
         try:
             all_rows = await asyncio.gather(*(ch.collect() for ch in inputs))
             streams = [self._materialize_to_stream(rows) for rows in all_rows]
             result = self.static_process(*streams)
-            for tag, data in result.iter_data():
-                await output.send((tag, data))
+            for key, data in result.iter_data():
+                await output.send((key, data))
         finally:
             await output.close()
 
@@ -310,13 +310,13 @@ def keys(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> tuple[tuple[str, ...], tuple[str, ...]]:
-        """Return the (tag_keys, data_keys) column names for this stream."""
-        tag_schema, data_schema = self._pod.output_schema(
+        """Return the (key, data) column names for this stream as two tuples."""
+        key_schema, data_schema = self._pod.output_schema(
             *self.upstreams,
             columns=columns,
             all_info=all_info,
         )
-        return tuple(tag_schema.keys()), tuple(data_schema.keys())
+        return tuple(key_schema.keys()), tuple(data_schema.keys())
 
     def output_schema(
         self,
@@ -324,7 +324,7 @@ def output_schema(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> tuple[Schema, Schema]:
-        """Return the (tag_schema, data_schema) for this stream."""
+        """Return the (key_schema, data_schema) for this stream."""
         return self._pod.output_schema(
             *self.upstreams,
             columns=columns,
@@ -381,7 +381,7 @@ def as_table(
 
     def iter_data(
         self,
-    ) -> Iterator[tuple[TagProtocol, DataProtocol]]:
+    ) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
         self.run()
         assert self._cached_stream is not None, (
             "StreamProtocol has not been updated or is empty."
diff --git a/src/orcapod/core/sources/arrow_table_source.py b/src/orcapod/core/sources/arrow_table_source.py
index bd00b5c0..3b1a7c7a 100644
--- a/src/orcapod/core/sources/arrow_table_source.py
+++ b/src/orcapod/core/sources/arrow_table_source.py
@@ -18,7 +18,7 @@ class ArrowTableSource(RootSource):
     """A source backed by an in-memory PyArrow Table.
 
     Uses ``SourceStreamBuilder`` to strip system columns, add per-row
-    source-info provenance columns and a system tag column encoding the
+    source-info provenance columns and a system key column encoding the
     schema hash, then wraps the result in an ``ArrowTableStream``.
 
     Nullable handling
@@ -53,8 +53,8 @@ class ArrowTableSource(RootSource):
     def __init__(
         self,
         table: "pa.Table",
-        tag_columns: Collection[str] = (),
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] = (),
+        system_key_columns: Collection[str] = (),
         record_id_column: str | None = None,
         infer_nullable: bool = False,
         **kwargs: Any,
@@ -72,17 +72,17 @@ def __init__(
         builder = SourceStreamBuilder(self.data_context, self.orcapod_config)
         result = builder.build(
             table,
-            tag_columns=tag_columns,
+            key_columns=key_columns,
             source_id=self._source_id,
             record_id_column=record_id_column,
-            system_tag_columns=system_tag_columns,
+            system_key_columns=system_key_columns,
         )
 
         self._stream = result.stream
         self._schema_hash = result.schema_hash
         self._table_hash = result.table_hash
-        self._tag_columns = result.tag_columns
-        self._system_tag_columns = result.system_tag_columns
+        self._key_columns = result.key_columns
+        self._system_key_columns = result.system_key_columns
         self._record_id_column = record_id_column
 
         if self._source_id is None:
@@ -92,7 +92,7 @@ def to_config(self, db_registry=None) -> dict[str, Any]:
         """Serialize metadata-only config (in-memory table is not serializable)."""
         return {
             "source_type": "arrow_table",
-            "tag_columns": list(self._tag_columns),
+            "key_columns": list(self._key_columns),
             "source_id": self.source_id,
             **self._identity_config(),
         }
@@ -111,5 +111,5 @@ def from_config(cls, config: dict[str, Any], db_registry=None) -> ArrowTableSour
 
     @property
     def table(self) -> "pa.Table":
-        """Return the enriched table (with source-info and system tags)."""
-        return self._stream.as_table(columns={"source": True, "system_tags": True})
+        """Return the enriched table (with source-info and system keys)."""
+        return self._stream.as_table(columns={"source": True, "system_keys": True})
diff --git a/src/orcapod/core/sources/base.py b/src/orcapod/core/sources/base.py
index 7589e082..95ccec2d 100644
--- a/src/orcapod/core/sources/base.py
+++ b/src/orcapod/core/sources/base.py
@@ -31,7 +31,7 @@ class RootSource(StreamBase):
       ``self._stream`` by default; concrete subclasses may override them.
 
     As a PipelineElementProtocol:
-    - ``pipeline_identity_structure()`` returns ``(tag_schema, data_schema)``
+    - ``pipeline_identity_structure()`` returns ``(key_schema, data_schema)``
       — schema-only, no data content — forming the base case of the pipeline
       identity Merkle chain.
 
@@ -121,23 +121,23 @@ def _identity_config(self) -> dict[str, Any]:
         """
         from orcapod.pipeline.serialization import serialize_schema
 
-        tag_schema, data_schema = self.output_schema()
+        key_schema, data_schema = self.output_schema()
         type_converter = self.data_context.type_converter
         return {
             "content_hash": self.content_hash().to_string(),
             "pipeline_hash": self.pipeline_hash().to_string(),
-            "tag_schema": serialize_schema(tag_schema, type_converter),
+            "key_schema": serialize_schema(key_schema, type_converter),
             "data_schema": serialize_schema(data_schema, type_converter),
         }
 
     def pipeline_identity_structure(self) -> Any:
-        """Return (tag_schema, data_schema) as the pipeline identity for this
+        """Return (key_schema, data_schema) as the pipeline identity for this
         source.  Schema-only: no data content is included, so sources with
         identical schemas share the same pipeline hash and therefore the same
         pipeline database table.
         """
-        tag_schema, data_schema = self.output_schema()
-        return (tag_schema, data_schema)
+        key_schema, data_schema = self.output_schema()
+        return (key_schema, data_schema)
 
     # -------------------------------------------------------------------------
     # StreamProtocol protocol
diff --git a/src/orcapod/core/sources/cached_source.py b/src/orcapod/core/sources/cached_source.py
index 5b1e5a5c..42f9bb0d 100644
--- a/src/orcapod/core/sources/cached_source.py
+++ b/src/orcapod/core/sources/cached_source.py
@@ -8,7 +8,7 @@
 from orcapod.config import Config
 from orcapod.core.sources.base import RootSource
 from orcapod.core.streams.arrow_table_stream import ArrowTableStream
-from orcapod.protocols.core_protocols import DataProtocol, SourceProtocol, TagProtocol
+from orcapod.protocols.core_protocols import DataProtocol, SourceProtocol, KeyProtocol
 from orcapod.protocols.database_protocols import ArrowDatabaseProtocol
 from orcapod.types import ColumnConfig, Schema
 from orcapod.utils.lazy_module import LazyModule
@@ -42,7 +42,7 @@ class CachedSource(RootSource):
 
     Example::
 
-        source = ArrowTableSource(table, tag_columns=["id"])
+        source = ArrowTableSource(table, key_columns=["id"])
         cached = CachedSource(source, cache_database=db)
         # or equivalently:
         cached = source.cached(cache_database=db)
@@ -216,7 +216,7 @@ def _ingest_live_data(self) -> None:
         ``SourceProxy``).
         """
         live_table = self._source.as_table(
-            columns={"source": True, "system_tags": True}
+            columns={"source": True, "system_keys": True}
         )
 
         # Compute per-row record hashes for dedup: hash(full row)
@@ -262,13 +262,13 @@ def _build_merged_stream(self) -> ArrowTableStream:
         if all_records is None:
             all_records = self._empty_table()
 
-        tag_keys = self._source.keys()[0]
-        return ArrowTableStream(all_records, tag_columns=tag_keys)
+        key_keys = self._source.keys()[0]
+        return ArrowTableStream(all_records, key_columns=key_keys)
 
     def _empty_table(self) -> pa.Table:
         """Build an empty Arrow table matching the source's output schema."""
-        tag_schema, data_schema = self._source.output_schema()
-        merged = dict(tag_schema)
+        key_schema, data_schema = self._source.output_schema()
+        merged = dict(key_schema)
         merged.update(data_schema)
         type_converter = self.data_context.type_converter
         arrow_schema = type_converter.python_schema_to_arrow_schema(merged)
@@ -299,7 +299,7 @@ def clear_cache(self) -> None:
         """Discard in-memory cached stream (forces rebuild on next access)."""
         self._cached_stream = None
 
-    def iter_data(self) -> Iterator[tuple[TagProtocol, DataProtocol]]:
+    def iter_data(self) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
         self._ensure_stream()
         assert self._cached_stream is not None
         return self._cached_stream.iter_data()
diff --git a/src/orcapod/core/sources/csv_source.py b/src/orcapod/core/sources/csv_source.py
index 7346647a..6cef7f27 100644
--- a/src/orcapod/core/sources/csv_source.py
+++ b/src/orcapod/core/sources/csv_source.py
@@ -19,14 +19,14 @@ class CSVSource(RootSource):
 
     The file is read once at construction time using PyArrow's CSV reader,
     converted to an Arrow table, and enriched by ``SourceStreamBuilder``
-    (source-info, schema-hash, system tags).
+    (source-info, schema-hash, system keys).
     """
 
     def __init__(
         self,
         file_path: str,
-        tag_columns: Collection[str] = (),
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] = (),
+        system_key_columns: Collection[str] = (),
         record_id_column: str | None = None,
         source_id: str | None = None,
         schema: pa.Schema | None = None,
@@ -48,15 +48,15 @@ def __init__(
         builder = SourceStreamBuilder(self.data_context, self.orcapod_config)
         result = builder.build(
             table,
-            tag_columns=tag_columns,
+            key_columns=key_columns,
             source_id=self._source_id,
             record_id_column=record_id_column,
-            system_tag_columns=system_tag_columns,
+            system_key_columns=system_key_columns,
         )
 
         self._stream = result.stream
-        self._tag_columns = result.tag_columns
-        self._system_tag_columns = result.system_tag_columns
+        self._key_columns = result.key_columns
+        self._system_key_columns = result.system_key_columns
         self._record_id_column = record_id_column
         if self._source_id is None:
             self._source_id = result.source_id
@@ -66,8 +66,8 @@ def to_config(self, db_registry=None) -> dict[str, Any]:
         return {
             "source_type": "csv",
             "file_path": self._file_path,
-            "tag_columns": list(self._tag_columns),
-            "system_tag_columns": list(self._system_tag_columns),
+            "key_columns": list(self._key_columns),
+            "system_key_columns": list(self._system_key_columns),
             "record_id_column": self._record_id_column,
             "source_id": self.source_id,
             **self._identity_config(),
@@ -78,8 +78,8 @@ def from_config(cls, config: dict[str, Any], db_registry=None) -> CSVSource:
         """Reconstruct a CSVSource from a config dict."""
         return cls(
             file_path=config["file_path"],
-            tag_columns=config.get("tag_columns", ()),
-            system_tag_columns=config.get("system_tag_columns", ()),
+            key_columns=config.get("key_columns", ()),
+            system_key_columns=config.get("system_key_columns", ()),
             record_id_column=config.get("record_id_column"),
             source_id=config.get("source_id"),
         )
diff --git a/src/orcapod/core/sources/data_frame_source.py b/src/orcapod/core/sources/data_frame_source.py
index 54b65b87..d7598d96 100644
--- a/src/orcapod/core/sources/data_frame_source.py
+++ b/src/orcapod/core/sources/data_frame_source.py
@@ -24,14 +24,14 @@ class DataFrameSource(RootSource):
     """A source backed by a Polars DataFrame (or any Polars-compatible data).
 
     The DataFrame is converted to an Arrow table and enriched by
-    ``SourceStreamBuilder`` (source-info, schema-hash, system tags).
+    ``SourceStreamBuilder`` (source-info, schema-hash, system keys).
     """
 
     def __init__(
         self,
         data: FrameInitTypes,
-        tag_columns: str | Collection[str] = (),
-        system_tag_columns: Collection[str] = (),
+        key_columns: str | Collection[str] = (),
+        system_key_columns: Collection[str] = (),
         source_id: str | None = None,
         schema: pa.Schema | None = None,
         **kwargs: Any,
@@ -51,15 +51,15 @@ def __init__(
             )
             df = df.with_columns([pl.from_arrow(c) for c in sub_table])
 
-        if isinstance(tag_columns, str):
-            tag_columns = [tag_columns]
-        tag_columns = list(tag_columns)
+        if isinstance(key_columns, str):
+            key_columns = [key_columns]
+        key_columns = list(key_columns)
 
         df = polars_data_utils.drop_system_columns(df)
 
-        missing = set(tag_columns) - set(df.columns)
+        missing = set(key_columns) - set(df.columns)
         if missing:
-            raise ValueError(f"TagProtocol column(s) not found in data: {missing}")
+            raise ValueError(f"KeyProtocol column(s) not found in data: {missing}")
 
         arrow_table = df.to_arrow()
         if schema is not None:
@@ -72,13 +72,13 @@ def __init__(
         builder = SourceStreamBuilder(self.data_context, self.orcapod_config)
         result = builder.build(
             arrow_table,
-            tag_columns=tag_columns,
+            key_columns=key_columns,
             source_id=self._source_id,
-            system_tag_columns=system_tag_columns,
+            system_key_columns=system_key_columns,
         )
 
         self._stream = result.stream
-        self._tag_columns = result.tag_columns
+        self._key_columns = result.key_columns
         if self._source_id is None:
             self._source_id = result.source_id
 
@@ -86,7 +86,7 @@ def to_config(self, db_registry=None) -> dict[str, Any]:
         """Serialize metadata-only config (DataFrame is not serializable)."""
         return {
             "source_type": "data_frame",
-            "tag_columns": list(self._tag_columns),
+            "key_columns": list(self._key_columns),
             "source_id": self.source_id,
             **self._identity_config(),
         }
diff --git a/src/orcapod/core/sources/db_table_source.py b/src/orcapod/core/sources/db_table_source.py
index 9e80d463..72c53bbc 100644
--- a/src/orcapod/core/sources/db_table_source.py
+++ b/src/orcapod/core/sources/db_table_source.py
@@ -1,13 +1,13 @@
 """DBTableSource — a read-only RootSource backed by any DBConnectorProtocol.
 
-Uses the table's primary-key columns as tag columns by default.
+Uses the table's primary-key columns as the source's key columns by default.
 Type mapping (DB-native → Arrow) is fully delegated to the connector.
 
 Example::
 
     connector = SQLiteConnector(":memory:")   # PLT-1076
-    source = DBTableSource(connector, "measurements")          # PKs → tags
-    source = DBTableSource(connector, "events", tag_columns=["session_id"])
+    source = DBTableSource(connector, "measurements")          # PKs → keys
+    source = DBTableSource(connector, "events", key_columns=["session_id"])
 """
 from __future__ import annotations
 
@@ -34,17 +34,17 @@ class DBTableSource(RootSource):
 
     At construction time the source:
     1. Validates the table exists in the connector.
-    2. Resolves tag columns (defaults to the table's primary-key columns).
+    2. Resolves key columns (defaults to the table's primary-key columns).
     3. Fetches all rows as Arrow batches and assembles a PyArrow table.
-    4. Enriches via ``SourceStreamBuilder`` (source-info, schema-hash, system tags).
+    4. Enriches via ``SourceStreamBuilder`` (source-info, schema-hash, system keys).
 
     Args:
         connector: A ``DBConnectorProtocol`` providing DB access.
         table_name: Name of the table to expose as a source.
-        tag_columns: Columns to use as tag columns.  If ``None`` (default),
+        key_columns: Columns to use as key columns.  If ``None`` (default),
             the table's primary-key columns are used.  Raises ``ValueError``
             if the table has no primary key and no explicit columns are given.
-        system_tag_columns: Additional system-level tag columns (passed through
+        system_key_columns: Additional system-level key columns (passed through
             to ``SourceStreamBuilder``; mirrors ``DeltaTableSource`` API).
         record_id_column: Column for stable per-row record IDs in provenance
             strings.  If ``None``, row indices are used.
@@ -63,8 +63,8 @@ def __init__(
         self,
         connector: DBConnectorProtocol,
         table_name: str,
-        tag_columns: Collection[str] | None = None,
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] | None = None,
+        system_key_columns: Collection[str] = (),
         record_id_column: str | None = None,
         source_id: str | None = None,
         label: str | None = None,
@@ -91,16 +91,16 @@ def __init__(
         if table_name not in connector.get_table_names():
             raise ValueError(f"Table {table_name!r} not found in database.")
 
-        # Step 2: Resolve tag columns — default to PK columns
-        if tag_columns is None:
-            resolved_tag_columns: list[str] = connector.get_pk_columns(table_name)
-            if not resolved_tag_columns:
+        # Step 2: Resolve key columns — default to PK columns
+        if key_columns is None:
+            resolved_key_columns: list[str] = connector.get_pk_columns(table_name)
+            if not resolved_key_columns:
                 raise ValueError(
                     f"Table {table_name!r} has no primary key columns. "
-                    "Provide explicit tag_columns."
+                    "Provide explicit key_columns."
                 )
         else:
-            resolved_tag_columns = list(tag_columns)
+            resolved_key_columns = list(key_columns)
 
         # Step 3: Fetch the full table as Arrow.
         # _query allows subclasses (e.g. SQLiteTableSource) to inject a custom
@@ -133,15 +133,15 @@ def __init__(
         builder = SourceStreamBuilder(self.data_context, self.orcapod_config)
         result = builder.build(
             table,
-            tag_columns=resolved_tag_columns,
+            key_columns=resolved_key_columns,
             source_id=self._source_id,
             record_id_column=record_id_column,
-            system_tag_columns=system_tag_columns,
+            system_key_columns=system_key_columns,
         )
 
         self._stream = result.stream
-        self._tag_columns = result.tag_columns
-        self._system_tag_columns = result.system_tag_columns
+        self._key_columns = result.key_columns
+        self._system_key_columns = result.system_key_columns
         if self._source_id is None:
             self._source_id = result.source_id
 
@@ -151,8 +151,8 @@ def to_config(self, db_registry=None) -> dict[str, Any]:
             "source_type": "db_table",
             "connector": self._connector.to_config(),
             "table_name": self._table_name,
-            "tag_columns": list(self._tag_columns),
-            "system_tag_columns": list(self._system_tag_columns),
+            "key_columns": list(self._key_columns),
+            "system_key_columns": list(self._system_key_columns),
             "record_id_column": self._record_id_column,
             "source_id": self.source_id,
             **self._identity_config(),
diff --git a/src/orcapod/core/sources/delta_table_source.py b/src/orcapod/core/sources/delta_table_source.py
index c3e00d80..efe7a061 100644
--- a/src/orcapod/core/sources/delta_table_source.py
+++ b/src/orcapod/core/sources/delta_table_source.py
@@ -22,14 +22,14 @@ class DeltaTableSource(RootSource):
 
     The table is read once at construction time using ``deltalake``'s
     PyArrow integration. The resulting Arrow table is enriched by
-    ``SourceStreamBuilder`` (source-info, schema-hash, system tags).
+    ``SourceStreamBuilder`` (source-info, schema-hash, system keys).
     """
 
     def __init__(
         self,
         delta_table_path: PathLike,
-        tag_columns: Collection[str] = (),
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] = (),
+        system_key_columns: Collection[str] = (),
         record_id_column: str | None = None,
         source_id: str | None = None,
         **kwargs: Any,
@@ -54,15 +54,15 @@ def __init__(
         builder = SourceStreamBuilder(self.data_context, self.orcapod_config)
         result = builder.build(
             table,
-            tag_columns=tag_columns,
+            key_columns=key_columns,
             source_id=self._source_id,
             record_id_column=record_id_column,
-            system_tag_columns=system_tag_columns,
+            system_key_columns=system_key_columns,
         )
 
         self._stream = result.stream
-        self._tag_columns = result.tag_columns
-        self._system_tag_columns = result.system_tag_columns
+        self._key_columns = result.key_columns
+        self._system_key_columns = result.system_key_columns
         self._record_id_column = record_id_column
         if self._source_id is None:
             self._source_id = result.source_id
@@ -72,8 +72,8 @@ def to_config(self, db_registry=None) -> dict[str, Any]:
         return {
             "source_type": "delta_table",
             "delta_table_path": str(self._delta_table_path),
-            "tag_columns": list(self._tag_columns),
-            "system_tag_columns": list(self._system_tag_columns),
+            "key_columns": list(self._key_columns),
+            "system_key_columns": list(self._system_key_columns),
             "record_id_column": self._record_id_column,
             "source_id": self.source_id,
             **self._identity_config(),
@@ -84,8 +84,8 @@ def from_config(cls, config: dict[str, Any], db_registry=None) -> "DeltaTableSou
         """Reconstruct a DeltaTableSource from a config dict."""
         return cls(
             delta_table_path=config["delta_table_path"],
-            tag_columns=config.get("tag_columns", ()),
-            system_tag_columns=config.get("system_tag_columns", ()),
+            key_columns=config.get("key_columns", ()),
+            system_key_columns=config.get("system_key_columns", ()),
             record_id_column=config.get("record_id_column"),
             source_id=config.get("source_id"),
         )
diff --git a/src/orcapod/core/sources/derived_source.py b/src/orcapod/core/sources/derived_source.py
index 048e9cc6..231274af 100644
--- a/src/orcapod/core/sources/derived_source.py
+++ b/src/orcapod/core/sources/derived_source.py
@@ -78,12 +78,12 @@ def _get_stream(self) -> ArrowTableStream:
             records = self._origin.get_all_records()
             if records is None:
                 # Build empty table with correct schema
-                tag_schema, data_schema = self._origin.output_schema()
-                tag_keys = self._origin.keys()[0]
+                key_schema, data_schema = self._origin.output_schema()
+                key_keys = self._origin.keys()[0]
                 tc = self.data_context.type_converter
                 fields = [
-                    pa.field(k, tc.python_type_to_arrow_type(tag_schema[k]))
-                    for k in tag_keys
+                    pa.field(k, tc.python_type_to_arrow_type(key_schema[k]))
+                    for k in key_keys
                 ]
                 fields += [
                     pa.field(k, tc.python_type_to_arrow_type(v))
@@ -96,8 +96,8 @@ def _get_stream(self) -> ArrowTableStream:
                 )
             else:
                 self._cached_table = records
-        tag_keys = self._origin.keys()[0]
-        return ArrowTableStream(self._cached_table, tag_columns=tag_keys)
+        key_keys = self._origin.keys()[0]
+        return ArrowTableStream(self._cached_table, key_columns=key_keys)
 
     def iter_data(self):
         return self._get_stream().iter_data()
diff --git a/src/orcapod/core/sources/dict_source.py b/src/orcapod/core/sources/dict_source.py
index b78f42da..f4957a8f 100644
--- a/src/orcapod/core/sources/dict_source.py
+++ b/src/orcapod/core/sources/dict_source.py
@@ -18,16 +18,16 @@
 class DictSource(RootSource):
     """A source backed by a collection of Python dictionaries.
 
-    Each dict becomes one (tag, data) pair in the stream. The dicts are
+    Each dict becomes one (key, data) pair in the stream. The dicts are
     converted to an Arrow table via the data-context type converter, then
-    enriched by ``SourceStreamBuilder`` (source-info, schema-hash, system tags).
+    enriched by ``SourceStreamBuilder`` (source-info, schema-hash, system keys).
     """
 
     def __init__(
         self,
         data: Collection[Mapping[str, DataValue]],
-        tag_columns: Collection[str] = (),
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] = (),
+        system_key_columns: Collection[str] = (),
         data_schema: SchemaLike | pa.Schema | None = None,
         source_id: str | None = None,
         **kwargs: Any,
@@ -55,13 +55,13 @@ def __init__(
         builder = SourceStreamBuilder(self.data_context, self.orcapod_config)
         result = builder.build(
             arrow_table,
-            tag_columns=tag_columns,
+            key_columns=key_columns,
             source_id=self._source_id,
-            system_tag_columns=system_tag_columns,
+            system_key_columns=system_key_columns,
         )
 
         self._stream = result.stream
-        self._tag_columns = result.tag_columns
+        self._key_columns = result.key_columns
         if self._source_id is None:
             self._source_id = result.source_id
 
@@ -69,7 +69,7 @@ def to_config(self, db_registry=None) -> dict[str, Any]:
         """Serialize metadata-only config (data is not serializable)."""
         return {
             "source_type": "dict",
-            "tag_columns": list(self._tag_columns),
+            "key_columns": list(self._key_columns),
             "source_id": self.source_id,
             **self._identity_config(),
         }
diff --git a/src/orcapod/core/sources/list_source.py b/src/orcapod/core/sources/list_source.py
index 671a4f33..2091ec4a 100644
--- a/src/orcapod/core/sources/list_source.py
+++ b/src/orcapod/core/sources/list_source.py
@@ -5,7 +5,7 @@
 
 from orcapod.core.sources.base import RootSource
 from orcapod.core.sources.stream_builder import SourceStreamBuilder
-from orcapod.protocols.core_protocols import TagProtocol
+from orcapod.protocols.core_protocols import KeyProtocol
 from orcapod.types import SchemaLike
 from orcapod.utils import arrow_utils
 
@@ -16,22 +16,22 @@
 class ListSource(RootSource):
     """A source backed by a Python list.
 
-    Each element in the list becomes one (tag, data) pair. The element is
-    stored as the data under ``name``; the tag is either the element's index
-    (default) or the dict returned by ``tag_function(element, index)``.
+    Each element in the list becomes one (key, data) pair. The element is
+    stored as the data under ``name``; the key is either the element's index
+    (default) or the dict returned by ``key_function(element, index)``.
     """
 
     @staticmethod
-    def _default_tag(element: Any, idx: int) -> dict[str, Any]:
+    def _default_key(element: Any, idx: int) -> dict[str, Any]:
         return {"element_index": idx}
 
     def __init__(
         self,
         name: str,
         data: list[Any],
-        tag_function: Callable[[Any, int], dict[str, Any] | TagProtocol] | None = None,
-        expected_tag_keys: Collection[str] | None = None,
-        tag_function_hash_mode: Literal["content", "signature", "name"] = "name",
+        key_function: Callable[[Any, int], dict[str, Any] | KeyProtocol] | None = None,
+        expected_key_keys: Collection[str] | None = None,
+        key_function_hash_mode: Literal["content", "signature", "name"] = "name",
         data_schema: SchemaLike | None = None,
         source_id: str | None = None,
         **kwargs: Any,
@@ -40,34 +40,34 @@ def __init__(
 
         self.name = name
         self._elements = list(data)
-        self._tag_function_hash_mode = tag_function_hash_mode
+        self._key_function_hash_mode = key_function_hash_mode
 
-        if tag_function is None:
-            tag_function = self.__class__._default_tag
-            if expected_tag_keys is None:
-                expected_tag_keys = ["element_index"]
+        if key_function is None:
+            key_function = self.__class__._default_key
+            if expected_key_keys is None:
+                expected_key_keys = ["element_index"]
 
-        self._tag_function = tag_function
-        self._expected_tag_keys = (
-            tuple(expected_tag_keys) if expected_tag_keys is not None else None
+        self._key_function = key_function
+        self._expected_key_keys = (
+            tuple(expected_key_keys) if expected_key_keys is not None else None
         )
 
-        # Hash the tag function for identity purposes.
-        self._tag_function_hash = self._hash_tag_function()
+        # Hash the key function for identity purposes.
+        self._key_function_hash = self._hash_key_function()
 
-        # Build rows: each row is tag_fields merged with {name: element}.
+        # Build rows: each row is key_fields merged with {name: element}.
         rows = []
         for idx, element in enumerate(self._elements):
-            tag_fields = tag_function(element, idx)
-            if hasattr(tag_fields, "as_dict"):
-                tag_fields = tag_fields.as_dict()
-            row = dict(tag_fields)
+            key_fields = key_function(element, idx)
+            if hasattr(key_fields, "as_dict"):
+                key_fields = key_fields.as_dict()
+            row = dict(key_fields)
             row[name] = element
             rows.append(row)
 
-        tag_columns = (
-            list(self._expected_tag_keys)
-            if self._expected_tag_keys is not None
+        key_columns = (
+            list(self._expected_key_keys)
+            if self._expected_key_keys is not None
             else [k for k in (rows[0].keys() if rows else []) if k != name]
         )
 
@@ -84,7 +84,7 @@ def __init__(
         builder = SourceStreamBuilder(self.data_context, self.orcapod_config)
         result = builder.build(
             arrow_table,
-            tag_columns=tag_columns,
+            key_columns=key_columns,
             source_id=self._source_id,
         )
 
@@ -92,23 +92,23 @@ def __init__(
         if self._source_id is None:
             self._source_id = result.source_id
 
-    def _hash_tag_function(self) -> str:
-        """Produce a stable hash string for the tag function."""
-        if self._tag_function_hash_mode == "name":
-            fn = self._tag_function
+    def _hash_key_function(self) -> str:
+        """Produce a stable hash string for the key function."""
+        if self._key_function_hash_mode == "name":
+            fn = self._key_function
             return f"{fn.__module__}.{fn.__qualname__}"
-        elif self._tag_function_hash_mode == "signature":
+        elif self._key_function_hash_mode == "signature":
             import inspect
 
-            return str(inspect.signature(self._tag_function))
+            return str(inspect.signature(self._key_function))
         else:  # "content"
             import inspect
 
-            src = inspect.getsource(self._tag_function)
+            src = inspect.getsource(self._key_function)
             return self.data_context.semantic_hasher.hash_object(src).to_hex()
 
     def identity_structure(self) -> Any:
-        """Return identity including class name, field name, elements, and tag
+        """Return identity including class name, field name, elements, and key
         function hash.
         """
         try:
@@ -119,7 +119,7 @@ def identity_structure(self) -> Any:
             self.__class__.__name__,
             self.name,
             elements_repr,
-            self._tag_function_hash,
+            self._key_function_hash,
         )
 
     def to_config(self, db_registry=None) -> dict[str, Any]:
diff --git a/src/orcapod/core/sources/postgresql_table_source.py b/src/orcapod/core/sources/postgresql_table_source.py
index 9229789a..35cef34c 100644
--- a/src/orcapod/core/sources/postgresql_table_source.py
+++ b/src/orcapod/core/sources/postgresql_table_source.py
@@ -1,7 +1,7 @@
 """PostgreSQLTableSource — a read-only RootSource backed by a PostgreSQL table.
 
 Wraps a PostgreSQL table as an OrcaPod Source. Primary-key columns are used
-as tag columns by default.
+as key columns by default.
 
 Example::
 
@@ -29,13 +29,13 @@ class PostgreSQLTableSource(DBTableSource):
     1. Stores the DSN for serialisation.
     2. Opens a ``PostgreSQLConnector`` for *dsn*.
     3. Delegates to ``DBTableSource.__init__``, which validates the table,
-       resolves tag columns (defaults to PK columns), fetches all rows as
+       resolves key columns (defaults to PK columns), fetches all rows as
        Arrow batches, and builds the stream.
     4. Closes the connector — all data is eagerly loaded into memory, so the
        connection is released immediately.
 
-    PostgreSQL PK columns are always ``NOT NULL``, so NULL tag values can
-    only arise when *tag_columns* is overridden to point at a nullable
+    PostgreSQL PK columns are always ``NOT NULL``, so NULL key values can
+    only arise when *key_columns* is overridden to point at a nullable
     column. Such NULLs are passed through as-is (Arrow supports nulls).
 
     Args:
@@ -43,10 +43,10 @@ class PostgreSQLTableSource(DBTableSource):
             URI form: ``"postgresql://user:pass@host:5432/dbname"``
             Keyword form: ``"host=localhost dbname=mydb user=alice"``
         table_name: Name of the table to expose as a source.
-        tag_columns: Columns to use as tag columns. If ``None`` (default),
+        key_columns: Columns to use as key columns. If ``None`` (default),
             the table's primary-key columns are used. Raises ``ValueError``
             if the table has no primary key and no explicit columns are given.
-        system_tag_columns: Additional system-level tag columns.
+        system_key_columns: Additional system-level key columns.
         record_id_column: Column for stable per-row record IDs in provenance.
         source_id: Canonical source name. Defaults to *table_name*.
         label: Human-readable label for this source node.
@@ -55,7 +55,7 @@ class PostgreSQLTableSource(DBTableSource):
 
     Raises:
         ValueError: If the table is not found, is empty, or has no PK and
-            no *tag_columns* are given.
+            no *key_columns* are given.
         psycopg.OperationalError: If the DSN is invalid or connection fails.
     """
 
@@ -63,8 +63,8 @@ def __init__(
         self,
         dsn: str,
         table_name: str,
-        tag_columns: Collection[str] | None = None,
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] | None = None,
+        system_key_columns: Collection[str] = (),
         record_id_column: str | None = None,
         source_id: str | None = None,
         label: str | None = None,
@@ -77,8 +77,8 @@ def __init__(
             super().__init__(
                 connector,
                 table_name,
-                tag_columns=tag_columns,
-                system_tag_columns=system_tag_columns,
+                key_columns=key_columns,
+                system_key_columns=system_key_columns,
                 record_id_column=record_id_column,
                 source_id=source_id,
                 label=label,
@@ -113,8 +113,8 @@ def from_config(cls, config: dict[str, Any], db_registry=None) -> PostgreSQLTabl
         return cls(
             dsn=config["dsn"],
             table_name=config["table_name"],
-            tag_columns=config.get("tag_columns"),
-            system_tag_columns=config.get("system_tag_columns", ()),
+            key_columns=config.get("key_columns"),
+            system_key_columns=config.get("system_key_columns", ()),
             record_id_column=config.get("record_id_column"),
             source_id=config.get("source_id"),
             label=config.get("label"),
diff --git a/src/orcapod/core/sources/source_proxy.py b/src/orcapod/core/sources/source_proxy.py
index 3612f1b2..5ed55c6e 100644
--- a/src/orcapod/core/sources/source_proxy.py
+++ b/src/orcapod/core/sources/source_proxy.py
@@ -21,7 +21,7 @@
 
     import pyarrow as pa
 
-    from orcapod.protocols.core_protocols import DataProtocol, TagProtocol
+    from orcapod.protocols.core_protocols import DataProtocol, KeyProtocol
     from orcapod.types import ColumnConfig
 
 
@@ -37,7 +37,7 @@ class SourceProxy(RootSource):
         source_id: The original source's canonical ID.
         content_hash_str: The original source's content hash string.
         pipeline_hash_str: The original source's pipeline hash string.
-        tag_schema: The original source's tag schema.
+        key_schema: The original source's key schema.
         data_schema: The original source's data schema.
         expected_class_name: Class name of the original source (e.g.
             ``"ArrowTableSource"``).  Informational and used for validation
@@ -51,7 +51,7 @@ def __init__(
         source_id: str,
         content_hash_str: str,
         pipeline_hash_str: str,
-        tag_schema: Schema,
+        key_schema: Schema,
         data_schema: Schema,
         expected_class_name: str | None = None,
         source_config: dict[str, Any] | None = None,
@@ -60,7 +60,7 @@ def __init__(
         super().__init__(source_id=source_id, label=label)
         self._content_hash_str = content_hash_str
         self._pipeline_hash_str = pipeline_hash_str
-        self._tag_schema = tag_schema
+        self._key_schema = key_schema
         self._data_schema = data_schema
         self._expected_class_name = expected_class_name
         self._source_config = source_config or {}
@@ -89,7 +89,7 @@ def bind(self, source: SourceProtocol) -> None:
         """Bind a live source to this proxy, enabling data access.
 
         The source must match this proxy's identity — same ``source_id``,
-        ``content_hash``, ``pipeline_hash``, tag schema keys, and data
+        ``content_hash``, ``pipeline_hash``, key schema keys, and data
         schema keys.  If ``expected_class_name`` is set, the source's class
         name must also match.
 
@@ -163,7 +163,7 @@ def output_schema(
         columns: ColumnConfig | dict[str, Any] | None = None,
         all_info: bool = False,
     ) -> tuple[Schema, Schema]:
-        return (self._tag_schema, self._data_schema)
+        return (self._key_schema, self._data_schema)
 
     def keys(
         self,
@@ -172,7 +172,7 @@ def keys(
         all_info: bool = False,
     ) -> tuple[tuple[str, ...], tuple[str, ...]]:
         return (
-            tuple(self._tag_schema.keys()),
+            tuple(self._key_schema.keys()),
             tuple(self._data_schema.keys()),
         )
 
@@ -191,7 +191,7 @@ def _require_delegate(self) -> SourceProtocol:
             )
         return self._delegate
 
-    def iter_data(self) -> Iterator[tuple[TagProtocol, DataProtocol]]:
+    def iter_data(self) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
         return self._require_delegate().iter_data()
 
     def as_table(
diff --git a/src/orcapod/core/sources/spiraldb_table_source.py b/src/orcapod/core/sources/spiraldb_table_source.py
index b7059ffc..3ae435ac 100644
--- a/src/orcapod/core/sources/spiraldb_table_source.py
+++ b/src/orcapod/core/sources/spiraldb_table_source.py
@@ -1,7 +1,7 @@
 """SpiralDBTableSource — a read-only RootSource backed by a SpiralDB table.
 
 Wraps a SpiralDB table as an OrcaPod Source. The table's primary-key
-(key-schema) columns are used as tag columns by default.
+(key-schema) columns are used as the OrcaPod key columns by default.
 
 Requires the ``spiraldb`` optional extra: ``pip install orcapod[spiraldb]``.
 Authentication is handled externally — run ``spiral login`` once to store
@@ -9,7 +9,7 @@
 
 Example::
 
-    # Default dataset, PK columns become tag columns automatically
+    # Default dataset, PK columns become key columns automatically
     source = SpiralDBTableSource("my-project-123456", "spike_data")
 
     # Explicit dataset
@@ -17,20 +17,20 @@
         "my-project-123456", "spike_data", dataset="prod"
     )
 
-    # Override tag columns
+    # Override key columns
     source = SpiralDBTableSource(
-        "my-project-123456", "spike_data", tag_columns=["session_id"]
+        "my-project-123456", "spike_data", key_columns=["session_id"]
     )
 
 Note:
     SpiralDB enforces non-null values in key-schema columns at storage time,
-    so NULL values in PK (tag) columns are not expected in practice.  If a
+    so NULL values in PK (OrcaPod key) columns are not expected in practice.  If a
     table is written with NULL key columns via an external tool, those NULLs
-    will be propagated into the tag columns as-is.
+    will be propagated into the OrcaPod key columns as-is.
 
-    Tables with no key schema and no explicit ``tag_columns`` will raise
+    Tables with no key schema and no explicit ``key_columns`` will raise
     ``ValueError`` at construction time.  Either define a key schema on the
-    SpiralDB table or supply explicit ``tag_columns``.
+    SpiralDB table or supply explicit ``key_columns``.
 """
 from __future__ import annotations
 
@@ -52,15 +52,15 @@ class SpiralDBTableSource(DBTableSource):
 
     1. Opens a ``SpiralDBConnector`` for *project_id* and *dataset*.
     2. Validates the table exists.
-    3. Resolves tag columns:
+    3. Resolves key columns:
 
-       - If *tag_columns* is provided, uses them as-is.
+       - If *key_columns* is provided, uses them as-is.
        - Otherwise uses the table's primary-key (key-schema) columns.
        - Raises ``ValueError`` if the table has no key schema and no
-         explicit *tag_columns* are given.
+         explicit *key_columns* are given.
 
     4. Delegates to ``DBTableSource.__init__`` for fetching and stream
-       building (source-info provenance, schema hash, system tags).
+       building (source-info provenance, schema hash, system keys).
     5. Closes the connector — all data is eagerly loaded into memory, so
        the connection is released immediately.
 
@@ -68,10 +68,10 @@ class SpiralDBTableSource(DBTableSource):
         project_id: SpiralDB project identifier (e.g. ``"my-project-123456"``).
         table_name: Name of the SpiralDB table to expose as a source.
         dataset: Dataset within the project. Defaults to ``"default"``.
-        tag_columns: Columns to use as tag columns. If ``None`` (default),
+        key_columns: Columns to use as key columns. If ``None`` (default),
             the table's primary-key (key-schema) columns are used. Raises
             ``ValueError`` if the table has no key schema.
-        system_tag_columns: Additional system-level tag columns.
+        system_key_columns: Additional system-level key columns.
         record_id_column: Column for stable per-row record IDs in provenance.
         source_id: Canonical source name for the registry and provenance
             tokens. Defaults to *table_name*.
@@ -84,7 +84,7 @@ class SpiralDBTableSource(DBTableSource):
 
     Raises:
         ValueError: If the table is not found, has no primary-key columns
-            and no explicit *tag_columns* are given, or the table is empty.
+            and no explicit *key_columns* are given, or the table is empty.
     """
 
     def __init__(
@@ -92,8 +92,8 @@ def __init__(
         project_id: str,
         table_name: str,
         dataset: str = "default",
-        tag_columns: Collection[str] | None = None,
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] | None = None,
+        system_key_columns: Collection[str] = (),
         record_id_column: str | None = None,
         source_id: str | None = None,
         label: str | None = None,
@@ -112,17 +112,17 @@ def __init__(
         )
 
         try:
-            resolved_tags: list[str] | None = (
-                list(tag_columns) if tag_columns is not None else None
+            resolved_keys: list[str] | None = (
+                list(key_columns) if key_columns is not None else None
             )
 
             # DBTableSource handles PK resolution and raises ValueError when
-            # the table has no key schema and no explicit tag_columns are given.
+            # the table has no key schema and no explicit key_columns are given.
             super().__init__(
                 connector,
                 table_name,
-                tag_columns=resolved_tags,
-                system_tag_columns=system_tag_columns,
+                key_columns=resolved_keys,
+                system_key_columns=system_key_columns,
                 record_id_column=record_id_column,
                 source_id=source_id,
                 label=label,
@@ -153,8 +153,8 @@ def to_config(self, db_registry=None) -> dict[str, Any]:
             "dataset": self._dataset,
             "overrides": self._overrides,
             "table_name": self._table_name,
-            "tag_columns": list(self._tag_columns),
-            "system_tag_columns": list(self._system_tag_columns),
+            "key_columns": list(self._key_columns),
+            "system_key_columns": list(self._system_key_columns),
             "record_id_column": self._record_id_column,
             "source_id": self.source_id,
             **self._identity_config(),
@@ -174,8 +174,8 @@ def from_config(cls, config: dict[str, Any], db_registry=None) -> "SpiralDBTable
             project_id=config["project_id"],
             table_name=config["table_name"],
             dataset=config.get("dataset", "default"),
-            tag_columns=config.get("tag_columns"),
-            system_tag_columns=config.get("system_tag_columns", ()),
+            key_columns=config.get("key_columns"),
+            system_key_columns=config.get("system_key_columns", ()),
             record_id_column=config.get("record_id_column"),
             source_id=config.get("source_id"),
             label=config.get("label"),
diff --git a/src/orcapod/core/sources/sqlite_table_source.py b/src/orcapod/core/sources/sqlite_table_source.py
index 82e6d467..3333125b 100644
--- a/src/orcapod/core/sources/sqlite_table_source.py
+++ b/src/orcapod/core/sources/sqlite_table_source.py
@@ -1,7 +1,7 @@
 """SQLiteTableSource — a read-only RootSource backed by a SQLite table.
 
 Wraps a SQLite table as an OrcaPod Source. Primary-key columns are used
-as tag columns by default. For tables with no explicit primary key
+as key columns by default. For tables with no explicit primary key
 (ROWID-only tables), the implicit ``rowid`` integer column is used
 automatically.
 
@@ -11,7 +11,7 @@
     source = SQLiteTableSource("/path/to/my.db", "measurements")
 
     # In-memory (for tests / throwaway pipelines; cannot round-trip)
-    source = SQLiteTableSource(":memory:", "events", tag_columns=["session_id"])
+    source = SQLiteTableSource(":memory:", "events", key_columns=["session_id"])
 
 Note:
     ``:memory:`` sources cannot be reconstructed via ``from_config`` because
@@ -38,13 +38,13 @@ class SQLiteTableSource(DBTableSource):
     At construction time the source:
     1. Opens a ``SQLiteConnector`` for *db_path*.
     2. Validates the table exists.
-    3. Resolves tag columns:
-       - If *tag_columns* is provided, uses them as-is.
+    3. Resolves key columns:
+       - If *key_columns* is provided, uses them as-is.
        - Otherwise uses the table's primary-key columns.
        - If the table has no explicit PK (ROWID-only), falls back to the
          implicit ``rowid`` integer column.
     4. Determines the fetch query: injects ``SELECT rowid, *`` when
-       ``"rowid"`` is a resolved tag column and not a normal table column
+       ``"rowid"`` is a resolved key column and not a normal table column
        (handles both auto-detection and ``from_config`` reconstruction).
     5. Delegates to ``DBTableSource.__init__`` for fetching and stream building.
     6. Closes the connector — all data is eagerly loaded into memory, so the
@@ -54,10 +54,10 @@ class SQLiteTableSource(DBTableSource):
         db_path: Path to the SQLite database file, or ``":memory:"`` for an
             in-process in-memory database.
         table_name: Name of the table to expose as a source.
-        tag_columns: Columns to use as tag columns. If ``None`` (default),
+        key_columns: Columns to use as key columns. If ``None`` (default),
             the table's primary-key columns are used; ROWID-only tables fall
             back to ``["rowid"]``.
-        system_tag_columns: Additional system-level tag columns.
+        system_key_columns: Additional system-level key columns.
         record_id_column: Column for stable per-row record IDs in provenance.
         source_id: Canonical source name. Defaults to *table_name*.
         label: Human-readable label for this source node.
@@ -73,8 +73,8 @@ def __init__(
         self,
         db_path: str | os.PathLike,
         table_name: str,
-        tag_columns: Collection[str] | None = None,
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] | None = None,
+        system_key_columns: Collection[str] = (),
         record_id_column: str | None = None,
         source_id: str | None = None,
         label: str | None = None,
@@ -85,19 +85,19 @@ def __init__(
         connector = SQLiteConnector(db_path)
 
         try:
-            # Step 3: Resolve tag columns.
-            if tag_columns is None:
+            # Step 3: Resolve key columns.
+            if key_columns is None:
                 pk_cols = connector.get_pk_columns(table_name)
-                resolved_tags: list[str] = pk_cols if pk_cols else ["rowid"]
+                resolved_keys: list[str] = pk_cols if pk_cols else ["rowid"]
             else:
-                resolved_tags = list(tag_columns)
+                resolved_keys = list(key_columns)
 
             # Step 4: Determine the fetch query.
-            # If "rowid" is in resolved_tags but not a real column, we need
+            # If "rowid" is in resolved_keys but not a real column, we need
             # SELECT rowid, * to include it.  This also handles from_config
-            # reconstruction where tag_columns=["rowid"] is passed explicitly.
+            # reconstruction where key_columns=["rowid"] is passed explicitly.
             normal_cols = {ci.name for ci in connector.get_column_info(table_name)}
-            if "rowid" in resolved_tags and "rowid" not in normal_cols:
+            if "rowid" in resolved_keys and "rowid" not in normal_cols:
                 _query: str | None = f'SELECT rowid, * FROM "{table_name}"'
             else:
                 _query = None
@@ -105,8 +105,8 @@ def __init__(
             super().__init__(
                 connector,
                 table_name,
-                tag_columns=resolved_tags,
-                system_tag_columns=system_tag_columns,
+                key_columns=resolved_keys,
+                system_key_columns=system_key_columns,
                 record_id_column=record_id_column,
                 source_id=source_id,
                 label=label,
@@ -144,8 +144,8 @@ def from_config(cls, config: dict[str, Any], db_registry=None) -> "SQLiteTableSo
         return cls(
             db_path=config["db_path"],
             table_name=config["table_name"],
-            tag_columns=config.get("tag_columns"),
-            system_tag_columns=config.get("system_tag_columns", ()),
+            key_columns=config.get("key_columns"),
+            system_key_columns=config.get("system_key_columns", ()),
             record_id_column=config.get("record_id_column"),
             source_id=config.get("source_id"),
             label=config.get("label"),
diff --git a/src/orcapod/core/sources/stream_builder.py b/src/orcapod/core/sources/stream_builder.py
index d790c64a..4fd995e7 100644
--- a/src/orcapod/core/sources/stream_builder.py
+++ b/src/orcapod/core/sources/stream_builder.py
@@ -1,9 +1,9 @@
 """Compositional builder for enriching raw Arrow tables into source streams.
 
 Extracts the enrichment pipeline that was previously embedded in
-``ArrowTableSource.__init__``: dropping system columns, validating tags,
+``ArrowTableSource.__init__``: dropping system columns, validating keys,
 computing schema/table hashes, adding source-info provenance, adding system
-tag columns, and wrapping the result in an ``ArrowTableStream``.
+key columns, and wrapping the result in an ``ArrowTableStream``.
 """
 
 from __future__ import annotations
@@ -47,8 +47,8 @@ class SourceStreamResult:
     schema_hash: str
     table_hash: ContentHash
     source_id: str
-    tag_columns: tuple[str, ...]
-    system_tag_columns: tuple[str, ...]
+    key_columns: tuple[str, ...]
+    system_key_columns: tuple[str, ...]
 
 
 class SourceStreamBuilder:
@@ -66,10 +66,10 @@ def __init__(self, data_context: DataContext, config: Config) -> None:
     def build(
         self,
         table: pa.Table,
-        tag_columns: Collection[str],
+        key_columns: Collection[str],
         source_id: str | None = None,
         record_id_column: str | None = None,
-        system_tag_columns: Collection[str] = (),
+        system_key_columns: Collection[str] = (),
     ) -> SourceStreamResult:
         """Run the full enrichment pipeline.
 
@@ -82,28 +82,28 @@ def build(
 
         Args:
             table: Arrow table with nullable flags already set correctly.
-            tag_columns: Column names forming the tag for each row.
+            key_columns: Column names forming the key for each row.
             source_id: Canonical source name. Defaults to table hash.
             record_id_column: Column for stable record IDs in provenance.
-            system_tag_columns: Additional system-level tag columns.
+            system_key_columns: Additional system-level key columns.
 
         Returns:
             SourceStreamResult with enriched stream and metadata.
 
         Raises:
-            ValueError: If tag_columns or record_id_column are not in table.
+            ValueError: If key_columns or record_id_column are not in table.
         """
-        tag_columns_tuple = tuple(tag_columns)
-        system_tag_columns_tuple = tuple(system_tag_columns)
+        key_columns_tuple = tuple(key_columns)
+        system_key_columns_tuple = tuple(system_key_columns)
 
         # 1. Drop system columns from raw input.
         table = arrow_utils.drop_system_columns(table)
 
-        # 2. Validate tag_columns.
-        missing_tags = set(tag_columns_tuple) - set(table.column_names)
-        if missing_tags:
+        # 2. Validate key_columns.
+        missing_keys = set(key_columns_tuple) - set(table.column_names)
+        if missing_keys:
             raise ValueError(
-                f"tag_columns not found in table: {missing_tags}. "
+                f"key_columns not found in table: {missing_keys}. "
                 f"Available columns: {list(table.column_names)}"
             )
 
@@ -114,20 +114,20 @@ def build(
                 f"{table.column_names}"
             )
 
-        # 4. Compute schema hash from tag/data python schemas.
+        # 4. Compute schema hash from key/data python schemas.
         # Nullable flags in the incoming table are trusted as-is — callers must
         # set them correctly before calling build().
         non_sys = arrow_utils.drop_system_columns(table)
-        tag_schema = non_sys.select(list(tag_columns_tuple)).schema
-        data_schema = non_sys.drop(list(tag_columns_tuple)).schema
-        tag_python = self._data_context.type_converter.arrow_schema_to_python_schema(
-            tag_schema
+        key_schema = non_sys.select(list(key_columns_tuple)).schema
+        data_schema = non_sys.drop(list(key_columns_tuple)).schema
+        key_python = self._data_context.type_converter.arrow_schema_to_python_schema(
+            key_schema
         )
         data_python = self._data_context.type_converter.arrow_schema_to_python_schema(
             data_schema
         )
         schema_hash = self._data_context.semantic_hasher.hash_object(
-            (tag_python, data_python)
+            (key_python, data_python)
         ).to_hex(char_count=self._config.schema_hash_n_char)
 
         # 5. Compute table hash for data identity.
@@ -147,15 +147,15 @@ def build(
 
         # 8. Add source-info provenance columns.
         table = arrow_utils.add_source_info(
-            table, source_info, exclude_columns=tag_columns_tuple
+            table, source_info, exclude_columns=key_columns_tuple
         )
 
-        # 9. Add system tag columns.
+        # 9. Add system key columns.
         record_id_values = [
             _make_record_id(record_id_column, i, row)
             for i, row in enumerate(rows_as_dicts)
         ]
-        table = arrow_utils.add_system_tag_columns(
+        table = arrow_utils.add_system_key_columns(
             table,
             schema_hash,
             source_id,
@@ -166,8 +166,8 @@ def build(
         # the caller set them before calling build().
         stream = ArrowTableStream(
             table=table,
-            tag_columns=tag_columns_tuple,
-            system_tag_columns=system_tag_columns_tuple,
+            key_columns=key_columns_tuple,
+            system_key_columns=system_key_columns_tuple,
         )
 
         return SourceStreamResult(
@@ -175,6 +175,6 @@ def build(
             schema_hash=schema_hash,
             table_hash=table_hash,
             source_id=source_id,
-            tag_columns=tag_columns_tuple,
-            system_tag_columns=system_tag_columns_tuple,
+            key_columns=key_columns_tuple,
+            system_key_columns=system_key_columns_tuple,
         )
diff --git a/src/orcapod/core/streams/arrow_table_stream.py b/src/orcapod/core/streams/arrow_table_stream.py
index 09bfc599..598aa2b9 100644
--- a/src/orcapod/core/streams/arrow_table_stream.py
+++ b/src/orcapod/core/streams/arrow_table_stream.py
@@ -6,9 +6,9 @@
 from typing import TYPE_CHECKING, Any, cast
 
 from orcapod import contexts
-from orcapod.core.datagrams import Data, Tag
+from orcapod.core.datagrams import Data, Key
 from orcapod.core.streams.base import StreamBase
-from orcapod.protocols.core_protocols import PodProtocol, StreamProtocol, TagProtocol
+from orcapod.protocols.core_protocols import PodProtocol, StreamProtocol, KeyProtocol
 from orcapod.protocols.hashing_protocols import PipelineElementProtocol
 from orcapod.system_constants import constants
 from orcapod.types import ColumnConfig, Schema
@@ -28,19 +28,19 @@ class ArrowTableStream(StreamBase):
     """
     An immutable stream based on a PyArrow Table.
     This stream is designed to be used with data that is already in a tabular format,
-    such as data loaded from a file or database. The columns to be treated as tags are
+    such as data loaded from a file or database. The columns to be treated as keys are
     specified at initialization, and the rest of the columns are treated as data.
     The stream is immutable, meaning that once it is created, it cannot be modified.
     This is useful for ensuring that the data in the stream remains consistent and unchanging.
 
-    The types of the tag and data columns are inferred from the PyArrow Table schema.
+    The types of the key and data columns are inferred from the PyArrow Table schema.
     """
 
     def __init__(
         self,
         table: "pa.Table",
-        tag_columns: Collection[str] = (),
-        system_tag_columns: Collection[str] = (),
+        key_columns: Collection[str] = (),
+        system_key_columns: Collection[str] = (),
         source_info: dict[str, str | None] | None = None,
         producer: PodProtocol | None = None,
         upstreams: tuple[StreamProtocol, ...] = (),
@@ -76,37 +76,37 @@ def __init__(
 
         prefix_info = {constants.SOURCE_PREFIX: source_info}
 
-        # determine tag columns first and then exclude any source info
-        self._tag_columns = tuple(c for c in tag_columns if c in table.column_names)
-        self._system_tag_columns = tuple(
-            c for c in table.column_names if c.startswith(constants.SYSTEM_TAG_PREFIX)
+        # determine key columns first and then exclude any source info
+        self._key_columns = tuple(c for c in key_columns if c in table.column_names)
+        self._system_key_columns = tuple(
+            c for c in table.column_names if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
-        if len(system_tag_columns) > 0:
-            # rename system_tag_columns
+        if len(system_key_columns) > 0:
+            # rename system_key_columns
             column_name_map = {
-                c: f"{constants.SYSTEM_TAG_PREFIX}{c}" for c in system_tag_columns
+                c: f"{constants.SYSTEM_KEY_PREFIX}{c}" for c in system_key_columns
             }
             table = table.rename_columns(
                 [column_name_map.get(c, c) for c in table.column_names]
             )
 
-            self._system_tag_columns += tuple(
-                f"{constants.SYSTEM_TAG_PREFIX}{c}" for c in system_tag_columns
+            self._system_key_columns += tuple(
+                f"{constants.SYSTEM_KEY_PREFIX}{c}" for c in system_key_columns
             )
 
-        self._all_tag_columns = self._tag_columns + self._system_tag_columns
-        if delta := set(tag_columns) - set(self._tag_columns):
+        self._all_key_columns = self._key_columns + self._system_key_columns
+        if delta := set(key_columns) - set(self._key_columns):
             raise ValueError(
-                f"Specified tag columns {delta} are not present in the table."
+                f"Specified key columns {delta} are not present in the table."
             )
         table, prefix_tables = arrow_utils.prepare_prefixed_columns(
             table,
             prefix_info,
-            exclude_columns=self._all_tag_columns,
+            exclude_columns=self._all_key_columns,
         )
-        # now table should only contain tag columns and data columns
+        # now table should only contain key columns and data columns
         self._data_columns = tuple(
-            c for c in table.column_names if c not in self._all_tag_columns
+            c for c in table.column_names if c not in self._all_key_columns
         )
         self._table = table
         self._source_info_table = prefix_tables[constants.SOURCE_PREFIX]
@@ -119,23 +119,23 @@ def __init__(
 
         # Respect Arrow nullable flags as-is (after optional inference above):
         # nullable=True → T | None, nullable=False → T.
-        tag_schema = pa.schema(
-            f for f in self._table.schema if f.name in self._tag_columns
+        key_schema = pa.schema(
+            f for f in self._table.schema if f.name in self._key_columns
         )
-        system_tag_schema = pa.schema(
-            f for f in self._table.schema if f.name in self._system_tag_columns
+        system_key_schema = pa.schema(
+            f for f in self._table.schema if f.name in self._system_key_columns
         )
-        all_tag_schema = arrow_utils.join_arrow_schemas(tag_schema, system_tag_schema)
+        all_key_schema = arrow_utils.join_arrow_schemas(key_schema, system_key_schema)
         data_schema = pa.schema(
             f for f in self._table.schema if f.name in self._data_columns
         )
 
-        self._tag_schema = tag_schema
-        self._system_tag_schema = system_tag_schema
-        self._all_tag_schema = all_tag_schema
+        self._key_schema = key_schema
+        self._system_key_schema = system_key_schema
+        self._all_key_schema = all_key_schema
         self._data_schema = data_schema
 
-        self._cached_elements: list[tuple[TagProtocol, Data]] | None = None
+        self._cached_elements: list[tuple[KeyProtocol, Data]] | None = None
         self._update_modified_time()  # set modified time to now
 
     def identity_structure(self) -> Any:
@@ -144,15 +144,15 @@ def identity_structure(self) -> Any:
         return (
             self.__class__.__name__,
             self.as_table(all_info=True),
-            self._tag_columns,
+            self._key_columns,
         )
 
     def pipeline_identity_structure(self) -> Any:
         if self._producer is None or not isinstance(
             self._producer, PipelineElementProtocol
         ):
-            tag_schema, data_schema = self.output_schema()
-            return (tag_schema, data_schema)
+            key_schema, data_schema = self.output_schema()
+            return (key_schema, data_schema)
         return super().pipeline_identity_structure()
 
     @property
@@ -170,15 +170,15 @@ def keys(
         all_info: bool = False,
     ) -> tuple[tuple[str, ...], tuple[str, ...]]:
         """
-        Returns the keys of the tag and data columns in the stream.
+        Returns the names of the key and data columns in the stream.
         This is useful for accessing the columns in the stream.
         """
-        tag_columns = self._tag_columns
+        key_columns = self._key_columns
         columns_config = ColumnConfig.handle_config(columns, all_info=all_info)
         # TODO: add standard parsing of columns
-        if columns_config.system_tags:
-            tag_columns += self._system_tag_columns
-        return tag_columns, self._data_columns
+        if columns_config.system_keys:
+            key_columns += self._system_key_columns
+        return key_columns, self._data_columns
 
     def output_schema(
         self,
@@ -187,19 +187,19 @@ def output_schema(
         all_info: bool = False,
     ) -> tuple[Schema, Schema]:
         """
-        Returns the types of the tag and data columns in the stream.
+        Returns the types of the key and data columns in the stream.
         This is useful for accessing the types of the columns in the stream.
         """
         # normalize column config
         columns_config = ColumnConfig.handle_config(columns, all_info=all_info)
         # TODO: consider using MappingProxyType to avoid copying the dicts
         converter = self.data_context.type_converter
-        if columns_config.system_tags:
-            tag_schema = self._all_tag_schema
+        if columns_config.system_keys:
+            key_schema = self._all_key_schema
         else:
-            tag_schema = self._tag_schema
+            key_schema = self._key_schema
         return (
-            converter.arrow_schema_to_python_schema(tag_schema),
+            converter.arrow_schema_to_python_schema(key_schema),
             converter.arrow_schema_to_python_schema(self._data_schema),
         )
 
@@ -228,9 +228,9 @@ def as_table(
                 pa.field(hash_column_name, pa.large_string(), nullable=False),
                 pa.array(content_hashes, type=pa.large_string()),
             )
-        if not columns_config.system_tags:
+        if not columns_config.system_keys:
             # Check in original implementation
-            output_table = output_table.drop_columns(list(self._system_tag_columns))
+            output_table = output_table.drop_columns(list(self._system_key_columns))
         table_stack = (output_table,)
         if columns_config.context:
             table_stack += (self._data_context_table,)
@@ -239,15 +239,15 @@ def as_table(
 
         table = arrow_utils.hstack_tables(*table_stack)
 
-        if columns_config.sort_by_tags:
-            # TODO: cleanup the sorting tag selection logic
+        if columns_config.sort_by_keys:
+            # TODO: cleanup the sorting key selection logic
             try:
-                target_tags = (
-                    self._all_tag_columns
-                    if columns_config.system_tags
-                    else self._tag_columns
+                target_keys = (
+                    self._all_key_columns
+                    if columns_config.system_keys
+                    else self._key_columns
                 )
-                return table.sort_by([(column, "ascending") for column in target_tags])
+                return table.sort_by([(column, "ascending") for column in target_keys])
             except pa.ArrowTypeError:
                 # If sorting fails, fall back to unsorted table
                 return table
@@ -261,35 +261,35 @@ def clear_cache(self) -> None:
         """
         self._cached_elements = None
 
-    def iter_data(self) -> Iterator[tuple[TagProtocol, Data]]:
+    def iter_data(self) -> Iterator[tuple[KeyProtocol, Data]]:
         """
         Iterates over the data in the stream.
-        Each data is represented as a tuple of (TagProtocol, DataProtocol).
+        Each element is represented as a tuple of (KeyProtocol, DataProtocol).
         """
         # TODO: make it work with table batch stream
         if self._cached_elements is None:
             cached_elements = []
-            tag_present = len(self._all_tag_columns) > 0
-            if tag_present:
-                tags = self._table.select(self._all_tag_columns)
-                tag_batches = tags.to_batches()
+            key_present = len(self._all_key_columns) > 0
+            if key_present:
+                keys = self._table.select(self._all_key_columns)
+                key_batches = keys.to_batches()
             else:
-                tag_batches = repeat(Tag({}))
+                key_batches = repeat(Key({}))
 
             # TODO: come back and clean up this logic
 
             data = self._table.select(self._data_columns)
 
-            for tag_batch, data_batch in zip(tag_batches, data.to_batches()):
+            for key_batch, data_batch in zip(key_batches, data.to_batches()):
                 for i in range(len(data_batch)):
-                    if tag_present:
-                        tag = Tag(
-                            tag_batch.slice(i, 1),  # type: ignore
+                    if key_present:
+                        key = Key(
+                            key_batch.slice(i, 1),  # type: ignore
                             data_context=self.data_context,
                         )
 
                     else:
-                        tag = cast(Tag, tag_batch)
+                        key = cast(Key, key_batch)
 
                     data = Data(
                         data_batch.slice(i, 1),
@@ -297,9 +297,9 @@ def iter_data(self) -> Iterator[tuple[TagProtocol, Data]]:
                         data_context=self.data_context,
                     )
 
-                    yield tag, data
+                    yield key, data
 
-                    cached_elements.append((tag, data))
+                    cached_elements.append((key, data))
             self._cached_elements = cached_elements
         else:
             yield from self._cached_elements
@@ -307,5 +307,5 @@ def iter_data(self) -> Iterator[tuple[TagProtocol, Data]]:
     def __repr__(self) -> str:
         return (
             f"{self.__class__.__name__}(table={self._table.column_names}, "
-            f"tag_columns={self._tag_columns})"
+            f"key_columns={self._key_columns})"
         )
diff --git a/src/orcapod/core/streams/base.py b/src/orcapod/core/streams/base.py
index 0f30cd89..d1234ef1 100644
--- a/src/orcapod/core/streams/base.py
+++ b/src/orcapod/core/streams/base.py
@@ -11,7 +11,7 @@
     DataProtocol,
     PodProtocol,
     StreamProtocol,
-    TagProtocol,
+    KeyProtocol,
 )
 from orcapod.types import ColumnConfig, Schema
 from orcapod.utils.lazy_module import LazyModule
@@ -98,25 +98,25 @@ def semi_join(
     ) -> StreamBase:
         """
         Performs a semi-join with another stream, returning a new stream that contains
-        only the data from this stream that have matching tags in the other stream.
+        only the data from this stream that have matching keys in the other stream.
         """
         from orcapod.core.operators import SemiJoin
 
         return SemiJoin()(self, other_stream, label=label)
 
-    def map_tags(
+    def map_keys(
         self,
         name_map: Mapping[str, str],
         drop_unmapped: bool = True,
         label: str | None = None,
     ) -> StreamBase:
         """
-        Maps the tags in this stream according to the provided name_map.
-        If drop_unmapped is True, any tags that are not in the name_map will be dropped.
+        Maps the keys in this stream according to the provided name_map.
+        If drop_unmapped is True, any keys that are not in the name_map will be dropped.
         """
-        from orcapod.core.operators import MapTags
+        from orcapod.core.operators import MapKeys
 
-        return MapTags(name_map, drop_unmapped)(self, label=label)
+        return MapKeys(name_map, drop_unmapped)(self, label=label)
 
     def map_data(
         self,
@@ -165,19 +165,19 @@ def polars_filter(
             self, label=label
         )
 
-    def select_tag_columns(
+    def select_key_columns(
         self,
-        tag_columns: str | Collection[str],
+        key_columns: str | Collection[str],
         strict: bool = True,
         label: str | None = None,
     ) -> StreamBase:
         """
-        Select the specified tag columns from the stream. A ValueError is raised
-        if one or more specified tag columns do not exist in the stream unless strict = False.
+        Select the specified key columns from the stream. A ValueError is raised
+        if one or more specified key columns do not exist in the stream unless strict = False.
         """
-        from orcapod.core.operators import SelectTagColumns
+        from orcapod.core.operators import SelectKeyColumns
 
-        return SelectTagColumns(tag_columns, strict=strict)(self, label=label)
+        return SelectKeyColumns(key_columns, strict=strict)(self, label=label)
 
     def select_data_columns(
         self,
@@ -193,15 +193,15 @@ def select_data_columns(
 
         return SelectDataColumns(data_columns, strict=strict)(self, label=label)
 
-    def drop_tag_columns(
+    def drop_key_columns(
         self,
-        tag_columns: str | Collection[str],
+        key_columns: str | Collection[str],
         strict: bool = True,
         label: str | None = None,
     ) -> StreamBase:
-        from orcapod.core.operators import DropTagColumns
+        from orcapod.core.operators import DropKeyColumns
 
-        return DropTagColumns(tag_columns, strict=strict)(self, label=label)
+        return DropKeyColumns(key_columns, strict=strict)(self, label=label)
 
     def drop_data_columns(
         self,
@@ -231,18 +231,18 @@ def output_schema(
 
     def __iter__(
         self,
-    ) -> Iterator[tuple[TagProtocol, DataProtocol]]:
+    ) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
         return self.iter_data()
 
     @abstractmethod
     def iter_data(
         self,
-    ) -> Iterator[tuple[TagProtocol, DataProtocol]]: ...
+    ) -> Iterator[tuple[KeyProtocol, DataProtocol]]: ...
 
     async def async_iter_data(
         self,
-    ) -> AsyncIterator[tuple[TagProtocol, DataProtocol]]:
-        """Async iterator over (tag, data) pairs.
+    ) -> AsyncIterator[tuple[KeyProtocol, DataProtocol]]:
+        """Async iterator over (key, data) pairs.
 
         Subclasses should override this to provide true async iteration.
         """
@@ -309,24 +309,24 @@ def as_pandas_df(
         self,
         *,
         columns: ColumnConfig | dict[str, Any] | None = None,
-        index_by_tags: bool = False,
+        index_by_keys: bool = False,
         all_info: bool = False,
     ) -> "pd.DataFrame":
         df = self.as_polars_df(
             columns=columns,
             all_info=all_info,
         )
-        tag_keys, _ = self.keys()
+        key_keys, _ = self.keys()
         pdf = df.to_pandas()
-        if index_by_tags:
-            pdf = pdf.set_index(list(tag_keys))
+        if index_by_keys:
+            pdf = pdf.set_index(list(key_keys))
         return pdf
 
     def flow(
         self,
-    ) -> list[tuple[TagProtocol, DataProtocol]]:
+    ) -> list[tuple[KeyProtocol, DataProtocol]]:
         """Materialize the stream into a concrete collection of
-        ``(TagProtocol, DataProtocol)`` pairs.
+        ``(KeyProtocol, DataProtocol)`` pairs.
 
         This is implemented by iterating over :meth:`iter_data`. Depending on
         the concrete stream implementation, iterating may trigger computation or
@@ -341,9 +341,9 @@ def _repr_html_(self) -> str:
             c for c in df.columns if c not in self.keys()[0]
         ]
         df = df[new_column_order]
-        tag_map = {t: f"*{t}" for t in self.keys()[0]}
+        key_map = {t: f"*{t}" for t in self.keys()[0]}
         # TODO: construct repr html better
-        df = df.rename(tag_map)
+        df = df.rename(key_map)
         return f"{self.__class__.__name__}[{self.label}]\n" + df._repr_html_()
 
     def view(
@@ -356,9 +356,9 @@ def view(
             columns=columns,
             all_info=all_info,
         )
-        tag_map = {t: f"*{t}" for t in self.keys()[0]}
+        key_map = {t: f"*{t}" for t in self.keys()[0]}
         # TODO: construct repr html better
-        df = df.rename(tag_map)
+        df = df.rename(key_map)
         return StreamView(self, df)
 
 
diff --git a/src/orcapod/errors.py b/src/orcapod/errors.py
index 23cc5f74..e2a8c0f0 100644
--- a/src/orcapod/errors.py
+++ b/src/orcapod/errors.py
@@ -5,8 +5,8 @@ class InputValidationError(Exception):
     """
 
 
-class DuplicateTagError(ValueError):
-    """Raised when duplicate tag values are found and skip_duplicates=False"""
+class DuplicateKeyError(ValueError):
+    """Raised when duplicate key values are found and skip_duplicates=False"""
 
     pass
 
diff --git a/src/orcapod/hashing/semantic_hashing/semantic_hasher.py b/src/orcapod/hashing/semantic_hashing/semantic_hasher.py
index ceb13315..a7c14c75 100644
--- a/src/orcapod/hashing/semantic_hashing/semantic_hasher.py
+++ b/src/orcapod/hashing/semantic_hashing/semantic_hasher.py
@@ -44,12 +44,12 @@
   dict       → {...}                                        # native JSON object; keys sorted
   tuple      → {"__type__": "tuple",     "items": [...]}
   set        → {"__type__": "set",       "items": [...]}   # items sorted by str()
-  frozenset  → {"__type__": "set",       "items": [...]}   # same tag as set
+  frozenset  → {"__type__": "set",       "items": [...]}   # same __type__ as set
   namedtuple → {"__type__": "namedtuple","name": "T",
                 "fields": {...}}                            # sorted by field name
 
 This means a ``list`` and a ``tuple`` with the same elements will hash
-differently (the tuple carries a type tag), while a plain ``list`` and a
+differently (the tuple carries a ``__type__`` marker), while a plain ``list`` and a
 plain JSON array embedded anywhere in a structure are indistinguishable --
 which is exactly the desired semantics for interoperability.
 
@@ -225,7 +225,7 @@ def _expand_structure(
                       path, for circular-reference detection.
 
         Returns:
-            A JSON-serialisable dict (with ``__type__`` tag) for containers,
+            A JSON-serialisable dict (with ``__type__`` key) for containers,
             or the primitive value itself.
         """
         # Primitives are leaves -- pass through.
diff --git a/src/orcapod/pipeline/async_orchestrator.py b/src/orcapod/pipeline/async_orchestrator.py
index d156213f..0c6c4ad3 100644
--- a/src/orcapod/pipeline/async_orchestrator.py
+++ b/src/orcapod/pipeline/async_orchestrator.py
@@ -24,7 +24,7 @@
 if TYPE_CHECKING:
     import networkx as nx
 
-    from orcapod.protocols.core_protocols import DataProtocol, TagProtocol
+    from orcapod.protocols.core_protocols import DataProtocol, KeyProtocol
     from orcapod.protocols.observability_protocols import ExecutionObserverProtocol
 
 logger = logging.getLogger(__name__)
@@ -172,7 +172,7 @@ async def _run_async(
                     terminal_channels.append(ch)
 
             # Result collection
-            collectors: dict[Any, list[tuple[TagProtocol, DataProtocol]]] = {}
+            collectors: dict[Any, list[tuple[KeyProtocol, DataProtocol]]] = {}
             if materialize_results:
                 for node in topo_order:
                     collectors[node] = []
diff --git a/src/orcapod/pipeline/composite_observer.py b/src/orcapod/pipeline/composite_observer.py
index 339d1ba2..5a7fb2a3 100644
--- a/src/orcapod/pipeline/composite_observer.py
+++ b/src/orcapod/pipeline/composite_observer.py
@@ -25,7 +25,7 @@
 from typing import Any
 
 from orcapod.pipeline.observer import NoOpLogger
-from orcapod.protocols.core_protocols import DataProtocol, TagProtocol
+from orcapod.protocols.core_protocols import DataProtocol, KeyProtocol
 from orcapod.types import SchemaLike
 
 _NOOP_LOGGER = NoOpLogger()
@@ -59,41 +59,41 @@ def on_run_end(self, run_id: str) -> None:
             o.on_run_end(run_id)
 
     def on_node_start(
-        self, node_label: str, node_hash: str, tag_schema: SchemaLike | None = None
+        self, node_label: str, node_hash: str, key_schema: SchemaLike | None = None
     ) -> None:
         for o in self._observers:
-            o.on_node_start(node_label, node_hash, tag_schema=tag_schema)
+            o.on_node_start(node_label, node_hash, key_schema=key_schema)
 
     def on_node_end(self, node_label: str, node_hash: str) -> None:
         for o in self._observers:
             o.on_node_end(node_label, node_hash)
 
     def on_data_start(
-        self, node_label: str, tag: TagProtocol, data: DataProtocol
+        self, node_label: str, key: KeyProtocol, data: DataProtocol
     ) -> None:
         for o in self._observers:
-            o.on_data_start(node_label, tag, data)
+            o.on_data_start(node_label, key, data)
 
     def on_data_end(
         self,
         node_label: str,
-        tag: TagProtocol,
+        key: KeyProtocol,
         input_data: DataProtocol,
         output_data: DataProtocol | None,
         cached: bool,
     ) -> None:
         for o in self._observers:
-            o.on_data_end(node_label, tag, input_data, output_data, cached)
+            o.on_data_end(node_label, key, input_data, output_data, cached)
 
     def on_data_crash(
-        self, node_label: str, tag: TagProtocol, data: DataProtocol, error: Exception
+        self, node_label: str, key: KeyProtocol, data: DataProtocol, error: Exception
     ) -> None:
         for o in self._observers:
-            o.on_data_crash(node_label, tag, data, error)
+            o.on_data_crash(node_label, key, data, error)
 
     def create_data_logger(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
     ) -> Any:
         """Return the first non-no-op logger from children.
@@ -103,7 +103,7 @@ def create_data_logger(
         Falls back to a no-op logger if all children return no-ops.
         """
         for o in self._observers:
-            pkt_logger = o.create_data_logger(tag, data)
+            pkt_logger = o.create_data_logger(key, data)
             if not isinstance(pkt_logger, NoOpLogger):
                 return pkt_logger
         return _NOOP_LOGGER
diff --git a/src/orcapod/pipeline/graph.py b/src/orcapod/pipeline/graph.py
index ac118fe9..36c66c68 100644
--- a/src/orcapod/pipeline/graph.py
+++ b/src/orcapod/pipeline/graph.py
@@ -654,7 +654,7 @@ def save(self, path: str, level: Literal["minimal", "definition", "standard", "f
         # -- Build node descriptors --
         nodes: dict[str, dict[str, Any]] = {}
         for content_hash_str, node in self._persistent_node_map.items():
-            tag_schema, data_schema = node.output_schema()
+            key_schema, data_schema = node.output_schema()
             type_converter = node.data_context.type_converter
 
             descriptor: dict[str, Any] = {
@@ -663,7 +663,7 @@ def save(self, path: str, level: Literal["minimal", "definition", "standard", "f
                 "content_hash": node.content_hash().to_string(),
                 "pipeline_hash": node.pipeline_hash().to_string(),
                 "output_schema": {
-                    "tag": serialize_schema(tag_schema, type_converter),
+                    "key": serialize_schema(key_schema, type_converter),
                     "data": serialize_schema(data_schema, type_converter),
                 },
             }
@@ -751,7 +751,7 @@ def _build_source_descriptor(self, node: SourceNode, db_registry: DatabaseRegist
             # Remove identity fields — they live in the node descriptor
             source_config = {
                 k: v for k, v in config.items()
-                if k not in ("content_hash", "pipeline_hash", "tag_schema", "data_schema")
+                if k not in ("content_hash", "pipeline_hash", "key_schema", "data_schema")
             }
             reconstructable = stream_type in self._RECONSTRUCTABLE_SOURCE_TYPES
         else:
diff --git a/src/orcapod/pipeline/logging_observer.py b/src/orcapod/pipeline/logging_observer.py
index 07d1f1d5..0368e4a5 100644
--- a/src/orcapod/pipeline/logging_observer.py
+++ b/src/orcapod/pipeline/logging_observer.py
@@ -20,7 +20,7 @@
 
 Log schema (fixed columns):
     Fixed columns are prefixed with ``_log_`` to follow system column conventions
-    and avoid collision with user-defined tag column names.
+    and avoid collision with user-defined key column names.
 
     - ``_log_id`` (large_utf8): UUID unique to this log entry.
     - ``_log_run_id`` (large_utf8): UUID of the pipeline run (from ``on_run_start``).
@@ -31,8 +31,8 @@
     - ``_log_success`` (bool): ``True`` if the data function returned normally.
     - ``_log_timestamp`` (large_utf8): ISO-8601 UTC timestamp when ``record()`` was called.
 
-    In addition, each tag key from the data's tag becomes a separate
-    ``large_utf8`` column (queryable, not JSON-encoded).  Tag columns use
+    In addition, each field from the data's Key becomes a separate
+    ``large_utf8`` column (queryable, not JSON-encoded).  Key columns use
     bare names (no prefix), so they are always distinguishable from fixed
     columns.
 
@@ -52,7 +52,7 @@
 from uuid_utils import uuid7
 
 from orcapod.pipeline.logging_capture import install_capture_streams
-from orcapod.protocols.core_protocols import DataProtocol, TagProtocol
+from orcapod.protocols.core_protocols import DataProtocol, KeyProtocol
 
 if TYPE_CHECKING:
     import pyarrow as pa
@@ -69,9 +69,9 @@ class DataLogger:
     """Context-bound logger created by `_ContextualizedLoggingObserver` per data.
 
     Holds all context needed to write a structured log row
-    (run_id, tag data) so the caller only needs to pass the `CapturedLogs` payload.
+    (run_id, key data) so the caller only needs to pass the `CapturedLogs` payload.
 
-    Tag data is stored as individual queryable columns (not JSON) alongside
+    Key data is stored as individual queryable columns (not JSON) alongside
     the fixed log columns.
 
     This class is not intended to be instantiated directly — use
@@ -83,12 +83,12 @@ def __init__(
         db: ArrowDatabaseProtocol,
         log_path: tuple[str, ...],
         run_id: str,
-        tag_data: dict[str, Any],
+        key_data: dict[str, Any],
     ) -> None:
         self._db = db
         self._log_path = log_path
         self._run_id = run_id
-        self._tag_data = tag_data
+        self._key_data = key_data
 
     def record(self, **kwargs: Any) -> None:
         """Write one log row to the database.
@@ -121,8 +121,8 @@ def record(self, **kwargs: Any) -> None:
                     type=pa.large_utf8(),
                 )
 
-        # Dynamic tag columns — each tag key becomes its own column (unprefixed)
-        for key, value in self._tag_data.items():
+        # Dynamic key columns — each Key field becomes its own column (unprefixed)
+        for key, value in self._key_data.items():
             columns[key] = pa.array([str(value)], type=pa.large_utf8())
 
         row = pa.table(columns)
@@ -190,7 +190,7 @@ def on_run_end(self, run_id: str) -> None:
         pass
 
     def on_node_start(
-        self, node_label: str, node_hash: str, tag_schema=None
+        self, node_label: str, node_hash: str, key_schema=None
     ) -> None:
         pass
 
@@ -199,13 +199,13 @@ def on_node_end(
     ) -> None:
         pass
 
-    def on_data_start(self, node_label: str, tag: TagProtocol, data: DataProtocol) -> None:
+    def on_data_start(self, node_label: str, key: KeyProtocol, data: DataProtocol) -> None:
         pass
 
     def on_data_end(
         self,
         node_label: str,
-        tag: TagProtocol,
+        key: KeyProtocol,
         input_data: DataProtocol,
         output_data: DataProtocol | None,
         cached: bool,
@@ -213,17 +213,17 @@ def on_data_end(
         pass
 
     def on_data_crash(
-        self, node_label: str, tag: TagProtocol, data: DataProtocol, error: Exception
+        self, node_label: str, key: KeyProtocol, data: DataProtocol, error: Exception
     ) -> None:
         pass
 
     def create_data_logger(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
     ) -> DataLogger:
-        """Return a `DataLogger` bound to *tag* context using the root DB."""
-        return DataLogger(db=self._db, log_path=DEFAULT_LOG_PATH, run_id=self._current_run_id, tag_data=dict(tag))
+        """Return a `DataLogger` bound to *key* context using the root DB."""
+        return DataLogger(db=self._db, log_path=DEFAULT_LOG_PATH, run_id=self._current_run_id, key_data=dict(key))
 
     # -- convenience --
 
@@ -327,19 +327,19 @@ def on_run_start(self, run_id: str, pipeline_uri: str = "") -> None:
     def on_run_end(self, run_id: str) -> None:
         pass
 
-    def on_node_start(self, node_label: str, node_hash: str, tag_schema=None) -> None:
+    def on_node_start(self, node_label: str, node_hash: str, key_schema=None) -> None:
         pass
 
     def on_node_end(self, node_label: str, node_hash: str) -> None:
         pass
 
-    def on_data_start(self, node_label: str, tag: TagProtocol, data: DataProtocol) -> None:
+    def on_data_start(self, node_label: str, key: KeyProtocol, data: DataProtocol) -> None:
         pass
 
     def on_data_end(
         self,
         node_label: str,
-        tag: TagProtocol,
+        key: KeyProtocol,
         input_data: DataProtocol,
         output_data: DataProtocol | None,
         cached: bool,
@@ -347,13 +347,13 @@ def on_data_end(
         pass
 
     def on_data_crash(
-        self, node_label: str, tag: TagProtocol, data: DataProtocol, error: Exception
+        self, node_label: str, key: KeyProtocol, data: DataProtocol, error: Exception
     ) -> None:
         pass
 
     def create_data_logger(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
     ) -> DataLogger:
         """Create a DataLogger using context from this wrapper.
@@ -361,11 +361,11 @@ def create_data_logger(
         Logs are written at ``DEFAULT_LOG_PATH`` within the scoped database.
         Node identity is encoded in the database path, not in column values.
         """
-        tag_data = dict(tag)
+        key_data = dict(key)
 
         return DataLogger(
             db=self._db,
             log_path=DEFAULT_LOG_PATH,
             run_id=self._run_id,
-            tag_data=tag_data,
+            key_data=key_data,
         )
diff --git a/src/orcapod/pipeline/observability_reader.py b/src/orcapod/pipeline/observability_reader.py
index f3af37bf..9c725e72 100644
--- a/src/orcapod/pipeline/observability_reader.py
+++ b/src/orcapod/pipeline/observability_reader.py
@@ -74,16 +74,16 @@ def nodes(self) -> list[str]:
         return sorted(self._status_tables.keys())
 
     @property
-    def tag_columns(self) -> list[str]:
-        """Inferred user tag column names."""
+    def key_columns(self) -> list[str]:
+        """Inferred user key column names."""
         status = self._get_status_df()
         return sorted(
             col for col in status.columns
             if not col.startswith("__")
             and not col.startswith("_status_")
             and not col.startswith("_log_")
-            and not col.startswith("_tag_")
-            and not col.startswith("_tag::")
+            and not col.startswith("_key_")
+            and not col.startswith("_key::")
             and col != "node_label"
         )
 
@@ -128,7 +128,7 @@ def _get_logs_df(self) -> pl.DataFrame:
         "_log_traceback": "traceback",
     }
 
-    _DROP_PREFIXES: ClassVar[tuple[str, ...]] = ("__", "_tag_", "_tag::")
+    _DROP_PREFIXES: ClassVar[tuple[str, ...]] = ("__", "_key_", "_key::")
     _STATUS_DROP_EXACT: ClassVar[set[str]] = {
         "_status_id", "_status_run_id", "_status_pipeline_uri",
     }
@@ -180,7 +180,7 @@ def status(self) -> pl.DataFrame:
         previously computed successful results.
 
         Returns:
-            DataFrame with columns: ``node_label``, tag columns,
+            DataFrame with columns: ``node_label``, key columns,
             ``state``, ``timestamp``, ``error_summary``.
         """
         df = self._get_status_df()
@@ -189,7 +189,7 @@ def status(self) -> pl.DataFrame:
         df = self._clean_status_df(df)
 
         # Deduplicate to latest status per (node, input)
-        group_cols = ["node_label"] + self.tag_columns
+        group_cols = ["node_label"] + self.key_columns
         group_cols = [c for c in group_cols if c in df.columns]
         df = df.sort("timestamp").unique(subset=group_cols, keep="last")
 
@@ -207,14 +207,14 @@ def logs(self, node: str) -> pl.DataFrame:
         """Full log entries for a node.
 
         Returns all log fields: stdout, stderr, python logs, traceback,
-        success status, and timestamp, alongside tag columns.
+        success status, and timestamp, alongside key columns.
 
         Args:
             node: Node name to query. Use ``reader.nodes`` to see
                 available names.
 
         Returns:
-            DataFrame with columns: ``node_label``, tag columns,
+            DataFrame with columns: ``node_label``, key columns,
             ``stdout_log``, ``stderr_log``, ``python_logs``,
             ``traceback``, ``success``, ``timestamp``.
 
diff --git a/src/orcapod/pipeline/observer.py b/src/orcapod/pipeline/observer.py
index 768bf0fa..40d5faf4 100644
--- a/src/orcapod/pipeline/observer.py
+++ b/src/orcapod/pipeline/observer.py
@@ -8,7 +8,7 @@
 
 from typing import Any
 
-from orcapod.protocols.core_protocols import DataProtocol, TagProtocol
+from orcapod.protocols.core_protocols import DataProtocol, KeyProtocol
 from orcapod.types import SchemaLike
 from orcapod.protocols.observability_protocols import (  # noqa: F401  (re-exported for convenience)
     ExecutionObserverProtocol,
@@ -64,7 +64,7 @@ def on_run_end(self, run_id: str) -> None:
         pass
 
     def on_node_start(
-        self, node_label: str, node_hash: str, tag_schema: SchemaLike | None = None
+        self, node_label: str, node_hash: str, key_schema: SchemaLike | None = None
     ) -> None:
         pass
 
@@ -76,7 +76,7 @@ def on_node_end(
     def on_data_start(
         self,
         node_label: str,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
     ) -> None:
         pass
@@ -84,7 +84,7 @@ def on_data_start(
     def on_data_end(
         self,
         node_label: str,
-        tag: TagProtocol,
+        key: KeyProtocol,
         input_data: DataProtocol,
         output_data: DataProtocol | None,
         cached: bool,
@@ -94,7 +94,7 @@ def on_data_end(
     def on_data_crash(
         self,
         node_label: str,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
         error: Exception,
     ) -> None:
@@ -102,7 +102,7 @@ def on_data_crash(
 
     def create_data_logger(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
     ) -> NoOpLogger:
         return _NOOP_LOGGER
diff --git a/src/orcapod/pipeline/result.py b/src/orcapod/pipeline/result.py
index 94d15818..1fd83dcc 100644
--- a/src/orcapod/pipeline/result.py
+++ b/src/orcapod/pipeline/result.py
@@ -6,7 +6,7 @@
 from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
-    from orcapod.protocols.core_protocols import DataProtocol, TagProtocol
+    from orcapod.protocols.core_protocols import DataProtocol, KeyProtocol
 
 
 @dataclass
@@ -14,10 +14,10 @@ class OrchestratorResult:
     """Result of an orchestrator run.
 
     Attributes:
-        node_outputs: Mapping from graph node to its computed (tag, data)
+        node_outputs: Mapping from graph node to its computed (key, data)
             pairs. Empty when ``materialize_results=False``.
     """
 
-    node_outputs: dict[Any, list[tuple["TagProtocol", "DataProtocol"]]] = field(
+    node_outputs: dict[Any, list[tuple["KeyProtocol", "DataProtocol"]]] = field(
         default_factory=dict
     )
diff --git a/src/orcapod/pipeline/serialization.py b/src/orcapod/pipeline/serialization.py
index bf324bc2..e62d048b 100644
--- a/src/orcapod/pipeline/serialization.py
+++ b/src/orcapod/pipeline/serialization.py
@@ -166,14 +166,14 @@ def _build_operator_registry() -> dict[str, type]:
     from orcapod.core.operators import (
         Batch,
         DropDataColumns,
-        DropTagColumns,
+        DropKeyColumns,
         Join,
         MapData,
-        MapTags,
+        MapKeys,
         MergeJoin,
         PolarsFilter,
         SelectDataColumns,
-        SelectTagColumns,
+        SelectKeyColumns,
         SemiJoin,
     )
 
@@ -182,11 +182,11 @@ def _build_operator_registry() -> dict[str, type]:
         "MergeJoin": MergeJoin,
         "SemiJoin": SemiJoin,
         "Batch": Batch,
-        "SelectTagColumns": SelectTagColumns,
-        "DropTagColumns": DropTagColumns,
+        "SelectKeyColumns": SelectKeyColumns,
+        "DropKeyColumns": DropKeyColumns,
         "SelectDataColumns": SelectDataColumns,
         "DropDataColumns": DropDataColumns,
-        "MapTags": MapTags,
+        "MapKeys": MapKeys,
         "MapData": MapData,
         "PolarsFilter": PolarsFilter,
     }
@@ -480,13 +480,13 @@ def _source_proxy_from_config(
         content_hash = node_descriptor.get("content_hash")
         pipeline_hash_val = node_descriptor.get("pipeline_hash")
         output_schema = node_descriptor.get("output_schema", {})
-        tag_schema_dict = output_schema.get("tag", {})
+        key_schema_dict = output_schema.get("key", {})
         data_schema_dict = output_schema.get("data", {})
     else:
         # Inner sources (e.g. inside CachedSource) embed identity via _identity_config()
         content_hash = config.get("content_hash")
         pipeline_hash_val = config.get("pipeline_hash")
-        tag_schema_dict = config.get("tag_schema", {})
+        key_schema_dict = config.get("key_schema", {})
         data_schema_dict = config.get("data_schema", {})
 
     if not content_hash or not pipeline_hash_val:
@@ -503,14 +503,14 @@ def _source_proxy_from_config(
     if source_type and source_type in SOURCE_REGISTRY:
         expected_class_name = SOURCE_REGISTRY[source_type].__name__
 
-    tag_schema = Schema(deserialize_schema(tag_schema_dict))
+    key_schema = Schema(deserialize_schema(key_schema_dict))
     data_schema = Schema(deserialize_schema(data_schema_dict))
 
     return SourceProxy(
         source_id=config.get("source_id", "unknown"),
         content_hash_str=content_hash,
         pipeline_hash_str=pipeline_hash_val,
-        tag_schema=tag_schema,
+        key_schema=key_schema,
         data_schema=data_schema,
         expected_class_name=expected_class_name,
         source_config=config,
diff --git a/src/orcapod/pipeline/status_observer.py b/src/orcapod/pipeline/status_observer.py
index 768bbde4..3504a019 100644
--- a/src/orcapod/pipeline/status_observer.py
+++ b/src/orcapod/pipeline/status_observer.py
@@ -19,7 +19,7 @@
 
 Status schema (fixed columns):
     Fixed columns are prefixed with ``_status_`` to follow system column
-    conventions and avoid collision with user-defined tag column names.
+    conventions and avoid collision with user-defined key column names.
 
     - ``_status_id`` (large_utf8): UUID7 unique to this status event.
     - ``_status_run_id`` (large_utf8): UUID of the pipeline run (from ``on_run_start``).
@@ -31,8 +31,8 @@
     - ``_status_timestamp`` (large_utf8): ISO-8601 UTC timestamp.
     - ``_status_error_summary`` (large_utf8): Brief error on ``FAILED``; ``None`` otherwise.
 
-    In addition, each tag key from the data's tag becomes a separate
-    ``large_utf8`` column (queryable, not JSON-encoded).  Tag columns use
+    In addition, each field from the data's Key becomes a separate
+    ``large_utf8`` column (queryable, not JSON-encoded).  Key columns use
     bare names (no prefix), so they are always distinguishable from fixed
     columns.
 
@@ -44,7 +44,7 @@
     path, not in column values.
 
 Append-only:
-    Each state transition is a new row.  Current state for a (node, tag)
+    Each state transition is a new row.  Current state for a (node, key)
     combination within a run is the row with the latest ``_status_timestamp``.
     If a ``RUNNING`` event has no subsequent terminal event for the same
     ``run_id``, the process crashed.
@@ -59,7 +59,7 @@
 from uuid_utils import uuid7
 
 from orcapod.pipeline.observer import NoOpLogger
-from orcapod.protocols.core_protocols import DataProtocol, TagProtocol
+from orcapod.protocols.core_protocols import DataProtocol, KeyProtocol
 from orcapod.types import SchemaLike
 
 if TYPE_CHECKING:
@@ -103,9 +103,9 @@ def __init__(
         self._db = status_database
         self._current_run_id: str = ""
         self._current_pipeline_uri: str = ""
-        # Maps node_label → tag_schema for in-flight nodes.
+        # Maps node_label → key_schema for in-flight nodes.
         # Populated by on_node_start; cleared by on_node_end.
-        self._tag_schema_per_node: dict[str, SchemaLike] = {}
+        self._key_schema_per_node: dict[str, SchemaLike] = {}
 
     # -- contextualize --
 
@@ -129,49 +129,49 @@ def on_run_start(
     ) -> None:
         self._current_run_id = run_id
         self._current_pipeline_uri = pipeline_uri
-        self._tag_schema_per_node.clear()
+        self._key_schema_per_node.clear()
 
     def on_run_end(self, run_id: str) -> None:
-        self._tag_schema_per_node.clear()
+        self._key_schema_per_node.clear()
 
     def on_node_start(
         self,
         node_label: str,
         node_hash: str,
-        tag_schema: SchemaLike | None = None,
+        key_schema: SchemaLike | None = None,
     ) -> None:
-        self._tag_schema_per_node[node_label] = tag_schema or {}
+        self._key_schema_per_node[node_label] = key_schema or {}
 
     def on_node_end(
         self,
         node_label: str,
         node_hash: str,
     ) -> None:
-        self._tag_schema_per_node.pop(node_label, None)
+        self._key_schema_per_node.pop(node_label, None)
 
     def on_data_start(
-        self, node_label: str, tag: TagProtocol, data: DataProtocol
+        self, node_label: str, key: KeyProtocol, data: DataProtocol
     ) -> None:
-        self._write_event(node_label, tag, state="RUNNING")
+        self._write_event(node_label, key, state="RUNNING")
 
     def on_data_end(
         self,
         node_label: str,
-        tag: TagProtocol,
+        key: KeyProtocol,
         input_data: DataProtocol,
         output_data: DataProtocol | None,
         cached: bool,
     ) -> None:
-        self._write_event(node_label, tag, state="CACHED" if cached else "SUCCESS")
+        self._write_event(node_label, key, state="CACHED" if cached else "SUCCESS")
 
     def on_data_crash(
-        self, node_label: str, tag: TagProtocol, data: DataProtocol, error: Exception
+        self, node_label: str, key: KeyProtocol, data: DataProtocol, error: Exception
     ) -> None:
-        self._write_event(node_label, tag, state="FAILED", error=error)
+        self._write_event(node_label, key, state="FAILED", error=error)
 
     def create_data_logger(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
     ) -> NoOpLogger:
         """Return a no-op logger.
@@ -261,14 +261,14 @@ def from_config(
     def _write_event(
         self,
         node_label: str,
-        tag: TagProtocol,
+        key: KeyProtocol,
         state: str,
         error: Exception | None = None,
     ) -> None:
         """Build and write a single status event row."""
         import pyarrow as pa
 
-        tag_schema = self._tag_schema_per_node.get(node_label, {})
+        key_schema = self._key_schema_per_node.get(node_label, {})
 
         status_id = str(uuid7())
         timestamp = datetime.now(timezone.utc).isoformat()
@@ -285,10 +285,10 @@ def _write_event(
             ),
         }
 
-        # Tag columns — use statically-known schema from on_node_start
-        for key in tag_schema:
-            value = tag.get(key, None)
-            columns[key] = pa.array(
+        # Key columns — use statically-known schema from on_node_start
+        for col_name in key_schema:
+            value = key.get(col_name, None)
+            columns[col_name] = pa.array(
                 [str(value) if value is not None else None],
                 type=pa.large_utf8(),
             )
@@ -322,7 +322,7 @@ def __init__(
         self._db = db
         self._current_run_id: str = ""
         self._current_pipeline_uri: str = ""
-        self._tag_schema: SchemaLike = {}
+        self._key_schema: SchemaLike = {}
 
     def contextualize(self, *identity_path: str) -> "_ContextualizedStatusObserver":
         """Re-contextualize with a new identity path (scopes the DB)."""
@@ -343,44 +343,44 @@ def on_node_start(
         self,
         node_label: str,
         node_hash: str,
-        tag_schema: SchemaLike | None = None,
+        key_schema: SchemaLike | None = None,
     ) -> None:
-        self._tag_schema = tag_schema or {}
+        self._key_schema = key_schema or {}
 
     def on_node_end(
         self,
         node_label: str,
         node_hash: str,
     ) -> None:
-        self._tag_schema = {}
+        self._key_schema = {}
 
     def on_data_start(
-        self, node_label: str, tag: TagProtocol, data: DataProtocol
+        self, node_label: str, key: KeyProtocol, data: DataProtocol
     ) -> None:
-        self._write_event(node_label, tag, state="RUNNING")
+        self._write_event(node_label, key, state="RUNNING")
 
     def on_data_end(
         self,
         node_label: str,
-        tag: TagProtocol,
+        key: KeyProtocol,
         input_data: DataProtocol,
         output_data: DataProtocol | None,
         cached: bool,
     ) -> None:
-        self._write_event(node_label, tag, state="CACHED" if cached else "SUCCESS")
+        self._write_event(node_label, key, state="CACHED" if cached else "SUCCESS")
 
     def on_data_crash(
         self,
         node_label: str,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
         error: Exception,
     ) -> None:
-        self._write_event(node_label, tag, state="FAILED", error=error)
+        self._write_event(node_label, key, state="FAILED", error=error)
 
     def create_data_logger(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
     ) -> NoOpLogger:
         return _NOOP_LOGGER
@@ -390,7 +390,7 @@ def create_data_logger(
     def _write_event(
         self,
         node_label: str,
-        tag: TagProtocol,
+        key: KeyProtocol,
         state: str,
         error: Exception | None = None,
     ) -> None:
@@ -412,9 +412,9 @@ def _write_event(
             ),
         }
 
-        for key in self._tag_schema:
-            value = tag.get(key, None)
-            columns[key] = pa.array(
+        for col_name in self._key_schema:
+            value = key.get(col_name, None)
+            columns[col_name] = pa.array(
                 [str(value) if value is not None else None],
                 type=pa.large_utf8(),
             )
diff --git a/src/orcapod/pipeline/sync_orchestrator.py b/src/orcapod/pipeline/sync_orchestrator.py
index 509d2f00..11d11c64 100644
--- a/src/orcapod/pipeline/sync_orchestrator.py
+++ b/src/orcapod/pipeline/sync_orchestrator.py
@@ -21,7 +21,7 @@
     import networkx as nx
 
     from orcapod.protocols.observability_protocols import ExecutionObserverProtocol
-    from orcapod.protocols.core_protocols import DataProtocol, TagProtocol
+    from orcapod.protocols.core_protocols import DataProtocol, KeyProtocol
 
 logger = logging.getLogger(__name__)
 
@@ -78,7 +78,7 @@ def run(
 
         try:
             topo_order = list(nx.topological_sort(graph))
-            buffers: dict[Any, list[tuple[TagProtocol, DataProtocol]]] = {}
+            buffers: dict[Any, list[tuple[KeyProtocol, DataProtocol]]] = {}
             processed: set[Any] = set()
 
             for node in topo_order:
@@ -148,16 +148,16 @@ def _gather_upstream_multi(
 
     @staticmethod
     def _materialize_as_stream(buf: list[tuple[Any, Any]], upstream_node: Any) -> Any:
-        """Wrap a (tag, data) buffer as an ArrowTableStream.
+        """Wrap a (key, data) buffer as an ArrowTableStream.
 
         Uses the same column selection pattern as
-        ``StaticOutputOperatorPod._materialize_to_stream``: system_tags
-        for tags, source info for data.
+        ``StaticOutputOperatorPod._materialize_to_stream``: system_keys
+        for keys, source info for data.
 
         Args:
-            buf: List of (tag, data) tuples.
+            buf: List of (key, data) tuples.
             upstream_node: The node that produced this buffer (used to
-                determine tag column names).
+                determine key column names).
 
         Returns:
             An ArrowTableStream.
@@ -170,45 +170,45 @@ def _materialize_as_stream(buf: list[tuple[Any, Any]], upstream_node: Any) -> An
 
         if not buf:
             # Build an empty stream with the correct schema from the upstream node
-            tag_schema, data_schema = upstream_node.output_schema(
-                columns={"system_tags": True, "source": True}
+            key_schema, data_schema = upstream_node.output_schema(
+                columns={"system_keys": True, "source": True}
             )
             type_converter = upstream_node.data_context.type_converter
             empty_fields = {}
-            for name, py_type in {**tag_schema, **data_schema}.items():
+            for name, py_type in {**key_schema, **data_schema}.items():
                 arrow_type = type_converter.python_type_to_arrow_type(py_type)
                 empty_fields[name] = pa.array([], type=arrow_type)
             empty_table = pa.table(empty_fields)
-            tag_keys = upstream_node.keys()[0]
+            key_keys = upstream_node.keys()[0]
             return ArrowTableStream(
                 empty_table,
-                tag_columns=tag_keys,
+                key_columns=key_keys,
                 producer=upstream_node.producer,
                 upstreams=upstream_node.upstreams,
             )
 
-        tag_tables = [tag.as_table(columns={"system_tags": True}) for tag, _ in buf]
+        key_tables = [key.as_table(columns={"system_keys": True}) for key, _ in buf]
         data_tables = [pkt.as_table(columns={"source": True}) for _, pkt in buf]
 
-        combined_tags = pa.concat_tables(tag_tables)
+        combined_keys = pa.concat_tables(key_tables)
         combined_data = pa.concat_tables(data_tables)
 
-        user_tag_keys = tuple(buf[0][0].keys())
+        user_key_keys = tuple(buf[0][0].keys())
         source_info = buf[0][1].source_info()
 
-        full_table = arrow_utils.hstack_tables(combined_tags, combined_data)
+        full_table = arrow_utils.hstack_tables(combined_keys, combined_data)
 
         # Pass the upstream node's producer and upstreams so the
         # materialized stream inherits the correct identity_structure
         # and pipeline_identity_structure (via StreamBase delegation).
-        # This ensures downstream operators produce correct system tag
+        # This ensures downstream operators produce correct system key
         # column names (which embed pipeline hashes of their inputs).
         producer = upstream_node.producer
         upstreams = upstream_node.upstreams
 
         return ArrowTableStream(
             full_table,
-            tag_columns=user_tag_keys,
+            key_columns=user_key_keys,
             source_info=source_info,
             producer=producer,
             upstreams=upstreams,
diff --git a/src/orcapod/protocols/core_protocols/__init__.py b/src/orcapod/protocols/core_protocols/__init__.py
index 8c4fe58c..41600058 100644
--- a/src/orcapod/protocols/core_protocols/__init__.py
+++ b/src/orcapod/protocols/core_protocols/__init__.py
@@ -1,7 +1,7 @@
 from orcapod.types import ColumnConfig
 from orcapod.protocols.hashing_protocols import PipelineElementProtocol
 
-from .datagrams import DatagramProtocol, DataProtocol, TagProtocol
+from .datagrams import DatagramProtocol, DataProtocol, KeyProtocol
 from .executor import DataFunctionExecutorProtocol, PythonFunctionExecutorProtocol
 from .function_pod import FunctionPodProtocol
 from .operator_pod import OperatorPodProtocol
@@ -14,7 +14,7 @@
 __all__ = [
     "ColumnConfig",
     "DatagramProtocol",
-    "TagProtocol",
+    "KeyProtocol",
     "DataProtocol",
     "SourceProtocol",
     "StreamProtocol",
diff --git a/src/orcapod/protocols/core_protocols/datagrams.py b/src/orcapod/protocols/core_protocols/datagrams.py
index d3908d90..2590b51a 100644
--- a/src/orcapod/protocols/core_protocols/datagrams.py
+++ b/src/orcapod/protocols/core_protocols/datagrams.py
@@ -33,9 +33,9 @@ class DatagramProtocol(ContentIdentifiableProtocol, DataContextAwareProtocol, Pr
     - **Meta columns**: Internal system metadata with {constants.META_PREFIX} (typically '__') prefixes (e.g. __processed_at, etc.)
     - **Context column**: Data context information ({constants.CONTEXT_KEY})
 
-    Derivative of datagram (such as DataProtocol or TagProtocol) will also include some specific columns pertinent to the function of the specialized datagram:
+    Derivatives of the datagram (such as DataProtocol or KeyProtocol) also include some specific columns pertinent to the function of the specialized datagram:
     - **Source info columns**: Data provenance with {constants.SOURCE_PREFIX} ('_source_') prefixes (_source_user_id, etc.) used in DataProtocol
-    - **System tags**: Internal tags for system use, typically prefixed with {constants.SYSTEM_TAG_PREFIX} ('_system_') (_system_created_at, etc.) used in TagProtocol
+    - **System keys**: Internal keys for system use, typically prefixed with {constants.SYSTEM_KEY_PREFIX} ('_system_') (_system_created_at, etc.) used in KeyProtocol
 
     All operations are by design immutable - methods return new datagram instances rather than modifying existing ones.
 
@@ -595,11 +595,11 @@ def __repr__(self) -> str:
 
 
 @runtime_checkable
-class TagProtocol(DatagramProtocol, Protocol):
+class KeyProtocol(DatagramProtocol, Protocol):
     """
     Metadata associated with each data item in a stream.
 
-    Tags carry contextual information about data data as they flow through
+    Keys carry contextual information about data items as they flow through
     the computational graph. They are immutable and provide metadata that
     helps with:
     - Data lineage tracking
@@ -616,7 +616,7 @@ class TagProtocol(DatagramProtocol, Protocol):
     - Quality indicators or confidence scores
     """
 
-    def system_tags(self) -> dict[str, DataValue]:
+    def system_keys(self) -> dict[str, DataValue]:
         """
         Return metadata about the data's source/origin.
 
@@ -639,7 +639,7 @@ class DataProtocol(DatagramProtocol, Protocol):
     The actual data payload in a stream.
 
     Datas represent the core data being processed through the computational
-    graph. Unlike Tags (which are metadata), Datas contain the actual
+    graph. Unlike Keys (which are metadata), Datas contain the actual
     information that computations operate on.
 
     Datas extend DatagramProtocol with additional capabilities for:
@@ -647,8 +647,8 @@ class DataProtocol(DatagramProtocol, Protocol):
     - Content-based hashing for caching
     - Metadata inclusion for debugging
 
-    The distinction between TagProtocol and DataProtocol is crucial for understanding
-    data flow: Tags provide context, Datas provide content.
+    The distinction between KeyProtocol and DataProtocol is crucial for understanding
+    data flow: Keys provide context, Datas provide content.
     """
 
     def source_info(self) -> dict[str, str | None]:
diff --git a/src/orcapod/protocols/core_protocols/function_pod.py b/src/orcapod/protocols/core_protocols/function_pod.py
index 9c8bab91..39076421 100644
--- a/src/orcapod/protocols/core_protocols/function_pod.py
+++ b/src/orcapod/protocols/core_protocols/function_pod.py
@@ -1,6 +1,6 @@
 from typing import Any, Protocol, runtime_checkable
 
-from orcapod.protocols.core_protocols.datagrams import DataProtocol, TagProtocol
+from orcapod.protocols.core_protocols.datagrams import DataProtocol, KeyProtocol
 from orcapod.protocols.core_protocols.data_function import DataFunctionProtocol
 from orcapod.protocols.core_protocols.pod import PodProtocol
 from orcapod.protocols.hashing_protocols import PipelineElementProtocol
@@ -20,12 +20,12 @@ def data_function(self) -> DataFunctionProtocol:
         ...
 
     def process_data(
-        self, tag: TagProtocol, data: DataProtocol
-    ) -> tuple[TagProtocol, DataProtocol | None]: ...
+        self, key: KeyProtocol, data: DataProtocol
+    ) -> tuple[KeyProtocol, DataProtocol | None]: ...
 
     async def async_process_data(
-        self, tag: TagProtocol, data: DataProtocol
-    ) -> tuple[TagProtocol, DataProtocol | None]: ...
+        self, key: KeyProtocol, data: DataProtocol
+    ) -> tuple[KeyProtocol, DataProtocol | None]: ...
 
     def to_config(self) -> dict[str, Any]:
         """Serialize this function pod to a JSON-compatible config dict."""
diff --git a/src/orcapod/protocols/core_protocols/pod.py b/src/orcapod/protocols/core_protocols/pod.py
index 4a8b0a58..4bfe0b63 100644
--- a/src/orcapod/protocols/core_protocols/pod.py
+++ b/src/orcapod/protocols/core_protocols/pod.py
@@ -110,7 +110,7 @@ def output_schema(
             *streams: Input streams to analyze
 
         Returns:
-            tuple[TypeSpec, TypeSpec]: (tag_types, data_types) for output
+            tuple[TypeSpec, TypeSpec]: (key_types, data_types) for output
 
         Raises:
             ValidationError: If input types are incompatible
diff --git a/src/orcapod/protocols/core_protocols/streams.py b/src/orcapod/protocols/core_protocols/streams.py
index b5000e7e..978a4079 100644
--- a/src/orcapod/protocols/core_protocols/streams.py
+++ b/src/orcapod/protocols/core_protocols/streams.py
@@ -3,7 +3,7 @@
 from collections.abc import AsyncIterator, Collection, Iterator, Mapping, Sequence
 from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
 
-from orcapod.protocols.core_protocols.datagrams import DataProtocol, TagProtocol
+from orcapod.protocols.core_protocols.datagrams import DataProtocol, KeyProtocol
 from orcapod.protocols.core_protocols.traceable import TraceableProtocol
 from orcapod.protocols.hashing_protocols import PipelineElementProtocol
 from orcapod.types import ColumnConfig, Schema
@@ -21,7 +21,7 @@ class StreamProtocol(TraceableProtocol, PipelineElementProtocol, Protocol):
     """
     Base protocol for all streams in Orcapod.
 
-    Streams represent sequences of (TagProtocol, DataProtocol) pairs flowing through the
+    Streams represent sequences of (KeyProtocol, DataProtocol) pairs flowing through the
     computational graph. They are the fundamental data structure connecting
     kernels and carrying both data and metadata.
 
@@ -30,7 +30,7 @@ class StreamProtocol(TraceableProtocol, PipelineElementProtocol, Protocol):
     - Live: Dynamic streams that stay current with upstream dependencies
 
     All streams provide:
-    - Iteration over (tag, data) pairs
+    - Iteration over (key, data) pairs
     - Type information and schema access
     - Lineage information (source kernel and upstream streams)
     - Basic caching and freshness tracking
@@ -78,7 +78,7 @@ def keys(
         """
         Available keys/fields in the stream content.
 
-        Returns the field names present in both tags and data.
+        Returns the field names present in both keys and data.
         This provides schema information without requiring type details,
         useful for:
         - Schema inspection and exploration
@@ -86,7 +86,7 @@ def keys(
         - Field validation and mapping
 
         Returns:
-            tuple[tuple[str, ...], tuple[str, ...]]: (tag_keys, data_keys)
+            tuple[tuple[str, ...], tuple[str, ...]]: (key_field_names, data_field_names)
         """
         ...
 
@@ -99,38 +99,38 @@ def output_schema(
         """
         Type specifications for the stream content.
 
-        Returns the type schema for both tags and data in this stream.
+        Returns the type schema for both keys and data in this stream.
         This information is used for:
         - Type checking and validation
         - Schema inference and planning
         - Compatibility checking between kernels
 
         Returns:
-            tuple[Schema, Schema]: (tag_types, data_types)
+            tuple[Schema, Schema]: (key_types, data_types)
         """
         ...
 
-    def iter_data(self) -> Iterator[tuple[TagProtocol, DataProtocol]]:
+    def iter_data(self) -> Iterator[tuple[KeyProtocol, DataProtocol]]:
         """
-        Generates explicit iterator over (tag, data) pairs in the stream.
+        Generates explicit iterator over (key, data) pairs in the stream.
 
         Note that multiple invocation of `iter_data` may not always
         return an identical iterator.
 
         Yields:
-            tuple[TagProtocol, DataProtocol]: Sequential (tag, data) pairs
+            tuple[KeyProtocol, DataProtocol]: Sequential (key, data) pairs
         """
         ...
 
-    def async_iter_data(self) -> AsyncIterator[tuple[TagProtocol, DataProtocol]]:
+    def async_iter_data(self) -> AsyncIterator[tuple[KeyProtocol, DataProtocol]]:
         """
-        Generates asynchronous iterator over (tag, data) pairs in the stream.
+        Generates asynchronous iterator over (key, data) pairs in the stream.
 
         Note that multiple invocation of `async_iter_data` may not always
         return an identical iterator.
 
         Yields:
-            tuple[tagProtocol, DataProtcol]: Asynchrnous sequential (tag, data) pairs
+            tuple[KeyProtocol, DataProtocol]: Asynchronous sequential (key, data) pairs
 
         """
         ...
@@ -144,7 +144,7 @@ def as_table(
         """
         Convert the entire stream to a PyArrow Table.
 
-        Materializes all (tag, data) pairs into a single table for
+        Materializes all (key, data) pairs into a single table for
         analysis and processing. This operation may be expensive for
         large streams or live streams that need computation.
 
@@ -205,9 +205,9 @@ def as_pandas_df(
 
     def flow(
         self,
-    ) -> Sequence[tuple[TagProtocol, DataProtocol]]:
+    ) -> Sequence[tuple[KeyProtocol, DataProtocol]]:
         """
-        Return the entire stream as a collection of (tag, data) pairs.
+        Return the entire stream as a collection of (key, data) pairs.
 
         This method materializes the stream content into a list or similar
         collection type. It is useful for small streams or when you need
@@ -226,7 +226,7 @@ def join(
         Join this stream with another stream.
 
         Combines two streams into a single stream by merging their content.
-        The resulting stream contains all (tag, data) pairs from both
+        The resulting stream contains all (key, data) pairs from both
         streams, preserving their order.
 
         Args:
@@ -244,8 +244,8 @@ def semi_join(
         Perform a semi-join with another stream.
 
         This operation filters this stream to only include data that have
-        corresponding tags in the other stream. The resulting stream contains
-        all (tag, data) pairs from this stream that match tags in the other.
+        corresponding keys in the other stream. The resulting stream contains
+        all (key, data) pairs from this stream that match keys in the other.
 
         Args:
             other_stream: The other stream to semi-join with this one.
@@ -255,14 +255,14 @@ def semi_join(
         """
         ...
 
-    def map_tags(
+    def map_keys(
         self,
         name_map: Mapping[str, str],
         drop_unmapped: bool = True,
         label: str | None = None,
     ) -> "StreamProtocol":
         """
-        Map tag names in this stream to new names based on the provided mapping.
+        Map key names in this stream to new names based on the provided mapping.
         """
         ...
 
@@ -285,15 +285,15 @@ def polars_filter(
         **constraints: Any,
     ) -> "StreamProtocol": ...
 
-    def select_tag_columns(
+    def select_key_columns(
         self,
-        tag_columns: str | Collection[str],
+        key_columns: str | Collection[str],
         strict: bool = True,
         label: str | None = None,
     ) -> "StreamProtocol":
         """
-        Select the specified tag columns from the stream. A ValueError is raised
-        if one or more specified tag columns do not exist in the stream unless strict = False.
+        Select the specified key columns from the stream. A ValueError is raised
+        if one or more specified key columns do not exist in the stream unless strict = False.
         """
         ...
 
@@ -304,20 +304,20 @@ def select_data_columns(
         label: str | None = None,
     ) -> "StreamProtocol":
         """
-        Select the specified tag columns from the stream. A ValueError is raised
-        if one or more specified tag columns do not exist in the stream unless strict = False.
+        Select the specified data columns from the stream. A ValueError is raised
+        if one or more specified data columns do not exist in the stream unless strict = False.
         """
         ...
 
-    def drop_tag_columns(
+    def drop_key_columns(
         self,
-        tag_columns: str | Collection[str],
+        key_columns: str | Collection[str],
         strict: bool = True,
         label: str | None = None,
     ) -> "StreamProtocol":
         """
-        Drop the specified tag columns from the stream. A ValueError is raised
-        if one or more specified tag columns do not exist in the stream unless strict = False.
+        Drop the specified key columns from the stream. A ValueError is raised
+        if one or more specified key columns do not exist in the stream unless strict = False.
         """
         ...
 
@@ -343,17 +343,17 @@ def batch(
         """
         Batch the stream into groups of the specified size.
 
-        This operation groups (tag, data) pairs into batches for more
-        efficient processing. Each batch is represented as a single (tag, data)
-        pair where the tag is a list of tags and the data is a list of data.
+        This operation groups (key, data) pairs into batches for more
+        efficient processing. Each batch is represented as a single (key, data)
+        pair where the key is a list of keys and the data is a list of data.
 
         Args:
-            batch_size: Number of (tag, data) pairs per batch. If 0, all
+            batch_size: Number of (key, data) pairs per batch. If 0, all
                         pairs are included in a single batch.
             drop_partial_batch: If True, drop the last batch if it has fewer
                              than batch_size pairs.
 
         Returns:
-            Self: New stream containing batched (tag, data) pairs.
+            Self: New stream containing batched (key, data) pairs.
         """
         ...
diff --git a/src/orcapod/protocols/hashing_protocols.py b/src/orcapod/protocols/hashing_protocols.py
index 3ab2aace..3497180d 100644
--- a/src/orcapod/protocols/hashing_protocols.py
+++ b/src/orcapod/protocols/hashing_protocols.py
@@ -39,7 +39,7 @@ class PipelineElementProtocol(Protocol):
     for pipeline database path scoping: the schemas and the recursive topology
     of the upstream computation.
 
-    The base case (RootSource) returns a hash of (tag_schema, data_schema).
+    The base case (RootSource) returns a hash of (key_schema, data_schema).
     Every other element recurses through the pipeline_hash() of its upstream
     inputs, with the hash values themselves (ContentHash objects) used as
     terminal leaves so no special hasher mode is required.
@@ -53,7 +53,7 @@ def pipeline_identity_structure(self) -> Any:
         """
         Return a structure representing this element's pipeline identity.
 
-        At source nodes (base case): return (tag_schema, data_schema).
+        At source nodes (base case): return (key_schema, data_schema).
         At all other nodes: return a structure containing references to
         upstream pipeline elements and/or data functions as raw objects.
         The pipeline resolver threaded through pipeline_hash() ensures that
diff --git a/src/orcapod/protocols/node_protocols.py b/src/orcapod/protocols/node_protocols.py
index 280aae46..83e2cff3 100644
--- a/src/orcapod/protocols/node_protocols.py
+++ b/src/orcapod/protocols/node_protocols.py
@@ -21,7 +21,7 @@
     from orcapod.protocols.core_protocols import (
         DataProtocol,
         StreamProtocol,
-        TagProtocol,
+        KeyProtocol,
     )
 
 
@@ -44,11 +44,11 @@ def execute(
         self,
         *,
         observer: ExecutionObserverProtocol | None = None,
-    ) -> list[tuple[TagProtocol, DataProtocol]]: ...
+    ) -> list[tuple[KeyProtocol, DataProtocol]]: ...
 
     async def async_execute(
         self,
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         *,
         observer: ExecutionObserverProtocol | None = None,
     ) -> None: ...
@@ -79,12 +79,12 @@ def execute(
         *,
         observer: ExecutionObserverProtocol | None = None,
         error_policy: Literal["continue", "fail_fast"] = "continue",
-    ) -> list[tuple[TagProtocol, DataProtocol]]: ...
+    ) -> list[tuple[KeyProtocol, DataProtocol]]: ...
 
     async def async_execute(
         self,
-        input_channel: ReadableChannel[tuple[TagProtocol, DataProtocol]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        input_channel: ReadableChannel[tuple[KeyProtocol, DataProtocol]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         *,
         observer: ExecutionObserverProtocol | None = None,
     ) -> None: ...
@@ -105,12 +105,12 @@ def execute(
         self,
         *input_streams: StreamProtocol,
         observer: ExecutionObserverProtocol | None = None,
-    ) -> list[tuple[TagProtocol, DataProtocol]]: ...
+    ) -> list[tuple[KeyProtocol, DataProtocol]]: ...
 
     async def async_execute(
         self,
-        inputs: Sequence[ReadableChannel[tuple[TagProtocol, DataProtocol]]],
-        output: WritableChannel[tuple[TagProtocol, DataProtocol]],
+        inputs: Sequence[ReadableChannel[tuple[KeyProtocol, DataProtocol]]],
+        output: WritableChannel[tuple[KeyProtocol, DataProtocol]],
         *,
         observer: ExecutionObserverProtocol | None = None,
     ) -> None: ...
diff --git a/src/orcapod/protocols/observability_protocols.py b/src/orcapod/protocols/observability_protocols.py
index e285e73d..80d6560c 100644
--- a/src/orcapod/protocols/observability_protocols.py
+++ b/src/orcapod/protocols/observability_protocols.py
@@ -15,7 +15,7 @@
 
 from typing import Any, Protocol, runtime_checkable
 
-from orcapod.protocols.core_protocols import DataProtocol, TagProtocol
+from orcapod.protocols.core_protocols import DataProtocol, KeyProtocol
 from orcapod.types import SchemaLike
 
 
@@ -23,7 +23,7 @@
 class DataExecutionLoggerProtocol(Protocol):
     """Receives captured execution output and persists it.
 
-    A logger is *bound* to a specific data execution context (node, tag,
+    A logger is *bound* to a specific data execution context (node, key,
     data) when created by the Observer.  It knows the destination (e.g. a
     Delta Lake table) but does not know how the logs were collected — that is
     the executor's responsibility.
@@ -116,14 +116,14 @@ def on_node_start(
         self,
         node_label: str,
         node_hash: str,
-        tag_schema: SchemaLike | None = None,
+        key_schema: SchemaLike | None = None,
     ) -> None:
         """Called before a node begins processing its data.
 
         Args:
             node_label: Human-readable label of the node.
             node_hash: Content hash of the node.
-            tag_schema: The tag schema (including system tags) for this
+            key_schema: The key schema (including system keys) for this
                 node's input stream.
         """
         ...
@@ -144,7 +144,7 @@ def on_node_end(
     def on_data_start(
         self,
         node_label: str,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
     ) -> None:
         """Called before a data is processed by a function node."""
@@ -153,7 +153,7 @@ def on_data_start(
     def on_data_end(
         self,
         node_label: str,
-        tag: TagProtocol,
+        key: KeyProtocol,
         input_data: DataProtocol,
         output_data: DataProtocol | None,
         cached: bool,
@@ -169,7 +169,7 @@ def on_data_end(
     def on_data_crash(
         self,
         node_label: str,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
         error: Exception,
     ) -> None:
@@ -183,7 +183,7 @@ def on_data_crash(
 
     def create_data_logger(
         self,
-        tag: TagProtocol,
+        key: KeyProtocol,
         data: DataProtocol,
     ) -> DataExecutionLoggerProtocol:
         """Create a context-bound logger for a single data execution.
@@ -193,7 +193,7 @@ def create_data_logger(
         without the executor needing to know anything about the pipeline.
 
         Args:
-            tag: The tag for the data being processed.
+            key: The key for the data being processed.
             data: The input data being processed.
         """
         ...
diff --git a/src/orcapod/protocols/pipeline_protocols.py b/src/orcapod/protocols/pipeline_protocols.py
index f21e970c..067a7020 100644
--- a/src/orcapod/protocols/pipeline_protocols.py
+++ b/src/orcapod/protocols/pipeline_protocols.py
@@ -21,7 +21,7 @@ def get_all_records(
         self, include_system_columns: bool = False
     ) -> "pa.Table | None":
         """
-        Retrieve all tag and data processed by this PodProtocol.
+        Retrieve all keys and data processed by this PodProtocol.
 
         This method returns a table containing all data processed by the PodProtocol,
         including metadata and system columns if requested. It is useful for:
@@ -52,7 +52,7 @@ def flush(self):
 
     def add_pipeline_record(
         self,
-        tag: cp.TagProtocol,
+        key: cp.KeyProtocol,
         input_data: cp.DataProtocol,
         data_record_id: str,
         retrieved: bool | None = None,
diff --git a/src/orcapod/semantic_types/type_inference.py b/src/orcapod/semantic_types/type_inference.py
index 5ddc58aa..e1a87b0e 100644
--- a/src/orcapod/semantic_types/type_inference.py
+++ b/src/orcapod/semantic_types/type_inference.py
@@ -266,7 +266,7 @@ def test_schema_inference():
             "name": "Alice",
             "scores": [85, 92, 78],
             "coordinates": (10.5, 20.3),
-            "tags": {"python", "data"},
+            "keys": {"python", "data"},
             "metadata": {"created": "2023-01-01", "version": 1},
             "optional_field": "present",
         },
@@ -275,7 +275,7 @@ def test_schema_inference():
             "name": "Bob",
             "scores": [88, 91],
             "coordinates": (15.2, 25.7),
-            "tags": {"java", "backend"},
+            "keys": {"java", "backend"},
             "metadata": {"created": "2023-01-02", "version": 2},
             "optional_field": None,
         },
@@ -284,7 +284,7 @@ def test_schema_inference():
             "name": "Charlie",
             "scores": [95, 87, 89, 92],
             "coordinates": (5.1, 30.9),
-            "tags": {"javascript", "frontend"},
+            "keys": {"javascript", "frontend"},
             "metadata": {"created": "2023-01-03", "version": 1},
             "mixed_field": 42,
         },
@@ -293,7 +293,7 @@ def test_schema_inference():
             "name": "Diana",
             "scores": [],
             "coordinates": (0.0, 0.0),
-            "tags": set(),
+            "keys": set(),
             "metadata": {},
             "mixed_field": "text",
         },
diff --git a/src/orcapod/system_constants.py b/src/orcapod/system_constants.py
index 13639ca9..101532e5 100644
--- a/src/orcapod/system_constants.py
+++ b/src/orcapod/system_constants.py
@@ -9,9 +9,9 @@
 INPUT_DATA_HASH_COL = "input_data_hash"
 DATA_RECORD_ID = "data_id"
 NODE_CONTENT_HASH_COL = "node_content_hash"
-SYSTEM_TAG_PREFIX_NAME = "tag"
-SYSTEM_TAG_SOURCE_ID_FIELD = "source_id"
-SYSTEM_TAG_RECORD_ID_FIELD = "record_id"
+SYSTEM_KEY_PREFIX_NAME = "key"
+SYSTEM_KEY_SOURCE_ID_FIELD = "source_id"
+SYSTEM_KEY_RECORD_ID_FIELD = "record_id"
 POD_VERSION = "pod_version"
 EXECUTION_ENGINE = "execution_engine"
 POD_TIMESTAMP = "pod_ts"
@@ -73,16 +73,16 @@ def NODE_CONTENT_HASH_COL(self) -> str:
         return f"{self._global_prefix}{DATAGRAM_PREFIX}{NODE_CONTENT_HASH_COL}"
 
     @property
-    def SYSTEM_TAG_PREFIX(self) -> str:
-        return f"{self._global_prefix}{DATAGRAM_PREFIX}{SYSTEM_TAG_PREFIX_NAME}_"
+    def SYSTEM_KEY_PREFIX(self) -> str:
+        return f"{self._global_prefix}{DATAGRAM_PREFIX}{SYSTEM_KEY_PREFIX_NAME}_"
 
     @property
-    def SYSTEM_TAG_SOURCE_ID_PREFIX(self) -> str:
-        return f"{self.SYSTEM_TAG_PREFIX}{SYSTEM_TAG_SOURCE_ID_FIELD}"
+    def SYSTEM_KEY_SOURCE_ID_PREFIX(self) -> str:
+        return f"{self.SYSTEM_KEY_PREFIX}{SYSTEM_KEY_SOURCE_ID_FIELD}"
 
     @property
-    def SYSTEM_TAG_RECORD_ID_PREFIX(self) -> str:
-        return f"{self.SYSTEM_TAG_PREFIX}{SYSTEM_TAG_RECORD_ID_FIELD}"
+    def SYSTEM_KEY_RECORD_ID_PREFIX(self) -> str:
+        return f"{self.SYSTEM_KEY_PREFIX}{SYSTEM_KEY_RECORD_ID_FIELD}"
 
     @property
     def POD_VERSION(self) -> str:
diff --git a/src/orcapod/types.py b/src/orcapod/types.py
index 6d001b44..1cdc5dc8 100644
--- a/src/orcapod/types.py
+++ b/src/orcapod/types.py
@@ -3,7 +3,7 @@
 Defines the fundamental data types, type aliases, and data structures used
 throughout the OrcaPod framework, including:
 
-    - Type aliases for data values, schemas, paths, and tags.
+    - Type aliases for data values, schemas, paths, and keys.
     - ``Schema`` -- an immutable, hashable mapping of field names to Python types.
     - ``ContentHash`` -- a content-addressable hash pairing a method name with
       a raw digest, with convenience conversions to hex, int, UUID, and base64.
@@ -42,9 +42,9 @@
 ``os.PathLike``)."""
 
 # TODO: accomodate other common data types such as datetime
-TagValue: TypeAlias = int | str | None | Collection["TagValue"]
-"""A tag metadata value: an int, string, ``None``, or an arbitrarily nested
-collection thereof. Tags are used to label and organise data and
+KeyValue: TypeAlias = int | str | None | Collection["KeyValue"]
+"""A key metadata value: an int, string, ``None``, or an arbitrarily nested
+collection thereof. Keys are used to label and organise data and
 datagrams."""
 
 PathSet: TypeAlias = PathLike | Collection[PathLike | None]
@@ -358,7 +358,7 @@ class CacheMode(Enum):
 @dataclass(frozen=True, slots=True)
 class ColumnConfig:
     """
-    Configuration for column inclusion in DatagramProtocol/DataProtocol/TagProtocol operations.
+    Configuration for column inclusion in DatagramProtocol/DataProtocol/KeyProtocol operations.
 
     Controls which column types to include when converting to tables, dicts,
     or querying keys/types.
@@ -371,7 +371,7 @@ class ColumnConfig:
                 (prefix '__' is added automatically if not present)
         context: Include context column
         source: Include source info columns (DataProtocol only, ignored for others)
-        system_tags: Include system tag columns (TagProtocol only, ignored for others)
+        system_keys: Include system key columns (KeyProtocol only, ignored for others)
         all_info: Include all available columns (overrides other settings)
 
     Examples:
@@ -394,9 +394,9 @@ class ColumnConfig:
     meta: bool | Collection[str] = False
     context: bool = False
     source: bool = False  # Only relevant for DataProtocol
-    system_tags: bool = False  # Only relevant for TagProtocol
+    system_keys: bool = False  # Only relevant for KeyProtocol
     content_hash: bool | str = False  # Only relevant for DataProtocol
-    sort_by_tags: bool = False  # Only relevant for TagProtocol
+    sort_by_keys: bool = False  # Only relevant for KeyProtocol
     all_info: bool = False
 
     @classmethod
@@ -406,9 +406,9 @@ def all(cls) -> Self:
             meta=True,
             context=True,
             source=True,
-            system_tags=True,
+            system_keys=True,
             content_hash=True,
-            sort_by_tags=True,
+            sort_by_keys=True,
             all_info=True,
         )
 
diff --git a/src/orcapod/utils/arrow_utils.py b/src/orcapod/utils/arrow_utils.py
index 13ba410c..ddf9f831 100644
--- a/src/orcapod/utils/arrow_utils.py
+++ b/src/orcapod/utils/arrow_utils.py
@@ -932,18 +932,18 @@ def get_system_columns(table: "pa.Table") -> "pa.Table":
         [
             col
             for col in table.column_names
-            if col.startswith(constants.SYSTEM_TAG_PREFIX)
+            if col.startswith(constants.SYSTEM_KEY_PREFIX)
         ]
     )
 
 
-def add_system_tag_columns(
+def add_system_key_columns(
     table: "pa.Table",
     schema_hash: str,
     source_ids: str | Collection[str],
     record_ids: Collection[str],
 ) -> "pa.Table":
-    """Add paired source_id and record_id system tag columns to an Arrow table."""
+    """Add paired source_id and record_id system key columns to an Arrow table."""
     if not table.column_names:
         raise ValueError("Table is empty")
 
@@ -961,13 +961,13 @@ def add_system_tag_columns(
     if len(record_ids) != table.num_rows:
         raise ValueError("Length of record_ids must match number of rows in the table.")
 
-    source_id_col_name = f"{constants.SYSTEM_TAG_SOURCE_ID_PREFIX}{constants.BLOCK_SEPARATOR}{schema_hash}"
-    record_id_col_name = f"{constants.SYSTEM_TAG_RECORD_ID_PREFIX}{constants.BLOCK_SEPARATOR}{schema_hash}"
+    source_id_col_name = f"{constants.SYSTEM_KEY_SOURCE_ID_PREFIX}{constants.BLOCK_SEPARATOR}{schema_hash}"
+    record_id_col_name = f"{constants.SYSTEM_KEY_RECORD_ID_PREFIX}{constants.BLOCK_SEPARATOR}{schema_hash}"
 
     source_id_array = pa.array(source_ids, type=pa.large_string())
     record_id_array = pa.array(record_ids, type=pa.large_string())
 
-    # System tag columns are always computed, never null — declare nullable=False
+    # System key columns are always computed, never null — declare nullable=False
     # explicitly so the schema intent is not lost in Polars round-trips.
     table = table.append_column(
         pa.field(source_id_col_name, pa.large_string(), nullable=False), source_id_array
@@ -978,30 +978,30 @@ def add_system_tag_columns(
     return table
 
 
-def append_to_system_tags(table: "pa.Table", value: str) -> "pa.Table":
-    """Append a value to the system tags column in an Arrow table."""
+def append_to_system_keys(table: "pa.Table", value: str) -> "pa.Table":
+    """Append a value to the name of every system key column in an Arrow table."""
     if not table.column_names:
         raise ValueError("Table is empty")
 
     column_name_map = {
         c: f"{c}{constants.BLOCK_SEPARATOR}{value}"
-        if c.startswith(constants.SYSTEM_TAG_PREFIX)
+        if c.startswith(constants.SYSTEM_KEY_PREFIX)
         else c
         for c in table.column_names
     }
     return table.rename_columns(column_name_map)
 
 
-def _parse_system_tag_column(
+def _parse_system_key_column(
     col_name: str,
 ) -> tuple[str, str, str] | None:
-    """Parse a system tag column name into (field_type, provenance_path, position).
+    """Parse a system key column name into (field_type, provenance_path, position).
 
     For example:
-        _tag_source_id::abc123::def456:0
+        _key_source_id::abc123::def456:0
         → field_type="source_id", provenance_path="abc123::def456", position="0"
 
-        _tag_record_id::abc123::def456:0
+        _key_record_id::abc123::def456:0
         → field_type="record_id", provenance_path="abc123::def456", position="0"
 
     Returns None if the column doesn't end with a :position suffix.
@@ -1012,7 +1012,7 @@ def _parse_system_tag_column(
         return None
 
     # Determine field type by checking known prefixes
-    prefix = constants.SYSTEM_TAG_PREFIX
+    prefix = constants.SYSTEM_KEY_PREFIX
     if not base.startswith(prefix):
         return None
 
@@ -1029,34 +1029,34 @@ def _parse_system_tag_column(
     return field_type, provenance_path, position
 
 
-def sort_system_tag_values(table: "pa.Table") -> "pa.Table":
-    """Sort paired system tag values for columns that share the same provenance path.
+def sort_system_key_values(table: "pa.Table") -> "pa.Table":
+    """Sort paired system key values for columns that share the same provenance path.
 
-    System tag columns come in (source_id, record_id) pairs. Columns that differ
+    System key columns come in (source_id, record_id) pairs. Columns that differ
     only by their canonical position (the final :N) represent streams with the same
     pipeline_hash that were joined. For commutativity, paired (source_id, record_id)
     tuples must be sorted together per row so that the result is independent of
     input order.
 
     Algorithm:
-    1. Parse each system tag column into (field_type, provenance_path, position)
+    1. Parse each system key column into (field_type, provenance_path, position)
     2. Group by provenance_path — source_id and record_id at the same path+position
        are paired
     3. For each group with >1 position, sort per-row by (source_id, record_id) tuples
     4. Assign sorted values back to both columns at each position
     """
-    sys_tag_cols = [
-        c for c in table.column_names if c.startswith(constants.SYSTEM_TAG_PREFIX)
+    sys_key_cols = [
+        c for c in table.column_names if c.startswith(constants.SYSTEM_KEY_PREFIX)
     ]
 
-    if not sys_tag_cols:
+    if not sys_key_cols:
         return table
 
-    # Parse all system tag columns and group by provenance_path
+    # Parse all system key columns and group by provenance_path
     # groups[provenance_path][position] = {field_type: col_name}
     groups: dict[str, dict[str, dict[str, str]]] = {}
-    for col in sys_tag_cols:
-        parsed = _parse_system_tag_column(col)
+    for col in sys_key_cols:
+        parsed = _parse_system_key_column(col)
         if parsed is None:
             continue
         field_type, provenance_path, position = parsed
@@ -1064,11 +1064,11 @@ def sort_system_tag_values(table: "pa.Table") -> "pa.Table":
             col
         )
 
-    source_id_field = constants.SYSTEM_TAG_SOURCE_ID_PREFIX[
-        len(constants.SYSTEM_TAG_PREFIX) :
+    source_id_field = constants.SYSTEM_KEY_SOURCE_ID_PREFIX[
+        len(constants.SYSTEM_KEY_PREFIX) :
     ]
-    record_id_field = constants.SYSTEM_TAG_RECORD_ID_PREFIX[
-        len(constants.SYSTEM_TAG_PREFIX) :
+    record_id_field = constants.SYSTEM_KEY_RECORD_ID_PREFIX[
+        len(constants.SYSTEM_KEY_PREFIX) :
     ]
 
     # For each provenance_path group with >1 position, sort paired tuples per row
diff --git a/src/orcapod/utils/polars_data_utils.py b/src/orcapod/utils/polars_data_utils.py
index a6ca778c..95dda2fe 100644
--- a/src/orcapod/utils/polars_data_utils.py
+++ b/src/orcapod/utils/polars_data_utils.py
@@ -50,34 +50,34 @@ def get_system_columns(
     )
 
 
-def add_system_tag_column(
+def add_system_key_column(
     df: "pl.DataFrame",
-    system_tag_column_name: str,
-    system_tag_values: str | Collection[str],
+    system_key_column_name: str,
+    system_key_values: str | Collection[str],
 ) -> "pl.DataFrame":
-    """Add a system tags column to a Polars DataFrame."""
+    """Add a system keys column to a Polars DataFrame."""
     if df.is_empty():
         raise ValueError("DataFrame is empty")
-    if isinstance(system_tag_values, str):
-        system_tag_values = [system_tag_values] * df.height
+    if isinstance(system_key_values, str):
+        system_key_values = [system_key_values] * df.height
     else:
-        system_tag_values = list(system_tag_values)
-        if len(system_tag_values) != df.height:
+        system_key_values = list(system_key_values)
+        if len(system_key_values) != df.height:
             raise ValueError(
-                "Length of system_tag_values must match number of rows in the DataFrame."
+                "Length of system_key_values must match number of rows in the DataFrame."
             )
-    if not system_tag_column_name.startswith(constants.SYSTEM_TAG_PREFIX):
-        system_tag_column_name = (
-            f"{constants.SYSTEM_TAG_PREFIX}{system_tag_column_name}"
+    if not system_key_column_name.startswith(constants.SYSTEM_KEY_PREFIX):
+        system_key_column_name = (
+            f"{constants.SYSTEM_KEY_PREFIX}{system_key_column_name}"
         )
-    tags_column = pl.Series(
-        system_tag_column_name, system_tag_values, dtype=pl.String()
+    keys_column = pl.Series(
+        system_key_column_name, system_key_values, dtype=pl.String()
     )
-    return df.with_columns(tags_column)
+    return df.with_columns(keys_column)
 
 
-def append_to_system_tags(df: "pl.DataFrame", value: str) -> "pl.DataFrame":
-    """Append a value to the system tags column in an Arrow table."""
+def append_to_system_keys(df: "pl.DataFrame", value: str) -> "pl.DataFrame":
+    """Append a value to the name of every system key column in a Polars DataFrame."""
     if df.is_empty():
         raise ValueError("Table is empty")
 
@@ -85,7 +85,7 @@ def append_to_system_tags(df: "pl.DataFrame", value: str) -> "pl.DataFrame":
     column_name_map = {
         c: f"{c}{constants.BLOCK_SEPARATOR}{value}"
         for c in df.columns
-        if c.startswith(constants.SYSTEM_TAG_PREFIX)
+        if c.startswith(constants.SYSTEM_KEY_PREFIX)
     }
     return df.rename(column_name_map)
 
diff --git a/superpowers/plans/2026-05-12-tag-to-key-rename.md b/superpowers/plans/2026-05-12-tag-to-key-rename.md
new file mode 100644
index 00000000..d0d8191c
--- /dev/null
+++ b/superpowers/plans/2026-05-12-tag-to-key-rename.md
@@ -0,0 +1,639 @@
+# Rename `tag` → `key` Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use sensei:subagent-driven-development (recommended) or sensei:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Rename every `tag`/`Tag`/`TAG` identifier to `key`/`Key`/`KEY` across orcapod-python and land it as one atomic commit.
+
+**Architecture:** A Python script applies an ordered substitution table to all text files in the repo — using plain `str.replace` for compound identifiers and regex word-boundary replacement for bare `tag`/`tags` (required because `_staggered_join` contains "tag" as a substring and must not be touched). Two files are renamed via `git mv`. The existing test suite verifies correctness. One atomic commit captures all changes.
+
+**Tech Stack:** Python 3.x (rename script), Git (file renames + commit), pytest via `uv run`
+
+**Spec:** `superpowers/specs/2026-05-12-tag-to-key-rename-design.md`
+
+---
+
+### Task 1: Verify the feature branch
+
+**Files:** (git operation only)
+
+- [ ] **Step 1: Confirm you are on the correct branch**
+
+```bash
+git branch --show-current
+```
+
+Expected:
+```
+eywalker/eng-455-rename-tags-keys-across-orcapod-python-hard-break
+```
+
+If you are not on this branch:
+```bash
+git checkout eywalker/eng-455-rename-tags-keys-across-orcapod-python-hard-break
+```
+
+---
+
+### Task 2: Create `scripts/rename_tag_to_key.py`
+
+**Files:**
+- Create: `scripts/rename_tag_to_key.py`
+
+This script rewrites file **contents** only. File renames are done separately via `git mv`
+in Task 4. The script must be deleted before committing (Task 8).
+
+**IMPORTANT — why regex is required:** The method `_staggered_join` in
+`src/orcapod/core/operators/join.py` contains the substring "tag" inside "stagger". A bare
+`str.replace("tag", "key")` would corrupt it to `_skeygered_join`. The script therefore uses:
+- Plain `str.replace` for all **compound** identifiers (these are specific enough to be safe)
+- `re.sub(r'\b<word>\b', ...)` for bare `tag`/`tags` only (word boundaries prevent the corruption)
+
+- [ ] **Step 1: Create `scripts/` directory and write the script**
+
+```bash
+mkdir -p scripts
+```
+
+Create `scripts/rename_tag_to_key.py` with this exact content:
+
+```python
+#!/usr/bin/env python3
+r"""One-shot rename script: tag → key across orcapod-python.
+
+Run from the repo root:
+    uv run python scripts/rename_tag_to_key.py
+
+After running, use git mv to rename files (Task 4 of the implementation plan).
+Delete this script before committing.
+
+DESIGN NOTE
+-----------
+Plain str.replace is used for all compound identifiers (safe because they are
+specific enough that no unintended substrings match).
+
+Bare "tag"/"tags" use regex word-boundary (\b) replacement.  This is required
+because `_staggered_join` contains "tag" as a substring — a plain replace would
+silently corrupt it to `_skeygered_join`.
+"""
+
+import re
+from pathlib import Path
+
+# ── Phase 1: Specific compound identifier substitutions ────────────────────
+# Plain str.replace — no regex. Order is significant: longer/more-specific
+# patterns must precede shorter ones that are substrings of them.
+SPECIFIC_SUBSTITUTIONS: list[tuple[str, str]] = [
+    # Column prefix string literals — most specific first
+    ("_tag_source_id", "_key_source_id"),
+    ("_tag_record_id", "_key_record_id"),
+    ("_tag::", "_key::"),
+    ("_tag_", "_key_"),
+
+    # SCREAMING_SNAKE_CASE constants — most specific first
+    ("SYSTEM_TAG_SOURCE_ID_PREFIX", "SYSTEM_KEY_SOURCE_ID_PREFIX"),
+    ("SYSTEM_TAG_RECORD_ID_PREFIX", "SYSTEM_KEY_RECORD_ID_PREFIX"),
+    ("SYSTEM_TAG_SOURCE_ID_FIELD", "SYSTEM_KEY_SOURCE_ID_FIELD"),
+    ("SYSTEM_TAG_RECORD_ID_FIELD", "SYSTEM_KEY_RECORD_ID_FIELD"),
+    ("SYSTEM_TAG_PREFIX_NAME", "SYSTEM_KEY_PREFIX_NAME"),
+    ("SYSTEM_TAG_PREFIX", "SYSTEM_KEY_PREFIX"),
+
+    # PascalCase class / protocol names — most specific first
+    ("TagProtocol", "KeyProtocol"),
+    ("DuplicateTagError", "DuplicateKeyError"),
+    ("SelectTagColumns", "SelectKeyColumns"),
+    ("DropTagColumns", "DropKeyColumns"),
+    ("MapTags", "MapKeys"),
+    ("TagValue", "KeyValue"),
+    ("Tag", "Key"),  # catch-all PascalCase — plain substring replace, so it also rewrites e.g. "Tagged" → "Keyged"; review the diff
+
+    # Named snake_case methods / private functions — most specific first
+    ("_ensure_system_tags_table", "_ensure_system_keys_table"),
+    ("_system_tags_python_schema", "_system_keys_python_schema"),
+    ("_system_tags_table", "_system_keys_table"),
+    ("_system_tags", "_system_keys"),
+    ("_predict_system_tag_schema", "_predict_system_key_schema"),
+    ("_compute_system_tag_suffixes", "_compute_system_key_suffixes"),
+    ("_sort_merged_system_tags", "_sort_merged_system_keys"),
+    ("_rename_sys_tags", "_rename_sys_keys"),
+    ("_parse_system_tag_column", "_parse_system_key_column"),
+    ("add_system_tag_columns", "add_system_key_columns"),
+    ("add_system_tag_column", "add_system_key_column"),
+    ("append_to_system_tags", "append_to_system_keys"),
+    ("sort_system_tag_values", "sort_system_key_values"),
+    ("select_tag_columns", "select_key_columns"),
+    ("drop_tag_columns", "drop_key_columns"),
+    ("include_system_tags", "include_system_keys"),
+    ("system_tags", "system_keys"),
+    ("sort_by_tags", "sort_by_keys"),
+    ("map_tags", "map_keys"),
+
+    # Common compound variable / parameter patterns — most specific first
+    # (each entry also catches variants with a prefix, e.g. "all_tag_schema"
+    #  contains "tag_schema" and is handled automatically)
+    ("tag_schema", "key_schema"),
+    ("tag_columns", "key_columns"),
+    ("tag_data", "key_data"),    # file / module name references
+    ("test_tag", "test_key"),    # test file name references
+
+    # Catch-all snake_case patterns (plain substring matches, not whole words)
+    # "stagger"/"_staggered" are safe: they do NOT contain "tags_", "tag_", or
+    # "_tag" (after "tag" comes "g", not "_" or "s_"; "_staggered" starts "_s").
+    # NOTE(review): "_tag" still matches inside e.g. "_tagged" → "_keyged" — check the diff.
+    ("tags_", "keys_"),   # e.g. tags_to_drop → keys_to_drop
+    ("tag_", "key_"),     # e.g. tag_cols → key_cols, tag_tables → key_tables
+    ("_tag", "_key"),     # e.g. sys_tags → sys_keys, left_tag → left_key
+]
+
+# ── Phase 2: Word-boundary regex for bare tag / tags ───────────────────────
+# Handles standalone `tag`/`tags` identifiers and natural-language uses in docstrings, comments,
+# and prose.  \b ensures "stagger" is never touched; "_" counts as a word character, so snake_case
+# compounds like `x_tag_y` are left to Phase 1.  Order: most-specific case variant first.
+WORD_BOUNDARY_SUBSTITUTIONS: list[tuple[str, str]] = [
+    ("TAGS", "KEYS"),
+    ("TAG", "KEY"),
+    ("Tags", "Keys"),  # effectively a no-op: Phase 1's plain ("Tag", "Key") replace already rewrote every "Tag"
+    ("Tag", "Key"),    # same — no "Tag" substring survives Phase 1
+    ("tags", "keys"),
+    ("tag", "key"),
+]
+
+# File extensions to process (compared against Path.suffix, so matching is case-sensitive)
+EXTENSIONS = frozenset({
+    ".py", ".md", ".rst", ".ipynb", ".toml",
+    ".yaml", ".yml", ".ini", ".cfg", ".txt",
+})
+
+# Known extensionless files to also process (relative to repo root)
+EXTRA_FILES = [
+    ".zed/rules",
+]
+
+# Skipped when any path component matches. NOTE(review): ".github" is not listed, so CI *.yml (e.g. `tags:` triggers) is rewritten too — confirm.
+SKIP_DIRS = frozenset({
+    ".git", ".venv", "venv", "__pycache__", ".mypy_cache",
+    "node_modules", ".tox", "dist", "build",
+    "superpowers",  # design docs — preserve old names as historical record
+})
+
+
+def apply_substitutions(content: str) -> str:
+    # Phase 1: specific compound identifiers (plain replace)
+    for old, new in SPECIFIC_SUBSTITUTIONS:
+        content = content.replace(old, new)
+    # Phase 2: bare word-boundary replacements for residual tag/tags
+    for old, new in WORD_BOUNDARY_SUBSTITUTIONS:
+        content = re.sub(r"\b" + re.escape(old) + r"\b", new, content)
+    return content
+
+
+def process_file(path: Path) -> bool:
+    """Apply substitutions to one file. Returns True if the file changed."""
+    try:
+        original = path.read_text(encoding="utf-8")
+    except (UnicodeDecodeError, PermissionError) as exc:
+        print(f"  SKIP {path}: {exc}")
+        return False
+    updated = apply_substitutions(original)
+    if updated != original:
+        path.write_text(updated, encoding="utf-8")
+        return True
+    return False
+
+
+def main() -> None:
+    repo_root = Path(__file__).resolve().parent.parent
+    changed: list[Path] = []
+
+    # Walk all files with matching extensions
+    for path in sorted(repo_root.rglob("*")):
+        if any(part in SKIP_DIRS for part in path.parts):
+            continue
+        if path.resolve() == Path(__file__).resolve():
+            continue
+        if path.is_file() and path.suffix in EXTENSIONS:
+            if process_file(path):
+                changed.append(path.relative_to(repo_root))
+
+    # Process known extensionless files
+    for rel in EXTRA_FILES:
+        path = repo_root / rel
+        if path.exists() and path.is_file():
+            if process_file(path):
+                changed.append(path.relative_to(repo_root))
+
+    print(f"\nModified {len(changed)} file(s):")
+    for p in sorted(changed):
+        print(f"  {p}")
+
+
+if __name__ == "__main__":
+    main()
+```
+
+---
+
+### Task 3: Run the rename script (content substitution)
+
+**Files:** ~50+ files modified in-place by the script
+
+- [ ] **Step 1: Run the script from the repo root**
+
+```bash
+uv run python scripts/rename_tag_to_key.py
+```
+
+Expected output (abbreviated — exact count will vary):
+```
+Modified N file(s):
+  .zed/rules
+  CLAUDE.md
+  DESIGN_ISSUES.md
+  README.md
+  orcapod-design.md
+  src/orcapod/core/datagrams/tag_data.py
+  src/orcapod/core/operators/column_selection.py
+  src/orcapod/core/operators/join.py
+  src/orcapod/core/operators/mappers.py
+  src/orcapod/core/operators/merge_join.py
+  src/orcapod/core/streams/arrow_table_stream.py
+  src/orcapod/core/streams/base.py
+  src/orcapod/errors.py
+  src/orcapod/protocols/core_protocols/datagrams.py
+  src/orcapod/system_constants.py
+  src/orcapod/types.py
+  src/orcapod/utils/arrow_data_utils.py
+  src/orcapod/utils/arrow_utils.py
+  src/orcapod/utils/polars_data_utils.py
+  ...
+  test-objective/unit/test_tag.py
+  tests/...
+```
+
+Note: file *names* still show the old names at this stage — that is correct.
+File renames happen via `git mv` in Task 4.
+
+- [ ] **Step 2: Verify `_staggered_join` was NOT corrupted**
+
+```bash
+grep -n "_staggered" src/orcapod/core/operators/join.py
+```
+
+Expected: lines containing `_staggered_join` (NOT `_skeygered_join`). If you see
+`_skeygered_join`, the word-boundary guard failed — stop and diagnose before continuing.
+
+- [ ] **Step 3: Spot-check critical renamed identifiers**
+
+```bash
+grep -n "KeyProtocol\|DuplicateKeyError\|SelectKeyColumns\|MapKeys\|system_keys\|SYSTEM_KEY_PREFIX" \
+    src/orcapod/protocols/core_protocols/datagrams.py \
+    src/orcapod/errors.py \
+    src/orcapod/core/operators/column_selection.py \
+    src/orcapod/system_constants.py | head -30
+```
+
+Expected: lines containing the NEW names.
+
+- [ ] **Step 4: Verify `system_constants.py` string value was updated**
+
+```bash
+grep -n "SYSTEM_KEY_PREFIX_NAME\|PREFIX_NAME" src/orcapod/system_constants.py
+```
+
+Expected output contains:
+```
+SYSTEM_KEY_PREFIX_NAME = "key"
+```
+
+(Both the identifier AND the string value `"tag"` → `"key"` must be updated.)
+
+- [ ] **Step 5: Confirm no bare `tag`/`Tag` remain in Python source**
+
+```bash
+grep -rn --include="*.py" "\btag\b\|\bTag\b\|\bTAG\b" src/ tests/ test-objective/ | \
+    grep -v "_staggered" | head -30
+```
+
+Expected: zero matches (or only false-positives that you can review and confirm are intentional).
+If genuine misses appear, add them to `SPECIFIC_SUBSTITUTIONS` and re-run the script.
+
+---
+
+### Task 4: Rename the two files with `tag` in their path
+
+**Files:** (git mv operations)
+
+- [ ] **Step 1: List all git-tracked files with `tag` in their path**
+
+```bash
+git ls-files | grep -i tag | grep -v superpowers
+```
+
+Expected output:
+```
+src/orcapod/core/datagrams/tag_data.py
+test-objective/unit/test_tag.py
+```
+
+If additional files appear, rename them with the same `tag` → `key` pattern before proceeding.
+
+- [ ] **Step 2: Rename the source file**
+
+```bash
+git mv src/orcapod/core/datagrams/tag_data.py \
+       src/orcapod/core/datagrams/key_data.py
+```
+
+- [ ] **Step 3: Rename the test-objective file**
+
+```bash
+git mv test-objective/unit/test_tag.py \
+       test-objective/unit/test_key.py
+```
+
+- [ ] **Step 4: Verify no `tag` remains in any tracked path name (excluding superpowers)**
+
+```bash
+git ls-files | grep -i tag | grep -v superpowers
+```
+
+Expected: no output (zero matches).
+
+- [ ] **Step 5: Verify the renamed files exist and are staged**
+
+```bash
+git status --short | grep -E "^R" | grep -E "key_data|test_key"
+```
+
+Expected: each renamed file appears as `R  old/path -> new/path`.
+
+- [ ] **Step 6: Quick import check**
+
+```bash
+uv run python -c "import orcapod; print('import OK')"
+```
+
+Expected:
+```
+import OK
+```
+
+If this raises `ImportError` or `ModuleNotFoundError`, a `__init__.py` is still importing
+from the old module path. Find it:
+
+```bash
+grep -rn "tag_data\|test_tag" src/orcapod/
+```
+
+Update any matches manually (e.g. change `from .tag_data import ...` to
+`from .key_data import ...`).
+
+---
+
+### Task 5: Update `CHANGELOG.md`
+
+**Files:**
+- Modify: `CHANGELOG.md` (repo root)
+
+- [ ] **Step 1: Add the `tag → key` rename section**
+
+Open `CHANGELOG.md` and add the following block immediately after the
+`## [Unreleased]` heading (before the existing `packets → data` entry):
+
+```markdown
+#### `tag` → `key` rename (hard break)
+
+All identifiers containing `tag`/`tags`/`Tag` have been renamed to
+`key`/`keys`/`Key`. No deprecation aliases. Pre-v0.1 artifacts will not load.
+
+| Old name | New name |
+|---|---|
+| `Tag` | `Key` |
+| `TagProtocol` | `KeyProtocol` |
+| `TagValue` | `KeyValue` |
+| `DuplicateTagError` | `DuplicateKeyError` |
+| `SelectTagColumns` | `SelectKeyColumns` |
+| `DropTagColumns` | `DropKeyColumns` |
+| `MapTags` | `MapKeys` |
+| `system_tags()` | `system_keys()` |
+| `map_tags()` | `map_keys()` |
+| `select_tag_columns()` | `select_key_columns()` |
+| `drop_tag_columns()` | `drop_key_columns()` |
+| `sort_by_tags` | `sort_by_keys` |
+| `SYSTEM_TAG_PREFIX` | `SYSTEM_KEY_PREFIX` |
+| `SYSTEM_TAG_PREFIX_NAME` (`"tag"`) | `SYSTEM_KEY_PREFIX_NAME` (`"key"`) |
+| `SYSTEM_TAG_SOURCE_ID_PREFIX` | `SYSTEM_KEY_SOURCE_ID_PREFIX` |
+| `SYSTEM_TAG_RECORD_ID_PREFIX` | `SYSTEM_KEY_RECORD_ID_PREFIX` |
+| `SYSTEM_TAG_SOURCE_ID_FIELD` | `SYSTEM_KEY_SOURCE_ID_FIELD` |
+| `SYSTEM_TAG_RECORD_ID_FIELD` | `SYSTEM_KEY_RECORD_ID_FIELD` |
+| `ColumnConfig(system_tags=...)` | `ColumnConfig(system_keys=...)` |
+| Column prefix `_tag_` | `_key_` (e.g. `_tag_source_id` → `_key_source_id`) |
+| Column prefix `_tag::` | `_key::` (e.g. `_tag::source:abc` → `_key::source:abc`) |
+| `src/orcapod/core/datagrams/tag_data.py` | `key_data.py` |
+| `test-objective/unit/test_tag.py` | `test_key.py` |
+```
+
+---
+
+### Task 6: Run the full test suite
+
+**Files:** (read-only — verify only)
+
+- [ ] **Step 1: Run all tests with fail-fast**
+
+```bash
+uv run pytest tests/ -x -q 2>&1 | tail -20
+```
+
+Expected:
+```
+... passed, ... warnings in ...s
+```
+
+- [ ] **Step 2: If any tests fail, diagnose and fix**
+
+The most common failure modes after this rename:
+
+**a) ImportError — a file still imports from the old name**
+```bash
+grep -rn "from.*tag_data\|import.*tag_data\|from.*TagProtocol\|import.*Tag[^V]" src/ tests/
+```
+Update each remaining reference manually.
+
+**b) AttributeError on old method name (e.g. `system_tags`, `map_tags`)**
+```bash
+grep -rn "\.system_tags\b\|\.map_tags\b\|\.select_tag_columns\b\|\.drop_tag_columns\b" src/ tests/
+```
+Replace each occurrence with the new name.
+
+**c) `KeyError` / `ValueError` on column prefix string**
+
+If a test asserts an exact column name like `"_tag_source_id"`:
+```bash
+grep -rn '"_tag_\|_tag::' src/ tests/
+```
+Any match is a string literal the script missed. Update manually.
+
+**d) `NameError` on a renamed constant**
+```bash
+grep -rn "SYSTEM_TAG_" src/ tests/
+```
+Replace each with the `SYSTEM_KEY_` equivalent.
+
+Repeat `uv run pytest tests/ -x -q` after each fix until all tests pass.
+
+- [ ] **Step 3: Run the full suite without `-x` for a complete picture**
+
+```bash
+uv run pytest tests/ -q 2>&1 | tail -5
+```
+
+Expected: `N passed` with no failures or errors.
+
+---
+
+### Task 7: Final verification
+
+**Files:** (read-only — verify only)
+
+- [ ] **Step 1: Confirm no `tag`/`Tag`/`TAG` remain in Python source (excluding `_staggered`)**
+
+```bash
+grep -rn --include="*.py" "\btag\b\|\bTag\b\|\bTAG\b" src/ tests/ test-objective/ | \
+    grep -v "_staggered"
+```
+
+Expected: zero matches. Any match is a genuine miss — fix it manually.
+
+- [ ] **Step 2: Confirm no `tag`/`Tag` remain in Markdown docs**
+
+```bash
+grep -rn --include="*.md" "\btag\b\|\bTag\b" \
+    docs/ examples/ notebooks/ README.md CLAUDE.md orcapod-design.md 2>/dev/null | \
+    grep -iv "git tag\|github.*tag\|release.*tag\|version.*tag" | head -20
+```
+
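+Review any remaining matches; content references to the old API are not acceptable. Also check the reverse with
+`git diff`: the script rewrites every `.md`/`.yml`/`.toml` file, so restore unrelated uses it changed (e.g. "git tag" in RELEASING.md).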
+
+- [ ] **Step 3: Confirm `.zed/rules` was updated**
+
+```bash
+grep -n "tag\|Tag" .zed/rules | grep -iv "git tag" | head -10
+```
+
+Expected: zero matches (or only contextual historical mentions). The script processes
+`.zed/rules` because it is listed in `EXTRA_FILES`.
+
+- [ ] **Step 4: Run the full test suite one final time**
+
+```bash
+uv run pytest tests/ -q 2>&1 | tail -5
+```
+
+Expected: `N passed` with no failures or errors. Do not proceed to Task 8 until this is clean.
+
+---
+
+### Task 8: Delete the rename script, stage all changes, atomic commit
+
+**Files:**
+- Delete: `scripts/rename_tag_to_key.py`
+- Commit: all staged and unstaged changes
+
+- [ ] **Step 1: Delete the rename script**
+
+```bash
+rm scripts/rename_tag_to_key.py
+rmdir scripts/ 2>/dev/null || true
+```
+
+- [ ] **Step 2: Stage all changes**
+
+```bash
+git add -A
+```
+
+- [ ] **Step 3: Verify the staged file count looks right**
+
+```bash
+git diff --cached --stat | tail -5
+```
+
+Expected: 50+ files changed (content edits + 2 file renames + `CHANGELOG.md`).
+The rename script itself must NOT appear (it was deleted, never committed).
+
+- [ ] **Step 4: Create the single atomic commit**
+
+```bash
+git commit -m "$(cat <<'EOF'
+refactor: rename tag → key across orcapod-python (ENG-455)
+
+Hard break — no deprecation aliases, no migration shims.
+All public and private identifiers, docstrings, comments,
+module names, file names, and serialized column prefixes
+containing tag/Tag renamed to key/Key. Updates CHANGELOG.md
+with full name-mapping table.
+
+Fixes ENG-455
+EOF
+)"
+```
+
+- [ ] **Step 5: Verify the commit looks correct**
+
+```bash
+git show --stat HEAD | head -20
+```
+
+Expected: the commit message shows `refactor: rename tag → key...` and the stat
+lists the two renamed files as `old_name => new_name`.
+
+---
+
+### Task 9: Push branch and open the pull request
+
+- [ ] **Step 1: Push the branch**
+
+```bash
+git push -u origin eywalker/eng-455-rename-tags-keys-across-orcapod-python-hard-break
+```
+
+- [ ] **Step 2: Create the PR targeting `dev`**
+
+```bash
+gh pr create \
+  --base dev \
+  --title "refactor: rename tag → key across orcapod-python" \
+  --body "$(cat <<'EOF'
+## Summary
+
+Hard break rename of all `tag`/`Tag`/`TAG` identifiers to `key`/`Key`/`KEY`
+throughout orcapod-python, landing before v0.1 stable.
+
+- All public + private symbols, module names, file names, docstrings, and comments updated
+- Serialized column prefixes `_tag_` and `_tag::` renamed to `_key_` and `_key::` in-place
+- `SYSTEM_TAG_PREFIX_NAME = "tag"` changed to `SYSTEM_KEY_PREFIX_NAME = "key"` (drives all generated column headers)
+- No deprecation shims, no backwards-compat aliases
+- `CHANGELOG.md` updated with full name-mapping table
+- Test suite passing
+
+Fixes ENG-455
+
+## Files renamed
+
+| Old | New |
+|---|---|
+| `src/orcapod/core/datagrams/tag_data.py` | `key_data.py` |
+| `test-objective/unit/test_tag.py` | `test_key.py` |
+
+## Test plan
+- [ ] `uv run pytest tests/ -q` passes with zero failures
+- [ ] `grep -rn '\btag\b\|\bTag\b' src/ tests/ | grep -v _staggered` returns zero matches
+- [ ] `git ls-files | grep -i tag | grep -v superpowers` returns zero matches
+EOF
+)"
+```
+
+- [ ] **Step 3: Record the PR URL**
+
+Copy the URL printed by `gh pr create` and note it for the Linear issue.
diff --git a/superpowers/specs/2026-05-12-tag-to-key-rename-design.md b/superpowers/specs/2026-05-12-tag-to-key-rename-design.md
new file mode 100644
index 00000000..405a2b38
--- /dev/null
+++ b/superpowers/specs/2026-05-12-tag-to-key-rename-design.md
@@ -0,0 +1,172 @@
+# Tag → Key Rename Design
+
+**Issue:** ENG-455
+**Date:** 2026-05-12
+**Status:** Approved
+
+## Overview
+
+Rename the `tags` concept and all related identifiers to `keys` throughout orcapod-python.
+This is a hard break with no deprecation aliases or migration shims, targeting the v0.1 stable
+release. `keys` is a clearer fit than `tags` because these fields are the primary key into a
+stream of data and the term aligns naturally with the upcoming `SourceSpec` (key schema + data
+schema) abstraction.
+
+## Goals & Success Criteria
+
+- All public and private symbols, class names, attributes, methods, function arguments, and
+  module names containing `tag`/`tags`/`Tag` are renamed to `key`/`keys`/`Key`.
+- Serialized/persisted column name prefixes (`_tag_`, `_tag::`) are renamed to `_key_` and
+  `_key::` in-place (pre-v0.1 artifacts are not expected to load).
+- All docstrings, comments, type stubs, error messages, and log strings use the new terminology.
+- All examples, design docs, CLAUDE.md, `.zed/rules`, and `orcapod-design.md` are updated.
+- Test suite renamed and passing.
+- No deprecation shims, no backwards-compat aliases — old names are gone.
+
+## Complete Rename Map
+
+### Classes & Protocols
+
+| Old | New |
+|-----|-----|
+| `Tag` | `Key` |
+| `TagProtocol` | `KeyProtocol` |
+| `DuplicateTagError` | `DuplicateKeyError` |
+| `SelectTagColumns` | `SelectKeyColumns` |
+| `DropTagColumns` | `DropKeyColumns` |
+| `MapTags` | `MapKeys` |
+
+### Methods & Functions
+
+| Old | New |
+|-----|-----|
+| `system_tags()` | `system_keys()` |
+| `_ensure_system_tags_table()` | `_ensure_system_keys_table()` |
+| `map_tags()` | `map_keys()` |
+| `select_tag_columns()` | `select_key_columns()` |
+| `drop_tag_columns()` | `drop_key_columns()` |
+| `add_system_tag_columns()` | `add_system_key_columns()` |
+| `add_system_tag_column()` | `add_system_key_column()` |
+| `append_to_system_tags()` | `append_to_system_keys()` |
+| `_parse_system_tag_column()` | `_parse_system_key_column()` |
+| `sort_system_tag_values()` | `sort_system_key_values()` |
+| `_predict_system_tag_schema()` | `_predict_system_key_schema()` |
+| `_compute_system_tag_suffixes()` | `_compute_system_key_suffixes()` |
+| `_rename_sys_tags()` | `_rename_sys_keys()` |
+| `_sort_merged_system_tags()` | `_sort_merged_system_keys()` |
+| `tag_columns` (property) | `key_columns` |
+
+### Attributes
+
+| Old | New |
+|-----|-----|
+| `_system_tags` | `_system_keys` |
+| `_system_tags_python_schema` | `_system_keys_python_schema` |
+| `_system_tags_table` | `_system_keys_table` |
+
+### Constants — identifiers and string values
+
+| Old identifier | New identifier | Old value | New value |
+|---|---|---|---|
+| `SYSTEM_TAG_PREFIX_NAME` | `SYSTEM_KEY_PREFIX_NAME` | `"tag"` | `"key"` |
+| `SYSTEM_TAG_SOURCE_ID_FIELD` | `SYSTEM_KEY_SOURCE_ID_FIELD` | unchanged | — |
+| `SYSTEM_TAG_RECORD_ID_FIELD` | `SYSTEM_KEY_RECORD_ID_FIELD` | unchanged | — |
+| `SYSTEM_TAG_PREFIX` | `SYSTEM_KEY_PREFIX` | generates `"_tag_"` | generates `"_key_"` |
+| `SYSTEM_TAG_SOURCE_ID_PREFIX` | `SYSTEM_KEY_SOURCE_ID_PREFIX` | `"_tag_source_id"` | `"_key_source_id"` |
+| `SYSTEM_TAG_RECORD_ID_PREFIX` | `SYSTEM_KEY_RECORD_ID_PREFIX` | `"_tag_record_id"` | `"_key_record_id"` |
+
+### Type aliases & ColumnConfig fields
+
+| Old | New |
+|-----|-----|
+| `TagValue` | `KeyValue` |
+| `system_tags: bool` | `system_keys: bool` |
+| `sort_by_tags: bool` | `sort_by_keys: bool` |
+
+### File renames
+
+| Old | New |
+|-----|-----|
+| `src/orcapod/core/datagrams/tag_data.py` | `src/orcapod/core/datagrams/key_data.py` |
+| `test-objective/unit/test_tag.py` | `test-objective/unit/test_key.py` |
+
+### Column name prefix strings (serialized Arrow table column headers)
+
+| Old | New |
+|-----|-----|
+| `_tag_` | `_key_` (e.g. `_tag_source_id` → `_key_source_id`) |
+| `_tag::` | `_key::` (e.g. `_tag::source:abc123` → `_key::source:abc123`) |
+
+## Execution Approach
+
+### Phase 1 — File renames
+
+Use `git mv` to rename the two files with `tag` in their name, then update all import
+statements that reference them.
+
+### Phase 2 — Scripted identifier rename
+
+A Python script applies the full rename map across all `.py` files using word-boundary
+replacement, working from most-specific to least-specific pattern to avoid partial
+substitutions. Handles all case variants:
+
+- `tags` → `keys`, `tag` → `key`
+- `Tags` → `Keys`, `Tag` → `Key`
+- `TAGS` → `KEYS`, `TAG` → `KEY`
+
+Targets: `src/`, `tests/`, `test-objective/`, `examples/`, `notebooks/`
+
+Run `uv run pytest tests/` after this phase to catch any broken identifiers.
+
+### Phase 3 — Manual string value fixes
+
+- `system_constants.py`: set `SYSTEM_KEY_PREFIX_NAME = "key"` (drives the `_key_` column prefix)
+- Scan for any remaining `"_tag_"` or `"_tag::"` string literals and update to `"_key_"` / `"_key::"`
+- Verify no stray `"tag"` string literals remain in error messages or log strings
+
+### Phase 4 — Docs & config files
+
+Manual updates to non-Python files:
+
+- `CLAUDE.md` + `.zed/rules`: update all terminology (class names, column prefix table,
+  architecture overview section)
+- `orcapod-design.md`: update all ~81 occurrences (section headings, concept descriptions,
+  column name examples)
+- `superpowers/specs/` and `docs/specs/`: update any existing spec docs that reference tag
+  terminology
+- `examples/`, `notebooks/`: update terminology in prose
+
+### Phase 5 — Test run & cleanup
+
+- `uv run pytest tests/` — fix any remaining failures
+- Final grep: `grep -r '\btag\b\|\bTag\b\|\bTAG\b' src/ tests/` to confirm no stragglers
+  (reviewing for intentional exceptions such as "git tag" in RELEASING.md)
+- `uv run pytest tests/` — confirm fully green before PR
+
+### Phase 6 — Single commit
+
+All changes land in one commit:
+
+```
+refactor: rename tag → key across orcapod-python (ENG-455)
+```
+
+## Edge Cases
+
+| Case | Risk | Resolution |
+|------|------|-----------|
+| `RELEASING.md` mentions "git tag" | Must NOT rename | Script targets only `.py` files; docs updated manually with explicit exclusion |
+| `sorted(..., key=...)` Python builtin | `key=` keyword arg must not be renamed | Script uses word-boundary matching — `key=` is not a `tag` pattern so no collision |
+| `dict.keys()` calls | No identifier collision | Monitor for readability confusion only; no action needed |
+| `"system key"` could read as auth credential | Noted in issue | Accept the name; docstrings clarify it refers to provenance/dimensional columns |
+| `tag_columns` property in `pipeline/observability_reader.py` | Must become `key_columns` | Caught by script |
+| `_tag::` column format in Arrow tables | Both the constant AND generated string values need updating | Phase 3 manual pass; pre-v0.1 artifacts are not expected to load |
+| Two `MapTags` class definitions | One in `column_selection.py`, one in `mappers.py` | Script renames both to `MapKeys` |
+| CLAUDE.md and `.zed/rules` are non-Python | Not covered by Phase 2 script | Phase 4 manual update |
+
+## Out of Scope
+
+- orcapod-rust rename (separate sibling work)
+- `packets` → `data` rename (already landed in ENG-454)
+- Pipeline / PipelineJob refactor (tracked separately)
+- Migration tooling for older saved artifacts
diff --git a/test-objective/conftest.py b/test-objective/conftest.py
index 3fa5a3e3..bc7a3d0e 100644
--- a/test-objective/conftest.py
+++ b/test-objective/conftest.py
@@ -10,20 +10,20 @@
 import pytest
 
 from orcapod.core.datagrams.datagram import Datagram
-from orcapod.core.datagrams.tag_data import Data, Tag
+from orcapod.core.datagrams.key_data import Data, Key
 from orcapod.core.function_pod import FunctionPod
 from orcapod.core.nodes import FunctionNode
 from orcapod.core.operators import (
     Batch,
     DropDataColumns,
-    DropTagColumns,
+    DropKeyColumns,
     Join,
     MapData,
-    MapTags,
+    MapKeys,
     MergeJoin,
     PolarsFilter,
     SelectDataColumns,
-    SelectTagColumns,
+    SelectKeyColumns,
     SemiJoin,
 )
 from orcapod.core.data_function import PythonDataFunction
@@ -79,7 +79,7 @@ def return_none(x: int) -> int | None:
 
 
 def make_simple_table(n: int = 3) -> pa.Table:
-    """Table with tag=id (int), data=value (int)."""
+    """Table with key=id (int), data=value (int)."""
     return pa.table(
         {
             "id": pa.array(list(range(n)), type=pa.int64()),
@@ -89,7 +89,7 @@ def make_simple_table(n: int = 3) -> pa.Table:
 
 
 def make_two_data_col_table(n: int = 3) -> pa.Table:
-    """Table with tag=id, data={x, y}."""
+    """Table with key=id, data={x, y}."""
     return pa.table(
         {
             "id": pa.array(list(range(n)), type=pa.int64()),
@@ -100,7 +100,7 @@ def make_two_data_col_table(n: int = 3) -> pa.Table:
 
 
 def make_string_table(n: int = 3) -> pa.Table:
-    """Table with tag=id, data=name (str)."""
+    """Table with key=id, data=name (str)."""
     names = ["alice", "bob", "charlie"][:n]
     return pa.table(
         {
@@ -111,7 +111,7 @@ def make_string_table(n: int = 3) -> pa.Table:
 
 
 def make_joinable_tables() -> tuple[pa.Table, pa.Table]:
-    """Two tables with shared tag=id, non-overlapping data columns."""
+    """Two tables with shared key=id, non-overlapping data columns."""
     left = pa.table(
         {
             "id": pa.array([1, 2, 3], type=pa.int64()),
@@ -128,7 +128,7 @@ def make_joinable_tables() -> tuple[pa.Table, pa.Table]:
 
 
 def make_overlapping_data_tables() -> tuple[pa.Table, pa.Table]:
-    """Two tables with shared tag=id AND overlapping data column 'value'."""
+    """Two tables with shared key=id AND overlapping data column 'value'."""
     left = pa.table(
         {
             "id": pa.array([1, 2, 3], type=pa.int64()),
@@ -171,29 +171,29 @@ def string_table() -> pa.Table:
 
 @pytest.fixture
 def simple_stream() -> ArrowTableStream:
-    """Stream with tag=id, data=value."""
-    return ArrowTableStream(make_simple_table(), tag_columns=["id"])
+    """Stream with key=id, data=value."""
+    return ArrowTableStream(make_simple_table(), key_columns=["id"])
 
 
 @pytest.fixture
 def two_col_stream() -> ArrowTableStream:
-    """Stream with tag=id, data={x, y}."""
-    return ArrowTableStream(make_two_data_col_table(), tag_columns=["id"])
+    """Stream with key=id, data={x, y}."""
+    return ArrowTableStream(make_two_data_col_table(), key_columns=["id"])
 
 
 @pytest.fixture
 def string_stream() -> ArrowTableStream:
-    """Stream with tag=id, data=name."""
-    return ArrowTableStream(make_string_table(), tag_columns=["id"])
+    """Stream with key=id, data=name."""
+    return ArrowTableStream(make_string_table(), key_columns=["id"])
 
 
 @pytest.fixture
 def joinable_streams() -> tuple[ArrowTableStream, ArrowTableStream]:
-    """Two streams with shared tag=id, non-overlapping data columns."""
+    """Two streams with shared key=id, non-overlapping data columns."""
     left, right = make_joinable_tables()
     return (
-        ArrowTableStream(left, tag_columns=["id"]),
-        ArrowTableStream(right, tag_columns=["id"]),
+        ArrowTableStream(left, key_columns=["id"]),
+        ArrowTableStream(right, key_columns=["id"]),
     )
 
 
@@ -204,14 +204,14 @@ def joinable_streams() -> tuple[ArrowTableStream, ArrowTableStream]:
 
 @pytest.fixture
 def simple_source() -> ArrowTableSource:
-    return ArrowTableSource(make_simple_table(), tag_columns=["id"])
+    return ArrowTableSource(make_simple_table(), key_columns=["id"])
 
 
 @pytest.fixture
 def dict_source() -> DictSource:
     return DictSource(
         {"id": [1, 2, 3], "value": [10, 20, 30]},
-        tag_columns=["id"],
+        key_columns=["id"],
         infer_nullable=True,
     )
 
diff --git a/test-objective/integration/test_caching_flows.py b/test-objective/integration/test_caching_flows.py
index 9afb838d..d46b5673 100644
--- a/test-objective/integration/test_caching_flows.py
+++ b/test-objective/integration/test_caching_flows.py
@@ -38,7 +38,7 @@ def _make_source(n: int = 3) -> ArrowTableSource:
             "x": pa.array(list(range(n)), type=pa.int64()),
         }
     )
-    return ArrowTableSource(table, tag_columns=["id"], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=["id"], infer_nullable=True)
 
 
 # ===================================================================
@@ -136,7 +136,7 @@ def test_log_mode_stores_results(self):
                     "age": pa.array([25, 30, 35], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         source_b = ArrowTableSource(
@@ -146,7 +146,7 @@ def test_log_mode_stores_results(self):
                     "score": pa.array([85, 90, 95], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         join = Join()
@@ -170,7 +170,7 @@ def test_replay_mode_loads_from_db(self):
                     "age": pa.array([25, 30, 35], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         source_b = ArrowTableSource(
@@ -180,7 +180,7 @@ def test_replay_mode_loads_from_db(self):
                     "score": pa.array([85, 90, 95], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         join = Join()
@@ -221,7 +221,7 @@ def test_full_caching_flow(self):
         cached_pf = CachedDataFunction(inner_pf, result_database=db)
         cached_pf.set_auto_flush(True)
 
-        from orcapod.core.datagrams.tag_data import Data
+        from orcapod.core.datagrams.key_data import Data
 
         # Process multiple data
         for x in [1, 2, 3]:
diff --git a/test-objective/integration/test_column_config_filtering.py b/test-objective/integration/test_column_config_filtering.py
index 5026ceb8..e7a67a69 100644
--- a/test-objective/integration/test_column_config_filtering.py
+++ b/test-objective/integration/test_column_config_filtering.py
@@ -1,7 +1,7 @@
 """Specification-derived integration tests for ColumnConfig filtering across components.
 
 Tests that ColumnConfig consistently controls column visibility across
-Datagram, Tag, Data, Stream, and Source components.
+Datagram, Key, Data, Stream, and Source components.
 """
 
 from __future__ import annotations
@@ -10,14 +10,14 @@
 import pytest
 
 from orcapod.core.datagrams.datagram import Datagram
-from orcapod.core.datagrams.tag_data import Data, Tag
+from orcapod.core.datagrams.key_data import Data, Key
 from orcapod.core.sources import ArrowTableSource
 from orcapod.core.streams import ArrowTableStream
 from orcapod.system_constants import constants
 from orcapod.types import ColumnConfig
 
-# Use the actual system tag prefix from constants
-_SYS_TAG_KEY = f"{constants.SYSTEM_TAG_PREFIX}source:abc"
+# Use the actual system key prefix from constants
+_SYS_TAG_KEY = f"{constants.SYSTEM_KEY_PREFIX}source:abc"
 
 
 # ===================================================================
@@ -62,35 +62,35 @@ def test_all_info_includes_everything(self):
 
 
 # ===================================================================
-# Tag ColumnConfig
+# Key ColumnConfig
 # ===================================================================
 
 
-class TestTagColumnConfig:
-    """Per design, system_tags=True includes _tag_ columns in Tag."""
+class TestKeyColumnConfig:
+    """Per design, system_keys=True includes _key_ columns in Key."""
 
-    def test_system_tags_excluded_by_default(self):
-        t = Tag(
+    def test_system_keys_excluded_by_default(self):
+        t = Key(
             {"id": 1},
-            system_tags={_SYS_TAG_KEY: "rec1"},
+            system_keys={_SYS_TAG_KEY: "rec1"},
         )
         keys = t.keys()
         assert _SYS_TAG_KEY not in keys
 
-    def test_system_tags_included_with_config(self):
-        t = Tag(
+    def test_system_keys_included_with_config(self):
+        t = Key(
             {"id": 1},
-            system_tags={_SYS_TAG_KEY: "rec1"},
+            system_keys={_SYS_TAG_KEY: "rec1"},
         )
         keys_default = t.keys()
-        keys_with_tags = t.keys(columns=ColumnConfig(system_tags=True))
-        assert len(keys_with_tags) > len(keys_default)
-        assert _SYS_TAG_KEY in keys_with_tags
+        keys_with_system_keys = t.keys(columns=ColumnConfig(system_keys=True))
+        assert len(keys_with_system_keys) > len(keys_default)
+        assert _SYS_TAG_KEY in keys_with_system_keys
 
-    def test_all_info_includes_system_tags(self):
-        t = Tag(
+    def test_all_info_includes_system_keys(self):
+        t = Key(
             {"id": 1},
-            system_tags={_SYS_TAG_KEY: "rec1"},
+            system_keys={_SYS_TAG_KEY: "rec1"},
         )
         keys = t.keys(all_info=True)
         assert _SYS_TAG_KEY in keys
@@ -149,19 +149,19 @@ def test_keys_schema_table_consistency_default(self):
                     "value": pa.array([10, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
-        tag_keys, data_keys = source.keys()
-        tag_schema, data_schema = source.output_schema()
+        key_keys, data_keys = source.keys()
+        key_schema, data_schema = source.output_schema()
         table = source.as_table()
 
         # keys and schema should have same field names
-        assert set(tag_keys) == set(tag_schema.keys())
+        assert set(key_keys) == set(key_schema.keys())
         assert set(data_keys) == set(data_schema.keys())
 
         # Table should have all key columns
-        all_keys = set(tag_keys) | set(data_keys)
+        all_keys = set(key_keys) | set(data_keys)
         assert all_keys.issubset(set(table.column_names))
 
     def test_keys_schema_table_consistency_all_info(self):
@@ -172,17 +172,17 @@ def test_keys_schema_table_consistency_all_info(self):
                     "value": pa.array([10, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
-        tag_keys, data_keys = source.keys(all_info=True)
-        tag_schema, data_schema = source.output_schema(all_info=True)
+        key_keys, data_keys = source.keys(all_info=True)
+        key_schema, data_schema = source.output_schema(all_info=True)
         table = source.as_table(all_info=True)
 
-        assert set(tag_keys) == set(tag_schema.keys())
+        assert set(key_keys) == set(key_schema.keys())
         assert set(data_keys) == set(data_schema.keys())
 
-        all_keys = set(tag_keys) | set(data_keys)
+        all_keys = set(key_keys) | set(data_keys)
         assert all_keys.issubset(set(table.column_names))
 
     def test_all_info_has_more_columns_than_default(self):
@@ -193,7 +193,7 @@ def test_all_info_has_more_columns_than_default(self):
                     "value": pa.array([10, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         default_table = source.as_table()
diff --git a/test-objective/integration/test_hash_invariants.py b/test-objective/integration/test_hash_invariants.py
index c4c8f655..a85df1ba 100644
--- a/test-objective/integration/test_hash_invariants.py
+++ b/test-objective/integration/test_hash_invariants.py
@@ -26,9 +26,9 @@ def _double(x: int) -> int:
     return x * 2
 
 
-def _make_source(data: dict, tag_columns: list[str]) -> ArrowTableSource:
+def _make_source(data: dict, key_columns: list[str]) -> ArrowTableSource:
     table = pa.table(data)
-    return ArrowTableSource(table, tag_columns=tag_columns, infer_nullable=True)
+    return ArrowTableSource(table, key_columns=key_columns, infer_nullable=True)
 
 
 # ===================================================================
@@ -42,19 +42,19 @@ class TestContentHashStability:
 
     def test_same_data_same_hash(self):
         s1 = ArrowTableStream(
-            pa.table({"id": [1, 2], "x": [10, 20]}), tag_columns=["id"]
+            pa.table({"id": [1, 2], "x": [10, 20]}), key_columns=["id"]
         )
         s2 = ArrowTableStream(
-            pa.table({"id": [1, 2], "x": [10, 20]}), tag_columns=["id"]
+            pa.table({"id": [1, 2], "x": [10, 20]}), key_columns=["id"]
         )
         assert s1.content_hash() == s2.content_hash()
 
     def test_different_data_different_hash(self):
         s1 = ArrowTableStream(
-            pa.table({"id": [1, 2], "x": [10, 20]}), tag_columns=["id"]
+            pa.table({"id": [1, 2], "x": [10, 20]}), key_columns=["id"]
         )
         s2 = ArrowTableStream(
-            pa.table({"id": [1, 2], "x": [10, 99]}), tag_columns=["id"]
+            pa.table({"id": [1, 2], "x": [10, 99]}), key_columns=["id"]
         )
         assert s1.content_hash() != s2.content_hash()
 
@@ -108,7 +108,7 @@ def test_downstream_hash_depends_on_upstream(self):
             {"id": pa.array([1, 2], type=pa.int64()), "x": pa.array([10, 20], type=pa.int64())},
             ["id"],
         )
-        # Different schema: tag=category instead of tag=id
+        # Different schema: key=category instead of key=id
         source_b = _make_source(
             {"category": pa.array([1, 2], type=pa.int64()), "x": pa.array([10, 20], type=pa.int64())},
             ["category"],
diff --git a/test-objective/integration/test_pipeline_flows.py b/test-objective/integration/test_pipeline_flows.py
index baad23b6..e91b9c55 100644
--- a/test-objective/integration/test_pipeline_flows.py
+++ b/test-objective/integration/test_pipeline_flows.py
@@ -14,7 +14,7 @@
     Batch,
     DropDataColumns,
     Join,
-    MapTags,
+    MapKeys,
     MergeJoin,
     PolarsFilter,
     SelectDataColumns,
@@ -46,10 +46,10 @@ def _square_doubled(doubled: int) -> int:
     return doubled * doubled
 
 
-def _make_source(tag_data: dict, data_data: dict, tag_columns: list[str]):
-    all_data = {**tag_data, **data_data}
+def _make_source(key_data: dict, data_data: dict, key_columns: list[str]):
+    all_data = {**key_data, **data_data}
     table = pa.table(all_data)
-    return ArrowTableSource(table, tag_columns=tag_columns, infer_nullable=True)
+    return ArrowTableSource(table, key_columns=key_columns, infer_nullable=True)
 
 
 # ===================================================================
@@ -119,7 +119,7 @@ def test_join_combines_matching_rows(self):
 
 
 class TestChainedOperators:
-    """Source → Filter → Select → MapTags → Stream."""
+    """Source → Filter → Select → MapKeys → Stream."""
 
     def test_chain_of_three_operators(self):
         source = _make_source(
@@ -138,8 +138,8 @@ def test_chain_of_three_operators(self):
         select = SelectDataColumns(columns=["value"])
         selected = select.process(filtered)
 
-        # Step 3: Rename tag
-        mapper = MapTags(name_map={"id": "item_id"})
+        # Step 3: Rename key
+        mapper = MapKeys(name_map={"id": "item_id"})
         result = mapper.process(selected)
 
         table = result.as_table()
@@ -162,7 +162,7 @@ def test_transform_then_filter(self):
         transformed = pod.process(source)
 
         # Filter to only results >= 6 (i.e., x >= 3 → result >= 6)
-        # We can filter on tag id >= 3
+        # We can filter on key id >= 3
         filt = PolarsFilter(constraints={"id": 3})
         result = filt.process(transformed)
         table = result.as_table()
@@ -189,7 +189,7 @@ def test_join_then_batch(self):
         batch = Batch()
         result = batch.process(joined)
         table = result.as_table()
-        # After join and batch, rows should be grouped by tag
+        # After join and batch, rows should be grouped by key
         assert table.num_rows >= 1
 
 
diff --git a/test-objective/integration/test_provenance.py b/test-objective/integration/test_provenance.py
index a7b66826..44cba3f5 100644
--- a/test-objective/integration/test_provenance.py
+++ b/test-objective/integration/test_provenance.py
@@ -1,6 +1,6 @@
-"""Specification-derived integration tests for system tag lineage tracking.
+"""Specification-derived integration tests for system key lineage tracking.
 
-Tests the three system tag evolution rules from the design specification:
+Tests the three system key evolution rules from the design specification:
 1. Name-preserving — single-stream ops (filter, select, map)
 2. Name-extending — multi-input ops (join, merge join)
 3. Type-evolving — aggregation ops (batch)
@@ -11,7 +11,7 @@
 import pyarrow as pa
 import pytest
 
-from orcapod.core.operators import Batch, Join, MapTags, PolarsFilter, SelectDataColumns
+from orcapod.core.operators import Batch, Join, MapKeys, PolarsFilter, SelectDataColumns
 from orcapod.core.sources import ArrowTableSource
 from orcapod.core.streams import ArrowTableStream
 from orcapod.system_constants import constants
@@ -23,33 +23,33 @@
 # ---------------------------------------------------------------------------
 
 
-def _make_source(tag_data: dict, data_data: dict, tag_columns: list[str]):
-    all_data = {**tag_data, **data_data}
+def _make_source(key_data: dict, data_data: dict, key_columns: list[str]):
+    all_data = {**key_data, **data_data}
     table = pa.table(all_data)
-    return ArrowTableSource(table, tag_columns=tag_columns, infer_nullable=True)
+    return ArrowTableSource(table, key_columns=key_columns, infer_nullable=True)
 
 
-def _get_system_tag_columns(table: pa.Table) -> list[str]:
-    return [c for c in table.column_names if c.startswith(constants.SYSTEM_TAG_PREFIX)]
+def _get_system_key_columns(table: pa.Table) -> list[str]:
+    return [c for c in table.column_names if c.startswith(constants.SYSTEM_KEY_PREFIX)]
 
 
 # ===================================================================
-# Source creates system tag column
+# Source creates system key column
 # ===================================================================
 
 
-class TestSourceSystemTags:
-    """Per design: each source adds a system tag column encoding provenance."""
+class TestSourceSystemKeys:
+    """Per design: each source adds a system key column encoding provenance."""
 
-    def test_source_creates_system_tag_column(self):
+    def test_source_creates_system_key_column(self):
         source = _make_source(
             {"id": pa.array([1, 2], type=pa.int64())},
             {"value": pa.array([10, 20], type=pa.int64())},
             ["id"],
         )
         table = source.as_table(all_info=True)
-        tag_cols = _get_system_tag_columns(table)
-        assert len(tag_cols) >= 1, "Source should add at least one system tag column"
+        key_cols = _get_system_key_columns(table)
+        assert len(key_cols) >= 1, "Source should add at least one system key column"
 
 
 # ===================================================================
@@ -58,42 +58,42 @@ def test_source_creates_system_tag_column(self):
 
 
 class TestNamePreserving:
-    """Per design: single-stream ops preserve system tag column names and values."""
+    """Per design: single-stream ops preserve system key column names and values."""
 
-    def test_filter_preserves_system_tags(self):
+    def test_filter_preserves_system_keys(self):
         source = _make_source(
             {"id": pa.array([1, 2, 3], type=pa.int64())},
             {"value": pa.array([10, 20, 30], type=pa.int64())},
             ["id"],
         )
         source_table = source.as_table(all_info=True)
-        source_tag_cols = _get_system_tag_columns(source_table)
+        source_key_cols = _get_system_key_columns(source_table)
 
         filt = PolarsFilter(constraints={"id": 2})
         result = filt.process(source)
         result_table = result.as_table(all_info=True)
-        result_tag_cols = _get_system_tag_columns(result_table)
+        result_key_cols = _get_system_key_columns(result_table)
 
         # Column names should be identical
-        assert set(source_tag_cols) == set(result_tag_cols)
+        assert set(source_key_cols) == set(result_key_cols)
 
-    def test_select_preserves_system_tags(self):
+    def test_select_preserves_system_keys(self):
         source = _make_source(
             {"id": pa.array([1, 2], type=pa.int64())},
             {"a": pa.array([10, 20], type=pa.int64()), "b": pa.array([30, 40], type=pa.int64())},
             ["id"],
         )
         source_table = source.as_table(all_info=True)
-        source_tag_cols = _get_system_tag_columns(source_table)
+        source_key_cols = _get_system_key_columns(source_table)
 
         select = SelectDataColumns(columns=["a"])
         result = select.process(source)
         result_table = result.as_table(all_info=True)
-        result_tag_cols = _get_system_tag_columns(result_table)
+        result_key_cols = _get_system_key_columns(result_table)
 
-        assert set(source_tag_cols) == set(result_tag_cols)
+        assert set(source_key_cols) == set(result_key_cols)
 
-    def test_map_preserves_system_tags(self):
+    def test_map_preserves_system_keys(self):
         source = _make_source(
             {
                 "id": pa.array([1, 2], type=pa.int64()),
@@ -103,14 +103,14 @@ def test_map_preserves_system_tags(self):
             ["id", "group"],
         )
         source_table = source.as_table(all_info=True)
-        source_tag_cols = _get_system_tag_columns(source_table)
+        source_key_cols = _get_system_key_columns(source_table)
 
-        mapper = MapTags(name_map={"id": "item_id"})
+        mapper = MapKeys(name_map={"id": "item_id"})
         result = mapper.process(source)
         result_table = result.as_table(all_info=True)
-        result_tag_cols = _get_system_tag_columns(result_table)
+        result_key_cols = _get_system_key_columns(result_table)
 
-        assert set(source_tag_cols) == set(result_tag_cols)
+        assert set(source_key_cols) == set(result_key_cols)
 
 
 # ===================================================================
@@ -119,10 +119,10 @@ def test_map_preserves_system_tags(self):
 
 
 class TestNameExtending:
-    """Per design: multi-input ops extend system tag column names with
+    """Per design: multi-input ops extend system key column names with
     ::pipeline_hash:canonical_position."""
 
-    def test_join_extends_system_tag_names(self):
+    def test_join_extends_system_key_names(self):
         source_a = _make_source(
             {"id": pa.array([1, 2], type=pa.int64())},
             {"a": pa.array([10, 20], type=pa.int64())},
@@ -134,21 +134,21 @@ def test_join_extends_system_tag_names(self):
             ["id"],
         )
 
-        # Get original system tag column names
-        a_tags = _get_system_tag_columns(source_a.as_table(all_info=True))
-        b_tags = _get_system_tag_columns(source_b.as_table(all_info=True))
+        # Get original system key column names
+        a_keys = _get_system_key_columns(source_a.as_table(all_info=True))
+        b_keys = _get_system_key_columns(source_b.as_table(all_info=True))
 
         join = Join()
         result = join.process(source_a, source_b)
         result_table = result.as_table(all_info=True)
-        result_tags = _get_system_tag_columns(result_table)
+        result_keys = _get_system_key_columns(result_table)
 
-        # After join, system tag columns should be extended (longer names)
-        # Each input contributes system tag columns with extended names
-        assert len(result_tags) >= len(a_tags) + len(b_tags)
+        # After join, system key columns should be extended (longer names)
+        # Each input contributes system key columns with extended names
+        assert len(result_keys) >= len(a_keys) + len(b_keys)
 
-    def test_join_sorts_system_tag_values_for_commutativity(self):
-        """Per design: commutative ops sort paired tag values per row."""
+    def test_join_sorts_system_key_values_for_commutativity(self):
+        """Per design: commutative ops sort paired system key values per row."""
         source_a = _make_source(
             {"id": pa.array([1, 2], type=pa.int64())},
             {"a": pa.array([10, 20], type=pa.int64())},
@@ -167,10 +167,10 @@ def test_join_sorts_system_tag_values_for_commutativity(self):
         table_ab = result_ab.as_table(all_info=True)
         table_ba = result_ba.as_table(all_info=True)
 
-        # System tag column names should be identical for commutative join
-        tags_ab = sorted(_get_system_tag_columns(table_ab))
-        tags_ba = sorted(_get_system_tag_columns(table_ba))
-        assert tags_ab == tags_ba
+        # System key column names should be identical for commutative join
+        keys_ab = sorted(_get_system_key_columns(table_ab))
+        keys_ba = sorted(_get_system_key_columns(table_ba))
+        assert keys_ab == keys_ba
 
 
 # ===================================================================
@@ -179,27 +179,27 @@ def test_join_sorts_system_tag_values_for_commutativity(self):
 
 
 class TestTypeEvolving:
-    """Per design: batch operation changes system tag type from str to list[str]."""
+    """Per design: batch operation changes system key type from str to list[str]."""
 
-    def test_batch_evolves_system_tag_type(self):
+    def test_batch_evolves_system_key_type(self):
         source = _make_source(
             {"group": pa.array(["a", "a", "b"], type=pa.large_string())},
             {"value": pa.array([1, 2, 3], type=pa.int64())},
             ["group"],
         )
         source_table = source.as_table(all_info=True)
-        source_tag_cols = _get_system_tag_columns(source_table)
+        source_key_cols = _get_system_key_columns(source_table)
 
         batch = Batch()
         result = batch.process(source)
         result_table = result.as_table(all_info=True)
-        result_tag_cols = _get_system_tag_columns(result_table)
+        result_key_cols = _get_system_key_columns(result_table)
 
-        # System tag columns should exist in output
-        assert len(result_tag_cols) == len(source_tag_cols)
+        # System key columns should exist in output
+        assert len(result_key_cols) == len(source_key_cols)
 
         # The type should have evolved to list
-        for col_name in result_tag_cols:
+        for col_name in result_key_cols:
             col_type = result_table.schema.field(col_name).type
             assert pa.types.is_list(col_type) or pa.types.is_large_list(
                 col_type
@@ -239,12 +239,12 @@ def test_full_chain(self):
         batched = batch.process(filtered)
 
         table = batched.as_table(all_info=True)
-        tag_cols = _get_system_tag_columns(table)
+        key_cols = _get_system_key_columns(table)
 
-        # After all three stages, system tags should exist
-        assert len(tag_cols) > 0
+        # After all three stages, system keys should exist
+        assert len(key_cols) > 0
 
         # After batch, types should be lists
-        for col_name in tag_cols:
+        for col_name in key_cols:
             col_type = table.schema.field(col_name).type
             assert pa.types.is_list(col_type) or pa.types.is_large_list(col_type)
diff --git a/test-objective/property/test_operator_algebra.py b/test-objective/property/test_operator_algebra.py
index 3f5510f8..795b8fbc 100644
--- a/test-objective/property/test_operator_algebra.py
+++ b/test-objective/property/test_operator_algebra.py
@@ -38,9 +38,9 @@ def _sorted_rows(table: pa.Table, sort_col: str = "id") -> list[dict]:
     return sorted(rows, key=lambda r: r.get(sort_col, 0))
 
 
-def _make_stream(tag_data: dict, data_data: dict, tag_cols: list[str]) -> ArrowTableStream:
-    all_data = {**tag_data, **data_data}
-    return ArrowTableStream(pa.table(all_data), tag_columns=tag_cols)
+def _make_stream(key_data: dict, data_data: dict, key_cols: list[str]) -> ArrowTableStream:
+    all_data = {**key_data, **data_data}
+    return ArrowTableStream(pa.table(all_data), key_columns=key_cols)
 
 
 # ===================================================================
diff --git a/test-objective/unit/test_arrow_data_utils.py b/test-objective/unit/test_arrow_data_utils.py
index 58da04b9..102376bf 100644
--- a/test-objective/unit/test_arrow_data_utils.py
+++ b/test-objective/unit/test_arrow_data_utils.py
@@ -1,6 +1,6 @@
 """Specification-derived tests for arrow_utils (formerly arrow_data_utils).
 
-Tests system tag manipulation, source info, and column helper functions
+Tests system key manipulation, source info, and column helper functions
 based on documented behavior in the design specification.
 """
 
@@ -12,41 +12,41 @@
 from orcapod.system_constants import constants
 from orcapod.utils.arrow_utils import (
     add_source_info,
-    add_system_tag_columns,
-    append_to_system_tags,
+    add_system_key_columns,
+    append_to_system_keys,
     drop_columns_with_prefix,
     drop_system_columns,
-    sort_system_tag_values,
+    sort_system_key_values,
 )
 
 
 # ---------------------------------------------------------------------------
-# add_system_tag_columns
+# add_system_key_columns
 # ---------------------------------------------------------------------------
 
 
-class TestAddSystemTagColumns:
-    """Per the design spec, system tag columns are prefixed with _tag_ and
+class TestAddSystemKeyColumns:
+    """Per the design spec, system key columns are prefixed with _key_ and
     track per-row provenance (source_id, record_id pairs)."""
 
-    def test_adds_system_tag_columns(self):
+    def test_adds_system_key_columns(self):
         table = pa.table({"id": [1, 2], "value": [10, 20]})
-        result = add_system_tag_columns(
+        result = add_system_key_columns(
             table,
             schema_hash="abc123",
             source_ids="src1",
             record_ids=["rec1", "rec2"],
         )
-        # Should have original columns plus new system tag columns
+        # Should have original columns plus new system key columns
         assert result.num_rows == 2
-        tag_cols = [
-            c for c in result.column_names if c.startswith(constants.SYSTEM_TAG_PREFIX)
+        key_cols = [
+            c for c in result.column_names if c.startswith(constants.SYSTEM_KEY_PREFIX)
         ]
-        assert len(tag_cols) > 0
+        assert len(key_cols) > 0
 
     def test_empty_table_returns_empty(self):
         table = pa.table({"id": pa.array([], type=pa.int64())})
-        result = add_system_tag_columns(
+        result = add_system_key_columns(
             table,
             schema_hash="abc",
             source_ids="src1",
@@ -57,7 +57,7 @@ def test_empty_table_returns_empty(self):
     def test_length_mismatch_raises(self):
         table = pa.table({"id": [1, 2, 3]})
         with pytest.raises(ValueError):
-            add_system_tag_columns(
+            add_system_key_columns(
                 table,
                 schema_hash="abc",
                 source_ids=["s1", "s2"],  # 2 source_ids for 3 rows
@@ -66,65 +66,65 @@ def test_length_mismatch_raises(self):
 
 
 # ---------------------------------------------------------------------------
-# append_to_system_tags
+# append_to_system_keys
 # ---------------------------------------------------------------------------
 
 
-class TestAppendToSystemTags:
-    """Per design, appends a value to existing system tag columns."""
+class TestAppendToSystemKeys:
+    """Per design, appends a value to existing system key columns."""
 
-    def test_appends_value_to_system_tags(self):
-        # Create a table that already has system tag columns
+    def test_appends_value_to_system_keys(self):
+        # Create a table that already has system key columns
         table = pa.table({"id": [1, 2], "value": [10, 20]})
-        table_with_tags = add_system_tag_columns(
+        table_with_keys = add_system_key_columns(
             table,
             schema_hash="abc",
             source_ids="src1",
             record_ids=["r1", "r2"],
         )
-        result = append_to_system_tags(table_with_tags, value="::extra:0")
-        # System tag column names should have changed (appended)
-        tag_cols_before = [
+        result = append_to_system_keys(table_with_keys, value="::extra:0")
+        # System key column names should have changed (appended)
+        key_cols_before = [
             c
-            for c in table_with_tags.column_names
-            if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            for c in table_with_keys.column_names
+            if c.startswith(constants.SYSTEM_KEY_PREFIX)
         ]
-        tag_cols_after = [
-            c for c in result.column_names if c.startswith(constants.SYSTEM_TAG_PREFIX)
+        key_cols_after = [
+            c for c in result.column_names if c.startswith(constants.SYSTEM_KEY_PREFIX)
         ]
         # The column names should be extended
-        assert len(tag_cols_after) == len(tag_cols_before)
+        assert len(key_cols_after) == len(key_cols_before)
 
     def test_empty_table_returns_empty(self):
         table = pa.table(
             {"id": pa.array([], type=pa.int64()), "value": pa.array([], type=pa.int64())}
         )
-        result = append_to_system_tags(table, value="::extra:0")
+        result = append_to_system_keys(table, value="::extra:0")
         assert result.num_rows == 0
 
 
 # ---------------------------------------------------------------------------
-# sort_system_tag_values
+# sort_system_key_values
 # ---------------------------------------------------------------------------
 
 
-class TestSortSystemTagValues:
-    """Per design, system tag values must be sorted for commutativity in
+class TestSortSystemKeyValues:
+    """Per design, system key values must be sorted for commutativity in
     multi-input operators. Paired (source_id, record_id) tuples are sorted
     together per row."""
 
-    def test_sorts_system_tag_values(self):
+    def test_sorts_system_key_values(self):
         # This is a structural test — ensure the function runs without error
         # and produces a table with the same shape
         table = pa.table({"id": [1, 2], "value": [10, 20]})
-        table_with_tags = add_system_tag_columns(
+        table_with_keys = add_system_key_columns(
             table,
             schema_hash="abc",
             source_ids="src1",
             record_ids=["r1", "r2"],
         )
-        result = sort_system_tag_values(table_with_tags)
-        assert result.num_rows == table_with_tags.num_rows
+        result = sort_system_key_values(table_with_keys)
+        assert result.num_rows == table_with_keys.num_rows
 
 
 # ---------------------------------------------------------------------------
diff --git a/test-objective/unit/test_data.py b/test-objective/unit/test_data.py
index ac6729e7..96efaee3 100644
--- a/test-objective/unit/test_data.py
+++ b/test-objective/unit/test_data.py
@@ -4,7 +4,7 @@
 import pytest
 
 from orcapod.core.datagrams.datagram import Datagram
-from orcapod.core.datagrams.tag_data import Data
+from orcapod.core.datagrams.key_data import Data
 from orcapod.types import ColumnConfig
 
 
diff --git a/test-objective/unit/test_data_function.py b/test-objective/unit/test_data_function.py
index 530d1e3d..37351341 100644
--- a/test-objective/unit/test_data_function.py
+++ b/test-objective/unit/test_data_function.py
@@ -8,7 +8,7 @@
 import pyarrow as pa
 import pytest
 
-from orcapod.core.datagrams.tag_data import Data
+from orcapod.core.datagrams.key_data import Data
 from orcapod.core.data_function import CachedDataFunction, PythonDataFunction
 from orcapod.databases import InMemoryArrowDatabase
 from orcapod.types import Schema
diff --git a/test-objective/unit/test_function_pod.py b/test-objective/unit/test_function_pod.py
index f6f017d8..57603c09 100644
--- a/test-objective/unit/test_function_pod.py
+++ b/test-objective/unit/test_function_pod.py
@@ -2,7 +2,7 @@
 
 Tests based on FunctionPodProtocol and documented behaviors:
 - FunctionPod wraps a DataFunction for per-data transformation
-- Never inspects or modifies tags
+- Never inspects or modifies keys
 - Exactly one input stream
 - output_schema() prediction matches actual output
 """
@@ -12,7 +12,7 @@
 import pyarrow as pa
 import pytest
 
-from orcapod.core.datagrams.tag_data import Data, Tag
+from orcapod.core.datagrams.key_data import Data, Key
 from orcapod.core.function_pod import FunctionPod, FunctionPodStream, function_pod
 from orcapod.core.data_function import PythonDataFunction
 from orcapod.core.sources import ArrowTableSource
@@ -41,7 +41,7 @@ def _make_stream(n: int = 3) -> ArrowTableSource:
             "x": pa.array(list(range(n)), type=pa.int64()),
         }
     )
-    return ArrowTableSource(table, tag_columns=["id"], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=["id"], infer_nullable=True)
 
 
 def _make_two_col_stream(n: int = 3) -> ArrowTableSource:
@@ -52,7 +52,7 @@ def _make_two_col_stream(n: int = 3) -> ArrowTableSource:
             "y": pa.array([i * 10 for i in range(n)], type=pa.int64()),
         }
     )
-    return ArrowTableSource(table, tag_columns=["id"], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=["id"], infer_nullable=True)
 
 
 # ---------------------------------------------------------------------------
@@ -93,23 +93,23 @@ def test_validate_inputs_accepts_single_stream(self):
         pod.validate_inputs(stream)  # Should not raise
 
 
-class TestFunctionPodTagInvariant:
-    """Per the strict boundary: function pods NEVER inspect or modify tags."""
+class TestFunctionPodKeyInvariant:
+    """Per the strict boundary: function pods NEVER inspect or modify keys."""
 
-    def test_tags_pass_through_unchanged(self):
+    def test_keys_pass_through_unchanged(self):
         pf = PythonDataFunction(_double, output_keys="result")
         pod = FunctionPod(data_function=pf)
         stream = _make_stream()
         result = pod.process(stream)
 
-        input_tags = [tag for tag, _ in stream.iter_data()]
-        output_tags = [tag for tag, _ in result.iter_data()]
+        input_keys = [key for key, _ in stream.iter_data()]
+        output_keys = [key for key, _ in result.iter_data()]
 
-        for in_tag, out_tag in zip(input_tags, output_tags):
-            # Tag data columns should be identical
-            assert in_tag.keys() == out_tag.keys()
-            for key in in_tag.keys():
-                assert in_tag[key] == out_tag[key]
+        for in_key, out_key in zip(input_keys, output_keys):
+            # Key column names and values should be identical
+            assert in_key.keys() == out_key.keys()
+            for key in in_key.keys():
+                assert in_key[key] == out_key[key]
 
     def test_data_are_transformed(self):
         pf = PythonDataFunction(_double, output_keys="result")
@@ -117,7 +117,7 @@ def test_data_are_transformed(self):
         stream = _make_stream()
         result = pod.process(stream)
 
-        for tag, data in result.iter_data():
+        for key, data in result.iter_data():
             assert "result" in data.keys()
 
 
@@ -129,12 +129,12 @@ def test_output_schema_matches_actual(self):
         pod = FunctionPod(data_function=pf)
         stream = _make_stream()
 
-        predicted_tag_schema, predicted_data_schema = pod.output_schema(stream)
+        predicted_key_schema, predicted_data_schema = pod.output_schema(stream)
         result = pod.process(stream)
-        actual_tag_schema, actual_data_schema = result.output_schema()
+        actual_key_schema, actual_data_schema = result.output_schema()
 
-        # Tag schemas should match
-        assert set(predicted_tag_schema.keys()) == set(actual_tag_schema.keys())
+        # Key schemas should match
+        assert set(predicted_key_schema.keys()) == set(actual_key_schema.keys())
         # Data schemas should match
         assert set(predicted_data_schema.keys()) == set(actual_data_schema.keys())
 
@@ -166,9 +166,9 @@ def test_keys_matches_output_schema(self):
         pod = FunctionPod(data_function=pf)
         stream = _make_stream()
         result = pod.process(stream)
-        tag_keys, data_keys = result.keys()
-        tag_schema, data_schema = result.output_schema()
-        assert set(tag_keys) == set(tag_schema.keys())
+        key_keys, data_keys = result.keys()
+        key_schema, data_schema = result.output_schema()
+        assert set(key_keys) == set(key_schema.keys())
         assert set(data_keys) == set(data_schema.keys())
 
     def test_as_table_materialization(self):
diff --git a/test-objective/unit/test_key.py b/test-objective/unit/test_key.py
new file mode 100644
index 00000000..bf60916a
--- /dev/null
+++ b/test-objective/unit/test_key.py
@@ -0,0 +1,157 @@
+"""Specification-derived tests for Key."""
+
+import pyarrow as pa
+import pytest
+
+from orcapod.core.datagrams.datagram import Datagram
+from orcapod.core.datagrams.key_data import Key
+from orcapod.system_constants import constants
+from orcapod.types import ColumnConfig
+
+# Use the actual system key prefix from constants
+_SYS_TAG_KEY = f"{constants.SYSTEM_KEY_PREFIX}src:abc"
+
+
+def _make_context():
+    """Create a DataContext for tests."""
+    from orcapod.contexts import resolve_context
+    return resolve_context(None)
+
+
+# ---------------------------------------------------------------------------
+# System keys stored separately from data columns
+# ---------------------------------------------------------------------------
+
+class TestKeySystemKeysSeparation:
+    """System keys are stored separately from data columns."""
+
+    def test_system_keys_not_in_keys_by_default(self):
+        ctx = _make_context()
+        key = Key({"x": 1, "y": "hello"}, data_context=ctx, system_keys={_SYS_TAG_KEY: "val"})
+        keys = list(key.keys())
+        assert "x" in keys
+        assert "y" in keys
+        assert not any(k.startswith(constants.SYSTEM_KEY_PREFIX) for k in keys)
+
+    def test_system_keys_not_in_as_dict_by_default(self):
+        ctx = _make_context()
+        key = Key({"x": 1}, data_context=ctx, system_keys={_SYS_TAG_KEY: "val"})
+        d = key.as_dict()
+        assert not any(k.startswith(constants.SYSTEM_KEY_PREFIX) for k in d)
+
+    def test_system_keys_not_in_as_table_by_default(self):
+        ctx = _make_context()
+        key = Key({"x": 1}, data_context=ctx, system_keys={_SYS_TAG_KEY: "val"})
+        table = key.as_table()
+        assert not any(name.startswith(constants.SYSTEM_KEY_PREFIX) for name in table.column_names)
+
+    def test_system_keys_not_in_schema_by_default(self):
+        ctx = _make_context()
+        key = Key({"x": 1}, data_context=ctx, system_keys={_SYS_TAG_KEY: "val"})
+        s = key.schema()
+        assert not any(k.startswith(constants.SYSTEM_KEY_PREFIX) for k in s)
+
+
+# ---------------------------------------------------------------------------
+# System keys included with ColumnConfig
+# ---------------------------------------------------------------------------
+
+class TestKeySystemKeysWithConfig:
+    """With ColumnConfig(system_keys=True) or ColumnConfig.all(), system keys are included."""
+
+    def test_keys_with_system_keys_true(self):
+        ctx = _make_context()
+        key = Key({"x": 1}, data_context=ctx, system_keys={_SYS_TAG_KEY: "val"})
+        keys = list(key.keys(columns=ColumnConfig(system_keys=True)))
+        assert any(k.startswith(constants.SYSTEM_KEY_PREFIX) for k in keys)
+
+    def test_as_dict_with_system_keys_true(self):
+        ctx = _make_context()
+        key = Key({"x": 1}, data_context=ctx, system_keys={_SYS_TAG_KEY: "val"})
+        d = key.as_dict(columns=ColumnConfig(system_keys=True))
+        assert any(k.startswith(constants.SYSTEM_KEY_PREFIX) for k in d)
+
+    def test_as_table_with_system_keys_true(self):
+        ctx = _make_context()
+        key = Key({"x": 1}, data_context=ctx, system_keys={_SYS_TAG_KEY: "val"})
+        table = key.as_table(columns=ColumnConfig(system_keys=True))
+        assert any(name.startswith(constants.SYSTEM_KEY_PREFIX) for name in table.column_names)
+
+    def test_keys_with_all_info(self):
+        ctx = _make_context()
+        key = Key({"x": 1}, data_context=ctx, system_keys={_SYS_TAG_KEY: "val"})
+        keys = list(key.keys(columns=ColumnConfig.all()))
+        assert any(k.startswith(constants.SYSTEM_KEY_PREFIX) for k in keys)
+
+    def test_schema_with_system_keys_true(self):
+        ctx = _make_context()
+        key = Key({"x": 1}, data_context=ctx, system_keys={_SYS_TAG_KEY: "val"})
+        s = key.schema(columns=ColumnConfig(system_keys=True))
+        assert any(k.startswith(constants.SYSTEM_KEY_PREFIX) for k in s)
+
+
+# ---------------------------------------------------------------------------
+# system_keys() returns a dict COPY
+# ---------------------------------------------------------------------------
+
+class TestKeySystemKeysCopy:
+    """system_keys() returns a dict COPY (not a reference)."""
+
+    def test_system_keys_returns_dict(self):
+        ctx = _make_context()
+        key = Key({"x": 1}, data_context=ctx, system_keys={_SYS_TAG_KEY: "val"})
+        st = key.system_keys()
+        assert isinstance(st, dict)
+        assert _SYS_TAG_KEY in st
+
+    def test_system_keys_is_copy(self):
+        ctx = _make_context()
+        key = Key({"x": 1}, data_context=ctx, system_keys={_SYS_TAG_KEY: "val"})
+        st = key.system_keys()
+        st[_SYS_TAG_KEY] = "modified"
+        # Original should be unchanged
+        assert key.system_keys()[_SYS_TAG_KEY] == "val"
+
+
+# ---------------------------------------------------------------------------
+# copy() preserves system keys
+# ---------------------------------------------------------------------------
+
+class TestKeyCopy:
+    """copy() preserves system keys."""
+
+    def test_copy_preserves_system_keys(self):
+        ctx = _make_context()
+        key = Key({"x": 1}, data_context=ctx, system_keys={_SYS_TAG_KEY: "val"})
+        copied = key.copy()
+        assert copied is not key
+        assert copied.system_keys() == key.system_keys()
+
+    def test_copy_preserves_data(self):
+        ctx = _make_context()
+        key = Key({"x": 1, "y": "hello"}, data_context=ctx, system_keys={})
+        copied = key.copy()
+        assert copied["x"] == 1
+        assert copied["y"] == "hello"
+
+
+# ---------------------------------------------------------------------------
+# as_datagram() returns Datagram, not Key
+# ---------------------------------------------------------------------------
+
+class TestKeyAsDatagram:
+    """as_datagram() returns a Datagram (not Key)."""
+
+    def test_as_datagram_returns_datagram_type(self):
+        ctx = _make_context()
+        key = Key({"x": 1}, data_context=ctx, system_keys={})
+        dg = key.as_datagram()
+        assert isinstance(dg, Datagram)
+        assert not isinstance(dg, Key)
+
+    def test_as_datagram_preserves_data(self):
+        ctx = _make_context()
+        key = Key({"x": 1, "y": "hello"}, data_context=ctx, system_keys={})
+        dg = key.as_datagram()
+        assert dg["x"] == 1
+        assert dg["y"] == "hello"
diff --git a/test-objective/unit/test_nodes.py b/test-objective/unit/test_nodes.py
index 39acdcb2..50f7b024 100644
--- a/test-objective/unit/test_nodes.py
+++ b/test-objective/unit/test_nodes.py
@@ -41,7 +41,7 @@ def _make_stream(n: int = 3) -> ArrowTableSource:
             "x": pa.array(list(range(n)), type=pa.int64()),
         }
     )
-    return ArrowTableSource(table, tag_columns=["id"], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=["id"], infer_nullable=True)
 
 
 def _make_joinable_streams() -> tuple[ArrowTableSource, ArrowTableSource]:
@@ -58,8 +58,8 @@ def _make_joinable_streams() -> tuple[ArrowTableSource, ArrowTableSource]:
         }
     )
     return (
-        ArrowTableSource(left, tag_columns=["id"], infer_nullable=True),
-        ArrowTableSource(right, tag_columns=["id"], infer_nullable=True),
+        ArrowTableSource(left, key_columns=["id"], infer_nullable=True),
+        ArrowTableSource(right, key_columns=["id"], infer_nullable=True),
     )
 
 
@@ -78,7 +78,7 @@ def test_iter_data(self):
         node = FunctionNode(function_pod=pod, input_stream=stream)
         data = list(node.iter_data())
         assert len(data) == 3
-        for tag, data in data:
+        for key, data in data:
             assert "result" in data.keys()
 
     def test_process_data(self):
@@ -86,9 +86,9 @@ def test_process_data(self):
         pod = FunctionPod(data_function=pf)
         stream = _make_stream()
         node = FunctionNode(function_pod=pod, input_stream=stream)
-        # Get first tag/data from input
-        tag, data = next(iter(stream.iter_data()))
-        out_tag, out_data = node.process_data(tag, data)
+        # Get first key/data from input
+        key, data = next(iter(stream.iter_data()))
+        out_key, out_data = node.process_data(key, data)
         assert out_data is not None
         assert "result" in out_data.keys()
 
diff --git a/test-objective/unit/test_operators.py b/test-objective/unit/test_operators.py
index d0794b1f..478c5fd0 100644
--- a/test-objective/unit/test_operators.py
+++ b/test-objective/unit/test_operators.py
@@ -1,9 +1,9 @@
 """Specification-derived tests for all operators.
 
 Tests based on the design specification's operator semantics:
-- Operators inspect tags, never data content
+- Operators inspect keys, never data content
 - Operators can rename columns but never synthesize new values
-- System tag evolution rules: name-preserving, name-extending, type-evolving
+- System key evolution rules: name-preserving, name-extending, type-evolving
 """
 
 from __future__ import annotations
@@ -14,14 +14,14 @@
 from orcapod.core.operators import (
     Batch,
     DropDataColumns,
-    DropTagColumns,
+    DropKeyColumns,
     Join,
     MapData,
-    MapTags,
+    MapKeys,
     MergeJoin,
     PolarsFilter,
     SelectDataColumns,
-    SelectTagColumns,
+    SelectKeyColumns,
     SemiJoin,
 )
 from orcapod.core.sources import ArrowTableSource
@@ -37,15 +37,15 @@
 
 
 def _make_stream(
-    tag_data: dict, data_data: dict, tag_columns: list[str]
+    key_data: dict, data_data: dict, key_columns: list[str]
 ) -> ArrowTableStream:
-    all_data = {**tag_data, **data_data}
+    all_data = {**key_data, **data_data}
     table = pa.table(all_data)
-    return ArrowTableStream(table, tag_columns=tag_columns)
+    return ArrowTableStream(table, key_columns=key_columns)
 
 
 def _stream_a() -> ArrowTableStream:
-    """Stream with tag=id, data=age."""
+    """Stream with key=id, data=age."""
     return _make_stream(
         {"id": pa.array([1, 2, 3], type=pa.int64())},
         {"age": pa.array([25, 30, 35], type=pa.int64())},
@@ -54,7 +54,7 @@ def _stream_a() -> ArrowTableStream:
 
 
 def _stream_b() -> ArrowTableStream:
-    """Stream with tag=id, data=score (overlaps with A on id=2,3)."""
+    """Stream with key=id, data=score (overlaps with A on id=2,3)."""
     return _make_stream(
         {"id": pa.array([2, 3, 4], type=pa.int64())},
         {"score": pa.array([85, 90, 95], type=pa.int64())},
@@ -63,7 +63,7 @@ def _stream_b() -> ArrowTableStream:
 
 
 def _stream_b_overlapping_data() -> ArrowTableStream:
-    """Stream with tag=id, data=age (same data col name as A)."""
+    """Stream with key=id, data=age (same data col name as A)."""
     return _make_stream(
         {"id": pa.array([2, 3, 4], type=pa.int64())},
         {"age": pa.array([40, 45, 50], type=pa.int64())},
@@ -71,8 +71,8 @@ def _stream_b_overlapping_data() -> ArrowTableStream:
     )
 
 
-def _stream_with_two_tags() -> ArrowTableStream:
-    """Stream with tag={id, group}, data=value."""
+def _stream_with_two_keys() -> ArrowTableStream:
+    """Stream with key={id, group}, data=value."""
     return _make_stream(
         {
             "id": pa.array([1, 2, 3], type=pa.int64()),
@@ -89,10 +89,10 @@ def _stream_with_two_tags() -> ArrowTableStream:
 
 
 class TestJoin:
-    """Per design: N-ary inner join on shared tag columns. Requires
-    non-overlapping data columns. Commutative. System tags: name-extending."""
+    """Per design: N-ary inner join on shared key columns. Requires
+    non-overlapping data columns. Commutative. System keys: name-extending."""
 
-    def test_two_streams_on_common_tags(self):
+    def test_two_streams_on_common_keys(self):
         join = Join()
         result = join.process(_stream_a(), _stream_b())
         table = result.as_table()
@@ -124,7 +124,7 @@ def test_commutative(self):
         assert ab_ids == ba_ids
 
     def test_empty_result_when_no_matches(self):
-        """Disjoint tags → empty stream."""
+        """Disjoint keys → empty stream."""
         s1 = _make_stream(
             {"id": pa.array([1], type=pa.int64())},
             {"a": pa.array([10], type=pa.int64())},
@@ -164,35 +164,35 @@ def test_three_or_more_streams(self):
         assert "b" in table.column_names
         assert "c" in table.column_names
 
-    def test_system_tag_name_extending(self):
-        """Per design, multi-input ops extend system tag column names with
-        ::pipeline_hash:position. Sources (not raw streams) create system tags."""
+    def test_system_key_name_extending(self):
+        """Per design, multi-input ops extend system key column names with
+        ::pipeline_hash:position. Sources (not raw streams) create system keys."""
         sa = ArrowTableSource(
             pa.table({"id": pa.array([2, 3], type=pa.int64()), "a": pa.array([10, 20], type=pa.int64())}),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         sb = ArrowTableSource(
             pa.table({"id": pa.array([2, 3], type=pa.int64()), "b": pa.array([30, 40], type=pa.int64())}),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         join = Join()
         result = join.process(sa, sb)
         table = result.as_table(all_info=True)
-        tag_cols = [
-            c for c in table.column_names if c.startswith(constants.SYSTEM_TAG_PREFIX)
+        key_cols = [
+            c for c in table.column_names if c.startswith(constants.SYSTEM_KEY_PREFIX)
         ]
-        # After join, system tag columns should have extended names (at least 2 per input)
-        assert len(tag_cols) >= 2
+        # After join, system key columns should have extended names (at least 2 in total)
+        assert len(key_cols) >= 2
 
     def test_output_schema_prediction(self):
         join = Join()
         sa, sb = _stream_a(), _stream_b()
-        predicted_tag, predicted_data = join.output_schema(sa, sb)
+        predicted_key, predicted_data = join.output_schema(sa, sb)
         result = join.process(sa, sb)
-        actual_tag, actual_data = result.output_schema()
-        assert set(predicted_tag.keys()) == set(actual_tag.keys())
+        actual_key, actual_data = result.output_schema()
+        assert set(predicted_key.keys()) == set(actual_key.keys())
         assert set(predicted_data.keys()) == set(actual_data.keys())
 
 
@@ -239,7 +239,7 @@ def test_output_schema_predicts_list_types(self):
         merge = MergeJoin()
         sa = _stream_a()
         sb = _stream_b_overlapping_data()
-        predicted_tag, predicted_data = merge.output_schema(sa, sb)
+        predicted_key, predicted_data = merge.output_schema(sa, sb)
         # The 'age' column should be predicted as list type
         assert "age" in predicted_data
 
@@ -251,9 +251,9 @@ def test_output_schema_predicts_list_types(self):
 
 class TestSemiJoin:
     """Per design: binary non-commutative join. Keeps left rows matching
-    right tags. Right data columns are dropped."""
+    right keys. Right data columns are dropped."""
 
-    def test_filters_left_by_right_tags(self):
+    def test_filters_left_by_right_keys(self):
         semi = SemiJoin()
         result = semi.process(_stream_a(), _stream_b())
         table = result.as_table()
@@ -286,8 +286,8 @@ def test_preserves_left_data_columns(self):
 
 
 class TestBatch:
-    """Per design: groups rows by tag, aggregates data. Data column
-    types become list[T]. System tag type evolves from str to list[str]."""
+    """Per design: groups rows by key, aggregates data. Data column
+    types become list[T]. System key type evolves from str to list[str]."""
 
     def test_groups_rows(self):
         stream = _make_stream(
@@ -325,10 +325,10 @@ def test_batch_output_schema_prediction(self):
             ["group"],
         )
         batch = Batch()
-        predicted_tag, predicted_data = batch.output_schema(stream)
+        predicted_key, predicted_data = batch.output_schema(stream)
         result = batch.process(stream)
-        actual_tag, actual_data = result.output_schema()
-        assert set(predicted_tag.keys()) == set(actual_tag.keys())
+        actual_key, actual_data = result.output_schema()
+        assert set(predicted_key.keys()) == set(actual_key.keys())
         assert set(predicted_data.keys()) == set(actual_data.keys())
 
     def test_batch_with_batch_size(self):
@@ -361,20 +361,20 @@ def test_batch_drop_partial(self):
 # ===================================================================
 
 
-class TestSelectTagColumns:
-    """Per design: keeps only specified tag columns."""
+class TestSelectKeyColumns:
+    """Per design: keeps only specified key columns."""
 
-    def test_select_tag_columns(self):
-        stream = _stream_with_two_tags()
-        select = SelectTagColumns(columns=["id"])
+    def test_select_key_columns(self):
+        stream = _stream_with_two_keys()
+        select = SelectKeyColumns(columns=["id"])
         result = select.process(stream)
-        tag_keys, _ = result.keys()
-        assert "id" in tag_keys
-        assert "group" not in tag_keys
+        key_keys, _ = result.keys()
+        assert "id" in key_keys
+        assert "group" not in key_keys
 
     def test_strict_missing_raises(self):
-        stream = _stream_with_two_tags()
-        select = SelectTagColumns(columns=["nonexistent"], strict=True)
+        stream = _stream_with_two_keys()
+        select = SelectKeyColumns(columns=["nonexistent"], strict=True)
         with pytest.raises(Exception):
             select.process(stream)
 
@@ -395,16 +395,16 @@ def test_select_data_columns(self):
         assert "b" not in data_keys
 
 
-class TestDropTagColumns:
-    """Per design: removes specified tag columns."""
+class TestDropKeyColumns:
+    """Per design: removes specified key columns."""
 
-    def test_drop_tag_columns(self):
-        stream = _stream_with_two_tags()
-        drop = DropTagColumns(columns=["group"])
+    def test_drop_key_columns(self):
+        stream = _stream_with_two_keys()
+        drop = DropKeyColumns(columns=["group"])
         result = drop.process(stream)
-        tag_keys, _ = result.keys()
-        assert "group" not in tag_keys
-        assert "id" in tag_keys
+        key_keys, _ = result.keys()
+        assert "group" not in key_keys
+        assert "id" in key_keys
 
 
 class TestDropDataColumns:
@@ -424,28 +424,28 @@ def test_drop_data_columns(self):
 
 
 # ===================================================================
-# MapTags / MapData
+# MapKeys / MapData
 # ===================================================================
 
 
-class TestMapTags:
-    """Per design: renames tag columns. System tags: name-preserving."""
+class TestMapKeys:
+    """Per design: renames key columns. System keys: name-preserving."""
 
-    def test_renames_tag_columns(self):
-        stream = _stream_with_two_tags()
-        mapper = MapTags(name_map={"id": "identifier"})
+    def test_renames_key_columns(self):
+        stream = _stream_with_two_keys()
+        mapper = MapKeys(name_map={"id": "identifier"})
         result = mapper.process(stream)
-        tag_keys, _ = result.keys()
-        assert "identifier" in tag_keys
-        assert "id" not in tag_keys
+        key_keys, _ = result.keys()
+        assert "identifier" in key_keys
+        assert "id" not in key_keys
 
     def test_drop_unmapped(self):
-        stream = _stream_with_two_tags()
-        mapper = MapTags(name_map={"id": "identifier"}, drop_unmapped=True)
+        stream = _stream_with_two_keys()
+        mapper = MapKeys(name_map={"id": "identifier"}, drop_unmapped=True)
         result = mapper.process(stream)
-        tag_keys, _ = result.keys()
-        assert "identifier" in tag_keys
-        assert "group" not in tag_keys
+        key_keys, _ = result.keys()
+        assert "identifier" in key_keys
+        assert "group" not in key_keys
 
 
 class TestMapData:
@@ -471,7 +471,7 @@ def test_renames_data_columns(self):
 
 class TestPolarsFilter:
     """Per design: filters rows by predicate or constraints. Schema preserved.
-    System tags: name-preserving."""
+    System keys: name-preserving."""
 
     def test_filter_with_constraints(self):
         stream = _stream_a()
@@ -484,10 +484,10 @@ def test_filter_with_constraints(self):
     def test_filter_preserves_schema(self):
         stream = _stream_a()
         filt = PolarsFilter(constraints={"id": 2})
-        predicted_tag, predicted_data = filt.output_schema(stream)
+        predicted_key, predicted_data = filt.output_schema(stream)
         result = filt.process(stream)
-        actual_tag, actual_data = result.output_schema()
-        assert set(predicted_tag.keys()) == set(actual_tag.keys())
+        actual_key, actual_data = result.output_schema()
+        assert set(predicted_key.keys()) == set(actual_key.keys())
         assert set(predicted_data.keys()) == set(actual_data.keys())
 
 
diff --git a/test-objective/unit/test_source_registry.py b/test-objective/unit/test_source_registry.py
index 0960bdfb..80b8ed16 100644
--- a/test-objective/unit/test_source_registry.py
+++ b/test-objective/unit/test_source_registry.py
@@ -16,15 +16,15 @@
 # ---------------------------------------------------------------------------
 
 
-def _make_source(tag_val: str = "a", data_val: int = 1) -> ArrowTableSource:
+def _make_source(key_val: str = "a", data_val: int = 1) -> ArrowTableSource:
     """Create a minimal ArrowTableSource for registry testing."""
     table = pa.table(
         {
-            "tag": pa.array([tag_val], type=pa.large_string()),
+            "key": pa.array([key_val], type=pa.large_string()),
             "data": pa.array([data_val], type=pa.int64()),
         }
     )
-    return ArrowTableSource(table, tag_columns=["tag"], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=["key"], infer_nullable=True)
 
 
 # ---------------------------------------------------------------------------
diff --git a/test-objective/unit/test_sources.py b/test-objective/unit/test_sources.py
index 650bc664..0952a3d9 100644
--- a/test-objective/unit/test_sources.py
+++ b/test-objective/unit/test_sources.py
@@ -9,7 +9,7 @@
 import pyarrow as pa
 import pytest
 
-from orcapod.core.datagrams import Data, Tag
+from orcapod.core.datagrams import Data, Key
 from orcapod.core.sources import ArrowTableSource
 from orcapod.core.sources.derived_source import DerivedSource
 from orcapod.core.sources.dict_source import DictSource
@@ -41,8 +41,8 @@ class TestArrowTableSourceConstruction:
     """ArrowTableSource construction behaviors."""
 
     def test_normal_construction(self):
-        """A valid table with tag columns constructs successfully."""
-        source = ArrowTableSource(_simple_table(), tag_columns=["name"], infer_nullable=True)
+        """A valid table with key columns constructs successfully."""
+        source = ArrowTableSource(_simple_table(), key_columns=["name"], infer_nullable=True)
         assert source is not None
 
     def test_empty_table_constructs_successfully(self):
@@ -53,33 +53,33 @@ def test_empty_table_constructs_successfully(self):
                 "age": pa.array([], type=pa.int64()),
             }
         )
-        source = ArrowTableSource(empty, tag_columns=["name"], infer_nullable=True)
+        source = ArrowTableSource(empty, key_columns=["name"], infer_nullable=True)
         assert source is not None
         assert source.as_table().num_rows == 0
 
-    def test_missing_tag_columns_raises_value_error(self):
-        """Specifying tag columns not in the table raises ValueError."""
+    def test_missing_key_columns_raises_value_error(self):
+        """Specifying key columns not in the table raises ValueError."""
         table = _simple_table()
-        with pytest.raises(ValueError, match="tag_columns"):
-            ArrowTableSource(table, tag_columns=["nonexistent"], infer_nullable=True)
+        with pytest.raises(ValueError, match="key_columns"):
+            ArrowTableSource(table, key_columns=["nonexistent"], infer_nullable=True)
 
-    def test_adds_system_tag_column(self):
-        """The source auto-adds system tag columns to the underlying table."""
-        source = ArrowTableSource(_simple_table(), tag_columns=["name"], infer_nullable=True)
+    def test_adds_system_key_column(self):
+        """The source auto-adds system key columns to the underlying table."""
+        source = ArrowTableSource(_simple_table(), key_columns=["name"], infer_nullable=True)
         table = source.as_table(all_info=True)
-        system_tag_cols = [c for c in table.column_names if c.startswith("_tag_")]
-        assert len(system_tag_cols) > 0
+        system_key_cols = [c for c in table.column_names if c.startswith("_key_")]
+        assert len(system_key_cols) > 0
 
     def test_adds_source_info_columns(self):
         """The source adds source info columns (prefixed with _source_)."""
-        source = ArrowTableSource(_simple_table(), tag_columns=["name"], infer_nullable=True)
+        source = ArrowTableSource(_simple_table(), key_columns=["name"], infer_nullable=True)
         table = source.as_table(columns=ColumnConfig(source=True))
         source_cols = [c for c in table.column_names if c.startswith("_source_")]
         assert len(source_cols) > 0
 
     def test_source_id_populated(self):
         """source_id property is populated (defaults to table hash)."""
-        source = ArrowTableSource(_simple_table(), tag_columns=["name"], infer_nullable=True)
+        source = ArrowTableSource(_simple_table(), key_columns=["name"], infer_nullable=True)
         assert source.source_id is not None
         assert len(source.source_id) > 0
 
@@ -87,7 +87,7 @@ def test_source_id_explicit(self):
         """Explicit source_id is preserved."""
         source = ArrowTableSource(
             _simple_table(),
-            tag_columns=["name"],
+            key_columns=["name"],
             source_id="my_source",
             infer_nullable=True,
         )
@@ -95,19 +95,19 @@ def test_source_id_explicit(self):
 
     def test_producer_is_none(self):
         """Root sources have producer == None."""
-        source = ArrowTableSource(_simple_table(), tag_columns=["name"], infer_nullable=True)
+        source = ArrowTableSource(_simple_table(), key_columns=["name"], infer_nullable=True)
         assert source.producer is None
 
     def test_upstreams_is_empty(self):
         """Root sources have empty upstreams tuple."""
-        source = ArrowTableSource(_simple_table(), tag_columns=["name"], infer_nullable=True)
+        source = ArrowTableSource(_simple_table(), key_columns=["name"], infer_nullable=True)
         assert source.upstreams == ()
 
-    def test_no_tag_columns_valid(self):
-        """Construction with no tag columns is valid (all columns are data)."""
-        source = ArrowTableSource(_simple_table(), tag_columns=[], infer_nullable=True)
-        tag_keys, data_keys = source.keys()
-        assert tag_keys == ()
+    def test_no_key_columns_valid(self):
+        """Construction with no key columns is valid (all columns are data)."""
+        source = ArrowTableSource(_simple_table(), key_columns=[], infer_nullable=True)
+        key_keys, data_keys = source.keys()
+        assert key_keys == ()
         assert "name" in data_keys
         assert "age" in data_keys
 
@@ -129,14 +129,14 @@ class TestArrowTableSourceResolveField:
     @NOT_IMPLEMENTED
     def test_resolve_field_valid_record_id(self):
         """resolve_field works with valid positional record_id."""
-        source = ArrowTableSource(_simple_table(3), tag_columns=["name"], infer_nullable=True)
+        source = ArrowTableSource(_simple_table(3), key_columns=["name"], infer_nullable=True)
         value = source.resolve_field("row_0", "age")
         assert value == 20
 
     @NOT_IMPLEMENTED
     def test_resolve_field_second_row(self):
         """resolve_field returns data from the correct row."""
-        source = ArrowTableSource(_simple_table(3), tag_columns=["name"], infer_nullable=True)
+        source = ArrowTableSource(_simple_table(3), key_columns=["name"], infer_nullable=True)
         value = source.resolve_field("row_1", "age")
         assert value == 21
 
@@ -145,7 +145,7 @@ def test_resolve_field_with_record_id_column(self):
         """resolve_field works with named record_id column."""
         source = ArrowTableSource(
             _simple_table(3),
-            tag_columns=["name"],
+            key_columns=["name"],
             record_id_column="name",
             infer_nullable=True,
         )
@@ -155,28 +155,28 @@ def test_resolve_field_with_record_id_column(self):
     @NOT_IMPLEMENTED
     def test_resolve_field_missing_record_raises(self):
         """resolve_field raises FieldNotResolvableError for missing records."""
-        source = ArrowTableSource(_simple_table(3), tag_columns=["name"], infer_nullable=True)
+        source = ArrowTableSource(_simple_table(3), key_columns=["name"], infer_nullable=True)
         with pytest.raises(FieldNotResolvableError):
             source.resolve_field("row_999", "age")
 
     @NOT_IMPLEMENTED
     def test_resolve_field_missing_field_raises(self):
         """resolve_field raises FieldNotResolvableError for missing field names."""
-        source = ArrowTableSource(_simple_table(3), tag_columns=["name"], infer_nullable=True)
+        source = ArrowTableSource(_simple_table(3), key_columns=["name"], infer_nullable=True)
         with pytest.raises(FieldNotResolvableError):
             source.resolve_field("row_0", "nonexistent_field")
 
     @NOT_IMPLEMENTED
     def test_resolve_field_invalid_record_id_format(self):
         """resolve_field raises FieldNotResolvableError for invalid record_id format."""
-        source = ArrowTableSource(_simple_table(3), tag_columns=["name"], infer_nullable=True)
+        source = ArrowTableSource(_simple_table(3), key_columns=["name"], infer_nullable=True)
         with pytest.raises(FieldNotResolvableError):
             source.resolve_field("invalid_format", "age")
 
     @NOT_IMPLEMENTED
-    def test_resolve_field_tag_column(self):
-        """resolve_field can resolve tag column values too."""
-        source = ArrowTableSource(_simple_table(3), tag_columns=["name"], infer_nullable=True)
+    def test_resolve_field_key_column(self):
+        """resolve_field can resolve key column values too."""
+        source = ArrowTableSource(_simple_table(3), key_columns=["name"], infer_nullable=True)
         value = source.resolve_field("row_0", "name")
         assert value == "n0"
 
@@ -185,33 +185,33 @@ class TestArrowTableSourceSchema:
     """ArrowTableSource schema and identity behaviors."""
 
     def test_pipeline_identity_structure_returns_schemas(self):
-        """pipeline_identity_structure returns (tag_schema, data_schema)."""
-        source = ArrowTableSource(_simple_table(), tag_columns=["name"], infer_nullable=True)
+        """pipeline_identity_structure returns (key_schema, data_schema)."""
+        source = ArrowTableSource(_simple_table(), key_columns=["name"], infer_nullable=True)
         result = source.pipeline_identity_structure()
         assert isinstance(result, tuple)
         assert len(result) == 2
-        tag_schema, data_schema = result
-        assert isinstance(tag_schema, Schema)
+        key_schema, data_schema = result
+        assert isinstance(key_schema, Schema)
         assert isinstance(data_schema, Schema)
 
     def test_output_schema_returns_schemas(self):
-        source = ArrowTableSource(_simple_table(), tag_columns=["name"], infer_nullable=True)
-        tag_schema, data_schema = source.output_schema()
-        assert "name" in tag_schema
+        source = ArrowTableSource(_simple_table(), key_columns=["name"], infer_nullable=True)
+        key_schema, data_schema = source.output_schema()
+        assert "name" in key_schema
         assert "age" in data_schema
 
     def test_output_schema_types(self):
         """output_schema types match column data types."""
-        source = ArrowTableSource(_simple_table(), tag_columns=["name"], infer_nullable=True)
-        tag_schema, data_schema = source.output_schema()
-        assert tag_schema["name"] is str
+        source = ArrowTableSource(_simple_table(), key_columns=["name"], infer_nullable=True)
+        key_schema, data_schema = source.output_schema()
+        assert key_schema["name"] is str
         assert data_schema["age"] is int
 
     def test_keys_returns_correct_split(self):
-        """keys() correctly separates tag and data columns."""
-        source = ArrowTableSource(_simple_table(), tag_columns=["name"], infer_nullable=True)
-        tag_keys, data_keys = source.keys()
-        assert "name" in tag_keys
+        """keys() correctly separates key and data columns."""
+        source = ArrowTableSource(_simple_table(), key_columns=["name"], infer_nullable=True)
+        key_keys, data_keys = source.keys()
+        assert "name" in key_keys
         assert "age" in data_keys
         assert "name" not in data_keys
 
@@ -219,36 +219,36 @@ def test_keys_returns_correct_split(self):
 class TestArrowTableSourceIteration:
     """ArrowTableSource iter_data and as_table behaviors."""
 
-    def test_iter_data_yields_tag_data_pairs(self):
-        source = ArrowTableSource(_simple_table(3), tag_columns=["name"], infer_nullable=True)
+    def test_iter_data_yields_key_data_pairs(self):
+        source = ArrowTableSource(_simple_table(3), key_columns=["name"], infer_nullable=True)
         pairs = list(source.iter_data())
         assert len(pairs) == 3
-        for tag, data in pairs:
-            assert isinstance(tag, Tag)
+        for key, data in pairs:
+            assert isinstance(key, Key)
             assert isinstance(data, Data)
 
     def test_as_table_has_expected_columns(self):
-        source = ArrowTableSource(_simple_table(), tag_columns=["name"], infer_nullable=True)
+        source = ArrowTableSource(_simple_table(), key_columns=["name"], infer_nullable=True)
         table = source.as_table()
         assert "name" in table.column_names
         assert "age" in table.column_names
 
     def test_as_table_row_count(self):
         """as_table row count matches input table row count."""
-        source = ArrowTableSource(_simple_table(5), tag_columns=["name"], infer_nullable=True)
+        source = ArrowTableSource(_simple_table(5), key_columns=["name"], infer_nullable=True)
         table = source.as_table()
         assert table.num_rows == 5
 
     def test_as_table_all_info_has_more_columns(self):
         """as_table(all_info=True) has more columns than default."""
-        source = ArrowTableSource(_simple_table(), tag_columns=["name"], infer_nullable=True)
+        source = ArrowTableSource(_simple_table(), key_columns=["name"], infer_nullable=True)
         table_default = source.as_table()
         table_all = source.as_table(all_info=True)
         assert table_all.num_columns > table_default.num_columns
 
     def test_iter_data_count_matches_as_table_rows(self):
         """iter_data count equals as_table row count."""
-        source = ArrowTableSource(_simple_table(4), tag_columns=["name"], infer_nullable=True)
+        source = ArrowTableSource(_simple_table(4), key_columns=["name"], infer_nullable=True)
         pairs = list(source.iter_data())
         table = source.as_table()
         assert len(pairs) == table.num_rows
@@ -265,68 +265,68 @@ class TestDictSource:
     def test_construction_from_list_of_dicts(self):
         """DictSource can be constructed from a collection of dicts."""
         data = [{"x": 1, "y": "a"}, {"x": 2, "y": "b"}]
-        source = DictSource(data=data, tag_columns=["x"])
+        source = DictSource(data=data, key_columns=["x"])
         assert source is not None
 
     def test_delegates_to_arrow_table_source(self):
         """DictSource produces valid iter_data output."""
         data = [{"x": 1, "y": "a"}, {"x": 2, "y": "b"}]
-        source = DictSource(data=data, tag_columns=["x"])
+        source = DictSource(data=data, key_columns=["x"])
         pairs = list(source.iter_data())
         assert len(pairs) == 2
 
     def test_keys_correct(self):
         data = [{"x": 1, "y": "a"}]
-        source = DictSource(data=data, tag_columns=["x"])
-        tag_keys, data_keys = source.keys()
-        assert "x" in tag_keys
+        source = DictSource(data=data, key_columns=["x"])
+        key_keys, data_keys = source.keys()
+        assert "x" in key_keys
         assert "y" in data_keys
 
     def test_source_id_populated(self):
         data = [{"x": 1, "y": "a"}]
-        source = DictSource(data=data, tag_columns=["x"])
+        source = DictSource(data=data, key_columns=["x"])
         assert source.source_id is not None
         assert len(source.source_id) > 0
 
     def test_producer_is_none(self):
         data = [{"x": 1, "y": "a"}]
-        source = DictSource(data=data, tag_columns=["x"])
+        source = DictSource(data=data, key_columns=["x"])
         assert source.producer is None
 
     def test_upstreams_is_empty(self):
         data = [{"x": 1, "y": "a"}]
-        source = DictSource(data=data, tag_columns=["x"])
+        source = DictSource(data=data, key_columns=["x"])
         assert source.upstreams == ()
 
     def test_output_schema(self):
         """DictSource output_schema delegates correctly."""
         data = [{"x": 1, "y": "a"}]
-        source = DictSource(data=data, tag_columns=["x"])
-        tag_schema, data_schema = source.output_schema()
-        assert "x" in tag_schema
+        source = DictSource(data=data, key_columns=["x"])
+        key_schema, data_schema = source.output_schema()
+        assert "x" in key_schema
         assert "y" in data_schema
 
     def test_as_table_has_correct_rows(self):
         """DictSource as_table returns correct number of rows."""
         data = [{"x": 1, "y": "a"}, {"x": 2, "y": "b"}, {"x": 3, "y": "c"}]
-        source = DictSource(data=data, tag_columns=["x"])
+        source = DictSource(data=data, key_columns=["x"])
         table = source.as_table()
         assert table.num_rows == 3
 
-    def test_iter_data_yields_tag_data_pairs(self):
+    def test_iter_data_yields_key_data_pairs(self):
         """DictSource iter_data yields proper types."""
         data = [{"x": 1, "y": "a"}]
-        source = DictSource(data=data, tag_columns=["x"])
+        source = DictSource(data=data, key_columns=["x"])
         pairs = list(source.iter_data())
         assert len(pairs) == 1
-        tag, data = pairs[0]
-        assert isinstance(tag, Tag)
+        key, data = pairs[0]
+        assert isinstance(key, Key)
         assert isinstance(data, Data)
 
     def test_multiple_data_columns(self):
         """DictSource handles multiple data columns."""
-        data = [{"tag": 1, "a": "x", "b": 10}]
-        source = DictSource(data=data, tag_columns=["tag"])
+        data = [{"key": 1, "a": "x", "b": 10}]
+        source = DictSource(data=data, key_columns=["key"])
         _, data_keys = source.keys()
         assert "a" in data_keys
         assert "b" in data_keys
@@ -350,27 +350,27 @@ def test_iter_data_yields_correct_count(self):
         pairs = list(source.iter_data())
         assert len(pairs) == 3
 
-    def test_default_tag_is_element_index(self):
-        """Default tag function produces element_index tag."""
+    def test_default_key_is_element_index(self):
+        """Default key function produces element_index key."""
         source = ListSource(name="item", data=["a", "b"])
-        tag_keys, _ = source.keys()
-        assert "element_index" in tag_keys
+        key_keys, _ = source.keys()
+        assert "element_index" in key_keys
 
     def test_empty_list_raises_value_error(self):
         """An empty list raises ValueError (empty table)."""
         with pytest.raises(ValueError):
             ListSource(name="item", data=[])
 
-    def test_custom_tag_function(self):
-        """Custom tag_function is used for tag generation."""
+    def test_custom_key_function(self):
+        """Custom key_function is used for key generation."""
         source = ListSource(
             name="item",
             data=["a", "b"],
-            tag_function=lambda el, idx: {"pos": idx * 10},
-            expected_tag_keys=["pos"],
+            key_function=lambda el, idx: {"pos": idx * 10},
+            expected_key_keys=["pos"],
         )
-        tag_keys, _ = source.keys()
-        assert "pos" in tag_keys
+        key_keys, _ = source.keys()
+        assert "pos" in key_keys
 
     def test_data_column_name_matches(self):
         """The data column is named after the 'name' parameter."""
@@ -407,10 +407,10 @@ def test_integer_elements(self):
         assert len(pairs) == 3
 
     def test_output_schema(self):
-        """ListSource output_schema has tag and data fields."""
+        """ListSource output_schema has key and data fields."""
         source = ListSource(name="item", data=["a", "b"])
-        tag_schema, data_schema = source.output_schema()
-        assert "element_index" in tag_schema
+        key_schema, data_schema = source.output_schema()
+        assert "element_index" in key_schema
         assert "item" in data_schema
 
 
@@ -429,10 +429,10 @@ def _make_mock_origin(self, records=None):
             to_string=MagicMock(return_value="abcdef1234567890")
         )
         mock_origin.output_schema.return_value = (
-            Schema({"tag_col": str}),
+            Schema({"key_col": str}),
             Schema({"data_col": int}),
         )
-        mock_origin.keys.return_value = (("tag_col",), ("data_col",))
+        mock_origin.keys.return_value = (("key_col",), ("data_col",))
         mock_origin.get_all_records.return_value = records
         return mock_origin
 
@@ -448,7 +448,7 @@ def test_before_run_correct_schema(self):
         mock_origin = self._make_mock_origin(records=None)
         source = DerivedSource(origin=mock_origin)
         table = source.as_table()
-        assert "tag_col" in table.column_names
+        assert "key_col" in table.column_names
         assert "data_col" in table.column_names
 
     def test_source_id_derived_prefix(self):
@@ -467,23 +467,23 @@ def test_output_schema_delegates_to_origin(self):
         """output_schema delegates to origin node."""
         mock_origin = self._make_mock_origin(records=None)
         source = DerivedSource(origin=mock_origin)
-        tag_schema, data_schema = source.output_schema()
-        assert "tag_col" in tag_schema
+        key_schema, data_schema = source.output_schema()
+        assert "key_col" in key_schema
         assert "data_col" in data_schema
 
     def test_keys_delegates_to_origin(self):
         """keys() delegates to origin node."""
         mock_origin = self._make_mock_origin(records=None)
         source = DerivedSource(origin=mock_origin)
-        tag_keys, data_keys = source.keys()
-        assert "tag_col" in tag_keys
+        key_keys, data_keys = source.keys()
+        assert "key_col" in key_keys
         assert "data_col" in data_keys
 
     def test_after_run_with_records(self):
         """After run(), DerivedSource presents the computed records."""
         records_table = pa.table(
             {
-                "tag_col": pa.array(["a", "b"], type=pa.large_string()),
+                "key_col": pa.array(["a", "b"], type=pa.large_string()),
                 "data_col": pa.array([1, 2], type=pa.int64()),
             }
         )
diff --git a/test-objective/unit/test_stream.py b/test-objective/unit/test_stream.py
index 2314aeb2..7d2e4212 100644
--- a/test-objective/unit/test_stream.py
+++ b/test-objective/unit/test_stream.py
@@ -8,7 +8,7 @@
 import pyarrow as pa
 import pytest
 
-from orcapod.core.datagrams import Data, Tag
+from orcapod.core.datagrams import Data, Key
 from orcapod.core.streams import ArrowTableStream
 from orcapod.types import ColumnConfig, Schema
 
@@ -19,7 +19,7 @@
 
 
 def _simple_table(n_rows: int = 3) -> pa.Table:
-    """A table with one tag-eligible column and one data column."""
+    """A table with one key-eligible column and one data column."""
     schema = pa.schema([
         pa.field("id", pa.int64(), nullable=False),
         pa.field("value", pa.large_string(), nullable=False),
@@ -34,7 +34,7 @@ def _simple_table(n_rows: int = 3) -> pa.Table:
 
 
 def _multi_data_table(n_rows: int = 3) -> pa.Table:
-    """A table with one tag column and two data columns."""
+    """A table with one key column and two data columns."""
     schema = pa.schema([
         pa.field("id", pa.int64(), nullable=False),
         pa.field("x", pa.int64(), nullable=False),
@@ -51,12 +51,12 @@ def _multi_data_table(n_rows: int = 3) -> pa.Table:
 
 
 def _make_stream(
-    tag_columns: list[str] | None = None,
+    key_columns: list[str] | None = None,
     n_rows: int = 3,
     **kwargs,
 ) -> ArrowTableStream:
-    tag_columns = tag_columns if tag_columns is not None else ["id"]
-    return ArrowTableStream(_simple_table(n_rows), tag_columns=tag_columns, **kwargs)
+    key_columns = key_columns if key_columns is not None else ["id"]
+    return ArrowTableStream(_simple_table(n_rows), key_columns=key_columns, **kwargs)
 
 
 # ---------------------------------------------------------------------------
@@ -68,12 +68,12 @@ class TestConstruction:
     """ArrowTableStream construction from a pa.Table."""
 
     def test_basic_construction(self):
-        """Stream can be created from a pa.Table with tag_columns."""
+        """Stream can be created from a pa.Table with key_columns."""
         stream = _make_stream()
         assert stream is not None
 
-    def test_construction_with_system_tag_columns(self):
-        """Stream accepts system_tag_columns parameter."""
+    def test_construction_with_system_key_columns(self):
+        """Stream accepts system_key_columns parameter."""
         table = pa.table(
             {
                 "id": pa.array([1, 2], type=pa.int64()),
@@ -82,7 +82,7 @@ def test_construction_with_system_tag_columns(self):
             }
         )
         stream = ArrowTableStream(
-            table, tag_columns=["id"], system_tag_columns=["sys"]
+            table, key_columns=["id"], system_key_columns=["sys"]
         )
         assert stream is not None
 
@@ -90,7 +90,7 @@ def test_construction_with_source_info(self):
         """Stream accepts source_info dict parameter."""
         stream = ArrowTableStream(
             _simple_table(),
-            tag_columns=["id"],
+            key_columns=["id"],
             source_info={"value": "test_source::row_0"},
         )
         assert stream is not None
@@ -100,7 +100,7 @@ def test_construction_with_producer_and_upstreams(self):
         upstream = _make_stream()
         # producer=None is the default; just verify upstreams tuple is stored
         stream = ArrowTableStream(
-            _simple_table(), tag_columns=["id"], upstreams=(upstream,)
+            _simple_table(), key_columns=["id"], upstreams=(upstream,)
         )
         assert stream.upstreams == (upstream,)
         assert stream.producer is None
@@ -109,18 +109,18 @@ def test_no_data_columns_raises_value_error(self):
         """Stream requires at least one data column; ValueError if none."""
         table = pa.table({"id": pa.array([1, 2, 3], type=pa.int64())})
         with pytest.raises(ValueError):
-            ArrowTableStream(table, tag_columns=["id"])
+            ArrowTableStream(table, key_columns=["id"])
 
-    def test_no_tag_columns_is_valid(self):
-        """All columns may be data columns (no tags)."""
+    def test_no_key_columns_is_valid(self):
+        """All columns may be data columns (no keys)."""
         table = pa.table({"value": pa.array(["a", "b"], type=pa.large_string())})
-        stream = ArrowTableStream(table, tag_columns=[])
-        tag_keys, data_keys = stream.keys()
-        assert tag_keys == ()
+        stream = ArrowTableStream(table, key_columns=[])
+        key_keys, data_keys = stream.keys()
+        assert key_keys == ()
         assert "value" in data_keys
 
-    def test_multiple_tag_columns(self):
-        """Stream supports multiple tag columns."""
+    def test_multiple_key_columns(self):
+        """Stream supports multiple key columns."""
         table = pa.table(
             {
                 "t1": pa.array([1, 2], type=pa.int64()),
@@ -128,15 +128,15 @@ def test_multiple_tag_columns(self):
                 "val": pa.array([10.0, 20.0], type=pa.float64()),
             }
         )
-        stream = ArrowTableStream(table, tag_columns=["t1", "t2"])
-        tag_keys, data_keys = stream.keys()
-        assert set(tag_keys) == {"t1", "t2"}
+        stream = ArrowTableStream(table, key_columns=["t1", "t2"])
+        key_keys, data_keys = stream.keys()
+        assert set(key_keys) == {"t1", "t2"}
         assert data_keys == ("val",)
 
     def test_multiple_data_columns(self):
         """Stream supports multiple data columns."""
         stream = ArrowTableStream(
-            _multi_data_table(), tag_columns=["id"]
+            _multi_data_table(), key_columns=["id"]
         )
         _, data_keys = stream.keys()
         assert set(data_keys) == {"x", "y"}
@@ -148,27 +148,27 @@ def test_multiple_data_columns(self):
 
 
 class TestKeys:
-    """keys() returns (tag_keys, data_keys) tuples."""
+    """keys() returns (key_keys, data_keys) tuples."""
 
     def test_keys_returns_tuple_of_tuples(self):
         stream = _make_stream()
         result = stream.keys()
         assert isinstance(result, tuple)
         assert len(result) == 2
-        tag_keys, data_keys = result
-        assert isinstance(tag_keys, tuple)
+        key_keys, data_keys = result
+        assert isinstance(key_keys, tuple)
         assert isinstance(data_keys, tuple)
 
     def test_keys_correct_split(self):
-        stream = _make_stream(tag_columns=["id"])
-        tag_keys, data_keys = stream.keys()
-        assert "id" in tag_keys
+        stream = _make_stream(key_columns=["id"])
+        key_keys, data_keys = stream.keys()
+        assert "id" in key_keys
         assert "value" in data_keys
         assert "id" not in data_keys
-        assert "value" not in tag_keys
+        assert "value" not in key_keys
 
-    def test_keys_with_column_config_system_tags(self):
-        """When system_tags=True, system tag columns appear in tag_keys."""
+    def test_keys_with_column_config_system_keys(self):
+        """When system_keys=True, system key columns appear in key_keys."""
         table = pa.table(
             {
                 "id": pa.array([1], type=pa.int64()),
@@ -177,15 +177,15 @@ def test_keys_with_column_config_system_tags(self):
             }
         )
         stream = ArrowTableStream(
-            table, tag_columns=["id"], system_tag_columns=["sys_col"]
+            table, key_columns=["id"], system_key_columns=["sys_col"]
         )
-        tag_keys_default, _ = stream.keys()
-        tag_keys_all, _ = stream.keys(columns=ColumnConfig(system_tags=True))
-        # Default: system tags excluded from keys
-        assert len(tag_keys_all) > len(tag_keys_default)
+        key_keys_default, _ = stream.keys()
+        key_keys_all, _ = stream.keys(columns=ColumnConfig(system_keys=True))
+        # Default: system keys excluded from keys
+        assert len(key_keys_all) > len(key_keys_default)
 
     def test_keys_with_all_info(self):
-        """all_info=True includes system tags in tag_keys."""
+        """all_info=True includes system keys in key_keys."""
         table = pa.table(
             {
                 "id": pa.array([1], type=pa.int64()),
@@ -194,19 +194,19 @@ def test_keys_with_all_info(self):
             }
         )
         stream = ArrowTableStream(
-            table, tag_columns=["id"], system_tag_columns=["sys_col"]
+            table, key_columns=["id"], system_key_columns=["sys_col"]
         )
-        tag_keys_all, _ = stream.keys(all_info=True)
-        assert len(tag_keys_all) > 1  # id + system tag(s)
+        key_keys_all, _ = stream.keys(all_info=True)
+        assert len(key_keys_all) > 1  # id + system key(s)
 
-    def test_keys_no_tag_columns(self):
-        """With no tag columns, tag_keys is empty."""
+    def test_keys_no_key_columns(self):
+        """With no key columns, key_keys is empty."""
         table = pa.table(
             {"a": pa.array([1], type=pa.int64()), "b": pa.array([2], type=pa.int64())}
         )
-        stream = ArrowTableStream(table, tag_columns=[])
-        tag_keys, data_keys = stream.keys()
-        assert tag_keys == ()
+        stream = ArrowTableStream(table, key_columns=[])
+        key_keys, data_keys = stream.keys()
+        assert key_keys == ()
         assert set(data_keys) == {"a", "b"}
 
 
@@ -216,56 +216,56 @@ def test_keys_no_tag_columns(self):
 
 
 class TestOutputSchema:
-    """output_schema() returns (tag_schema, data_schema) as Schema objects."""
+    """output_schema() returns (key_schema, data_schema) as Schema objects."""
 
     def test_returns_tuple_of_schemas(self):
         stream = _make_stream()
-        tag_schema, data_schema = stream.output_schema()
-        assert isinstance(tag_schema, Schema)
+        key_schema, data_schema = stream.output_schema()
+        assert isinstance(key_schema, Schema)
         assert isinstance(data_schema, Schema)
 
     def test_schema_field_names_match_keys(self):
-        stream = _make_stream(tag_columns=["id"])
-        tag_schema, data_schema = stream.output_schema()
-        tag_keys, data_keys = stream.keys()
-        assert set(tag_schema.keys()) == set(tag_keys)
+        stream = _make_stream(key_columns=["id"])
+        key_schema, data_schema = stream.output_schema()
+        key_keys, data_keys = stream.keys()
+        assert set(key_schema.keys()) == set(key_keys)
         assert set(data_schema.keys()) == set(data_keys)
 
     def test_schema_types_match_table_column_types(self):
         """output_schema types must be consistent with the actual data in as_table."""
-        stream = _make_stream(tag_columns=["id"])
-        tag_schema, data_schema = stream.output_schema()
-        # tag schema type for "id" should be int
-        assert tag_schema["id"] is int
+        stream = _make_stream(key_columns=["id"])
+        key_schema, data_schema = stream.output_schema()
+        # key schema type for "id" should be int
+        assert key_schema["id"] is int
         # data schema type for "value" should be str
         assert data_schema["value"] is str
 
     def test_schema_with_multiple_types(self):
         """Schema correctly reflects different column types."""
         schema = pa.schema([
-            pa.field("tag", pa.int64(), nullable=False),
+            pa.field("key", pa.int64(), nullable=False),
             pa.field("col_int", pa.int64(), nullable=False),
             pa.field("col_str", pa.large_string(), nullable=False),
             pa.field("col_float", pa.float64(), nullable=False),
         ])
         table = pa.table(
             {
-                "tag": pa.array([1], type=pa.int64()),
+                "key": pa.array([1], type=pa.int64()),
                 "col_int": pa.array([42], type=pa.int64()),
                 "col_str": pa.array(["hello"], type=pa.large_string()),
                 "col_float": pa.array([3.14], type=pa.float64()),
             },
             schema=schema,
         )
-        stream = ArrowTableStream(table, tag_columns=["tag"])
-        tag_schema, data_schema = stream.output_schema()
-        assert tag_schema["tag"] is int
+        stream = ArrowTableStream(table, key_columns=["key"])
+        key_schema, data_schema = stream.output_schema()
+        assert key_schema["key"] is int
         assert data_schema["col_int"] is int
         assert data_schema["col_str"] is str
         assert data_schema["col_float"] is float
 
-    def test_schema_with_system_tags_config(self):
-        """output_schema with system_tags=True includes system tag fields."""
+    def test_schema_with_system_keys_config(self):
+        """output_schema with system_keys=True includes system key fields."""
         table = pa.table(
             {
                 "id": pa.array([1], type=pa.int64()),
@@ -274,13 +274,13 @@ def test_schema_with_system_tags_config(self):
             }
         )
         stream = ArrowTableStream(
-            table, tag_columns=["id"], system_tag_columns=["sys"]
+            table, key_columns=["id"], system_key_columns=["sys"]
         )
-        tag_schema_default, _ = stream.output_schema()
-        tag_schema_with_sys, _ = stream.output_schema(
-            columns=ColumnConfig(system_tags=True)
+        key_schema_default, _ = stream.output_schema()
+        key_schema_with_sys, _ = stream.output_schema(
+            columns=ColumnConfig(system_keys=True)
         )
-        assert len(tag_schema_with_sys) > len(tag_schema_default)
+        assert len(key_schema_with_sys) > len(key_schema_default)
 
 
 # ---------------------------------------------------------------------------
@@ -289,14 +289,14 @@ def test_schema_with_system_tags_config(self):
 
 
 class TestIterDatas:
-    """iter_data() yields (Tag, Data) pairs."""
+    """iter_data() yields (Key, Data) pairs."""
 
-    def test_yields_tag_data_pairs(self):
+    def test_yields_key_data_pairs(self):
         stream = _make_stream(n_rows=2)
         pairs = list(stream.iter_data())
         assert len(pairs) == 2
-        for tag, data in pairs:
-            assert isinstance(tag, Tag)
+        for key, data in pairs:
+            assert isinstance(key, Key)
             assert isinstance(data, Data)
 
     def test_count_matches_row_count(self):
@@ -317,14 +317,14 @@ def test_single_row(self):
         stream = _make_stream(n_rows=1)
         pairs = list(stream.iter_data())
         assert len(pairs) == 1
-        tag, data = pairs[0]
-        assert isinstance(tag, Tag)
+        key, data = pairs[0]
+        assert isinstance(key, Key)
         assert isinstance(data, Data)
 
-    def test_no_tag_columns_still_yields_data(self):
-        """iter_data works when there are no tag columns."""
+    def test_no_key_columns_still_yields_data(self):
+        """iter_data works when there are no key columns."""
         table = pa.table({"value": pa.array(["a", "b"], type=pa.large_string())})
-        stream = ArrowTableStream(table, tag_columns=[])
+        stream = ArrowTableStream(table, key_columns=[])
         pairs = list(stream.iter_data())
         assert len(pairs) == 2
 
@@ -348,24 +348,24 @@ def test_as_table_row_count_matches_iter_data(self):
         pairs = list(stream.iter_data())
         assert table.num_rows == len(pairs)
 
-    def test_as_table_contains_tag_and_data_columns(self):
-        stream = _make_stream(tag_columns=["id"])
+    def test_as_table_contains_key_and_data_columns(self):
+        stream = _make_stream(key_columns=["id"])
         table = stream.as_table()
         assert "id" in table.column_names
         assert "value" in table.column_names
 
     def test_as_table_column_count_matches_keys(self):
-        """Default as_table columns match keys() tag + data columns."""
-        stream = _make_stream(tag_columns=["id"])
+        """Default as_table columns are the union of the key and data columns returned by keys()."""
+        stream = _make_stream(key_columns=["id"])
         table = stream.as_table()
-        tag_keys, data_keys = stream.keys()
-        expected_cols = set(tag_keys) | set(data_keys)
+        key_keys, data_keys = stream.keys()
+        expected_cols = set(key_keys) | set(data_keys)
         assert set(table.column_names) == expected_cols
 
     def test_as_table_data_values_consistent(self):
         """The data in as_table matches the original input data."""
         table_in = _simple_table(3)
-        stream = ArrowTableStream(table_in, tag_columns=["id"])
+        stream = ArrowTableStream(table_in, key_columns=["id"])
         table_out = stream.as_table()
         assert table_out.column("id").to_pylist() == [0, 1, 2]
         assert table_out.column("value").to_pylist() == ["v0", "v1", "v2"]
@@ -379,8 +379,8 @@ def test_as_table_data_values_consistent(self):
 class TestColumnConfigFiltering:
     """ColumnConfig controls which columns appear in keys/schema/table."""
 
-    def test_default_excludes_system_tags(self):
-        """Default ColumnConfig excludes system tag columns."""
+    def test_default_excludes_system_keys(self):
+        """Default ColumnConfig excludes system key columns."""
         table = pa.table(
             {
                 "id": pa.array([1], type=pa.int64()),
@@ -389,14 +389,14 @@ def test_default_excludes_system_tags(self):
             }
         )
         stream = ArrowTableStream(
-            table, tag_columns=["id"], system_tag_columns=["stag"]
+            table, key_columns=["id"], system_key_columns=["stag"]
         )
-        tag_keys, _ = stream.keys()
-        # System tag columns are prefixed with _tag_ internally
-        assert all(not k.startswith("_tag_") for k in tag_keys)
+        key_keys, _ = stream.keys()
+        # System key columns are prefixed with _key_ internally
+        assert all(not k.startswith("_key_") for k in key_keys)
 
     def test_all_info_includes_everything(self):
-        """all_info=True should include source, context, system_tags columns."""
+        """all_info=True should include source, context, system_keys columns."""
         stream = _make_stream()
         table_default = stream.as_table()
         table_all = stream.as_table(all_info=True)
@@ -418,8 +418,8 @@ def test_context_column_config(self):
         table_with_ctx = stream.as_table(columns=ColumnConfig(context=True))
         assert table_with_ctx.num_columns >= table_no_ctx.num_columns
 
-    def test_system_tags_in_as_table(self):
-        """system_tags=True includes system tag columns in the output table."""
+    def test_system_keys_in_as_table(self):
+        """system_keys=True includes system key columns in the output table."""
         table = pa.table(
             {
                 "id": pa.array([1], type=pa.int64()),
@@ -428,10 +428,10 @@ def test_system_tags_in_as_table(self):
             }
         )
         stream = ArrowTableStream(
-            table, tag_columns=["id"], system_tag_columns=["stag"]
+            table, key_columns=["id"], system_key_columns=["stag"]
         )
         table_default = stream.as_table()
-        table_with_sys = stream.as_table(columns=ColumnConfig(system_tags=True))
+        table_with_sys = stream.as_table(columns=ColumnConfig(system_keys=True))
         assert table_with_sys.num_columns > table_default.num_columns
 
     def test_column_config_as_dict(self):
@@ -442,14 +442,14 @@ def test_column_config_as_dict(self):
 
     def test_keys_schema_table_consistency_with_config(self):
         """keys(), output_schema(), and as_table() agree under the same ColumnConfig."""
-        stream = _make_stream(tag_columns=["id"])
-        tag_keys, data_keys = stream.keys()
-        tag_schema, data_schema = stream.output_schema()
+        stream = _make_stream(key_columns=["id"])
+        key_keys, data_keys = stream.keys()
+        key_schema, data_schema = stream.output_schema()
         table = stream.as_table()
 
-        assert set(tag_schema.keys()) == set(tag_keys)
+        assert set(key_schema.keys()) == set(key_keys)
         assert set(data_schema.keys()) == set(data_keys)
-        expected_cols = set(tag_keys) | set(data_keys)
+        expected_cols = set(key_keys) | set(data_keys)
         assert set(table.column_names) == expected_cols
 
 
@@ -486,7 +486,7 @@ def test_as_lazy_frame(self):
 
     def test_as_polars_df_preserves_columns(self):
         """Polars DataFrame has the same columns as as_table."""
-        stream = _make_stream(tag_columns=["id"])
+        stream = _make_stream(key_columns=["id"])
         table = stream.as_table()
         df = stream.as_polars_df()
         assert set(df.columns) == set(table.column_names)
diff --git a/test-objective/unit/test_tag.py b/test-objective/unit/test_tag.py
deleted file mode 100644
index 599196f3..00000000
--- a/test-objective/unit/test_tag.py
+++ /dev/null
@@ -1,157 +0,0 @@
-"""Specification-derived tests for Tag."""
-
-import pyarrow as pa
-import pytest
-
-from orcapod.core.datagrams.datagram import Datagram
-from orcapod.core.datagrams.tag_data import Tag
-from orcapod.system_constants import constants
-from orcapod.types import ColumnConfig
-
-# Use the actual system tag prefix from constants
-_SYS_TAG_KEY = f"{constants.SYSTEM_TAG_PREFIX}src:abc"
-
-
-def _make_context():
-    """Create a DataContext for tests."""
-    from orcapod.contexts import resolve_context
-    return resolve_context(None)
-
-
-# ---------------------------------------------------------------------------
-# System tags stored separately from data columns
-# ---------------------------------------------------------------------------
-
-class TestTagSystemTagsSeparation:
-    """System tags are stored separately from data columns."""
-
-    def test_system_tags_not_in_keys_by_default(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1, "y": "hello"}, data_context=ctx, system_tags={_SYS_TAG_KEY: "val"})
-        keys = list(tag.keys())
-        assert "x" in keys
-        assert "y" in keys
-        assert not any(k.startswith(constants.SYSTEM_TAG_PREFIX) for k in keys)
-
-    def test_system_tags_not_in_as_dict_by_default(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1}, data_context=ctx, system_tags={_SYS_TAG_KEY: "val"})
-        d = tag.as_dict()
-        assert not any(k.startswith(constants.SYSTEM_TAG_PREFIX) for k in d)
-
-    def test_system_tags_not_in_as_table_by_default(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1}, data_context=ctx, system_tags={_SYS_TAG_KEY: "val"})
-        table = tag.as_table()
-        assert not any(name.startswith(constants.SYSTEM_TAG_PREFIX) for name in table.column_names)
-
-    def test_system_tags_not_in_schema_by_default(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1}, data_context=ctx, system_tags={_SYS_TAG_KEY: "val"})
-        s = tag.schema()
-        assert not any(k.startswith(constants.SYSTEM_TAG_PREFIX) for k in s)
-
-
-# ---------------------------------------------------------------------------
-# System tags included with ColumnConfig
-# ---------------------------------------------------------------------------
-
-class TestTagSystemTagsWithConfig:
-    """With ColumnConfig system_tags=True or all_info=True, system tags are included."""
-
-    def test_keys_with_system_tags_true(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1}, data_context=ctx, system_tags={_SYS_TAG_KEY: "val"})
-        keys = list(tag.keys(columns=ColumnConfig(system_tags=True)))
-        assert any(k.startswith(constants.SYSTEM_TAG_PREFIX) for k in keys)
-
-    def test_as_dict_with_system_tags_true(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1}, data_context=ctx, system_tags={_SYS_TAG_KEY: "val"})
-        d = tag.as_dict(columns=ColumnConfig(system_tags=True))
-        assert any(k.startswith(constants.SYSTEM_TAG_PREFIX) for k in d)
-
-    def test_as_table_with_system_tags_true(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1}, data_context=ctx, system_tags={_SYS_TAG_KEY: "val"})
-        table = tag.as_table(columns=ColumnConfig(system_tags=True))
-        assert any(name.startswith(constants.SYSTEM_TAG_PREFIX) for name in table.column_names)
-
-    def test_keys_with_all_info(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1}, data_context=ctx, system_tags={_SYS_TAG_KEY: "val"})
-        keys = list(tag.keys(columns=ColumnConfig.all()))
-        assert any(k.startswith(constants.SYSTEM_TAG_PREFIX) for k in keys)
-
-    def test_schema_with_system_tags_true(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1}, data_context=ctx, system_tags={_SYS_TAG_KEY: "val"})
-        s = tag.schema(columns=ColumnConfig(system_tags=True))
-        assert any(k.startswith(constants.SYSTEM_TAG_PREFIX) for k in s)
-
-
-# ---------------------------------------------------------------------------
-# system_tags() returns a dict COPY
-# ---------------------------------------------------------------------------
-
-class TestTagSystemTagsCopy:
-    """system_tags() returns a dict COPY (not a reference)."""
-
-    def test_system_tags_returns_dict(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1}, data_context=ctx, system_tags={_SYS_TAG_KEY: "val"})
-        st = tag.system_tags()
-        assert isinstance(st, dict)
-        assert _SYS_TAG_KEY in st
-
-    def test_system_tags_is_copy(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1}, data_context=ctx, system_tags={_SYS_TAG_KEY: "val"})
-        st = tag.system_tags()
-        st[_SYS_TAG_KEY] = "modified"
-        # Original should be unchanged
-        assert tag.system_tags()[_SYS_TAG_KEY] == "val"
-
-
-# ---------------------------------------------------------------------------
-# copy() preserves system tags
-# ---------------------------------------------------------------------------
-
-class TestTagCopy:
-    """copy() preserves system tags."""
-
-    def test_copy_preserves_system_tags(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1}, data_context=ctx, system_tags={_SYS_TAG_KEY: "val"})
-        copied = tag.copy()
-        assert copied is not tag
-        assert copied.system_tags() == tag.system_tags()
-
-    def test_copy_preserves_data(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1, "y": "hello"}, data_context=ctx, system_tags={})
-        copied = tag.copy()
-        assert copied["x"] == 1
-        assert copied["y"] == "hello"
-
-
-# ---------------------------------------------------------------------------
-# as_datagram() returns Datagram, not Tag
-# ---------------------------------------------------------------------------
-
-class TestTagAsDatagram:
-    """as_datagram() returns a Datagram (not Tag)."""
-
-    def test_as_datagram_returns_datagram_type(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1}, data_context=ctx, system_tags={})
-        dg = tag.as_datagram()
-        assert isinstance(dg, Datagram)
-        assert not isinstance(dg, Tag)
-
-    def test_as_datagram_preserves_data(self):
-        ctx = _make_context()
-        tag = Tag({"x": 1, "y": "hello"}, data_context=ctx, system_tags={})
-        dg = tag.as_datagram()
-        assert dg["x"] == 1
-        assert dg["y"] == "hello"
diff --git a/test-objective/unit/test_tracker.py b/test-objective/unit/test_tracker.py
index 689a5ee2..24335487 100644
--- a/test-objective/unit/test_tracker.py
+++ b/test-objective/unit/test_tracker.py
@@ -40,7 +40,7 @@ def _make_stream(n: int = 3) -> ArrowTableSource:
             "x": pa.array([i * 10 for i in range(n)], type=pa.int64()),
         }
     )
-    return ArrowTableSource(table, tag_columns=["id"], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=["id"], infer_nullable=True)
 
 
 class TestBasicTrackerManager:
diff --git a/test-objective/unit/test_types.py b/test-objective/unit/test_types.py
index 0b4b8604..fd719ff6 100644
--- a/test-objective/unit/test_types.py
+++ b/test-objective/unit/test_types.py
@@ -281,14 +281,14 @@ def test_all_sets_everything_true(self):
         cc = ColumnConfig.all()
         assert cc.meta is True
         assert cc.source is True
-        assert cc.system_tags is True
+        assert cc.system_keys is True
         assert cc.context is True
 
     def test_data_only_excludes_extras(self):
         cc = ColumnConfig.data_only()
         assert cc.meta is False
         assert cc.source is False
-        assert cc.system_tags is False
+        assert cc.system_keys is False
 
     def test_default_construction(self):
         cc = ColumnConfig()
@@ -316,4 +316,4 @@ def test_handle_config_all_info_flag(self):
         result = ColumnConfig.handle_config(None, all_info=True)
         assert result.meta is True
         assert result.source is True
-        assert result.system_tags is True
+        assert result.system_keys is True
diff --git a/tests/test_channels/test_async_execute.py b/tests/test_channels/test_async_execute.py
index 8b9f15bd..b8835863 100644
--- a/tests/test_channels/test_async_execute.py
+++ b/tests/test_channels/test_async_execute.py
@@ -28,14 +28,14 @@
 from orcapod.core.operators import (
     Batch,
     DropDataColumns,
-    DropTagColumns,
+    DropKeyColumns,
     Join,
     MapData,
-    MapTags,
+    MapKeys,
     MergeJoin,
     PolarsFilter,
     SelectDataColumns,
-    SelectTagColumns,
+    SelectKeyColumns,
     SemiJoin,
 )
 from orcapod.core.operators.static_output_pod import StaticOutputOperatorPod
@@ -49,7 +49,7 @@
 
 
 def make_stream(n: int = 3) -> ArrowTableStream:
-    """Stream with tag=id, data=x (ints). Uses nullable=False schema."""
+    """Stream with key=id, data=x (ints). Uses nullable=False schema."""
     schema = pa.schema(
         [pa.field("id", pa.int64(), nullable=False), pa.field("x", pa.int64(), nullable=False)]
     )
@@ -57,11 +57,11 @@ def make_stream(n: int = 3) -> ArrowTableStream:
         {"id": pa.array(list(range(n)), type=pa.int64()), "x": pa.array(list(range(n)), type=pa.int64())},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 def make_two_col_stream(n: int = 3) -> ArrowTableStream:
-    """Stream with tag=id, data={x, y}. Uses nullable=False schema."""
+    """Stream with key=id, data={x, y}. Uses nullable=False schema."""
     schema = pa.schema(
         [
             pa.field("id", pa.int64(), nullable=False),
@@ -77,11 +77,11 @@ def make_two_col_stream(n: int = 3) -> ArrowTableStream:
         },
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 def make_name_stream() -> ArrowTableStream:
-    """Stream with tag=id, data=name (str). Uses nullable=False schema."""
+    """Stream with key=id, data=name (str). Uses nullable=False schema."""
     schema = pa.schema(
         [
             pa.field("id", pa.int64(), nullable=False),
@@ -95,18 +95,18 @@ def make_name_stream() -> ArrowTableStream:
         },
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 async def feed_stream_to_channel(stream: ArrowTableStream, ch: Channel) -> None:
-    """Push all (tag, data) pairs from a stream into a channel, then close."""
-    for tag, data in stream.iter_data():
-        await ch.writer.send((tag, data))
+    """Push all (key, data) pairs from a stream into a channel, then close."""
+    for key, data in stream.iter_data():
+        await ch.writer.send((key, data))
     await ch.writer.close()
 
 
 async def collect_output(ch: Channel) -> list[tuple]:
-    """Collect all (tag, data) pairs from a channel's reader."""
+    """Collect all (key, data) pairs from a channel's reader."""
     return await ch.reader.collect()
 
 
@@ -138,9 +138,9 @@ def test_round_trip_preserves_schema(self):
         rows = list(stream.iter_data())
         rebuilt = StaticOutputOperatorPod._materialize_to_stream(rows)
 
-        orig_tag, orig_pkt = stream.output_schema()
-        rebuilt_tag, rebuilt_pkt = rebuilt.output_schema()
-        assert dict(orig_tag) == dict(rebuilt_tag)
+        orig_key, orig_pkt = stream.output_schema()
+        rebuilt_key, rebuilt_pkt = rebuilt.output_schema()
+        assert dict(orig_key) == dict(rebuilt_key)
         assert dict(orig_pkt) == dict(rebuilt_pkt)
 
     def test_empty_rows_raises(self):
@@ -214,9 +214,9 @@ def record_thread(x: int) -> int:
 
 class TestUnaryOperatorAsyncExecute:
     @pytest.mark.asyncio
-    async def test_select_tag_columns(self):
+    async def test_select_key_columns(self):
         stream = make_two_col_stream(3)
-        op = SelectTagColumns(["id"])
+        op = SelectKeyColumns(["id"])
 
         input_ch = Channel(buffer_size=16)
         output_ch = Channel(buffer_size=16)
@@ -226,8 +226,8 @@ async def test_select_tag_columns(self):
 
         results = await output_ch.reader.collect()
         assert len(results) == 3
-        for tag, data in results:
-            assert "id" in tag.keys()
+        for key, data in results:
+            assert "id" in key.keys()
 
     @pytest.mark.asyncio
     async def test_select_data_columns(self):
@@ -266,8 +266,8 @@ async def test_drop_data_columns(self):
             assert "y" not in pkt_dict
 
     @pytest.mark.asyncio
-    async def test_drop_tag_columns(self):
-        # Need multi-tag stream
+    async def test_drop_key_columns(self):
+        # Need multi-key stream
         table = pa.table(
             {
                 "a": pa.array([1, 2], type=pa.int64()),
@@ -275,8 +275,8 @@ async def test_drop_tag_columns(self):
                 "x": pa.array([100, 200], type=pa.int64()),
             }
         )
-        stream = ArrowTableStream(table, tag_columns=["a", "b"])
-        op = DropTagColumns(["b"])
+        stream = ArrowTableStream(table, key_columns=["a", "b"])
+        op = DropKeyColumns(["b"])
 
         input_ch = Channel(buffer_size=16)
         output_ch = Channel(buffer_size=16)
@@ -286,15 +286,15 @@ async def test_drop_tag_columns(self):
 
         results = await output_ch.reader.collect()
         assert len(results) == 2
-        for tag, _ in results:
-            tag_keys = tag.keys()
-            assert "a" in tag_keys
-            assert "b" not in tag_keys
+        for key, _ in results:
+            key_keys = key.keys()
+            assert "a" in key_keys
+            assert "b" not in key_keys
 
     @pytest.mark.asyncio
-    async def test_map_tags(self):
+    async def test_map_keys(self):
         stream = make_stream(3)
-        op = MapTags({"id": "row_id"}, drop_unmapped=True)
+        op = MapKeys({"id": "row_id"}, drop_unmapped=True)
 
         input_ch = Channel(buffer_size=16)
         output_ch = Channel(buffer_size=16)
@@ -304,9 +304,9 @@ async def test_map_tags(self):
 
         results = await output_ch.reader.collect()
         assert len(results) == 3
-        for tag, _ in results:
-            assert "row_id" in tag.keys()
-            assert "id" not in tag.keys()
+        for key, _ in results:
+            assert "row_id" in key.keys()
+            assert "id" not in key.keys()
 
     @pytest.mark.asyncio
     async def test_map_data(self):
@@ -339,8 +339,8 @@ async def test_polars_filter(self):
 
         results = await output_ch.reader.collect()
         assert len(results) == 1
-        tag, data = results[0]
-        assert tag.as_dict()["id"] == 2
+        key, data = results[0]
+        assert key.as_dict()["id"] == 2
         assert data.as_dict()["x"] == 2
 
     @pytest.mark.asyncio
@@ -373,7 +373,7 @@ async def test_semi_join(self):
                 "z": pa.array([100, 300], type=pa.int64()),
             }
         )
-        right = ArrowTableStream(right_table, tag_columns=["id"])
+        right = ArrowTableStream(right_table, key_columns=["id"])
 
         op = SemiJoin()
 
@@ -387,7 +387,7 @@ async def test_semi_join(self):
         await op.async_execute([left_ch.reader, right_ch.reader], output_ch.writer)
 
         results = await output_ch.reader.collect()
-        ids = sorted(tag.as_dict()["id"] for tag, _ in results)
+        ids = sorted(key.as_dict()["id"] for key, _ in results)
         assert ids == [1, 3]
 
     @pytest.mark.asyncio
@@ -404,8 +404,8 @@ async def test_merge_join(self):
                 "val": pa.array([100, 200], type=pa.int64()),
             }
         )
-        left = ArrowTableStream(left_table, tag_columns=["id"])
-        right = ArrowTableStream(right_table, tag_columns=["id"])
+        left = ArrowTableStream(left_table, key_columns=["id"])
+        right = ArrowTableStream(right_table, key_columns=["id"])
 
         op = MergeJoin()
 
@@ -442,8 +442,8 @@ async def test_two_way_join(self):
                 "y": pa.array([100, 200, 300], type=pa.int64()),
             }
         )
-        left = ArrowTableStream(left_table, tag_columns=["id"])
-        right = ArrowTableStream(right_table, tag_columns=["id"])
+        left = ArrowTableStream(left_table, key_columns=["id"])
+        right = ArrowTableStream(right_table, key_columns=["id"])
 
         op = Join()
 
@@ -459,8 +459,8 @@ async def test_two_way_join(self):
         results = await output_ch.reader.collect()
         assert len(results) == 3
 
-        # Verify all tag values present
-        ids = sorted(tag.as_dict()["id"] for tag, _ in results)
+        # Verify all key values present
+        ids = sorted(key.as_dict()["id"] for key, _ in results)
         assert ids == [0, 1, 2]
 
         # Verify both data columns present
@@ -518,8 +518,8 @@ def add(x: int, y: int) -> int:
         assert values == [0, 11, 22]
 
     @pytest.mark.asyncio
-    async def test_tags_pass_through(self):
-        """FunctionPod should preserve the input tag for each output."""
+    async def test_keys_pass_through(self):
+        """FunctionPod should preserve the input key for each output."""
 
         def noop(x: int) -> int:
             return x
@@ -535,7 +535,7 @@ def noop(x: int) -> int:
         await pod.async_execute([input_ch.reader], output_ch.writer)
 
         results = await output_ch.reader.collect()
-        ids = sorted(tag.as_dict()["id"] for tag, _ in results)
+        ids = sorted(key.as_dict()["id"] for key, _ in results)
         assert ids == [0, 1, 2]
 
     @pytest.mark.asyncio
@@ -662,8 +662,8 @@ def triple(x: int) -> int:
 
         # Wire
         async def source():
-            for tag, data in stream.iter_data():
-                await ch1.writer.send((tag, data))
+            for key, data in stream.iter_data():
+                await ch1.writer.send((key, data))
             await ch1.writer.close()
 
         async with asyncio.TaskGroup() as tg:
@@ -675,7 +675,7 @@ async def source():
         assert len(results) == 4
 
         result_map = {
-            tag.as_dict()["id"]: pkt.as_dict()["result"] for tag, pkt in results
+            key.as_dict()["id"]: pkt.as_dict()["result"] for key, pkt in results
         }
         assert result_map[1] == 3
         assert result_map[3] == 9
@@ -697,8 +697,8 @@ async def test_source_join_function_chain(self):
                 "y": pa.array([1, 2, 3], type=pa.int64()),
             }
         )
-        left_stream = ArrowTableStream(left_table, tag_columns=["id"])
-        right_stream = ArrowTableStream(right_table, tag_columns=["id"])
+        left_stream = ArrowTableStream(left_table, key_columns=["id"])
+        right_stream = ArrowTableStream(right_table, key_columns=["id"])
 
         def add(x: int, y: int) -> int:
             return x + y
@@ -712,8 +712,8 @@ def add(x: int, y: int) -> int:
         ch_out = Channel(buffer_size=16)
 
         async def push(stream, ch):
-            for tag, data in stream.iter_data():
-                await ch.writer.send((tag, data))
+            for key, data in stream.iter_data():
+                await ch.writer.send((key, data))
             await ch.writer.close()
 
         async with asyncio.TaskGroup() as tg:
@@ -730,7 +730,7 @@ async def push(stream, ch):
         assert len(results) == 3
 
         result_map = {
-            tag.as_dict()["id"]: pkt.as_dict()["result"] for tag, pkt in results
+            key.as_dict()["id"]: pkt.as_dict()["result"] for key, pkt in results
         }
         assert result_map[0] == 11  # 10 + 1
         assert result_map[1] == 22  # 20 + 2
@@ -809,7 +809,7 @@ def test_operator_sync_process_still_works(self):
         op = PolarsFilter(predicates=(pl.col("id").is_in([1, 3]),))
         output = op.process(stream)
         results = list(output.iter_data())
-        ids = sorted(cast(int, tag.as_dict()["id"]) for tag, _ in results)
+        ids = sorted(cast(int, key.as_dict()["id"]) for key, _ in results)
         assert ids == [1, 3]
 
     def test_join_sync_process_still_works(self):
@@ -825,8 +825,8 @@ def test_join_sync_process_still_works(self):
                 "y": pa.array([100, 200], type=pa.int64()),
             }
         )
-        left = ArrowTableStream(left_table, tag_columns=["id"])
-        right = ArrowTableStream(right_table, tag_columns=["id"])
+        left = ArrowTableStream(left_table, key_columns=["id"])
+        right = ArrowTableStream(right_table, key_columns=["id"])
 
         join = Join()
         output = join.process(left, right)
diff --git a/tests/test_channels/test_copilot_review_issues.py b/tests/test_channels/test_copilot_review_issues.py
index 81cc5aac..eab283f6 100644
--- a/tests/test_channels/test_copilot_review_issues.py
+++ b/tests/test_channels/test_copilot_review_issues.py
@@ -43,12 +43,12 @@ def make_stream(n: int = 5) -> ArrowTableStream:
         {"id": pa.array(list(range(n)), type=pa.int64()), "x": pa.array(list(range(n)), type=pa.int64())},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 async def feed_stream_to_channel(stream: ArrowTableStream, ch: Channel) -> None:
-    for tag, data in stream.iter_data():
-        await ch.writer.send((tag, data))
+    for key, data in stream.iter_data():
+        await ch.writer.send((key, data))
     await ch.writer.close()
 
 
diff --git a/tests/test_channels/test_native_async_operators.py b/tests/test_channels/test_native_async_operators.py
index d800ab65..81b0e9c7 100644
--- a/tests/test_channels/test_native_async_operators.py
+++ b/tests/test_channels/test_native_async_operators.py
@@ -6,11 +6,11 @@
 operator tests in ``tests/test_core/operators/test_operators.py``.
 
 Covers:
-- SelectTagColumns streaming: per-row tag column selection
+- SelectKeyColumns streaming: per-row key column selection
 - SelectDataColumns streaming: per-row data column selection
-- DropTagColumns streaming: per-row tag column dropping
+- DropKeyColumns streaming: per-row key column dropping
 - DropDataColumns streaming: per-row data column dropping
-- MapTags streaming: per-row tag column renaming
+- MapKeys streaming: per-row key column renaming
 - MapData streaming: per-row data column renaming
 - Batch streaming: accumulate-and-emit full batches, partial batch handling
 - SemiJoin build-probe: collect right, stream left through hash lookup
@@ -31,12 +31,12 @@
 from orcapod.core.operators import (
     Batch,
     DropDataColumns,
-    DropTagColumns,
+    DropKeyColumns,
     Join,
     MapData,
-    MapTags,
+    MapKeys,
     SelectDataColumns,
-    SelectTagColumns,
+    SelectKeyColumns,
     SemiJoin,
 )
 from orcapod.core.streams.arrow_table_stream import ArrowTableStream
@@ -54,7 +54,7 @@ def _make_non_nullable_schema(*fields: tuple) -> "pa.Schema":
 
 
 def make_simple_stream() -> ArrowTableStream:
-    """Stream with 1 tag (animal) and 2 data columns (weight, legs). Uses nullable=False schema."""
+    """Stream with 1 key (animal) and 2 data columns (weight, legs). Uses nullable=False schema."""
     schema = _make_non_nullable_schema(
         ("animal", pa.large_string()), ("weight", pa.float64()), ("legs", pa.int64())
     )
@@ -64,11 +64,11 @@ def make_simple_stream() -> ArrowTableStream:
          "legs": pa.array([4, 4, 2], type=pa.int64())},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["animal"])
+    return ArrowTableStream(table, key_columns=["animal"])
 
 
-def make_two_tag_stream() -> ArrowTableStream:
-    """Stream with 2 tags (region, animal) and 1 data column (count). Uses nullable=False schema."""
+def make_two_key_stream() -> ArrowTableStream:
+    """Stream with 2 keys (region, animal) and 1 data column (count). Uses nullable=False schema."""
     schema = _make_non_nullable_schema(
         ("region", pa.large_string()), ("animal", pa.large_string()), ("count", pa.int64())
     )
@@ -78,21 +78,21 @@ def make_two_tag_stream() -> ArrowTableStream:
          "count": pa.array([10, 5, 8], type=pa.int64())},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["region", "animal"])
+    return ArrowTableStream(table, key_columns=["region", "animal"])
 
 
 def make_int_stream(n: int = 3) -> ArrowTableStream:
-    """Stream with tag=id, data=x (ints). Uses nullable=False schema."""
+    """Stream with key=id, data=x (ints). Uses nullable=False schema."""
     schema = _make_non_nullable_schema(("id", pa.int64()), ("x", pa.int64()))
     table = pa.table(
         {"id": pa.array(list(range(n)), type=pa.int64()), "x": pa.array(list(range(n)), type=pa.int64())},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 def make_two_col_stream(n: int = 3) -> ArrowTableStream:
-    """Stream with tag=id, data={x, y}. Uses nullable=False schema."""
+    """Stream with key=id, data={x, y}. Uses nullable=False schema."""
     schema = _make_non_nullable_schema(("id", pa.int64()), ("x", pa.int64()), ("y", pa.int64()))
     table = pa.table(
         {"id": pa.array(list(range(n)), type=pa.int64()),
@@ -100,7 +100,7 @@ def make_two_col_stream(n: int = 3) -> ArrowTableStream:
          "y": pa.array([i * 10 for i in range(n)], type=pa.int64())},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 def make_left_stream() -> ArrowTableStream:
@@ -109,7 +109,7 @@ def make_left_stream() -> ArrowTableStream:
         {"id": pa.array([1, 2, 3], type=pa.int64()), "value_a": pa.array([10, 20, 30], type=pa.int64())},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 def make_right_stream() -> ArrowTableStream:
@@ -118,24 +118,24 @@ def make_right_stream() -> ArrowTableStream:
         {"id": pa.array([2, 3, 4], type=pa.int64()), "value_b": pa.array([200, 300, 400], type=pa.int64())},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 def make_disjoint_stream() -> ArrowTableStream:
-    """Stream with same tags as simple_stream but different data columns. Uses nullable=False schema."""
+    """Stream with same keys as simple_stream but different data columns. Uses nullable=False schema."""
     schema = _make_non_nullable_schema(("animal", pa.large_string()), ("speed", pa.float64()))
     table = pa.table(
         {"animal": pa.array(["cat", "dog", "bird"], type=pa.large_string()),
          "speed": pa.array([30.0, 45.0, 80.0], type=pa.float64())},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["animal"])
+    return ArrowTableStream(table, key_columns=["animal"])
 
 
 async def feed(stream: ArrowTableStream, ch: Channel) -> None:
-    """Push all (tag, data) from a stream into a channel, then close."""
-    for tag, data in stream.iter_data():
-        await ch.writer.send((tag, data))
+    """Push all (key, data) from a stream into a channel, then close."""
+    for key, data in stream.iter_data():
+        await ch.writer.send((key, data))
     await ch.writer.close()
 
 
@@ -162,50 +162,50 @@ async def run_binary(
 
 
 def sync_process_to_rows(op, *streams):
-    """Run sync static_process and return list of (tag, data) pairs."""
+    """Run sync static_process and return list of (key, data) pairs."""
     result = op.static_process(*streams)
     return list(result.iter_data())
 
 
 # ===================================================================
-# SelectTagColumns — streaming per-row
+# SelectKeyColumns — streaming per-row
 # ===================================================================
 
 
-class TestSelectTagColumnsStreaming:
+class TestSelectKeyColumnsStreaming:
     @pytest.mark.asyncio
-    async def test_keeps_only_selected_tags(self):
-        stream = make_two_tag_stream()
-        op = SelectTagColumns(columns=["region"])
+    async def test_keeps_only_selected_keys(self):
+        stream = make_two_key_stream()
+        op = SelectKeyColumns(columns=["region"])
         results = await run_unary(op, stream)
 
         assert len(results) == 3
-        for tag, data in results:
-            tag_keys = tag.keys()
-            assert "region" in tag_keys
-            assert "animal" not in tag_keys
+        for key, data in results:
+            key_keys = key.keys()
+            assert "region" in key_keys
+            assert "animal" not in key_keys
             # data columns unchanged
             assert "count" in data.keys()
 
     @pytest.mark.asyncio
     async def test_all_columns_selected_passthrough(self):
-        """When all tag columns are already selected, rows pass through unaltered."""
-        stream = make_two_tag_stream()
-        op = SelectTagColumns(columns=["region", "animal"])
+        """When all key columns are already selected, rows pass through unaltered."""
+        stream = make_two_key_stream()
+        op = SelectKeyColumns(columns=["region", "animal"])
         results = await run_unary(op, stream)
 
         assert len(results) == 3
-        for tag, data in results:
-            assert set(tag.keys()) == {"region", "animal"}
+        for key, data in results:
+            assert set(key.keys()) == {"region", "animal"}
             assert "count" in data.keys()
 
     @pytest.mark.asyncio
     async def test_data_values_preserved(self):
-        stream = make_two_tag_stream()
-        op = SelectTagColumns(columns=["region"])
+        stream = make_two_key_stream()
+        op = SelectKeyColumns(columns=["region"])
         results = await run_unary(op, stream)
 
-        regions = sorted(tag.as_dict()["region"] for tag, _ in results)
+        regions = sorted(key.as_dict()["region"] for key, _ in results)
         assert regions == ["east", "east", "west"]
 
     @pytest.mark.asyncio
@@ -213,27 +213,27 @@ async def test_empty_input(self):
         input_ch = Channel(buffer_size=4)
         output_ch = Channel(buffer_size=4)
         await input_ch.writer.close()
-        op = SelectTagColumns(columns=["region"])
+        op = SelectKeyColumns(columns=["region"])
         await op.async_execute([input_ch.reader], output_ch.writer)
         results = await output_ch.reader.collect()
         assert results == []
 
     @pytest.mark.asyncio
     async def test_matches_sync_output(self):
-        stream = make_two_tag_stream()
-        op = SelectTagColumns(columns=["region"])
+        stream = make_two_key_stream()
+        op = SelectKeyColumns(columns=["region"])
 
         async_results = await run_unary(op, stream)
         sync_results = sync_process_to_rows(op, stream)
 
         assert len(async_results) == len(sync_results)
-        async_tags = sorted(t.as_dict()["region"] for t, _ in async_results)
-        sync_tags = sorted(t.as_dict()["region"] for t, _ in sync_results)
-        assert async_tags == sync_tags
+        async_keys = sorted(t.as_dict()["region"] for t, _ in async_results)
+        sync_keys = sorted(t.as_dict()["region"] for t, _ in sync_results)
+        assert async_keys == sync_keys
 
     @pytest.mark.asyncio
-    async def test_system_tags_preserved(self):
-        """System tags on Tag objects should survive per-row selection."""
+    async def test_system_keys_preserved(self):
+        """System keys on Key objects should survive per-row selection."""
         from orcapod.core.sources.arrow_table_source import ArrowTableSource
 
         src = ArrowTableSource(
@@ -244,17 +244,17 @@ async def test_system_tags_preserved(self):
                     "count": pa.array([10, 5], type=pa.int64()),
                 }
             ),
-            tag_columns=["region", "animal"],
+            key_columns=["region", "animal"],
             infer_nullable=True,
         )
-        op = SelectTagColumns(columns=["region"])
+        op = SelectKeyColumns(columns=["region"])
         results = await run_unary(op, src)
 
         assert len(results) == 2
-        for tag, _ in results:
-            sys_tags = tag.system_tags()
-            # Source-backed streams have system tags
-            assert len(sys_tags) > 0
+        for key, _ in results:
+            sys_keys = key.system_keys()
+            # Source-backed streams have system keys
+            assert len(sys_keys) > 0
 
 
 # ===================================================================
@@ -274,9 +274,9 @@ async def test_keeps_only_selected_data(self):
             pkt_keys = data.keys()
             assert "weight" in pkt_keys
             assert "legs" not in pkt_keys
-            # tag columns unchanged
-        for tag, _ in results:
-            assert "animal" in tag.keys()
+            # key columns unchanged
+        for key, _ in results:
+            assert "animal" in key.keys()
 
     @pytest.mark.asyncio
     async def test_all_columns_selected_passthrough(self):
@@ -333,7 +333,7 @@ async def test_source_info_for_dropped_columns_not_surfaced(self):
                     "legs": pa.array([4, 4], type=pa.int64()),
                 }
             ),
-            tag_columns=["animal"],
+            key_columns=["animal"],
             infer_nullable=True,
         )
         op = SelectDataColumns(columns=["weight"])
@@ -346,40 +346,40 @@ async def test_source_info_for_dropped_columns_not_surfaced(self):
 
 
 # ===================================================================
-# DropTagColumns — streaming per-row
+# DropKeyColumns — streaming per-row
 # ===================================================================
 
 
-class TestDropTagColumnsStreaming:
+class TestDropKeyColumnsStreaming:
     @pytest.mark.asyncio
-    async def test_drops_specified_tags(self):
-        stream = make_two_tag_stream()
-        op = DropTagColumns(columns=["region"])
+    async def test_drops_specified_keys(self):
+        stream = make_two_key_stream()
+        op = DropKeyColumns(columns=["region"])
         results = await run_unary(op, stream)
 
         assert len(results) == 3
-        for tag, data in results:
-            assert "region" not in tag.keys()
-            assert "animal" in tag.keys()
+        for key, data in results:
+            assert "region" not in key.keys()
+            assert "animal" in key.keys()
             assert "count" in data.keys()
 
     @pytest.mark.asyncio
     async def test_no_columns_to_drop_passthrough(self):
-        stream = make_two_tag_stream()
-        op = DropTagColumns(columns=["nonexistent"], strict=False)
+        stream = make_two_key_stream()
+        op = DropKeyColumns(columns=["nonexistent"], strict=False)
         results = await run_unary(op, stream)
 
         assert len(results) == 3
-        for tag, _ in results:
-            assert set(tag.keys()) == {"region", "animal"}
+        for key, _ in results:
+            assert set(key.keys()) == {"region", "animal"}
 
     @pytest.mark.asyncio
     async def test_data_values_preserved(self):
-        stream = make_two_tag_stream()
-        op = DropTagColumns(columns=["region"])
+        stream = make_two_key_stream()
+        op = DropKeyColumns(columns=["region"])
         results = await run_unary(op, stream)
 
-        animals = sorted(tag.as_dict()["animal"] for tag, _ in results)
+        animals = sorted(key.as_dict()["animal"] for key, _ in results)
         assert animals == ["cat", "cat", "dog"]
 
     @pytest.mark.asyncio
@@ -387,15 +387,15 @@ async def test_empty_input(self):
         input_ch = Channel(buffer_size=4)
         output_ch = Channel(buffer_size=4)
         await input_ch.writer.close()
-        op = DropTagColumns(columns=["region"])
+        op = DropKeyColumns(columns=["region"])
         await op.async_execute([input_ch.reader], output_ch.writer)
         results = await output_ch.reader.collect()
         assert results == []
 
     @pytest.mark.asyncio
     async def test_matches_sync_output(self):
-        stream = make_two_tag_stream()
-        op = DropTagColumns(columns=["region"])
+        stream = make_two_key_stream()
+        op = DropKeyColumns(columns=["region"])
 
         async_results = await run_unary(op, stream)
         sync_results = sync_process_to_rows(op, stream)
@@ -422,8 +422,8 @@ async def test_drops_specified_data(self):
         for _, data in results:
             assert "legs" not in data.keys()
             assert "weight" in data.keys()
-        for tag, _ in results:
-            assert "animal" in tag.keys()
+        for key, _ in results:
+            assert "animal" in key.keys()
 
     @pytest.mark.asyncio
     async def test_no_columns_to_drop_passthrough(self):
@@ -479,7 +479,7 @@ async def test_source_info_for_dropped_columns_not_surfaced(self):
                     "legs": pa.array([4, 4], type=pa.int64()),
                 }
             ),
-            tag_columns=["animal"],
+            key_columns=["animal"],
             infer_nullable=True,
         )
         op = DropDataColumns(columns=["legs"])
@@ -492,68 +492,68 @@ async def test_source_info_for_dropped_columns_not_surfaced(self):
 
 
 # ===================================================================
-# MapTags — streaming per-row
+# MapKeys — streaming per-row
 # ===================================================================
 
 
-class TestMapTagsStreaming:
+class TestMapKeysStreaming:
     @pytest.mark.asyncio
-    async def test_renames_tag_column(self):
-        stream = make_two_tag_stream()
-        op = MapTags(name_map={"region": "area"})
+    async def test_renames_key_column(self):
+        stream = make_two_key_stream()
+        op = MapKeys(name_map={"region": "area"})
         results = await run_unary(op, stream)
 
         assert len(results) == 3
-        for tag, _ in results:
-            tag_keys = tag.keys()
-            assert "area" in tag_keys
-            assert "region" not in tag_keys
+        for key, _ in results:
+            key_keys = key.keys()
+            assert "area" in key_keys
+            assert "region" not in key_keys
 
     @pytest.mark.asyncio
     async def test_data_values_preserved(self):
-        stream = make_two_tag_stream()
-        op = MapTags(name_map={"region": "area"})
+        stream = make_two_key_stream()
+        op = MapKeys(name_map={"region": "area"})
         results = await run_unary(op, stream)
 
-        areas = sorted(tag.as_dict()["area"] for tag, _ in results)
+        areas = sorted(key.as_dict()["area"] for key, _ in results)
         assert areas == ["east", "east", "west"]
 
     @pytest.mark.asyncio
     async def test_drop_unmapped(self):
-        stream = make_two_tag_stream()
-        op = MapTags(name_map={"region": "area"}, drop_unmapped=True)
+        stream = make_two_key_stream()
+        op = MapKeys(name_map={"region": "area"}, drop_unmapped=True)
         results = await run_unary(op, stream)
 
         assert len(results) == 3
-        for tag, _ in results:
-            tag_keys = tag.keys()
-            assert "area" in tag_keys
-            assert "animal" not in tag_keys  # dropped because unmapped
+        for key, _ in results:
+            key_keys = key.keys()
+            assert "area" in key_keys
+            assert "animal" not in key_keys  # dropped because unmapped
 
     @pytest.mark.asyncio
     async def test_no_matching_rename_passthrough(self):
-        stream = make_two_tag_stream()
-        op = MapTags(name_map={"nonexistent": "nope"})
+        stream = make_two_key_stream()
+        op = MapKeys(name_map={"nonexistent": "nope"})
         results = await run_unary(op, stream)
 
         assert len(results) == 3
-        for tag, _ in results:
-            assert set(tag.keys()) == {"region", "animal"}
+        for key, _ in results:
+            assert set(key.keys()) == {"region", "animal"}
 
     @pytest.mark.asyncio
     async def test_empty_input(self):
         input_ch = Channel(buffer_size=4)
         output_ch = Channel(buffer_size=4)
         await input_ch.writer.close()
-        op = MapTags(name_map={"region": "area"})
+        op = MapKeys(name_map={"region": "area"})
         await op.async_execute([input_ch.reader], output_ch.writer)
         results = await output_ch.reader.collect()
         assert results == []
 
     @pytest.mark.asyncio
     async def test_matches_sync_output(self):
-        stream = make_two_tag_stream()
-        op = MapTags(name_map={"region": "area"})
+        stream = make_two_key_stream()
+        op = MapKeys(name_map={"region": "area"})
 
         async_results = await run_unary(op, stream)
         sync_results = sync_process_to_rows(op, stream)
@@ -565,8 +565,8 @@ async def test_matches_sync_output(self):
 
     @pytest.mark.asyncio
     async def test_matches_sync_output_with_drop_unmapped(self):
-        stream = make_two_tag_stream()
-        op = MapTags(name_map={"region": "area"}, drop_unmapped=True)
+        stream = make_two_key_stream()
+        op = MapKeys(name_map={"region": "area"}, drop_unmapped=True)
 
         async_results = await run_unary(op, stream)
         sync_results = sync_process_to_rows(op, stream)
@@ -632,7 +632,7 @@ async def test_source_info_renamed(self):
                     "legs": pa.array([4, 4], type=pa.int64()),
                 }
             ),
-            tag_columns=["animal"],
+            key_columns=["animal"],
             infer_nullable=True,
         )
         op = MapData(name_map={"weight": "mass"})
@@ -717,13 +717,13 @@ async def test_batch_values_are_lists(self):
         results = await run_unary(op, stream)
 
         assert len(results) == 2
-        for tag, data in results:
+        for key, data in results:
             # Each value should be a list
-            tag_d = tag.as_dict()
+            key_d = key.as_dict()
             pkt_d = data.as_dict()
-            assert isinstance(tag_d["id"], list)
+            assert isinstance(key_d["id"], list)
             assert isinstance(pkt_d["x"], list)
-            assert len(tag_d["id"]) == 2
+            assert len(key_d["id"]) == 2
             assert len(pkt_d["x"]) == 2
 
     @pytest.mark.asyncio
@@ -807,7 +807,7 @@ async def test_filters_left_by_right(self):
         op = SemiJoin()
         results = await run_binary(op, left, right)
 
-        ids = sorted(tag.as_dict()["id"] for tag, _ in results)
+        ids = sorted(key.as_dict()["id"] for key, _ in results)
         assert ids == [2, 3]
 
     @pytest.mark.asyncio
@@ -817,8 +817,8 @@ async def test_preserves_left_schema(self):
         op = SemiJoin()
         results = await run_binary(op, left, right)
 
-        for tag, data in results:
-            assert "id" in tag.keys()
+        for key, data in results:
+            assert "id" in key.keys()
             assert "value_a" in data.keys()
             assert "value_b" not in data.keys()
 
@@ -830,7 +830,7 @@ async def test_preserves_left_data(self):
         results = await run_binary(op, left, right)
 
         result_map = {
-            tag.as_dict()["id"]: pkt.as_dict()["value_a"] for tag, pkt in results
+            key.as_dict()["id"]: pkt.as_dict()["value_a"] for key, pkt in results
         }
         assert result_map[2] == 20
         assert result_map[3] == 30
@@ -849,8 +849,8 @@ async def test_no_common_keys_returns_all_left(self):
                 "y": pa.array([100, 200], type=pa.int64()),
             }
         )
-        left = ArrowTableStream(left_table, tag_columns=["a"])
-        right = ArrowTableStream(right_table, tag_columns=["b"])
+        left = ArrowTableStream(left_table, key_columns=["a"])
+        right = ArrowTableStream(right_table, key_columns=["b"])
         op = SemiJoin()
         results = await run_binary(op, left, right)
 
@@ -870,8 +870,8 @@ async def test_no_matching_rows_empty_result(self):
                 "y": pa.array([30, 40], type=pa.int64()),
             }
         )
-        left = ArrowTableStream(left_table, tag_columns=["id"])
-        right = ArrowTableStream(right_table, tag_columns=["id"])
+        left = ArrowTableStream(left_table, key_columns=["id"])
+        right = ArrowTableStream(right_table, key_columns=["id"])
         op = SemiJoin()
         results = await run_binary(op, left, right)
 
@@ -886,7 +886,7 @@ async def test_empty_left_returns_empty(self):
                 "y": pa.array([100, 200], type=pa.int64()),
             }
         )
-        right = ArrowTableStream(right_table, tag_columns=["id"])
+        right = ArrowTableStream(right_table, key_columns=["id"])
 
         left_ch = Channel(buffer_size=4)
         right_ch = Channel(buffer_size=64)
@@ -949,8 +949,8 @@ async def test_large_input_streaming(self):
                 "y": pa.array(list(range(0, 100, 3)), type=pa.int64()),
             }
         )
-        left = ArrowTableStream(left_table, tag_columns=["id"])
-        right = ArrowTableStream(right_table, tag_columns=["id"])
+        left = ArrowTableStream(left_table, key_columns=["id"])
+        right = ArrowTableStream(right_table, key_columns=["id"])
         op = SemiJoin()
         results = await run_binary(op, left, right)
 
@@ -990,8 +990,8 @@ async def test_two_way_join(self):
         results = await run_binary(op, left, right)
 
         assert len(results) == 3
-        for tag, data in results:
-            assert "animal" in tag.keys()
+        for key, data in results:
+            assert "animal" in key.keys()
             pkt_d = data.as_dict()
             assert "weight" in pkt_d
             assert "speed" in pkt_d
@@ -1010,13 +1010,13 @@ async def test_two_way_join_data_correct(self):
                 "y": pa.array([100, 200, 300], type=pa.int64()),
             }
         )
-        left = ArrowTableStream(left_table, tag_columns=["id"])
-        right = ArrowTableStream(right_table, tag_columns=["id"])
+        left = ArrowTableStream(left_table, key_columns=["id"])
+        right = ArrowTableStream(right_table, key_columns=["id"])
         op = Join()
         results = await run_binary(op, left, right)
 
         assert len(results) == 3
-        result_map = {tag.as_dict()["id"]: pkt.as_dict() for tag, pkt in results}
+        result_map = {key.as_dict()["id"]: pkt.as_dict() for key, pkt in results}
         assert result_map[0] == {"x": 10, "y": 100}
         assert result_map[1] == {"x": 20, "y": 200}
         assert result_map[2] == {"x": 30, "y": 300}
@@ -1041,9 +1041,9 @@ async def test_three_way_join(self):
                 "c": pa.array([1000, 2000], type=pa.int64()),
             }
         )
-        s1 = ArrowTableStream(t1, tag_columns=["id"])
-        s2 = ArrowTableStream(t2, tag_columns=["id"])
-        s3 = ArrowTableStream(t3, tag_columns=["id"])
+        s1 = ArrowTableStream(t1, key_columns=["id"])
+        s2 = ArrowTableStream(t2, key_columns=["id"])
+        s3 = ArrowTableStream(t3, key_columns=["id"])
 
         op = Join()
         ch1 = Channel(buffer_size=64)
@@ -1058,13 +1058,13 @@ async def test_three_way_join(self):
         results = await out.reader.collect()
 
         assert len(results) == 2
-        result_map = {tag.as_dict()["id"]: pkt.as_dict() for tag, pkt in results}
+        result_map = {key.as_dict()["id"]: pkt.as_dict() for key, pkt in results}
         assert result_map[1] == {"a": 10, "b": 100, "c": 1000}
         assert result_map[2] == {"a": 20, "b": 200, "c": 2000}
 
     @pytest.mark.asyncio
-    async def test_join_no_shared_tags_cartesian(self):
-        """When no shared tag keys, join produces a cartesian product."""
+    async def test_join_no_shared_keys_cartesian(self):
+        """When the inputs share no key columns, join produces a cartesian product."""
         left_table = pa.table(
             {
                 "a": pa.array([1, 2], type=pa.int64()),
@@ -1077,8 +1077,8 @@ async def test_join_no_shared_tags_cartesian(self):
                 "y": pa.array([30, 40], type=pa.int64()),
             }
         )
-        left = ArrowTableStream(left_table, tag_columns=["a"])
-        right = ArrowTableStream(right_table, tag_columns=["b"])
+        left = ArrowTableStream(left_table, key_columns=["a"])
+        right = ArrowTableStream(right_table, key_columns=["b"])
         op = Join()
         results = await run_binary(op, left, right)
 
@@ -1125,9 +1125,9 @@ async def test_matches_sync_three_way(self):
         t3 = pa.table(
             {"id": pa.array([1, 2, 3], type=pa.int64()), "c": pa.array([1000, 2000, 3000], type=pa.int64())}
         )
-        s1 = ArrowTableStream(t1, tag_columns=["id"])
-        s2 = ArrowTableStream(t2, tag_columns=["id"])
-        s3 = ArrowTableStream(t3, tag_columns=["id"])
+        s1 = ArrowTableStream(t1, key_columns=["id"])
+        s2 = ArrowTableStream(t2, key_columns=["id"])
+        s3 = ArrowTableStream(t3, key_columns=["id"])
 
         op = Join()
         sync_results = sync_process_to_rows(op, s1, s2, s3)
@@ -1157,7 +1157,7 @@ async def test_three_way_streams_before_all_closed(self):
 
         This is the key behavioral difference from the old collect-based
         fallback: downstream can start work as soon as all N sides have
-        contributed a matching row for a tag key.
+        contributed a matching row for a given key value.
         """
         op = Join()
         ch1 = Channel(buffer_size=64)
@@ -1165,7 +1165,7 @@ async def test_three_way_streams_before_all_closed(self):
         ch3 = Channel(buffer_size=64)
         out = Channel(buffer_size=64)
 
-        from orcapod.core.datagrams import Data, Tag
+        from orcapod.core.datagrams import Data, Key
 
         # Start the join in the background
         join_task = asyncio.create_task(
@@ -1173,13 +1173,13 @@ async def test_three_way_streams_before_all_closed(self):
         )
 
         # Send one matching row from each side
-        await ch1.writer.send((Tag({"id": 1}), Data({"a": 10})))
-        await ch2.writer.send((Tag({"id": 1}), Data({"b": 100})))
-        await ch3.writer.send((Tag({"id": 1}), Data({"c": 1000})))
+        await ch1.writer.send((Key({"id": 1}), Data({"a": 10})))
+        await ch2.writer.send((Key({"id": 1}), Data({"b": 100})))
+        await ch3.writer.send((Key({"id": 1}), Data({"c": 1000})))
 
         # The match should be emitted while channels are still open
-        tag, pkt = await asyncio.wait_for(out.reader.receive(), timeout=2.0)
-        assert tag.as_dict()["id"] == 1
+        key, pkt = await asyncio.wait_for(out.reader.receive(), timeout=2.0)
+        assert key.as_dict()["id"] == 1
         assert pkt.as_dict() == {"a": 10, "b": 100, "c": 1000}
 
         # Close all inputs and let join finish
@@ -1195,7 +1195,7 @@ async def test_four_way_join(self):
             pa.table({"id": pa.array([1, 2], type=pa.int64()), f"v{i}": pa.array([i * 10, i * 20], type=pa.int64())})
             for i in range(4)
         ]
-        streams = [ArrowTableStream(t, tag_columns=["id"]) for t in tables]
+        streams = [ArrowTableStream(t, key_columns=["id"]) for t in tables]
 
         op = Join()
         channels = [Channel(buffer_size=64) for _ in range(4)]
@@ -1208,7 +1208,7 @@ async def test_four_way_join(self):
         results = await out.reader.collect()
 
         assert len(results) == 2
-        result_map = {tag.as_dict()["id"]: pkt.as_dict() for tag, pkt in results}
+        result_map = {key.as_dict()["id"]: pkt.as_dict() for key, pkt in results}
         assert result_map[1] == {"v0": 0, "v1": 10, "v2": 20, "v3": 30}
         assert result_map[2] == {"v0": 0, "v1": 20, "v2": 40, "v3": 60}
 
@@ -1221,23 +1221,23 @@ async def test_three_way_partial_match_no_premature_emit(self):
         ch3 = Channel(buffer_size=64)
         out = Channel(buffer_size=64)
 
-        from orcapod.core.datagrams import Data, Tag
+        from orcapod.core.datagrams import Data, Key
 
         join_task = asyncio.create_task(
             op.async_execute([ch1.reader, ch2.reader, ch3.reader], out.writer)
         )
 
         # Send matching rows from only 2 of 3 sides
-        await ch1.writer.send((Tag({"id": 1}), Data({"a": 10})))
-        await ch2.writer.send((Tag({"id": 1}), Data({"b": 100})))
+        await ch1.writer.send((Key({"id": 1}), Data({"a": 10})))
+        await ch2.writer.send((Key({"id": 1}), Data({"b": 100})))
 
         # Output should be empty — side 3 hasn't contributed yet
         with pytest.raises(asyncio.TimeoutError):
             await asyncio.wait_for(out.reader.receive(), timeout=0.5)
 
         # Now send the third side — match should complete
-        await ch3.writer.send((Tag({"id": 1}), Data({"c": 1000})))
-        tag, pkt = await asyncio.wait_for(out.reader.receive(), timeout=2.0)
+        await ch3.writer.send((Key({"id": 1}), Data({"c": 1000})))
+        key, pkt = await asyncio.wait_for(out.reader.receive(), timeout=2.0)
         assert pkt.as_dict() == {"a": 10, "b": 100, "c": 1000}
 
         await ch1.writer.close()
@@ -1250,8 +1250,8 @@ async def test_three_way_empty_side_produces_nothing(self):
         """If any input channel is empty, join produces no output."""
         t1 = pa.table({"id": pa.array([1], type=pa.int64()), "a": pa.array([10], type=pa.int64())})
         t2 = pa.table({"id": pa.array([1], type=pa.int64()), "b": pa.array([100], type=pa.int64())})
-        s1 = ArrowTableStream(t1, tag_columns=["id"])
-        s2 = ArrowTableStream(t2, tag_columns=["id"])
+        s1 = ArrowTableStream(t1, key_columns=["id"])
+        s2 = ArrowTableStream(t2, key_columns=["id"])
 
         op = Join()
         ch1 = Channel(buffer_size=64)
@@ -1266,14 +1266,14 @@ async def test_three_way_empty_side_produces_nothing(self):
         assert results == []
 
     @pytest.mark.asyncio
-    async def test_three_way_no_shared_tags_cartesian(self):
-        """Three-way join with disjoint tag keys produces cartesian product."""
+    async def test_three_way_no_shared_keys_cartesian(self):
+        """Three-way join with disjoint key columns produces cartesian product."""
         t1 = pa.table({"a": pa.array([1, 2], type=pa.int64()), "x": pa.array([10, 20], type=pa.int64())})
         t2 = pa.table({"b": pa.array([3], type=pa.int64()), "y": pa.array([30], type=pa.int64())})
         t3 = pa.table({"c": pa.array([5, 6], type=pa.int64()), "z": pa.array([50, 60], type=pa.int64())})
-        s1 = ArrowTableStream(t1, tag_columns=["a"])
-        s2 = ArrowTableStream(t2, tag_columns=["b"])
-        s3 = ArrowTableStream(t3, tag_columns=["c"])
+        s1 = ArrowTableStream(t1, key_columns=["a"])
+        s2 = ArrowTableStream(t2, key_columns=["b"])
+        s3 = ArrowTableStream(t3, key_columns=["c"])
 
         op = Join()
         ch1 = Channel(buffer_size=64)
@@ -1290,8 +1290,8 @@ async def test_three_way_no_shared_tags_cartesian(self):
         assert len(results) == 4
 
     @pytest.mark.asyncio
-    async def test_duplicate_tag_keys_cross_product(self):
-        """Multiple rows per tag key per side should produce a cross-product.
+    async def test_duplicate_key_keys_cross_product(self):
+        """Multiple rows per key value per side should produce a cross-product.
 
         Side 0 has 2 rows with id=1, side 1 has 3 rows with id=1.
         The join should emit 2 × 3 = 6 rows.
@@ -1308,8 +1308,8 @@ async def test_duplicate_tag_keys_cross_product(self):
                 "b": pa.array([100, 101, 102], type=pa.int64()),
             }
         )
-        s1 = ArrowTableStream(t1, tag_columns=["id"])
-        s2 = ArrowTableStream(t2, tag_columns=["id"])
+        s1 = ArrowTableStream(t1, key_columns=["id"])
+        s2 = ArrowTableStream(t2, key_columns=["id"])
 
         op = Join()
 
@@ -1338,7 +1338,7 @@ async def test_duplicate_tag_keys_cross_product(self):
 
     @pytest.mark.asyncio
     async def test_three_way_duplicate_keys_cross_product(self):
-        """Three-way join with duplicate tag keys produces correct cross-product.
+        """Three-way join with duplicate key values produces correct cross-product.
 
         Side 0: 2 rows, side 1: 1 row, side 2: 2 rows → 2 × 1 × 2 = 4 rows.
         """
@@ -1360,9 +1360,9 @@ async def test_three_way_duplicate_keys_cross_product(self):
                 "c": pa.array([1000, 1001], type=pa.int64()),
             }
         )
-        s1 = ArrowTableStream(t1, tag_columns=["id"])
-        s2 = ArrowTableStream(t2, tag_columns=["id"])
-        s3 = ArrowTableStream(t3, tag_columns=["id"])
+        s1 = ArrowTableStream(t1, key_columns=["id"])
+        s2 = ArrowTableStream(t2, key_columns=["id"])
+        s3 = ArrowTableStream(t3, key_columns=["id"])
 
         op = Join()
         sync_results = sync_process_to_rows(op, s1, s2, s3)
@@ -1391,12 +1391,12 @@ async def test_three_way_duplicate_keys_cross_product(self):
         assert async_data == sync_data
 
     @pytest.mark.asyncio
-    async def test_partially_overlapping_tags_matches_sync(self):
-        """Staggered join with partially overlapping tags matches sync.
+    async def test_partially_overlapping_keys_matches_sync(self):
+        """Staggered join with partially overlapping keys matches sync.
 
-        S0: tag={a}, S1: tag={b}, S2: tag={a}.
+        S0: key={a}, S1: key={b}, S2: key={a}.
         static_process joins iteratively: (S0 ⋈ S1) cartesian, then
-        result ⋈ S2 on shared tag 'a'.  The async staggered chain must
+        result ⋈ S2 on shared key 'a'.  The async staggered chain must
         produce the same 2 rows (not 4 from a full cartesian).
         """
         t1 = pa.table(
@@ -1417,9 +1417,9 @@ async def test_partially_overlapping_tags_matches_sync(self):
                 "z": pa.array([100, 200], type=pa.int64()),
             }
         )
-        s1 = ArrowTableStream(t1, tag_columns=["a"])
-        s2 = ArrowTableStream(t2, tag_columns=["b"])
-        s3 = ArrowTableStream(t3, tag_columns=["a"])
+        s1 = ArrowTableStream(t1, key_columns=["a"])
+        s2 = ArrowTableStream(t2, key_columns=["b"])
+        s3 = ArrowTableStream(t3, key_columns=["a"])
 
         op = Join()
         sync_results = sync_process_to_rows(op, s1, s2, s3)
@@ -1456,8 +1456,8 @@ async def test_input_pipeline_hashes_length_mismatch(self):
         """async_execute raises ValueError if input_pipeline_hashes length != inputs."""
         t1 = pa.table({"id": pa.array([1], type=pa.int64()), "a": pa.array([10], type=pa.int64())})
         t2 = pa.table({"id": pa.array([1], type=pa.int64()), "b": pa.array([20], type=pa.int64())})
-        s1 = ArrowTableStream(t1, tag_columns=["id"])
-        s2 = ArrowTableStream(t2, tag_columns=["id"])
+        s1 = ArrowTableStream(t1, key_columns=["id"])
+        s2 = ArrowTableStream(t2, key_columns=["id"])
 
         op = Join()
         ch1 = Channel(buffer_size=64)
@@ -1484,22 +1484,22 @@ async def test_buffered_rows_both_sides_emit_on_reindex(self):
         ch2 = Channel(buffer_size=64)
         out = Channel(buffer_size=64)
 
-        from orcapod.core.datagrams import Data, Tag
+        from orcapod.core.datagrams import Data, Key
 
         join_task = asyncio.create_task(
             op.async_execute([ch1.reader, ch2.reader], out.writer)
         )
 
         # Buffer 2 rows on side 0 before side 1 sends anything
-        await ch1.writer.send((Tag({"id": 1}), Data({"a": 10})))
-        await ch1.writer.send((Tag({"id": 2}), Data({"a": 20})))
+        await ch1.writer.send((Key({"id": 1}), Data({"a": 10})))
+        await ch1.writer.send((Key({"id": 2}), Data({"a": 20})))
 
         # Yield to event loop so drain tasks process the sends
         await asyncio.sleep(0)
 
         # Now side 1 sends — triggers shared_keys computation and re-index
-        await ch2.writer.send((Tag({"id": 1}), Data({"b": 100})))
-        await ch2.writer.send((Tag({"id": 2}), Data({"b": 200})))
+        await ch2.writer.send((Key({"id": 1}), Data({"b": 100})))
+        await ch2.writer.send((Key({"id": 2}), Data({"b": 200})))
 
         # Close and collect
         await ch1.writer.close()
@@ -1521,19 +1521,19 @@ async def test_buffered_rows_both_sides_emit_on_reindex(self):
 class TestStreamingPipelineIntegration:
     @pytest.mark.asyncio
     async def test_select_then_map_chain(self):
-        """SelectTagColumns → MapTags in a streaming pipeline."""
-        stream = make_two_tag_stream()
+        """SelectKeyColumns → MapKeys in a streaming pipeline."""
+        stream = make_two_key_stream()
 
-        select_op = SelectTagColumns(columns=["region"])
-        map_op = MapTags(name_map={"region": "area"})
+        select_op = SelectKeyColumns(columns=["region"])
+        map_op = MapKeys(name_map={"region": "area"})
 
         ch1 = Channel(buffer_size=16)
         ch2 = Channel(buffer_size=16)
         ch3 = Channel(buffer_size=16)
 
         async def source():
-            for tag, data in stream.iter_data():
-                await ch1.writer.send((tag, data))
+            for key, data in stream.iter_data():
+                await ch1.writer.send((key, data))
             await ch1.writer.close()
 
         async with asyncio.TaskGroup() as tg:
@@ -1543,10 +1543,10 @@ async def source():
 
         results = await ch3.reader.collect()
         assert len(results) == 3
-        for tag, _ in results:
-            assert "area" in tag.keys()
-            assert "region" not in tag.keys()
-            assert "animal" not in tag.keys()
+        for key, _ in results:
+            assert "area" in key.keys()
+            assert "region" not in key.keys()
+            assert "animal" not in key.keys()
 
     @pytest.mark.asyncio
     async def test_join_then_select_chain(self):
@@ -1563,8 +1563,8 @@ async def test_join_then_select_chain(self):
                 "y": pa.array([100, 200, 300], type=pa.int64()),
             }
         )
-        left = ArrowTableStream(left_table, tag_columns=["id"])
-        right = ArrowTableStream(right_table, tag_columns=["id"])
+        left = ArrowTableStream(left_table, key_columns=["id"])
+        right = ArrowTableStream(right_table, key_columns=["id"])
 
         join_op = Join()
         select_op = SelectDataColumns(columns=["x"])
@@ -1575,8 +1575,8 @@ async def test_join_then_select_chain(self):
         ch_out = Channel(buffer_size=16)
 
         async def push(stream, ch):
-            for tag, data in stream.iter_data():
-                await ch.writer.send((tag, data))
+            for key, data in stream.iter_data():
+                await ch.writer.send((key, data))
             await ch.writer.close()
 
         async with asyncio.TaskGroup() as tg:
@@ -1608,8 +1608,8 @@ async def test_semijoin_then_batch_chain(self):
         ch_out = Channel(buffer_size=16)
 
         async def push(stream, ch):
-            for tag, data in stream.iter_data():
-                await ch.writer.send((tag, data))
+            for key, data in stream.iter_data():
+                await ch.writer.send((key, data))
             await ch.writer.close()
 
         async with asyncio.TaskGroup() as tg:
@@ -1623,9 +1623,9 @@ async def push(stream, ch):
         results = await ch_out.reader.collect()
         # SemiJoin produces 2 rows (id=[2,3]), Batch(2) → 1 batch
         assert len(results) == 1
-        tag_d = results[0][0].as_dict()
-        assert isinstance(tag_d["id"], list)
-        assert sorted(tag_d["id"]) == [2, 3]
+        key_d = results[0][0].as_dict()
+        assert isinstance(key_d["id"], list)
+        assert sorted(key_d["id"]) == [2, 3]
 
     @pytest.mark.asyncio
     async def test_drop_map_select_three_stage(self):
@@ -1643,8 +1643,8 @@ async def test_drop_map_select_three_stage(self):
         ch4 = Channel(buffer_size=16)
 
         async def source():
-            for tag, data in stream.iter_data():
-                await ch1.writer.send((tag, data))
+            for key, data in stream.iter_data():
+                await ch1.writer.send((key, data))
             await ch1.writer.close()
 
         async with asyncio.TaskGroup() as tg:
@@ -1662,21 +1662,21 @@ async def source():
 
 
 # ===================================================================
-# Sync vs Async system-tag equivalence
+# Sync vs Async system-key equivalence
 # ===================================================================
 
 
-def _make_source(tag_col: str, data_col: str, data: dict) -> ArrowTableStream:
-    """Build an ArrowTableSource (which generates system tags) and return its stream."""
+def _make_source(key_col: str, data_col: str, data: dict) -> ArrowTableStream:
+    """Build an ArrowTableSource (which generates system keys) and return its stream."""
     from orcapod.core.sources.arrow_table_source import ArrowTableSource
 
     table = pa.table(
         {
-            tag_col: pa.array(data[tag_col], type=pa.large_string()),
+            key_col: pa.array(data[key_col], type=pa.large_string()),
             data_col: pa.array(data[data_col], type=pa.int64()),
         }
     )
-    return ArrowTableSource(table, tag_columns=[tag_col], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=[key_col], infer_nullable=True)
 
 
 async def run_binary_validated(
@@ -1688,7 +1688,7 @@ async def run_binary_validated(
 
     Calls ``validate_inputs`` for schema validation, then passes
     ``input_pipeline_hashes`` so operators like ``Join`` can compute
-    canonical system-tag column names.
+    canonical system-key column names.
     """
     op.validate_inputs(left, right)
     left_ch = Channel(buffer_size=1024)
@@ -1705,35 +1705,35 @@ async def run_binary_validated(
     return await output_ch.reader.collect()
 
 
-def _extract_system_tags(
+def _extract_system_keys(
     rows: list[tuple],
 ) -> list[dict[str, str]]:
-    """Extract sorted system-tag dicts from (tag, data) pairs."""
+    """Extract sorted system-key dicts from (key, data) pairs."""
     return sorted(
-        [tag.system_tags() for tag, _ in rows],
+        [key.system_keys() for key, _ in rows],
         key=lambda d: sorted(d.items()),
     )
 
 
-def _extract_system_tag_keys(rows: list[tuple]) -> set[str]:
-    """Collect all unique system-tag keys across rows."""
+def _extract_system_key_keys(rows: list[tuple]) -> set[str]:
+    """Collect all unique system-key names across rows."""
     keys: set[str] = set()
-    for tag, _ in rows:
-        keys.update(tag.system_tags().keys())
+    for key, _ in rows:
+        keys.update(key.system_keys().keys())
     return keys
 
 
-class TestJoinSystemTagEquivalence:
-    """Verify that Join.async_execute produces the same system-tag column
+class TestJoinSystemKeyEquivalence:
+    """Verify that Join.async_execute produces the same system-key column
     names and values as the sync static_process path.
 
-    Uses ``ArrowTableSource`` (which adds system-tag columns) rather than
-    bare ``ArrowTableStream`` to ensure system tags are present.
+    Uses ``ArrowTableSource`` (which adds system-key columns) rather than
+    bare ``ArrowTableStream`` to ensure system keys are present.
     """
 
     @pytest.mark.asyncio
-    async def test_two_way_system_tag_column_names_match(self):
-        """System-tag column names must be identical between sync and async."""
+    async def test_two_way_system_key_column_names_match(self):
+        """System-key column names must be identical between sync and async."""
         left = _make_source("key", "value", {"key": ["a", "b"], "value": [10, 20]})
         right = _make_source("key", "score", {"key": ["a", "b"], "score": [100, 200]})
         op = Join()
@@ -1745,15 +1745,15 @@ async def test_two_way_system_tag_column_names_match(self):
         # Async
         async_rows = await run_binary_validated(op, left, right)
 
-        sync_sys_keys = _extract_system_tag_keys(sync_rows)
-        async_sys_keys = _extract_system_tag_keys(async_rows)
+        sync_sys_keys = _extract_system_key_keys(sync_rows)
+        async_sys_keys = _extract_system_key_keys(async_rows)
 
-        assert sync_sys_keys, "Expected system tags to be present"
+        assert sync_sys_keys, "Expected system keys to be present"
         assert sync_sys_keys == async_sys_keys
 
     @pytest.mark.asyncio
-    async def test_two_way_system_tag_values_match(self):
-        """System-tag values for each row must match between sync and async."""
+    async def test_two_way_system_key_values_match(self):
+        """System-key values for each row must match between sync and async."""
         left = _make_source(
             "key", "value", {"key": ["a", "b", "c"], "value": [1, 2, 3]}
         )
@@ -1768,31 +1768,31 @@ async def test_two_way_system_tag_values_match(self):
 
         assert len(sync_rows) == len(async_rows)
 
-        sync_sys = _extract_system_tags(sync_rows)
-        async_sys = _extract_system_tags(async_rows)
+        sync_sys = _extract_system_keys(sync_rows)
+        async_sys = _extract_system_keys(async_rows)
         assert sync_sys == async_sys
 
     @pytest.mark.asyncio
-    async def test_two_way_system_tag_suffixes_use_pipeline_hash(self):
-        """System-tag column names should contain the pipeline_hash and
+    async def test_two_way_system_key_suffixes_use_pipeline_hash(self):
+        """System-key column names should contain the pipeline_hash and
         canonical position, matching the name-extending convention."""
         left = _make_source("key", "val", {"key": ["x"], "val": [1]})
         right = _make_source("key", "score", {"key": ["x"], "score": [2]})
         op = Join()
 
         async_rows = await run_binary_validated(op, left, right)
-        sys_keys = _extract_system_tag_keys(async_rows)
+        sys_keys = _extract_system_key_keys(async_rows)
 
-        # Each system-tag key should end with :{canonical_position}
+        # Each system-key name should end with :{canonical_position}
         for key in sys_keys:
-            assert key.startswith(constants.SYSTEM_TAG_PREFIX)
+            assert key.startswith(constants.SYSTEM_KEY_PREFIX)
             assert key[-2:] in (":0", ":1"), (
-                f"System tag key {key!r} does not end with :0 or :1"
+                f"System key key {key!r} does not end with :0 or :1"
             )
 
     @pytest.mark.asyncio
-    async def test_commutativity_system_tags_identical(self):
-        """Join(A, B) and Join(B, A) should produce identical system tags
+    async def test_commutativity_system_keys_identical(self):
+        """Join(A, B) and Join(B, A) should produce identical system keys
         (Join is commutative — canonical ordering by pipeline_hash)."""
         src_a = _make_source("id", "x", {"id": ["p", "q"], "x": [1, 2]})
         src_b = _make_source("id", "y", {"id": ["p", "q"], "y": [10, 20]})
@@ -1803,13 +1803,13 @@ async def test_commutativity_system_tags_identical(self):
 
         assert len(rows_ab) == len(rows_ba)
 
-        sys_ab = _extract_system_tags(rows_ab)
-        sys_ba = _extract_system_tags(rows_ba)
+        sys_ab = _extract_system_keys(rows_ab)
+        sys_ba = _extract_system_keys(rows_ba)
         assert sys_ab == sys_ba
 
     @pytest.mark.asyncio
-    async def test_three_way_system_tags_match_sync(self):
-        """N-way Staggered join should produce the same system tags as sync."""
+    async def test_three_way_system_keys_match_sync(self):
+        """N-way Staggered join should produce the same system keys as sync."""
         s1 = _make_source("id", "a", {"id": ["m", "n"], "a": [1, 2]})
         s2 = _make_source("id", "b", {"id": ["m", "n"], "b": [10, 20]})
         s3 = _make_source("id", "c", {"id": ["m", "n"], "c": [100, 200]})
@@ -1838,63 +1838,63 @@ async def test_three_way_system_tags_match_sync(self):
 
         assert len(sync_rows) == len(async_rows)
 
-        sync_sys_keys = _extract_system_tag_keys(sync_rows)
-        async_sys_keys = _extract_system_tag_keys(async_rows)
+        sync_sys_keys = _extract_system_key_keys(sync_rows)
+        async_sys_keys = _extract_system_key_keys(async_rows)
         assert sync_sys_keys == async_sys_keys
 
-        sync_sys = _extract_system_tags(sync_rows)
-        async_sys = _extract_system_tags(async_rows)
+        sync_sys = _extract_system_keys(sync_rows)
+        async_sys = _extract_system_keys(async_rows)
         assert sync_sys == async_sys
 
 
-class TestSortMergedSystemTags:
-    """Unit tests for Join._sort_merged_system_tags."""
+class TestSortMergedSystemKeys:
+    """Unit tests for Join._sort_merged_system_keys."""
 
     def test_sorts_same_provenance_path_by_value(self):
-        """System tag values at different positions within the same provenance
+        """System key values at different positions within the same provenance
         path should be sorted by (source_id, record_id) tuple."""
         merged_sys = {
             # Provenance path "abc123", position 0 — higher values
-            "_tag_source_id::abc123:0": "z_source",
-            "_tag_record_id::abc123:0": "z_record",
+            "_key_source_id::abc123:0": "z_source",
+            "_key_record_id::abc123:0": "z_record",
             # Provenance path "abc123", position 1 — lower values
-            "_tag_source_id::abc123:1": "a_source",
-            "_tag_record_id::abc123:1": "a_record",
+            "_key_source_id::abc123:1": "a_source",
+            "_key_record_id::abc123:1": "a_record",
         }
-        result = Join._sort_merged_system_tags(merged_sys)
+        result = Join._sort_merged_system_keys(merged_sys)
 
         # After sorting, position 0 should have the smaller values
-        assert result["_tag_source_id::abc123:0"] == "a_source"
-        assert result["_tag_record_id::abc123:0"] == "a_record"
-        assert result["_tag_source_id::abc123:1"] == "z_source"
-        assert result["_tag_record_id::abc123:1"] == "z_record"
+        assert result["_key_source_id::abc123:0"] == "a_source"
+        assert result["_key_record_id::abc123:0"] == "a_record"
+        assert result["_key_source_id::abc123:1"] == "z_source"
+        assert result["_key_record_id::abc123:1"] == "z_record"
 
     def test_single_position_unchanged(self):
         """Groups with only one position should not be modified."""
         merged_sys = {
-            "_tag_source_id::abc123:0": "only_source",
-            "_tag_record_id::abc123:0": "only_record",
+            "_key_source_id::abc123:0": "only_source",
+            "_key_record_id::abc123:0": "only_record",
         }
-        result = Join._sort_merged_system_tags(merged_sys)
+        result = Join._sort_merged_system_keys(merged_sys)
         assert result == merged_sys
 
-    def test_non_system_tag_keys_ignored(self):
-        """Non-system-tag keys should pass through unchanged."""
+    def test_non_system_key_keys_ignored(self):
+        """Non-system-key keys should pass through unchanged."""
         merged_sys = {
             "regular_key": "value",
-            "_tag_source_id::abc123:0": "src0",
-            "_tag_record_id::abc123:0": "rec0",
+            "_key_source_id::abc123:0": "src0",
+            "_key_record_id::abc123:0": "rec0",
         }
-        result = Join._sort_merged_system_tags(merged_sys)
+        result = Join._sort_merged_system_keys(merged_sys)
         assert result["regular_key"] == "value"
 
 
-class TestSemiJoinSystemTagEquivalence:
-    """Verify SemiJoin system-tag handling matches between sync and async."""
+class TestSemiJoinSystemKeyEquivalence:
+    """Verify SemiJoin system-key handling matches between sync and async."""
 
     @pytest.mark.asyncio
-    async def test_system_tags_preserved_through_semijoin(self):
-        """SemiJoin should preserve left-side system tags in both paths."""
+    async def test_system_keys_preserved_through_semijoin(self):
+        """SemiJoin should preserve left-side system keys in both paths."""
         left = _make_source("id", "val", {"id": ["a", "b", "c"], "val": [1, 2, 3]})
         right = _make_source(
             "id", "score", {"id": ["b", "c", "d"], "score": [20, 30, 40]}
@@ -1910,10 +1910,10 @@ async def test_system_tags_preserved_through_semijoin(self):
 
         assert len(sync_rows) == len(async_rows) == 2
 
-        sync_sys_keys = _extract_system_tag_keys(sync_rows)
-        async_sys_keys = _extract_system_tag_keys(async_rows)
+        sync_sys_keys = _extract_system_key_keys(sync_rows)
+        async_sys_keys = _extract_system_key_keys(async_rows)
         assert sync_sys_keys == async_sys_keys
 
-        sync_sys = _extract_system_tags(sync_rows)
-        async_sys = _extract_system_tags(async_rows)
+        sync_sys = _extract_system_keys(sync_rows)
+        async_sys = _extract_system_keys(async_rows)
         assert sync_sys == async_sys
diff --git a/tests/test_channels/test_node_async_execute.py b/tests/test_channels/test_node_async_execute.py
index c8483e35..94b27c5b 100644
--- a/tests/test_channels/test_node_async_execute.py
+++ b/tests/test_channels/test_node_async_execute.py
@@ -46,7 +46,7 @@ def make_stream(n: int = 5) -> ArrowTableStream:
         {"id": pa.array(list(range(n)), type=pa.int64()), "x": pa.array(list(range(n)), type=pa.int64())},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 def make_two_col_stream(n: int = 3) -> ArrowTableStream:
@@ -65,13 +65,13 @@ def make_two_col_stream(n: int = 3) -> ArrowTableStream:
         },
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 async def feed_stream_to_channel(stream: ArrowTableStream, ch: Channel) -> None:
-    """Push all (tag, data) pairs from a stream into a channel, then close."""
-    for tag, data in stream.iter_data():
-        await ch.writer.send((tag, data))
+    """Push all (key, data) pairs from a stream into a channel, then close."""
+    for key, data in stream.iter_data():
+        await ch.writer.send((key, data))
     await ch.writer.close()
 
 
@@ -216,8 +216,8 @@ async def test_empty_input_closes_cleanly(self):
         assert results == []
 
     @pytest.mark.asyncio
-    async def test_tags_preserved(self):
-        """Tags should pass through unchanged."""
+    async def test_keys_preserved(self):
+        """Keys should pass through unchanged."""
         _, pod = make_double_pod()
         node = FunctionNode(pod, make_stream(3))
 
@@ -228,7 +228,7 @@ async def test_tags_preserved(self):
         await node.async_execute(input_ch.reader, output_ch.writer)
 
         results = await output_ch.reader.collect()
-        ids = sorted(tag.as_dict()["id"] for tag, _ in results)
+        ids = sorted(key.as_dict()["id"] for key, _ in results)
         assert ids == [0, 1, 2]
 
 
@@ -276,8 +276,8 @@ async def test_sync_run_then_async_emits_from_cache(self):
         output_ch = Channel(buffer_size=16)
 
         # Send the same data that were already cached
-        for tag, data in input_stream.iter_data():
-            await input_ch.writer.send((tag, data))
+        for key, data in input_stream.iter_data():
+            await input_ch.writer.send((key, data))
         await input_ch.writer.close()
 
         await node2.async_execute(input_ch.reader, output_ch.writer)
@@ -476,7 +476,7 @@ async def test_binary_op_delegation(self):
                 "z": pa.array([100, 300], type=pa.int64()),
             }
         )
-        right = ArrowTableStream(right_table, tag_columns=["id"])
+        right = ArrowTableStream(right_table, key_columns=["id"])
 
         op = SemiJoin()
         node = OperatorNode(op, [left, right])
@@ -487,12 +487,12 @@ async def test_binary_op_delegation(self):
 
         await feed_stream_to_channel(make_stream(5), left_ch)
         await feed_stream_to_channel(
-            ArrowTableStream(right_table, tag_columns=["id"]), right_ch
+            ArrowTableStream(right_table, key_columns=["id"]), right_ch
         )
         await node.async_execute([left_ch.reader, right_ch.reader], output_ch.writer)
 
         results = await output_ch.reader.collect()
-        ids = sorted(tag.as_dict()["id"] for tag, _ in results)
+        ids = sorted(key.as_dict()["id"] for key, _ in results)
         assert ids == [1, 3]
 
     @pytest.mark.asyncio
@@ -509,8 +509,8 @@ async def test_nary_op_delegation(self):
                 "y": pa.array([100, 200, 300], type=pa.int64()),
             }
         )
-        left = ArrowTableStream(left_table, tag_columns=["id"])
-        right = ArrowTableStream(right_table, tag_columns=["id"])
+        left = ArrowTableStream(left_table, key_columns=["id"])
+        right = ArrowTableStream(right_table, key_columns=["id"])
         op = Join()
         node = OperatorNode(op, [left, right])
 
@@ -519,16 +519,16 @@ async def test_nary_op_delegation(self):
         output_ch = Channel(buffer_size=16)
 
         await feed_stream_to_channel(
-            ArrowTableStream(left_table, tag_columns=["id"]), left_ch
+            ArrowTableStream(left_table, key_columns=["id"]), left_ch
         )
         await feed_stream_to_channel(
-            ArrowTableStream(right_table, tag_columns=["id"]), right_ch
+            ArrowTableStream(right_table, key_columns=["id"]), right_ch
         )
         await node.async_execute([left_ch.reader, right_ch.reader], output_ch.writer)
 
         results = await output_ch.reader.collect()
         assert len(results) == 3
-        ids = sorted(tag.as_dict()["id"] for tag, _ in results)
+        ids = sorted(key.as_dict()["id"] for key, _ in results)
         assert ids == [0, 1, 2]
 
     @pytest.mark.asyncio
@@ -678,9 +678,9 @@ def test_function_node_sequential_uses_execute_data(self):
         # Monkey-patch to verify routing through internal path
         original = node._process_data_internal
 
-        def patched(tag, data, *, logger=None):
+        def patched(key, data, *, logger=None):
             call_log.append("_process_data_internal")
-            return original(tag, data, logger=logger)
+            return original(key, data, logger=logger)
 
         node._process_data_internal = patched
 
@@ -700,9 +700,9 @@ async def test_function_node_async_uses_async_process_data_internal(self):
 
         original = node._async_process_data_internal
 
-        async def patched(tag, data, **kwargs):
+        async def patched(key, data, **kwargs):
             call_log.append("_async_process_data_internal")
-            return await original(tag, data, **kwargs)
+            return await original(key, data, **kwargs)
 
         node._async_process_data_internal = patched
 
@@ -738,8 +738,8 @@ def triple(x: int) -> int:
         ch2 = Channel(buffer_size=16)
 
         async def source():
-            for tag, data in make_stream(4).iter_data():
-                await ch1.writer.send((tag, data))
+            for key, data in make_stream(4).iter_data():
+                await ch1.writer.send((key, data))
             await ch1.writer.close()
 
         async with asyncio.TaskGroup() as tg:
@@ -762,8 +762,8 @@ async def test_source_to_operator_node_pipeline(self):
         ch2 = Channel(buffer_size=16)
 
         async def source():
-            for tag, data in make_two_col_stream(3).iter_data():
-                await ch1.writer.send((tag, data))
+            for key, data in make_two_col_stream(3).iter_data():
+                await ch1.writer.send((key, data))
             await ch1.writer.close()
 
         async with asyncio.TaskGroup() as tg:
@@ -812,8 +812,8 @@ def double(x: int) -> int:
         output_ch = Channel(buffer_size=16)
 
         async def source_producer():
-            for tag, data in make_stream(5).iter_data():
-                await input_ch.writer.send((tag, data))
+            for key, data in make_stream(5).iter_data():
+                await input_ch.writer.send((key, data))
             await input_ch.writer.close()
 
         async with asyncio.TaskGroup() as tg:
@@ -857,8 +857,8 @@ async def test_persistent_operator_node_log_then_sync_db_retrieval(self):
         output_ch = Channel(buffer_size=16)
 
         async def source_producer():
-            for tag, data in make_two_col_stream(4).iter_data():
-                await input_ch.writer.send((tag, data))
+            for key, data in make_two_col_stream(4).iter_data():
+                await input_ch.writer.send((key, data))
             await input_ch.writer.close()
 
         async with asyncio.TaskGroup() as tg:
@@ -918,7 +918,7 @@ def double(x: int) -> int:
                 "result": pa.array([0, 2, 4], type=pa.int64()),
             }
         )
-        stage1_stream = ArrowTableStream(stage1_table, tag_columns=["id"])
+        stage1_stream = ArrowTableStream(stage1_table, key_columns=["id"])
         op = SelectDataColumns(["result"])
         op_db = InMemoryArrowDatabase()
         op_node = OperatorNode(
@@ -931,8 +931,8 @@ def double(x: int) -> int:
         ch_out = Channel(buffer_size=16)
 
         async def source_producer():
-            for tag, data in make_stream(3).iter_data():
-                await ch_source.writer.send((tag, data))
+            for key, data in make_stream(3).iter_data():
+                await ch_source.writer.send((key, data))
             await ch_source.writer.close()
 
         async with asyncio.TaskGroup() as tg:
diff --git a/tests/test_channels/test_pipeline_async_integration.py b/tests/test_channels/test_pipeline_async_integration.py
index c5abae01..4a610bc1 100644
--- a/tests/test_channels/test_pipeline_async_integration.py
+++ b/tests/test_channels/test_pipeline_async_integration.py
@@ -15,7 +15,7 @@
                ├── Join ──► compute_letter_grade
     grades  ───┘
 
-Tags:   student_id
+Keys:   student_id
 Data: name, score  →  letter_grade
 """
 
@@ -89,8 +89,8 @@ def _build_pipeline() -> Pipeline:
     )
 
     with pipeline:
-        students = ArrowTableSource(STUDENTS, tag_columns=["student_id"], infer_nullable=True)
-        grades = ArrowTableSource(GRADES, tag_columns=["student_id"], infer_nullable=True)
+        students = ArrowTableSource(STUDENTS, key_columns=["student_id"], infer_nullable=True)
+        grades = ArrowTableSource(GRADES, key_columns=["student_id"], infer_nullable=True)
 
         joined = Join()(students, grades, label="join")
         compute_letter_grade.pod(joined, label="letter_grade")
@@ -185,29 +185,29 @@ def test_sync_and_async_produce_identical_results(self):
         assert sync_grades == async_grades == EXPECTED
 
 
-class TestSyncAsyncSystemTagEquivalence:
+class TestSyncAsyncSystemKeyEquivalence:
     """Verify that sync and async pipeline execution produce identical
-    system-tag column names and values in the persisted DB records."""
+    system-key column names and values in the persisted DB records."""
 
-    def _get_system_tag_columns(self, table: pa.Table) -> list[str]:
-        """Return sorted system-tag column names from a table."""
+    def _get_system_key_columns(self, table: pa.Table) -> list[str]:
+        """Return sorted system-key column names from a table."""
         from orcapod.system_constants import constants
 
         return sorted(
-            c for c in table.column_names if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            c for c in table.column_names if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
 
-    def _system_tag_data(self, table: pa.Table) -> dict[str, list]:
-        """Extract system-tag columns as {col_name: sorted_values}."""
-        sys_cols = self._get_system_tag_columns(table)
+    def _system_key_data(self, table: pa.Table) -> dict[str, list]:
+        """Extract system-key columns as {col_name: sorted_values}."""
+        sys_cols = self._get_system_key_columns(table)
         return {c: sorted(table.column(c).to_pylist()) for c in sys_cols}
 
-    def test_join_pipeline_system_tags_identical(self):
-        """Join pipeline: sync and async produce the same system-tag columns."""
+    def test_join_pipeline_system_keys_identical(self):
+        """Join pipeline: sync and async produce the same system-key columns."""
         sync_pipeline = _build_pipeline()
         sync_pipeline.run()
         sync_records = sync_pipeline.letter_grade.get_all_records(
-            columns={"system_tags": True}
+            columns={"system_keys": True}
         )
         assert sync_records is not None
 
@@ -216,42 +216,42 @@ def test_join_pipeline_system_tags_identical(self):
         AsyncPipelineOrchestrator().run(async_pipeline._node_graph)
         async_pipeline.flush()
         async_records = async_pipeline.letter_grade.get_all_records(
-            columns={"system_tags": True}
+            columns={"system_keys": True}
         )
         assert async_records is not None
 
-        # System-tag column names must match
-        sync_sys_cols = self._get_system_tag_columns(sync_records)
-        async_sys_cols = self._get_system_tag_columns(async_records)
-        assert sync_sys_cols, "Expected system-tag columns in output"
+        # System-key column names must match
+        sync_sys_cols = self._get_system_key_columns(sync_records)
+        async_sys_cols = self._get_system_key_columns(async_records)
+        assert sync_sys_cols, "Expected system-key columns in output"
         assert sync_sys_cols == async_sys_cols
 
-        # System-tag values must match
-        sync_sys_data = self._system_tag_data(sync_records)
-        async_sys_data = self._system_tag_data(async_records)
+        # System-key values must match
+        sync_sys_data = self._system_key_data(sync_records)
+        async_sys_data = self._system_key_data(async_records)
         assert sync_sys_data == async_sys_data
 
-    def test_join_pipeline_system_tag_column_names_contain_pipeline_hash(self):
-        """System-tag columns should follow the name-extending convention."""
+    def test_join_pipeline_system_key_column_names_contain_pipeline_hash(self):
+        """System-key columns should follow the name-extending convention."""
 
         pipeline = _build_pipeline()
         pipeline.compile()
         AsyncPipelineOrchestrator().run(pipeline._node_graph)
         pipeline.flush()
-        records = pipeline.letter_grade.get_all_records(columns={"system_tags": True})
+        records = pipeline.letter_grade.get_all_records(columns={"system_keys": True})
         assert records is not None
 
-        sys_cols = self._get_system_tag_columns(records)
+        sys_cols = self._get_system_key_columns(records)
         assert len(sys_cols) > 0
 
-        # Each system-tag column should end with :N (canonical position)
+        # Each system-key column should end with :N (canonical position)
         for col in sys_cols:
             assert col[-2:] in (":0", ":1"), (
-                f"System-tag column {col!r} missing canonical position suffix"
+                f"System-key column {col!r} missing canonical position suffix"
             )
 
-    def test_all_system_tag_columns_match_between_sync_and_async(self):
-        """Every system-tag column name and value in the terminal node's
+    def test_all_system_key_columns_match_between_sync_and_async(self):
+        """Every system-key column name and value in the terminal node's
         DB records should be identical between sync and async.
 
         Source-info columns contain run-specific UUIDs and are excluded
@@ -260,7 +260,7 @@ def test_all_system_tag_columns_match_between_sync_and_async(self):
         sync_pipeline = _build_pipeline()
         sync_pipeline.run()
         sync_records = sync_pipeline.letter_grade.get_all_records(
-            columns={"system_tags": True}
+            columns={"system_keys": True}
         )
         assert sync_records is not None
 
@@ -269,20 +269,20 @@ def test_all_system_tag_columns_match_between_sync_and_async(self):
         AsyncPipelineOrchestrator().run(async_pipeline._node_graph)
         async_pipeline.flush()
         async_records = async_pipeline.letter_grade.get_all_records(
-            columns={"system_tags": True}
+            columns={"system_keys": True}
         )
         assert async_records is not None
 
-        # System-tag column names must match
-        sync_sys_cols = self._get_system_tag_columns(sync_records)
-        async_sys_cols = self._get_system_tag_columns(async_records)
+        # System-key column names must match
+        sync_sys_cols = self._get_system_key_columns(sync_records)
+        async_sys_cols = self._get_system_key_columns(async_records)
         assert sync_sys_cols == async_sys_cols
 
-        # System-tag column values must match (sort by student_id)
+        # System-key column values must match (sort by student_id)
         sync_sorted = sync_records.sort_by("student_id")
         async_sorted = async_records.sort_by("student_id")
         for col in sync_sys_cols:
             assert (
                 sync_sorted.column(col).to_pylist()
                 == async_sorted.column(col).to_pylist()
-            ), f"System-tag column {col!r} differs between sync and async"
+            ), f"System-key column {col!r} differs between sync and async"
diff --git a/tests/test_channels/test_pipeline_example.py b/tests/test_channels/test_pipeline_example.py
index 3854b010..3c9f1c05 100644
--- a/tests/test_channels/test_pipeline_example.py
+++ b/tests/test_channels/test_pipeline_example.py
@@ -71,7 +71,7 @@ def make_students() -> ArrowTableStream:
         },
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["student_id"])
+    return ArrowTableStream(table, key_columns=["student_id"])
 
 
 def make_grades() -> ArrowTableStream:
@@ -88,7 +88,7 @@ def make_grades() -> ArrowTableStream:
         },
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["student_id"])
+    return ArrowTableStream(table, key_columns=["student_id"])
 
 
 # The expected output: only students with score >= 70, with letter grades.
@@ -128,8 +128,8 @@ def test_sync_pipeline_full(self):
 
         # --- Execute (pull-based: iter_data triggers computation) ---
         results = {}
-        for tag, data in with_grades.iter_data():
-            sid = tag.as_dict()["student_id"]
+        for key, data in with_grades.iter_data():
+            sid = key.as_dict()["student_id"]
             results[sid] = data.as_dict()["letter_grade"]
 
         # --- Verify ---
@@ -179,8 +179,8 @@ async def test_async_pipeline_full(self):
 
         # --- Source tasks push data into channels ---
         async def push_source(stream: ArrowTableStream, ch: Channel):
-            for tag, data in stream.iter_data():
-                await ch.writer.send((tag, data))
+            for key, data in stream.iter_data():
+                await ch.writer.send((key, data))
             await ch.writer.close()
 
         # --- Run all stages concurrently via TaskGroup ---
@@ -217,8 +217,8 @@ async def push_source(stream: ArrowTableStream, ch: Channel):
         output_rows = await ch_output.reader.collect()
 
         results = {}
-        for tag, data in output_rows:
-            sid = tag.as_dict()["student_id"]
+        for key, data in output_rows:
+            sid = key.as_dict()["student_id"]
             results[sid] = data.as_dict()["letter_grade"]
 
         assert results == EXPECTED
@@ -240,8 +240,8 @@ async def test_async_pipeline_with_concurrency_control(self):
         ch_output = Channel(buffer_size=16)
 
         async def push_source(stream, ch):
-            for tag, data in stream.iter_data():
-                await ch.writer.send((tag, data))
+            for key, data in stream.iter_data():
+                await ch.writer.send((key, data))
             await ch.writer.close()
 
         async with asyncio.TaskGroup() as tg:
@@ -269,8 +269,8 @@ async def push_source(stream, ch):
 
         output_rows = await ch_output.reader.collect()
         results = {
-            tag.as_dict()["student_id"]: data.as_dict()["letter_grade"]
-            for tag, data in output_rows
+            key.as_dict()["student_id"]: data.as_dict()["letter_grade"]
+            for key, data in output_rows
         }
         assert results == EXPECTED
 
@@ -294,8 +294,8 @@ def _run_sync(self) -> dict[str, str]:
         with_grades = grade_pod.process(passing)
 
         return {
-            tag.as_dict()["student_id"]: data.as_dict()["letter_grade"]
-            for tag, data in with_grades.iter_data()
+            key.as_dict()["student_id"]: data.as_dict()["letter_grade"]
+            for key, data in with_grades.iter_data()
         }
 
     async def _run_async(self) -> dict[str, str]:
@@ -312,8 +312,8 @@ async def _run_async(self) -> dict[str, str]:
         ch_o = Channel(buffer_size=16)
 
         async def push(stream, ch):
-            for tag, data in stream.iter_data():
-                await ch.writer.send((tag, data))
+            for key, data in stream.iter_data():
+                await ch.writer.send((key, data))
             await ch.writer.close()
 
         async with asyncio.TaskGroup() as tg:
@@ -326,8 +326,8 @@ async def push(stream, ch):
             tg.create_task(grade_pod.async_execute([ch_f.reader], ch_o.writer))
 
         return {
-            tag.as_dict()["student_id"]: data.as_dict()["letter_grade"]
-            for tag, data in await ch_o.reader.collect()
+            key.as_dict()["student_id"]: data.as_dict()["letter_grade"]
+            for key, data in await ch_o.reader.collect()
         }
 
     @pytest.mark.asyncio
diff --git a/tests/test_core/conftest.py b/tests/test_core/conftest.py
index 86bfc7f1..be9012be 100644
--- a/tests/test_core/conftest.py
+++ b/tests/test_core/conftest.py
@@ -28,7 +28,7 @@ def to_upper(name: str) -> str:
 
 
 def make_int_stream(n: int = 3) -> ArrowTableStream:
-    """ArrowTableStream with tag=id (int), data=x (int).
+    """ArrowTableStream with key=id (int), data=x (int).
 
     Uses explicit nullable=False schema to simulate data that has been
     processed through SourceStreamBuilder (which normalizes nullable flags).
@@ -46,11 +46,11 @@ def make_int_stream(n: int = 3) -> ArrowTableStream:
         },
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 def make_two_col_stream(n: int = 3) -> ArrowTableStream:
-    """ArrowTableStream with tag=id, data={x, y} for add_pf.
+    """ArrowTableStream with key=id, data={x, y} for add_pf.
 
     Uses explicit nullable=False schema to simulate data that has been
     processed through SourceStreamBuilder (which normalizes nullable flags).
@@ -70,7 +70,7 @@ def make_two_col_stream(n: int = 3) -> ArrowTableStream:
         },
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/test_core/data_function/test_data_function_proxy.py b/tests/test_core/data_function/test_data_function_proxy.py
index 49dc4b85..972bff38 100644
--- a/tests/test_core/data_function/test_data_function_proxy.py
+++ b/tests/test_core/data_function/test_data_function_proxy.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from orcapod.core.datagrams.tag_data import Data, Tag
+from orcapod.core.datagrams.key_data import Data, Key
 from orcapod.core.function_pod import FunctionPod
 from orcapod.core.data_function import PythonDataFunction
 from orcapod.core.data_function_proxy import DataFunctionProxy
@@ -222,7 +222,7 @@ def test_function_pod_output_schema(self):
         source = DictSource(
             data=[{"age": 10}, {"age": 20}, {"age": 30}],
         )
-        _tag_schema, data_schema = pod.output_schema(source)
+        _key_schema, data_schema = pod.output_schema(source)
         assert "doubled_age" in data_schema
 
     def test_function_pod_process_data_raises(self):
@@ -230,7 +230,7 @@ def test_function_pod_process_data_raises(self):
         pf = _make_sample_function()
         proxy = _make_proxy_from_function(pf)
         pod = FunctionPod(data_function=proxy)
-        tag = Tag({})
+        key = Key({})
         data = Data({"age": 25})
         with pytest.raises(DataFunctionUnavailableError):
-            pod.process_data(tag, data)
+            pod.process_data(key, data)
diff --git a/tests/test_core/data_function/test_executor.py b/tests/test_core/data_function/test_executor.py
index 430cef8d..ad60b0d0 100644
--- a/tests/test_core/data_function/test_executor.py
+++ b/tests/test_core/data_function/test_executor.py
@@ -377,7 +377,7 @@ def _make_add_stream(rows: list[dict] | None = None):
         {k: pa.array([r[k] for r in rows], type=pa.int64()) for k in keys},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 class TestFunctionPodExecutorAccess:
@@ -698,7 +698,7 @@ def test_concurrent_results_preserve_order(self):
         rows = [{"id": i, "x": i, "y": i * 10} for i in range(5)]
         stream = _make_add_stream(rows)
         output = pod.process(stream)
-        results = [tag_pkt[1].as_dict()["result"] for tag_pkt in output.iter_data()]
+        results = [key_pkt[1].as_dict()["result"] for key_pkt in output.iter_data()]
         assert results == [0, 11, 22, 33, 44]
 
     def test_second_iteration_uses_cache(self):
diff --git a/tests/test_core/datagrams/test_lazy_conversion.py b/tests/test_core/datagrams/test_lazy_conversion.py
index c48d175f..7400257d 100644
--- a/tests/test_core/datagrams/test_lazy_conversion.py
+++ b/tests/test_core/datagrams/test_lazy_conversion.py
@@ -1,12 +1,12 @@
 """
-Tests verifying that Datagram/Tag/Data keep their original representation
+Tests verifying that Datagram/Key/Data keep their original representation
 (Arrow table or Python dict) for as long as possible, converting only when
 an operation semantically requires it.
 
 Design note
 -----------
 These tests intentionally inspect private attributes (_data_dict, _data_table,
-_system_tags_table, _source_info_table, _content_hash_cache, etc.) because the
+_system_keys_table, _source_info_table, _content_hash_cache, etc.) because the
 lazy-conversion contract is an explicit implementation guarantee — it is the
 entire point of the unified Datagram class.  Checking public behaviour alone
 would not distinguish "converted correctly" from "never converted at all".
@@ -15,7 +15,7 @@
 import pyarrow as pa
 import pytest
 
-from orcapod.core.datagrams import Datagram, Data, Tag
+from orcapod.core.datagrams import Datagram, Data, Key
 from orcapod.system_constants import constants
 from orcapod.types import ColumnConfig
 
@@ -23,7 +23,7 @@
 # Helpers
 # ---------------------------------------------------------------------------
 
-SYS = constants.SYSTEM_TAG_PREFIX  # '_tag::'
+SYS = constants.SYSTEM_KEY_PREFIX  # '_key::'
 SRC = constants.SOURCE_PREFIX  # '_source_'
 META = constants.META_PREFIX  # '__'
 
@@ -291,71 +291,71 @@ def test_copy_without_cache_drops_context_table(self):
 
 
 # ---------------------------------------------------------------------------
-# Tag — lazy system-tags table
+# Key — lazy system-keys table
 # ---------------------------------------------------------------------------
 
 
-class TestTagLazySystemTagsTable:
-    """_system_tags_table is built only when system_tags are explicitly requested."""
+class TestKeyLazySystemKeysTable:
+    """_system_keys_table is built only when system_keys are explicitly requested."""
 
-    def test_dict_backed_starts_with_no_system_tags_table(self):
-        t = Tag({"a": 1, f"{SYS}run": "run1"})
-        assert t._system_tags_table is None
+    def test_dict_backed_starts_with_no_system_keys_table(self):
+        t = Key({"a": 1, f"{SYS}run": "run1"})
+        assert t._system_keys_table is None
 
-    def test_arrow_backed_system_tag_columns_extracted_from_data_table(self):
+    def test_arrow_backed_system_key_columns_extracted_from_data_table(self):
         sys_col = f"{SYS}run"
         tbl = arrow_table(a=1)
         tbl = tbl.append_column(sys_col, pa.array(["run1"], type=pa.large_string()))
-        t = Tag(tbl)
-        # System tag column removed from primary data table
+        t = Key(tbl)
+        # System key column removed from primary data table
         assert t._data_table is not None
         assert sys_col not in t._data_table.column_names
-        # Captured in the system_tags dict
-        assert t._system_tags[sys_col] == "run1"
+        # Captured in the system_keys dict
+        assert t._system_keys[sys_col] == "run1"
         # Table not yet built
-        assert t._system_tags_table is None
+        assert t._system_keys_table is None
 
-    def test_system_tags_table_not_built_without_system_tags_flag(self):
-        t = Tag({"a": 1, f"{SYS}run": "run1"})
+    def test_system_keys_table_not_built_without_system_keys_flag(self):
+        t = Key({"a": 1, f"{SYS}run": "run1"})
         _ = t.as_table()
-        assert t._system_tags_table is None
+        assert t._system_keys_table is None
         _ = t.as_dict()
-        assert t._system_tags_table is None
+        assert t._system_keys_table is None
         _ = t.keys()
-        assert t._system_tags_table is None
+        assert t._system_keys_table is None
 
-    def test_system_tags_table_built_when_requested_via_as_table(self):
-        t = Tag({"a": 1, f"{SYS}run": "run1"})
-        _ = t.as_table(columns=ColumnConfig(system_tags=True))
-        assert t._system_tags_table is not None
+    def test_system_keys_table_built_when_requested_via_as_table(self):
+        t = Key({"a": 1, f"{SYS}run": "run1"})
+        _ = t.as_table(columns=ColumnConfig(system_keys=True))
+        assert t._system_keys_table is not None
 
-    def test_system_tags_table_built_when_requested_via_arrow_schema(self):
-        t = Tag({"a": 1, f"{SYS}run": "run1"})
-        _ = t.arrow_schema(columns=ColumnConfig(system_tags=True))
-        assert t._system_tags_table is not None
+    def test_system_keys_table_built_when_requested_via_arrow_schema(self):
+        t = Key({"a": 1, f"{SYS}run": "run1"})
+        _ = t.arrow_schema(columns=ColumnConfig(system_keys=True))
+        assert t._system_keys_table is not None
 
-    def test_arrow_backed_dict_not_loaded_by_system_tags_operations(self):
+    def test_arrow_backed_dict_not_loaded_by_system_keys_operations(self):
         sys_col = f"{SYS}run"
         tbl = arrow_table(a=1)
         tbl = tbl.append_column(sys_col, pa.array(["run1"], type=pa.large_string()))
-        t = Tag(tbl)
+        t = Key(tbl)
         assert t._data_dict is None
-        _ = t.keys(columns=ColumnConfig(system_tags=True))
-        _ = t.schema(columns=ColumnConfig(system_tags=True))
-        _ = t.arrow_schema(columns=ColumnConfig(system_tags=True))
+        _ = t.keys(columns=ColumnConfig(system_keys=True))
+        _ = t.schema(columns=ColumnConfig(system_keys=True))
+        _ = t.arrow_schema(columns=ColumnConfig(system_keys=True))
         assert t._data_dict is None
 
-    def test_copy_with_cache_propagates_system_tags_table(self):
-        t = Tag({"a": 1, f"{SYS}run": "run1"})
-        _ = t.as_table(columns=ColumnConfig(system_tags=True))
+    def test_copy_with_cache_propagates_system_keys_table(self):
+        t = Key({"a": 1, f"{SYS}run": "run1"})
+        _ = t.as_table(columns=ColumnConfig(system_keys=True))
         t2 = t.copy(include_cache=True)
-        assert t2._system_tags_table is not None
+        assert t2._system_keys_table is not None
 
-    def test_copy_without_cache_drops_system_tags_table(self):
-        t = Tag({"a": 1, f"{SYS}run": "run1"})
-        _ = t.as_table(columns=ColumnConfig(system_tags=True))
+    def test_copy_without_cache_drops_system_keys_table(self):
+        t = Key({"a": 1, f"{SYS}run": "run1"})
+        _ = t.as_table(columns=ColumnConfig(system_keys=True))
         t2 = t.copy(include_cache=False)
-        assert t2._system_tags_table is None
+        assert t2._system_keys_table is None
 
 
 # ---------------------------------------------------------------------------
@@ -435,7 +435,7 @@ def test_with_columns_clears_source_info_table_and_adds_empty_entry(self):
 
 
 # ---------------------------------------------------------------------------
-# RecordBatch — both Tag and Data accept pa.RecordBatch (from table.to_batches())
+# RecordBatch — both Key and Data accept pa.RecordBatch (from table.to_batches())
 # ---------------------------------------------------------------------------
 
 
@@ -450,15 +450,15 @@ def test_datagram_from_record_batch(self):
         assert d._data_dict is None
         assert d["a"] == 1
 
-    def test_tag_from_record_batch(self):
+    def test_key_from_record_batch(self):
         sys_col = f"{SYS}run"
         tbl = arrow_table(a=1)
         tbl = tbl.append_column(sys_col, pa.array(["r1"], type=pa.large_string()))
         batch = tbl.to_batches()[0]
-        t = Tag(batch.slice(0, 1))
+        t = Key(batch.slice(0, 1))
         assert t._data_table is not None
         assert sys_col not in t._data_table.column_names
-        assert t._system_tags[sys_col] == "r1"
+        assert t._system_keys[sys_col] == "r1"
         assert t._data_dict is None
 
     def test_data_from_record_batch(self):
diff --git a/tests/test_core/function_pod/test_cached_function_pod.py b/tests/test_core/function_pod/test_cached_function_pod.py
index 09ac5b46..caa1a068 100644
--- a/tests/test_core/function_pod/test_cached_function_pod.py
+++ b/tests/test_core/function_pod/test_cached_function_pod.py
@@ -30,7 +30,7 @@ def _make_stream(rows: list[dict] | None = None) -> ArrowTableStream:
         {k: pa.array([r[k] for r in rows], type=pa.int64()) for k in keys},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 # ---------------------------------------------------------------------------
@@ -147,20 +147,20 @@ def test_second_call_does_not_add_new_records(self, cached_pod, cache_db):
 
 
 # ---------------------------------------------------------------------------
-# Tag-aware caching
+# Key-aware caching
 # ---------------------------------------------------------------------------
 
 
 class TestCacheKeySemantics:
-    def test_same_data_different_tags_is_cache_hit(self, double_pod, cache_db):
-        """Same data data with different tags is a cache hit — the function
-        output depends only on the data, not the tag."""
+    def test_same_data_different_keys_is_cache_hit(self, double_pod, cache_db):
+        """Same data with different keys is a cache hit — the function
+        output depends only on the data, not the key."""
         cached_pod = CachedFunctionPod(double_pod, result_database=cache_db)
 
         stream1 = _make_stream([{"id": 0, "x": 10}])
         list(cached_pod.process(stream1).iter_data())
 
-        # Same data data, different tag — should be cache hit
+        # Same data, different key — should be cache hit
         stream2 = _make_stream([{"id": 1, "x": 10}])
         results = list(cached_pod.process(stream2).iter_data())
 
diff --git a/tests/test_core/function_pod/test_function_node_attach_db.py b/tests/test_core/function_pod/test_function_node_attach_db.py
index 9118c96b..9ac06963 100644
--- a/tests/test_core/function_pod/test_function_node_attach_db.py
+++ b/tests/test_core/function_pod/test_function_node_attach_db.py
@@ -35,7 +35,7 @@ def _make_stream(n=3):
         },
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 class TestFunctionNodeWithoutDatabase:
diff --git a/tests/test_core/function_pod/test_function_node_caching.py b/tests/test_core/function_pod/test_function_node_caching.py
index 8c5bb0d5..c0b2084a 100644
--- a/tests/test_core/function_pod/test_function_node_caching.py
+++ b/tests/test_core/function_pod/test_function_node_caching.py
@@ -2,10 +2,10 @@
 
 Covers:
 - compute_pipeline_entry_id behavior
-- Pipeline entry_id based Phase 2 skip (tag + system_tags + data_hash)
+- Pipeline entry_id based Phase 2 skip (key + system_keys + data_hash)
 - CachedFunctionPod result cache hit with novel pipeline entry_id
-- Same data data, different tags → 1 result record, N pipeline records
-- System tag awareness in pipeline entry_id computation
+- Same data, different keys → 1 result record, N pipeline records
+- System key awareness in pipeline entry_id computation
 - Phase 1 yields existing records, Phase 2 processes only novel entry_ids
 """
 
@@ -15,7 +15,7 @@
 import pytest
 
 from orcapod.core.cached_function_pod import CachedFunctionPod
-from orcapod.core.datagrams import Data, Tag
+from orcapod.core.datagrams import Data, Key
 from orcapod.core.function_pod import FunctionPod
 from orcapod.core.nodes import FunctionNode
 from orcapod.core.data_function import PythonDataFunction
@@ -40,29 +40,29 @@ def _make_pod():
 
 
 def _make_stream(
-    rows: list[dict], tag_columns: list[str] | None = None
+    rows: list[dict], key_columns: list[str] | None = None
 ) -> ArrowTableStream:
-    if tag_columns is None:
-        tag_columns = ["id"]
+    if key_columns is None:
+        key_columns = ["id"]
     keys = list(rows[0].keys())
     schema = pa.schema([pa.field(k, pa.int64(), nullable=False) for k in keys])
     table = pa.table(
         {k: pa.array([r[k] for r in rows], type=pa.int64()) for k in keys},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=tag_columns)
+    return ArrowTableStream(table, key_columns=key_columns)
 
 
 def _make_source_stream(
-    rows: list[dict], tag_columns: list[str] | None = None, source_id: str = "src_a"
+    rows: list[dict], key_columns: list[str] | None = None, source_id: str = "src_a"
 ) -> ArrowTableStream:
-    """Create a stream from an ArrowTableSource so it has system tag columns."""
-    if tag_columns is None:
-        tag_columns = ["id"]
+    """Create a stream from an ArrowTableSource so it has system key columns."""
+    if key_columns is None:
+        key_columns = ["id"]
     table = pa.table(
         {k: pa.array([r[k] for r in rows], type=pa.int64()) for k in rows[0]}
     )
-    source = ArrowTableSource(table, tag_columns=tag_columns, source_id=source_id, infer_nullable=True)
+    source = ArrowTableSource(table, key_columns=key_columns, source_id=source_id, infer_nullable=True)
     return source
 
 
@@ -87,63 +87,63 @@ class TestComputePipelineEntryId:
     def test_returns_non_empty_string(self):
         stream = _make_stream([{"id": 0, "x": 10}])
         node, _ = _make_node(stream)
-        tag = Tag({"id": 0})
+        key = Key({"id": 0})
         data = Data({"x": 10})
-        entry_id = node.compute_pipeline_entry_id(tag, data)
+        entry_id = node.compute_pipeline_entry_id(key, data)
         assert isinstance(entry_id, str)
         assert len(entry_id) > 0
 
     def test_same_inputs_produce_same_id(self):
         stream = _make_stream([{"id": 0, "x": 10}])
         node, _ = _make_node(stream)
-        tag = Tag({"id": 0})
+        key = Key({"id": 0})
         data = Data({"x": 10})
-        id1 = node.compute_pipeline_entry_id(tag, data)
-        id2 = node.compute_pipeline_entry_id(tag, data)
+        id1 = node.compute_pipeline_entry_id(key, data)
+        id2 = node.compute_pipeline_entry_id(key, data)
         assert id1 == id2
 
-    def test_different_tags_produce_different_ids(self):
+    def test_different_keys_produce_different_ids(self):
         stream = _make_stream([{"id": 0, "x": 10}])
         node, _ = _make_node(stream)
         data = Data({"x": 10})
-        id_tag0 = node.compute_pipeline_entry_id(Tag({"id": 0}), data)
-        id_tag1 = node.compute_pipeline_entry_id(Tag({"id": 1}), data)
-        assert id_tag0 != id_tag1
+        id_key0 = node.compute_pipeline_entry_id(Key({"id": 0}), data)
+        id_key1 = node.compute_pipeline_entry_id(Key({"id": 1}), data)
+        assert id_key0 != id_key1
 
     def test_different_data_produce_different_ids(self):
         stream = _make_stream([{"id": 0, "x": 10}])
         node, _ = _make_node(stream)
-        tag = Tag({"id": 0})
-        id_x10 = node.compute_pipeline_entry_id(tag, Data({"x": 10}))
-        id_x99 = node.compute_pipeline_entry_id(tag, Data({"x": 99}))
+        key = Key({"id": 0})
+        id_x10 = node.compute_pipeline_entry_id(key, Data({"x": 10}))
+        id_x99 = node.compute_pipeline_entry_id(key, Data({"x": 99}))
         assert id_x10 != id_x99
 
 
 # ---------------------------------------------------------------------------
-# System tag awareness in entry_id
+# System key awareness in entry_id
 # ---------------------------------------------------------------------------
 
 
-class TestSystemTagAwareness:
-    def test_same_tag_values_different_system_tags_produce_different_ids(self):
-        """Two tags with identical user values but different system tags
+class TestSystemKeyAwareness:
+    def test_same_key_values_different_system_keys_produce_different_ids(self):
+        """Two keys with identical user values but different system keys
         must produce different pipeline entry_ids."""
         stream = _make_stream([{"id": 0, "x": 10}])
         node, _ = _make_node(stream)
         data = Data({"x": 10})
 
-        # Tags with same user value but different system tag columns
-        tag_a = Tag(
+        # Keys with same user value but different system key columns
+        key_a = Key(
             {"id": 0},
-            system_tags={f"{constants.SYSTEM_TAG_PREFIX}source:abc": "row0"},
+            system_keys={f"{constants.SYSTEM_KEY_PREFIX}source:abc": "row0"},
         )
-        tag_b = Tag(
+        key_b = Key(
             {"id": 0},
-            system_tags={f"{constants.SYSTEM_TAG_PREFIX}source:xyz": "row0"},
+            system_keys={f"{constants.SYSTEM_KEY_PREFIX}source:xyz": "row0"},
         )
 
-        id_a = node.compute_pipeline_entry_id(tag_a, data)
-        id_b = node.compute_pipeline_entry_id(tag_b, data)
+        id_a = node.compute_pipeline_entry_id(key_a, data)
+        id_b = node.compute_pipeline_entry_id(key_b, data)
         assert id_a != id_b
 
 
@@ -153,10 +153,10 @@ def test_same_tag_values_different_system_tags_produce_different_ids(self):
 
 
 class TestResultVsPipelineRecordCounts:
-    def test_same_data_different_tags_one_result_two_pipeline_records(self):
-        """Same data data with different tags should produce:
+    def test_same_data_different_keys_one_result_two_pipeline_records(self):
+        """Same data with different keys should produce:
         - 1 result record (CachedFunctionPod caches by data hash only)
-        - 2 pipeline records (different tag → different entry_id)
+        - 2 pipeline records (different key → different entry_id)
         """
         rows = [{"id": 0, "x": 10}, {"id": 1, "x": 10}]
         stream = _make_stream(rows)
@@ -170,13 +170,13 @@ def test_same_data_different_tags_one_result_two_pipeline_records(self):
         assert result_records is not None
         assert result_records.num_rows == 1
 
-        # Pipeline DB: 2 records (different tags → different entry_ids)
+        # Pipeline DB: 2 records (different keys → different entry_ids)
         pipeline_records = db.get_all_records(node.node_identity_path)
         assert pipeline_records is not None
         assert pipeline_records.num_rows == 2
 
-    def test_different_data_same_tag_two_result_two_pipeline_records(self):
-        """Different data data with same tag should produce:
+    def test_different_data_same_key_two_result_two_pipeline_records(self):
+        """Different data with same key should produce:
         - 2 result records (different data hashes)
         - 2 pipeline records (different data hash → different entry_id)
         """
@@ -196,7 +196,7 @@ def test_different_data_same_tag_two_result_two_pipeline_records(self):
         assert pipeline_records.num_rows == 2
 
     def test_identical_rows_one_result_one_pipeline_record(self):
-        """Identical (tag, data) → 1 result record, 1 pipeline record."""
+        """Identical (key, data) → 1 result record, 1 pipeline record."""
         # A single row — process once
         stream = _make_stream([{"id": 0, "x": 10}])
         node, db = _make_node(stream)
@@ -258,13 +258,13 @@ def test_phase2_processes_novel_entry_ids_only(self):
         result_values = sorted(p.as_dict()["result"] for _, p in results)
         assert result_values == [20, 40, 60]
 
-    def test_same_data_new_tag_triggers_phase2(self):
+    def test_same_data_new_key_triggers_phase2(self):
         """With pipeline_hash scope (default), nodes with same schema share one DB table.
         node2 (id=1, x=10) has a different content_hash than node1 (id=0, x=10),
         so Phase 1 finds no records for node2 and Phase 2 executes the data."""
         db = InMemoryArrowDatabase()
 
-        # First run: tag=0, x=10
+        # First run: key=0, x=10
         stream1 = _make_stream([{"id": 0, "x": 10}])
         node1, _ = _make_node(stream1, db=db)
         node1.run()
@@ -272,7 +272,7 @@ def test_same_data_new_tag_triggers_phase2(self):
         pipeline_count_after_first = db.get_all_records(node1.node_identity_path).num_rows
         assert pipeline_count_after_first == 1
 
-        # Second run: tag=1, x=10 (same data, different tag)
+        # Second run: key=1, x=10 (same data, different key)
         stream2 = _make_stream([{"id": 1, "x": 10}])
         node2, _ = _make_node(stream2, db=db)
 
@@ -327,28 +327,28 @@ def test_all_existing_entry_ids_skipped_in_phase2(self):
 
 
 class TestResultCacheHitPipelineNovel:
-    def test_cached_result_reused_for_new_tag(self):
+    def test_cached_result_reused_for_new_key(self):
         """When CachedFunctionPod has a cache hit (same data hash) but
-        the pipeline entry_id is novel (different tag), the cached result
+        the pipeline entry_id is novel (different key), the cached result
         should be reused and a new pipeline record created."""
         db = InMemoryArrowDatabase()
 
-        # Process tag=0, x=10
+        # Process key=0, x=10
         stream1 = _make_stream([{"id": 0, "x": 10}])
         node1, _ = _make_node(stream1, db=db)
         node1.run()
 
-        # Process tag=1, x=10 — same data, different tag
+        # Process key=1, x=10 — same data, different key
         stream2 = _make_stream([{"id": 1, "x": 10}])
         node2, _ = _make_node(stream2, db=db)
         results = list(node2.iter_data())
 
-        # Both tags should produce the same result value
+        # Both keys should produce the same result value
         result_values = [p.as_dict()["result"] for _, p in results]
         assert all(v == 20 for v in result_values)
 
     def test_pipeline_records_reference_same_result_uuid(self):
-        """Two pipeline records for the same data (different tags)
+        """Two pipeline records for the same data (different keys)
         should reference the same output data UUID in the result DB."""
         db = InMemoryArrowDatabase()
 
diff --git a/tests/test_core/function_pod/test_function_pod_chaining.py b/tests/test_core/function_pod/test_function_pod_chaining.py
index c9779a4e..dbcdbd28 100644
--- a/tests/test_core/function_pod/test_function_pod_chaining.py
+++ b/tests/test_core/function_pod/test_function_pod_chaining.py
@@ -5,7 +5,7 @@
 - Two-pod linear chain: output stream of pod1 feeds into pod2
 - Three-pod linear chain with value verification at each stage
 - Chaining via the decorator (@function_pod) interface
-- TagProtocol preservation across chained pods
+- KeyProtocol preservation across chained pods
 - Row count preservation across chained pods
 - as_table() results after chaining
 - Chain where an intermediate pod is inactive (data filtered out)
@@ -81,12 +81,12 @@ def test_chain_values_correct(self, double_pod, add_one_pod):
         ):
             assert data["result"] == i * 2 + 1
 
-    def test_chain_tag_preserved(self, double_pod, add_one_pod):
+    def test_chain_key_preserved(self, double_pod, add_one_pod):
         n = 3
-        for i, (tag, _) in enumerate(
+        for i, (key, _) in enumerate(
             add_one_pod.process(double_pod.process(make_int_stream(n=n))).iter_data()
         ):
-            assert tag["id"] == i
+            assert key["id"] == i
 
     def test_chain_as_table_has_correct_columns(self, double_pod, add_one_pod):
         table = add_one_pod.process(double_pod.process(make_int_stream(n=3))).as_table()
@@ -149,13 +149,13 @@ def test_three_pod_chain_values(self, double_pod, add_one_pod, square_pod):
             expected = (i * 2 + 1) ** 2
             assert data["result"] == expected
 
-    def test_three_pod_chain_tags_preserved(self, double_pod, add_one_pod, square_pod):
+    def test_three_pod_chain_keys_preserved(self, double_pod, add_one_pod, square_pod):
         n = 4
         stream = square_pod.process(
             add_one_pod.process(double_pod.process(make_int_stream(n=n)))
         )
-        for i, (tag, _) in enumerate(stream.iter_data()):
-            assert tag["id"] == i
+        for i, (key, _) in enumerate(stream.iter_data()):
+            assert key["id"] == i
 
     def test_three_pod_chain_as_table_correct(
         self, double_pod, add_one_pod, square_pod
@@ -167,7 +167,7 @@ def test_three_pod_chain_as_table_correct(
         results = table.column("result").to_pylist()
         assert results == [(i * 2 + 1) ** 2 for i in range(n)]
 
-    def test_three_pod_chain_table_has_tag_column(
+    def test_three_pod_chain_table_has_key_column(
         self, double_pod, add_one_pod, square_pod
     ):
         table = square_pod.process(
diff --git a/tests/test_core/function_pod/test_function_pod_decorator.py b/tests/test_core/function_pod/test_function_pod_decorator.py
index 8db0e8d8..de29b009 100644
--- a/tests/test_core/function_pod/test_function_pod_decorator.py
+++ b/tests/test_core/function_pod/test_function_pod_decorator.py
@@ -157,7 +157,7 @@ def test_multiple_output_keys_end_to_end(self):
                     ]
                 ),
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         for i, (_, data) in enumerate(stats.pod.process(stream).iter_data()):
             assert data["total"] == i + i
diff --git a/tests/test_core/function_pod/test_function_pod_extended.py b/tests/test_core/function_pod/test_function_pod_extended.py
index f513a518..340664bf 100644
--- a/tests/test_core/function_pod/test_function_pod_extended.py
+++ b/tests/test_core/function_pod/test_function_pod_extended.py
@@ -2,7 +2,7 @@
 Extended tests for function_pod.py covering:
 - _FunctionPodBase — handle_input_streams
 - WrappedFunctionPod — delegation, uri, validate_inputs, output_schema, process
-- FunctionPodStream — as_table() with content_hash and sort_by_tags column configs
+- FunctionPodStream — as_table() with content_hash and sort_by_keys column configs
 - function_pod decorator with result_database — creates CachedDataFunction, caching works
 """
 
@@ -45,14 +45,14 @@ def test_multiple_streams_returns_joined_stream(self, add_pod):
                 {"id": pa.array([0, 1], type=pa.int64()), "x": pa.array([0, 1], type=pa.int64())},
                 schema=pa.schema([pa.field("id", pa.int64(), nullable=False), pa.field("x", pa.int64(), nullable=False)]),
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         stream_y = ArrowTableStream(
             pa.table(
                 {"id": pa.array([0, 1], type=pa.int64()), "y": pa.array([10, 20], type=pa.int64())},
                 schema=pa.schema([pa.field("id", pa.int64(), nullable=False), pa.field("y", pa.int64(), nullable=False)]),
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         result = add_pod.handle_input_streams(stream_x, stream_y)
         assert isinstance(result, StreamProtocol)
@@ -144,12 +144,12 @@ def test_no_content_hash_by_default(self, double_pod):
 
 
 # ---------------------------------------------------------------------------
-# 4. FunctionPodStream — as_table() with sort_by_tags column config
+# 4. FunctionPodStream — as_table() with sort_by_keys column config
 # ---------------------------------------------------------------------------
 
 
-class TestFunctionPodStreamSortByTags:
-    def test_sort_by_tags_returns_sorted_table(self, double_pod):
+class TestFunctionPodStreamSortByKeys:
+    def test_sort_by_keys_returns_sorted_table(self, double_pod):
         n = 5
         schema = pa.schema([pa.field("id", pa.int64(), nullable=False), pa.field("x", pa.int64(), nullable=False)])
         table = pa.table(
@@ -159,8 +159,8 @@ def test_sort_by_tags_returns_sorted_table(self, double_pod):
             },
             schema=schema,
         )
-        stream = double_pod.process(ArrowTableStream(table, tag_columns=["id"]))
-        result = stream.as_table(columns={"sort_by_tags": True})
+        stream = double_pod.process(ArrowTableStream(table, key_columns=["id"]))
+        result = stream.as_table(columns={"sort_by_keys": True})
         ids: list[int] = result.column("id").to_pylist()  # type: ignore[assignment]
         assert ids == sorted(ids)
 
@@ -175,7 +175,7 @@ def test_default_table_may_be_unsorted(self, double_pod):
             },
             schema=schema,
         )
-        stream = double_pod.process(ArrowTableStream(table, tag_columns=["id"]))
+        stream = double_pod.process(ArrowTableStream(table, key_columns=["id"]))
         result = stream.as_table()
         ids: list[int] = result.column("id").to_pylist()  # type: ignore[assignment]
         assert ids == reversed_ids
diff --git a/tests/test_core/function_pod/test_function_pod_node.py b/tests/test_core/function_pod/test_function_pod_node.py
index ea290664..a43d93c1 100644
--- a/tests/test_core/function_pod/test_function_pod_node.py
+++ b/tests/test_core/function_pod/test_function_pod_node.py
@@ -4,7 +4,7 @@
 - output_schema and keys
 - execute_data and add_pipeline_record
 - iter_data, run(), stream interface
-- get_all_records: empty DB, correctness, ColumnConfig (meta/source/system_tags/all_info)
+- get_all_records: empty DB, correctness, ColumnConfig (meta/source/system_keys/all_info)
 - pipeline_identity_structure and pipeline_hash
 - pipeline_path_prefix
 - result path conventions
@@ -17,7 +17,7 @@
 import pyarrow as pa
 import pytest
 
-from orcapod.core.datagrams import Data, Tag
+from orcapod.core.datagrams import Data, Key
 from orcapod.core.function_pod import FunctionPod
 from orcapod.core.nodes import FunctionNode
 from orcapod.core.data_function import PythonDataFunction
@@ -48,12 +48,12 @@ def _make_node(
     )
 
 
-def _make_node_with_system_tags(
+def _make_node_with_system_keys(
     pf: PythonDataFunction,
     n: int = 3,
     db: InMemoryArrowDatabase | None = None,
 ) -> FunctionNode:
-    """Build a node whose input stream has an explicit system-tag column ('run')."""
+    """Build a node whose input stream has an explicit system-key column ('run')."""
     if db is None:
         db = InMemoryArrowDatabase()
     schema = pa.schema(
@@ -71,7 +71,7 @@ def _make_node_with_system_tags(
         },
         schema=schema,
     )
-    stream = ArrowTableStream(table, tag_columns=["id"], system_tag_columns=["run"])
+    stream = ArrowTableStream(table, key_columns=["id"], system_key_columns=["run"])
     return FunctionNode(
         function_pod=FunctionPod(data_function=pf),
         input_stream=stream,
@@ -127,9 +127,9 @@ def test_pipeline_path_contains_data_function_uri(self, node):
         for part in pf_uri:
             assert part in node.node_identity_path
 
-    def test_pipeline_path_has_no_tag_schema_hash(self, node):
+    def test_pipeline_path_has_no_key_schema_hash(self, node):
         path = node.node_identity_path
-        assert not any(segment.startswith("tag:") for segment in path)
+        assert not any(segment.startswith("key:") for segment in path)
 
     def test_node_is_stream_protocol(self, node):
         assert isinstance(node, StreamProtocol)
@@ -155,7 +155,7 @@ def test_incompatible_stream_raises_on_construction(self, double_pf):
                     "z": pa.array([0, 1], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         with pytest.raises(ValueError):
             FunctionNode(
@@ -201,24 +201,24 @@ def node(self, double_pf) -> FunctionNode:
         )
 
     def test_output_schema_returns_two_mappings(self, node: FunctionNode):
-        tag_schema, data_schema = node.output_schema()
-        assert isinstance(tag_schema, Mapping)
+        key_schema, data_schema = node.output_schema()
+        assert isinstance(key_schema, Mapping)
         assert isinstance(data_schema, Mapping)
-        assert "id" in tag_schema
-        assert len(tag_schema) == 1
+        assert "id" in key_schema
+        assert len(key_schema) == 1
         assert "result" in data_schema
         assert len(data_schema) == 1
-        assert tag_schema["id"] is int
+        assert key_schema["id"] is int
         assert data_schema["result"] is int
 
     def test_data_schema_matches_function_output(self, node, double_pf):
         _, data_schema = node.output_schema()
         assert data_schema == double_pf.output_data_schema
 
-    def test_tag_schema_matches_input_stream(self, node):
-        tag_schema, _ = node.output_schema()
-        assert "id" in tag_schema
-        assert tag_schema["id"] is int
+    def test_key_schema_matches_input_stream(self, node):
+        key_schema, _ = node.output_schema()
+        assert "id" in key_schema
+        assert key_schema["id"] is int
 
 
 # ---------------------------------------------------------------------------
@@ -236,24 +236,24 @@ def node(self, double_pf) -> FunctionNode:
             pipeline_database=db,
         )
 
-    def test_execute_data_returns_tag_and_data(self, node):
-        tag = Tag({"id": 0})
+    def test_execute_data_returns_key_and_data(self, node):
+        key = Key({"id": 0})
         data = Data({"x": 5})
-        out_tag, out_data = node.execute_data(tag, data)
-        assert out_tag is tag
+        out_key, out_data = node.execute_data(key, data)
+        assert out_key is key
         assert out_data is not None
 
     def test_execute_data_value_correct(self, node):
-        tag = Tag({"id": 0})
+        key = Key({"id": 0})
         data = Data({"x": 6})
-        _, out_data = node.execute_data(tag, data)
+        _, out_data = node.execute_data(key, data)
         assert out_data["result"] == 12  # 6 * 2
 
     def test_execute_data_adds_pipeline_record(self, node, double_pf):
         """execute_data writes pipeline records (compute + persist + cache)."""
-        tag = Tag({"id": 0})
+        key = Key({"id": 0})
         data = Data({"x": 3})
-        node.execute_data(tag, data)
+        node.execute_data(key, data)
         db = node._pipeline_database
         db.flush()
         all_records = db.get_all_records(node.node_identity_path)
@@ -261,9 +261,9 @@ def test_execute_data_adds_pipeline_record(self, node, double_pf):
         assert all_records.num_rows >= 1
 
     def test_execute_data_internal_adds_pipeline_record(self, node, double_pf):
-        tag = Tag({"id": 0})
+        key = Key({"id": 0})
         data = Data({"x": 3})
-        node._process_data_internal(tag, data)
+        node._process_data_internal(key, data)
         db = node._pipeline_database
         db.flush()
         all_records = db.get_all_records(node.node_identity_path)
@@ -271,10 +271,10 @@ def test_execute_data_internal_adds_pipeline_record(self, node, double_pf):
         assert all_records.num_rows >= 1
 
     def test_execute_data_second_call_same_input_deduplicates(self, node):
-        tag = Tag({"id": 0})
+        key = Key({"id": 0})
         data = Data({"x": 3})
-        node._process_data_internal(tag, data)
-        node._process_data_internal(tag, data)
+        node._process_data_internal(key, data)
+        node._process_data_internal(key, data)
         db = node._pipeline_database
         db.flush()
         all_records = db.get_all_records(node.node_identity_path)
@@ -282,11 +282,11 @@ def test_execute_data_second_call_same_input_deduplicates(self, node):
         assert all_records.num_rows == 1
 
     def test_process_and_store_two_data_add_two_entries(self, node):
-        tag = Tag({"id": 0})
+        key = Key({"id": 0})
         data1 = Data({"x": 3})
         data2 = Data({"x": 4})
-        node._process_data_internal(tag, data1)
-        node._process_data_internal(tag, data2)
+        node._process_data_internal(key, data1)
+        node._process_data_internal(key, data2)
         db = node._pipeline_database
         all_records = db.get_all_records(node.node_identity_path)
         assert all_records is not None
@@ -360,7 +360,7 @@ def test_pipeline_hash_different_data_same_hash(self, double_pf):
                     [pa.field("id", pa.int64(), nullable=False), pa.field("x", pa.int64(), nullable=False)]
                 ),
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         node_a = FunctionNode(
             function_pod=FunctionPod(data_function=double_pf),
@@ -437,7 +437,7 @@ def test_row_count_matches_input(self, filled_node):
         assert result is not None
         assert result.num_rows == 4
 
-    def test_contains_tag_column(self, filled_node):
+    def test_contains_key_column(self, filled_node):
         result = filled_node.get_all_records()
         assert result is not None
         assert "id" in result.column_names
@@ -452,7 +452,7 @@ def test_output_values_are_correct(self, filled_node):
         assert result is not None
         assert sorted(result.column("result").to_pylist()) == [0, 2, 4, 6]
 
-    def test_tag_values_are_correct(self, filled_node):
+    def test_key_values_are_correct(self, filled_node):
         result = filled_node.get_all_records()
         assert result is not None
         assert sorted(result.column("id").to_pylist()) == [0, 1, 2, 3]
@@ -543,40 +543,40 @@ def test_source_true_still_has_data_columns(self, filled_node):
 
 
 # ---------------------------------------------------------------------------
-# 10. get_all_records — ColumnConfig: system_tags columns
+# 10. get_all_records — ColumnConfig: system_keys columns
 # ---------------------------------------------------------------------------
 
 
-class TestGetAllRecordsSystemTagColumns:
+class TestGetAllRecordsSystemKeyColumns:
     @pytest.fixture
-    def filled_node_with_sys_tags(self, double_pf) -> FunctionNode:
-        node = _make_node_with_system_tags(double_pf, n=3)
+    def filled_node_with_sys_keys(self, double_pf) -> FunctionNode:
+        node = _make_node_with_system_keys(double_pf, n=3)
         _fill_node(node)
         return node
 
-    def test_default_excludes_system_tag_columns(self, filled_node_with_sys_tags):
-        result = filled_node_with_sys_tags.get_all_records()
+    def test_default_excludes_system_key_columns(self, filled_node_with_sys_keys):
+        result = filled_node_with_sys_keys.get_all_records()
         assert result is not None
         sys_cols = [
-            c for c in result.column_names if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            c for c in result.column_names if c.startswith(constants.SYSTEM_KEY_PREFIX)
         ]
-        assert sys_cols == [], f"Unexpected system tag columns: {sys_cols}"
+        assert sys_cols == [], f"Unexpected system key columns: {sys_cols}"
 
-    def test_system_tags_true_includes_system_tag_columns(
-        self, filled_node_with_sys_tags
+    def test_system_keys_true_includes_system_key_columns(
+        self, filled_node_with_sys_keys
     ):
-        result = filled_node_with_sys_tags.get_all_records(
-            columns={"system_tags": True}
+        result = filled_node_with_sys_keys.get_all_records(
+            columns={"system_keys": True}
         )
         assert result is not None
         sys_cols = [
-            c for c in result.column_names if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            c for c in result.column_names if c.startswith(constants.SYSTEM_KEY_PREFIX)
         ]
         assert len(sys_cols) > 0
 
-    def test_system_tags_true_still_has_data_columns(self, filled_node_with_sys_tags):
-        result = filled_node_with_sys_tags.get_all_records(
-            columns={"system_tags": True}
+    def test_system_keys_true_still_has_data_columns(self, filled_node_with_sys_keys):
+        result = filled_node_with_sys_keys.get_all_records(
+            columns={"system_keys": True}
         )
         assert result is not None
         assert "id" in result.column_names
@@ -596,8 +596,8 @@ def filled_node(self, double_pf) -> FunctionNode:
         return node
 
     @pytest.fixture
-    def filled_node_with_sys_tags(self, double_pf) -> FunctionNode:
-        node = _make_node_with_system_tags(double_pf, n=3)
+    def filled_node_with_sys_keys(self, double_pf) -> FunctionNode:
+        node = _make_node_with_system_keys(double_pf, n=3)
         _fill_node(node)
         return node
 
@@ -617,11 +617,11 @@ def test_all_info_includes_source_columns(self, filled_node):
         ]
         assert len(source_cols) > 0
 
-    def test_all_info_includes_system_tag_columns(self, filled_node_with_sys_tags):
-        result = filled_node_with_sys_tags.get_all_records(all_info=True)
+    def test_all_info_includes_system_key_columns(self, filled_node_with_sys_keys):
+        result = filled_node_with_sys_keys.get_all_records(all_info=True)
         assert result is not None
         sys_cols = [
-            c for c in result.column_names if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            c for c in result.column_names if c.startswith(constants.SYSTEM_KEY_PREFIX)
         ]
         assert len(sys_cols) > 0
 
@@ -681,9 +681,9 @@ def test_result_records_stored_under_pod_uri_path(self, double_pf):
             input_stream=make_int_stream(n=2),
             pipeline_database=db,
         )
-        tag = Tag({"id": 0})
+        key = Key({"id": 0})
         data = Data({"x": 5})
-        node.execute_data(tag, data)
+        node.execute_data(key, data)
         db.flush()
 
         result_path = node._cached_function_pod.record_path
diff --git a/tests/test_core/function_pod/test_function_pod_node_stream.py b/tests/test_core/function_pod/test_function_pod_node_stream.py
index 6442d3cc..265a210d 100644
--- a/tests/test_core/function_pod/test_function_pod_node_stream.py
+++ b/tests/test_core/function_pod/test_function_pod_node_stream.py
@@ -1,7 +1,7 @@
 """
 Tests for FunctionNode's stream interface covering:
 - iter_data: correctness, repeatability, __iter__
-- as_table: correctness, ColumnConfig (content_hash, sort_by_tags)
+- as_table: correctness, ColumnConfig (content_hash, sort_by_keys)
 - output_schema and keys
 - source / upstreams properties
 - Inactive data function behaviour
@@ -90,7 +90,7 @@ def test_as_table_returns_pyarrow_table(self, node):
     def test_as_table_has_correct_row_count(self, node):
         assert len(node.as_table()) == 3
 
-    def test_as_table_contains_tag_columns(self, node):
+    def test_as_table_contains_key_columns(self, node):
         assert "id" in node.as_table().column_names
 
     def test_as_table_contains_data_columns(self, node):
@@ -105,14 +105,14 @@ def test_upstreams_contains_input_stream(self, node):
         assert len(upstreams) == 1
 
     def test_output_schema_has_result_in_data_schema(self, node):
-        tag_schema, data_schema = node.output_schema()
-        assert isinstance(tag_schema, Mapping)
+        key_schema, data_schema = node.output_schema()
+        assert isinstance(key_schema, Mapping)
         assert isinstance(data_schema, Mapping)
         assert "result" in data_schema
 
 
 # ---------------------------------------------------------------------------
-# 2. ColumnConfig — content_hash and sort_by_tags
+# 2. ColumnConfig — content_hash and sort_by_keys
 # ---------------------------------------------------------------------------
 
 
@@ -124,7 +124,7 @@ def test_as_table_content_hash_column(self, double_pf):
         assert "_content_hash" in table.column_names
         assert len(table.column("_content_hash")) == 3
 
-    def test_as_table_sort_by_tags(self, double_pf):
+    def test_as_table_sort_by_keys(self, double_pf):
         db = InMemoryArrowDatabase()
         reversed_table = pa.table(
             {
@@ -135,14 +135,14 @@ def test_as_table_sort_by_tags(self, double_pf):
                 [pa.field("id", pa.int64(), nullable=False), pa.field("x", pa.int64(), nullable=False)]
             ),
         )
-        input_stream = ArrowTableStream(reversed_table, tag_columns=["id"])
+        input_stream = ArrowTableStream(reversed_table, key_columns=["id"])
         node = FunctionNode(
             function_pod=FunctionPod(data_function=double_pf),
             input_stream=input_stream,
             pipeline_database=db,
         )
         node.run()
-        result = node.as_table(columns={"sort_by_tags": True})
+        result = node.as_table(columns={"sort_by_keys": True})
         ids: list[int] = result.column("id").to_pylist()  # type: ignore[assignment]
         assert ids == sorted(ids)
 
diff --git a/tests/test_core/function_pod/test_function_pod_stream.py b/tests/test_core/function_pod/test_function_pod_stream.py
index 71dcba51..c3b08784 100644
--- a/tests/test_core/function_pod/test_function_pod_stream.py
+++ b/tests/test_core/function_pod/test_function_pod_stream.py
@@ -15,7 +15,7 @@
 import pyarrow as pa
 
 from orcapod.protocols.core_protocols import StreamProtocol
-from orcapod.protocols.core_protocols.datagrams import DataProtocol, TagProtocol
+from orcapod.protocols.core_protocols.datagrams import DataProtocol, KeyProtocol
 
 from ..conftest import make_int_stream
 
@@ -55,15 +55,15 @@ def test_has_upstreams_property(self, double_pod):
         assert isinstance(double_pod.process(make_int_stream()).upstreams, tuple)
 
     def test_has_keys_method(self, double_pod):
-        tag_keys, data_keys = double_pod.process(make_int_stream()).keys()
-        assert isinstance(tag_keys, tuple)
+        key_keys, data_keys = double_pod.process(make_int_stream()).keys()
+        assert isinstance(key_keys, tuple)
         assert isinstance(data_keys, tuple)
 
     def test_has_output_schema_method(self, double_pod):
-        tag_schema, data_schema = double_pod.process(
+        key_schema, data_schema = double_pod.process(
             make_int_stream()
         ).output_schema()
-        assert isinstance(tag_schema, Mapping)
+        assert isinstance(key_schema, Mapping)
         assert isinstance(data_schema, Mapping)
 
     def test_has_iter_data_method(self, double_pod):
@@ -80,9 +80,9 @@ def test_has_as_table_method(self, double_pod):
 
 
 class TestFunctionPodStreamKeysAndSchema:
-    def test_tag_keys_come_from_input_stream(self, double_pod):
-        tag_keys, _ = double_pod.process(make_int_stream()).keys()
-        assert "id" in tag_keys
+    def test_key_keys_come_from_input_stream(self, double_pod):
+        key_keys, _ = double_pod.process(make_int_stream()).keys()
+        assert "id" in key_keys
 
     def test_data_keys_come_from_function_output(self, double_pod):
         _, data_keys = double_pod.process(make_int_stream()).keys()
@@ -94,9 +94,9 @@ def test_data_keys_do_not_include_input_keys(self, double_pod):
 
     def test_output_schema_keys_match_keys_method(self, double_pod):
         stream = double_pod.process(make_int_stream())
-        tag_keys, data_keys = stream.keys()
-        tag_schema, data_schema = stream.output_schema()
-        assert set(tag_schema.keys()) == set(tag_keys)
+        key_keys, data_keys = stream.keys()
+        key_schema, data_schema = stream.output_schema()
+        assert set(key_schema.keys()) == set(key_keys)
         assert set(data_schema.keys()) == set(data_keys)
 
     def test_data_schema_type_is_correct(self, double_pod):
@@ -114,9 +114,9 @@ def test_yields_correct_count(self, double_pod):
         n = 5
         assert len(list(double_pod.process(make_int_stream(n=n)).iter_data())) == n
 
-    def test_each_pair_has_tag_and_data(self, double_pod):
-        for tag, data in double_pod.process(make_int_stream()).iter_data():
-            assert isinstance(tag, TagProtocol)
+    def test_each_pair_has_key_and_data(self, double_pod):
+        for key, data in double_pod.process(make_int_stream()).iter_data():
+            assert isinstance(key, KeyProtocol)
             assert isinstance(data, DataProtocol)
 
     def test_output_data_values_are_doubled(self, double_pod):
@@ -149,7 +149,7 @@ def test_table_has_correct_row_count(self, double_pod):
         n = 4
         assert len(double_pod.process(make_int_stream(n=n)).as_table()) == n
 
-    def test_table_contains_tag_columns(self, double_pod):
+    def test_table_contains_key_columns(self, double_pod):
         assert "id" in double_pod.process(make_int_stream()).as_table().column_names
 
     def test_table_contains_data_columns(self, double_pod):
diff --git a/tests/test_core/function_pod/test_pipeline_hash_integration.py b/tests/test_core/function_pod/test_pipeline_hash_integration.py
index 45f78d02..8a7dd7c2 100644
--- a/tests/test_core/function_pod/test_pipeline_hash_integration.py
+++ b/tests/test_core/function_pod/test_pipeline_hash_integration.py
@@ -13,7 +13,7 @@
     Different function → different pipeline_hash
 
   Phase 3 — RootSource base case
-    RootSource.pipeline_hash() is (tag_schema, data_schema) only
+    RootSource.pipeline_hash() is (key_schema, data_schema) only
     Same-schema sources share pipeline_hash regardless of data
 
   Phase 4 — ArrowTableStream pipeline_hash
@@ -184,17 +184,17 @@ def test_different_schema_different_pipeline_hash(self):
         )
         assert src_x.pipeline_hash() != src_y.pipeline_hash()
 
-    def test_different_tag_column_different_pipeline_hash(self):
-        """Tag vs data assignment changes the schema, hence the pipeline_hash."""
+    def test_different_key_column_different_pipeline_hash(self):
+        """Key vs data assignment changes the schema, hence the pipeline_hash."""
         table = pa.table(
             {
                 "id": pa.array([1, 2], type=pa.int64()),
                 "val": pa.array([10, 20], type=pa.int64()),
             }
         )
-        src_with_tag = ArrowTableSource(table=table, tag_columns=["id"], infer_nullable=True)
-        src_no_tag = ArrowTableSource(table=table, infer_nullable=True)
-        assert src_with_tag.pipeline_hash() != src_no_tag.pipeline_hash()
+        src_with_key = ArrowTableSource(table=table, key_columns=["id"], infer_nullable=True)
+        src_no_key = ArrowTableSource(table=table, infer_nullable=True)
+        assert src_with_key.pipeline_hash() != src_no_key.pipeline_hash()
 
     def test_dict_source_same_schema_shares_pipeline_hash_with_arrow_source(self):
         """DictSource and ArrowTableSource with identical schemas share pipeline_hash."""
@@ -205,7 +205,7 @@ def test_dict_source_same_schema_shares_pipeline_hash_with_arrow_source(self):
             data=[{"x": 10}, {"x": 20}, {"x": 30}],
             data_schema={"x": int},
         )
-        # Both have data schema {x: int64}, no tag columns → same pipeline_hash
+        # Both have data schema {x: int64}, no key columns → same pipeline_hash
         assert arrow_src.pipeline_hash() == dict_src.pipeline_hash()
 
     def test_pipeline_hash_stable_across_instances(self):
@@ -252,7 +252,7 @@ def test_different_data_same_schema_different_content_hash(self):
                     ]
                 ),
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         assert s1.pipeline_hash() == s2.pipeline_hash()
         assert s1.content_hash() != s2.content_hash()
@@ -328,7 +328,7 @@ def test_different_data_same_schema_share_uri(self, double_pf):
                         ]
                     ),
                 ),
-                tag_columns=["id"],
+                key_columns=["id"],
             ),
             pipeline_database=db,
         )
@@ -358,7 +358,7 @@ def test_different_data_yields_different_content_hash(self, double_pf):
                         ]
                     ),
                 ),
-                tag_columns=["id"],
+                key_columns=["id"],
             ),
             pipeline_database=db,
         )
diff --git a/tests/test_core/function_pod/test_simple_function_pod.py b/tests/test_core/function_pod/test_simple_function_pod.py
index 1b0436c9..dbfb02e2 100644
--- a/tests/test_core/function_pod/test_simple_function_pod.py
+++ b/tests/test_core/function_pod/test_simple_function_pod.py
@@ -17,7 +17,7 @@
 import pyarrow as pa
 import pytest
 
-from orcapod.core.datagrams import Data, Tag
+from orcapod.core.datagrams import Data, Key
 from orcapod.core.function_pod import FunctionPodStream, FunctionPod
 from orcapod.core.data_function import PythonDataFunction
 from orcapod.core.streams import ArrowTableStream
@@ -50,18 +50,18 @@ def test_has_validate_inputs_method(self, double_pod):
         double_pod.validate_inputs(make_int_stream())
 
     def test_has_process_data_method(self, double_pod):
-        tag = Tag({"id": 0})
+        key = Key({"id": 0})
         data = Data({"x": 5})
-        out_tag, out_data = double_pod.process_data(tag, data)
-        assert out_tag is tag
+        out_key, out_data = double_pod.process_data(key, data)
+        assert out_key is key
         assert out_data is not None
 
     def test_has_argument_symmetry_method(self, double_pod):
         double_pod.argument_symmetry([make_int_stream()])
 
     def test_has_output_schema_method(self, double_pod):
-        tag_schema, data_schema = double_pod.output_schema(make_int_stream())
-        assert isinstance(tag_schema, Mapping)
+        key_schema, data_schema = double_pod.output_schema(make_int_stream())
+        assert isinstance(key_schema, Mapping)
         assert isinstance(data_schema, Mapping)
 
 
@@ -145,7 +145,7 @@ def test_wrong_key_name_raises(self, double_pod):
                     "z": pa.array([0, 1, 2], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         with pytest.raises(ValueError):
             double_pod.process(stream)
@@ -158,7 +158,7 @@ def test_wrong_data_type_raises(self, double_pod):
                     "x": pa.array(["a", "b", "c"], type=pa.large_string()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         with pytest.raises(ValueError):
             double_pod.process(stream)
@@ -171,7 +171,7 @@ def test_missing_required_key_raises(self, add_pod):
                     "x": pa.array([0, 1], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         with pytest.raises(ValueError):
             add_pod.process(stream)
@@ -196,7 +196,7 @@ def add_with_default(x: int, y: int = 10) -> int:
                     ]
                 ),
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         pod.validate_inputs(stream)
 
@@ -220,7 +220,7 @@ def add_with_default(x: int, y: int = 10) -> int:
                     ]
                 ),
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         table = pod.process(stream).as_table()
         assert table.column("result").to_pylist() == [13, 15]
@@ -232,17 +232,17 @@ def add_with_default(x: int, y: int = 10) -> int:
 
 
 class TestSimpleFunctionPodProcessData:
-    def test_returns_tag_and_data_tuple(self, double_pod):
-        result = double_pod.process_data(Tag({"id": 0}), Data({"x": 7}))
+    def test_returns_key_and_data_tuple(self, double_pod):
+        result = double_pod.process_data(Key({"id": 0}), Data({"x": 7}))
         assert len(result) == 2
 
-    def test_output_tag_is_input_tag(self, double_pod):
-        tag = Tag({"id": 42})
-        out_tag, _ = double_pod.process_data(tag, Data({"x": 3}))
-        assert out_tag is tag
+    def test_output_key_is_input_key(self, double_pod):
+        key = Key({"id": 42})
+        out_key, _ = double_pod.process_data(key, Data({"x": 3}))
+        assert out_key is key
 
     def test_output_data_has_correct_value(self, double_pod):
-        _, out_data = double_pod.process_data(Tag({"id": 0}), Data({"x": 6}))
+        _, out_data = double_pod.process_data(Key({"id": 0}), Data({"x": 6}))
         assert out_data is not None
         assert out_data["result"] == 12  # 6 * 2
 
@@ -268,7 +268,7 @@ def test_two_streams_are_joined_before_processing(self, add_pod):
                     ]
                 ),
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         stream_y = ArrowTableStream(
             pa.table(
@@ -283,7 +283,7 @@ def test_two_streams_are_joined_before_processing(self, add_pod):
                     ]
                 ),
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         result = add_pod.process(stream_x, stream_y)
         assert isinstance(result, FunctionPodStream)
diff --git a/tests/test_core/nodes/test_function_node_get_cached.py b/tests/test_core/nodes/test_function_node_get_cached.py
index a11aac4c..235f29c4 100644
--- a/tests/test_core/nodes/test_function_node_get_cached.py
+++ b/tests/test_core/nodes/test_function_node_get_cached.py
@@ -24,7 +24,7 @@ def function_node_with_db():
             "value": pa.array([1, 2], type=pa.int64()),
         }
     )
-    src = ArrowTableSource(table, tag_columns=["key"], infer_nullable=True)
+    src = ArrowTableSource(table, key_columns=["key"], infer_nullable=True)
     pf = PythonDataFunction(double_value, output_keys="result")
     pod = FunctionPod(pf)
     pipeline_db = InMemoryArrowDatabase()
@@ -46,7 +46,7 @@ def test_returns_empty_dict_when_no_db(self):
                 "value": pa.array([1], type=pa.int64()),
             }
         )
-        src = ArrowTableSource(table, tag_columns=["key"], infer_nullable=True)
+        src = ArrowTableSource(table, key_columns=["key"], infer_nullable=True)
         pf = PythonDataFunction(double_value, output_keys="result")
         pod = FunctionPod(pf)
         node = FunctionNode(pod, src)
@@ -60,9 +60,9 @@ def test_returns_cached_results_for_matching_entry_ids(self, function_node_with_
         data = list(node._input_stream.iter_data())
 
         entry_ids = []
-        for tag, data in data:
-            node.execute_data(tag, data)
-            entry_ids.append(node.compute_pipeline_entry_id(tag, data))
+        for key, data in data:
+            node.execute_data(key, data)
+            entry_ids.append(node.compute_pipeline_entry_id(key, data))
 
         cached = node.get_cached_results(entry_ids)
         assert len(cached) == 2
@@ -73,9 +73,9 @@ def test_filters_to_requested_entry_ids_only(self, function_node_with_db):
         data = list(node._input_stream.iter_data())
 
         entry_ids = []
-        for tag, data in data:
-            node.execute_data(tag, data)
-            entry_ids.append(node.compute_pipeline_entry_id(tag, data))
+        for key, data in data:
+            node.execute_data(key, data)
+            entry_ids.append(node.compute_pipeline_entry_id(key, data))
 
         cached = node.get_cached_results([entry_ids[0]])
         assert len(cached) == 1
@@ -88,9 +88,9 @@ def test_get_cached_results_populates_internal_cache(self, function_node_with_db
         data = list(node._input_stream.iter_data())
 
         entry_ids = []
-        for tag, data in data:
-            node.execute_data(tag, data)
-            entry_ids.append(node.compute_pipeline_entry_id(tag, data))
+        for key, data in data:
+            node.execute_data(key, data)
+            entry_ids.append(node.compute_pipeline_entry_id(key, data))
 
         # Clear internal cache
         node._cached_output_datas.clear()
diff --git a/tests/test_core/nodes/test_function_node_iteration.py b/tests/test_core/nodes/test_function_node_iteration.py
index 3dc21265..979b3524 100644
--- a/tests/test_core/nodes/test_function_node_iteration.py
+++ b/tests/test_core/nodes/test_function_node_iteration.py
@@ -32,7 +32,7 @@ def _make_source(n: int = 3) -> ArrowTableSource:
             ]
         ),
     )
-    return ArrowTableSource(table, tag_columns=["id"])
+    return ArrowTableSource(table, key_columns=["id"])
 
 
 def _make_node(n: int = 3, db: InMemoryArrowDatabase | None = None) -> FunctionNode:
@@ -154,7 +154,7 @@ def sometimes_fail(x: int) -> int:
         from orcapod.pipeline.observer import NoOpObserver
 
         class CapturingObserver(NoOpObserver):
-            def on_data_crash(self, node_label, tag, data, exc):
+            def on_data_crash(self, node_label, key, data, exc):
                 errors.append(exc)
 
         results = node.execute(node._input_stream, observer=CapturingObserver(), error_policy="continue")
diff --git a/tests/test_core/nodes/test_node_execute.py b/tests/test_core/nodes/test_node_execute.py
index 9875d737..e17e9349 100644
--- a/tests/test_core/nodes/test_node_execute.py
+++ b/tests/test_core/nodes/test_node_execute.py
@@ -28,7 +28,7 @@ def function_node_with_db():
             "value": pa.array([1, 2], type=pa.int64()),
         }
     )
-    src = ArrowTableSource(table, tag_columns=["key"], infer_nullable=True)
+    src = ArrowTableSource(table, key_columns=["key"], infer_nullable=True)
     pf = PythonDataFunction(double_value, output_keys="result")
     pod = FunctionPod(pf)
     pipeline_db = InMemoryArrowDatabase()
@@ -50,7 +50,7 @@ def function_node_no_db():
             "value": pa.array([1, 2], type=pa.int64()),
         }
     )
-    src = ArrowTableSource(table, tag_columns=["key"], infer_nullable=True)
+    src = ArrowTableSource(table, key_columns=["key"], infer_nullable=True)
     pf = PythonDataFunction(double_value, output_keys="result")
     pod = FunctionPod(pf)
     return FunctionNode(pod, src)
@@ -60,16 +60,16 @@ class TestFunctionNodeExecuteData:
     def test_returns_correct_result(self, function_node_no_db):
         node = function_node_no_db
         data = list(node._input_stream.iter_data())
-        tag, data = data[0]
-        tag_out, result = node.execute_data(tag, data)
+        key, data = data[0]
+        key_out, result = node.execute_data(key, data)
         assert result is not None
         assert result.as_dict()["result"] == 2
 
     def test_writes_pipeline_record(self, function_node_with_db):
         node, pipeline_db, _ = function_node_with_db
         data = list(node._input_stream.iter_data())
-        tag, data = data[0]
-        node.execute_data(tag, data)
+        key, data = data[0]
+        node.execute_data(key, data)
         records = pipeline_db.get_all_records(node.node_identity_path)
         assert records is not None
         assert records.num_rows == 1
@@ -77,8 +77,8 @@ def test_writes_pipeline_record(self, function_node_with_db):
     def test_writes_to_result_db(self, function_node_with_db):
         node, _, _ = function_node_with_db
         data = list(node._input_stream.iter_data())
-        tag, data = data[0]
-        node.execute_data(tag, data)
+        key, data = data[0]
+        node.execute_data(key, data)
         cached = node._cached_function_pod.get_all_cached_outputs()
         assert cached is not None
         assert cached.num_rows == 1
@@ -86,8 +86,8 @@ def test_writes_to_result_db(self, function_node_with_db):
     def test_caches_internally(self, function_node_with_db):
         node, _, _ = function_node_with_db
         data = list(node._input_stream.iter_data())
-        tag, data = data[0]
-        node.execute_data(tag, data)
+        key, data = data[0]
+        node.execute_data(key, data)
         assert len(node._cached_output_datas) == 1
 
 
@@ -130,7 +130,7 @@ def operator_with_db():
             "value": pa.array([10, 20], type=pa.int64()),
         }
     )
-    src = ArrowTableSource(table, tag_columns=["key"])
+    src = ArrowTableSource(table, key_columns=["key"])
     op = SelectDataColumns(columns=["value"])
     db = InMemoryArrowDatabase()
     node = OperatorNode(
@@ -150,7 +150,7 @@ def operator_no_db():
             "value": pa.array([10, 20], type=pa.int64()),
         }
     )
-    src = ArrowTableSource(table, tag_columns=["key"])
+    src = ArrowTableSource(table, key_columns=["key"])
     op = SelectDataColumns(columns=["value"])
     node = OperatorNode(op, input_streams=[src])
     return node, src
diff --git a/tests/test_core/operators/test_merge_join.py b/tests/test_core/operators/test_merge_join.py
index 3be9c8ab..ae3c52ef 100644
--- a/tests/test_core/operators/test_merge_join.py
+++ b/tests/test_core/operators/test_merge_join.py
@@ -17,8 +17,8 @@
 # ---------------------------------------------------------------------------
 #
 # Left and right fixtures are deliberately asymmetric:
-# - Different tag column sets (left: ["id"], right: ["id", "group"])
-#   to prove tag union and that inner join on shared tags works.
+# - Different key column sets (left: ["id"], right: ["id", "group"])
+#   to prove key union and that inner join on shared keys works.
 # - Colliding "value" column has left > right for id=2 (500 > 200)
 #   and left < right for id=3 (30 < 300), forcing actual sort reordering.
 # - Non-overlapping ids (left has 1, right has 4) prove inner join filters.
@@ -40,7 +40,7 @@ def left_stream() -> ArrowTableStream:
             ]
         ),
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 @pytest.fixture
@@ -61,7 +61,7 @@ def right_stream() -> ArrowTableStream:
             ]
         ),
     )
-    return ArrowTableStream(table, tag_columns=["id", "group"])
+    return ArrowTableStream(table, key_columns=["id", "group"])
 
 
 @pytest.fixture
@@ -74,7 +74,7 @@ def left_source() -> ArrowTableSource:
                 "extra_left": pa.array(["a", "b", "c"], type=pa.large_string()),
             }
         ),
-        tag_columns=["id"],
+        key_columns=["id"],
         infer_nullable=True,
     )
 
@@ -90,7 +90,7 @@ def right_source() -> ArrowTableSource:
                 "extra_right": pa.array(["x", "y", "z"], type=pa.large_string()),
             }
         ),
-        tag_columns=["id", "group"],
+        key_columns=["id", "group"],
         infer_nullable=True,
     )
 
@@ -116,8 +116,8 @@ def test_is_commutative(self):
 
 
 class TestMergeJoinBasic:
-    def test_inner_join_on_shared_tags(self, left_stream, right_stream):
-        """Only matching tag values survive the inner join."""
+    def test_inner_join_on_shared_keys(self, left_stream, right_stream):
+        """Only matching key values survive the inner join."""
         op = MergeJoin()
         result = op.static_process(left_stream, right_stream)
         result_table = result.as_table()
@@ -126,13 +126,13 @@ def test_inner_join_on_shared_tags(self, left_stream, right_stream):
         # id=1 only in left, id=4 only in right => only 2,3 survive
         assert sorted(ids) == [2, 3]
 
-    def test_tag_columns_are_union(self, left_stream, right_stream):
-        """Output should have the union of both tag column sets."""
+    def test_key_columns_are_union(self, left_stream, right_stream):
+        """Output should have the union of both key column sets."""
         op = MergeJoin()
         result = op.static_process(left_stream, right_stream)
-        tag_keys, _ = result.keys()
+        key_keys, _ = result.keys()
         # left has ["id"], right has ["id", "group"] => union is {"id", "group"}
-        assert set(tag_keys) == {"id", "group"}
+        assert set(key_keys) == {"id", "group"}
 
     def test_colliding_columns_become_sorted_lists(self, left_stream, right_stream):
         """Colliding data columns must be sorted, not just in input order.
@@ -169,7 +169,7 @@ def test_values_sorted_independently_per_column(self):
                     "col_b": [1, 50],
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         right = ArrowTableStream(
             pa.table(
@@ -179,7 +179,7 @@ def test_values_sorted_independently_per_column(self):
                     "col_b": [99, 2],
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
 
         op = MergeJoin()
@@ -260,7 +260,7 @@ def test_source_columns_sorted_independently_per_colliding_column(self):
                     "reading": pa.array([30, 88], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             source_id="east",
             infer_nullable=True,
         )
@@ -272,7 +272,7 @@ def test_source_columns_sorted_independently_per_colliding_column(self):
                     "reading": pa.array([92, 10], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             source_id="west",
             infer_nullable=True,
         )
@@ -321,8 +321,8 @@ def test_commutative_data_output(self, left_stream, right_stream):
 
         assert rows_lr == rows_rl
 
-    def test_commutative_system_tag_column_names(self, left_source, right_source):
-        """Swapping input order should produce the same system tag column names."""
+    def test_commutative_system_key_column_names(self, left_source, right_source):
+        """Swapping input order should produce the same system key column names."""
         from orcapod.system_constants import constants
 
         op = MergeJoin()
@@ -332,20 +332,20 @@ def test_commutative_system_tag_column_names(self, left_source, right_source):
 
         sys_cols_lr = sorted(
             c
-            for c in result_lr.as_table(columns={"system_tags": True}).column_names
-            if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            for c in result_lr.as_table(columns={"system_keys": True}).column_names
+            if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
         sys_cols_rl = sorted(
             c
-            for c in result_rl.as_table(columns={"system_tags": True}).column_names
-            if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            for c in result_rl.as_table(columns={"system_keys": True}).column_names
+            if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
 
         assert sys_cols_lr == sys_cols_rl
 
-    def test_commutative_system_tag_values_same_pipeline_hash(self):
+    def test_commutative_system_key_values_same_pipeline_hash(self):
         """When both inputs have the same pipeline_hash, swapping inputs
-        must still produce identical system tag VALUES per row (not just
+        must still produce identical system key VALUES per row (not just
         column names). This tests the value-sorting logic.
 
         src_a values [300, 20] vs src_b values [100, 200]:
@@ -359,7 +359,7 @@ def test_commutative_system_tag_values_same_pipeline_hash(self):
                     "value": pa.array([300, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_b = ArrowTableSource(
@@ -369,7 +369,7 @@ def test_commutative_system_tag_values_same_pipeline_hash(self):
                     "value": pa.array([100, 200], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
@@ -380,18 +380,18 @@ def test_commutative_system_tag_values_same_pipeline_hash(self):
         result_ab = op.static_process(src_a, src_b)
         result_ba = op.static_process(src_b, src_a)
 
-        table_ab = result_ab.as_table(columns={"system_tags": True})
-        table_ba = result_ba.as_table(columns={"system_tags": True})
+        table_ab = result_ab.as_table(columns={"system_keys": True})
+        table_ba = result_ba.as_table(columns={"system_keys": True})
 
         sys_cols_ab = sorted(
             c
             for c in table_ab.column_names
-            if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
         sys_cols_ba = sorted(
             c
             for c in table_ba.column_names
-            if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
 
         # Same column names
@@ -404,7 +404,7 @@ def test_commutative_system_tag_values_same_pipeline_hash(self):
         for row_ab, row_ba in zip(rows_ab, rows_ba):
             for col in sys_cols_ab:
                 assert row_ab[col] == row_ba[col], (
-                    f"System tag value mismatch for {col}: "
+                    f"System key value mismatch for {col}: "
                     f"{row_ab[col]!r} vs {row_ba[col]!r}"
                 )
 
@@ -425,72 +425,72 @@ def test_non_colliding_columns_stay_original_type(self, left_stream, right_strea
         assert data_schema["extra_left"] == str
         assert data_schema["extra_right"] == str
 
-    def test_tag_schema_is_union(self, left_stream, right_stream):
-        """Tag schema should be the union of both input tag schemas."""
+    def test_key_schema_is_union(self, left_stream, right_stream):
+        """Key schema should be the union of both input key schemas."""
         op = MergeJoin()
-        tag_schema, _ = op.output_schema(left_stream, right_stream)
+        key_schema, _ = op.output_schema(left_stream, right_stream)
 
-        # left tags: {"id"}, right tags: {"id", "group"}
-        assert "id" in tag_schema
-        assert "group" in tag_schema
+        # left keys: {"id"}, right keys: {"id", "group"}
+        assert "id" in key_schema
+        assert "group" in key_schema
 
-    def test_output_schema_excludes_system_tags_by_default(
+    def test_output_schema_excludes_system_keys_by_default(
         self, left_source, right_source
     ):
-        """Without system_tags=True, no system tag columns in tag schema."""
+        """Key schema omits system keys unless columns={"system_keys": True}."""
         from orcapod.system_constants import constants
 
         op = MergeJoin()
-        tag_schema, _ = op.output_schema(left_source, right_source)
+        key_schema, _ = op.output_schema(left_source, right_source)
 
-        for key in tag_schema:
-            assert not key.startswith(constants.SYSTEM_TAG_PREFIX)
+        for key in key_schema:
+            assert not key.startswith(constants.SYSTEM_KEY_PREFIX)
 
-    def test_output_schema_includes_system_tags_when_requested(
+    def test_output_schema_includes_system_keys_when_requested(
         self, left_source, right_source
     ):
-        """With system_tags=True, tag schema should include system tag columns."""
+        """columns={"system_keys": True} adds system key columns to the key schema."""
         from orcapod.system_constants import constants
 
         op = MergeJoin()
-        tag_schema, _ = op.output_schema(
-            left_source, right_source, columns={"system_tags": True}
+        key_schema, _ = op.output_schema(
+            left_source, right_source, columns={"system_keys": True}
         )
 
-        sys_tag_keys = [
-            k for k in tag_schema if k.startswith(constants.SYSTEM_TAG_PREFIX)
+        sys_key_keys = [
+            k for k in key_schema if k.startswith(constants.SYSTEM_KEY_PREFIX)
         ]
-        assert len(sys_tag_keys) == 4  # 2 sources × 2 fields (source_id + record_id)
+        assert len(sys_key_keys) == 4  # 2 sources × 2 fields (source_id + record_id)
 
-    def test_output_schema_system_tags_match_actual_output(
+    def test_output_schema_system_keys_match_actual_output(
         self, left_source, right_source
     ):
-        """Predicted system tag column names must match the actual result."""
+        """Predicted system key column names must match the actual result."""
         from orcapod.system_constants import constants
 
         op = MergeJoin()
 
         # Predicted schema
-        tag_schema, _ = op.output_schema(
-            left_source, right_source, columns={"system_tags": True}
+        key_schema, _ = op.output_schema(
+            left_source, right_source, columns={"system_keys": True}
         )
-        predicted_sys_tags = sorted(
-            k for k in tag_schema if k.startswith(constants.SYSTEM_TAG_PREFIX)
+        predicted_sys_keys = sorted(
+            k for k in key_schema if k.startswith(constants.SYSTEM_KEY_PREFIX)
         )
 
         # Actual result
         result = op.static_process(left_source, right_source)
-        result_table = result.as_table(columns={"system_tags": True})
-        actual_sys_tags = sorted(
+        result_table = result.as_table(columns={"system_keys": True})
+        actual_sys_keys = sorted(
             c
             for c in result_table.column_names
-            if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
 
-        assert predicted_sys_tags == actual_sys_tags
+        assert predicted_sys_keys == actual_sys_keys
 
-    def test_output_schema_system_tags_match_with_same_pipeline_hash(self):
-        """System tag prediction should work when both inputs have the
+    def test_output_schema_system_keys_match_with_same_pipeline_hash(self):
+        """System key prediction should work when both inputs have the
         same pipeline_hash — columns distinguished by canonical position."""
         from orcapod.system_constants import constants
 
@@ -501,7 +501,7 @@ def test_output_schema_system_tags_match_with_same_pipeline_hash(self):
                     "value": pa.array([10, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_b = ArrowTableSource(
@@ -511,71 +511,71 @@ def test_output_schema_system_tags_match_with_same_pipeline_hash(self):
                     "value": pa.array([100, 200], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
         assert src_a.pipeline_hash().to_hex() == src_b.pipeline_hash().to_hex()
 
         op = MergeJoin()
-        tag_schema, _ = op.output_schema(src_a, src_b, columns={"system_tags": True})
+        key_schema, _ = op.output_schema(src_a, src_b, columns={"system_keys": True})
         predicted = sorted(
-            k for k in tag_schema if k.startswith(constants.SYSTEM_TAG_PREFIX)
+            k for k in key_schema if k.startswith(constants.SYSTEM_KEY_PREFIX)
         )
 
         result = op.static_process(src_a, src_b)
         actual = sorted(
             c
-            for c in result.as_table(columns={"system_tags": True}).column_names
-            if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            for c in result.as_table(columns={"system_keys": True}).column_names
+            if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
 
         assert predicted == actual
-        # Must have 4 system tag columns (2 per source: source_id + record_id)
+        # Must have 4 system key columns (2 per source: source_id + record_id)
         assert len(predicted) == 4
 
-    def test_output_schema_all_info_includes_system_tags(
+    def test_output_schema_all_info_includes_system_keys(
         self, left_source, right_source
     ):
-        """all_info=True should include system tag columns in the schema."""
+        """all_info=True should include system key columns in the schema."""
         from orcapod.system_constants import constants
 
         op = MergeJoin()
-        tag_schema, _ = op.output_schema(left_source, right_source, all_info=True)
+        key_schema, _ = op.output_schema(left_source, right_source, all_info=True)
 
-        sys_tag_keys = [
-            k for k in tag_schema if k.startswith(constants.SYSTEM_TAG_PREFIX)
+        sys_key_keys = [
+            k for k in key_schema if k.startswith(constants.SYSTEM_KEY_PREFIX)
         ]
-        assert len(sys_tag_keys) == 4  # 2 sources × 2 fields
+        assert len(sys_key_keys) == 4  # 2 sources × 2 fields
 
     def test_predicted_schema_matches_result_stream_schema(
         self, left_source, right_source
     ):
         """Operator's predicted output_schema must equal the result stream's
-        output_schema — both tag and data schemas, without system tags."""
+        output_schema — both key and data schemas, without system keys."""
         op = MergeJoin()
 
-        predicted_tag, predicted_pkt = op.output_schema(left_source, right_source)
+        predicted_key, predicted_pkt = op.output_schema(left_source, right_source)
         result = op.static_process(left_source, right_source)
-        actual_tag, actual_pkt = result.output_schema()
+        actual_key, actual_pkt = result.output_schema()
 
-        assert dict(predicted_tag) == dict(actual_tag)
+        assert dict(predicted_key) == dict(actual_key)
         assert dict(predicted_pkt) == dict(actual_pkt)
 
-    def test_predicted_schema_matches_result_stream_schema_with_system_tags(
+    def test_predicted_schema_matches_result_stream_schema_with_system_keys(
         self, left_source, right_source
     ):
-        """Operator's predicted output_schema(system_tags=True) must equal
-        the result stream's output_schema(system_tags=True)."""
+        """Operator's predicted output_schema(columns={"system_keys": True})
+        must equal the result stream's output_schema with the same columns."""
         op = MergeJoin()
 
-        predicted_tag, predicted_pkt = op.output_schema(
-            left_source, right_source, columns={"system_tags": True}
+        predicted_key, predicted_pkt = op.output_schema(
+            left_source, right_source, columns={"system_keys": True}
         )
         result = op.static_process(left_source, right_source)
-        actual_tag, actual_pkt = result.output_schema(columns={"system_tags": True})
+        actual_key, actual_pkt = result.output_schema(columns={"system_keys": True})
 
-        assert dict(predicted_tag) == dict(actual_tag)
+        assert dict(predicted_key) == dict(actual_key)
         assert dict(predicted_pkt) == dict(actual_pkt)
 
 
@@ -584,11 +584,11 @@ def test_colliding_columns_with_incompatible_types_raises(self):
         """MergeJoin should reject colliding columns with different types."""
         left = ArrowTableStream(
             pa.table({"id": [1, 2], "value": [10, 20]}),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         right = ArrowTableStream(
             pa.table({"id": [1, 2], "value": ["a", "b"]}),
-            tag_columns=["id"],
+            key_columns=["id"],
         )
 
         op = MergeJoin()
@@ -602,24 +602,24 @@ def test_colliding_columns_with_same_types_passes(self, left_stream, right_strea
         op.validate_inputs(left_stream, right_stream)
 
 
-class TestMergeJoinSystemTags:
-    """System tag tests demonstrating why both pipeline_hash and canonical
+class TestMergeJoinSystemKeys:
+    """System key tests demonstrating why both pipeline_hash and canonical
     position are needed."""
 
     @staticmethod
-    def _get_system_tag_columns(table, constants):
+    def _get_system_key_columns(table, constants):
         return sorted(
-            c for c in table.column_names if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            c for c in table.column_names if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
 
     @staticmethod
-    def _parse_system_tag_column(col, constants):
-        """Parse system tag column name into its component blocks.
+    def _parse_system_key_column(col, constants):
+        """Parse system key column name into its component blocks.
 
-        Format: _tag_{field_type}::{schema_hash}::{stream_hash}:{canonical_position}
+        Format: _key_{field_type}::{schema_hash}::{stream_hash}:{canonical_position}
         Returns: (field_type, schema_hash, stream_hash, index)
         """
-        after_prefix = col[len(constants.SYSTEM_TAG_PREFIX) :]
+        after_prefix = col[len(constants.SYSTEM_KEY_PREFIX) :]
         blocks = after_prefix.split(constants.BLOCK_SEPARATOR)
         field_type = blocks[0]
         schema_hash = blocks[1]
@@ -628,32 +628,32 @@ def _parse_system_tag_column(col, constants):
         index = join_block_fields[1]
         return field_type, schema_hash, stream_hash, index
 
-    def test_two_system_tag_columns_produced(self, left_source, right_source):
-        """MergeJoin of two sources should produce 4 system tag columns (2 per source: source_id + record_id)."""
+    def test_two_system_key_columns_produced(self, left_source, right_source):
+        """MergeJoin of two sources should produce 4 system key columns (2 per source: source_id + record_id)."""
         from orcapod.system_constants import constants
 
         op = MergeJoin()
         result = op.static_process(left_source, right_source)
-        result_table = result.as_table(columns={"system_tags": True})
-        sys_cols = self._get_system_tag_columns(result_table, constants)
+        result_table = result.as_table(columns={"system_keys": True})
+        sys_cols = self._get_system_key_columns(result_table, constants)
         assert len(sys_cols) == 4
 
-    def test_system_tag_canonical_positions(self, left_source, right_source):
-        """System tag columns should carry canonical position indices
+    def test_system_key_canonical_positions(self, left_source, right_source):
+        """System key columns should carry canonical position indices
         matching stable sort by pipeline_hash."""
         from orcapod.config import Config
         from orcapod.system_constants import constants
 
-        n_char = Config().system_tag_hash_n_char
+        n_char = Config().system_key_hash_n_char
 
         op = MergeJoin()
         result = op.static_process(left_source, right_source)
-        result_table = result.as_table(columns={"system_tags": True})
-        sys_cols = self._get_system_tag_columns(result_table, constants)
+        result_table = result.as_table(columns={"system_keys": True})
+        sys_cols = self._get_system_key_columns(result_table, constants)
 
         # Filter to just source_id columns for position checking
         sid_cols = [
-            c for c in sys_cols if c.startswith(constants.SYSTEM_TAG_SOURCE_ID_PREFIX)
+            c for c in sys_cols if c.startswith(constants.SYSTEM_KEY_SOURCE_ID_PREFIX)
         ]
 
         # Independently determine expected ordering
@@ -662,7 +662,7 @@ def test_system_tag_canonical_positions(self, left_source, right_source):
 
         for expected_idx, expected_source in enumerate(sorted_sources):
             field_type, schema_hash, stream_hash, index_str = (
-                self._parse_system_tag_column(sid_cols[expected_idx], constants)
+                self._parse_system_key_column(sid_cols[expected_idx], constants)
             )
             expected_stream_hash = expected_source.pipeline_hash().to_hex(n_char)
             assert stream_hash == expected_stream_hash
@@ -670,7 +670,7 @@ def test_system_tag_canonical_positions(self, left_source, right_source):
 
     def test_same_schema_inputs_distinguished_by_canonical_position(self):
         """Two streams with identical schemas (same pipeline_hash) must still
-        produce distinct system tag columns via canonical position.
+        produce distinct system key columns via canonical position.
 
         Values have mixed ordering (a>b for id=1, a<b for id=2) to prove
         the merge actually sorts."""
@@ -683,7 +683,7 @@ def test_same_schema_inputs_distinguished_by_canonical_position(self):
                     "value": pa.array([300, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_b = ArrowTableSource(
@@ -693,7 +693,7 @@ def test_same_schema_inputs_distinguished_by_canonical_position(self):
                     "value": pa.array([100, 200], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
@@ -702,21 +702,21 @@ def test_same_schema_inputs_distinguished_by_canonical_position(self):
 
         op = MergeJoin()
         result = op.static_process(src_a, src_b)
-        result_table = result.as_table(columns={"system_tags": True})
-        sys_cols = self._get_system_tag_columns(result_table, constants)
+        result_table = result.as_table(columns={"system_keys": True})
+        sys_cols = self._get_system_key_columns(result_table, constants)
 
-        # Must have 4 system tag columns (2 per source: source_id + record_id)
+        # Must have 4 system key columns (2 per source: source_id + record_id)
         assert len(sys_cols) == 4
 
         # Filter to source_id columns only for position checking
         sid_cols = [
-            c for c in sys_cols if c.startswith(constants.SYSTEM_TAG_SOURCE_ID_PREFIX)
+            c for c in sys_cols if c.startswith(constants.SYSTEM_KEY_SOURCE_ID_PREFIX)
         ]
         assert len(sid_cols) == 2
 
         # Both should have the same pipeline_hash but different positions
-        _, _, hash_0, pos_0 = self._parse_system_tag_column(sid_cols[0], constants)
-        _, _, hash_1, pos_1 = self._parse_system_tag_column(sid_cols[1], constants)
+        _, _, hash_0, pos_0 = self._parse_system_key_column(sid_cols[0], constants)
+        _, _, hash_1, pos_1 = self._parse_system_key_column(sid_cols[1], constants)
 
         assert hash_0 == hash_1  # Same pipeline hash
         assert pos_0 != pos_1  # Different canonical positions
@@ -733,26 +733,26 @@ def test_different_schema_inputs_have_different_pipeline_hashes(
         self, left_source, right_source
     ):
         """Two sources with different schemas should have different pipeline_hashes
-        in their system tag columns."""
+        in their system key columns."""
         from orcapod.system_constants import constants
 
         op = MergeJoin()
         result = op.static_process(left_source, right_source)
-        result_table = result.as_table(columns={"system_tags": True})
-        sys_cols = self._get_system_tag_columns(result_table, constants)
+        result_table = result.as_table(columns={"system_keys": True})
+        sys_cols = self._get_system_key_columns(result_table, constants)
 
         # Filter to source_id columns for pipeline hash comparison
         sid_cols = [
-            c for c in sys_cols if c.startswith(constants.SYSTEM_TAG_SOURCE_ID_PREFIX)
+            c for c in sys_cols if c.startswith(constants.SYSTEM_KEY_SOURCE_ID_PREFIX)
         ]
-        _, _, hash_0, _ = self._parse_system_tag_column(sid_cols[0], constants)
-        _, _, hash_1, _ = self._parse_system_tag_column(sid_cols[1], constants)
+        _, _, hash_0, _ = self._parse_system_key_column(sid_cols[0], constants)
+        _, _, hash_1, _ = self._parse_system_key_column(sid_cols[1], constants)
 
         assert hash_0 != hash_1
 
-    def test_commutative_system_tag_column_names_same_pipeline_hash(self):
+    def test_commutative_system_key_column_names_same_pipeline_hash(self):
         """Swapping inputs with same pipeline_hash must produce identical
-        system tag column names. Values have mixed ordering to prove sort."""
+        system key column names. Values have mixed ordering to prove the merge sorts."""
         from orcapod.system_constants import constants
 
         src_a = ArrowTableSource(
@@ -762,7 +762,7 @@ def test_commutative_system_tag_column_names_same_pipeline_hash(self):
                     "value": pa.array([300, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_b = ArrowTableSource(
@@ -772,7 +772,7 @@ def test_commutative_system_tag_column_names_same_pipeline_hash(self):
                     "value": pa.array([100, 200], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
@@ -780,17 +780,17 @@ def test_commutative_system_tag_column_names_same_pipeline_hash(self):
         result_ab = op.static_process(src_a, src_b)
         result_ba = op.static_process(src_b, src_a)
 
-        sys_ab = self._get_system_tag_columns(
-            result_ab.as_table(columns={"system_tags": True}), constants
+        sys_ab = self._get_system_key_columns(
+            result_ab.as_table(columns={"system_keys": True}), constants
         )
-        sys_ba = self._get_system_tag_columns(
-            result_ba.as_table(columns={"system_tags": True}), constants
+        sys_ba = self._get_system_key_columns(
+            result_ba.as_table(columns={"system_keys": True}), constants
         )
 
         assert sys_ab == sys_ba
 
-    def test_system_tag_values_sorted_for_same_pipeline_hash(self):
-        """When two streams share the same pipeline_hash, system tag VALUES
+    def test_system_key_values_sorted_for_same_pipeline_hash(self):
+        """When two streams share the same pipeline_hash, system key VALUES
         must be sorted per row so that position :0 always gets the
         lexicographically smaller (source_id, record_id) tuple.
 
@@ -806,7 +806,7 @@ def test_system_tag_values_sorted_for_same_pipeline_hash(self):
                     "value": pa.array([300, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             source_id="zzz_source",
             infer_nullable=True,
         )
@@ -817,7 +817,7 @@ def test_system_tag_values_sorted_for_same_pipeline_hash(self):
                     "value": pa.array([100, 200], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             source_id="aaa_source",
             infer_nullable=True,
         )
@@ -829,15 +829,15 @@ def test_system_tag_values_sorted_for_same_pipeline_hash(self):
         result_ab = op.static_process(src_a, src_b)
         result_ba = op.static_process(src_b, src_a)
 
-        table_ab = result_ab.as_table(columns={"system_tags": True})
-        table_ba = result_ba.as_table(columns={"system_tags": True})
+        table_ab = result_ab.as_table(columns={"system_keys": True})
+        table_ba = result_ba.as_table(columns={"system_keys": True})
 
-        sys_cols = self._get_system_tag_columns(table_ab, constants)
+        sys_cols = self._get_system_key_columns(table_ab, constants)
         assert len(sys_cols) == 4  # 2 sources × 2 fields
 
         # Check source_id columns are sorted
         sid_cols = sorted(
-            c for c in sys_cols if c.startswith(constants.SYSTEM_TAG_SOURCE_ID_PREFIX)
+            c for c in sys_cols if c.startswith(constants.SYSTEM_KEY_SOURCE_ID_PREFIX)
         )
         assert len(sid_cols) == 2
 
diff --git a/tests/test_core/operators/test_operator_config.py b/tests/test_core/operators/test_operator_config.py
index 414e9769..bc513215 100644
--- a/tests/test_core/operators/test_operator_config.py
+++ b/tests/test_core/operators/test_operator_config.py
@@ -3,14 +3,14 @@
 from orcapod.core.operators import (
     Batch,
     DropDataColumns,
-    DropTagColumns,
+    DropKeyColumns,
     Join,
     MapData,
-    MapTags,
+    MapKeys,
     MergeJoin,
     PolarsFilter,
     SelectDataColumns,
-    SelectTagColumns,
+    SelectKeyColumns,
     SemiJoin,
 )
 
@@ -78,28 +78,28 @@ def test_round_trip(self):
         assert restored.drop_partial_batch is True
 
 
-class TestSelectTagColumnsConfig:
+class TestSelectKeyColumnsConfig:
     def test_to_config(self):
-        op = SelectTagColumns(columns=["a", "b"], strict=False)
+        op = SelectKeyColumns(columns=["a", "b"], strict=False)
         config = op.to_config()
-        assert config["class_name"] == "SelectTagColumns"
+        assert config["class_name"] == "SelectKeyColumns"
         assert config["config"]["columns"] == ["a", "b"]
         assert config["config"]["strict"] is False
 
     def test_round_trip(self):
-        op = SelectTagColumns(columns=["a", "b"])
+        op = SelectKeyColumns(columns=["a", "b"])
         config = op.to_config()
-        restored = SelectTagColumns.from_config(config)
-        assert isinstance(restored, SelectTagColumns)
+        restored = SelectKeyColumns.from_config(config)
+        assert isinstance(restored, SelectKeyColumns)
 
 
-class TestDropTagColumnsConfig:
+class TestDropKeyColumnsConfig:
     def test_round_trip(self):
-        op = DropTagColumns(columns=["x"])
+        op = DropKeyColumns(columns=["x"])
         config = op.to_config()
-        restored = DropTagColumns.from_config(config)
-        assert isinstance(restored, DropTagColumns)
-        assert config["class_name"] == "DropTagColumns"
+        restored = DropKeyColumns.from_config(config)
+        assert isinstance(restored, DropKeyColumns)
+        assert config["class_name"] == "DropKeyColumns"
 
 
 class TestSelectDataColumnsConfig:
@@ -118,18 +118,18 @@ def test_round_trip(self):
         assert isinstance(restored, DropDataColumns)
 
 
-class TestMapTagsConfig:
+class TestMapKeysConfig:
     def test_to_config(self):
-        op = MapTags(name_map={"old": "new"}, drop_unmapped=True)
+        op = MapKeys(name_map={"old": "new"}, drop_unmapped=True)
         config = op.to_config()
         assert config["config"]["name_map"] == {"old": "new"}
         assert config["config"]["drop_unmapped"] is True
 
     def test_round_trip(self):
-        op = MapTags(name_map={"old": "new"})
+        op = MapKeys(name_map={"old": "new"})
         config = op.to_config()
-        restored = MapTags.from_config(config)
-        assert isinstance(restored, MapTags)
+        restored = MapKeys.from_config(config)
+        assert isinstance(restored, MapKeys)
 
 
 class TestMapDataConfig:
diff --git a/tests/test_core/operators/test_operator_node.py b/tests/test_core/operators/test_operator_node.py
index df3cdeca..68a4b634 100644
--- a/tests/test_core/operators/test_operator_node.py
+++ b/tests/test_core/operators/test_operator_node.py
@@ -37,19 +37,19 @@
 
 @pytest.fixture
 def simple_stream() -> ArrowTableStream:
-    """Stream with 1 tag (id) and 1 data column (x)."""
+    """Stream with 1 key (id) and 1 data column (x)."""
     table = pa.table(
         {
             "id": pa.array([1, 2, 3], type=pa.int64()),
             "x": pa.array([10, 20, 30], type=pa.int64()),
         }
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 @pytest.fixture
 def two_data_stream() -> ArrowTableStream:
-    """Stream with 1 tag (id) and 2 data columns (x, y)."""
+    """Stream with 1 key (id) and 2 data columns (x, y)."""
     table = pa.table(
         {
             "id": pa.array([1, 2, 3], type=pa.int64()),
@@ -57,7 +57,7 @@ def two_data_stream() -> ArrowTableStream:
             "y": pa.array([100, 200, 300], type=pa.int64()),
         }
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 @pytest.fixture
@@ -69,7 +69,7 @@ def left_stream() -> ArrowTableStream:
             "value_a": pa.array([10, 20, 30], type=pa.int64()),
         }
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 @pytest.fixture
@@ -81,7 +81,7 @@ def right_stream() -> ArrowTableStream:
             "value_b": pa.array([200, 300, 400], type=pa.int64()),
         }
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 @pytest.fixture
@@ -129,16 +129,16 @@ def test_upstreams_match_input(self, simple_stream):
     def test_output_schema(self, simple_stream):
         op = MapData({"x": "renamed_x"})
         node = _make_node(op, (simple_stream,))
-        tag_schema, data_schema = node.output_schema()
-        assert "id" in tag_schema
+        key_schema, data_schema = node.output_schema()
+        assert "id" in key_schema
         assert "renamed_x" in data_schema
         assert "x" not in data_schema
 
     def test_keys(self, simple_stream):
         op = MapData({"x": "renamed_x"})
         node = _make_node(op, (simple_stream,))
-        tag_keys, data_keys = node.keys()
-        assert "id" in tag_keys
+        key_keys, data_keys = node.keys()
+        assert "id" in key_keys
         assert "renamed_x" in data_keys
 
     def test_stream_protocol_conformance(self, simple_stream):
@@ -174,11 +174,11 @@ def test_pipeline_path_ends_with_schema_hash(self, simple_stream):
         assert path[-1].startswith("schema:")
         assert not any(seg.startswith("instance:") for seg in path)
 
-    def test_no_tag_schema_hash_in_path(self, simple_stream):
+    def test_no_key_schema_hash_in_path(self, simple_stream):
         op = MapData({"x": "renamed_x"})
         node = _make_node(op, (simple_stream,))
         path = node.node_identity_path
-        assert not any(segment.startswith("tag:") for segment in path)
+        assert not any(segment.startswith("key:") for segment in path)
 
 
 # ---------------------------------------------------------------------------
@@ -216,8 +216,8 @@ def test_different_operator_different_hash(self, simple_stream):
     def test_different_input_different_content_hash(self):
         table1 = pa.table({"id": [1, 2], "x": [10, 20]})
         table2 = pa.table({"id": [3, 4], "x": [30, 40]})
-        s1 = ArrowTableStream(table1, tag_columns=["id"])
-        s2 = ArrowTableStream(table2, tag_columns=["id"])
+        s1 = ArrowTableStream(table1, key_columns=["id"])
+        s2 = ArrowTableStream(table2, key_columns=["id"])
         op = MapData({"x": "y"})
         node1 = _make_node(op, (s1,))
         node2 = _make_node(op, (s2,))
@@ -237,8 +237,8 @@ def test_same_schema_same_pipeline_hash(self):
                 "x": pa.array([30, 40], type=pa.int64()),
             }
         )
-        s1 = ArrowTableStream(table1, tag_columns=["id"])
-        s2 = ArrowTableStream(table2, tag_columns=["id"])
+        s1 = ArrowTableStream(table1, key_columns=["id"])
+        s2 = ArrowTableStream(table2, key_columns=["id"])
         op = MapData({"x": "y"})
         node1 = _make_node(op, (s1,))
         node2 = _make_node(op, (s2,))
@@ -328,7 +328,7 @@ def test_iter_data(self, simple_stream, db):
         node.run()                          # <-- add this line
         data = list(node.iter_data())
         assert len(data) == 3
-        for tag, data in data:
+        for key, data in data:
             assert "renamed_x" in data.keys()
 
     def test_as_table(self, simple_stream, db):
@@ -385,8 +385,8 @@ def test_replay_no_cache_returns_empty_stream(self, simple_stream, db):
         table = node.as_table()
         assert table.num_rows == 0
         # Schema is still correct
-        tag_keys, data_keys = node.keys()
-        assert set(tag_keys).issubset(set(table.column_names))
+        key_keys, data_keys = node.keys()
+        assert set(key_keys).issubset(set(table.column_names))
         assert set(data_keys).issubset(set(table.column_names))
 
 
diff --git a/tests/test_core/operators/test_operator_node_attach_db.py b/tests/test_core/operators/test_operator_node_attach_db.py
index d93c6d2e..d144de4c 100644
--- a/tests/test_core/operators/test_operator_node_attach_db.py
+++ b/tests/test_core/operators/test_operator_node_attach_db.py
@@ -19,7 +19,7 @@ def _make_stream(name="x", n=3):
                 name: pa.array(list(range(n)), type=pa.int64()),
             }
         ),
-        tag_columns=["id"],
+        key_columns=["id"],
     )
 
 
diff --git a/tests/test_core/operators/test_operator_node_non_active.py b/tests/test_core/operators/test_operator_node_non_active.py
index 593355a6..24167699 100644
--- a/tests/test_core/operators/test_operator_node_non_active.py
+++ b/tests/test_core/operators/test_operator_node_non_active.py
@@ -22,7 +22,7 @@
 
 @pytest.fixture
 def simple_source() -> ArrowTableStream:
-    """Single-tag stream: id (tag), x (data), 3 rows."""
+    """Single-key stream: id (key), x (data), 3 rows."""
     return ArrowTableStream(
         pa.table(
             {
@@ -30,7 +30,7 @@ def simple_source() -> ArrowTableStream:
                 "x": pa.array([10, 20, 30], type=pa.int64()),
             }
         ),
-        tag_columns=["id"],
+        key_columns=["id"],
     )
 
 
diff --git a/tests/test_core/operators/test_operators.py b/tests/test_core/operators/test_operators.py
index 483ed437..5c91ff7c 100644
--- a/tests/test_core/operators/test_operators.py
+++ b/tests/test_core/operators/test_operators.py
@@ -8,13 +8,13 @@
 from orcapod.core.operators import (
     Batch,
     DropDataColumns,
-    DropTagColumns,
+    DropKeyColumns,
     Join,
     MapData,
-    MapTags,
+    MapKeys,
     PolarsFilter,
     SelectDataColumns,
-    SelectTagColumns,
+    SelectKeyColumns,
     SemiJoin,
 )
 from orcapod.core.streams import ArrowTableStream
@@ -28,7 +28,7 @@
 
 @pytest.fixture
 def simple_stream() -> ArrowTableStream:
-    """Stream with 1 tag (animal) and 2 data columns (weight, legs)."""
+    """Stream with 1 key (animal) and 2 data columns (weight, legs)."""
     table = pa.table(
         {
             "animal": ["cat", "dog", "bird"],
@@ -36,12 +36,12 @@ def simple_stream() -> ArrowTableStream:
             "legs": [4, 4, 2],
         }
     )
-    return ArrowTableStream(table, tag_columns=["animal"])
+    return ArrowTableStream(table, key_columns=["animal"])
 
 
 @pytest.fixture
-def two_tag_stream() -> ArrowTableStream:
-    """Stream with 2 tags (region, animal) and 1 data column (count)."""
+def two_key_stream() -> ArrowTableStream:
+    """Stream with 2 keys (region, animal) and 1 data column (count)."""
     table = pa.table(
         {
             "region": ["east", "east", "west"],
@@ -49,7 +49,7 @@ def two_tag_stream() -> ArrowTableStream:
             "count": [10, 5, 8],
         }
     )
-    return ArrowTableStream(table, tag_columns=["region", "animal"])
+    return ArrowTableStream(table, key_columns=["region", "animal"])
 
 
 @pytest.fixture
@@ -61,7 +61,7 @@ def left_stream() -> ArrowTableStream:
             "value_a": [10, 20, 30],
         }
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 @pytest.fixture
@@ -73,7 +73,7 @@ def right_stream() -> ArrowTableStream:
             "value_b": [200, 300, 400],
         }
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 @pytest.fixture
@@ -85,7 +85,7 @@ def disjoint_stream() -> ArrowTableStream:
             "speed": [30.0, 45.0, 80.0],
         }
     )
-    return ArrowTableStream(table, tag_columns=["animal"])
+    return ArrowTableStream(table, key_columns=["animal"])
 
 
 # ===================================================================
@@ -100,16 +100,16 @@ def test_polars_filter_is_pod(self):
         op = PolarsFilter()
         assert isinstance(op, PodProtocol)
 
-    def test_select_tag_columns_is_pod(self):
-        op = SelectTagColumns(columns=["x"])
+    def test_select_key_columns_is_pod(self):
+        op = SelectKeyColumns(columns=["x"])
         assert isinstance(op, PodProtocol)
 
     def test_select_data_columns_is_pod(self):
         op = SelectDataColumns(columns=["x"])
         assert isinstance(op, PodProtocol)
 
-    def test_drop_tag_columns_is_pod(self):
-        op = DropTagColumns(columns=["x"])
+    def test_drop_key_columns_is_pod(self):
+        op = DropKeyColumns(columns=["x"])
         assert isinstance(op, PodProtocol)
 
     def test_drop_data_columns_is_pod(self):
@@ -120,8 +120,8 @@ def test_map_data_is_pod(self):
         op = MapData(name_map={"a": "b"})
         assert isinstance(op, PodProtocol)
 
-    def test_map_tags_is_pod(self):
-        op = MapTags(name_map={"a": "b"})
+    def test_map_keys_is_pod(self):
+        op = MapKeys(name_map={"a": "b"})
         assert isinstance(op, PodProtocol)
 
     def test_batch_is_pod(self):
@@ -151,9 +151,9 @@ def test_polars_filter_producer(self, simple_stream):
         assert isinstance(out, StreamProtocol)
         assert out.producer is op
 
-    def test_select_tag_columns_producer(self, two_tag_stream):
-        op = SelectTagColumns(columns=["region"])
-        out = op.process(two_tag_stream)
+    def test_select_key_columns_producer(self, two_key_stream):
+        op = SelectKeyColumns(columns=["region"])
+        out = op.process(two_key_stream)
         assert isinstance(out, StreamProtocol)
         assert out.producer is op
 
@@ -163,9 +163,9 @@ def test_select_data_columns_producer(self, simple_stream):
         assert isinstance(out, StreamProtocol)
         assert out.producer is op
 
-    def test_drop_tag_columns_producer(self, two_tag_stream):
-        op = DropTagColumns(columns=["region"])
-        out = op.process(two_tag_stream)
+    def test_drop_key_columns_producer(self, two_key_stream):
+        op = DropKeyColumns(columns=["region"])
+        out = op.process(two_key_stream)
         assert isinstance(out, StreamProtocol)
         assert out.producer is op
 
@@ -181,9 +181,9 @@ def test_map_data_producer(self, simple_stream):
         assert isinstance(out, StreamProtocol)
         assert out.producer is op
 
-    def test_map_tags_producer(self, two_tag_stream):
-        op = MapTags(name_map={"region": "area"})
-        out = op.process(two_tag_stream)
+    def test_map_keys_producer(self, two_key_stream):
+        op = MapKeys(name_map={"region": "area"})
+        out = op.process(two_key_stream)
         assert isinstance(out, StreamProtocol)
         assert out.producer is op
 
@@ -244,8 +244,8 @@ def test_select_data_strict_rejects_missing(self, simple_stream):
         with pytest.raises(Exception):
             op.process(simple_stream)
 
-    def test_select_tag_strict_rejects_missing(self, simple_stream):
-        op = SelectTagColumns(columns=["nonexistent"], strict=True)
+    def test_select_key_strict_rejects_missing(self, simple_stream):
+        op = SelectKeyColumns(columns=["nonexistent"], strict=True)
         with pytest.raises(Exception):
             op.process(simple_stream)
 
@@ -254,8 +254,8 @@ def test_drop_data_strict_rejects_missing(self, simple_stream):
         with pytest.raises(Exception):
             op.process(simple_stream)
 
-    def test_drop_tag_strict_rejects_missing(self, simple_stream):
-        op = DropTagColumns(columns=["nonexistent"], strict=True)
+    def test_drop_key_strict_rejects_missing(self, simple_stream):
+        op = DropKeyColumns(columns=["nonexistent"], strict=True)
         with pytest.raises(Exception):
             op.process(simple_stream)
 
@@ -281,27 +281,27 @@ def test_filter_reduces_rows(self, simple_stream):
 
     def test_filter_preserves_schema(self, simple_stream):
         op = PolarsFilter(constraints={"legs": 4})
-        tag_schema, data_schema = op.output_schema(simple_stream)
-        orig_tag, orig_pkt = simple_stream.output_schema()
-        assert set(tag_schema.keys()) == set(orig_tag.keys())
+        key_schema, data_schema = op.output_schema(simple_stream)
+        orig_key, orig_pkt = simple_stream.output_schema()
+        assert set(key_schema.keys()) == set(orig_key.keys())
         assert set(data_schema.keys()) == set(orig_pkt.keys())
 
 
-class TestSelectTagColumnsBehavior:
-    def test_keeps_only_selected_tags(self, two_tag_stream):
-        op = SelectTagColumns(columns=["region"])
-        out = op.process(two_tag_stream)
-        tag_keys, pkt_keys = out.keys()
-        assert "region" in tag_keys
-        assert "animal" not in tag_keys
+class TestSelectKeyColumnsBehavior:
+    def test_keeps_only_selected_keys(self, two_key_stream):
+        op = SelectKeyColumns(columns=["region"])
+        out = op.process(two_key_stream)
+        key_keys, pkt_keys = out.keys()
+        assert "region" in key_keys
+        assert "animal" not in key_keys
         # data columns unchanged
         assert "count" in pkt_keys
 
-    def test_output_schema_matches_result(self, two_tag_stream):
-        op = SelectTagColumns(columns=["region"])
-        tag_schema, pkt_schema = op.output_schema(two_tag_stream)
-        assert "region" in tag_schema
-        assert "animal" not in tag_schema
+    def test_output_schema_matches_result(self, two_key_stream):
+        op = SelectKeyColumns(columns=["region"])
+        key_schema, pkt_schema = op.output_schema(two_key_stream)
+        assert "region" in key_schema
+        assert "animal" not in key_schema
         assert "count" in pkt_schema
 
 
@@ -309,47 +309,47 @@ class TestSelectDataColumnsBehavior:
     def test_keeps_only_selected_data(self, simple_stream):
         op = SelectDataColumns(columns=["weight"])
         out = op.process(simple_stream)
-        tag_keys, pkt_keys = out.keys()
+        key_keys, pkt_keys = out.keys()
         assert pkt_keys == ("weight",)
         assert "legs" not in pkt_keys
-        # tag columns unchanged
-        assert "animal" in tag_keys
+        # key columns unchanged
+        assert "animal" in key_keys
 
     def test_output_schema_matches_result(self, simple_stream):
         op = SelectDataColumns(columns=["weight"])
-        tag_schema, pkt_schema = op.output_schema(simple_stream)
+        key_schema, pkt_schema = op.output_schema(simple_stream)
         assert "weight" in pkt_schema
         assert "legs" not in pkt_schema
 
 
-class TestDropTagColumnsBehavior:
-    def test_drops_specified_tags(self, two_tag_stream):
-        op = DropTagColumns(columns=["region"])
-        out = op.process(two_tag_stream)
-        tag_keys, pkt_keys = out.keys()
-        assert "region" not in tag_keys
-        assert "animal" in tag_keys
+class TestDropKeyColumnsBehavior:
+    def test_drops_specified_keys(self, two_key_stream):
+        op = DropKeyColumns(columns=["region"])
+        out = op.process(two_key_stream)
+        key_keys, pkt_keys = out.keys()
+        assert "region" not in key_keys
+        assert "animal" in key_keys
         assert "count" in pkt_keys
 
-    def test_output_schema_matches_result(self, two_tag_stream):
-        op = DropTagColumns(columns=["region"])
-        tag_schema, pkt_schema = op.output_schema(two_tag_stream)
-        assert "region" not in tag_schema
-        assert "animal" in tag_schema
+    def test_output_schema_matches_result(self, two_key_stream):
+        op = DropKeyColumns(columns=["region"])
+        key_schema, pkt_schema = op.output_schema(two_key_stream)
+        assert "region" not in key_schema
+        assert "animal" in key_schema
 
 
 class TestDropDataColumnsBehavior:
     def test_drops_specified_data(self, simple_stream):
         op = DropDataColumns(columns=["legs"])
         out = op.process(simple_stream)
-        tag_keys, pkt_keys = out.keys()
+        key_keys, pkt_keys = out.keys()
         assert "legs" not in pkt_keys
         assert "weight" in pkt_keys
-        assert "animal" in tag_keys
+        assert "animal" in key_keys
 
     def test_output_schema_matches_result(self, simple_stream):
         op = DropDataColumns(columns=["legs"])
-        tag_schema, pkt_schema = op.output_schema(simple_stream)
+        key_schema, pkt_schema = op.output_schema(simple_stream)
         assert "legs" not in pkt_schema
         assert "weight" in pkt_schema
 
@@ -358,7 +358,7 @@ class TestMapDataBehavior:
     def test_renames_data_column(self, simple_stream):
         op = MapData(name_map={"weight": "mass"})
         out = op.process(simple_stream)
-        tag_keys, pkt_keys = out.keys()
+        key_keys, pkt_keys = out.keys()
         assert "mass" in pkt_keys
         assert "weight" not in pkt_keys
         # data preserved
@@ -367,7 +367,7 @@ def test_renames_data_column(self, simple_stream):
 
     def test_output_schema_reflects_rename(self, simple_stream):
         op = MapData(name_map={"weight": "mass"})
-        tag_schema, pkt_schema = op.output_schema(simple_stream)
+        key_schema, pkt_schema = op.output_schema(simple_stream)
         assert "mass" in pkt_schema
         assert "weight" not in pkt_schema
 
@@ -377,27 +377,27 @@ def test_collision_with_existing_column_raises(self, simple_stream):
             op.process(simple_stream)
 
 
-class TestMapTagsBehavior:
-    def test_renames_tag_column(self, two_tag_stream):
-        op = MapTags(name_map={"region": "area"})
-        out = op.process(two_tag_stream)
-        tag_keys, pkt_keys = out.keys()
-        assert "area" in tag_keys
-        assert "region" not in tag_keys
+class TestMapKeysBehavior:
+    def test_renames_key_column(self, two_key_stream):
+        op = MapKeys(name_map={"region": "area"})
+        out = op.process(two_key_stream)
+        key_keys, pkt_keys = out.keys()
+        assert "area" in key_keys
+        assert "region" not in key_keys
         # data preserved
         result = out.as_table()
         assert set(result.column("area").to_pylist()) == {"east", "west"}
 
-    def test_output_schema_reflects_rename(self, two_tag_stream):
-        op = MapTags(name_map={"region": "area"})
-        tag_schema, pkt_schema = op.output_schema(two_tag_stream)
-        assert "area" in tag_schema
-        assert "region" not in tag_schema
+    def test_output_schema_reflects_rename(self, two_key_stream):
+        op = MapKeys(name_map={"region": "area"})
+        key_schema, pkt_schema = op.output_schema(two_key_stream)
+        assert "area" in key_schema
+        assert "region" not in key_schema
 
-    def test_collision_with_existing_tag_raises(self, two_tag_stream):
-        op = MapTags(name_map={"region": "animal"})
+    def test_collision_with_existing_key_raises(self, two_key_stream):
+        op = MapKeys(name_map={"region": "animal"})
         with pytest.raises(Exception):
-            op.process(two_tag_stream)
+            op.process(two_key_stream)
 
 
 class TestBatchBehavior:
@@ -435,11 +435,11 @@ def test_negative_batch_size_raises(self):
 
 
 class TestJoinBehavior:
-    def test_join_combines_streams_on_shared_tags(self, simple_stream, disjoint_stream):
+    def test_join_combines_streams_on_shared_keys(self, simple_stream, disjoint_stream):
         op = Join()
         out = op.process(simple_stream, disjoint_stream)
         result = out.as_table()
-        # Both have 3 rows with same "animal" tags → inner join → 3 rows
+        # Both have 3 rows with same "animal" keys → inner join → 3 rows
         assert len(result) == 3
         # All columns present
         col_names = set(result.column_names)
@@ -454,8 +454,8 @@ def test_join_single_stream_passthrough(self, simple_stream):
 
     def test_join_output_schema(self, simple_stream, disjoint_stream):
         op = Join()
-        tag_schema, pkt_schema = op.output_schema(simple_stream, disjoint_stream)
-        assert "animal" in tag_schema
+        key_schema, pkt_schema = op.output_schema(simple_stream, disjoint_stream)
+        assert "animal" in key_schema
         assert "weight" in pkt_schema
         assert "speed" in pkt_schema
 
@@ -471,7 +471,7 @@ class TestJoinMetaColumnCollision:
     with stream-index-based suffixes (e.g. ``__computed_1``, ``__computed_2``)."""
 
     def _make_stream(self, id_vals, pkt_col, pkt_vals, meta_val):
-        """Helper: stream with shared tag 'id', one data column, and ``__computed``."""
+        """Helper: stream with shared key 'id', one data column, and ``__computed``."""
         table = pa.table(
             {
                 "id": pa.array(id_vals, type=pa.int64()),
@@ -479,7 +479,7 @@ def _make_stream(self, id_vals, pkt_col, pkt_vals, meta_val):
                 "__computed": pa.array([meta_val] * len(id_vals), type=pa.bool_()),
             }
         )
-        return ArrowTableStream(table, tag_columns=["id"])
+        return ArrowTableStream(table, key_columns=["id"])
 
     def test_three_way_join_with_shared_meta_column_succeeds(self):
         """Three streams each carrying ``__computed`` should join without DuplicateError."""
@@ -509,11 +509,11 @@ def test_three_way_join_meta_columns_renamed_with_index_suffix(self):
         assert "__computed_2" in col_names
 
 
-class TestJoinOutputSchemaSystemTags:
-    """Verify that Join.output_schema correctly predicts system tag columns."""
+class TestJoinOutputSchemaSystemKeys:
+    """Verify that Join.output_schema correctly predicts system key columns."""
 
-    def test_output_schema_excludes_system_tags_by_default(self):
-        """Without system_tags=True, no system tag columns in tag schema."""
+    def test_output_schema_excludes_system_keys_by_default(self):
+        """Without system_keys=True, no system key columns in key schema."""
         from orcapod.core.sources.arrow_table_source import ArrowTableSource
         from orcapod.system_constants import constants
 
@@ -524,7 +524,7 @@ def test_output_schema_excludes_system_tags_by_default(self):
                     "alpha": pa.array([10, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_b = ArrowTableSource(
@@ -534,18 +534,18 @@ def test_output_schema_excludes_system_tags_by_default(self):
                     "beta": pa.array([100, 200], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
         op = Join()
-        tag_schema, _ = op.output_schema(src_a, src_b)
+        key_schema, _ = op.output_schema(src_a, src_b)
 
-        for key in tag_schema:
-            assert not key.startswith(constants.SYSTEM_TAG_PREFIX)
+        for key in key_schema:
+            assert not key.startswith(constants.SYSTEM_KEY_PREFIX)
 
-    def test_output_schema_includes_system_tags_when_requested(self):
-        """With system_tags=True, tag schema should include system tag columns."""
+    def test_output_schema_includes_system_keys_when_requested(self):
+        """With system_keys=True, key schema should include system key columns."""
         from orcapod.core.sources.arrow_table_source import ArrowTableSource
         from orcapod.system_constants import constants
 
@@ -556,7 +556,7 @@ def test_output_schema_includes_system_tags_when_requested(self):
                     "alpha": pa.array([10, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_b = ArrowTableSource(
@@ -566,20 +566,20 @@ def test_output_schema_includes_system_tags_when_requested(self):
                     "beta": pa.array([100, 200], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
         op = Join()
-        tag_schema, _ = op.output_schema(src_a, src_b, columns={"system_tags": True})
+        key_schema, _ = op.output_schema(src_a, src_b, columns={"system_keys": True})
 
-        sys_tag_keys = [
-            k for k in tag_schema if k.startswith(constants.SYSTEM_TAG_PREFIX)
+        sys_key_keys = [
+            k for k in key_schema if k.startswith(constants.SYSTEM_KEY_PREFIX)
         ]
-        assert len(sys_tag_keys) == 4  # 2 sources × 2 fields (source_id + record_id)
+        assert len(sys_key_keys) == 4  # 2 sources × 2 fields (source_id + record_id)
 
-    def test_output_schema_system_tags_match_actual_output(self):
-        """Predicted system tag column names must match the actual result."""
+    def test_output_schema_system_keys_match_actual_output(self):
+        """Predicted system key column names must match the actual result."""
         from orcapod.core.sources.arrow_table_source import ArrowTableSource
         from orcapod.system_constants import constants
 
@@ -590,7 +590,7 @@ def test_output_schema_system_tags_match_actual_output(self):
                     "alpha": pa.array([10, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_b = ArrowTableSource(
@@ -600,31 +600,31 @@ def test_output_schema_system_tags_match_actual_output(self):
                     "beta": pa.array([100, 200], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
         op = Join()
 
         # Predicted
-        tag_schema, _ = op.output_schema(src_a, src_b, columns={"system_tags": True})
+        key_schema, _ = op.output_schema(src_a, src_b, columns={"system_keys": True})
         predicted = sorted(
-            k for k in tag_schema if k.startswith(constants.SYSTEM_TAG_PREFIX)
+            k for k in key_schema if k.startswith(constants.SYSTEM_KEY_PREFIX)
         )
 
         # Actual
         result = op.static_process(src_a, src_b)
-        result_table = result.as_table(columns={"system_tags": True})
+        result_table = result.as_table(columns={"system_keys": True})
         actual = sorted(
             c
             for c in result_table.column_names
-            if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
 
         assert predicted == actual
 
-    def test_output_schema_system_tags_three_way_join(self):
-        """Three-way join should predict 3 system tag columns."""
+    def test_output_schema_system_keys_three_way_join(self):
+        """Three-way join should predict 6 system key columns (2 per source)."""
         from orcapod.core.sources.arrow_table_source import ArrowTableSource
         from orcapod.system_constants import constants
 
@@ -635,7 +635,7 @@ def test_output_schema_system_tags_three_way_join(self):
                     "alpha": pa.array([10, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_b = ArrowTableSource(
@@ -645,7 +645,7 @@ def test_output_schema_system_tags_three_way_join(self):
                     "beta": pa.array([100, 200], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_c = ArrowTableSource(
@@ -655,43 +655,43 @@ def test_output_schema_system_tags_three_way_join(self):
                     "gamma": pa.array([1000, 2000], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
         op = Join()
 
         # Predicted
-        tag_schema, _ = op.output_schema(
-            src_a, src_b, src_c, columns={"system_tags": True}
+        key_schema, _ = op.output_schema(
+            src_a, src_b, src_c, columns={"system_keys": True}
         )
         predicted = sorted(
-            k for k in tag_schema if k.startswith(constants.SYSTEM_TAG_PREFIX)
+            k for k in key_schema if k.startswith(constants.SYSTEM_KEY_PREFIX)
         )
 
         # Actual
         result = op.static_process(src_a, src_b, src_c)
         actual = sorted(
             c
-            for c in result.as_table(columns={"system_tags": True}).column_names
-            if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            for c in result.as_table(columns={"system_keys": True}).column_names
+            if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
 
         assert len(predicted) == 6  # 3 sources × 2 fields
         assert predicted == actual
 
     def test_output_schema_single_stream_passthrough(self, simple_stream):
-        """Single stream should pass through output_schema including system_tags."""
+        """Single stream should pass through output_schema including system_keys."""
         op = Join()
         result_default = op.output_schema(simple_stream)
-        result_sys = op.output_schema(simple_stream, columns={"system_tags": True})
+        result_sys = op.output_schema(simple_stream, columns={"system_keys": True})
         # Single stream delegates to stream's output_schema
         assert result_default == simple_stream.output_schema()
-        assert result_sys == simple_stream.output_schema(columns={"system_tags": True})
+        assert result_sys == simple_stream.output_schema(columns={"system_keys": True})
 
     def test_predicted_schema_matches_result_stream_schema(self):
         """Operator's predicted output_schema must equal the result stream's
-        output_schema — both tag and data schemas, without system tags."""
+        output_schema — both key and data schemas, without system keys."""
         from orcapod.core.sources.arrow_table_source import ArrowTableSource
 
         src_a = ArrowTableSource(
@@ -701,7 +701,7 @@ def test_predicted_schema_matches_result_stream_schema(self):
                     "alpha": pa.array([10, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_b = ArrowTableSource(
@@ -711,22 +711,22 @@ def test_predicted_schema_matches_result_stream_schema(self):
                     "beta": pa.array([100, 200], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
         op = Join()
 
-        predicted_tag, predicted_pkt = op.output_schema(src_a, src_b)
+        predicted_key, predicted_pkt = op.output_schema(src_a, src_b)
         result = op.static_process(src_a, src_b)
-        actual_tag, actual_pkt = result.output_schema()
+        actual_key, actual_pkt = result.output_schema()
 
-        assert dict(predicted_tag) == dict(actual_tag)
+        assert dict(predicted_key) == dict(actual_key)
         assert dict(predicted_pkt) == dict(actual_pkt)
 
-    def test_predicted_schema_matches_result_stream_schema_with_system_tags(self):
-        """Operator's predicted output_schema(system_tags=True) must equal
-        the result stream's output_schema(system_tags=True)."""
+    def test_predicted_schema_matches_result_stream_schema_with_system_keys(self):
+        """Operator's predicted output_schema(system_keys=True) must equal
+        the result stream's output_schema(system_keys=True)."""
         from orcapod.core.sources.arrow_table_source import ArrowTableSource
 
         src_a = ArrowTableSource(
@@ -736,7 +736,7 @@ def test_predicted_schema_matches_result_stream_schema_with_system_tags(self):
                     "alpha": pa.array([10, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_b = ArrowTableSource(
@@ -746,19 +746,19 @@ def test_predicted_schema_matches_result_stream_schema_with_system_tags(self):
                     "beta": pa.array([100, 200], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
         op = Join()
 
-        predicted_tag, predicted_pkt = op.output_schema(
-            src_a, src_b, columns={"system_tags": True}
+        predicted_key, predicted_pkt = op.output_schema(
+            src_a, src_b, columns={"system_keys": True}
         )
         result = op.static_process(src_a, src_b)
-        actual_tag, actual_pkt = result.output_schema(columns={"system_tags": True})
+        actual_key, actual_pkt = result.output_schema(columns={"system_keys": True})
 
-        assert dict(predicted_tag) == dict(actual_tag)
+        assert dict(predicted_key) == dict(actual_key)
         assert dict(predicted_pkt) == dict(actual_pkt)
 
 
@@ -773,9 +773,9 @@ def test_semijoin_filters_left_by_right(self, left_stream, right_stream):
 
     def test_semijoin_preserves_left_schema(self, left_stream, right_stream):
         op = SemiJoin()
-        tag_schema, pkt_schema = op.output_schema(left_stream, right_stream)
-        left_tag, left_pkt = left_stream.output_schema()
-        assert set(tag_schema.keys()) == set(left_tag.keys())
+        key_schema, pkt_schema = op.output_schema(left_stream, right_stream)
+        left_key, left_pkt = left_stream.output_schema()
+        assert set(key_schema.keys()) == set(left_key.keys())
         assert set(pkt_schema.keys()) == set(left_pkt.keys())
 
     def test_semijoin_is_not_commutative(self, left_stream, right_stream):
@@ -797,9 +797,9 @@ def test_polars_filter_different_params_different_hash(self):
         b = PolarsFilter(constraints={"x": 2})
         assert a.content_hash() != b.content_hash()
 
-    def test_select_tag_columns_different_params_different_hash(self):
-        a = SelectTagColumns(columns=["x"])
-        b = SelectTagColumns(columns=["y"])
+    def test_select_key_columns_different_params_different_hash(self):
+        a = SelectKeyColumns(columns=["x"])
+        b = SelectKeyColumns(columns=["y"])
         assert a.content_hash() != b.content_hash()
 
     def test_select_data_columns_different_params_different_hash(self):
@@ -807,9 +807,9 @@ def test_select_data_columns_different_params_different_hash(self):
         b = SelectDataColumns(columns=["y"])
         assert a.content_hash() != b.content_hash()
 
-    def test_drop_tag_columns_different_params_different_hash(self):
-        a = DropTagColumns(columns=["x"])
-        b = DropTagColumns(columns=["y"])
+    def test_drop_key_columns_different_params_different_hash(self):
+        a = DropKeyColumns(columns=["x"])
+        b = DropKeyColumns(columns=["y"])
         assert a.content_hash() != b.content_hash()
 
     def test_drop_data_columns_different_params_different_hash(self):
@@ -822,9 +822,9 @@ def test_map_data_different_params_different_hash(self):
         b = MapData(name_map={"a": "c"})
         assert a.content_hash() != b.content_hash()
 
-    def test_map_tags_different_params_different_hash(self):
-        a = MapTags(name_map={"a": "b"})
-        b = MapTags(name_map={"a": "c"})
+    def test_map_keys_different_params_different_hash(self):
+        a = MapKeys(name_map={"a": "b"})
+        b = MapKeys(name_map={"a": "c"})
         assert a.content_hash() != b.content_hash()
 
     def test_batch_different_params_different_hash(self):
@@ -854,11 +854,11 @@ def test_polars_filter_argument_symmetry(self, simple_stream):
         assert isinstance(sym, tuple)
         assert sym == (simple_stream,)
 
-    def test_select_tag_columns_argument_symmetry(self, two_tag_stream):
-        op = SelectTagColumns(columns=["region"])
-        sym = op.argument_symmetry([two_tag_stream])
+    def test_select_key_columns_argument_symmetry(self, two_key_stream):
+        op = SelectKeyColumns(columns=["region"])
+        sym = op.argument_symmetry([two_key_stream])
         assert isinstance(sym, tuple)
-        assert sym == (two_tag_stream,)
+        assert sym == (two_key_stream,)
 
     def test_select_data_columns_argument_symmetry(self, simple_stream):
         op = SelectDataColumns(columns=["weight"])
@@ -866,11 +866,11 @@ def test_select_data_columns_argument_symmetry(self, simple_stream):
         assert isinstance(sym, tuple)
         assert sym == (simple_stream,)
 
-    def test_drop_tag_columns_argument_symmetry(self, two_tag_stream):
-        op = DropTagColumns(columns=["region"])
-        sym = op.argument_symmetry([two_tag_stream])
+    def test_drop_key_columns_argument_symmetry(self, two_key_stream):
+        op = DropKeyColumns(columns=["region"])
+        sym = op.argument_symmetry([two_key_stream])
         assert isinstance(sym, tuple)
-        assert sym == (two_tag_stream,)
+        assert sym == (two_key_stream,)
 
     def test_drop_data_columns_argument_symmetry(self, simple_stream):
         op = DropDataColumns(columns=["legs"])
@@ -884,11 +884,11 @@ def test_map_data_argument_symmetry(self, simple_stream):
         assert isinstance(sym, tuple)
         assert sym == (simple_stream,)
 
-    def test_map_tags_argument_symmetry(self, two_tag_stream):
-        op = MapTags(name_map={"region": "area"})
-        sym = op.argument_symmetry([two_tag_stream])
+    def test_map_keys_argument_symmetry(self, two_key_stream):
+        op = MapKeys(name_map={"region": "area"})
+        sym = op.argument_symmetry([two_key_stream])
         assert isinstance(sym, tuple)
-        assert sym == (two_tag_stream,)
+        assert sym == (two_key_stream,)
 
     def test_batch_argument_symmetry(self, simple_stream):
         op = Batch(batch_size=2)
@@ -974,16 +974,16 @@ def _check_unary_identity(self, op, stream):
     def test_polars_filter_identity(self, simple_stream):
         self._check_unary_identity(PolarsFilter(), simple_stream)
 
-    def test_select_tag_columns_identity(self, two_tag_stream):
-        self._check_unary_identity(SelectTagColumns(columns=["region"]), two_tag_stream)
+    def test_select_key_columns_identity(self, two_key_stream):
+        self._check_unary_identity(SelectKeyColumns(columns=["region"]), two_key_stream)
 
     def test_select_data_columns_identity(self, simple_stream):
         self._check_unary_identity(
             SelectDataColumns(columns=["weight"]), simple_stream
         )
 
-    def test_drop_tag_columns_identity(self, two_tag_stream):
-        self._check_unary_identity(DropTagColumns(columns=["region"]), two_tag_stream)
+    def test_drop_key_columns_identity(self, two_key_stream):
+        self._check_unary_identity(DropKeyColumns(columns=["region"]), two_key_stream)
 
     def test_drop_data_columns_identity(self, simple_stream):
         self._check_unary_identity(DropDataColumns(columns=["legs"]), simple_stream)
@@ -993,8 +993,8 @@ def test_map_data_identity(self, simple_stream):
             MapData(name_map={"weight": "mass"}), simple_stream
         )
 
-    def test_map_tags_identity(self, two_tag_stream):
-        self._check_unary_identity(MapTags(name_map={"region": "area"}), two_tag_stream)
+    def test_map_keys_identity(self, two_key_stream):
+        self._check_unary_identity(MapKeys(name_map={"region": "area"}), two_key_stream)
 
     def test_batch_identity(self, simple_stream):
         self._check_unary_identity(Batch(batch_size=2), simple_stream)
@@ -1073,20 +1073,20 @@ def test_semijoin_swapped_inputs_different_pipeline_hash(
 
 
 # ---------------------------------------------------------------------------
-# System Tag Name-Extension Tests
+# System Key Name-Extension Tests
 # ---------------------------------------------------------------------------
 
 
-class TestJoinSystemTagNameExtension:
-    """Verify that Join uses pipeline_hash (structure-only) for system tag
+class TestJoinSystemKeyNameExtension:
+    """Verify that Join uses pipeline_hash (structure-only) for system key
     name-extension, not content_hash (data-inclusive).
 
-    Uses ArrowTableSource to ensure system tag columns are present (raw
-    ArrowTableStream has no system tags)."""
+    Uses ArrowTableSource to ensure system key columns are present (raw
+    ArrowTableStream has no system keys)."""
 
-    def test_same_schema_different_data_produces_same_system_tag_names(self):
+    def test_same_schema_different_data_produces_same_system_key_names(self):
         """Two sources with same schema but different data should produce
-        the same system tag column names after Join, because system tag
+        the same system key column names after Join, because system key
         name-extension uses pipeline_hash (structure-only)."""
         from orcapod.core.sources.arrow_table_source import ArrowTableSource
         from orcapod.system_constants import constants
@@ -1098,7 +1098,7 @@ def test_same_schema_different_data_produces_same_system_tag_names(self):
                     "value_a": pa.array([10, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_left2 = ArrowTableSource(
@@ -1108,7 +1108,7 @@ def test_same_schema_different_data_produces_same_system_tag_names(self):
                     "value_a": pa.array([100, 200], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_right = ArrowTableSource(
@@ -1118,7 +1118,7 @@ def test_same_schema_different_data_produces_same_system_tag_names(self):
                     "value_b": pa.array([30, 40], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
@@ -1126,27 +1126,27 @@ def test_same_schema_different_data_produces_same_system_tag_names(self):
         result1 = op.static_process(src_left1, src_right)
         result2 = op.static_process(src_left2, src_right)
 
-        result1_table = result1.as_table(columns={"system_tags": True})
-        result2_table = result2.as_table(columns={"system_tags": True})
+        result1_table = result1.as_table(columns={"system_keys": True})
+        result2_table = result2.as_table(columns={"system_keys": True})
 
         sys_cols_1 = sorted(
             c
             for c in result1_table.column_names
-            if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
         sys_cols_2 = sorted(
             c
             for c in result2_table.column_names
-            if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
 
         # Column names should be identical (structure-only hashing)
-        assert len(sys_cols_1) > 0, "Expected system tag columns to be present"
+        assert len(sys_cols_1) > 0, "Expected system key columns to be present"
         assert sys_cols_1 == sys_cols_2
 
-    def test_different_schema_produces_different_system_tag_names(self):
+    def test_different_schema_produces_different_system_key_names(self):
         """Two sources with different data schemas should produce different
-        system tag column names after Join."""
+        system key column names after Join."""
         from orcapod.core.sources.arrow_table_source import ArrowTableSource
         from orcapod.system_constants import constants
 
@@ -1157,7 +1157,7 @@ def test_different_schema_produces_different_system_tag_names(self):
                     "value_a": pa.array([10, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_right_int = ArrowTableSource(
@@ -1167,7 +1167,7 @@ def test_different_schema_produces_different_system_tag_names(self):
                     "value_b": pa.array([30, 40], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_right_str = ArrowTableSource(
@@ -1177,7 +1177,7 @@ def test_different_schema_produces_different_system_tag_names(self):
                     "value_c": pa.array(["a", "b"]),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
@@ -1185,32 +1185,32 @@ def test_different_schema_produces_different_system_tag_names(self):
         result1 = op.static_process(src_left, src_right_int)
         result2 = op.static_process(src_left, src_right_str)
 
-        result1_table = result1.as_table(columns={"system_tags": True})
-        result2_table = result2.as_table(columns={"system_tags": True})
+        result1_table = result1.as_table(columns={"system_keys": True})
+        result2_table = result2.as_table(columns={"system_keys": True})
 
         sys_cols_1 = sorted(
             c
             for c in result1_table.column_names
-            if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
         sys_cols_2 = sorted(
             c
             for c in result2_table.column_names
-            if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            if c.startswith(constants.SYSTEM_KEY_PREFIX)
         )
 
         # Column names should differ (different pipeline structures)
-        assert len(sys_cols_1) > 0, "Expected system tag columns to be present"
+        assert len(sys_cols_1) > 0, "Expected system key columns to be present"
         assert sys_cols_1 != sys_cols_2
 
 
-class TestSourceSystemTagSchemaHash:
-    """Verify that source system tag column name uses a hash consistent
+class TestSourceSystemKeySchemaHash:
+    """Verify that source system key column name uses a hash consistent
     with the source's pipeline_hash."""
 
     def test_source_schema_hash_matches_pipeline_hash(self):
         """ArrowTableSource._schema_hash should match the truncated
-        pipeline_hash, since both hash (tag_schema, data_schema)."""
+        pipeline_hash, since both hash (key_schema, data_schema)."""
         from orcapod.core.sources.arrow_table_source import ArrowTableSource
 
         table = pa.table(
@@ -1219,20 +1219,20 @@ def test_source_schema_hash_matches_pipeline_hash(self):
                 "x": pa.array([10, 20, 30], type=pa.int64()),
             }
         )
-        source = ArrowTableSource(table, tag_columns=["id"], infer_nullable=True)
+        source = ArrowTableSource(table, key_columns=["id"], infer_nullable=True)
         schema_hash = source._schema_hash
         pipeline_hash_hex = source.pipeline_hash().to_hex(char_count=len(schema_hash))
         assert schema_hash == pipeline_hash_hex
 
 
-class TestJoinSystemTagCanonicalOrdering:
+class TestJoinSystemKeyCanonicalOrdering:
     """Verify that Join canonically orders streams by pipeline_hash,
-    and that the resulting system tag columns reflect this ordering
+    and that the resulting system key columns reflect this ordering
     with canonical position indices (0, 1, 2, ...)."""
 
     @pytest.fixture
     def three_sources(self):
-        """Three ArrowTableSources with distinct data schemas sharing tag 'id'."""
+        """Three ArrowTableSources with distinct data schemas sharing key 'id'."""
         from orcapod.core.sources.arrow_table_source import ArrowTableSource
 
         src_a = ArrowTableSource(
@@ -1242,7 +1242,7 @@ def three_sources(self):
                     "alpha": pa.array([10, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_b = ArrowTableSource(
@@ -1252,7 +1252,7 @@ def three_sources(self):
                     "beta": pa.array([100, 200], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_c = ArrowTableSource(
@@ -1262,30 +1262,30 @@ def three_sources(self):
                     "gamma": pa.array([1000, 2000], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         return src_a, src_b, src_c
 
     @staticmethod
-    def _get_system_tag_columns(table, constants):
-        """Extract system tag column names in their natural table order."""
+    def _get_system_key_columns(table, constants):
+        """Extract system key column names in their natural table order."""
         return [
-            c for c in table.column_names if c.startswith(constants.SYSTEM_TAG_PREFIX)
+            c for c in table.column_names if c.startswith(constants.SYSTEM_KEY_PREFIX)
         ]
 
     @staticmethod
-    def _parse_system_tag_column(col, constants):
-        """Parse a system tag column name into (field_type, schema_hash, stream_hash, index).
+    def _parse_system_key_column(col, constants):
+        """Parse a system key column name into (field_type, schema_hash, stream_hash, index).
 
         Column format after join::
 
-            _tag_{field_type}::{schema_hash}::{stream_hash}:{canonical_index}
+            _key_{field_type}::{schema_hash}::{stream_hash}:{canonical_index}
 
         Blocks are separated by ``::`` (block separator).
         Fields within a block are separated by ``:`` (field separator).
         """
-        after_prefix = col[len(constants.SYSTEM_TAG_PREFIX) :]
+        after_prefix = col[len(constants.SYSTEM_KEY_PREFIX) :]
         blocks = after_prefix.split(constants.BLOCK_SEPARATOR)
         field_type = blocks[0]
         schema_hash = blocks[1]
@@ -1294,18 +1294,18 @@ def _parse_system_tag_column(col, constants):
         index = join_block_fields[1]
         return field_type, schema_hash, stream_hash, index
 
-    def test_three_way_join_produces_six_system_tag_columns(self, three_sources):
+    def test_three_way_join_produces_six_system_key_columns(self, three_sources):
         from orcapod.system_constants import constants
 
         src_a, src_b, src_c = three_sources
         op = Join()
         result = op.static_process(src_a, src_b, src_c)
-        result_table = result.as_table(columns={"system_tags": True})
-        sys_cols = self._get_system_tag_columns(result_table, constants)
+        result_table = result.as_table(columns={"system_keys": True})
+        sys_cols = self._get_system_key_columns(result_table, constants)
         assert len(sys_cols) == 6  # 3 sources × 2 fields (source_id + record_id)
 
-    def test_system_tag_position_maps_to_correct_source(self, three_sources):
-        """Each system tag column should carry the canonical position index
+    def test_system_key_position_maps_to_correct_source(self, three_sources):
+        """Each system key column should carry the canonical position index
         matching the source's rank when sorted by pipeline_hash.
 
         Independently sorts sources by pipeline_hash to determine expected
@@ -1317,7 +1317,7 @@ def test_system_tag_position_maps_to_correct_source(self, three_sources):
         from orcapod.system_constants import constants
 
         src_a, src_b, src_c = three_sources
-        n_char = Config().system_tag_hash_n_char
+        n_char = Config().system_key_hash_n_char
 
         # Independently determine expected position → source mapping
         sources = [src_a, src_b, src_c]
@@ -1325,18 +1325,18 @@ def test_system_tag_position_maps_to_correct_source(self, three_sources):
 
         op = Join()
         result = op.static_process(src_a, src_b, src_c)
-        result_table = result.as_table(columns={"system_tags": True})
-        sys_cols = self._get_system_tag_columns(result_table, constants)
+        result_table = result.as_table(columns={"system_keys": True})
+        sys_cols = self._get_system_key_columns(result_table, constants)
 
         # Filter to source_id columns for position checking
         sid_cols = [
-            c for c in sys_cols if c.startswith(constants.SYSTEM_TAG_SOURCE_ID_PREFIX)
+            c for c in sys_cols if c.startswith(constants.SYSTEM_KEY_SOURCE_ID_PREFIX)
         ]
         assert len(sid_cols) == 3
 
         for expected_idx, expected_source in enumerate(sorted_sources):
             field_type, schema_hash, stream_hash, index_str = (
-                self._parse_system_tag_column(sid_cols[expected_idx], constants)
+                self._parse_system_key_column(sid_cols[expected_idx], constants)
             )
             # The schema_hash identifies the originating source
             assert schema_hash == expected_source._schema_hash, (
@@ -1355,9 +1355,9 @@ def test_system_tag_position_maps_to_correct_source(self, three_sources):
                 f"got {index_str!r}"
             )
 
-    def test_swapped_input_order_produces_identical_system_tags(self, three_sources):
+    def test_swapped_input_order_produces_identical_system_keys(self, three_sources):
         """Join is commutative — any permutation of inputs should produce
-        the same system tag column names in the same order."""
+        the same system key column names in the same order."""
         from orcapod.system_constants import constants
 
         src_a, src_b, src_c = three_sources
@@ -1367,29 +1367,29 @@ def test_swapped_input_order_produces_identical_system_tags(self, three_sources)
         result_cab = op.static_process(src_c, src_a, src_b)
         result_bca = op.static_process(src_b, src_c, src_a)
 
-        sys_abc = self._get_system_tag_columns(
-            result_abc.as_table(columns={"system_tags": True}), constants
+        sys_abc = self._get_system_key_columns(
+            result_abc.as_table(columns={"system_keys": True}), constants
         )
-        sys_cab = self._get_system_tag_columns(
-            result_cab.as_table(columns={"system_tags": True}), constants
+        sys_cab = self._get_system_key_columns(
+            result_cab.as_table(columns={"system_keys": True}), constants
         )
-        sys_bca = self._get_system_tag_columns(
-            result_bca.as_table(columns={"system_tags": True}), constants
+        sys_bca = self._get_system_key_columns(
+            result_bca.as_table(columns={"system_keys": True}), constants
         )
 
         assert sys_abc == sys_cab
         assert sys_abc == sys_bca
 
-    def test_system_tag_values_are_per_row_source_provenance(self, three_sources):
-        """System tag column values should reflect the source provenance.
+    def test_system_key_values_are_per_row_source_provenance(self, three_sources):
+        """System key column values should reflect the source provenance.
         source_id columns contain the source_id, record_id columns contain the record_id."""
         from orcapod.system_constants import constants
 
         src_a, src_b, src_c = three_sources
         op = Join()
         result = op.static_process(src_a, src_b, src_c)
-        result_table = result.as_table(columns={"system_tags": True})
-        sys_cols = self._get_system_tag_columns(result_table, constants)
+        result_table = result.as_table(columns={"system_keys": True})
+        sys_cols = self._get_system_key_columns(result_table, constants)
 
         for col in sys_cols:
             values = result_table.column(col).to_pylist()
@@ -1401,9 +1401,9 @@ def test_system_tag_values_are_per_row_source_provenance(self, three_sources):
     def test_intermediate_operators_produce_different_stream_hash(self):
         """When sources pass through intermediate operators before Join,
         the schema_hash (from origin source) and stream_hash (from the
-        operator output) should differ in the system tag column name.
+        operator output) should differ in the system key column name.
 
-        Column format: _tag_{field_type}::{schema_hash}::{stream_hash}:{index}
+        Column format: _key_{field_type}::{schema_hash}::{stream_hash}:{index}
 
         With an intermediate MapData, stream_hash comes from the
         DynamicPodStream which has a different pipeline_hash than the
@@ -1412,7 +1412,7 @@ def test_intermediate_operators_produce_different_stream_hash(self):
         from orcapod.core.sources.arrow_table_source import ArrowTableSource
         from orcapod.system_constants import constants
 
-        n_char = Config().system_tag_hash_n_char
+        n_char = Config().system_key_hash_n_char
 
         src_a = ArrowTableSource(
             pa.table(
@@ -1421,7 +1421,7 @@ def test_intermediate_operators_produce_different_stream_hash(self):
                     "alpha": pa.array([10, 20], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_b = ArrowTableSource(
@@ -1431,7 +1431,7 @@ def test_intermediate_operators_produce_different_stream_hash(self):
                     "beta": pa.array([100, 200], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
         src_c = ArrowTableSource(
@@ -1441,7 +1441,7 @@ def test_intermediate_operators_produce_different_stream_hash(self):
                     "gamma": pa.array([1000, 2000], type=pa.int64()),
                 }
             ),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
@@ -1462,14 +1462,14 @@ def test_intermediate_operators_produce_different_stream_hash(self):
         # Join the intermediate streams
         op = Join()
         result = op.static_process(stream_a, stream_b, stream_c)
-        result_table = result.as_table(columns={"system_tags": True})
-        sys_cols = self._get_system_tag_columns(result_table, constants)
+        result_table = result.as_table(columns={"system_keys": True})
+        sys_cols = self._get_system_key_columns(result_table, constants)
 
         assert len(sys_cols) == 6  # 3 sources × 2 fields
 
         # Filter to source_id columns for position checking
         sid_cols = [
-            c for c in sys_cols if c.startswith(constants.SYSTEM_TAG_SOURCE_ID_PREFIX)
+            c for c in sys_cols if c.startswith(constants.SYSTEM_KEY_SOURCE_ID_PREFIX)
         ]
         assert len(sid_cols) == 3
 
@@ -1484,7 +1484,7 @@ def test_intermediate_operators_produce_different_stream_hash(self):
         for expected_idx, expected_stream in enumerate(sorted_streams):
             expected_source = stream_to_source[expected_stream]
             field_type, schema_hash, stream_hash, index_str = (
-                self._parse_system_tag_column(sid_cols[expected_idx], constants)
+                self._parse_system_key_column(sid_cols[expected_idx], constants)
             )
 
             # schema_hash should match the original source's schema_hash
@@ -1512,22 +1512,22 @@ def test_intermediate_operators_produce_different_stream_hash(self):
             assert index_str == str(expected_idx)
 
 
-class TestSortSystemTagValues:
-    """Tests for the sort_system_tag_values utility that ensures commutativity
-    by sorting paired (source_id, record_id) system tag values per row."""
+class TestSortSystemKeyValues:
+    """Tests for the sort_system_key_values utility that ensures commutativity
+    by sorting paired (source_id, record_id) system key values per row."""
 
     @staticmethod
     def _make_paired_cols(constants, provenance_path, position):
         """Build paired source_id/record_id column names for a given provenance path and position."""
-        sid = f"{constants.SYSTEM_TAG_SOURCE_ID_PREFIX}{constants.BLOCK_SEPARATOR}{provenance_path}{constants.FIELD_SEPARATOR}{position}"
-        rid = f"{constants.SYSTEM_TAG_RECORD_ID_PREFIX}{constants.BLOCK_SEPARATOR}{provenance_path}{constants.FIELD_SEPARATOR}{position}"
+        sid = f"{constants.SYSTEM_KEY_SOURCE_ID_PREFIX}{constants.BLOCK_SEPARATOR}{provenance_path}{constants.FIELD_SEPARATOR}{position}"
+        rid = f"{constants.SYSTEM_KEY_RECORD_ID_PREFIX}{constants.BLOCK_SEPARATOR}{provenance_path}{constants.FIELD_SEPARATOR}{position}"
         return sid, rid
 
     def test_sorts_paired_values_across_same_provenance_path(self):
         """Paired (source_id, record_id) columns sharing a provenance path
         should have their values sorted per row by (source_id, record_id) tuples."""
         from orcapod.system_constants import constants
-        from orcapod.utils.arrow_utils import sort_system_tag_values
+        from orcapod.utils.arrow_utils import sort_system_key_values
 
         sid_0, rid_0 = self._make_paired_cols(constants, "abc::ph123", "0")
         sid_1, rid_1 = self._make_paired_cols(constants, "abc::ph123", "1")
@@ -1542,7 +1542,7 @@ def test_sorts_paired_values_across_same_provenance_path(self):
             }
         )
 
-        result = sort_system_tag_values(table)
+        result = sort_system_key_values(table)
 
         # After sorting by (source_id, record_id), position :0 should have the smaller tuple
         # Row 0: ("zzz_source", "row_0") vs ("aaa_source", "row_1") → sorted: aaa first
@@ -1560,7 +1560,7 @@ def test_sorts_paired_values_across_same_provenance_path(self):
     def test_does_not_sort_different_provenance_paths(self):
         """Columns with different provenance paths should NOT have their values sorted."""
         from orcapod.system_constants import constants
-        from orcapod.utils.arrow_utils import sort_system_tag_values
+        from orcapod.utils.arrow_utils import sort_system_key_values
 
         # Two different provenance paths (different pipeline hashes)
         sid_a, rid_a = self._make_paired_cols(constants, "abc::ph_AAA", "0")
@@ -1576,7 +1576,7 @@ def test_does_not_sort_different_provenance_paths(self):
             }
         )
 
-        result = sort_system_tag_values(table)
+        result = sort_system_key_values(table)
 
         # Values should be untouched since provenance paths differ
         assert result.column(sid_a).to_pylist() == ["zzz"]
@@ -1585,7 +1585,7 @@ def test_does_not_sort_different_provenance_paths(self):
     def test_no_op_for_single_position_groups(self):
         """Groups with only one position should be left untouched."""
         from orcapod.system_constants import constants
-        from orcapod.utils.arrow_utils import sort_system_tag_values
+        from orcapod.utils.arrow_utils import sort_system_key_values
 
         sid, rid = self._make_paired_cols(constants, "abc::ph123", "0")
 
@@ -1597,14 +1597,14 @@ def test_no_op_for_single_position_groups(self):
             }
         )
 
-        result = sort_system_tag_values(table)
+        result = sort_system_key_values(table)
         assert result.column(sid).to_pylist() == ["hello", "world"]
         assert result.column(rid).to_pylist() == ["row_0", "row_1"]
 
-    def test_preserves_non_system_tag_columns(self):
-        """Non-system-tag columns should be completely unaffected."""
+    def test_preserves_non_system_key_columns(self):
+        """Non-system-key columns should be completely unaffected."""
         from orcapod.system_constants import constants
-        from orcapod.utils.arrow_utils import sort_system_tag_values
+        from orcapod.utils.arrow_utils import sort_system_key_values
 
         sid_0, rid_0 = self._make_paired_cols(constants, "abc::ph123", "0")
         sid_1, rid_1 = self._make_paired_cols(constants, "abc::ph123", "1")
@@ -1620,14 +1620,14 @@ def test_preserves_non_system_tag_columns(self):
             }
         )
 
-        result = sort_system_tag_values(table)
+        result = sort_system_key_values(table)
         assert result.column("id").to_pylist() == [1, 2]
         assert result.column("data").to_pylist() == ["foo", "bar"]
 
     def test_three_way_group_sorts_correctly(self):
         """Three positions sharing the same provenance path should all be sorted together."""
         from orcapod.system_constants import constants
-        from orcapod.utils.arrow_utils import sort_system_tag_values
+        from orcapod.utils.arrow_utils import sort_system_key_values
 
         sid_0, rid_0 = self._make_paired_cols(constants, "abc::ph123", "0")
         sid_1, rid_1 = self._make_paired_cols(constants, "abc::ph123", "1")
@@ -1644,7 +1644,7 @@ def test_three_way_group_sorts_correctly(self):
             }
         )
 
-        result = sort_system_tag_values(table)
+        result = sort_system_key_values(table)
 
         # Row 0: tuples are (cherry,r0), (apple,r1), (banana,r2) → sorted: (apple,r1), (banana,r2), (cherry,r0)
         assert result.column(sid_0).to_pylist()[0] == "apple"
diff --git a/tests/test_core/sources/test_cached_source.py b/tests/test_core/sources/test_cached_source.py
index 06bfd554..cbbbe409 100644
--- a/tests/test_core/sources/test_cached_source.py
+++ b/tests/test_core/sources/test_cached_source.py
@@ -6,7 +6,7 @@
 - Dedup by per-row content hash
 - Transparent streaming: downstream consumers see same schema as live source
 - iter_data and as_table produce consistent results
-- System tags are preserved through caching
+- System keys are preserved through caching
 - Source info columns are preserved through caching
 - clear_cache forces rebuild on next access
 - Identity delegation to wrapped source
@@ -41,7 +41,7 @@ def simple_table():
 
 @pytest.fixture
 def simple_source(simple_table):
-    return ArrowTableSource(simple_table, tag_columns=["name"], source_id="src_1", infer_nullable=True)
+    return ArrowTableSource(simple_table, key_columns=["name"], source_id="src_1", infer_nullable=True)
 
 
 @pytest.fixture
@@ -99,8 +99,8 @@ def test_cache_path_prefix(self, simple_source, db):
 
     def test_same_source_same_cache_path(self, simple_table, db):
         """Identical sources produce the same cache path."""
-        s1 = ArrowTableSource(simple_table, tag_columns=["name"], source_id="src", infer_nullable=True)
-        s2 = ArrowTableSource(simple_table, tag_columns=["name"], source_id="src", infer_nullable=True)
+        s1 = ArrowTableSource(simple_table, key_columns=["name"], source_id="src", infer_nullable=True)
+        s2 = ArrowTableSource(simple_table, key_columns=["name"], source_id="src", infer_nullable=True)
         ps1 = CachedSource(s1, cache_database=db)
         ps2 = CachedSource(s2, cache_database=db)
         assert ps1.cache_path == ps2.cache_path
@@ -109,8 +109,8 @@ def test_same_name_same_schema_same_cache_path(self, db):
         """Same source_id + same schema = same identity (regardless of data)."""
         t1 = pa.table({"k": ["a"], "v": [1]})
         t2 = pa.table({"k": ["b"], "v": [2]})
-        s1 = ArrowTableSource(t1, tag_columns=["k"], source_id="s", infer_nullable=True)
-        s2 = ArrowTableSource(t2, tag_columns=["k"], source_id="s", infer_nullable=True)
+        s1 = ArrowTableSource(t1, key_columns=["k"], source_id="s", infer_nullable=True)
+        s2 = ArrowTableSource(t2, key_columns=["k"], source_id="s", infer_nullable=True)
         ps1 = CachedSource(s1, cache_database=db)
         ps2 = CachedSource(s2, cache_database=db)
         assert ps1.cache_path == ps2.cache_path
@@ -118,8 +118,8 @@ def test_same_name_same_schema_same_cache_path(self, db):
     def test_different_name_different_cache_path(self, db):
         """Different source_id produces different cache paths."""
         t1 = pa.table({"k": ["a"], "v": [1]})
-        s1 = ArrowTableSource(t1, tag_columns=["k"], source_id="src_a", infer_nullable=True)
-        s2 = ArrowTableSource(t1, tag_columns=["k"], source_id="src_b", infer_nullable=True)
+        s1 = ArrowTableSource(t1, key_columns=["k"], source_id="src_a", infer_nullable=True)
+        s2 = ArrowTableSource(t1, key_columns=["k"], source_id="src_b", infer_nullable=True)
         ps1 = CachedSource(s1, cache_database=db)
         ps2 = CachedSource(s2, cache_database=db)
         assert ps1.cache_path != ps2.cache_path
@@ -128,8 +128,8 @@ def test_unnamed_different_data_different_cache_path(self, db):
         """Unnamed sources with different data get different cache paths."""
         t1 = pa.table({"k": ["a"], "v": [1]})
         t2 = pa.table({"k": ["b"], "v": [2]})
-        s1 = ArrowTableSource(t1, tag_columns=["k"], infer_nullable=True)
-        s2 = ArrowTableSource(t2, tag_columns=["k"], infer_nullable=True)
+        s1 = ArrowTableSource(t1, key_columns=["k"], infer_nullable=True)
+        s2 = ArrowTableSource(t2, key_columns=["k"], infer_nullable=True)
         ps1 = CachedSource(s1, cache_database=db)
         ps2 = CachedSource(s2, cache_database=db)
         assert ps1.cache_path != ps2.cache_path
@@ -145,11 +145,11 @@ def test_output_schema_matches_source(self, simple_source, db):
         ps = CachedSource(simple_source, cache_database=db)
         assert ps.output_schema() == simple_source.output_schema()
 
-    def test_output_schema_with_system_tags(self, simple_source, db):
+    def test_output_schema_with_system_keys(self, simple_source, db):
         ps = CachedSource(simple_source, cache_database=db)
         assert ps.output_schema(
-            columns={"system_tags": True}
-        ) == simple_source.output_schema(columns={"system_tags": True})
+            columns={"system_keys": True}
+        ) == simple_source.output_schema(columns={"system_keys": True})
 
     def test_keys_match_source(self, simple_source, db):
         ps = CachedSource(simple_source, cache_database=db)
@@ -174,29 +174,29 @@ def test_iter_data_count(self, simple_source, db):
         data = list(ps.iter_data())
         assert len(data) == 3
 
-    def test_iter_data_tags_and_data(self, simple_source, db):
+    def test_iter_data_keys_and_data(self, simple_source, db):
         ps = CachedSource(simple_source, cache_database=db)
-        for tag, data in ps.iter_data():
-            assert "name" in tag.keys()
+        for key, data in ps.iter_data():
+            assert "name" in key.keys()
             assert "age" in data.keys()
 
-    def test_system_tags_preserved(self, simple_source, db):
-        """System tags flow through the cache correctly."""
+    def test_system_keys_preserved(self, simple_source, db):
+        """System keys flow through the cache correctly."""
         ps = CachedSource(simple_source, cache_database=db)
-        table = ps.as_table(columns={"system_tags": True})
-        sys_tag_cols = [
-            c for c in table.column_names if c.startswith(constants.SYSTEM_TAG_PREFIX)
+        table = ps.as_table(columns={"system_keys": True})
+        sys_key_cols = [
+            c for c in table.column_names if c.startswith(constants.SYSTEM_KEY_PREFIX)
         ]
         # Should have paired source_id and record_id columns
         source_id_cols = [
             c
-            for c in sys_tag_cols
-            if c.startswith(constants.SYSTEM_TAG_SOURCE_ID_PREFIX)
+            for c in sys_key_cols
+            if c.startswith(constants.SYSTEM_KEY_SOURCE_ID_PREFIX)
         ]
         record_id_cols = [
             c
-            for c in sys_tag_cols
-            if c.startswith(constants.SYSTEM_TAG_RECORD_ID_PREFIX)
+            for c in sys_key_cols
+            if c.startswith(constants.SYSTEM_KEY_RECORD_ID_PREFIX)
         ]
         assert len(source_id_cols) == 1
         assert len(record_id_cols) == 1
@@ -256,8 +256,8 @@ def test_cumulative_across_runs(self, db):
         # but with different data (different content_hash → different cache_path)
         t1 = pa.table({"k": ["a", "b"], "v": [1, 2]})
         t2 = pa.table({"k": ["a", "b", "c"], "v": [1, 2, 3]})
-        s1 = ArrowTableSource(t1, tag_columns=["k"], source_id="shared", infer_nullable=True)
-        s2 = ArrowTableSource(t2, tag_columns=["k"], source_id="shared", infer_nullable=True)
+        s1 = ArrowTableSource(t1, key_columns=["k"], source_id="shared", infer_nullable=True)
+        s2 = ArrowTableSource(t2, key_columns=["k"], source_id="shared", infer_nullable=True)
 
         # Different data → different content_hash → different cache_paths
         # So cumulative within the SAME cache_path requires same content_hash
@@ -266,7 +266,7 @@ def test_cumulative_across_runs(self, db):
         assert ps1.as_table().num_rows == 2
 
         # Same data source: should dedup
-        s1_again = ArrowTableSource(t1, tag_columns=["k"], source_id="shared", infer_nullable=True)
+        s1_again = ArrowTableSource(t1, key_columns=["k"], source_id="shared", infer_nullable=True)
         ps1_again = CachedSource(s1_again, cache_database=db)
         ps1_again.flow()
         assert ps1_again.as_table().num_rows == 2
@@ -297,7 +297,7 @@ def test_resolve_field_with_record_id_column_raises(self, db):
             }
         )
         source = ArrowTableSource(
-            table, tag_columns=["user_id"], record_id_column="user_id", source_id="test", infer_nullable=True
+            table, key_columns=["user_id"], record_id_column="user_id", source_id="test", infer_nullable=True
         )
         ps = CachedSource(source, cache_database=db)
         with pytest.raises(NotImplementedError):
@@ -316,8 +316,8 @@ def test_join_with_cached_source(self, db):
 
         t1 = pa.table({"id": [1, 2, 3], "val_a": [10, 20, 30]})
         t2 = pa.table({"id": [2, 3, 4], "val_b": [200, 300, 400]})
-        s1 = ArrowTableSource(t1, tag_columns=["id"], source_id="a", infer_nullable=True)
-        s2 = ArrowTableSource(t2, tag_columns=["id"], source_id="b", infer_nullable=True)
+        s1 = ArrowTableSource(t1, key_columns=["id"], source_id="a", infer_nullable=True)
+        s2 = ArrowTableSource(t2, key_columns=["id"], source_id="b", infer_nullable=True)
 
         ps1 = CachedSource(s1, cache_database=db)
         ps2 = CachedSource(s2, cache_database=db)
@@ -340,7 +340,7 @@ def double_age(age: int) -> int:
         pod = FunctionPod(data_function=pf)
 
         table = pa.table({"name": ["Alice", "Bob"], "age": [30, 25]})
-        source = ArrowTableSource(table, tag_columns=["name"], source_id="test", infer_nullable=True)
+        source = ArrowTableSource(table, key_columns=["name"], source_id="test", infer_nullable=True)
         ps = CachedSource(source, cache_database=db)
 
         result = pod(ps)
@@ -365,12 +365,12 @@ def _make_proxy(source):
         from orcapod.core.sources.source_proxy import SourceProxy
         from orcapod.types import Schema
 
-        tag_schema, data_schema = source.output_schema()
+        key_schema, data_schema = source.output_schema()
         return SourceProxy(
             source_id=source.source_id,
             content_hash_str=source.content_hash().to_string(),
             pipeline_hash_str=source.pipeline_hash().to_string(),
-            tag_schema=tag_schema,
+            key_schema=key_schema,
             data_schema=data_schema,
             expected_class_name=source.__class__.__name__,
         )
@@ -384,7 +384,7 @@ def test_serves_cached_data_when_inner_source_unrecoverable(self, db):
                 "age": pa.array([30, 25], type=pa.int64()),
             }
         )
-        source = ArrowTableSource(table, tag_columns=["name"], source_id="test_src", infer_nullable=True)
+        source = ArrowTableSource(table, key_columns=["name"], source_id="test_src", infer_nullable=True)
         cached = CachedSource(source, cache_database=db)
 
         # Force data into the cache
@@ -414,7 +414,7 @@ def test_inner_source_is_source_proxy(self, db):
                 "value": pa.array([10, 20], type=pa.int64()),
             }
         )
-        source = ArrowTableSource(table, tag_columns=["id"], source_id="proxy_test", infer_nullable=True)
+        source = ArrowTableSource(table, key_columns=["id"], source_id="proxy_test", infer_nullable=True)
         proxy = self._make_proxy(source)
         loaded = CachedSource(source=proxy, cache_database=db)
 
@@ -430,7 +430,7 @@ def test_identity_preserved_with_proxy(self, db):
                 "val": pa.array([1, 2], type=pa.int64()),
             }
         )
-        source = ArrowTableSource(table, tag_columns=["key"], source_id="id_test", infer_nullable=True)
+        source = ArrowTableSource(table, key_columns=["key"], source_id="id_test", infer_nullable=True)
         cached = CachedSource(source, cache_database=db)
 
         original_content = cached.content_hash()
@@ -455,7 +455,7 @@ def test_empty_cache_returns_empty_stream(self, db):
                 "y": pa.array([1], type=pa.int64()),
             }
         )
-        source = ArrowTableSource(table, tag_columns=["x"], source_id="empty_test", infer_nullable=True)
+        source = ArrowTableSource(table, key_columns=["x"], source_id="empty_test", infer_nullable=True)
         proxy = self._make_proxy(source)
 
         loaded = CachedSource(
diff --git a/tests/test_core/sources/test_db_table_source.py b/tests/test_core/sources/test_db_table_source.py
index 4cc85cec..6acafe1f 100644
--- a/tests/test_core/sources/test_db_table_source.py
+++ b/tests/test_core/sources/test_db_table_source.py
@@ -5,7 +5,7 @@
  1. Import / export sanity
  2. MockDBConnector satisfies DBConnectorProtocol
  3. DBTableSource protocol conformance (SourceProtocol, StreamProtocol, PipelineElementProtocol)
- 4. Construction — default tag columns (PK), explicit tag columns, source_id
+ 4. Construction — default key columns (PK), explicit key columns, source_id
  5. Construction error cases — missing table, no PK columns, empty table
  6. Stream behaviour — iter_data count, output_schema, as_table, producer/upstreams
  7. Deterministic hashing (pipeline_hash, content_hash)
@@ -202,18 +202,18 @@ def test_has_from_config(self, source):
 
 
 # ===========================================================================
-# 4. Construction — tag columns and source_id
+# 4. Construction — key columns and source_id
 # ===========================================================================
 
 
 class TestConstruction:
-    def test_pk_columns_used_as_default_tag_columns(self, source):
-        tag_schema, _ = source.output_schema()
-        assert "session_id" in tag_schema
+    def test_pk_columns_used_as_default_key_columns(self, source):
+        key_schema, _ = source.output_schema()
+        assert "session_id" in key_schema
 
-    def test_pk_tag_column_not_in_data_schema(self, source):
-        tag_schema, data_schema = source.output_schema()
-        assert "session_id" in tag_schema
+    def test_pk_key_column_not_in_data_schema(self, source):
+        key_schema, data_schema = source.output_schema()
+        assert "session_id" in key_schema
         assert "session_id" not in data_schema
 
     def test_non_pk_columns_in_data_schema(self, source):
@@ -221,19 +221,19 @@ def test_non_pk_columns_in_data_schema(self, source):
         assert "trial" in data_schema
         assert "response" in data_schema
 
-    def test_explicit_tag_columns_override_pk(self, connector):
-        src = DBTableSource(connector, "measurements", tag_columns=["trial"])
-        tag_schema, data_schema = src.output_schema()
-        assert "trial" in tag_schema
-        assert "session_id" not in tag_schema
+    def test_explicit_key_columns_override_pk(self, connector):
+        src = DBTableSource(connector, "measurements", key_columns=["trial"])
+        key_schema, data_schema = src.output_schema()
+        assert "trial" in key_schema
+        assert "session_id" not in key_schema
 
-    def test_multiple_explicit_tag_columns(self, connector):
+    def test_multiple_explicit_key_columns(self, connector):
         src = DBTableSource(
-            connector, "measurements", tag_columns=["session_id", "trial"]
+            connector, "measurements", key_columns=["session_id", "trial"]
         )
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
-        assert "trial" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
+        assert "trial" in key_schema
 
     def test_default_source_id_is_table_name(self, source):
         assert source.source_id == "measurements"
@@ -248,9 +248,9 @@ def test_table_with_multiple_pk_columns(self, measurements_table):
             pk_columns={"t": ["session_id", "trial"]},
         )
         src = DBTableSource(connector, "t")
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
-        assert "trial" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
+        assert "trial" in key_schema
 
 
 # ===========================================================================
@@ -272,7 +272,7 @@ def test_missing_table_error_not_confused_with_no_pk(self, measurements_table):
         with pytest.raises(ValueError, match="not found in database"):
             DBTableSource(connector, "completely_missing")
 
-    def test_no_pk_and_no_explicit_tags_raises_value_error(self, measurements_table):
+    def test_no_pk_and_no_explicit_keys_raises_value_error(self, measurements_table):
         connector = MockDBConnector(
             tables={"t": measurements_table},
             pk_columns={},  # table exists but has no PK
@@ -326,20 +326,20 @@ def test_iter_data_yields_one_data_per_row(self, source, measurements_table):
         data = list(source.iter_data())
         assert len(data) == measurements_table.num_rows
 
-    def test_iter_data_each_has_tag_and_data(self, source):
-        # Tag and Data are named types (not plain dict) but support
+    def test_iter_data_each_has_key_and_data(self, source):
+        # Key and Data are named types (not plain dict) but support
         # dict-like access and containment checks.
-        for tags, data in source.iter_data():
-            assert "session_id" in tags
+        for keys, data in source.iter_data():
+            assert "session_id" in keys
             assert "trial" in data or "response" in data
 
     def test_output_schema_returns_two_schemas(self, source):
         result = source.output_schema()
         assert len(result) == 2
 
-    def test_output_schema_tag_schema_is_dict_like(self, source):
-        tag_schema, _ = source.output_schema()
-        assert "session_id" in tag_schema
+    def test_output_schema_key_schema_is_dict_like(self, source):
+        key_schema, _ = source.output_schema()
+        assert "session_id" in key_schema
 
     def test_output_schema_data_schema_has_payload_columns(self, source):
         _, data_schema = source.output_schema()
@@ -354,10 +354,10 @@ def test_as_table_row_count_matches_source_data(self, source, measurements_table
         t = source.as_table()
         assert t.num_rows == measurements_table.num_rows
 
-    def test_source_with_explicit_tags_yields_correct_keys(self, connector):
-        src = DBTableSource(connector, "measurements", tag_columns=["session_id"])
-        for tags, _ in src.iter_data():
-            assert "session_id" in tags
+    def test_source_with_explicit_keys_yields_correct_keys(self, connector):
+        src = DBTableSource(connector, "measurements", key_columns=["session_id"])
+        for keys, _ in src.iter_data():
+            assert "session_id" in keys
 
 
 # ===========================================================================
@@ -377,16 +377,16 @@ def test_content_hash_is_deterministic(self, connector):
         assert src1.content_hash() == src2.content_hash()
 
     def test_pipeline_hash_is_schema_only_not_source_id(self, connector):
-        # pipeline_identity_structure() is (tag_schema, data_schema) by design —
+        # pipeline_identity_structure() is (key_schema, data_schema) by design —
         # source_id is intentionally excluded so sources with identical schemas
         # share the same pipeline hash and therefore the same pipeline DB table.
         src1 = DBTableSource(connector, "measurements", source_id="a")
         src2 = DBTableSource(connector, "measurements", source_id="b")
         assert src1.pipeline_hash() == src2.pipeline_hash()
 
-    def test_different_tag_columns_yields_different_pipeline_hash(self, connector):
-        src1 = DBTableSource(connector, "measurements", tag_columns=["session_id"])
-        src2 = DBTableSource(connector, "measurements", tag_columns=["trial"])
+    def test_different_key_columns_yields_different_pipeline_hash(self, connector):
+        src1 = DBTableSource(connector, "measurements", key_columns=["session_id"])
+        src2 = DBTableSource(connector, "measurements", key_columns=["trial"])
         assert src1.pipeline_hash() != src2.pipeline_hash()
 
 
@@ -404,10 +404,10 @@ def test_to_config_has_table_name(self, source):
         config = source.to_config()
         assert config["table_name"] == "measurements"
 
-    def test_to_config_has_tag_columns(self, source):
+    def test_to_config_has_key_columns(self, source):
         config = source.to_config()
-        assert "tag_columns" in config
-        assert "session_id" in config["tag_columns"]
+        assert "key_columns" in config
+        assert "session_id" in config["key_columns"]
 
     def test_to_config_has_connector(self, source):
         config = source.to_config()
@@ -420,7 +420,7 @@ def test_to_config_has_source_id(self, source):
 
     def test_to_config_has_identity_fields(self, source):
         config = source.to_config()
-        # identity_config() adds content_hash, pipeline_hash, tag_schema, data_schema
+        # identity_config() adds content_hash, pipeline_hash, key_schema, data_schema
         assert "content_hash" in config
         assert "pipeline_hash" in config
 
@@ -434,12 +434,12 @@ def test_to_config_explicit_source_id_preserved(self, connector):
         config = src.to_config()
         assert config["source_id"] == "custom_id"
 
-    def test_to_config_system_tag_columns_preserved(self, connector):
+    def test_to_config_system_key_columns_preserved(self, connector):
         src = DBTableSource(
-            connector, "measurements", system_tag_columns=["session_id"]
+            connector, "measurements", system_key_columns=["session_id"]
         )
         config = src.to_config()
-        assert "system_tag_columns" in config
+        assert "system_key_columns" in config
 
 
 class TestQueryOverride:
diff --git a/tests/test_core/sources/test_derived_source.py b/tests/test_core/sources/test_derived_source.py
index 8865c6f4..63a6bcc4 100644
--- a/tests/test_core/sources/test_derived_source.py
+++ b/tests/test_core/sources/test_derived_source.py
@@ -112,9 +112,9 @@ def test_empty_table_has_correct_columns_before_run(self):
     def test_empty_table_schema_matches_origin(self):
         node = _make_node(n=3)
         src = node.as_source()
-        tag_schema, data_schema = src.output_schema()
+        key_schema, data_schema = src.output_schema()
         _ = src.as_table()
-        assert "id" in tag_schema
+        assert "id" in key_schema
         assert "result" in data_schema
 
 
@@ -137,7 +137,7 @@ def test_iter_data_yields_correct_values(self, src):
         results = sorted(p["result"] for _, p in src.iter_data())
         assert results == [0, 2, 4, 6]
 
-    def test_iter_data_yields_correct_tags(self, src):
+    def test_iter_data_yields_correct_keys(self, src):
         ids = sorted(t["id"] for t, _ in src.iter_data())
         assert ids == [0, 1, 2, 3]
 
@@ -147,7 +147,7 @@ def test_as_table_returns_pyarrow_table(self, src):
     def test_as_table_correct_row_count(self, src):
         assert src.as_table().num_rows == 4
 
-    def test_as_table_has_tag_column(self, src):
+    def test_as_table_has_key_column(self, src):
         assert "id" in src.as_table().column_names
 
     def test_as_table_has_data_column(self, src):
@@ -183,13 +183,13 @@ def test_derived_source_matches_node_output(self):
 
         assert node_results == src_results
 
-    def test_derived_source_tag_schema_matches_node(self):
+    def test_derived_source_key_schema_matches_node(self):
         node = _make_node(n=3)
         node.run()
         src = node.as_source()
-        node_tag_schema, _ = node.output_schema()
-        src_tag_schema, _ = src.output_schema()
-        assert node_tag_schema == src_tag_schema
+        node_key_schema, _ = node.output_schema()
+        src_key_schema, _ = src.output_schema()
+        assert node_key_schema == src_key_schema
 
     def test_derived_source_data_schema_matches_node(self):
         node = _make_node(n=3)
@@ -223,7 +223,7 @@ def test_derived_source_can_feed_downstream_node(self):
             ),
         )
 
-        result_stream = ArrowTableStream(result_table, tag_columns=["id"])
+        result_stream = ArrowTableStream(result_table, key_columns=["id"])
 
         double_result = PythonDataFunction(double, output_keys="result")
         node2 = FunctionNode(
@@ -248,16 +248,16 @@ def test_output_schema_returns_two_mappings(self):
         node = _make_node(n=3)
         node.run()
         src = node.as_source()
-        tag_schema, data_schema = src.output_schema()
-        assert isinstance(tag_schema, Mapping)
+        key_schema, data_schema = src.output_schema()
+        assert isinstance(key_schema, Mapping)
         assert isinstance(data_schema, Mapping)
 
-    def test_output_schema_tag_has_id(self):
+    def test_output_schema_key_has_id(self):
         node = _make_node(n=3)
         node.run()
         src = node.as_source()
-        tag_schema, _ = src.output_schema()
-        assert "id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "id" in key_schema
 
     def test_output_schema_data_has_result(self):
         node = _make_node(n=3)
@@ -266,12 +266,12 @@ def test_output_schema_data_has_result(self):
         _, data_schema = src.output_schema()
         assert "result" in data_schema
 
-    def test_keys_tag_has_id(self):
+    def test_keys_key_has_id(self):
         node = _make_node(n=3)
         node.run()
         src = node.as_source()
-        tag_keys, _ = src.keys()
-        assert "id" in tag_keys
+        key_keys, _ = src.keys()
+        assert "id" in key_keys
 
     def test_keys_data_has_result(self):
         node = _make_node(n=3)
@@ -284,9 +284,9 @@ def test_keys_consistent_with_output_schema(self):
         node = _make_node(n=3)
         node.run()
         src = node.as_source()
-        tag_keys, data_keys = src.keys()
-        tag_schema, data_schema = src.output_schema()
-        assert set(tag_keys) == set(tag_schema.keys())
+        key_keys, data_keys = src.keys()
+        key_schema, data_schema = src.output_schema()
+        assert set(key_keys) == set(key_schema.keys())
         assert set(data_keys) == set(data_schema.keys())
 
 
@@ -332,7 +332,7 @@ def test_pipeline_hash_is_stable(self):
 
     def test_pipeline_hash_is_schema_only(self):
         """
-        DerivedSource inherits RootSource.pipeline_identity_structure() = (tag_schema, data_schema).
+        DerivedSource inherits RootSource.pipeline_identity_structure() = (key_schema, data_schema).
         Two DerivedSources with identical schemas share the same pipeline_hash even if
         the underlying FunctionNode processed different data.
         """
diff --git a/tests/test_core/sources/test_postgresql_table_source.py b/tests/test_core/sources/test_postgresql_table_source.py
index 64412064..37800f6e 100644
--- a/tests/test_core/sources/test_postgresql_table_source.py
+++ b/tests/test_core/sources/test_postgresql_table_source.py
@@ -109,19 +109,19 @@ def test_is_pipeline_element_protocol(self):
 
 
 # ===========================================================================
-# 3. PK as default tag columns
+# 3. PK as default key columns
 # ===========================================================================
 
 
-class TestPKAsDefaultTags:
-    def test_single_pk_is_tag_column(self):
+class TestPKAsDefaultKeys:
+    def test_single_pk_is_key_column(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         with patch(_PATCH) as mock_cls:
             mock_cls.return_value = _make_mock_connector()
             src = PostgreSQLTableSource(DSN, "measurements")
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
 
     def test_pk_not_in_data_schema(self):
         from orcapod.core.sources import PostgreSQLTableSource
@@ -142,7 +142,7 @@ def test_non_pk_columns_in_data_schema(self):
         assert "trial" in data_schema
         assert "response" in data_schema
 
-    def test_composite_pk_all_columns_are_tags(self):
+    def test_composite_pk_all_columns_are_keys(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         schema = pa.schema([
@@ -165,9 +165,9 @@ def test_composite_pk_all_columns_are_tags(self):
                 batches=[batch],
             )
             src = PostgreSQLTableSource(DSN, "events")
-        tag_schema, _ = src.output_schema()
-        assert "user_id" in tag_schema
-        assert "event_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "user_id" in key_schema
+        assert "event_id" in key_schema
 
     def test_default_source_id_is_table_name(self):
         from orcapod.core.sources import PostgreSQLTableSource
@@ -187,32 +187,32 @@ def test_explicit_source_id_overrides_default(self):
 
 
 # ===========================================================================
-# 4. Explicit tag_columns override
+# 4. Explicit key_columns override
 # ===========================================================================
 
 
-class TestExplicitTagOverride:
-    def test_explicit_tag_columns_override_pk(self):
+class TestExplicitKeyOverride:
+    def test_explicit_key_columns_override_pk(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         with patch(_PATCH) as mock_cls:
             mock_cls.return_value = _make_mock_connector()
-            src = PostgreSQLTableSource(DSN, "measurements", tag_columns=["trial"])
-        tag_schema, _ = src.output_schema()
-        assert "trial" in tag_schema
-        assert "session_id" not in tag_schema
+            src = PostgreSQLTableSource(DSN, "measurements", key_columns=["trial"])
+        key_schema, _ = src.output_schema()
+        assert "trial" in key_schema
+        assert "session_id" not in key_schema
 
-    def test_multiple_explicit_tag_columns(self):
+    def test_multiple_explicit_key_columns(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         with patch(_PATCH) as mock_cls:
             mock_cls.return_value = _make_mock_connector()
             src = PostgreSQLTableSource(
-                DSN, "measurements", tag_columns=["session_id", "trial"]
+                DSN, "measurements", key_columns=["session_id", "trial"]
             )
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
-        assert "trial" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
+        assert "trial" in key_schema
 
 
 # ===========================================================================
@@ -221,7 +221,7 @@ def test_multiple_explicit_tag_columns(self):
 
 
 class TestNoPKError:
-    def test_no_pk_and_no_tag_columns_raises(self):
+    def test_no_pk_and_no_key_columns_raises(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         with patch(_PATCH) as mock_cls:
@@ -283,14 +283,14 @@ def test_iter_data_yields_one_per_row(self):
             src = PostgreSQLTableSource(DSN, "measurements")
         assert len(list(src.iter_data())) == 3
 
-    def test_iter_data_tags_contain_pk(self):
+    def test_iter_data_keys_contain_pk(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         with patch(_PATCH) as mock_cls:
             mock_cls.return_value = _make_mock_connector()
             src = PostgreSQLTableSource(DSN, "measurements")
-        for tags, _ in src.iter_data():
-            assert "session_id" in tags
+        for keys, _ in src.iter_data():
+            assert "session_id" in keys
 
     def test_output_schema_returns_two_schemas(self):
         from orcapod.core.sources import PostgreSQLTableSource
@@ -386,7 +386,7 @@ def test_content_hash_is_deterministic(self):
             src2 = PostgreSQLTableSource(DSN, "measurements")
         assert src1.content_hash() == src2.content_hash()
 
-    def test_different_tag_columns_yields_different_pipeline_hash(self):
+    def test_different_key_columns_yields_different_pipeline_hash(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         with patch(_PATCH) as mock_cls:
@@ -394,7 +394,7 @@ def test_different_tag_columns_yields_different_pipeline_hash(self):
             src1 = PostgreSQLTableSource(DSN, "measurements")
         with patch(_PATCH) as mock_cls:
             mock_cls.return_value = _make_mock_connector()
-            src2 = PostgreSQLTableSource(DSN, "measurements", tag_columns=["trial"])
+            src2 = PostgreSQLTableSource(DSN, "measurements", key_columns=["trial"])
         assert src1.pipeline_hash() != src2.pipeline_hash()
 
 
@@ -420,8 +420,8 @@ def test_has_dsn(self):
     def test_has_table_name(self):
         assert self._make_src().to_config()["table_name"] == "measurements"
 
-    def test_has_tag_columns(self):
-        assert "session_id" in self._make_src().to_config()["tag_columns"]
+    def test_has_key_columns(self):
+        assert "session_id" in self._make_src().to_config()["key_columns"]
 
     def test_has_source_id(self):
         assert self._make_src().to_config()["source_id"] == "measurements"
@@ -474,18 +474,18 @@ def test_from_config_hashes_match(self):
         assert src2.content_hash() == src.content_hash()
         assert src2.pipeline_hash() == src.pipeline_hash()
 
-    def test_from_config_with_explicit_tag_columns(self):
+    def test_from_config_with_explicit_key_columns(self):
         from orcapod.core.sources import PostgreSQLTableSource
 
         with patch(_PATCH) as mock_cls:
             mock_cls.return_value = _make_mock_connector()
-            src = PostgreSQLTableSource(DSN, "measurements", tag_columns=["trial"])
+            src = PostgreSQLTableSource(DSN, "measurements", key_columns=["trial"])
         config = src.to_config()
         with patch(_PATCH) as mock_cls:
             mock_cls.return_value = _make_mock_connector()
             src2 = PostgreSQLTableSource.from_config(config)
-        tag_schema, _ = src2.output_schema()
-        assert "trial" in tag_schema
+        key_schema, _ = src2.output_schema()
+        assert "trial" in key_schema
 
     def test_from_config_missing_dsn_raises(self):
         from orcapod.core.sources import PostgreSQLTableSource
diff --git a/tests/test_core/sources/test_postgresql_table_source_integration.py b/tests/test_core/sources/test_postgresql_table_source_integration.py
index f6d674b3..fdd408d0 100644
--- a/tests/test_core/sources/test_postgresql_table_source_integration.py
+++ b/tests/test_core/sources/test_postgresql_table_source_integration.py
@@ -66,7 +66,7 @@ def schema_dsn(pg_schema: str) -> str:
 class TestSinglePKTable:
     """Source backed by a table with a single-column PK."""
 
-    def test_pk_column_is_tag(self, schema_dsn: str) -> None:
+    def test_pk_column_is_key(self, schema_dsn: str) -> None:
         from orcapod.core.sources import PostgreSQLTableSource
 
         with psycopg.connect(schema_dsn) as conn:
@@ -82,8 +82,8 @@ def test_pk_column_is_tag(self, schema_dsn: str) -> None:
             conn.commit()
 
         src = PostgreSQLTableSource(schema_dsn, "measurements")
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
 
     def test_non_pk_columns_in_data_schema(self, schema_dsn: str) -> None:
         from orcapod.core.sources import PostgreSQLTableSource
@@ -123,7 +123,7 @@ def test_iter_data_count_matches_rows(self, schema_dsn: str) -> None:
         src = PostgreSQLTableSource(schema_dsn, "measurements")
         assert len(list(src.iter_data())) == 3
 
-    def test_tag_values_are_correct(self, schema_dsn: str) -> None:
+    def test_key_values_are_correct(self, schema_dsn: str) -> None:
         from orcapod.core.sources import PostgreSQLTableSource
 
         with psycopg.connect(schema_dsn) as conn:
@@ -139,15 +139,15 @@ def test_tag_values_are_correct(self, schema_dsn: str) -> None:
             conn.commit()
 
         src = PostgreSQLTableSource(schema_dsn, "measurements")
-        tag_values = sorted([tags["session_id"] for tags, _ in src.iter_data()])
-        assert tag_values == ["s1", "s2", "s3"]
+        key_values = sorted([keys["session_id"] for keys, _ in src.iter_data()])
+        assert key_values == ["s1", "s2", "s3"]
 
 
 @pytest.mark.postgres
 class TestCompositePKTable:
     """Source backed by a table with a composite PK."""
 
-    def test_both_pk_columns_are_tags(self, schema_dsn: str) -> None:
+    def test_both_pk_columns_are_keys(self, schema_dsn: str) -> None:
         from orcapod.core.sources import PostgreSQLTableSource
 
         with psycopg.connect(schema_dsn) as conn:
@@ -164,16 +164,16 @@ def test_both_pk_columns_are_tags(self, schema_dsn: str) -> None:
             conn.commit()
 
         src = PostgreSQLTableSource(schema_dsn, "events")
-        tag_schema, _ = src.output_schema()
-        assert "user_id" in tag_schema
-        assert "event_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "user_id" in key_schema
+        assert "event_id" in key_schema
 
 
 @pytest.mark.postgres
-class TestExplicitTagOverride:
-    """tag_columns override overrides the PK."""
+class TestExplicitKeyOverride:
+    """Explicit key_columns take precedence over the PK."""
 
-    def test_explicit_tag_columns_override_pk(self, schema_dsn: str) -> None:
+    def test_explicit_key_columns_override_pk(self, schema_dsn: str) -> None:
         from orcapod.core.sources import PostgreSQLTableSource
 
         with psycopg.connect(schema_dsn) as conn:
@@ -189,11 +189,11 @@ def test_explicit_tag_columns_override_pk(self, schema_dsn: str) -> None:
             conn.commit()
 
         src = PostgreSQLTableSource(
-            schema_dsn, "measurements", tag_columns=["trial"]
+            schema_dsn, "measurements", key_columns=["trial"]
         )
-        tag_schema, _ = src.output_schema()
-        assert "trial" in tag_schema
-        assert "session_id" not in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "trial" in key_schema
+        assert "session_id" not in key_schema
 
 
 @pytest.mark.postgres
@@ -246,5 +246,5 @@ def double_response(trial: int, response: float) -> float:
         doubled_values = sorted([pkt.as_dict()["doubled"] for _, pkt in fn_outputs[0]])
         assert doubled_values == pytest.approx([0.2, 0.4, 0.6])
 
-        tag_values = sorted([tags["session_id"] for tags, _ in fn_outputs[0]])
-        assert tag_values == ["s1", "s2", "s3"]
+        key_values = sorted([keys["session_id"] for keys, _ in fn_outputs[0]])
+        assert key_values == ["s1", "s2", "s3"]
diff --git a/tests/test_core/sources/test_source_builder_integration.py b/tests/test_core/sources/test_source_builder_integration.py
index 5f885cc3..2bdf48c5 100644
--- a/tests/test_core/sources/test_source_builder_integration.py
+++ b/tests/test_core/sources/test_source_builder_integration.py
@@ -12,57 +12,57 @@
 
 class TestDictSourceBuilder:
     def test_no_arrow_source_attr(self):
-        src = DictSource(data=[{"id": 1, "x": 10}], tag_columns=["id"])
+        src = DictSource(data=[{"id": 1, "x": 10}], key_columns=["id"])
         assert not hasattr(src, "_arrow_source")
 
     def test_has_stream_attr(self):
-        src = DictSource(data=[{"id": 1, "x": 10}], tag_columns=["id"])
+        src = DictSource(data=[{"id": 1, "x": 10}], key_columns=["id"])
         assert hasattr(src, "_stream")
 
     def test_iter_data(self):
         src = DictSource(
             data=[{"id": 1, "x": 10}, {"id": 2, "x": 20}],
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         assert len(list(src.iter_data())) == 2
 
     def test_output_schema(self):
-        src = DictSource(data=[{"id": 1, "x": 10}], tag_columns=["id"])
-        tag_schema, data_schema = src.output_schema()
-        assert "id" in tag_schema
+        src = DictSource(data=[{"id": 1, "x": 10}], key_columns=["id"])
+        key_schema, data_schema = src.output_schema()
+        assert "id" in key_schema
         assert "x" in data_schema
 
     def test_to_config(self):
-        src = DictSource(data=[{"id": 1, "x": 10}], tag_columns=["id"])
+        src = DictSource(data=[{"id": 1, "x": 10}], key_columns=["id"])
         config = src.to_config()
         assert config["source_type"] == "dict"
-        assert config["tag_columns"] == ["id"]
+        assert config["key_columns"] == ["id"]
 
     def test_identity_uses_class_name(self):
-        src = DictSource(data=[{"id": 1, "x": 10}], tag_columns=["id"])
+        src = DictSource(data=[{"id": 1, "x": 10}], key_columns=["id"])
         identity = src.identity_structure()
         assert identity[0] == "DictSource"
 
     def test_source_id_defaults(self):
-        src = DictSource(data=[{"id": 1, "x": 10}], tag_columns=["id"])
+        src = DictSource(data=[{"id": 1, "x": 10}], key_columns=["id"])
         assert src.source_id is not None
 
 
 class TestDataFrameSourceBuilder:
     def test_no_arrow_source_attr(self):
-        src = DataFrameSource(data={"id": [1, 2], "x": [10, 20]}, tag_columns=["id"])
+        src = DataFrameSource(data={"id": [1, 2], "x": [10, 20]}, key_columns=["id"])
         assert not hasattr(src, "_arrow_source")
 
     def test_has_stream_attr(self):
-        src = DataFrameSource(data={"id": [1, 2], "x": [10, 20]}, tag_columns=["id"])
+        src = DataFrameSource(data={"id": [1, 2], "x": [10, 20]}, key_columns=["id"])
         assert hasattr(src, "_stream")
 
     def test_iter_data(self):
-        src = DataFrameSource(data={"id": [1, 2], "x": [10, 20]}, tag_columns=["id"])
+        src = DataFrameSource(data={"id": [1, 2], "x": [10, 20]}, key_columns=["id"])
         assert len(list(src.iter_data())) == 2
 
     def test_identity_uses_class_name(self):
-        src = DataFrameSource(data={"id": [1], "x": [10]}, tag_columns=["id"])
+        src = DataFrameSource(data={"id": [1], "x": [10]}, key_columns=["id"])
         identity = src.identity_structure()
         assert identity[0] == "DataFrameSource"
 
@@ -75,26 +75,26 @@ class TestCSVSourceBuilder:
     def test_no_arrow_source_attr(self, tmp_path):
         csv_file = tmp_path / "test.csv"
         csv_file.write_text("id,x\n1,10\n2,20\n")
-        src = CSVSource(file_path=str(csv_file), tag_columns=["id"])
+        src = CSVSource(file_path=str(csv_file), key_columns=["id"])
         assert not hasattr(src, "_arrow_source")
 
     def test_has_stream_attr(self, tmp_path):
         csv_file = tmp_path / "test.csv"
         csv_file.write_text("id,x\n1,10\n2,20\n")
-        src = CSVSource(file_path=str(csv_file), tag_columns=["id"])
+        src = CSVSource(file_path=str(csv_file), key_columns=["id"])
         assert hasattr(src, "_stream")
 
     def test_iter_data(self, tmp_path):
         csv_file = tmp_path / "test.csv"
         csv_file.write_text("id,x\n1,10\n2,20\n")
-        src = CSVSource(file_path=str(csv_file), tag_columns=["id"])
+        src = CSVSource(file_path=str(csv_file), key_columns=["id"])
         assert len(list(src.iter_data())) == 2
 
     def test_round_trip_config(self, tmp_path):
         csv_file = tmp_path / "test.csv"
         csv_file.write_text("id,x\n1,10\n2,20\n")
         src = CSVSource(
-            file_path=str(csv_file), tag_columns=["id"], record_id_column="id"
+            file_path=str(csv_file), key_columns=["id"], record_id_column="id"
         )
         config = src.to_config()
         assert config["source_type"] == "csv"
@@ -105,7 +105,7 @@ def test_round_trip_config(self, tmp_path):
     def test_identity_uses_class_name(self, tmp_path):
         csv_file = tmp_path / "test.csv"
         csv_file.write_text("id,x\n1,10\n")
-        src = CSVSource(file_path=str(csv_file), tag_columns=["id"])
+        src = CSVSource(file_path=str(csv_file), key_columns=["id"])
         assert src.identity_structure()[0] == "CSVSource"
 
 
@@ -120,22 +120,22 @@ def delta_path(self, tmp_path):
         return path
 
     def test_no_arrow_source_attr(self, delta_path):
-        src = DeltaTableSource(delta_table_path=delta_path, tag_columns=["id"])
+        src = DeltaTableSource(delta_table_path=delta_path, key_columns=["id"])
         assert not hasattr(src, "_arrow_source")
 
     def test_has_stream_attr(self, delta_path):
-        src = DeltaTableSource(delta_table_path=delta_path, tag_columns=["id"])
+        src = DeltaTableSource(delta_table_path=delta_path, key_columns=["id"])
         assert hasattr(src, "_stream")
 
     def test_round_trip_config(self, delta_path):
-        src = DeltaTableSource(delta_table_path=delta_path, tag_columns=["id"])
+        src = DeltaTableSource(delta_table_path=delta_path, key_columns=["id"])
         config = src.to_config()
         assert config["source_type"] == "delta_table"
         src2 = DeltaTableSource.from_config(config)
         assert src2.source_id == src.source_id
 
     def test_identity_uses_class_name(self, delta_path):
-        src = DeltaTableSource(delta_table_path=delta_path, tag_columns=["id"])
+        src = DeltaTableSource(delta_table_path=delta_path, key_columns=["id"])
         assert src.identity_structure()[0] == "DeltaTableSource"
 
 
@@ -161,14 +161,14 @@ def test_custom_identity_structure(self):
         assert identity[0] == "ListSource"
         assert identity[1] == "val"
         assert identity[2] == (1, 2, 3)
-        assert len(identity) == 4  # includes tag_function_hash
+        assert len(identity) == 4  # includes key_function_hash
 
-    def test_with_tag_function(self):
+    def test_with_key_function(self):
         src = ListSource(
             name="val",
             data=[10, 20],
-            tag_function=lambda e, i: {"idx": i, "label": f"item_{i}"},
-            expected_tag_keys=["idx", "label"],
+            key_function=lambda e, i: {"idx": i, "label": f"item_{i}"},
+            expected_key_keys=["idx", "label"],
         )
         results = list(src.iter_data())
         assert len(results) == 2
@@ -180,20 +180,20 @@ def test_source_id_defaults(self):
 
 class TestSourceLabelDefaults:
     def test_dict_source_label_defaults_to_class_name(self):
-        src = DictSource(data=[{"id": 1, "x": 10}], tag_columns=["id"])
+        src = DictSource(data=[{"id": 1, "x": 10}], key_columns=["id"])
         assert src.label == "DictSource"
 
     def test_dict_source_explicit_label_preserved(self):
         src = DictSource(
             data=[{"id": 1, "x": 10}],
-            tag_columns=["id"],
+            key_columns=["id"],
             label="my_source",
         )
         assert src.label == "my_source"
 
     def test_arrow_table_source_label_defaults_to_class_name(self):
         table = pa.table({"id": pa.array([1, 2]), "x": pa.array([10, 20])})
-        src = ArrowTableSource(table=table, tag_columns=["id"], infer_nullable=True)
+        src = ArrowTableSource(table=table, key_columns=["id"], infer_nullable=True)
         assert src.label == "ArrowTableSource"
 
     def test_list_source_label_defaults_to_class_name(self):
@@ -203,5 +203,5 @@ def test_list_source_label_defaults_to_class_name(self):
     def test_csv_source_label_defaults_to_class_name(self, tmp_path):
         csv_file = tmp_path / "test.csv"
         csv_file.write_text("id,x\n1,10\n")
-        src = CSVSource(file_path=str(csv_file), tag_columns=["id"])
+        src = CSVSource(file_path=str(csv_file), key_columns=["id"])
         assert src.label == "CSVSource"
diff --git a/tests/test_core/sources/test_source_config.py b/tests/test_core/sources/test_source_config.py
index 16fa8fe3..2f1724a2 100644
--- a/tests/test_core/sources/test_source_config.py
+++ b/tests/test_core/sources/test_source_config.py
@@ -15,13 +15,13 @@ def test_to_config(self, tmp_path):
         csv_file.write_text("a,b\n1,2\n3,4\n")
         source = CSVSource(
             file_path=str(csv_file),
-            tag_columns=["a"],
+            key_columns=["a"],
             source_id="test_csv",
         )
         config = source.to_config()
         assert config["source_type"] == "csv"
         assert config["file_path"] == str(csv_file)
-        assert config["tag_columns"] == ["a"]
+        assert config["key_columns"] == ["a"]
         assert config["source_id"] == "test_csv"
 
     def test_round_trip(self, tmp_path):
@@ -29,7 +29,7 @@ def test_round_trip(self, tmp_path):
         csv_file.write_text("a,b\n1,2\n3,4\n")
         source = CSVSource(
             file_path=str(csv_file),
-            tag_columns=["a"],
+            key_columns=["a"],
         )
         config = source.to_config()
         restored = CSVSource.from_config(config)
@@ -41,18 +41,18 @@ class TestDictSourceConfig:
     def test_to_config(self):
         source = DictSource(
             data=[{"a": 1, "b": 2}, {"a": 3, "b": 4}],
-            tag_columns=["a"],
+            key_columns=["a"],
             source_id="test_dict",
         )
         config = source.to_config()
         assert config["source_type"] == "dict"
-        assert config["tag_columns"] == ["a"]
+        assert config["key_columns"] == ["a"]
         assert config["source_id"] == "test_dict"
 
     def test_from_config_raises(self):
         config = {
             "source_type": "dict",
-            "tag_columns": ["a"],
+            "key_columns": ["a"],
             "source_id": "test_dict",
         }
         with pytest.raises(NotImplementedError):
@@ -61,7 +61,7 @@ def test_from_config_raises(self):
 
 class TestArrowTableSourceConfig:
     def test_from_config_raises(self):
-        config = {"source_type": "arrow_table", "tag_columns": ["a"]}
+        config = {"source_type": "arrow_table", "key_columns": ["a"]}
         with pytest.raises(NotImplementedError):
             ArrowTableSource.from_config(config)
 
@@ -83,7 +83,7 @@ def test_to_config(self, tmp_path):
         write_deltalake(delta_path, table)
         source = DeltaTableSource(
             delta_table_path=delta_path,
-            tag_columns=["a"],
+            key_columns=["a"],
             source_id="test_delta",
         )
         config = source.to_config()
@@ -99,7 +99,7 @@ def test_round_trip(self, tmp_path):
         write_deltalake(delta_path, table)
         source = DeltaTableSource(
             delta_table_path=delta_path,
-            tag_columns=["a"],
+            key_columns=["a"],
         )
         config = source.to_config()
         restored = DeltaTableSource.from_config(config)
@@ -112,7 +112,7 @@ def test_to_config(self):
         from orcapod.databases.in_memory_databases import InMemoryArrowDatabase
 
         inner = DictSource(
-            data=[{"a": 1, "b": 2}], tag_columns=["a"], source_id="inner"
+            data=[{"a": 1, "b": 2}], key_columns=["a"], source_id="inner"
         )
         cache_db = InMemoryArrowDatabase()
         source = CachedSource(source=inner, cache_database=cache_db)
diff --git a/tests/test_core/sources/test_source_protocol_conformance.py b/tests/test_core/sources/test_source_protocol_conformance.py
index 1d7b563d..1f3967b6 100644
--- a/tests/test_core/sources/test_source_protocol_conformance.py
+++ b/tests/test_core/sources/test_source_protocol_conformance.py
@@ -43,7 +43,7 @@ def arrow_src():
             "value": pa.array(["a", "b", "c"], type=pa.large_string()),
         }
     )
-    return ArrowTableSource(table=table, tag_columns=["id"], infer_nullable=True)
+    return ArrowTableSource(table=table, key_columns=["id"], infer_nullable=True)
 
 
 @pytest.fixture
@@ -56,7 +56,7 @@ def arrow_src_with_record_id():
     )
     return ArrowTableSource(
         table=table,
-        tag_columns=["id"],
+        key_columns=["id"],
         record_id_column="id",
         source_id="arrow_with_rid",
         infer_nullable=True,
@@ -71,7 +71,7 @@ def dict_src():
             {"id": 2, "value": "b"},
             {"id": 3, "value": "c"},
         ],
-        tag_columns=["id"],
+        key_columns=["id"],
     )
 
 
@@ -83,7 +83,7 @@ def list_src():
 @pytest.fixture
 def df_src():
     df = pl.DataFrame({"id": [1, 2, 3], "value": ["a", "b", "c"]})
-    return DataFrameSource(data=df, tag_columns="id")
+    return DataFrameSource(data=df, key_columns="id")
 
 
 ALL_SOURCE_FIXTURES = ["arrow_src", "dict_src", "list_src", "df_src"]
@@ -141,33 +141,33 @@ def test_returns_two_schemas(self, src_fixture, request):
     @pytest.mark.parametrize("src_fixture", ALL_SOURCE_FIXTURES)
     def test_schemas_are_schema_instances(self, src_fixture, request):
         src = request.getfixturevalue(src_fixture)
-        tag_schema, data_schema = src.output_schema()
-        assert isinstance(tag_schema, Schema)
+        key_schema, data_schema = src.output_schema()
+        assert isinstance(key_schema, Schema)
         assert isinstance(data_schema, Schema)
 
-    def test_arrow_src_tag_schema_has_id(self, arrow_src):
-        tag_schema, _ = arrow_src.output_schema()
-        assert "id" in tag_schema
+    def test_arrow_src_key_schema_has_id(self, arrow_src):
+        key_schema, _ = arrow_src.output_schema()
+        assert "id" in key_schema
 
     def test_arrow_src_data_schema_has_value(self, arrow_src):
         _, data_schema = arrow_src.output_schema()
         assert "value" in data_schema
 
-    def test_dict_src_tag_schema_has_id(self, dict_src):
-        tag_schema, _ = dict_src.output_schema()
-        assert "id" in tag_schema
+    def test_dict_src_key_schema_has_id(self, dict_src):
+        key_schema, _ = dict_src.output_schema()
+        assert "id" in key_schema
 
     def test_list_src_data_schema_has_item(self, list_src):
         _, data_schema = list_src.output_schema()
         assert "item" in data_schema
 
-    def test_list_src_tag_schema_has_element_index(self, list_src):
-        tag_schema, _ = list_src.output_schema()
-        assert "element_index" in tag_schema
+    def test_list_src_key_schema_has_element_index(self, list_src):
+        key_schema, _ = list_src.output_schema()
+        assert "element_index" in key_schema
 
-    def test_df_src_tag_schema_has_id(self, df_src):
-        tag_schema, _ = df_src.output_schema()
-        assert "id" in tag_schema
+    def test_df_src_key_schema_has_id(self, df_src):
+        key_schema, _ = df_src.output_schema()
+        assert "id" in key_schema
 
 
 # ---------------------------------------------------------------------------
@@ -192,29 +192,29 @@ class TestStreamKeys:
     @pytest.mark.parametrize("src_fixture", ALL_SOURCE_FIXTURES)
     def test_returns_two_tuples(self, src_fixture, request):
         src = request.getfixturevalue(src_fixture)
-        tag_keys, data_keys = src.keys()
-        assert isinstance(tag_keys, tuple)
+        key_keys, data_keys = src.keys()
+        assert isinstance(key_keys, tuple)
         assert isinstance(data_keys, tuple)
 
     @pytest.mark.parametrize("src_fixture", ALL_SOURCE_FIXTURES)
-    def test_no_overlap_between_tag_and_data_keys(self, src_fixture, request):
+    def test_no_overlap_between_key_and_data_keys(self, src_fixture, request):
         src = request.getfixturevalue(src_fixture)
-        tag_keys, data_keys = src.keys()
-        assert set(tag_keys).isdisjoint(set(data_keys))
+        key_keys, data_keys = src.keys()
+        assert set(key_keys).isdisjoint(set(data_keys))
 
     def test_arrow_src_keys(self, arrow_src):
-        tag_keys, data_keys = arrow_src.keys()
-        assert "id" in tag_keys
+        key_keys, data_keys = arrow_src.keys()
+        assert "id" in key_keys
         assert "value" in data_keys
 
     def test_list_src_keys(self, list_src):
-        tag_keys, data_keys = list_src.keys()
-        assert "element_index" in tag_keys
+        key_keys, data_keys = list_src.keys()
+        assert "element_index" in key_keys
         assert "item" in data_keys
 
     def test_dict_src_keys(self, dict_src):
-        tag_keys, data_keys = dict_src.keys()
-        assert "id" in tag_keys
+        key_keys, data_keys = dict_src.keys()
+        assert "id" in key_keys
         assert "value" in data_keys
 
 
@@ -224,27 +224,27 @@ class TestStreamOutputSchema:
     @pytest.mark.parametrize("src_fixture", ALL_SOURCE_FIXTURES)
     def test_returns_two_schemas(self, src_fixture, request):
         src = request.getfixturevalue(src_fixture)
-        tag_schema, data_schema = src.output_schema()
-        assert isinstance(tag_schema, Schema)
+        key_schema, data_schema = src.output_schema()
+        assert isinstance(key_schema, Schema)
         assert isinstance(data_schema, Schema)
 
     @pytest.mark.parametrize("src_fixture", ALL_SOURCE_FIXTURES)
     def test_consistent_with_keys(self, src_fixture, request):
         src = request.getfixturevalue(src_fixture)
-        tag_keys, data_keys = src.keys()
-        tag_schema, data_schema = src.output_schema()
-        assert set(tag_keys) == set(tag_schema.keys())
+        key_keys, data_keys = src.keys()
+        key_schema, data_schema = src.output_schema()
+        assert set(key_keys) == set(key_schema.keys())
         assert set(data_keys) == set(data_schema.keys())
 
 
 class TestStreamIterDatas:
     @pytest.mark.parametrize("src_fixture", ALL_SOURCE_FIXTURES)
-    def test_yields_tag_data_pairs(self, src_fixture, request):
+    def test_yields_key_data_pairs(self, src_fixture, request):
         src = request.getfixturevalue(src_fixture)
         pairs = list(src.iter_data())
         assert len(pairs) > 0
-        for tag, data in pairs:
-            assert tag is not None
+        for key, data in pairs:
+            assert key is not None
             assert data is not None
 
     @pytest.mark.parametrize("src_fixture", ALL_SOURCE_FIXTURES)
@@ -257,9 +257,9 @@ def test_arrow_src_data_values(self, arrow_src):
         values = {pkt["value"] for pkt in data}
         assert values == {"a", "b", "c"}
 
-    def test_arrow_src_tag_values(self, arrow_src):
-        tags = [tag for tag, _ in arrow_src.iter_data()]
-        ids = {tag["id"] for tag in tags}
+    def test_arrow_src_key_values(self, arrow_src):
+        keys = [key for key, _ in arrow_src.iter_data()]
+        ids = {key["id"] for key in keys}
         assert ids == {1, 2, 3}
 
     def test_list_src_data_values(self, list_src):
@@ -267,7 +267,7 @@ def test_list_src_data_values(self, list_src):
         items = {pkt["item"] for pkt in data}
         assert items == {"x", "y", "z"}
 
-    def test_dict_src_tag_and_data_values(self, dict_src):
+    def test_dict_src_key_and_data_values(self, dict_src):
         pairs = list(dict_src.iter_data())
         assert len(pairs) == 3
         values = {pkt["value"] for _, pkt in pairs}
@@ -302,7 +302,7 @@ def test_correct_row_count(self, src_fixture, request):
     def test_default_no_system_columns(self, src_fixture, request):
         src = request.getfixturevalue(src_fixture)
         table = src.as_table()
-        assert not any(c.startswith("_tag_") for c in table.column_names)
+        assert not any(c.startswith("_key_") for c in table.column_names)
 
     @pytest.mark.parametrize("src_fixture", ALL_SOURCE_FIXTURES)
     def test_all_info_adds_source_columns(self, src_fixture, request):
@@ -409,56 +409,56 @@ def test_different_schema_different_pipeline_hash(self):
 
 
 class TestEdgeCases:
-    def test_arrow_source_no_tag_columns(self):
-        """A source with no tag columns is valid; all columns are data columns."""
+    def test_arrow_source_no_key_columns(self):
+        """A source with no key columns is valid; all columns are data columns."""
         table = pa.table({"a": pa.array([1, 2], type=pa.int64())})
         src = ArrowTableSource(table=table, infer_nullable=True)
-        tag_keys, data_keys = src.keys()
+        key_keys, data_keys = src.keys()
         assert "a" in data_keys
-        assert tag_keys == ()
+        assert key_keys == ()
 
-    def test_dict_source_multiple_tag_columns(self):
+    def test_dict_source_multiple_key_columns(self):
         data = [
             {"a": 1, "b": 2, "val": "x"},
             {"a": 3, "b": 4, "val": "y"},
         ]
-        src = DictSource(data=data, tag_columns=["a", "b"])
-        tag_keys, data_keys = src.keys()
-        assert set(tag_keys) == {"a", "b"}
+        src = DictSource(data=data, key_columns=["a", "b"])
+        key_keys, data_keys = src.keys()
+        assert set(key_keys) == {"a", "b"}
         assert "val" in data_keys
 
-    def test_list_source_custom_tag_function(self):
-        def tag_fn(element, idx):
+    def test_list_source_custom_key_function(self):
+        def key_fn(element, idx):
             return {"label": f"item_{idx}"}
 
         src = ListSource(
             name="val",
             data=[10, 20, 30],
-            tag_function=tag_fn,
-            expected_tag_keys=["label"],
+            key_function=key_fn,
+            expected_key_keys=["label"],
         )
-        tag_keys, data_keys = src.keys()
-        assert "label" in tag_keys
+        key_keys, data_keys = src.keys()
+        assert "label" in key_keys
         assert "val" in data_keys
         pairs = list(src.iter_data())
-        labels = {tag["label"] for tag, _ in pairs}
+        labels = {key["label"] for key, _ in pairs}
         assert labels == {"item_0", "item_1", "item_2"}
 
-    def test_df_source_missing_tag_column_raises(self):
+    def test_df_source_missing_key_column_raises(self):
         df = pl.DataFrame({"x": [1, 2, 3]})
         with pytest.raises(ValueError, match="not found"):
-            DataFrameSource(data=df, tag_columns="nonexistent")
+            DataFrameSource(data=df, key_columns="nonexistent")
 
     def test_arrow_source_strips_system_columns_from_input(self):
         """System columns in the input table are silently dropped."""
         table = pa.table(
             {
                 "x": pa.array([1, 2], type=pa.int64()),
-                "_tag_something": pa.array(["a", "b"], type=pa.large_string()),
+                "_key_something": pa.array(["a", "b"], type=pa.large_string()),
             }
         )
         src = ArrowTableSource(table=table, infer_nullable=True)
         # system columns should not appear in data keys
-        tag_keys, data_keys = src.keys()
-        assert "_tag_something" not in tag_keys
-        assert "_tag_something" not in data_keys
+        key_keys, data_keys = src.keys()
+        assert "_key_something" not in key_keys
+        assert "_key_something" not in data_keys
diff --git a/tests/test_core/sources/test_sources.py b/tests/test_core/sources/test_sources.py
index c45d58a3..a737dbdb 100644
--- a/tests/test_core/sources/test_sources.py
+++ b/tests/test_core/sources/test_sources.py
@@ -37,7 +37,7 @@ def _make_arrow_source(record_id_column=None, source_id=None):
     )
     return ArrowTableSource(
         table=table,
-        tag_columns=["user_id"],
+        key_columns=["user_id"],
         record_id_column=record_id_column,
         source_id=source_id,
         infer_nullable=True,
@@ -83,7 +83,7 @@ class TestDefaultResolveField:
     def test_dict_source_raises_not_implemented(self):
         src = DictSource(
             data=[{"id": 1, "val": "a"}, {"id": 2, "val": "b"}],
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         with pytest.raises(NotImplementedError):
             src.resolve_field("row_0", "val")
@@ -96,7 +96,7 @@ def test_list_source_raises_not_implemented(self):
     def test_error_message_contains_class_name_and_field(self):
         src = DictSource(
             data=[{"id": 1, "val": "a"}],
-            tag_columns=["id"],
+            key_columns=["id"],
             source_id="test_source",
         )
         with pytest.raises(NotImplementedError, match="DictSource"):
diff --git a/tests/test_core/sources/test_sources_comprehensive.py b/tests/test_core/sources/test_sources_comprehensive.py
index d4241a8b..2bac15ef 100644
--- a/tests/test_core/sources/test_sources_comprehensive.py
+++ b/tests/test_core/sources/test_sources_comprehensive.py
@@ -8,16 +8,16 @@
   file-not-found, protocol conformance
 - DeltaTableSource: construction, source_id defaulting, resolve_field, bad path
   error, protocol conformance
-- DataFrameSource: string tag_columns, resolve_field raises, system-column
+- DataFrameSource: string key_columns, resolve_field raises, system-column
   stripping from Polars input, source_id parameter
 - DictSource: data_schema parameter, empty-data raises, source_id, content
   hash with explicit schema
-- ListSource: tag_function_hash_mode='signature' and 'content', empty list,
-  tag function inference without expected_tag_keys, TagProtocol.as_dict() protocol,
+- ListSource: key_function_hash_mode='signature' and 'content', empty list,
+  key function inference without expected_key_keys, KeyProtocol.as_dict() protocol,
   identity_structure stability
 - ArrowTableSource: table property, source_id controls provenance tokens,
   negative row index raises, duplicate record_id takes first match,
-  system_tag_columns forwarded, integer record_id_column values
+  system_key_columns forwarded, integer record_id_column values
 - SourceRegistry: replace() returns None when no prior entry, replace() with
   empty source_id raises, register() with None raises, __repr__
 """
@@ -91,13 +91,13 @@ def delta_path(tmp_path: Path) -> Path:
 
 class TestCSVSource:
     def test_construction_reads_rows(self, csv_path):
-        src = CSVSource(file_path=csv_path, tag_columns=["user_id"])
+        src = CSVSource(file_path=csv_path, key_columns=["user_id"])
         assert len(list(src.iter_data())) == 3
 
-    def test_tag_and_data_keys(self, csv_path):
-        src = CSVSource(file_path=csv_path, tag_columns=["user_id"])
-        tag_keys, data_keys = src.keys()
-        assert "user_id" in tag_keys
+    def test_key_and_data_keys(self, csv_path):
+        src = CSVSource(file_path=csv_path, key_columns=["user_id"])
+        key_keys, data_keys = src.keys()
+        assert "user_id" in key_keys
         assert "score" in data_keys
 
     def test_source_id_defaults_to_file_path(self, csv_path):
@@ -110,7 +110,7 @@ def test_source_id_explicit_overrides_default(self, csv_path):
 
     def test_resolve_field_raises_not_implemented(self, csv_path):
         """CSVSource delegates to ArrowTableSource which no longer implements resolve_field."""
-        src = CSVSource(file_path=csv_path, tag_columns=["user_id"])
+        src = CSVSource(file_path=csv_path, key_columns=["user_id"])
         with pytest.raises(NotImplementedError):
             src.resolve_field("row_0", "score")
 
@@ -125,9 +125,9 @@ def test_is_root_source(self, csv_path):
         assert isinstance(CSVSource(file_path=csv_path), RootSource)
 
     def test_output_schema_returns_two_schemas(self, csv_path):
-        src = CSVSource(file_path=csv_path, tag_columns=["user_id"])
-        tag_schema, data_schema = src.output_schema()
-        assert isinstance(tag_schema, Schema)
+        src = CSVSource(file_path=csv_path, key_columns=["user_id"])
+        key_schema, data_schema = src.output_schema()
+        assert isinstance(key_schema, Schema)
         assert isinstance(data_schema, Schema)
 
     def test_source_id_explicit(self, csv_path):
@@ -149,12 +149,12 @@ def test_same_source_id_yields_equivalent_source_fields(self, tmp_path):
 
         src_a = CSVSource(
             file_path=str(csv_a),
-            tag_columns=["user_id"],
+            key_columns=["user_id"],
             source_id="shared_name",
         )
         src_b = CSVSource(
             file_path=str(csv_b),
-            tag_columns=["user_id"],
+            key_columns=["user_id"],
             source_id="shared_name",
         )
 
@@ -182,13 +182,13 @@ def test_nonexistent_record_id_column_raises(self, csv_path):
 
 class TestDeltaTableSource:
     def test_construction_reads_rows(self, delta_path):
-        src = DeltaTableSource(delta_table_path=delta_path, tag_columns=["id"])
+        src = DeltaTableSource(delta_table_path=delta_path, key_columns=["id"])
         assert len(list(src.iter_data())) == 3
 
-    def test_tag_and_data_keys(self, delta_path):
-        src = DeltaTableSource(delta_table_path=delta_path, tag_columns=["id"])
-        tag_keys, data_keys = src.keys()
-        assert "id" in tag_keys
+    def test_key_and_data_keys(self, delta_path):
+        src = DeltaTableSource(delta_table_path=delta_path, key_columns=["id"])
+        key_keys, data_keys = src.keys()
+        assert "id" in key_keys
         assert "value" in data_keys
 
     def test_source_id_defaults_to_directory_name(self, delta_path):
@@ -201,7 +201,7 @@ def test_source_id_explicit_overrides_default(self, delta_path):
 
     def test_resolve_field_raises_not_implemented(self, delta_path):
         """DeltaTableSource delegates to ArrowTableSource which no longer implements resolve_field."""
-        src = DeltaTableSource(delta_table_path=delta_path, tag_columns=["id"])
+        src = DeltaTableSource(delta_table_path=delta_path, key_columns=["id"])
         with pytest.raises(NotImplementedError):
             src.resolve_field("row_0", "id")
 
@@ -216,9 +216,9 @@ def test_is_root_source(self, delta_path):
         assert isinstance(DeltaTableSource(delta_table_path=delta_path), RootSource)
 
     def test_output_schema_returns_two_schemas(self, delta_path):
-        src = DeltaTableSource(delta_table_path=delta_path, tag_columns=["id"])
-        tag_schema, data_schema = src.output_schema()
-        assert isinstance(tag_schema, Schema)
+        src = DeltaTableSource(delta_table_path=delta_path, key_columns=["id"])
+        key_schema, data_schema = src.output_schema()
+        assert isinstance(key_schema, Schema)
         assert isinstance(data_schema, Schema)
 
     def test_source_id_explicit(self, delta_path):
@@ -257,7 +257,7 @@ def test_non_nullable_columns_produce_plain_python_types(
     ) -> None:
         """Columns declared NOT NULL in the Delta schema must map to T, not T | None."""
         source = DeltaTableSource(
-            delta_mixed_nullable_path, tag_columns=["id"]
+            delta_mixed_nullable_path, key_columns=["id"]
         )
         _, data_schema = source.output_schema()
         # score is nullable=False in the Delta schema → must be float, not float | None
@@ -268,7 +268,7 @@ def test_nullable_columns_produce_optional_python_types(
     ) -> None:
         """Columns declared nullable in the Delta schema must map to T | None."""
         source = DeltaTableSource(
-            delta_mixed_nullable_path, tag_columns=["id"]
+            delta_mixed_nullable_path, key_columns=["id"]
         )
         _, data_schema = source.output_schema()
         # label is nullable=True in the Delta schema → must be str | None
@@ -282,18 +282,18 @@ def test_nullable_columns_produce_optional_python_types(
 
 
 class TestDataFrameSourceAdditional:
-    def test_string_tag_columns_accepted(self):
-        """tag_columns as a plain string (not a list) should work."""
+    def test_string_key_columns_accepted(self):
+        """key_columns as a plain string (not a list) should work."""
         df = pl.DataFrame({"id": [1, 2, 3], "value": ["a", "b", "c"]})
-        src = DataFrameSource(data=df, tag_columns="id")
-        tag_keys, data_keys = src.keys()
-        assert "id" in tag_keys
+        src = DataFrameSource(data=df, key_columns="id")
+        key_keys, data_keys = src.keys()
+        assert "id" in key_keys
         assert "value" in data_keys
 
     def test_resolve_field_raises_not_implemented(self):
         """DataFrameSource does not override resolve_field; must raise."""
         df = pl.DataFrame({"id": [1, 2], "value": ["x", "y"]})
-        src = DataFrameSource(data=df, tag_columns="id")
+        src = DataFrameSource(data=df, key_columns="id")
         with pytest.raises(NotImplementedError):
             src.resolve_field("row_0", "value")
 
@@ -302,28 +302,28 @@ def test_system_columns_stripped_from_polars_input(self):
         df = pl.DataFrame(
             {
                 "x": [1, 2],
-                "_tag_something": ["a", "b"],
+                "_key_something": ["a", "b"],
             }
         )
         src = DataFrameSource(data=df)
-        tag_keys, data_keys = src.keys()
-        assert "_tag_something" not in tag_keys
-        assert "_tag_something" not in data_keys
+        key_keys, data_keys = src.keys()
+        assert "_key_something" not in key_keys
+        assert "_key_something" not in data_keys
 
     def test_source_id_in_provenance_tokens(self):
         df = pl.DataFrame({"id": [1, 2, 3], "value": ["a", "b", "c"]})
-        src = DataFrameSource(data=df, tag_columns="id", source_id="df_source")
+        src = DataFrameSource(data=df, key_columns="id", source_id="df_source")
         table = src.as_table(all_info=True)
         source_cols = [c for c in table.column_names if c.startswith("_source_")]
         assert source_cols
         token = table.column(source_cols[0])[0].as_py()
         assert "df_source" in token
 
-    def test_multiple_tag_columns(self):
+    def test_multiple_key_columns(self):
         df = pl.DataFrame({"a": [1, 2], "b": [3, 4], "val": ["x", "y"]})
-        src = DataFrameSource(data=df, tag_columns=["a", "b"])
-        tag_keys, data_keys = src.keys()
-        assert set(tag_keys) == {"a", "b"}
+        src = DataFrameSource(data=df, key_columns=["a", "b"])
+        key_keys, data_keys = src.keys()
+        assert set(key_keys) == {"a", "b"}
         assert "val" in data_keys
 
     def test_content_hash_same_data(self):
@@ -350,21 +350,21 @@ def test_data_schema_explicit(self):
         data = [{"id": 1, "value": "hello"}, {"id": 2, "value": "world"}]
         src = DictSource(
             data=data,
-            tag_columns=["id"],
+            key_columns=["id"],
             data_schema={"id": int, "value": str},
         )
-        tag_schema, data_schema = src.output_schema()
-        assert "id" in tag_schema
+        key_schema, data_schema = src.output_schema()
+        assert "id" in key_schema
         assert "value" in data_schema
 
     def test_empty_data_raises(self):
         """An empty DictSource cannot build a valid ArrowTableStream."""
         with pytest.raises(Exception):
-            DictSource(data=[], tag_columns=["id"])
+            DictSource(data=[], key_columns=["id"])
 
     def test_source_id_in_provenance_tokens(self):
         data = [{"id": 1, "val": "a"}, {"id": 2, "val": "b"}]
-        src = DictSource(data=data, tag_columns=["id"], source_id="dict_src_name")
+        src = DictSource(data=data, key_columns=["id"], source_id="dict_src_name")
         table = src.as_table(all_info=True)
         source_cols = [c for c in table.column_names if c.startswith("_source_")]
         assert source_cols
@@ -373,12 +373,12 @@ def test_source_id_in_provenance_tokens(self):
 
     def test_source_id_explicit(self):
         data = [{"id": 1, "val": "x"}]
-        src = DictSource(data=data, tag_columns=["id"], source_id="my_dict")
+        src = DictSource(data=data, key_columns=["id"], source_id="my_dict")
         assert src.source_id == "my_dict"
 
     def test_resolve_field_error_mentions_class_name(self):
         data = [{"id": 1, "val": "a"}]
-        src = DictSource(data=data, tag_columns=["id"], source_id="named_dict")
+        src = DictSource(data=data, key_columns=["id"], source_id="named_dict")
         with pytest.raises(NotImplementedError, match="DictSource"):
             src.resolve_field("row_0", "val")
 
@@ -388,98 +388,98 @@ def test_resolve_field_error_mentions_class_name(self):
 # ---------------------------------------------------------------------------
 
 
-def _tag_fn_for_signature(element, idx):
-    """Top-level tag function so inspect.getsource works."""
+def _key_fn_for_signature(element, idx):
+    """Top-level key function so inspect.getsource works."""
     return {"label": f"item_{idx}"}
 
 
-def _tag_fn_for_content(element, idx):
-    """Top-level tag function for content hash mode."""
+def _key_fn_for_content(element, idx):
+    """Top-level key function for content hash mode."""
     return {"bucket": idx % 2}
 
 
 class TestListSourceAdditional:
-    def test_tag_function_hash_mode_signature(self):
-        """Two ListSources with the same tag function and 'signature' mode share hash."""
+    def test_key_function_hash_mode_signature(self):
+        """Two ListSources with the same key function and 'signature' mode share hash."""
         src1 = ListSource(
             name="val",
             data=[1, 2, 3],
-            tag_function=_tag_fn_for_signature,
-            expected_tag_keys=["label"],
-            tag_function_hash_mode="signature",
+            key_function=_key_fn_for_signature,
+            expected_key_keys=["label"],
+            key_function_hash_mode="signature",
         )
         src2 = ListSource(
             name="val",
             data=[1, 2, 3],
-            tag_function=_tag_fn_for_signature,
-            expected_tag_keys=["label"],
-            tag_function_hash_mode="signature",
+            key_function=_key_fn_for_signature,
+            expected_key_keys=["label"],
+            key_function_hash_mode="signature",
         )
         assert src1.content_hash() == src2.content_hash()
 
-    def test_tag_function_hash_mode_content(self):
+    def test_key_function_hash_mode_content(self):
         """'content' mode hashes the function source code."""
         src = ListSource(
             name="val",
             data=[1, 2, 3],
-            tag_function=_tag_fn_for_content,
-            expected_tag_keys=["bucket"],
-            tag_function_hash_mode="content",
+            key_function=_key_fn_for_content,
+            expected_key_keys=["bucket"],
+            key_function_hash_mode="content",
         )
         # Identity structure should include a non-empty hash
         identity = src.identity_structure()
         assert isinstance(identity[3], str)
         assert len(identity[3]) > 0
 
-    def test_tag_function_hash_mode_name(self):
+    def test_key_function_hash_mode_name(self):
         """'name' mode uses the qualified name of the function."""
         src = ListSource(
             name="val",
             data=[1, 2, 3],
-            tag_function=_tag_fn_for_signature,
-            expected_tag_keys=["label"],
-            tag_function_hash_mode="name",
+            key_function=_key_fn_for_signature,
+            expected_key_keys=["label"],
+            key_function_hash_mode="name",
         )
-        assert _tag_fn_for_signature.__qualname__ in src._tag_function_hash
+        assert _key_fn_for_signature.__qualname__ in src._key_function_hash
 
     def test_empty_list_raises(self):
         """An empty ListSource cannot build a valid stream."""
         with pytest.raises(Exception):
             ListSource(name="item", data=[])
 
-    def test_tag_keys_inferred_from_first_row(self):
-        """When expected_tag_keys is None with a custom tag function, keys are
+    def test_key_keys_inferred_from_first_row(self):
+        """When expected_key_keys is None with a custom key function, keys are
         inferred from the first row."""
 
-        def tag_fn(el, idx):
+        def key_fn(el, idx):
             return {"group": el % 3}
 
-        src = ListSource(name="val", data=[0, 1, 2], tag_function=tag_fn)
-        tag_keys, data_keys = src.keys()
-        assert "group" in tag_keys
+        src = ListSource(name="val", data=[0, 1, 2], key_function=key_fn)
+        key_keys, data_keys = src.keys()
+        assert "group" in key_keys
         assert "val" in data_keys
 
-    def test_tag_as_dict_protocol(self):
-        """If the tag function returns an object with .as_dict(), it is unwrapped."""
+    def test_key_as_dict_protocol(self):
+        """If the key function returns an object with .as_dict(), it is unwrapped."""
 
-        class FakeTag:
+        class FakeKey:
             def __init__(self, d):
                 self._d = d
 
             def as_dict(self):
                 return self._d
 
-        def tag_fn(el, idx):
-            return FakeTag({"slot": idx})
+        def key_fn(el, idx):
+            return FakeKey({"slot": idx})
 
         src = ListSource(
             name="item",
             data=["x", "y", "z"],
-            tag_function=tag_fn,
-            expected_tag_keys=["slot"],
+            key_function=key_fn,
+            expected_key_keys=["slot"],
         )
         pairs = list(src.iter_data())
-        slots = {tag["slot"] for tag, _ in pairs}
+        slots = {key["slot"] for key, _ in pairs}
         assert slots == {0, 1, 2}
 
     def test_identity_structure_contains_name_and_elements(self):
@@ -518,7 +518,7 @@ def test_table_property_returns_enriched_table(self):
         src = ArrowTableSource(table=table, infer_nullable=True)
         enriched = src.table
         assert isinstance(enriched, pa.Table)
-        # The enriched table includes source-info and system-tag columns
+        # The enriched table includes source-info and system-key columns
         assert any(c.startswith("_source_") for c in enriched.column_names)
 
     def test_source_id_controls_provenance_tokens(self):
@@ -526,7 +526,7 @@ def test_source_id_controls_provenance_tokens(self):
         table = _simple_table()
         src = ArrowTableSource(
             table=table,
-            tag_columns=["user_id"],
+            key_columns=["user_id"],
             source_id="my_source",
             infer_nullable=True,
         )
@@ -543,25 +543,25 @@ def test_resolve_field_raises_not_implemented(self):
         with pytest.raises(NotImplementedError):
             src.resolve_field("row_0", "x")
 
-    def test_system_tag_columns_forwarded_to_stream(self):
-        """system_tag_columns passed at construction are preserved."""
+    def test_system_key_columns_forwarded_to_stream(self):
+        """system_key_columns passed at construction are preserved."""
         table = pa.table({"x": pa.array([1, 2], type=pa.int64())})
-        src = ArrowTableSource(table=table, system_tag_columns=["sys_col"], infer_nullable=True)
-        assert "sys_col" in src._system_tag_columns
+        src = ArrowTableSource(table=table, system_key_columns=["sys_col"], infer_nullable=True)
+        assert "sys_col" in src._system_key_columns
 
-    def test_as_table_all_info_includes_system_tag_columns(self):
-        """as_table(all_info=True) exposes paired _tag_source_id and _tag_record_id columns."""
+    def test_as_table_all_info_includes_system_key_columns(self):
+        """as_table(all_info=True) exposes paired _key_source_id and _key_record_id columns."""
         from orcapod.system_constants import constants
 
         table = pa.table({"x": pa.array([1, 2], type=pa.int64())})
         src = ArrowTableSource(table=table, infer_nullable=True)
         enriched = src.as_table(all_info=True)
         assert any(
-            c.startswith(constants.SYSTEM_TAG_SOURCE_ID_PREFIX)
+            c.startswith(constants.SYSTEM_KEY_SOURCE_ID_PREFIX)
             for c in enriched.column_names
         )
         assert any(
-            c.startswith(constants.SYSTEM_TAG_RECORD_ID_PREFIX)
+            c.startswith(constants.SYSTEM_KEY_RECORD_ID_PREFIX)
             for c in enriched.column_names
         )
 
@@ -572,39 +572,39 @@ def test_resolve_field_on_empty_record_id_prefix_raises(self):
         with pytest.raises(NotImplementedError):
             src.resolve_field("", "x")
 
-    def test_tag_columns_not_present_in_table_raises(self):
-        """tag_columns that don't exist in the table raise ValueError."""
+    def test_key_columns_not_present_in_table_raises(self):
+        """key_columns that don't exist in the table raise ValueError."""
         table = pa.table(
             {
                 "id": pa.array([1], type=pa.int64()),
                 "val": pa.array([42], type=pa.int64()),
             }
         )
-        with pytest.raises(ValueError, match="tag_columns not found in table"):
-            ArrowTableSource(table=table, tag_columns=["nonexistent", "id"], infer_nullable=True)
+        with pytest.raises(ValueError, match="key_columns not found in table"):
+            ArrowTableSource(table=table, key_columns=["nonexistent", "id"], infer_nullable=True)
 
-    def test_tag_columns_all_missing_raises(self):
-        """All tag_columns missing from the table raises ValueError."""
+    def test_key_columns_all_missing_raises(self):
+        """All key_columns missing from the table raises ValueError."""
         table = pa.table(
             {
                 "id": pa.array([1], type=pa.int64()),
                 "val": pa.array([42], type=pa.int64()),
             }
         )
-        with pytest.raises(ValueError, match="tag_columns not found in table"):
-            ArrowTableSource(table=table, tag_columns=["foo", "bar"], infer_nullable=True)
+        with pytest.raises(ValueError, match="key_columns not found in table"):
+            ArrowTableSource(table=table, key_columns=["foo", "bar"], infer_nullable=True)
 
-    def test_tag_columns_all_valid_succeeds(self):
-        """tag_columns that all exist in the table work correctly."""
+    def test_key_columns_all_valid_succeeds(self):
+        """key_columns that all exist in the table work correctly."""
         table = pa.table(
             {
                 "id": pa.array([1], type=pa.int64()),
                 "val": pa.array([42], type=pa.int64()),
             }
         )
-        src = ArrowTableSource(table=table, tag_columns=["id"], infer_nullable=True)
-        tag_keys, data_keys = src.keys()
-        assert "id" in tag_keys
+        src = ArrowTableSource(table=table, key_columns=["id"], infer_nullable=True)
+        key_keys, data_keys = src.keys()
+        assert "id" in key_keys
         assert "val" in data_keys
 
 
diff --git a/tests/test_core/sources/test_spiraldb_table_source.py b/tests/test_core/sources/test_spiraldb_table_source.py
index 37c4c76c..e2eaefd0 100644
--- a/tests/test_core/sources/test_spiraldb_table_source.py
+++ b/tests/test_core/sources/test_spiraldb_table_source.py
@@ -7,8 +7,8 @@
 Test sections:
  1. Import / export sanity
  2. Protocol conformance
- 3. PK as default tag columns (single and composite)
- 4. Explicit tag column override
+ 3. PK as default key columns (single and composite)
+ 4. Explicit key column override
  5. No key schema → ValueError (no ROWID fallback)
  6. Error cases (missing table, empty table)
  7. Stream behaviour
@@ -181,19 +181,19 @@ def test_is_pipeline_element_protocol(self):
 
 
 # ===========================================================================
-# 3. PK as default tag columns
+# 3. PK as default key columns
 # ===========================================================================
 
 
-class TestPKAsDefaultTags:
-    def test_single_pk_is_tag_column(self):
+class TestPKAsDefaultKeys:
+    def test_single_pk_is_key_column(self):
         from orcapod.core.sources import SpiralDBTableSource
 
         connector = _make_mock_connector(pk_columns=["session_id"])
         with _patch_connector(connector):
             src = SpiralDBTableSource(_PROJECT_ID, _TABLE_NAME)
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
 
     def test_pk_not_in_data_schema(self):
         from orcapod.core.sources import SpiralDBTableSource
@@ -213,15 +213,15 @@ def test_non_pk_columns_in_data_schema(self):
         _, data_schema = src.output_schema()
         assert "firing_rate" in data_schema
 
-    def test_composite_pk_all_columns_are_tags(self):
+    def test_composite_pk_all_columns_are_keys(self):
         from orcapod.core.sources import SpiralDBTableSource
 
         connector = _make_composite_pk_connector()
         with _patch_connector(connector):
             src = SpiralDBTableSource(_PROJECT_ID, _TABLE_NAME)
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
-        assert "probe_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
+        assert "probe_id" in key_schema
 
     def test_composite_pk_data_column_in_data(self):
         from orcapod.core.sources import SpiralDBTableSource
@@ -250,17 +250,17 @@ def test_explicit_source_id_overrides_default(self):
 
 
 # ===========================================================================
-# 4. Explicit tag column override
+# 4. Explicit key column override
 # ===========================================================================
 
 
-class TestExplicitTagOverride:
-    def test_explicit_tag_columns_override_pk(self):
+class TestExplicitKeyOverride:
+    def test_explicit_key_columns_override_pk(self):
         from orcapod.core.sources import SpiralDBTableSource
 
         connector = _make_mock_connector(pk_columns=["session_id"])
-        # Provide explicit tag_columns — PK should be ignored
-        # Use "firing_rate" as tag by overriding, and "session_id" as data
+        # Provide explicit key_columns — the PK should be ignored.
+        # The override makes "firing_rate" the key column and leaves "session_id" as data.
         batches = [
             pa.record_batch(
                 {
@@ -273,13 +273,13 @@ def test_explicit_tag_columns_override_pk(self):
         connector.iter_batches.return_value = iter(batches)
         with _patch_connector(connector):
             src = SpiralDBTableSource(
-                _PROJECT_ID, _TABLE_NAME, tag_columns=["firing_rate"]
+                _PROJECT_ID, _TABLE_NAME, key_columns=["firing_rate"]
             )
-        tag_schema, _ = src.output_schema()
-        assert "firing_rate" in tag_schema
-        assert "session_id" not in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "firing_rate" in key_schema
+        assert "session_id" not in key_schema
 
-    def test_multiple_explicit_tag_columns(self):
+    def test_multiple_explicit_key_columns(self):
         from orcapod.core.sources import SpiralDBTableSource
 
         connector = _make_composite_pk_connector()
@@ -287,11 +287,11 @@ def test_multiple_explicit_tag_columns(self):
             src = SpiralDBTableSource(
                 _PROJECT_ID,
                 _TABLE_NAME,
-                tag_columns=["session_id", "probe_id"],
+                key_columns=["session_id", "probe_id"],
             )
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
-        assert "probe_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
+        assert "probe_id" in key_schema
 
 
 # ===========================================================================
@@ -300,7 +300,7 @@ def test_multiple_explicit_tag_columns(self):
 
 
 class TestNoPKRaisesError:
-    def test_no_pk_columns_and_no_explicit_tags_raises(self):
+    def test_no_pk_columns_and_no_explicit_keys_raises(self):
         """SpiralDB has no ROWID fallback — raise ValueError when no PK."""
         from orcapod.core.sources import SpiralDBTableSource
 
@@ -309,17 +309,17 @@ def test_no_pk_columns_and_no_explicit_tags_raises(self):
             with pytest.raises(ValueError, match="no primary key"):
                 SpiralDBTableSource(_PROJECT_ID, _TABLE_NAME)
 
-    def test_no_pk_columns_but_explicit_tags_succeeds(self):
-        """Explicit tag_columns bypass the PK requirement."""
+    def test_no_pk_columns_but_explicit_keys_succeeds(self):
+        """Explicit key_columns bypass the PK requirement."""
         from orcapod.core.sources import SpiralDBTableSource
 
         connector = _make_mock_connector(pk_columns=[])
         with _patch_connector(connector):
             src = SpiralDBTableSource(
-                _PROJECT_ID, _TABLE_NAME, tag_columns=["session_id"]
+                _PROJECT_ID, _TABLE_NAME, key_columns=["session_id"]
             )
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
 
 
 # ===========================================================================
@@ -424,14 +424,14 @@ def test_iter_data_yields_one_per_row(self):
         data = list(src.iter_data())
         assert len(data) == 3
 
-    def test_iter_data_tags_contain_pk(self):
+    def test_iter_data_keys_contain_pk(self):
         from orcapod.core.sources import SpiralDBTableSource
 
         connector = _make_mock_connector()
         with _patch_connector(connector):
             src = SpiralDBTableSource(_PROJECT_ID, _TABLE_NAME)
-        for tags, _ in src.iter_data():
-            assert "session_id" in tags
+        for keys, _ in src.iter_data():
+            assert "session_id" in keys
 
     def test_output_schema_returns_two_schemas(self):
         from orcapod.core.sources import SpiralDBTableSource
@@ -468,13 +468,13 @@ def test_data_values_are_correct(self):
         firing_rates = sorted(pkt["firing_rate"] for _, pkt in src.iter_data())
         assert firing_rates == pytest.approx([0.1, 0.2, 0.3])
 
-    def test_tag_values_are_correct(self):
+    def test_key_values_are_correct(self):
         from orcapod.core.sources import SpiralDBTableSource
 
         connector = _make_mock_connector()
         with _patch_connector(connector):
             src = SpiralDBTableSource(_PROJECT_ID, _TABLE_NAME)
-        session_ids = sorted(tags["session_id"] for tags, _ in src.iter_data())
+        session_ids = sorted(keys["session_id"] for keys, _ in src.iter_data())
         assert session_ids == ["s1", "s2", "s3"]
 
 
@@ -506,10 +506,10 @@ def test_content_hash_is_deterministic(self):
             src2 = SpiralDBTableSource(_PROJECT_ID, _TABLE_NAME)
         assert src1.content_hash() == src2.content_hash()
 
-    def test_different_tag_columns_yields_different_pipeline_hash(self):
+    def test_different_key_columns_yields_different_pipeline_hash(self):
         from orcapod.core.sources import SpiralDBTableSource
 
-        # src1 uses PK (session_id) as tag; src2 uses firing_rate
+        # src1 uses PK (session_id) as key; src2 uses firing_rate
         batches = [
             pa.record_batch(
                 {
@@ -526,7 +526,7 @@ def test_different_tag_columns_yields_different_pipeline_hash(self):
         c2.iter_batches.return_value = iter(batches)
         with _patch_connector(c2):
             src2 = SpiralDBTableSource(
-                _PROJECT_ID, _TABLE_NAME, tag_columns=["firing_rate"]
+                _PROJECT_ID, _TABLE_NAME, key_columns=["firing_rate"]
             )
         assert src1.pipeline_hash() != src2.pipeline_hash()
 
@@ -534,9 +534,9 @@ def test_different_schemas_yield_different_pipeline_hash(self):
         """pipeline_hash is schema-only; different column schemas → different hash."""
         from orcapod.core.sources import SpiralDBTableSource
 
-        # src1: tag=session_id (large_string), data=firing_rate (float64)
+        # src1: key=session_id (large_string), data=firing_rate (float64)
         c1 = _make_mock_connector()
-        # src2: tag=session_id (large_string), data=neuron_count (int64)
+        # src2: key=session_id (large_string), data=neuron_count (int64)
         batches2 = [
             pa.record_batch(
                 {
@@ -599,13 +599,13 @@ def test_to_config_has_table_name(self):
             src = SpiralDBTableSource(_PROJECT_ID, _TABLE_NAME)
         assert src.to_config()["table_name"] == _TABLE_NAME
 
-    def test_to_config_has_tag_columns(self):
+    def test_to_config_has_key_columns(self):
         from orcapod.core.sources import SpiralDBTableSource
 
         connector = _make_mock_connector(pk_columns=["session_id"])
         with _patch_connector(connector):
             src = SpiralDBTableSource(_PROJECT_ID, _TABLE_NAME)
-        assert "session_id" in src.to_config()["tag_columns"]
+        assert "session_id" in src.to_config()["key_columns"]
 
     def test_to_config_has_overrides(self):
         from orcapod.core.sources import SpiralDBTableSource
@@ -762,5 +762,5 @@ def double_rate(firing_rate: float) -> float:
         )
         assert doubled_values == pytest.approx([0.2, 0.4, 0.6])
 
-        tag_values = sorted([tags["session_id"] for tags, _ in fn_outputs[0]])
-        assert tag_values == ["s1", "s2", "s3"]
+        key_values = sorted([keys["session_id"] for keys, _ in fn_outputs[0]])
+        assert key_values == ["s1", "s2", "s3"]
diff --git a/tests/test_core/sources/test_sqlite_table_source.py b/tests/test_core/sources/test_sqlite_table_source.py
index 578495a4..0b31ff7d 100644
--- a/tests/test_core/sources/test_sqlite_table_source.py
+++ b/tests/test_core/sources/test_sqlite_table_source.py
@@ -3,8 +3,8 @@
 Test sections:
  1. Import / export sanity
  2. Protocol conformance
- 3. PK as default tag columns (single and composite)
- 4. Explicit tag column override
+ 3. PK as default key columns (single and composite)
+ 4. Explicit key column override
  5. ROWID fallback (no explicit PK)
  6. Error cases (missing table, empty table)
  7. Stream behaviour
@@ -167,16 +167,16 @@ def test_is_pipeline_element_protocol(self, pk_db):
 
 
 # ===========================================================================
-# 3. PK as default tag columns
+# 3. PK as default key columns
 # ===========================================================================
 
 
-class TestPKAsDefaultTags:
-    def test_single_pk_is_tag_column(self, pk_db):
+class TestPKAsDefaultKeys:
+    def test_single_pk_is_key_column(self, pk_db):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(pk_db, "measurements")
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
 
     def test_pk_not_in_data_schema(self, pk_db):
         from orcapod.core.sources import SQLiteTableSource
@@ -191,12 +191,12 @@ def test_non_pk_columns_in_data_schema(self, pk_db):
         assert "trial" in data_schema
         assert "response" in data_schema
 
-    def test_composite_pk_all_columns_are_tags(self, composite_pk_db):
+    def test_composite_pk_all_columns_are_keys(self, composite_pk_db):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(composite_pk_db, "events")
-        tag_schema, _ = src.output_schema()
-        assert "user_id" in tag_schema
-        assert "event_id" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "user_id" in key_schema
+        assert "event_id" in key_schema
 
     def test_default_source_id_is_table_name(self, pk_db):
         from orcapod.core.sources import SQLiteTableSource
@@ -210,30 +210,30 @@ def test_explicit_source_id_overrides_default(self, pk_db):
 
 
 # ===========================================================================
-# 4. Explicit tag column override
+# 4. Explicit key column override
 # ===========================================================================
 
 
-class TestExplicitTagOverride:
-    def test_explicit_tag_columns_override_pk(self, pk_db):
+class TestExplicitKeyOverride:
+    def test_explicit_key_columns_override_pk(self, pk_db):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(
-            pk_db, "measurements", tag_columns=["trial"]
+            pk_db, "measurements", key_columns=["trial"]
         )
-        tag_schema, _ = src.output_schema()
-        assert "trial" in tag_schema
-        assert "session_id" not in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "trial" in key_schema
+        assert "session_id" not in key_schema
 
-    def test_multiple_explicit_tag_columns(self, pk_db):
+    def test_multiple_explicit_key_columns(self, pk_db):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(
             pk_db,
             "measurements",
-            tag_columns=["session_id", "trial"],
+            key_columns=["session_id", "trial"],
         )
-        tag_schema, _ = src.output_schema()
-        assert "session_id" in tag_schema
-        assert "trial" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "session_id" in key_schema
+        assert "trial" in key_schema
 
 
 # ===========================================================================
@@ -242,11 +242,11 @@ def test_multiple_explicit_tag_columns(self, pk_db):
 
 
 class TestRowidFallback:
-    def test_rowid_only_table_uses_rowid_as_tag(self, rowid_db):
+    def test_rowid_only_table_uses_rowid_as_key(self, rowid_db):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(rowid_db, "logs")
-        tag_schema, _ = src.output_schema()
-        assert "rowid" in tag_schema
+        key_schema, _ = src.output_schema()
+        assert "rowid" in key_schema
 
     def test_rowid_is_not_in_data_schema(self, rowid_db):
         from orcapod.core.sources import SQLiteTableSource
@@ -257,15 +257,15 @@ def test_rowid_is_not_in_data_schema(self, rowid_db):
     def test_rowid_values_are_positive_integers(self, rowid_db):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(rowid_db, "logs")
-        for tags, _ in src.iter_data():
-            assert isinstance(tags["rowid"], int)
-            assert tags["rowid"] > 0
+        for keys, _ in src.iter_data():
+            assert isinstance(keys["rowid"], int)
+            assert keys["rowid"] > 0
 
     def test_rowid_type_is_int64(self, rowid_db):
         """Verify rowid is actually typed as int64, not large_string."""
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(rowid_db, "logs")
-        # The raw stream table (before tag/data split) holds all columns.
+        # The raw stream table (before key/data split) holds all columns.
         # We can verify the Arrow type via the internal stream table.
         raw = src._stream._table  # ArrowTableStream stores the enriched table
         assert "rowid" in raw.schema.names
@@ -317,11 +317,11 @@ def test_iter_data_yields_one_per_row(self, pk_db):
         data = list(src.iter_data())
         assert len(data) == 3
 
-    def test_iter_data_tags_contain_pk(self, pk_db):
+    def test_iter_data_keys_contain_pk(self, pk_db):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(pk_db, "measurements")
-        for tags, _ in src.iter_data():
-            assert "session_id" in tags
+        for keys, _ in src.iter_data():
+            assert "session_id" in keys
 
     def test_output_schema_returns_two_schemas(self, pk_db):
         from orcapod.core.sources import SQLiteTableSource
@@ -359,11 +359,11 @@ def test_content_hash_is_deterministic(self, pk_db):
         src2 = SQLiteTableSource(pk_db, "measurements")
         assert src1.content_hash() == src2.content_hash()
 
-    def test_different_tag_columns_yields_different_pipeline_hash(self, pk_db):
+    def test_different_key_columns_yields_different_pipeline_hash(self, pk_db):
         from orcapod.core.sources import SQLiteTableSource
         src1 = SQLiteTableSource(pk_db, "measurements")
         src2 = SQLiteTableSource(
-            pk_db, "measurements", tag_columns=["trial"]
+            pk_db, "measurements", key_columns=["trial"]
         )
         assert src1.pipeline_hash() != src2.pipeline_hash()
 
@@ -422,10 +422,10 @@ def test_to_config_has_table_name(self, file_db_path):
         src = SQLiteTableSource(file_db_path, "measurements")
         assert src.to_config()["table_name"] == "measurements"
 
-    def test_to_config_has_tag_columns(self, file_db_path):
+    def test_to_config_has_key_columns(self, file_db_path):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(file_db_path, "measurements")
-        assert "session_id" in src.to_config()["tag_columns"]
+        assert "session_id" in src.to_config()["key_columns"]
 
     def test_to_config_has_identity_fields(self, file_db_path):
         from orcapod.core.sources import SQLiteTableSource
@@ -472,18 +472,18 @@ def rowid_file_db_path(self, tmp_path: Path) -> str:
         conn.close()
         return db_path
 
-    def test_to_config_has_rowid_as_tag_column(self, rowid_file_db_path):
+    def test_to_config_has_rowid_as_key_column(self, rowid_file_db_path):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(rowid_file_db_path, "logs")
-        assert src.to_config()["tag_columns"] == ["rowid"]
+        assert src.to_config()["key_columns"] == ["rowid"]
 
     def test_from_config_reconstructs_rowid_table(self, rowid_file_db_path):
         from orcapod.core.sources import SQLiteTableSource
         src = SQLiteTableSource(rowid_file_db_path, "logs")
         config = src.to_config()
         src2 = SQLiteTableSource.from_config(config)
-        tag_schema, _ = src2.output_schema()
-        assert "rowid" in tag_schema
+        key_schema, _ = src2.output_schema()
+        assert "rowid" in key_schema
 
     def test_from_config_rowid_hashes_match(self, rowid_file_db_path):
         from orcapod.core.sources import SQLiteTableSource
@@ -532,12 +532,12 @@ def double_response(trial: int, response: float) -> float:
         assert len(fn_outputs) == 1
         assert len(fn_outputs[0]) == 3
 
-        # Verify tag column (session_id) flows through and results are correct
+        # Verify key column (session_id) flows through and results are correct
         doubled_values = sorted(
             [pkt.as_dict()["doubled"] for _, pkt in fn_outputs[0]]
         )
         assert doubled_values == pytest.approx([0.2, 0.4, 0.6])
 
-        # Verify tag values are present
-        tag_values = sorted([tags["session_id"] for tags, _ in fn_outputs[0]])
-        assert tag_values == ["s1", "s2", "s3"]
+        # Verify key values are present
+        key_values = sorted([keys["session_id"] for keys, _ in fn_outputs[0]])
+        assert key_values == ["s1", "s2", "s3"]
diff --git a/tests/test_core/sources/test_stream_builder.py b/tests/test_core/sources/test_stream_builder.py
index e446fdb9..dfc87b32 100644
--- a/tests/test_core/sources/test_stream_builder.py
+++ b/tests/test_core/sources/test_stream_builder.py
@@ -19,56 +19,56 @@ def builder(self):
 
     def test_build_returns_source_stream_result(self, builder):
         table = pa.table({"id": pa.array([1, 2]), "x": pa.array([10, 20])})
-        result = builder.build(table, tag_columns=["id"])
+        result = builder.build(table, key_columns=["id"])
         assert isinstance(result, SourceStreamResult)
 
     def test_build_stream_has_correct_row_count(self, builder):
         table = pa.table({"id": pa.array([1, 2, 3]), "x": pa.array([10, 20, 30])})
-        result = builder.build(table, tag_columns=["id"])
+        result = builder.build(table, key_columns=["id"])
         assert result.stream.as_table().num_rows == 3
 
     def test_build_source_id_defaults_to_table_hash(self, builder):
         table = pa.table({"id": pa.array([1]), "x": pa.array([10])})
-        result = builder.build(table, tag_columns=["id"])
+        result = builder.build(table, key_columns=["id"])
         assert result.source_id is not None
         assert len(result.source_id) > 0
 
     def test_build_source_id_explicit(self, builder):
         table = pa.table({"id": pa.array([1]), "x": pa.array([10])})
-        result = builder.build(table, tag_columns=["id"], source_id="my_source")
+        result = builder.build(table, key_columns=["id"], source_id="my_source")
         assert result.source_id == "my_source"
 
     def test_build_schema_hash_is_string(self, builder):
         table = pa.table({"id": pa.array([1]), "x": pa.array([10])})
-        result = builder.build(table, tag_columns=["id"])
+        result = builder.build(table, key_columns=["id"])
         assert isinstance(result.schema_hash, str)
         assert len(result.schema_hash) > 0
 
-    def test_build_tag_columns_tuple(self, builder):
+    def test_build_key_columns_tuple(self, builder):
         table = pa.table({"id": pa.array([1]), "x": pa.array([10])})
-        result = builder.build(table, tag_columns=["id"])
-        assert result.tag_columns == ("id",)
+        result = builder.build(table, key_columns=["id"])
+        assert result.key_columns == ("id",)
 
-    def test_build_validates_missing_tag_columns(self, builder):
+    def test_build_validates_missing_key_columns(self, builder):
         table = pa.table({"id": pa.array([1]), "x": pa.array([10])})
-        with pytest.raises(ValueError, match="tag_columns not found"):
-            builder.build(table, tag_columns=["nonexistent"])
+        with pytest.raises(ValueError, match="key_columns not found"):
+            builder.build(table, key_columns=["nonexistent"])
 
     def test_build_validates_missing_record_id_column(self, builder):
         table = pa.table({"id": pa.array([1]), "x": pa.array([10])})
         with pytest.raises(ValueError, match="record_id_column"):
-            builder.build(table, tag_columns=["id"], record_id_column="bad")
+            builder.build(table, key_columns=["id"], record_id_column="bad")
 
-    def test_build_output_schema_has_tag_and_data(self, builder):
+    def test_build_output_schema_has_key_and_data(self, builder):
         table = pa.table({"id": pa.array([1]), "x": pa.array([10])})
-        result = builder.build(table, tag_columns=["id"])
-        tag_schema, data_schema = result.stream.output_schema()
-        assert "id" in tag_schema
+        result = builder.build(table, key_columns=["id"])
+        key_schema, data_schema = result.stream.output_schema()
+        assert "id" in key_schema
         assert "x" in data_schema
 
     def test_build_with_record_id_column(self, builder):
         table = pa.table({"id": pa.array([1, 2]), "x": pa.array([10, 20])})
-        result = builder.build(table, tag_columns=["id"], record_id_column="id")
+        result = builder.build(table, key_columns=["id"], record_id_column="id")
         assert result.stream.as_table().num_rows == 2
 
     def test_build_drops_system_columns_from_input(self, builder):
@@ -79,8 +79,8 @@ def test_build_drops_system_columns_from_input(self, builder):
                 "__system_col": pa.array(["sys"]),
             }
         )
-        result = builder.build(table, tag_columns=["id"])
-        tag_schema, data_schema = result.stream.output_schema()
+        result = builder.build(table, key_columns=["id"])
+        key_schema, data_schema = result.stream.output_schema()
         assert "__system_col" not in data_schema
 
 
@@ -99,7 +99,7 @@ def test_nullable_true_schema_preserved(self, builder):
         """build() preserves nullable=True from incoming schema."""
         table = pa.table({"id": pa.array([1]), "val": pa.array([10], type=pa.int64())})
         # Arrow default: nullable=True
-        result = builder.build(table, tag_columns=["id"])
+        result = builder.build(table, key_columns=["id"])
         _, data_schema = result.stream.output_schema()
         assert data_schema["val"] == int | None
 
@@ -112,7 +112,7 @@ def test_nullable_false_schema_preserved(self, builder):
                 pa.field("val", pa.int64(), nullable=False),
             ]),
         )
-        result = builder.build(table, tag_columns=["id"])
+        result = builder.build(table, key_columns=["id"])
         _, data_schema = result.stream.output_schema()
         assert data_schema["val"] is int
 
@@ -126,8 +126,8 @@ def test_nullable_flags_affect_schema_hash(self, builder):
                 pa.field("val", pa.int64(), nullable=False),
             ]),
         )
-        result_nullable = builder.build(nullable_table, tag_columns=["id"])
-        result_non_nullable = builder.build(non_nullable_table, tag_columns=["id"])
+        result_nullable = builder.build(nullable_table, key_columns=["id"])
+        result_non_nullable = builder.build(non_nullable_table, key_columns=["id"])
         assert result_nullable.schema_hash != result_non_nullable.schema_hash
 
 
@@ -138,25 +138,25 @@ class TestArrowTableSourceUsesBuilder:
     def test_arrow_table_source_works(self):
         """ArrowTableSource should use SourceStreamBuilder internally."""
         table = pa.table({"id": pa.array([1, 2]), "x": pa.array([10, 20])})
-        src = ArrowTableSource(table=table, tag_columns=["id"])
+        src = ArrowTableSource(table=table, key_columns=["id"])
         assert src.as_table().num_rows == 2
-        tag_schema, data_schema = src.output_schema()
-        assert "id" in tag_schema
+        key_schema, data_schema = src.output_schema()
+        assert "id" in key_schema
         assert "x" in data_schema
 
     def test_arrow_table_source_has_stream_attr(self):
         table = pa.table({"id": pa.array([1, 2]), "x": pa.array([10, 20])})
-        src = ArrowTableSource(table=table, tag_columns=["id"])
+        src = ArrowTableSource(table=table, key_columns=["id"])
         assert hasattr(src, "_stream")
 
     def test_arrow_table_source_identity_uses_class_name(self):
         table = pa.table({"id": pa.array([1]), "x": pa.array([10])})
-        src = ArrowTableSource(table=table, tag_columns=["id"])
+        src = ArrowTableSource(table=table, key_columns=["id"])
         identity = src.identity_structure()
         assert identity[0] == "ArrowTableSource"
 
     def test_resolve_field_raises_not_implemented(self):
         table = pa.table({"id": pa.array([1]), "x": pa.array([10])})
-        src = ArrowTableSource(table=table, tag_columns=["id"])
+        src = ArrowTableSource(table=table, key_columns=["id"])
         with pytest.raises(NotImplementedError):
             src.resolve_field("row_0", "x")
diff --git a/tests/test_core/streams/test_stream_convenience_methods.py b/tests/test_core/streams/test_stream_convenience_methods.py
index a3c5e9b3..3d536199 100644
--- a/tests/test_core/streams/test_stream_convenience_methods.py
+++ b/tests/test_core/streams/test_stream_convenience_methods.py
@@ -18,20 +18,20 @@
 # ---------------------------------------------------------------------------
 
 
-def _make_stream(tag_col: str, data_cols: dict, tag_data: list) -> ArrowTableStream:
+def _make_stream(key_col: str, data_cols: dict, key_data: list) -> ArrowTableStream:
     """Build an ArrowTableStream from column specs."""
-    columns = {tag_col: pa.array(tag_data, type=pa.large_string())}
+    columns = {key_col: pa.array(key_data, type=pa.large_string())}
     for name, values in data_cols.items():
         columns[name] = pa.array(values, type=pa.int64())
-    return ArrowTableStream(pa.table(columns), tag_columns=[tag_col])
+    return ArrowTableStream(pa.table(columns), key_columns=[key_col])
 
 
-def _make_source(tag_col: str, data_cols: dict, tag_data: list) -> ArrowTableSource:
+def _make_source(key_col: str, data_cols: dict, key_data: list) -> ArrowTableSource:
     """Build an ArrowTableSource from column specs."""
-    columns = {tag_col: pa.array(tag_data, type=pa.large_string())}
+    columns = {key_col: pa.array(key_data, type=pa.large_string())}
     for name, values in data_cols.items():
         columns[name] = pa.array(values, type=pa.int64())
-    return ArrowTableSource(pa.table(columns), tag_columns=[tag_col], infer_nullable=True)
+    return ArrowTableSource(pa.table(columns), key_columns=[key_col], infer_nullable=True)
 
 
 # ---------------------------------------------------------------------------
@@ -99,22 +99,22 @@ def test_semi_join_with_label(self):
 
 
 # ---------------------------------------------------------------------------
-# Tests: map_tags
+# Tests: map_keys
 # ---------------------------------------------------------------------------
 
 
-class TestMapTagsConvenience:
-    def test_map_tags_renames(self):
+class TestMapKeysConvenience:
+    def test_map_keys_renames(self):
         s = _make_stream("k", {"a": [1, 2]}, ["x", "y"])
-        result = s.map_tags({"k": "key"})
-        tag_keys, _ = result.keys()
-        assert "key" in tag_keys
-        assert "k" not in tag_keys
+        result = s.map_keys({"k": "key"})
+        key_keys, _ = result.keys()
+        assert "key" in key_keys
+        assert "k" not in key_keys
 
-    def test_map_tags_with_label(self):
+    def test_map_keys_with_label(self):
         s = _make_stream("k", {"a": [1]}, ["x"])
-        result = s.map_tags({"k": "key"}, label="rename_tag")
-        assert result.label == "rename_tag"
+        result = s.map_keys({"k": "key"}, label="rename_key")
+        assert result.label == "rename_key"
         assert result.has_assigned_label
 
 
@@ -146,12 +146,12 @@ def test_map_data_with_label(self):
 
 
 # ---------------------------------------------------------------------------
-# Tests: select_tag_columns / select_data_columns
+# Tests: select_key_columns / select_data_columns
 # ---------------------------------------------------------------------------
 
 
 class TestSelectColumnsConvenience:
-    def test_select_tag_columns(self):
+    def test_select_key_columns(self):
         table = pa.table(
             {
                 "k1": pa.array(["a"], type=pa.large_string()),
@@ -159,10 +159,10 @@ def test_select_tag_columns(self):
                 "v": pa.array([1], type=pa.int64()),
             }
         )
-        s = ArrowTableStream(table, tag_columns=["k1", "k2"])
-        result = s.select_tag_columns(["k1"])
-        tag_keys, _ = result.keys()
-        assert tag_keys == ("k1",)
+        s = ArrowTableStream(table, key_columns=["k1", "k2"])
+        result = s.select_key_columns(["k1"])
+        key_keys, _ = result.keys()
+        assert key_keys == ("k1",)
 
     def test_select_data_columns(self):
         table = pa.table(
@@ -172,12 +172,12 @@ def test_select_data_columns(self):
                 "v2": pa.array([2], type=pa.int64()),
             }
         )
-        s = ArrowTableStream(table, tag_columns=["k"])
+        s = ArrowTableStream(table, key_columns=["k"])
         result = s.select_data_columns(["v1"])
         _, data_keys = result.keys()
         assert data_keys == ("v1",)
 
-    def test_select_tag_columns_with_label(self):
+    def test_select_key_columns_with_label(self):
         table = pa.table(
             {
                 "k1": pa.array(["a"], type=pa.large_string()),
@@ -185,19 +185,19 @@ def test_select_tag_columns_with_label(self):
                 "v": pa.array([1], type=pa.int64()),
             }
         )
-        s = ArrowTableStream(table, tag_columns=["k1", "k2"])
-        result = s.select_tag_columns(["k1"], label="sel_tag")
-        assert result.label == "sel_tag"
+        s = ArrowTableStream(table, key_columns=["k1", "k2"])
+        result = s.select_key_columns(["k1"], label="sel_key")
+        assert result.label == "sel_key"
         assert result.has_assigned_label
 
 
 # ---------------------------------------------------------------------------
-# Tests: drop_tag_columns / drop_data_columns
+# Tests: drop_key_columns / drop_data_columns
 # ---------------------------------------------------------------------------
 
 
 class TestDropColumnsConvenience:
-    def test_drop_tag_columns(self):
+    def test_drop_key_columns(self):
         table = pa.table(
             {
                 "k1": pa.array(["a"], type=pa.large_string()),
@@ -205,11 +205,11 @@ def test_drop_tag_columns(self):
                 "v": pa.array([1], type=pa.int64()),
             }
         )
-        s = ArrowTableStream(table, tag_columns=["k1", "k2"])
-        result = s.drop_tag_columns(["k2"])
-        tag_keys, _ = result.keys()
-        assert "k1" in tag_keys
-        assert "k2" not in tag_keys
+        s = ArrowTableStream(table, key_columns=["k1", "k2"])
+        result = s.drop_key_columns(["k2"])
+        key_keys, _ = result.keys()
+        assert "k1" in key_keys
+        assert "k2" not in key_keys
 
     def test_drop_data_columns(self):
         table = pa.table(
@@ -219,7 +219,7 @@ def test_drop_data_columns(self):
                 "v2": pa.array([2], type=pa.int64()),
             }
         )
-        s = ArrowTableStream(table, tag_columns=["k"])
+        s = ArrowTableStream(table, key_columns=["k"])
         result = s.drop_data_columns(["v2"])
         _, data_keys = result.keys()
         assert "v1" in data_keys
diff --git a/tests/test_core/streams/test_streams.py b/tests/test_core/streams/test_streams.py
index 673befc7..fc5d64b8 100644
--- a/tests/test_core/streams/test_streams.py
+++ b/tests/test_core/streams/test_streams.py
@@ -22,7 +22,7 @@
 
 
 def make_table_stream(
-    tag_columns: list[str] | None = None,
+    key_columns: list[str] | None = None,
     n_rows: int = 3,
 ) -> ArrowTableStream:
     """Create a minimal ArrowTableStream for testing.
@@ -30,7 +30,7 @@ def make_table_stream(
     Uses explicit nullable=False schema to simulate data that has been
     processed through SourceStreamBuilder (which normalizes nullable flags).
     """
-    tag_columns = tag_columns or ["id"]
+    key_columns = key_columns or ["id"]
     schema = pa.schema(
         [
             pa.field("id", pa.int64(), nullable=False),
@@ -44,7 +44,7 @@ def make_table_stream(
         },
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=tag_columns)
+    return ArrowTableStream(table, key_columns=key_columns)
 
 
 # ---------------------------------------------------------------------------
@@ -142,16 +142,16 @@ def test_stream_has_upstreams_property(self):
 
     def test_stream_has_keys_method(self):
         stream = make_table_stream()
-        tag_keys, data_keys = stream.keys()
-        assert isinstance(tag_keys, tuple)
+        key_keys, data_keys = stream.keys()
+        assert isinstance(key_keys, tuple)
         assert isinstance(data_keys, tuple)
 
     def test_stream_has_output_schema_method(self):
         from orcapod.types import Schema
 
         stream = make_table_stream()
-        tag_schema, data_schema = stream.output_schema()
-        assert isinstance(tag_schema, Schema)
+        key_schema, data_schema = stream.output_schema()
+        assert isinstance(key_schema, Schema)
         assert isinstance(data_schema, Schema)
 
     def test_stream_has_iter_data_method(self):
@@ -159,7 +159,7 @@ def test_stream_has_iter_data_method(self):
         it = stream.iter_data()
         # must be iterable
         pair = next(it)
-        assert len(pair) == 2  # (TagProtocol, DataProtocol)
+        assert len(pair) == 2  # (KeyProtocol, DataProtocol)
 
     def test_stream_has_as_table_method(self):
         stream = make_table_stream()
@@ -177,23 +177,23 @@ def test_basic_construction(self):
         stream = make_table_stream()
         assert stream is not None
 
-    def test_tag_and_data_columns_are_separated(self):
-        stream = make_table_stream(tag_columns=["id"])
-        tag_keys, data_keys = stream.keys()
-        assert "id" in tag_keys
+    def test_key_and_data_columns_are_separated(self):
+        stream = make_table_stream(key_columns=["id"])
+        key_keys, data_keys = stream.keys()
+        assert "id" in key_keys
         assert "value" in data_keys
         assert "id" not in data_keys
 
-    def test_missing_tag_column_raises(self):
+    def test_missing_key_column_raises(self):
         table = pa.table({"value": pa.array([1, 2])})
         with pytest.raises(ValueError):
-            ArrowTableStream(table, tag_columns=["nonexistent"])
+            ArrowTableStream(table, key_columns=["nonexistent"])
 
     def test_no_data_column_raises(self):
-        # A table where all columns are tags → no data columns → should raise
+        # A table where all columns are keys → no data columns → should raise
         table = pa.table({"id": pa.array([1, 2])})
         with pytest.raises(ValueError):
-            ArrowTableStream(table, tag_columns=["id"])
+            ArrowTableStream(table, key_columns=["id"])
 
     def test_producer_defaults_to_none(self):
         stream = make_table_stream()
@@ -210,21 +210,21 @@ def test_upstreams_defaults_to_empty(self):
 
 
 class TestTableStreamKeys:
-    def test_returns_correct_tag_keys(self):
-        stream = make_table_stream(tag_columns=["id"])
-        tag_keys, _ = stream.keys()
-        assert tag_keys == ("id",)
+    def test_returns_correct_key_keys(self):
+        stream = make_table_stream(key_columns=["id"])
+        key_keys, _ = stream.keys()
+        assert key_keys == ("id",)
 
     def test_returns_correct_data_keys(self):
-        stream = make_table_stream(tag_columns=["id"])
+        stream = make_table_stream(key_columns=["id"])
         _, data_keys = stream.keys()
         assert data_keys == ("value",)
 
-    def test_no_tag_columns(self):
+    def test_no_key_columns(self):
         table = pa.table({"a": pa.array([1]), "b": pa.array([2])})
-        stream = ArrowTableStream(table, tag_columns=[])
-        tag_keys, data_keys = stream.keys()
-        assert tag_keys == ()
+        stream = ArrowTableStream(table, key_columns=[])
+        key_keys, data_keys = stream.keys()
+        assert key_keys == ()
         assert set(data_keys) == {"a", "b"}
 
 
@@ -235,18 +235,18 @@ def test_no_tag_columns(self):
 
 class TestTableStreamOutputSchema:
     def test_schema_keys_match_column_keys(self):
-        stream = make_table_stream(tag_columns=["id"])
-        tag_schema, data_schema = stream.output_schema()
-        tag_keys, data_keys = stream.keys()
-        assert set(tag_schema.keys()) == set(tag_keys)
+        stream = make_table_stream(key_columns=["id"])
+        key_schema, data_schema = stream.output_schema()
+        key_keys, data_keys = stream.keys()
+        assert set(key_schema.keys()) == set(key_keys)
         assert set(data_schema.keys()) == set(data_keys)
 
     def test_schema_values_are_types(self):
         import types as _types
 
-        stream = make_table_stream(tag_columns=["id"])
-        tag_schema, data_schema = stream.output_schema()
-        for v in (*tag_schema.values(), *data_schema.values()):
+        stream = make_table_stream(key_columns=["id"])
+        key_schema, data_schema = stream.output_schema()
+        for v in (*key_schema.values(), *data_schema.values()):
             assert isinstance(v, (type, _types.UnionType)), (
                 f"Expected a type or UnionType, got {v!r}"
             )
@@ -264,32 +264,32 @@ def test_yields_correct_number_of_pairs(self):
         pairs = list(stream.iter_data())
         assert len(pairs) == n
 
-    def test_each_pair_has_tag_and_data(self):
+    def test_each_pair_has_key_and_data(self):
         from orcapod.protocols.core_protocols.datagrams import (
             DataProtocol,
-            TagProtocol,
+            KeyProtocol,
         )
 
         stream = make_table_stream()
-        for tag, data in stream.iter_data():
-            assert isinstance(tag, TagProtocol)
+        for key, data in stream.iter_data():
+            assert isinstance(key, KeyProtocol)
             assert isinstance(data, DataProtocol)
 
-    def test_tag_contains_tag_column(self):
-        stream = make_table_stream(tag_columns=["id"])
-        for tag, _ in stream.iter_data():
-            assert "id" in tag.keys()
+    def test_key_contains_key_column(self):
+        stream = make_table_stream(key_columns=["id"])
+        for key, _ in stream.iter_data():
+            assert "id" in key.keys()
 
     def test_data_contains_data_column(self):
-        stream = make_table_stream(tag_columns=["id"])
+        stream = make_table_stream(key_columns=["id"])
         for _, data in stream.iter_data():
             assert "value" in data.keys()
 
     def test_values_are_correct(self):
-        stream = make_table_stream(tag_columns=["id"], n_rows=3)
+        stream = make_table_stream(key_columns=["id"], n_rows=3)
         pairs = list(stream.iter_data())
-        for i, (tag, data) in enumerate(pairs):
-            assert tag["id"] == i
+        for i, (key, data) in enumerate(pairs):
+            assert key["id"] == i
             assert data["value"] == f"v{i}"
 
     def test_iteration_is_repeatable(self):
@@ -318,7 +318,7 @@ def test_table_has_correct_row_count(self):
         assert len(stream.as_table()) == n
 
     def test_table_contains_all_columns(self):
-        stream = make_table_stream(tag_columns=["id"])
+        stream = make_table_stream(key_columns=["id"])
         table = stream.as_table()
         assert "id" in table.column_names
         assert "value" in table.column_names
@@ -367,8 +367,8 @@ def test_no_producer_same_data_same_hash(self):
                 "value": pa.array([10, 20, 30], type=pa.int64()),
             }
         )
-        s1 = ArrowTableStream(table, tag_columns=["id"])
-        s2 = ArrowTableStream(table, tag_columns=["id"])
+        s1 = ArrowTableStream(table, key_columns=["id"])
+        s2 = ArrowTableStream(table, key_columns=["id"])
         assert s1.content_hash() == s2.content_hash()
 
     def test_no_producer_different_data_different_hash(self):
@@ -385,8 +385,8 @@ def test_no_producer_different_data_different_hash(self):
                 "value": pa.array([10, 20, 99], type=pa.int64()),
             }
         )
-        s1 = ArrowTableStream(t1, tag_columns=["id"])
-        s2 = ArrowTableStream(t2, tag_columns=["id"])
+        s1 = ArrowTableStream(t1, key_columns=["id"])
+        s2 = ArrowTableStream(t2, key_columns=["id"])
         assert s1.content_hash() != s2.content_hash()
 
     def test_no_producer_identity_structure_contains_table(self):
@@ -423,7 +423,7 @@ def test_with_producer_identity_structure_starts_with_producer(self):
                 "v": pa.array([10, 20], type=pa.int64()),
             }
         )
-        stream = ArrowTableStream(table, tag_columns=["id"], producer=src)
+        stream = ArrowTableStream(table, key_columns=["id"], producer=src)
         structure = stream.identity_structure()
         assert structure[0] is src
 
@@ -442,8 +442,8 @@ def test_with_producer_content_hash_reflects_producer_identity(self):
                 "v": pa.array([30, 40], type=pa.int64()),
             }
         )
-        s1 = ArrowTableStream(t1, tag_columns=["id"], producer=src)
-        s2 = ArrowTableStream(t2, tag_columns=["id"], producer=src)
+        s1 = ArrowTableStream(t1, key_columns=["id"], producer=src)
+        s2 = ArrowTableStream(t2, key_columns=["id"], producer=src)
         assert s1.content_hash() == s2.content_hash()
 
     def test_with_different_producers_different_hash(self):
@@ -456,8 +456,8 @@ def test_with_different_producers_different_hash(self):
                 "v": pa.array([10, 20], type=pa.int64()),
             }
         )
-        s1 = ArrowTableStream(table, tag_columns=["id"], producer=src_a)
-        s2 = ArrowTableStream(table, tag_columns=["id"], producer=src_b)
+        s1 = ArrowTableStream(table, key_columns=["id"], producer=src_a)
+        s2 = ArrowTableStream(table, key_columns=["id"], producer=src_b)
         assert s1.content_hash() != s2.content_hash()
 
 
@@ -476,14 +476,14 @@ def test_nullable_true_fields_yield_optional_in_output_schema(self):
         """nullable=True fields → T | None in output_schema."""
         table = pa.table(
             {
-                "tag": pa.array(["a"], type=pa.large_string()),
+                "key": pa.array(["a"], type=pa.large_string()),
                 "val": pa.array([1], type=pa.int64()),
             }
         )
         # Arrow defaults to nullable=True
         assert table.schema.field("val").nullable is True
 
-        stream = ArrowTableStream(table, tag_columns=["tag"])
+        stream = ArrowTableStream(table, key_columns=["key"])
         _, data_schema = stream.output_schema()
         assert data_schema["val"] == int | None
 
@@ -491,19 +491,19 @@ def test_non_nullable_fields_yield_plain_type_in_output_schema(self):
         """nullable=False fields → plain T in output_schema."""
         table = pa.table(
             {
-                "tag": pa.array(["a"], type=pa.large_string()),
+                "key": pa.array(["a"], type=pa.large_string()),
                 "val": pa.array([1], type=pa.int64()),
             },
             schema=pa.schema(
                 [
-                    pa.field("tag", pa.large_string(), nullable=False),
+                    pa.field("key", pa.large_string(), nullable=False),
                     pa.field("val", pa.int64(), nullable=False),
                 ]
             ),
         )
         assert table.schema.field("val").nullable is False
 
-        stream = ArrowTableStream(table, tag_columns=["tag"])
+        stream = ArrowTableStream(table, key_columns=["key"])
         _, data_schema = stream.output_schema()
         assert data_schema["val"] is int
 
@@ -520,7 +520,7 @@ def test_infer_schema_nullable_before_build_produces_plain_type(self):
         # Raw Arrow table with nullable=True (the default)
         table = pa.table(
             {
-                "tag": pa.array(["a"], type=pa.large_string()),
+                "key": pa.array(["a"], type=pa.large_string()),
                 "val": pa.array([1], type=pa.int64()),
             }
         )
@@ -528,6 +528,6 @@ def test_infer_schema_nullable_before_build_produces_plain_type(self):
 
         # Caller infers nullable before handing off to builder
         table = table.cast(arrow_utils.infer_schema_nullable(table))
-        result = builder.build(table, tag_columns=["tag"])
+        result = builder.build(table, key_columns=["key"])
         _, data_schema = result.stream.output_schema()
         assert data_schema["val"] is int
diff --git a/tests/test_core/test_caching_integration.py b/tests/test_core/test_caching_integration.py
index 61c463ca..7c5e5ab3 100644
--- a/tests/test_core/test_caching_integration.py
+++ b/tests/test_core/test_caching_integration.py
@@ -138,19 +138,19 @@ def pod():
 class TestSourcePodCaching:
     def test_delta_source_id_defaults_to_dir_name(self, clinic_a):
         patients_path, labs_path = clinic_a
-        ps = DeltaTableSource(patients_path, tag_columns=["patient_id"])
-        ls = DeltaTableSource(labs_path, tag_columns=["patient_id"])
+        ps = DeltaTableSource(patients_path, key_columns=["patient_id"])
+        ls = DeltaTableSource(labs_path, key_columns=["patient_id"])
         assert ps.source_id == patients_path.name
         assert ls.source_id == labs_path.name
 
     def test_different_sources_get_different_cache_paths(self, clinic_a, source_db):
         patients_path, labs_path = clinic_a
         patients = CachedSource(
-            DeltaTableSource(patients_path, tag_columns=["patient_id"]),
+            DeltaTableSource(patients_path, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         labs = CachedSource(
-            DeltaTableSource(labs_path, tag_columns=["patient_id"]),
+            DeltaTableSource(labs_path, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         assert patients.cache_path != labs.cache_path
@@ -158,7 +158,7 @@ def test_different_sources_get_different_cache_paths(self, clinic_a, source_db):
     def test_cache_populates_on_flow(self, clinic_a, source_db):
         patients_path, _ = clinic_a
         ps = CachedSource(
-            DeltaTableSource(patients_path, tag_columns=["patient_id"]),
+            DeltaTableSource(patients_path, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         ps.flow()
@@ -169,12 +169,12 @@ def test_cache_populates_on_flow(self, clinic_a, source_db):
     def test_dedup_on_rerun(self, clinic_a, source_db):
         patients_path, _ = clinic_a
         ps1 = CachedSource(
-            DeltaTableSource(patients_path, tag_columns=["patient_id"]),
+            DeltaTableSource(patients_path, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         ps1.flow()
         ps2 = CachedSource(
-            DeltaTableSource(patients_path, tag_columns=["patient_id"]),
+            DeltaTableSource(patients_path, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         ps2.flow()
@@ -185,7 +185,7 @@ def test_named_source_same_name_same_schema_same_identity(
     ):
         """Same dir name + same schema = same content_hash regardless of data."""
         patients_path, _ = clinic_a
-        src1 = DeltaTableSource(patients_path, tag_columns=["patient_id"])
+        src1 = DeltaTableSource(patients_path, key_columns=["patient_id"])
         ps1 = CachedSource(src1, cache_database=source_db)
 
         # Overwrite with different data, same schema
@@ -203,7 +203,7 @@ def test_named_source_same_name_same_schema_same_identity(
             ),
             mode="overwrite",
         )
-        src2 = DeltaTableSource(patients_path, tag_columns=["patient_id"])
+        src2 = DeltaTableSource(patients_path, key_columns=["patient_id"])
         ps2 = CachedSource(src2, cache_database=source_db)
 
         assert src1.source_id == src2.source_id
@@ -214,7 +214,7 @@ def test_cumulative_caching_across_data_updates(self, clinic_a, source_db):
         """New rows from updated data accumulate in the same cache table."""
         patients_path, _ = clinic_a
         ps1 = CachedSource(
-            DeltaTableSource(patients_path, tag_columns=["patient_id"]),
+            DeltaTableSource(patients_path, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         ps1.flow()
@@ -236,7 +236,7 @@ def test_cumulative_caching_across_data_updates(self, clinic_a, source_db):
             mode="overwrite",
         )
         ps2 = CachedSource(
-            DeltaTableSource(patients_path, tag_columns=["patient_id"]),
+            DeltaTableSource(patients_path, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         ps2.flow()
@@ -256,8 +256,8 @@ def test_unnamed_source_different_data_different_identity(self):
                 "v": pa.array([2], type=pa.int64()),
             }
         )
-        s1 = ArrowTableSource(t1, tag_columns=["k"], infer_nullable=True)
-        s2 = ArrowTableSource(t2, tag_columns=["k"], infer_nullable=True)
+        s1 = ArrowTableSource(t1, key_columns=["k"], infer_nullable=True)
+        s2 = ArrowTableSource(t2, key_columns=["k"], infer_nullable=True)
         assert s1.source_id != s2.source_id
         assert s1.content_hash() != s2.content_hash()
 
@@ -268,8 +268,8 @@ def test_unnamed_source_same_data_same_identity(self):
                 "v": pa.array([1], type=pa.int64()),
             }
         )
-        s1 = ArrowTableSource(t, tag_columns=["k"], infer_nullable=True)
-        s2 = ArrowTableSource(t, tag_columns=["k"], infer_nullable=True)
+        s1 = ArrowTableSource(t, key_columns=["k"], infer_nullable=True)
+        s2 = ArrowTableSource(t, key_columns=["k"], infer_nullable=True)
         assert s1.source_id == s2.source_id
         assert s1.content_hash() == s2.content_hash()
 
@@ -285,11 +285,11 @@ def test_function_node_stores_records(
     ):
         patients_path, labs_path = clinic_a
         patients = CachedSource(
-            DeltaTableSource(patients_path, tag_columns=["patient_id"]),
+            DeltaTableSource(patients_path, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         labs = CachedSource(
-            DeltaTableSource(labs_path, tag_columns=["patient_id"]),
+            DeltaTableSource(labs_path, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         joined = Join()(patients, labs)
@@ -317,11 +317,11 @@ def test_cross_source_sharing_same_pipeline_path(
 
         # Pipeline A
         pa_src = CachedSource(
-            DeltaTableSource(patients_a, tag_columns=["patient_id"]),
+            DeltaTableSource(patients_a, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         la_src = CachedSource(
-            DeltaTableSource(labs_a, tag_columns=["patient_id"]),
+            DeltaTableSource(labs_a, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         fn_a = FunctionNode(
@@ -333,11 +333,11 @@ def test_cross_source_sharing_same_pipeline_path(
 
         # Pipeline B
         pb_src = CachedSource(
-            DeltaTableSource(patients_b, tag_columns=["patient_id"]),
+            DeltaTableSource(patients_b, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         lb_src = CachedSource(
-            DeltaTableSource(labs_b, tag_columns=["patient_id"]),
+            DeltaTableSource(labs_b, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         fn_b = FunctionNode(
@@ -366,11 +366,11 @@ def test_cross_source_records_accumulate_in_shared_table(
             function_pod=pod,
             input_stream=Join()(
                 CachedSource(
-                    DeltaTableSource(patients_a, tag_columns=["patient_id"]),
+                    DeltaTableSource(patients_a, key_columns=["patient_id"]),
                     cache_database=source_db,
                 ),
                 CachedSource(
-                    DeltaTableSource(labs_a, tag_columns=["patient_id"]),
+                    DeltaTableSource(labs_a, key_columns=["patient_id"]),
                     cache_database=source_db,
                 ),
             ),
@@ -385,11 +385,11 @@ def test_cross_source_records_accumulate_in_shared_table(
             function_pod=pod,
             input_stream=Join()(
                 CachedSource(
-                    DeltaTableSource(patients_b, tag_columns=["patient_id"]),
+                    DeltaTableSource(patients_b, key_columns=["patient_id"]),
                     cache_database=source_db,
                 ),
                 CachedSource(
-                    DeltaTableSource(labs_b, tag_columns=["patient_id"]),
+                    DeltaTableSource(labs_b, key_columns=["patient_id"]),
                     cache_database=source_db,
                 ),
             ),
@@ -410,11 +410,11 @@ class TestOperatorPodCaching:
     def _make_joined_streams(self, clinic_a, source_db):
         patients_path, labs_path = clinic_a
         patients = CachedSource(
-            DeltaTableSource(patients_path, tag_columns=["patient_id"]),
+            DeltaTableSource(patients_path, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         labs = CachedSource(
-            DeltaTableSource(labs_path, tag_columns=["patient_id"]),
+            DeltaTableSource(labs_path, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         return patients, labs
@@ -479,8 +479,8 @@ def test_replay_empty_cache_returns_empty_stream(self, clinic_a, source_db):
         table = node.as_table()
         assert table.num_rows == 0
         # Schema is preserved
-        tag_keys, data_keys = node.keys()
-        assert set(tag_keys).issubset(set(table.column_names))
+        key_keys, data_keys = node.keys()
+        assert set(key_keys).issubset(set(table.column_names))
         assert set(data_keys).issubset(set(table.column_names))
 
     def test_content_hash_scoping_isolates_source_combinations(
@@ -492,19 +492,19 @@ def test_content_hash_scoping_isolates_source_combinations(
         patients_b, labs_b = clinic_b
 
         pa_src = CachedSource(
-            DeltaTableSource(patients_a, tag_columns=["patient_id"]),
+            DeltaTableSource(patients_a, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         la_src = CachedSource(
-            DeltaTableSource(labs_a, tag_columns=["patient_id"]),
+            DeltaTableSource(labs_a, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         pb_src = CachedSource(
-            DeltaTableSource(patients_b, tag_columns=["patient_id"]),
+            DeltaTableSource(patients_b, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         lb_src = CachedSource(
-            DeltaTableSource(labs_b, tag_columns=["patient_id"]),
+            DeltaTableSource(labs_b, key_columns=["patient_id"]),
             cache_database=source_db,
         )
 
@@ -543,11 +543,11 @@ def test_full_pipeline_source_to_function_to_operator(
 
         # Step 1: CachedSource
         patients = CachedSource(
-            DeltaTableSource(patients_a, tag_columns=["patient_id"]),
+            DeltaTableSource(patients_a, key_columns=["patient_id"]),
             cache_database=source_db,
         )
         labs = CachedSource(
-            DeltaTableSource(labs_a, tag_columns=["patient_id"]),
+            DeltaTableSource(labs_a, key_columns=["patient_id"]),
             cache_database=source_db,
         )
 
@@ -589,11 +589,11 @@ def test_full_pipeline_source_to_function_to_operator(
             function_pod=pod,
             input_stream=Join()(
                 CachedSource(
-                    DeltaTableSource(patients_b, tag_columns=["patient_id"]),
+                    DeltaTableSource(patients_b, key_columns=["patient_id"]),
                     cache_database=source_db,
                 ),
                 CachedSource(
-                    DeltaTableSource(labs_b, tag_columns=["patient_id"]),
+                    DeltaTableSource(labs_b, key_columns=["patient_id"]),
                     cache_database=source_db,
                 ),
             ),
diff --git a/tests/test_core/test_regression_fixes.py b/tests/test_core/test_regression_fixes.py
index 1876461b..b4f11a4d 100644
--- a/tests/test_core/test_regression_fixes.py
+++ b/tests/test_core/test_regression_fixes.py
@@ -45,7 +45,7 @@
 
 
 def make_stream(n: int = 3) -> ArrowTableStream:
-    """Stream with tag=id, data=x (ints). Uses nullable=False schema."""
+    """Stream with key=id, data=x (ints). Uses nullable=False schema."""
     schema = pa.schema(
         [pa.field("id", pa.int64(), nullable=False), pa.field("x", pa.int64(), nullable=False)]
     )
@@ -53,12 +53,12 @@ def make_stream(n: int = 3) -> ArrowTableStream:
         {"id": pa.array(list(range(n)), type=pa.int64()), "x": pa.array(list(range(n)), type=pa.int64())},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 async def feed_stream_to_channel(stream: ArrowTableStream, ch: Channel) -> None:
-    for tag, data in stream.iter_data():
-        await ch.writer.send((tag, data))
+    for key, data in stream.iter_data():
+        await ch.writer.send((key, data))
     await ch.writer.close()
 
 
@@ -101,7 +101,7 @@ class TestAsyncExecuteChannelCloseOnError:
     @pytest.mark.asyncio
     async def test_unary_operator_closes_channel_on_error(self):
         """When a data function raises, process_data catches the exception
-        and returns (tag, None).  The output channel is closed
+        and returns (key, None).  The output channel is closed
         normally and no exception propagates."""
 
         def failing(x: int) -> int:
@@ -260,7 +260,7 @@ def double(x: int) -> int:
         )
         from orcapod.core.streams.arrow_table_stream import ArrowTableStream
 
-        stream = ArrowTableStream(table, tag_columns=["id"])
+        stream = ArrowTableStream(table, key_columns=["id"])
         return pod.process(stream), pod
 
     @pytest.mark.asyncio
@@ -360,7 +360,7 @@ def test_source_info_preserved_through_round_trip(self):
                 {"id": 0, "x": 10},
                 {"id": 1, "x": 20},
             ],
-            tag_columns=["id"],
+            key_columns=["id"],
         )
 
         rows = list(source.iter_data())
@@ -389,7 +389,7 @@ def test_materialize_source_columns_in_table(self):
             data=[
                 {"id": 0, "x": 10},
             ],
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         rows = list(source.iter_data())
 
diff --git a/tests/test_core/test_table_scope.py b/tests/test_core/test_table_scope.py
index 2010c426..d6ef851d 100644
--- a/tests/test_core/test_table_scope.py
+++ b/tests/test_core/test_table_scope.py
@@ -28,7 +28,7 @@
 
 
 def _make_pod() -> FunctionPod:
-    """Pod that doubles the 'y' data column (tag column is 'x')."""
+    """Pod that doubles the 'y' data column (key column is 'x')."""
 
     def double(y: int) -> int:
         return y * 2
@@ -38,8 +38,8 @@ def double(y: int) -> int:
 
 
 def _make_source(data: list[dict], source_id: str = "src") -> DictSource:
-    """Source with tag column 'x' and data column 'y'."""
-    return DictSource(data=data, tag_columns=["x"], source_id=source_id)
+    """Source with key column 'x' and data column 'y'."""
+    return DictSource(data=data, key_columns=["x"], source_id=source_id)
 
 
 def _make_join_streams(
@@ -63,10 +63,10 @@ def _make_join_streams(
         }
     )
     src_a = ArrowTableSource(
-        table_a, tag_columns=["key"], source_id=f"src_a_{source_id_suffix}", infer_nullable=True
+        table_a, key_columns=["key"], source_id=f"src_a_{source_id_suffix}", infer_nullable=True
     )
     src_b = ArrowTableSource(
-        table_b, tag_columns=["key"], source_id=f"src_b_{source_id_suffix}", infer_nullable=True
+        table_b, key_columns=["key"], source_id=f"src_b_{source_id_suffix}", infer_nullable=True
     )
     return src_a, src_b
 
@@ -233,7 +233,7 @@ def test_from_descriptor_missing_table_scope_raises(self):
         src = _make_source([{"x": 1, "y": 2}])
         db = InMemoryArrowDatabase()
         node = FunctionNode(function_pod=pod, input_stream=src, pipeline_database=db)
-        tag_schema, data_schema = node.output_schema()
+        key_schema, data_schema = node.output_schema()
         descriptor = {
             "node_type": "function",
             "label": None,
@@ -242,7 +242,7 @@ def test_from_descriptor_missing_table_scope_raises(self):
             "data_context_key": node.data_context_key,
             # "table_scope" intentionally omitted
             "output_schema": {
-                "tag": {k: str(v) for k, v in tag_schema.items()},
+                "key": {k: str(v) for k, v in key_schema.items()},
                 "data": {k: str(v) for k, v in data_schema.items()},
             },
         }
@@ -259,7 +259,7 @@ def test_from_descriptor_preserves_pipeline_hash_scope(self):
         src = _make_source([{"x": 1, "y": 2}])
         db = InMemoryArrowDatabase()
         node = FunctionNode(function_pod=pod, input_stream=src, pipeline_database=db)
-        tag_schema, data_schema = node.output_schema()
+        key_schema, data_schema = node.output_schema()
         descriptor = {
             "node_type": "function",
             "label": None,
@@ -268,7 +268,7 @@ def test_from_descriptor_preserves_pipeline_hash_scope(self):
             "data_context_key": node.data_context_key,
             "table_scope": "pipeline_hash",
             "output_schema": {
-                "tag": {k: str(v) for k, v in tag_schema.items()},
+                "key": {k: str(v) for k, v in key_schema.items()},
                 "data": {k: str(v) for k, v in data_schema.items()},
             },
         }
@@ -287,7 +287,7 @@ def test_from_descriptor_preserves_content_hash_scope(self):
         node = FunctionNode(
             function_pod=pod, input_stream=src, pipeline_database=db, table_scope="content_hash"
         )
-        tag_schema, data_schema = node.output_schema()
+        key_schema, data_schema = node.output_schema()
         descriptor = {
             "node_type": "function",
             "label": None,
@@ -296,7 +296,7 @@ def test_from_descriptor_preserves_content_hash_scope(self):
             "data_context_key": node.data_context_key,
             "table_scope": "content_hash",
             "output_schema": {
-                "tag": {k: str(v) for k, v in tag_schema.items()},
+                "key": {k: str(v) for k, v in key_schema.items()},
                 "data": {k: str(v) for k, v in data_schema.items()},
             },
         }
@@ -492,7 +492,7 @@ def test_from_descriptor_missing_table_scope_raises(self):
             "pipeline_hash": "fake_pipeline_hash",
             "data_context_key": "std:v0.1:default",
             # "table_scope" intentionally omitted
-            "output_schema": {"tag": {"key": "large_string"}, "data": {"val": "int64"}},
+            "output_schema": {"key": {"key": "large_string"}, "data": {"val": "int64"}},
             "operator": {
                 "class_name": "Join",
                 "module_path": "orcapod.core.operators.join",
@@ -519,7 +519,7 @@ def test_from_descriptor_preserves_pipeline_hash_scope(self):
             "pipeline_hash": "fake_pipeline_hash",
             "data_context_key": "std:v0.1:default",
             "table_scope": "pipeline_hash",
-            "output_schema": {"tag": {"key": "large_string"}, "data": {"val": "int64"}},
+            "output_schema": {"key": {"key": "large_string"}, "data": {"val": "int64"}},
             "operator": {
                 "class_name": "Join",
                 "module_path": "orcapod.core.operators.join",
@@ -546,7 +546,7 @@ def test_from_descriptor_preserves_content_hash_scope(self):
             "pipeline_hash": "fake_pipeline_hash",
             "data_context_key": "std:v0.1:default",
             "table_scope": "content_hash",
-            "output_schema": {"tag": {"key": "large_string"}, "data": {"val": "int64"}},
+            "output_schema": {"key": {"key": "large_string"}, "data": {"val": "int64"}},
             "operator": {
                 "class_name": "Join",
                 "module_path": "orcapod.core.operators.join",
diff --git a/tests/test_core/test_tracker.py b/tests/test_core/test_tracker.py
index 846cf79d..05e1cd2f 100644
--- a/tests/test_core/test_tracker.py
+++ b/tests/test_core/test_tracker.py
@@ -21,7 +21,7 @@
 
 from orcapod.core.function_pod import FunctionPod, function_pod
 from orcapod.core.nodes import FunctionNode, OperatorNode, SourceNode
-from orcapod.core.operators import Join, SelectTagColumns
+from orcapod.core.operators import Join, SelectKeyColumns
 from orcapod.core.data_function import PythonDataFunction
 from orcapod.core.sources.arrow_table_source import ArrowTableSource
 from orcapod.core.streams import ArrowTableStream
@@ -55,7 +55,7 @@ def _make_pipeline(
 
 
 def _make_stream(n: int = 3) -> ArrowTableStream:
-    """Simple stream with tag=id, data=x. Uses nullable=False schema."""
+    """Simple stream with key=id, data=x. Uses nullable=False schema."""
     schema = pa.schema(
         [pa.field("id", pa.int64(), nullable=False), pa.field("x", pa.int64(), nullable=False)]
     )
@@ -63,11 +63,11 @@ def _make_stream(n: int = 3) -> ArrowTableStream:
         {"id": pa.array(list(range(n)), type=pa.int64()), "x": pa.array(list(range(n)), type=pa.int64())},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 def _make_two_col_stream(n: int = 3) -> ArrowTableStream:
-    """Stream with tag=id, data={a, b} for binary operator tests. Uses nullable=False schema."""
+    """Stream with key=id, data={a, b} for binary operator tests. Uses nullable=False schema."""
     schema = pa.schema(
         [
             pa.field("id", pa.int64(), nullable=False),
@@ -83,11 +83,11 @@ def _make_two_col_stream(n: int = 3) -> ArrowTableStream:
         },
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 def _make_y_stream(n: int = 3) -> ArrowTableStream:
-    """Stream with tag=id, data=y (non-overlapping with _make_stream). Uses nullable=False schema."""
+    """Stream with key=id, data=y (non-overlapping with _make_stream). Uses nullable=False schema."""
     schema = pa.schema(
         [pa.field("id", pa.int64(), nullable=False), pa.field("y", pa.int64(), nullable=False)]
     )
@@ -95,7 +95,7 @@ def _make_y_stream(n: int = 3) -> ArrowTableStream:
         {"id": pa.array(list(range(n)), type=pa.int64()), "y": pa.array([i * 10 for i in range(n)], type=pa.int64())},
         schema=schema,
     )
-    return ArrowTableStream(table, tag_columns=["id"])
+    return ArrowTableStream(table, key_columns=["id"])
 
 
 # ---------------------------------------------------------------------------
@@ -221,7 +221,7 @@ def test_function_node_context_matches_pod(self):
 
     def test_operator_node_context_matches_operator(self):
         stream = _make_two_col_stream()
-        op = SelectTagColumns("id")
+        op = SelectKeyColumns("id")
         node = OperatorNode(operator=op, input_streams=[stream])
         assert node.data_context_key == op.data_context_key
         assert node.data_context.context_key == op.data_context_key
@@ -379,7 +379,7 @@ def test_record_function_pod_stores_upstream_stream(self):
 
     def test_record_operator_pod_creates_operator_node(self):
         stream = _make_stream()
-        op = SelectTagColumns(columns=["id"])
+        op = SelectKeyColumns(columns=["id"])
         mgr = BasicTrackerManager()
 
         with _make_pipeline(tracker_manager=mgr) as tracker:
@@ -472,7 +472,7 @@ def test_compile_single_function_pod(self):
     def test_compile_single_operator(self):
         """Source stream -> Operator: compile creates SourceNode and wires upstream."""
         stream = _make_stream()
-        op = SelectTagColumns(columns=["id"])
+        op = SelectKeyColumns(columns=["id"])
         mgr = BasicTrackerManager()
 
         with _make_pipeline(tracker_manager=mgr) as tracker:
@@ -549,7 +549,7 @@ def test_compile_function_then_operator(self):
         """Source -> FunctionPod -> Operator: compile wires SourceNode -> FunctionNode -> OperatorNode."""
         pf = PythonDataFunction(_double, output_keys="result")
         pod = FunctionPod(data_function=pf)
-        op = SelectTagColumns(columns=["id"])
+        op = SelectKeyColumns(columns=["id"])
         stream = _make_stream()
         mgr = BasicTrackerManager()
         pod.tracker_manager = mgr
@@ -574,7 +574,7 @@ def test_compile_function_then_operator(self):
     def test_compile_operator_then_function(self):
         """Source -> Operator -> FunctionPod: compile wires SourceNode -> OperatorNode -> FunctionNode."""
         stream = _make_stream()
-        op = SelectTagColumns(columns=["id"])
+        op = SelectKeyColumns(columns=["id"])
         pf = PythonDataFunction(_double, output_keys="result")
         pod = FunctionPod(data_function=pf)
         mgr = BasicTrackerManager()
@@ -750,7 +750,7 @@ class TestOperatorTrackerIntegration:
     def test_operator_process_records_to_tracker(self):
         """StaticOutputPod.process() automatically records to an active Pipeline."""
         stream = _make_stream()
-        op = SelectTagColumns(columns=["id"])
+        op = SelectKeyColumns(columns=["id"])
         mgr = BasicTrackerManager()
         op.tracker_manager = mgr
 
@@ -768,8 +768,8 @@ def test_operator_process_records_to_tracker(self):
     def test_operator_chain(self):
         """Source -> operator1 -> operator2."""
         stream = _make_two_col_stream()
-        op1 = SelectTagColumns(columns=["id"])
-        op2 = SelectTagColumns(columns=["id"])
+        op1 = SelectKeyColumns(columns=["id"])
+        op2 = SelectKeyColumns(columns=["id"])
         mgr = BasicTrackerManager()
         op1.tracker_manager = mgr
         op2.tracker_manager = mgr
@@ -858,7 +858,7 @@ def sources(self):
                     "height_cm": pa.array([170, 185, 160], type=pa.int64()),
                 }
             ),
-            tag_columns=["person_id"],
+            key_columns=["person_id"],
             source_id="heights",
             infer_nullable=True,
         )
@@ -869,7 +869,7 @@ def sources(self):
                     "weight_kg": pa.array([70, 90, 55], type=pa.int64()),
                 }
             ),
-            tag_columns=["person_id"],
+            key_columns=["person_id"],
             source_id="weights",
             infer_nullable=True,
         )
@@ -893,8 +893,8 @@ def test_pipeline_output_values(self, sources, expected_bmi):
             joined = Join()(converted, weights)
             bmi_stream = _compute_bmi.pod(joined)
 
-        for tag, data in bmi_stream.iter_data():
-            pid = tag["person_id"]
+        for key, data in bmi_stream.iter_data():
+            pid = key["person_id"]
             assert data["bmi"] == expected_bmi[pid], (
                 f"person_id={pid}: got {data['bmi']}, expected {expected_bmi[pid]}"
             )
diff --git a/tests/test_data/test_polars_nullability/test_function_node_nullability.py b/tests/test_data/test_polars_nullability/test_function_node_nullability.py
index 68e5a23b..2378487e 100644
--- a/tests/test_data/test_polars_nullability/test_function_node_nullability.py
+++ b/tests/test_data/test_polars_nullability/test_function_node_nullability.py
@@ -36,7 +36,7 @@ def test_non_optional_return_type_yields_non_nullable_output_column(self):
         database = InMemoryArrowDatabase()
         source = op.sources.DictSource(
             [{"id": 1, "x": 10}, {"id": 2, "x": 20}],
-            tag_columns=["id"],
+            key_columns=["id"],
         )
 
         @op.function_pod(output_keys=["result"])
@@ -64,21 +64,21 @@ def double(x: int) -> int:
             "Arrow→Polars→Arrow round-trip in get_all_records() dropped nullability."
         )
 
-    def test_input_tag_column_non_nullable_after_get_all_records(self):
-        """Input tag columns that are non-nullable must remain so after Polars join."""
+    def test_input_key_column_non_nullable_after_get_all_records(self):
+        """Input key columns that are non-nullable must remain so after Polars join."""
         database = InMemoryArrowDatabase()
 
         # DictSource with integer id — infer_schema_nullable sets nullable=False (no nulls)
         source = op.sources.DictSource(
             [{"id": 1, "x": 5}, {"id": 2, "x": 15}],
-            tag_columns=["id"],
+            key_columns=["id"],
         )
 
         @op.function_pod(output_keys=["result"])
         def triple(x: int) -> int:
             return x * 3
 
-        pipeline = op.Pipeline("test_fn_tag_nullable", database)
+        pipeline = op.Pipeline("test_fn_key_nullable", database)
         with pipeline:
             triple.pod(source)
 
@@ -93,7 +93,7 @@ def triple(x: int) -> int:
         # "id" was non-nullable in the source; after the Polars join it must stay so
         id_field = table.schema.field("id")
         assert id_field.nullable is False, (
-            f"Expected 'id' tag column to be non-nullable after get_all_records(), "
+            f"Expected 'id' key column to be non-nullable after get_all_records(), "
             f"but got nullable={id_field.nullable}."
         )
 
@@ -113,7 +113,7 @@ def test_iter_data_from_database_preserves_non_nullable_output(self):
         database = InMemoryArrowDatabase()
         source = op.sources.DictSource(
             [{"id": 1, "x": 7}],
-            tag_columns=["id"],
+            key_columns=["id"],
         )
 
         @op.function_pod(output_keys=["result"])
@@ -135,7 +135,7 @@ def add_one(x: int) -> int:
         data_seen = list(loaded.values())
         assert len(data_seen) == 1, "Expected one data from the database"
 
-        _tag, data = data_seen[0]
+        _key, data = data_seen[0]
         data_schema = data.arrow_schema()
 
         result_field = data_schema.field("result")
@@ -152,19 +152,19 @@ def add_one(x: int) -> int:
 
 
 class TestJoinOperatorNullability:
-    """Join.op_forward must preserve non-nullable tag column flags through the
+    """Join.op_forward must preserve non-nullable key column flags through the
     Polars inner join it uses internally."""
 
-    def test_join_preserves_non_nullable_shared_tag_column(self):
-        """Shared tag column remains non-nullable after stream join."""
+    def test_join_preserves_non_nullable_shared_key_column(self):
+        """Shared key column remains non-nullable after stream join."""
         # DictSource applies infer_schema_nullable → integer 'id' has nullable=False
         source1 = op.sources.DictSource(
             [{"id": 1, "x": 10}, {"id": 2, "x": 20}],
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         source2 = op.sources.DictSource(
             [{"id": 1, "y": 100}, {"id": 2, "y": 200}],
-            tag_columns=["id"],
+            key_columns=["id"],
         )
 
         joined_stream = source1.join(source2)
@@ -172,7 +172,7 @@ def test_join_preserves_non_nullable_shared_tag_column(self):
 
         id_field = table.schema.field("id")
         assert id_field.nullable is False, (
-            f"Expected 'id' tag column to be non-nullable after Join, "
+            f"Expected 'id' key column to be non-nullable after Join, "
             f"but got nullable={id_field.nullable}. "
             "Arrow→Polars→Arrow round-trip in Join.op_forward dropped nullability."
         )
@@ -181,11 +181,11 @@ def test_join_preserves_non_nullable_data_columns(self):
         """Data columns that are non-nullable remain so after stream join."""
         source1 = op.sources.DictSource(
             [{"id": 1, "x": 10}, {"id": 2, "x": 20}],
-            tag_columns=["id"],
+            key_columns=["id"],
         )
         source2 = op.sources.DictSource(
             [{"id": 1, "y": 100}, {"id": 2, "y": 200}],
-            tag_columns=["id"],
+            key_columns=["id"],
         )
 
         joined_stream = source1.join(source2)
@@ -218,12 +218,12 @@ def test_join_preserves_nullable_optional_column_with_no_nulls(self):
         ])
         source1 = op.sources.DictSource(
             [{"id": 1, "x": 10}, {"id": 2, "x": 20}],
-            tag_columns=["id"],
+            key_columns=["id"],
             data_schema=schema1,
         )
         source2 = op.sources.DictSource(
             [{"id": 1, "y": 100}, {"id": 2, "y": 200}],
-            tag_columns=["id"],
+            key_columns=["id"],
         )
 
         joined_stream = source1.join(source2)
@@ -239,44 +239,44 @@ def test_join_preserves_nullable_optional_column_with_no_nulls(self):
 
 
 # ---------------------------------------------------------------------------
-# Join tag-column nullability with mixed nullable/non-nullable tag keys
+# Join key-column nullability with mixed nullable/non-nullable key columns
 # ---------------------------------------------------------------------------
 
 
-class TestJoinTagColumnNullability:
-    """Join must preserve the exact nullable flag of every tag column —
+class TestJoinKeyColumnNullability:
+    """Join must preserve the exact nullable flag of every key column —
     both non-nullable mandatory keys and nullable optional keys — through
     the Polars inner join used internally."""
 
-    def test_shared_tag_columns_mixed_nullability_preserved(self):
-        """When two sources share multiple tag columns with mixed nullable flags,
+    def test_shared_key_columns_mixed_nullability_preserved(self):
+        """When two sources share multiple key columns with mixed nullable flags,
         each flag is preserved correctly after the join.
 
         Schema intent:
         - "id"    int64  nullable=False  (mandatory join key)
         - "group" utf8   nullable=True   (Optional grouping key, no actual nulls)
         """
-        tag_schema = pa.schema([
+        key_schema = pa.schema([
             pa.field("id",    pa.int64(), nullable=False),
             pa.field("group", pa.utf8(),  nullable=True),
         ])
         schema1 = pa.schema([
-            *tag_schema,
+            *key_schema,
             pa.field("x", pa.int64(), nullable=False),
         ])
         schema2 = pa.schema([
-            *tag_schema,
+            *key_schema,
             pa.field("y", pa.int64(), nullable=False),
         ])
 
         source1 = op.sources.DictSource(
             [{"id": 1, "group": "a", "x": 10}, {"id": 2, "group": "b", "x": 20}],
-            tag_columns=["id", "group"],
+            key_columns=["id", "group"],
             data_schema=schema1,
         )
         source2 = op.sources.DictSource(
             [{"id": 1, "group": "a", "y": 100}, {"id": 2, "group": "b", "y": 200}],
-            tag_columns=["id", "group"],
+            key_columns=["id", "group"],
             data_schema=schema2,
         )
 
@@ -286,40 +286,40 @@ def test_shared_tag_columns_mixed_nullability_preserved(self):
         group_field = table.schema.field("group")
 
         assert id_field.nullable is False, (
-            f"'id' (non-nullable tag) must remain nullable=False after Join, "
+            f"'id' (non-nullable key) must remain nullable=False after Join, "
             f"got nullable={id_field.nullable}."
         )
         assert group_field.nullable is True, (
-            f"'group' (Optional tag, nullable=True) must remain nullable=True after Join "
+            f"'group' (Optional key, nullable=True) must remain nullable=True after Join "
             f"even though data contains no actual nulls, got nullable={group_field.nullable}."
         )
 
-    def test_non_shared_tag_columns_mixed_nullability_preserved(self):
-        """Tag columns that are unique to each side of a join (non-shared) also
+    def test_non_shared_key_columns_mixed_nullability_preserved(self):
+        """Key columns that are unique to each side of a join (non-shared) also
         preserve their nullable flags in the combined result.
 
-        source1 has tag "id" (non-nullable int).
-        source2 has tag "category" (nullable string, Optional, no actual nulls).
-        Neither tag is shared, so the join is a full cartesian product.
-        Both tag columns appear in the result and must keep their original nullable flags.
+        source1 has key "id" (non-nullable int).
+        source2 has key "category" (nullable string, Optional, no actual nulls).
+        Neither key is shared, so the join is a full cartesian product.
+        Both key columns appear in the result and must keep their original nullable flags.
         """
         schema1 = pa.schema([
             pa.field("id", pa.int64(), nullable=False),
             pa.field("x",  pa.int64(), nullable=False),
         ])
         schema2 = pa.schema([
-            pa.field("category", pa.utf8(), nullable=True),   # Optional tag
+            pa.field("category", pa.utf8(), nullable=True),   # Optional key
             pa.field("y",        pa.int64(), nullable=False),
         ])
 
         source1 = op.sources.DictSource(
             [{"id": 1, "x": 10}],
-            tag_columns=["id"],
+            key_columns=["id"],
             data_schema=schema1,
         )
         source2 = op.sources.DictSource(
             [{"category": "alpha", "y": 100}],
-            tag_columns=["category"],
+            key_columns=["category"],
             data_schema=schema2,
         )
 
@@ -329,43 +329,43 @@ def test_non_shared_tag_columns_mixed_nullability_preserved(self):
         category_field = table.schema.field("category")
 
         assert id_field.nullable is False, (
-            f"'id' (non-nullable tag from source1) must remain nullable=False after "
+            f"'id' (non-nullable key from source1) must remain nullable=False after "
             f"cartesian join, got nullable={id_field.nullable}."
         )
         assert category_field.nullable is True, (
-            f"'category' (Optional tag, nullable=True from source2) must remain "
+            f"'category' (Optional key, nullable=True from source2) must remain "
             f"nullable=True after cartesian join even with no actual nulls, "
             f"got nullable={category_field.nullable}."
         )
 
-    def test_three_way_join_tag_nullability_preserved(self):
+    def test_three_way_join_key_nullability_preserved(self):
         """A three-way join (two Polars join iterations) correctly restores nullable
-        flags on all tag columns across both iterations.
+        flags on all key columns across both iterations.
 
-        shared tag "id"    int64  nullable=False
-        shared tag "group" utf8   nullable=True  (Optional, no actual nulls)
+        shared key "id"    int64  nullable=False
+        shared key "group" utf8   nullable=True  (Optional, no actual nulls)
         """
-        tag_schema = pa.schema([
+        key_schema = pa.schema([
             pa.field("id",    pa.int64(), nullable=False),
             pa.field("group", pa.utf8(),  nullable=True),
         ])
-        schema1 = pa.schema([*tag_schema, pa.field("a", pa.int64(), nullable=False)])
-        schema2 = pa.schema([*tag_schema, pa.field("b", pa.int64(), nullable=True)])   # b is Optional
-        schema3 = pa.schema([*tag_schema, pa.field("c", pa.int64(), nullable=False)])
+        schema1 = pa.schema([*key_schema, pa.field("a", pa.int64(), nullable=False)])
+        schema2 = pa.schema([*key_schema, pa.field("b", pa.int64(), nullable=True)])   # b is Optional
+        schema3 = pa.schema([*key_schema, pa.field("c", pa.int64(), nullable=False)])
 
         source1 = op.sources.DictSource(
             [{"id": 1, "group": "x", "a": 1}],
-            tag_columns=["id", "group"],
+            key_columns=["id", "group"],
             data_schema=schema1,
         )
         source2 = op.sources.DictSource(
             [{"id": 1, "group": "x", "b": 2}],
-            tag_columns=["id", "group"],
+            key_columns=["id", "group"],
             data_schema=schema2,
         )
         source3 = op.sources.DictSource(
             [{"id": 1, "group": "x", "c": 3}],
-            tag_columns=["id", "group"],
+            key_columns=["id", "group"],
             data_schema=schema3,
         )
 
@@ -376,11 +376,11 @@ def test_three_way_join_tag_nullability_preserved(self):
         b_field     = table.schema.field("b")
 
         assert id_field.nullable is False, (
-            f"'id' (non-nullable tag) must remain nullable=False after 3-way join, "
+            f"'id' (non-nullable key) must remain nullable=False after 3-way join, "
             f"got nullable={id_field.nullable}."
         )
         assert group_field.nullable is True, (
-            f"'group' (Optional tag) must remain nullable=True after 3-way join, "
+            f"'group' (Optional key) must remain nullable=True after 3-way join, "
             f"got nullable={group_field.nullable}."
         )
         assert b_field.nullable is True, (
diff --git a/tests/test_hashing/generate_file_hashes.py b/tests/test_hashing/generate_file_hashes.py
index 77a0507a..b7d4c40b 100644
--- a/tests/test_hashing/generate_file_hashes.py
+++ b/tests/test_hashing/generate_file_hashes.py
@@ -108,7 +108,7 @@ def create_sample_files():
         "metadata": {
             "description": "Sample data for hash testing",
             "version": "1.0",
-            "tags": ["test", "hash", "sample"],
+            "keys": ["test", "hash", "sample"],
         },
     }
 
diff --git a/tests/test_hashing/generate_hash_examples.py b/tests/test_hashing/generate_hash_examples.py
index 5edbef3f..bd0677c2 100644
--- a/tests/test_hashing/generate_hash_examples.py
+++ b/tests/test_hashing/generate_hash_examples.py
@@ -114,9 +114,9 @@ def generate_hash_examples():
             if isinstance(value, (bytes, bytearray)):
                 serialized_value = f"bytes:{value.hex()}"
             elif isinstance(value, (set, frozenset)):
-                type_tag = "frozenset" if isinstance(value, frozenset) else "set"
+                type_key = "frozenset" if isinstance(value, frozenset) else "set"
                 serialized_value = {
-                    "__type__": type_tag,
+                    "__type__": type_key,
                     "items": sorted(value, key=str),
                 }
             elif isinstance(value, tuple):
diff --git a/tests/test_hashing/test_cross_process_stability.py b/tests/test_hashing/test_cross_process_stability.py
index e8e71700..6efb20a2 100644
--- a/tests/test_hashing/test_cross_process_stability.py
+++ b/tests/test_hashing/test_cross_process_stability.py
@@ -144,7 +144,7 @@ def _run_subprocess(script: str, pythonhashseed: str) -> dict[str, str]:
         "list":        [1, 2, 3],
         "nested_dict": {"a": 1, "b": [2, 3], "c": {"d": 4}},
         "set":         [1, 2, 3],   # hashed as set equivalent
-        "tuple":       [1, 2, 3],   # distinguished by type tag inside hasher
+        "tuple":       [1, 2, 3],   # distinguished by type key inside hasher
         "empty_list":  [],
         "empty_dict":  {},
     }
diff --git a/tests/test_hashing/test_hash_samples.py b/tests/test_hashing/test_hash_samples.py
index 4caff744..bea34b14 100644
--- a/tests/test_hashing/test_hash_samples.py
+++ b/tests/test_hashing/test_hash_samples.py
@@ -73,14 +73,14 @@ def deserialize_value(serialized_value):
 
     # --- tagged dicts (set, frozenset, tuple, OrderedDict) ---
     if isinstance(serialized_value, dict) and "__type__" in serialized_value:
-        type_tag = serialized_value["__type__"]
-        if type_tag == "set":
+        type_key = serialized_value["__type__"]
+        if type_key == "set":
             return set(serialized_value["items"])
-        if type_tag == "frozenset":
+        if type_key == "frozenset":
             return frozenset(serialized_value["items"])
-        if type_tag == "tuple":
+        if type_key == "tuple":
             return tuple(serialized_value["items"])
-        if type_tag == "OrderedDict":
+        if type_key == "OrderedDict":
             from collections import OrderedDict
 
             return OrderedDict(serialized_value["items"])
diff --git a/tests/test_hashing/test_semantic_hasher.py b/tests/test_hashing/test_semantic_hasher.py
index b2719b4a..ba5337d8 100644
--- a/tests/test_hashing/test_semantic_hasher.py
+++ b/tests/test_hashing/test_semantic_hasher.py
@@ -825,11 +825,11 @@ def test_repr_includes_hash(self, hasher):
 
 
 class _DummyHandler:
-    def __init__(self, tag: str) -> None:
-        self.tag = tag
+    def __init__(self, key: str) -> None:
+        self.key = key
 
     def handle(self, obj: Any, hasher: Any) -> Any:
-        return f"{self.tag}:{obj}"
+        return f"{self.key}:{obj}"
 
 
 class Base:
@@ -1440,7 +1440,7 @@ def test_entry_point_hasher_overrides_nested_hasher(self):
 
         result = outer.content_hash()
 
-        # Entry point is outer (hasher_b), so the top-level result tag is hasher_b
+        # Entry point is outer (hasher_b), so the top-level result key is hasher_b
         assert result.method == "hasher_b"
 
         # Verify: result equals computing everything with hasher_b uniformly
diff --git a/tests/test_pipeline/test_composite_observer.py b/tests/test_pipeline/test_composite_observer.py
index e33f6f4f..c227583f 100644
--- a/tests/test_pipeline/test_composite_observer.py
+++ b/tests/test_pipeline/test_composite_observer.py
@@ -36,7 +36,7 @@ def _make_source(n: int = 3) -> ArrowTableSource:
         "id": pa.array([str(i) for i in range(n)], type=pa.large_string()),
         "x": pa.array([10 * (i + 1) for i in range(n)], type=pa.int64()),
     })
-    return ArrowTableSource(table, tag_columns=["id"], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=["id"], infer_nullable=True)
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/test_pipeline/test_graph_rendering.py b/tests/test_pipeline/test_graph_rendering.py
index fdf0200a..e24d1fcb 100644
--- a/tests/test_pipeline/test_graph_rendering.py
+++ b/tests/test_pipeline/test_graph_rendering.py
@@ -32,14 +32,14 @@
 # ---------------------------------------------------------------------------
 
 
-def _make_source(tag_col: str, data_col: str, data: dict) -> ArrowTableSource:
+def _make_source(key_col: str, data_col: str, data: dict) -> ArrowTableSource:
     table = pa.table(
         {
-            tag_col: pa.array(data[tag_col], type=pa.large_string()),
+            key_col: pa.array(data[key_col], type=pa.large_string()),
             data_col: pa.array(data[data_col], type=pa.int64()),
         }
     )
-    return ArrowTableSource(table, tag_columns=[tag_col], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=[key_col], infer_nullable=True)
 
 
 def _make_two_sources() -> tuple[ArrowTableSource, ArrowTableSource]:
diff --git a/tests/test_pipeline/test_logging_observer_integration.py b/tests/test_pipeline/test_logging_observer_integration.py
index 84c7fab4..c56861e2 100644
--- a/tests/test_pipeline/test_logging_observer_integration.py
+++ b/tests/test_pipeline/test_logging_observer_integration.py
@@ -33,7 +33,7 @@ def _make_source(n: int = 3) -> ArrowTableSource:
         "id": pa.array([str(i) for i in range(n)], type=pa.large_string()),
         "x": pa.array([10 * (i + 1) for i in range(n)], type=pa.int64()),
     })
-    return ArrowTableSource(table, tag_columns=["id"], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=["id"], infer_nullable=True)
 
 
 def _get_function_node(pipeline: Pipeline):
@@ -148,12 +148,12 @@ def identity(x: int) -> int:
 
 
 # ---------------------------------------------------------------------------
-# 4. Queryable tag columns (not JSON)
+# 4. Queryable key columns (not JSON)
 # ---------------------------------------------------------------------------
 
 
-class TestQueryableTagColumns:
-    def test_tag_columns_in_log_table(self):
+class TestQueryableKeyColumns:
+    def test_key_columns_in_log_table(self):
         db = InMemoryArrowDatabase()
         source = _make_source(2)
 
@@ -163,7 +163,7 @@ def identity(x: int) -> int:
         pf = PythonDataFunction(identity, output_keys="result", executor=LocalPythonFunctionExecutor())
         pod = FunctionPod(pf)
 
-        pipeline = Pipeline(name="test_tags", pipeline_database=db)
+        pipeline = Pipeline(name="test_keys", pipeline_database=db)
         with pipeline:
             pod(source, label="ident")
 
@@ -174,9 +174,9 @@ def identity(x: int) -> int:
         logs = obs.get_logs()
 
         assert logs is not None
-        # "id" tag column should be a separate column, not JSON
+        # "id" key column should be a separate column, not JSON
         assert "id" in logs.column_names
-        assert "tags" not in logs.column_names
+        assert "keys" not in logs.column_names
         id_values = sorted(logs.column("id").to_pylist())
         assert id_values == ["0", "1"]
 
@@ -259,7 +259,7 @@ def test_mixed_results_logged_correctly(self):
                 "id": pa.array(["0", "1", "2"], type=pa.large_string()),
                 "x": pa.array([10, -1, 30], type=pa.int64()),
             }),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
diff --git a/tests/test_pipeline/test_node_descriptors.py b/tests/test_pipeline/test_node_descriptors.py
index 51cc2779..6f6bb4a1 100644
--- a/tests/test_pipeline/test_node_descriptors.py
+++ b/tests/test_pipeline/test_node_descriptors.py
@@ -12,11 +12,11 @@ class TestSourceNodeFromDescriptor:
     def _make_source_and_descriptor(self):
         source = DictSource(
             data=[{"a": 1, "b": 2}, {"a": 3, "b": 4}],
-            tag_columns=["a"],
+            key_columns=["a"],
             source_id="test",
         )
         node = SourceNode(stream=source, label="my_source")
-        tag_schema, data_schema = node.output_schema()
+        key_schema, data_schema = node.output_schema()
         descriptor = {
             "node_type": "source",
             "label": "my_source",
@@ -24,7 +24,7 @@ def _make_source_and_descriptor(self):
             "pipeline_hash": node.pipeline_hash().to_string(),
             "data_context_key": node.data_context_key,
             "output_schema": {
-                "tag": {k: str(v) for k, v in tag_schema.items()},
+                "key": {k: str(v) for k, v in key_schema.items()},
                 "data": {k: str(v) for k, v in data_schema.items()},
             },
             "stream_type": "dict",
@@ -60,8 +60,8 @@ def test_from_descriptor_output_schema_from_metadata(self):
             stream=None,
             databases={},
         )
-        tag_schema, data_schema = loaded.output_schema()
-        assert set(tag_schema.keys()) == set(descriptor["output_schema"]["tag"].keys())
+        key_schema, data_schema = loaded.output_schema()
+        assert set(key_schema.keys()) == set(descriptor["output_schema"]["key"].keys())
         assert set(data_schema.keys()) == set(
             descriptor["output_schema"]["data"].keys()
         )
@@ -74,8 +74,8 @@ def test_from_descriptor_full_mode_delegates_to_stream(self):
             stream=source,
             databases={},
         )
-        tag_schema, data_schema = loaded.output_schema()
-        assert "a" in tag_schema
+        key_schema, data_schema = loaded.output_schema()
+        assert "a" in key_schema
         assert "b" in data_schema
         # iter_data should work
         data = list(loaded.iter_data())
@@ -128,7 +128,7 @@ class TestFunctionNodeFromDescriptor:
     def _make_function_node_descriptor(self):
         source = DictSource(
             data=[{"a": 1, "b": 2}],
-            tag_columns=["a"],
+            key_columns=["a"],
             source_id="test",
         )
         pf = PythonDataFunction(function=_sample_func, output_keys=["result"])
@@ -140,7 +140,7 @@ def _make_function_node_descriptor(self):
             input_stream=source,
             pipeline_database=scoped_db,
         )
-        tag_schema, data_schema = node.output_schema()
+        key_schema, data_schema = node.output_schema()
         descriptor = {
             "node_type": "function",
             "label": None,
@@ -149,7 +149,7 @@ def _make_function_node_descriptor(self):
             "data_context_key": node.data_context_key,
             "table_scope": node._table_scope,
             "output_schema": {
-                "tag": {k: str(v) for k, v in tag_schema.items()},
+                "key": {k: str(v) for k, v in key_schema.items()},
                 "data": {k: str(v) for k, v in data_schema.items()},
             },
             "function_pod": pod.to_config(),
@@ -162,7 +162,7 @@ def test_from_descriptor_full_mode(self):
         original, descriptor, db = self._make_function_node_descriptor()
         source = DictSource(
             data=[{"a": 1, "b": 2}],
-            tag_columns=["a"],
+            key_columns=["a"],
             source_id="test",
         )
         pf = PythonDataFunction(function=_sample_func, output_keys=["result"])
@@ -201,7 +201,7 @@ def test_from_descriptor_read_only(self):
             "data_context_key": "std:v0.1:default",
             "table_scope": "pipeline_hash",
             "output_schema": {
-                "tag": {"a": "int64"},
+                "key": {"a": "int64"},
                 "data": {"b": "int64", "c": "int64"},
             },
             "operator": {
@@ -224,8 +224,8 @@ def test_from_descriptor_read_only(self):
     def test_from_descriptor_full_mode(self):
         db = InMemoryArrowDatabase()
         scoped_db = db.at("test")
-        source1 = DictSource(data=[{"a": 1, "b": 2}], tag_columns=["a"], source_id="s1")
-        source2 = DictSource(data=[{"a": 1, "c": 3}], tag_columns=["a"], source_id="s2")
+        source1 = DictSource(data=[{"a": 1, "b": 2}], key_columns=["a"], source_id="s1")
+        source2 = DictSource(data=[{"a": 1, "c": 3}], key_columns=["a"], source_id="s2")
         op = Join()
         node = OperatorNode(
             operator=op,
@@ -240,7 +240,7 @@ def test_from_descriptor_full_mode(self):
             "data_context_key": node.data_context_key,
             "table_scope": node._table_scope,
             "output_schema": {
-                "tag": {"a": "int64"},
+                "key": {"a": "int64"},
                 "data": {"b": "int64", "c": "int64"},
             },
             "operator": op.to_config(),
diff --git a/tests/test_pipeline/test_node_protocols.py b/tests/test_pipeline/test_node_protocols.py
index ce208629..b8615c88 100644
--- a/tests/test_pipeline/test_node_protocols.py
+++ b/tests/test_pipeline/test_node_protocols.py
@@ -77,11 +77,11 @@ class OldFunction:
             def get_cached_results(self, entry_ids):
                 return {}
 
-            def compute_pipeline_entry_id(self, tag, data):
+            def compute_pipeline_entry_id(self, key, data):
                 return ""
 
-            def execute_data(self, tag, data):
-                return (tag, None)
+            def execute_data(self, key, data):
+                return (key, None)
 
             def execute(self, input_stream):
                 return []
@@ -153,7 +153,7 @@ def _make_source_node(self):
             "key": pa.array(["a", "b", "c"], type=pa.large_string()),
             "value": pa.array([1, 2, 3], type=pa.int64()),
         })
-        src = ArrowTableSource(table, tag_columns=["key"], infer_nullable=True)
+        src = ArrowTableSource(table, key_columns=["key"], infer_nullable=True)
         return SourceNode(src)
 
     def test_execute_returns_list(self):
@@ -204,7 +204,7 @@ async def test_tightened_signature(self):
             "key": pa.array(["a", "b"], type=pa.large_string()),
             "value": pa.array([1, 2], type=pa.int64()),
         })
-        src = ArrowTableSource(table, tag_columns=["key"], infer_nullable=True)
+        src = ArrowTableSource(table, key_columns=["key"], infer_nullable=True)
         node = SourceNode(src)
 
         output_ch = Channel(buffer_size=16)
@@ -218,7 +218,7 @@ async def test_async_execute_with_observer(self):
             "key": pa.array(["a"], type=pa.large_string()),
             "value": pa.array([1], type=pa.int64()),
         })
-        src = ArrowTableSource(table, tag_columns=["key"], infer_nullable=True)
+        src = ArrowTableSource(table, key_columns=["key"], infer_nullable=True)
         node = SourceNode(src)
         events = []
 
@@ -256,7 +256,7 @@ def _make_function_node(self):
             "key": pa.array(["a", "b"], type=pa.large_string()),
             "value": pa.array([1, 2], type=pa.int64()),
         })
-        src = ArrowTableSource(table, tag_columns=["key"], infer_nullable=True)
+        src = ArrowTableSource(table, key_columns=["key"], infer_nullable=True)
         pf = PythonDataFunction(double_value, output_keys="result")
         pod = FunctionPod(pf)
         return FunctionNode(pod, src)
@@ -313,7 +313,7 @@ async def test_tightened_signature(self):
             "key": pa.array(["a", "b"], type=pa.large_string()),
             "value": pa.array([1, 2], type=pa.int64()),
         })
-        src = ArrowTableSource(table, tag_columns=["key"], infer_nullable=True)
+        src = ArrowTableSource(table, key_columns=["key"], infer_nullable=True)
         pf = PythonDataFunction(double_value, output_keys="result")
         pod = FunctionPod(pf)
         node = FunctionNode(pod, src)
@@ -321,8 +321,8 @@ async def test_tightened_signature(self):
         input_ch = Channel(buffer_size=16)
         output_ch = Channel(buffer_size=16)
 
-        for tag, data in src.iter_data():
-            await input_ch.writer.send((tag, data))
+        for key, data in src.iter_data():
+            await input_ch.writer.send((key, data))
         await input_ch.writer.close()
 
         await node.async_execute(input_ch.reader, output_ch.writer)
@@ -337,7 +337,7 @@ async def test_async_execute_with_observer(self):
             "key": pa.array(["a"], type=pa.large_string()),
             "value": pa.array([1], type=pa.int64()),
         })
-        src = ArrowTableSource(table, tag_columns=["key"], infer_nullable=True)
+        src = ArrowTableSource(table, key_columns=["key"], infer_nullable=True)
         pf = PythonDataFunction(double_value, output_keys="result")
         pod = FunctionPod(pf)
         node = FunctionNode(pod, src)
@@ -363,8 +363,8 @@ def create_data_logger(self, t, p, **kwargs):
 
         input_ch = Channel(buffer_size=16)
         output_ch = Channel(buffer_size=16)
-        for tag, data in src.iter_data():
-            await input_ch.writer.send((tag, data))
+        for key, data in src.iter_data():
+            await input_ch.writer.send((key, data))
         await input_ch.writer.close()
 
         await node.async_execute(input_ch.reader, output_ch.writer, observer=Obs())
@@ -390,8 +390,8 @@ def _make_join_node(self):
             "key": pa.array(["a", "b"], type=pa.large_string()),
             "score": pa.array([100, 200], type=pa.int64()),
         })
-        src_a = ArrowTableSource(table_a, tag_columns=["key"], infer_nullable=True)
-        src_b = ArrowTableSource(table_b, tag_columns=["key"], infer_nullable=True)
+        src_a = ArrowTableSource(table_a, key_columns=["key"], infer_nullable=True)
+        src_b = ArrowTableSource(table_b, key_columns=["key"], infer_nullable=True)
         return OperatorNode(Join(), input_streams=[src_a, src_b])
 
     def test_execute_with_observer(self):
@@ -435,7 +435,7 @@ async def test_async_execute_with_observer(self):
             "key": pa.array(["a", "b"], type=pa.large_string()),
             "value": pa.array([10, 20], type=pa.int64()),
         })
-        src_a = ArrowTableSource(table_a, tag_columns=["key"], infer_nullable=True)
+        src_a = ArrowTableSource(table_a, key_columns=["key"], infer_nullable=True)
         op = SelectDataColumns(columns=["value"])
         op_node = OperatorNode(op, input_streams=[src_a])
 
@@ -455,8 +455,8 @@ def on_data_end(self, node_label, t, ip, op, cached):
 
         input_ch = Channel(buffer_size=16)
         output_ch = Channel(buffer_size=16)
-        for tag, data in src_a.iter_data():
-            await input_ch.writer.send((tag, data))
+        for key, data in src_a.iter_data():
+            await input_ch.writer.send((key, data))
         await input_ch.writer.close()
 
         await op_node.async_execute(
diff --git a/tests/test_pipeline/test_observability_reader.py b/tests/test_pipeline/test_observability_reader.py
index f5dfef21..ebf1e75e 100644
--- a/tests/test_pipeline/test_observability_reader.py
+++ b/tests/test_pipeline/test_observability_reader.py
@@ -84,8 +84,8 @@ def _status_row(
         "_status_error_summary": error_summary,
         "subject": subject,
         "session_date": session_date,
-        "_tag::source_id::abc123:0": "tag_val",
-        "_tag::record_id::abc123:0": "tag_rec",
+        "_key::source_id::abc123:0": "key_val",
+        "_key::record_id::abc123:0": "key_rec",
     }
 
 
@@ -114,8 +114,8 @@ def _log_row(
         "_log_timestamp": timestamp,
         "subject": subject,
         "session_date": session_date,
-        "_tag::source_id::abc123:0": "tag_val",
-        "_tag::record_id::abc123:0": "tag_rec",
+        "_key::source_id::abc123:0": "key_val",
+        "_key::record_id::abc123:0": "key_rec",
     }
 
 
@@ -169,9 +169,9 @@ def test_discovers_nodes(self, results_root: Path):
         reader = ObservabilityReader(results_root)
         assert reader.nodes == ["node_a", "node_b", "node_c"]
 
-    def test_discovers_tag_columns(self, results_root: Path):
+    def test_discovers_key_columns(self, results_root: Path):
         reader = ObservabilityReader(results_root)
-        assert reader.tag_columns == ["session_date", "subject"]
+        assert reader.key_columns == ["session_date", "subject"]
 
     def test_raises_on_missing_root(self, tmp_path: Path):
         with pytest.raises(ValueError, match="does not exist"):
@@ -213,8 +213,8 @@ def test_returns_clean_columns(self, results_root: Path):
         assert "error_summary" in df.columns
         for col in df.columns:
             assert not col.startswith("_status_")
-            assert not col.startswith("_tag::")
-            assert not col.startswith("_tag_")
+            assert not col.startswith("_key::")
+            assert not col.startswith("_key_")
             assert not col.startswith("__")
 
     def test_includes_failed_with_error_summary(self, results_root: Path):
@@ -249,8 +249,8 @@ def test_returns_clean_columns(self, results_root: Path):
         assert "session_date" in df.columns
         for col in df.columns:
             assert not col.startswith("_log_")
-            assert not col.startswith("_tag::")
-            assert not col.startswith("_tag_")
+            assert not col.startswith("_key::")
+            assert not col.startswith("_key_")
             assert not col.startswith("__")
 
     def test_filters_to_requested_node(self, results_root: Path):
diff --git a/tests/test_pipeline/test_orchestrator.py b/tests/test_pipeline/test_orchestrator.py
index c51e7736..a498dbfb 100644
--- a/tests/test_pipeline/test_orchestrator.py
+++ b/tests/test_pipeline/test_orchestrator.py
@@ -39,17 +39,17 @@
 
 
 def _make_source(
-    tag_col: str,
+    key_col: str,
     data_col: str,
     data: dict,
 ) -> ArrowTableSource:
     table = pa.table(
         {
-            tag_col: pa.array(data[tag_col], type=pa.large_string()),
+            key_col: pa.array(data[key_col], type=pa.large_string()),
             data_col: pa.array(data[data_col], type=pa.int64()),
         }
     )
-    return ArrowTableSource(table, tag_columns=[tag_col], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=[key_col], infer_nullable=True)
 
 
 def _make_two_sources():
@@ -110,8 +110,8 @@ async def test_delegates_to_operator(self):
         input_ch = Channel(buffer_size=16)
         output_ch = Channel(buffer_size=16)
 
-        for tag, data in src.iter_data():
-            await input_ch.writer.send((tag, data))
+        for key, data in src.iter_data():
+            await input_ch.writer.send((key, data))
         await input_ch.writer.close()
 
         await op_node.async_execute([input_ch.reader], output_ch.writer)
@@ -136,8 +136,8 @@ async def test_processes_data(self):
         input_ch = Channel(buffer_size=16)
         output_ch = Channel(buffer_size=16)
 
-        for tag, data in src.iter_data():
-            await input_ch.writer.send((tag, data))
+        for key, data in src.iter_data():
+            await input_ch.writer.send((key, data))
         await input_ch.writer.close()
 
         await node.async_execute(input_ch.reader, output_ch.writer)
@@ -467,7 +467,7 @@ def failing_fn(value: int) -> int:
         crashes = []
 
         class CrashRecorder(NoOpObserver):
-            def on_data_crash(self, node_label, tag, data, error):
+            def on_data_crash(self, node_label, key, data, error):
                 crashes.append(error)
 
         pipeline.compile()
@@ -500,12 +500,12 @@ def on_node_start(self, node_label, node_hash, **kwargs):
                 events.append(("node_start", node_label))
             def on_node_end(self, node_label, node_hash, **kwargs):
                 events.append(("node_end", node_label))
-            def on_data_start(self, node_label, tag, data):
+            def on_data_start(self, node_label, key, data):
                 events.append(("data_start", node_label))
-            def on_data_end(self, node_label, tag, input_pkt, output_pkt, cached):
+            def on_data_end(self, node_label, key, input_pkt, output_pkt, cached):
                 events.append(("data_end", node_label, cached))
-            def on_data_crash(self, node_label, tag, data, exc): pass
-            def create_data_logger(self, tag, data, **kwargs):
+            def on_data_crash(self, node_label, key, data, exc): pass
+            def create_data_logger(self, key, data, **kwargs):
                 from orcapod.pipeline.observer import _NOOP_LOGGER
                 return _NOOP_LOGGER
             def contextualize(self, *identity_path):
@@ -557,12 +557,12 @@ def on_node_start(self, node_label, node_hash, **kwargs):
                 events.append(("node_start", node_label))
             def on_node_end(self, node_label, node_hash, **kwargs):
                 events.append(("node_end", node_label))
-            def on_data_start(self, node_label, tag, data):
+            def on_data_start(self, node_label, key, data):
                 events.append(("data_start", node_label))
-            def on_data_end(self, node_label, tag, input_pkt, output_pkt, cached):
+            def on_data_end(self, node_label, key, input_pkt, output_pkt, cached):
                 events.append(("data_end", node_label))
-            def on_data_crash(self, node_label, tag, data, exc): pass
-            def create_data_logger(self, tag, data, **kwargs):
+            def on_data_crash(self, node_label, key, data, exc): pass
+            def create_data_logger(self, key, data, **kwargs):
                 from orcapod.pipeline.observer import _NOOP_LOGGER
                 return _NOOP_LOGGER
             def contextualize(self, *identity_path):
diff --git a/tests/test_pipeline/test_orchestrator_executor_matrix.py b/tests/test_pipeline/test_orchestrator_executor_matrix.py
index 12c5487c..7e4c60c9 100644
--- a/tests/test_pipeline/test_orchestrator_executor_matrix.py
+++ b/tests/test_pipeline/test_orchestrator_executor_matrix.py
@@ -57,14 +57,14 @@
 
 
 def _make_source(n: int = _N_DATA) -> ArrowTableSource:
-    """Simple source with ``id`` tag and ``x`` data column."""
+    """Simple source with ``id`` key and ``x`` data column."""
     table = pa.table(
         {
             "id": pa.array(list(range(n)), type=pa.int64()),
             "x": pa.array(list(range(n)), type=pa.int64()),
         }
     )
-    return ArrowTableSource(table, tag_columns=["id"], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=["id"], infer_nullable=True)
 
 
 def _build_pipeline(
diff --git a/tests/test_pipeline/test_pipeline.py b/tests/test_pipeline/test_pipeline.py
index 331581ab..c4c811af 100644
--- a/tests/test_pipeline/test_pipeline.py
+++ b/tests/test_pipeline/test_pipeline.py
@@ -34,14 +34,14 @@
 # ---------------------------------------------------------------------------
 
 
-def _make_source(tag_col: str, data_col: str, data: dict) -> ArrowTableSource:
+def _make_source(key_col: str, data_col: str, data: dict) -> ArrowTableSource:
     table = pa.table(
         {
-            tag_col: pa.array(data[tag_col], type=pa.large_string()),
+            key_col: pa.array(data[key_col], type=pa.large_string()),
             data_col: pa.array(data[data_col], type=pa.int64()),
         }
     )
-    return ArrowTableSource(table, tag_columns=[tag_col], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=[key_col], infer_nullable=True)
 
 
 def _make_two_sources():
@@ -793,7 +793,7 @@ def test_detached_content_hash_differs_from_extending(self, pipeline_db):
 
     def test_detached_pipeline_hash_is_schema_only(self, pipeline_db):
         """DerivedSource inherits RootSource.pipeline_identity_structure()
-        = (tag_schema, data_schema), breaking the upstream Merkle chain."""
+        = (key_schema, data_schema), breaking the upstream Merkle chain."""
         src_a, src_b = _make_two_sources()
         pf = PythonDataFunction(add_values, output_keys="total")
         pod = FunctionPod(data_function=pf)
@@ -808,7 +808,7 @@ def test_detached_pipeline_hash_is_schema_only(self, pipeline_db):
         derived_src = pipe.adder.as_source()
         # DerivedSource pipeline_hash should be the RootSource base case
         # (schema-only, no upstream topology)
-        tag_schema, data_schema = derived_src.output_schema()
+        key_schema, data_schema = derived_src.output_schema()
         # Pipeline hash should NOT equal the origin node's pipeline hash
         assert derived_src.pipeline_hash() != pipe.adder.pipeline_hash()
         # But two DerivedSources with same schema should share pipeline_hash
@@ -869,7 +869,7 @@ def test_detached_pipeline_hash_matches_equivalent_fresh_source(self, pipeline_d
         pod_add = FunctionPod(data_function=pf_add)
         pf_double = PythonDataFunction(double_value, output_keys="doubled")
 
-        # Pipeline A: sources → join → adder (schema: tag=key, data=total)
+        # Pipeline A: sources → join → adder (schema: key=key, data=total)
         db_a = InMemoryArrowDatabase()
         pipe_a = Pipeline(name="pipe_a", pipeline_database=db_a)
         with pipe_a:
@@ -886,14 +886,14 @@ def test_detached_pipeline_hash_matches_equivalent_fresh_source(self, pipeline_d
             FunctionPod(data_function=pf_double)(renamed, label="doubler")
 
         # Branch 2: pipeline from a fresh ArrowTableSource with identical schema
-        # Same schema as DerivedSource: tag=key (large_string), data=total (int64)
+        # Same schema as DerivedSource: key=key (large_string), data=total (int64)
         fresh_table = pa.table(
             {
                 "key": pa.array(["x", "y"], type=pa.large_string()),
                 "total": pa.array([999, 888], type=pa.int64()),
             }
         )
-        fresh_src = ArrowTableSource(fresh_table, tag_columns=["key"], infer_nullable=True)
+        fresh_src = ArrowTableSource(fresh_table, key_columns=["key"], infer_nullable=True)
         db_fresh = InMemoryArrowDatabase()
         pipe_fresh = Pipeline(name="fresh_pipe", pipeline_database=db_fresh)
         with pipe_fresh:
@@ -1445,7 +1445,7 @@ def test_source_node_accessible_by_label(self, pipeline_db):
                     "value": pa.array([10], type=pa.int64()),
                 }
             ),
-            tag_columns=["key"],
+            key_columns=["key"],
             label="my_source",
             infer_nullable=True,
         )
diff --git a/tests/test_pipeline/test_serialization.py b/tests/test_pipeline/test_serialization.py
index 59bae9fa..c51d7310 100644
--- a/tests/test_pipeline/test_serialization.py
+++ b/tests/test_pipeline/test_serialization.py
@@ -30,7 +30,7 @@ def simple_pipeline(tmp_path):
     db = DeltaTableDatabase(base_path=str(tmp_path / "pipeline_db"))
     source = DictSource(
         data=[{"x": 1, "y": 2}, {"x": 3, "y": 4}],
-        tag_columns=["x"],
+        key_columns=["x"],
         source_id="test_source",
     )
     pf = PythonDataFunction(
@@ -62,8 +62,8 @@ def multi_source_pipeline(tmp_path):
             "score": pa.array([100, 200], type=pa.int64()),
         }
     )
-    src_a = ArrowTableSource(table_a, tag_columns=["key"], source_id="src_a", infer_nullable=True)
-    src_b = ArrowTableSource(table_b, tag_columns=["key"], source_id="src_b", infer_nullable=True)
+    src_a = ArrowTableSource(table_a, key_columns=["key"], source_id="src_a", infer_nullable=True)
+    src_b = ArrowTableSource(table_b, key_columns=["key"], source_id="src_b", infer_nullable=True)
 
     def add_values(value: int, score: int) -> dict[str, int]:
         return {"total": value + score}
@@ -178,8 +178,8 @@ def test_save_node_common_fields(self, simple_pipeline):
                 f"Node {node_hash} missing fields: "
                 f"{required_fields - set(descriptor.keys())}"
             )
-            # output_schema has tag and data sub-dicts
-            assert "tag" in descriptor["output_schema"]
+            # output_schema has key and data sub-dicts
+            assert "key" in descriptor["output_schema"]
             assert "data" in descriptor["output_schema"]
 
     def test_save_source_node_fields(self, simple_pipeline):
@@ -268,7 +268,7 @@ def test_save_with_separate_result_database(self, tmp_path):
 
         source = DictSource(
             data=[{"x": 1, "y": 2}],
-            tag_columns=["x"],
+            key_columns=["x"],
             source_id="test_source",
         )
         pf = PythonDataFunction(
@@ -369,7 +369,7 @@ def test_save_load_run_full_cycle(self, tmp_path):
         db = DeltaTableDatabase(base_path=db_path)
         source = DictSource(
             data=[{"x": 1, "y": 2}, {"x": 3, "y": 4}],
-            tag_columns=["x"],
+            key_columns=["x"],
             source_id="test_source",
         )
         pf = PythonDataFunction(
@@ -400,7 +400,7 @@ def test_read_only_can_access_cached_data(self, tmp_path):
         db = DeltaTableDatabase(base_path=db_path)
         source = DictSource(
             data=[{"x": 1, "y": 2}, {"x": 3, "y": 4}],
-            tag_columns=["x"],
+            key_columns=["x"],
             source_id="test_source",
         )
         pf = PythonDataFunction(
@@ -429,10 +429,10 @@ def test_pipeline_with_operator(self, tmp_path):
         """Save/load a pipeline with an operator node."""
         db = DeltaTableDatabase(base_path=str(tmp_path / "db"))
         source1 = DictSource(
-            data=[{"a": 1, "b": 10}], tag_columns=["a"], source_id="s1"
+            data=[{"a": 1, "b": 10}], key_columns=["a"], source_id="s1"
         )
         source2 = DictSource(
-            data=[{"a": 1, "c": 20}], tag_columns=["a"], source_id="s2"
+            data=[{"a": 1, "c": 20}], key_columns=["a"], source_id="s2"
         )
         pipeline = Pipeline(name="test", pipeline_database=db)
         with pipeline:
@@ -471,7 +471,7 @@ def _make_csv_pipeline(tmp_path):
     db = DeltaTableDatabase(base_path=str(tmp_path / "db"))
     source = CSVSource(
         file_path=csv_path,
-        tag_columns=["x"],
+        key_columns=["x"],
         source_id="csv_source",
     )
     pf = PythonDataFunction(
@@ -552,11 +552,11 @@ def test_read_only_source_returns_stored_schema(self, simple_pipeline):
         source_node = [
             n for n in loaded._persistent_node_map.values() if n.node_type == "source"
         ][0]
-        tag_schema, data_schema = source_node.output_schema()
-        assert isinstance(tag_schema, Schema)
+        key_schema, data_schema = source_node.output_schema()
+        assert isinstance(key_schema, Schema)
         assert isinstance(data_schema, Schema)
-        # The original source has tag=["x"], data=["y"]
-        assert "x" in tag_schema
+        # The original source has key=["x"], data=["y"]
+        assert "x" in key_schema
         assert "y" in data_schema
 
     def test_read_only_source_returns_stored_keys(self, simple_pipeline):
@@ -571,8 +571,8 @@ def test_read_only_source_returns_stored_keys(self, simple_pipeline):
         source_node = [
             n for n in loaded._persistent_node_map.values() if n.node_type == "source"
         ][0]
-        tag_keys, data_keys = source_node.keys()
-        assert "x" in tag_keys
+        key_keys, data_keys = source_node.keys()
+        assert "x" in key_keys
         assert "y" in data_keys
 
     def test_read_only_source_iter_data_raises(self, simple_pipeline):
@@ -615,10 +615,10 @@ def test_read_only_function_returns_stored_schema(self, simple_pipeline):
         loaded = Pipeline.load(str(path), mode="read_only")
 
         fn = loaded.compiled_nodes["transform"]
-        tag_schema, data_schema = fn.output_schema()
-        assert isinstance(tag_schema, Schema)
+        key_schema, data_schema = fn.output_schema()
+        assert isinstance(key_schema, Schema)
         assert isinstance(data_schema, Schema)
-        assert "x" in tag_schema
+        assert "x" in key_schema
         assert "result" in data_schema
 
     def test_read_only_function_returns_stored_keys(self, simple_pipeline):
@@ -631,8 +631,8 @@ def test_read_only_function_returns_stored_keys(self, simple_pipeline):
         loaded = Pipeline.load(str(path), mode="read_only")
 
         fn = loaded.compiled_nodes["transform"]
-        tag_keys, data_keys = fn.keys()
-        assert "x" in tag_keys
+        key_keys, data_keys = fn.keys()
+        assert "x" in key_keys
         assert "result" in data_keys
 
     def test_read_only_function_returns_stored_hashes(self, simple_pipeline):
@@ -662,10 +662,10 @@ def test_read_only_operator_with_join(self, tmp_path):
         """
         db = DeltaTableDatabase(base_path=str(tmp_path / "db"))
         source1 = DictSource(
-            data=[{"a": 1, "b": 10}], tag_columns=["a"], source_id="s1"
+            data=[{"a": 1, "b": 10}], key_columns=["a"], source_id="s1"
         )
         source2 = DictSource(
-            data=[{"a": 1, "c": 20}], tag_columns=["a"], source_id="s2"
+            data=[{"a": 1, "c": 20}], key_columns=["a"], source_id="s2"
         )
         pipeline = Pipeline(name="test", pipeline_database=db)
         with pipeline:
@@ -682,10 +682,10 @@ def test_read_only_operator_with_join(self, tmp_path):
         assert op_node.load_status == LoadStatus.UNAVAILABLE
 
         # Stored metadata is still accessible
-        tag_schema, data_schema = op_node.output_schema()
-        assert isinstance(tag_schema, Schema)
+        key_schema, data_schema = op_node.output_schema()
+        assert isinstance(key_schema, Schema)
         assert isinstance(data_schema, Schema)
-        assert "a" in tag_schema
+        assert "a" in key_schema
 
     def test_read_only_pipeline_is_compiled(self, simple_pipeline):
         """A read-only loaded pipeline reports as compiled."""
@@ -804,10 +804,10 @@ def test_full_mode_operator_degrades_when_sources_unavailable(self, tmp_path):
         """
         db = DeltaTableDatabase(base_path=str(tmp_path / "db"))
         source1 = DictSource(
-            data=[{"a": 1, "b": 10}], tag_columns=["a"], source_id="s1"
+            data=[{"a": 1, "b": 10}], key_columns=["a"], source_id="s1"
         )
         source2 = DictSource(
-            data=[{"a": 1, "c": 20}], tag_columns=["a"], source_id="s2"
+            data=[{"a": 1, "c": 20}], key_columns=["a"], source_id="s2"
         )
         pipeline = Pipeline(name="test", pipeline_database=db)
         with pipeline:
@@ -865,7 +865,7 @@ def test_full_mode_csv_function_iter_data(self, tmp_path):
         # 2 rows in the CSV
         assert len(data) == 2
         # Each data should have a "result" key
-        for tag, data in data:
+        for key, data in data:
             assert "result" in data.keys()
 
     def test_full_mode_csv_function_as_table(self, tmp_path):
@@ -902,7 +902,7 @@ def test_cached_dict_source_recovers_data_after_load(self, tmp_path):
 
         source = DictSource(
             data=[{"x": 1, "y": 10}, {"x": 2, "y": 20}],
-            tag_columns=["x"],
+            key_columns=["x"],
             source_id="dict_src",
         )
         cached_source = CachedSource(source, cache_database=cache_db)
@@ -956,7 +956,7 @@ def test_cached_dict_source_function_output_matches(self, tmp_path):
 
         source = DictSource(
             data=[{"x": 1, "y": 5}, {"x": 2, "y": 15}],
-            tag_columns=["x"],
+            key_columns=["x"],
             source_id="dict_src2",
         )
         cached_source = CachedSource(source, cache_database=cache_db)
@@ -1008,8 +1008,8 @@ def test_load_default_mode_is_full(self, simple_pipeline):
     def test_load_multi_source_operator_pipeline_read_only(self, tmp_path):
         """Multi-source pipeline with operator loads in read_only with correct status."""
         db = DeltaTableDatabase(base_path=str(tmp_path / "db"))
-        src1 = DictSource(data=[{"k": 1, "v1": 10}], tag_columns=["k"], source_id="s1")
-        src2 = DictSource(data=[{"k": 1, "v2": 20}], tag_columns=["k"], source_id="s2")
+        src1 = DictSource(data=[{"k": 1, "v1": 10}], key_columns=["k"], source_id="s1")
+        src2 = DictSource(data=[{"k": 1, "v2": 20}], key_columns=["k"], source_id="s2")
         pipeline = Pipeline(name="multi", pipeline_database=db)
         with pipeline:
             joined = Join().process(src1, src2, label="join_node")
@@ -1076,7 +1076,7 @@ def test_result_database_round_trip(self, tmp_path):
         """Pipeline with separate result_database round-trips correctly."""
         pipeline_db = DeltaTableDatabase(base_path=str(tmp_path / "pdb"))
         result_db = DeltaTableDatabase(base_path=str(tmp_path / "fdb"))
-        source = DictSource(data=[{"x": 1, "y": 2}], tag_columns=["x"], source_id="s")
+        source = DictSource(data=[{"x": 1, "y": 2}], key_columns=["x"], source_id="s")
         pf = PythonDataFunction(
             function=transform_func,
             output_keys=["result"],
@@ -1157,7 +1157,7 @@ def _build_and_save_pipeline(self, tmp_path):
         db = DeltaTableDatabase(base_path=db_path)
         source = CSVSource(
             file_path=csv_path,
-            tag_columns=["name"],
+            key_columns=["name"],
             source_id="people",
         )
         pf = PythonDataFunction(
@@ -1194,7 +1194,7 @@ def _build_and_save_pipeline_with_operator(self, tmp_path):
         db = DeltaTableDatabase(base_path=db_path)
         source = CSVSource(
             file_path=csv_path,
-            tag_columns=["name"],
+            key_columns=["name"],
             source_id="people",
         )
         pf = PythonDataFunction(
@@ -1313,7 +1313,7 @@ def _build_run_save(self, tmp_path, function, source_data=None):
         db = DeltaTableDatabase(base_path=str(tmp_path / "db"))
         source = DictSource(
             data=source_data,
-            tag_columns=["x"],
+            key_columns=["x"],
             source_id="test_source",
         )
         pf = PythonDataFunction(
@@ -1417,7 +1417,7 @@ def test_live_stream_does_not_trigger_cache_only_mode(self, tmp_path):
         db = DeltaTableDatabase(base_path=str(tmp_path / "db"))
         source = CSVSource(
             file_path=csv_path,
-            tag_columns=["x"],
+            key_columns=["x"],
             source_id="csv_source",
         )
         pf = PythonDataFunction(
@@ -1504,7 +1504,7 @@ def _build_pipeline(self, tmp_path):
                 {"id": 2, "x": 30, "y": 40},
                 {"id": 3, "x": 60, "y": 10},
             ],
-            tag_columns=["id"],
+            key_columns=["id"],
             source_id="data1",
         )
         data2 = DictSource(
@@ -1512,7 +1512,7 @@ def _build_pipeline(self, tmp_path):
                 {"id": 2, "z": 30},
                 {"id": 3, "z": 50},
             ],
-            tag_columns=["id"],
+            key_columns=["id"],
             source_id="data2",
         )
 
@@ -1648,7 +1648,7 @@ def add_one(y: int) -> int:
     pod = FunctionPod(data_function=pf)
     source = DictSource(
         data=[{"x": 1, "y": 2}, {"x": 3, "y": 4}],
-        tag_columns=["x"],
+        key_columns=["x"],
         source_id="test_src",
     )
 
@@ -1681,8 +1681,8 @@ def test_operator_node_pipeline_path_two_level(tmp_path):
             "score": pa.array([100, 200], type=pa.int64()),
         }
     )
-    src_a = ArrowTableSource(table_a, tag_columns=["key"], source_id="src_a", infer_nullable=True)
-    src_b = ArrowTableSource(table_b, tag_columns=["key"], source_id="src_b", infer_nullable=True)
+    src_a = ArrowTableSource(table_a, key_columns=["key"], source_id="src_a", infer_nullable=True)
+    src_b = ArrowTableSource(table_b, key_columns=["key"], source_id="src_b", infer_nullable=True)
     join = Join()
 
     pipeline = Pipeline(name="test", pipeline_database=db)
@@ -1724,7 +1724,7 @@ def _make_simple_pipeline_for_level_tests(tmp_path):
         function_name="transform_func",
     )
     pod = FunctionPod(data_function=pf)
-    source = DictSource(data=[{"x": 1, "y": 2}], tag_columns=["x"], source_id="s")
+    source = DictSource(data=[{"x": 1, "y": 2}], key_columns=["x"], source_id="s")
     pipeline = Pipeline(name="p", pipeline_database=db)
     with pipeline:
         pod.process(source, label="fn")
@@ -1856,7 +1856,7 @@ def test_load_operator_node_identity_path_has_schema_instance_components(tmp_pat
     _write_csv(csv_path, [{"name": "alice", "y": 2}, {"name": "bob", "y": 4}])
 
     db = DeltaTableDatabase(base_path=str(tmp_path / "db"))
-    source = CSVSource(file_path=csv_path, tag_columns=["name"], source_id="people")
+    source = CSVSource(file_path=csv_path, key_columns=["name"], source_id="people")
     pf = PythonDataFunction(
         function=transform_func,
         output_keys="result",
@@ -1893,7 +1893,7 @@ def test_load_raises_on_missing_result_database_registry_key(tmp_path):
     """
     pipeline_db = DeltaTableDatabase(base_path=str(tmp_path / "pdb"))
     result_db = DeltaTableDatabase(base_path=str(tmp_path / "fdb"))
-    source = DictSource(data=[{"x": 1, "y": 2}], tag_columns=["x"], source_id="s")
+    source = DictSource(data=[{"x": 1, "y": 2}], key_columns=["x"], source_id="s")
     pf = PythonDataFunction(
         function=transform_func,
         output_keys=["result"],
@@ -1934,7 +1934,7 @@ def test_load_function_node_identity_path_has_schema_instance_components(tmp_pat
     _write_csv(csv_path, [{"name": "alice", "y": 2}, {"name": "bob", "y": 4}])
 
     db = DeltaTableDatabase(base_path=str(tmp_path / "db"))
-    source = CSVSource(file_path=csv_path, tag_columns=["name"], source_id="people")
+    source = CSVSource(file_path=csv_path, key_columns=["name"], source_id="people")
     pf = PythonDataFunction(
         function=transform_func,
         output_keys="result",
@@ -2045,7 +2045,7 @@ def test_load_function_node_with_old_function_pod_key_is_not_reconstructed(tmp_p
     csv_path = str(tmp_path / "data.csv")
     _write_csv(csv_path, [{"name": "alice", "y": 2}, {"name": "bob", "y": 4}])
     db = DeltaTableDatabase(base_path=str(tmp_path / "db"))
-    source = CSVSource(file_path=csv_path, tag_columns=["name"], source_id="people")
+    source = CSVSource(file_path=csv_path, key_columns=["name"], source_id="people")
     pf = PythonDataFunction(
         function=transform_func, output_keys="result", function_name="transform_func"
     )
@@ -2086,7 +2086,7 @@ def test_load_operator_node_with_old_operator_key_is_not_reconstructed(tmp_path)
     csv_path = str(tmp_path / "data.csv")
     _write_csv(csv_path, [{"name": "alice", "y": 2}, {"name": "bob", "y": 4}])
     db = DeltaTableDatabase(base_path=str(tmp_path / "db"))
-    source = CSVSource(file_path=csv_path, tag_columns=["name"], source_id="people")
+    source = CSVSource(file_path=csv_path, key_columns=["name"], source_id="people")
     pf = PythonDataFunction(
         function=transform_func, output_keys="result", function_name="transform_func"
     )
@@ -2162,7 +2162,7 @@ def test_standard_save_load_run_roundtrip(self, tmp_path):
         _write_csv(csv_path, [{"x": "1", "y": "2"}, {"x": "3", "y": "4"}])
 
         db = DeltaTableDatabase(base_path=str(tmp_path / "db"))
-        source = CSVSource(file_path=csv_path, tag_columns=["x"], source_id="src")
+        source = CSVSource(file_path=csv_path, key_columns=["x"], source_id="src")
         pf = PythonDataFunction(
             function=transform_func,
             output_keys=["result"],
@@ -2206,7 +2206,7 @@ def test_definition_save_load_run_roundtrip(self, tmp_path):
         _write_csv(csv_path, [{"x": "1", "y": "10"}, {"x": "3", "y": "20"}])
 
         original_db = DeltaTableDatabase(base_path=str(tmp_path / "original_db"))
-        source = CSVSource(file_path=csv_path, tag_columns=["x"], source_id="src2")
+        source = CSVSource(file_path=csv_path, key_columns=["x"], source_id="src2")
         pf = PythonDataFunction(
             function=transform_func,
             output_keys=["result"],
@@ -2260,7 +2260,7 @@ def test_definition_save_load_with_unloadable_function_uses_proxy(self, tmp_path
         _write_csv(csv_path, [{"x": "1", "y": "5"}, {"x": "3", "y": "15"}])
 
         db = DeltaTableDatabase(base_path=str(tmp_path / "db"))
-        source = CSVSource(file_path=csv_path, tag_columns=["x"], source_id="src3")
+        source = CSVSource(file_path=csv_path, key_columns=["x"], source_id="src3")
         pf = PythonDataFunction(
             function=transform_func,
             output_keys=["result"],
diff --git a/tests/test_pipeline/test_serialization_helpers.py b/tests/test_pipeline/test_serialization_helpers.py
index f200c7c4..eca73618 100644
--- a/tests/test_pipeline/test_serialization_helpers.py
+++ b/tests/test_pipeline/test_serialization_helpers.py
@@ -139,7 +139,7 @@ def test_source_registry_has_all_types(self):
     def test_operator_registry_has_all_types(self):
         assert "Join" in OPERATOR_REGISTRY
         assert "Batch" in OPERATOR_REGISTRY
-        assert "SelectTagColumns" in OPERATOR_REGISTRY
+        assert "SelectKeyColumns" in OPERATOR_REGISTRY
 
     def test_data_function_registry(self):
         assert "python.function.v0" in DATA_FUNCTION_REGISTRY
@@ -386,7 +386,7 @@ def test_mixed_types(self):
             "id": int,
             "name": str,
             "score": float,
-            "tags": list[str],
+            "keys": list[str],
             "active": bool,
         }
         result = self._round_trip(schema)
@@ -583,7 +583,7 @@ def test_cached_source_to_config_no_identity_fields():
     inner = DictSource([{"x": 1}], source_id="s")
     src = CachedSource(source=inner, cache_database=db, cache_path_prefix=("cache",))
     config = src.to_config()
-    for field in ("content_hash", "pipeline_hash", "tag_schema", "data_schema"):
+    for field in ("content_hash", "pipeline_hash", "key_schema", "data_schema"):
         assert field not in config, f"Identity field {field!r} must not be in source_config"
 
 
@@ -694,7 +694,7 @@ def test_source_proxy_from_node_descriptor_fields():
         "content_hash": "semantic_v0.1:abc123",
         "pipeline_hash": "semantic_v0.1:def456",
         "output_schema": {
-            "tag": {"x": "int64"},
+            "key": {"x": "int64"},
             "data": {"result": "int64"},
         },
     }
@@ -711,7 +711,7 @@ def test_source_proxy_from_config_backward_compat():
         "source_id": "my_src",
         "content_hash": "semantic_v0.1:abc123",
         "pipeline_hash": "semantic_v0.1:def456",
-        "tag_schema": {"x": "int64"},
+        "key_schema": {"x": "int64"},
         "data_schema": {"result": "int64"},
     }
     proxy = _source_proxy_from_config(source_config)
@@ -738,7 +738,7 @@ def add_one(x: int) -> int:
         return x + 1
 
     table = pa.table({"id": pa.array(["a", "b"], type=pa.large_string()), "x": pa.array([1, 2], type=pa.int64())})
-    source = ArrowTableSource(table, tag_columns=["id"], infer_nullable=True)
+    source = ArrowTableSource(table, key_columns=["id"], infer_nullable=True)
     pf = PythonDataFunction(add_one, output_keys="result")
     pod = FunctionPod(data_function=pf)
 
@@ -765,7 +765,7 @@ def test_function_node_stored_node_uri_from_descriptor():
         "pipeline_hash": "semantic_v0.1:def",
         "table_scope": "pipeline_hash",
         "node_uri": ["add_one", "v0", "python.function.v0", "schema_repr"],
-        "output_schema": {"tag": {"x": "int64"}, "data": {"result": "int64"}},
+        "output_schema": {"key": {"x": "int64"}, "data": {"result": "int64"}},
         "data_context_key": "std:v0.1:default",
     }
     node = FunctionNode.from_descriptor(descriptor, function_pod=None, input_stream=None, databases={})
@@ -785,8 +785,8 @@ def test_operator_node_has_node_uri():
     table_b = pa.table(
         {"key": pa.array(["a"], type=pa.large_string()), "val_b": pa.array([1], type=pa.int64())}
     )
-    src_a = ArrowTableSource(table_a, tag_columns=["key"], infer_nullable=True)
-    src_b = ArrowTableSource(table_b, tag_columns=["key"], infer_nullable=True)
+    src_a = ArrowTableSource(table_a, key_columns=["key"], infer_nullable=True)
+    src_b = ArrowTableSource(table_b, key_columns=["key"], infer_nullable=True)
 
     pipeline = Pipeline(name="test", pipeline_database=db)
     with pipeline:
diff --git a/tests/test_pipeline/test_status_observer_integration.py b/tests/test_pipeline/test_status_observer_integration.py
index b40bcd98..76f5b8c8 100644
--- a/tests/test_pipeline/test_status_observer_integration.py
+++ b/tests/test_pipeline/test_status_observer_integration.py
@@ -34,7 +34,7 @@ def _make_source(n: int = 3) -> ArrowTableSource:
         "id": pa.array([str(i) for i in range(n)], type=pa.large_string()),
         "x": pa.array([10 * (i + 1) for i in range(n)], type=pa.int64()),
     })
-    return ArrowTableSource(table, tag_columns=["id"], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=["id"], infer_nullable=True)
 
 
 def _get_function_node(pipeline: Pipeline):
@@ -157,12 +157,12 @@ def identity(x: int) -> int:
 
 
 # ---------------------------------------------------------------------------
-# 4. Queryable tag columns
+# 4. Queryable key columns
 # ---------------------------------------------------------------------------
 
 
-class TestQueryableTagColumns:
-    def test_tag_columns_in_status_table(self):
+class TestQueryableKeyColumns:
+    def test_key_columns_in_status_table(self):
         db = InMemoryArrowDatabase()
         source = _make_source(2)
 
@@ -172,7 +172,7 @@ def identity(x: int) -> int:
         pf = PythonDataFunction(identity, output_keys="result", executor=LocalPythonFunctionExecutor())
         pod = FunctionPod(pf)
 
-        pipeline = Pipeline(name="test_tags_status", pipeline_database=db)
+        pipeline = Pipeline(name="test_keys_status", pipeline_database=db)
         with pipeline:
             pod(source, label="ident")
 
@@ -183,7 +183,7 @@ def identity(x: int) -> int:
         status = obs.get_status()
 
         assert status is not None
-        # "id" tag column should be a separate column, not JSON
+        # "id" key column should be a separate column, not JSON
         assert "id" in status.column_names
         id_values = sorted(set(status.column("id").to_pylist()))
         assert id_values == ["0", "1"]
@@ -272,7 +272,7 @@ def test_mixed_results_tracked_correctly(self):
                 "id": pa.array(["a", "b", "c"], type=pa.large_string()),
                 "x": pa.array([10, 0, 30], type=pa.int64()),
             }),
-            tag_columns=["id"],
+            key_columns=["id"],
             infer_nullable=True,
         )
 
@@ -413,7 +413,7 @@ def identity(x: int) -> int:
         assert "_status_node_label" not in status.column_names
         assert "_status_node_hash" not in status.column_names
 
-        # Tag column should also be present
+        # Key column should also be present
         assert "id" in status.column_names
 
 
diff --git a/tests/test_pipeline/test_sync_orchestrator.py b/tests/test_pipeline/test_sync_orchestrator.py
index 1f51d368..75cabbae 100644
--- a/tests/test_pipeline/test_sync_orchestrator.py
+++ b/tests/test_pipeline/test_sync_orchestrator.py
@@ -17,14 +17,14 @@
 from orcapod.pipeline.sync_orchestrator import SyncPipelineOrchestrator
 
 
-def _make_source(tag_col, data_col, data):
+def _make_source(key_col, data_col, data):
     table = pa.table(
         {
-            tag_col: pa.array(data[tag_col], type=pa.large_string()),
+            key_col: pa.array(data[key_col], type=pa.large_string()),
             data_col: pa.array(data[data_col], type=pa.int64()),
         }
     )
-    return ArrowTableSource(table, tag_columns=[tag_col], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=[key_col], infer_nullable=True)
 
 
 def double_value(value: int) -> int:
@@ -136,12 +136,12 @@ def on_node_start(self, node_label, node_hash, **kwargs):
                 events.append(("node_start", node_label))
             def on_node_end(self, node_label, node_hash, **kwargs):
                 events.append(("node_end", node_label))
-            def on_data_start(self, node_label, tag, data):
+            def on_data_start(self, node_label, key, data):
                 events.append(("data_start",))
-            def on_data_end(self, node_label, tag, input_pkt, output_pkt, cached):
+            def on_data_end(self, node_label, key, input_pkt, output_pkt, cached):
                 events.append(("data_end", cached))
-            def on_data_crash(self, node_label, tag, data, exc): pass
-            def create_data_logger(self, tag, data, **kwargs):
+            def on_data_crash(self, node_label, key, data, exc): pass
+            def create_data_logger(self, key, data, **kwargs):
                 from orcapod.pipeline.observer import _NOOP_LOGGER
                 return _NOOP_LOGGER
             def contextualize(self, *identity_path):
@@ -217,12 +217,12 @@ def on_node_start(self, node_label, node_hash, **kwargs):
                 events.append(("node_start", node_label))
             def on_node_end(self, node_label, node_hash, **kwargs):
                 events.append(("node_end", node_label))
-            def on_data_start(self, node_label, tag, data):
+            def on_data_start(self, node_label, key, data):
                 events.append(("data_start",))
-            def on_data_end(self, node_label, tag, input_pkt, output_pkt, cached):
+            def on_data_end(self, node_label, key, input_pkt, output_pkt, cached):
                 events.append(("data_end",))
-            def on_data_crash(self, node_label, tag, data, exc): pass
-            def create_data_logger(self, tag, data, **kwargs):
+            def on_data_crash(self, node_label, key, data, exc): pass
+            def create_data_logger(self, key, data, **kwargs):
                 from orcapod.pipeline.observer import _NOOP_LOGGER
                 return _NOOP_LOGGER
             def contextualize(self, *identity_path):
@@ -351,8 +351,8 @@ def test_materialized_stream_has_same_content_hash(self):
         stream = SyncPipelineOrchestrator._materialize_as_stream(buf, op_node)
         assert stream.content_hash() == op_node.content_hash()
 
-    def test_materialized_stream_preserves_system_tags(self):
-        """System tag column names in materialized stream should match original."""
+    def test_materialized_stream_preserves_system_keys(self):
+        """System key column names in materialized stream should match original."""
         src_a = _make_source("key", "value", {"key": ["a", "b"], "value": [10, 20]})
         src_b = _make_source("key", "score", {"key": ["a", "b"], "score": [100, 200]})
         from orcapod.core.operators.join import Join
@@ -365,13 +365,13 @@ def test_materialized_stream_preserves_system_tags(self):
 
         stream = SyncPipelineOrchestrator._materialize_as_stream(buf, op_node)
 
-        expected_tag_schema = op_node.output_schema(columns={"system_tags": True})[0]
-        actual_tag_schema = stream.output_schema(columns={"system_tags": True})[0]
-        assert expected_tag_schema == actual_tag_schema
+        expected_key_schema = op_node.output_schema(columns={"system_keys": True})[0]
+        actual_key_schema = stream.output_schema(columns={"system_keys": True})[0]
+        assert expected_key_schema == actual_key_schema
 
-    def test_operator_with_materialized_upstream_produces_correct_system_tags(self):
+    def test_operator_with_materialized_upstream_produces_correct_system_keys(self):
         """When an operator receives a materialized stream, its output system
-        tags should embed the correct pipeline hashes (same as if it received
+        keys should embed the correct pipeline hashes (same as if it received
         the original stream)."""
         src_a = _make_source("key", "value", {"key": ["a", "b"], "value": [10, 20]})
         src_b = _make_source("key", "score", {"key": ["a", "b"], "score": [100, 200]})
@@ -392,11 +392,11 @@ def test_operator_with_materialized_upstream_produces_correct_system_tags(self):
         join_node = ON(join_op, input_streams=[src_a, src_b])
         join_node.run()
 
-        # Compare system tag schemas — should match
+        # Compare system key schemas — should match
         orch_join = orch_pipeline.compiled_nodes["join"]
-        orch_tag_schema = orch_join.output_schema(columns={"system_tags": True})[0]
-        pull_tag_schema = join_node.output_schema(columns={"system_tags": True})[0]
-        assert orch_tag_schema == pull_tag_schema
+        orch_key_schema = orch_join.output_schema(columns={"system_keys": True})[0]
+        pull_key_schema = join_node.output_schema(columns={"system_keys": True})[0]
+        assert orch_key_schema == pull_key_schema
 
 
 class TestSyncObserverInjection:
@@ -427,12 +427,12 @@ def on_node_start(self, node_label, node_hash, **kwargs):
                 events.append(("node_start", node_label))
             def on_node_end(self, node_label, node_hash, **kwargs):
                 events.append(("node_end", node_label))
-            def on_data_start(self, node_label, tag, data):
+            def on_data_start(self, node_label, key, data):
                 events.append(("data_start", node_label))
-            def on_data_end(self, node_label, tag, input_pkt, output_pkt, cached):
+            def on_data_end(self, node_label, key, input_pkt, output_pkt, cached):
                 events.append(("data_end", node_label, cached))
-            def on_data_crash(self, node_label, tag, data, exc): pass
-            def create_data_logger(self, tag, data, **kwargs):
+            def on_data_crash(self, node_label, key, data, exc): pass
+            def create_data_logger(self, key, data, **kwargs):
                 from orcapod.pipeline.observer import _NOOP_LOGGER
                 return _NOOP_LOGGER
             def contextualize(self, *identity_path):
@@ -471,12 +471,12 @@ def on_run_start(self, run_id, pipeline_uri=""): pass
             def on_run_end(self, run_id): pass
             def on_node_start(self, node_label, node_hash, **kwargs): pass
             def on_node_end(self, node_label, node_hash, **kwargs): pass
-            def on_data_start(self, node_label, tag, data): pass
-            def on_data_end(self, node_label, tag, input_pkt, output_pkt, cached):
+            def on_data_start(self, node_label, key, data): pass
+            def on_data_end(self, node_label, key, input_pkt, output_pkt, cached):
                 if node_label == "doubler":
                     events1.append(cached)
-            def on_data_crash(self, node_label, tag, data, exc): pass
-            def create_data_logger(self, tag, data, **kwargs):
+            def on_data_crash(self, node_label, key, data, exc): pass
+            def create_data_logger(self, key, data, **kwargs):
                 from orcapod.pipeline.observer import _NOOP_LOGGER
                 return _NOOP_LOGGER
             def contextualize(self, *identity_path):
@@ -493,12 +493,12 @@ def on_run_start(self, run_id, pipeline_uri=""): pass
             def on_run_end(self, run_id): pass
             def on_node_start(self, node_label, node_hash, **kwargs): pass
             def on_node_end(self, node_label, node_hash, **kwargs): pass
-            def on_data_start(self, node_label, tag, data): pass
-            def on_data_end(self, node_label, tag, input_pkt, output_pkt, cached):
+            def on_data_start(self, node_label, key, data): pass
+            def on_data_end(self, node_label, key, input_pkt, output_pkt, cached):
                 if node_label == "doubler":
                     events2.append(cached)
-            def on_data_crash(self, node_label, tag, data, exc): pass
-            def create_data_logger(self, tag, data, **kwargs):
+            def on_data_crash(self, node_label, key, data, exc): pass
+            def create_data_logger(self, key, data, **kwargs):
                 from orcapod.pipeline.observer import _NOOP_LOGGER
                 return _NOOP_LOGGER
             def contextualize(self, *identity_path):
@@ -528,10 +528,10 @@ def on_node_start(self, node_label, node_hash, **kwargs):
                 node_order.append(("start", node_label))
             def on_node_end(self, node_label, node_hash, **kwargs):
                 node_order.append(("end", node_label))
-            def on_data_start(self, node_label, tag, data): pass
-            def on_data_end(self, node_label, tag, input_pkt, output_pkt, cached): pass
-            def on_data_crash(self, node_label, tag, data, exc): pass
-            def create_data_logger(self, tag, data, **kwargs):
+            def on_data_start(self, node_label, key, data): pass
+            def on_data_end(self, node_label, key, input_pkt, output_pkt, cached): pass
+            def on_data_crash(self, node_label, key, data, exc): pass
+            def create_data_logger(self, key, data, **kwargs):
                 from orcapod.pipeline.observer import _NOOP_LOGGER
                 return _NOOP_LOGGER
             def contextualize(self, *identity_path):
diff --git a/tests/test_protocols/test_node_protocols.py b/tests/test_protocols/test_node_protocols.py
index 07276620..fbeda403 100644
--- a/tests/test_protocols/test_node_protocols.py
+++ b/tests/test_protocols/test_node_protocols.py
@@ -23,7 +23,7 @@ def _sample_source():
             "value": pa.array([1, 2], type=pa.int64()),
         }
     )
-    return ArrowTableSource(table, tag_columns=["key"], infer_nullable=True)
+    return ArrowTableSource(table, key_columns=["key"], infer_nullable=True)
 
 
 @pytest.fixture
diff --git a/tests/test_utils/test_arrow_utils.py b/tests/test_utils/test_arrow_utils.py
index 2831b90a..1014bd3f 100644
--- a/tests/test_utils/test_arrow_utils.py
+++ b/tests/test_utils/test_arrow_utils.py
@@ -18,13 +18,13 @@ def test_non_nullable_fields_preserved_in_data_table(self):
         """Fields with nullable=False in source table must remain nullable=False in data_table."""
         schema = pa.schema(
             [
-                pa.field("tag", pa.large_string(), nullable=False),
+                pa.field("key", pa.large_string(), nullable=False),
                 pa.field("val", pa.int64(), nullable=False),
             ]
         )
         table = pa.table(
             {
-                "tag": pa.array(["a"], type=pa.large_string()),
+                "key": pa.array(["a"], type=pa.large_string()),
                 "val": pa.array([1], type=pa.int64()),
             },
             schema=schema,
@@ -41,7 +41,7 @@ def test_nullable_fields_preserved_in_data_table(self):
         """Fields with nullable=True in source table must remain nullable=True in data_table."""
         table = pa.table(
             {
-                "tag": pa.array(["a"], type=pa.large_string()),
+                "key": pa.array(["a"], type=pa.large_string()),
                 "val": pa.array([1], type=pa.int64()),
             }
         )
@@ -57,14 +57,14 @@ def test_mixed_nullable_fields_preserved_in_data_table(self):
         """Mix of nullable and non-nullable fields must be preserved correctly."""
         schema = pa.schema(
             [
-                pa.field("tag", pa.large_string(), nullable=True),
+                pa.field("key", pa.large_string(), nullable=True),
                 pa.field("val_nullable", pa.int64(), nullable=True),
                 pa.field("val_non_nullable", pa.float64(), nullable=False),
             ]
         )
         table = pa.table(
             {
-                "tag": pa.array(["a"], type=pa.large_string()),
+                "key": pa.array(["a"], type=pa.large_string()),
                 "val_nullable": pa.array([1], type=pa.int64()),
                 "val_non_nullable": pa.array([1.5], type=pa.float64()),
             },