Skip to content

contract.mux

MapFromPathsParams dataclass

MapFromPathsParams(
    paths: List[PathLike],
    include_glob_pattern: List[str],
    inner_data_stream: Type[_TDataStream],
    inner_param_factory: Callable[[str], TReaderParams],
    as_collection: bool = True,
    exclude_glob_pattern: List[str] = list(),
    inner_descriptions: dict[str, Optional[str]] = dict(),
)

Bases: Generic[_TDataStream]

Parameters for creating multiple data streams from file paths.

Defines parameters for locating files and creating data streams for each one.

Attributes:

Name Type Description
paths List[PathLike]

List of directory paths to search for files.

include_glob_pattern List[str]

List of glob patterns to match files to include.

inner_data_stream Type[_TDataStream]

Type of DataStream to create for each matched file.

inner_param_factory Callable[[str], TReaderParams]

Function that creates reader params from file paths.

as_collection bool

Whether to return results as a collection. Defaults to True.

exclude_glob_pattern List[str]

List of glob patterns for files to exclude.

inner_descriptions dict[str, Optional[str]]

Dictionary mapping file stems to descriptions for streams.

MapFromPaths

MapFromPaths(
    name: str,
    *,
    description: Optional[str] = None,
    reader_params: Optional[TReaderParams] = None,
    **kwargs,
)

Bases: DataStreamCollectionBase[_TDataStream, MapFromPathsParams]

File path mapper data stream provider.

A data stream implementation for creating multiple child data streams by searching for files matching glob patterns and creating a stream for each.

Parameters:

Name Type Description Default
name str

Name identifier for the data stream.

required

description Optional[str]

Optional description of the data stream.

None

reader_params Optional[TReaderParams]

Optional parameters for the data reader.

None

Examples:

from contraqctor.contract import mux, text

# Define a factory function for TextParams
def create_text_params(file_path):
    return text.TextParams(path=file_path)

# Create and load a text file collection
params = mux.MapFromPathsParams(
    paths=["documents/"],
    include_glob_pattern=["*.txt"],
    inner_data_stream=text.Text,
    inner_param_factory=create_text_params
)

docs = mux.MapFromPaths("documents", reader_params=params).load()
readme = docs["readme"].data
Source code in src/contraqctor/contract/base.py
317
318
319
320
321
322
323
324
325
326
327
def __init__(
    self: Self,
    name: str,
    *,
    description: Optional[str] = None,
    reader_params: Optional[_typing.TReaderParams] = None,
    **kwargs,
) -> None:
    """Initialize the data stream collection.

    Args:
        name: Name identifier for the data stream.
        description: Optional human-readable description of the stream.
        reader_params: Optional parameters for the data reader.
        **kwargs: Additional keyword arguments forwarded to the base class.
    """
    # Delegate common stream setup (name/description/reader params) to the base class.
    super().__init__(name=name, description=description, reader_params=reader_params, **kwargs)
    # Lookup table of child-stream name -> child stream; rebuilt on every (re)load.
    self._hashmap: Dict[str, TDataStream] = {}
    self._update_hashmap()

name property

name: str

Get the name of the data stream.

Returns:

Name Type Description
str str

Name identifier of the data stream.

resolved_name property

resolved_name: str

Get the full hierarchical name of the data stream.

Generates a path-like name showing the stream's position in the hierarchy, using '::' as a separator between parent and child names.

Returns:

Name Type Description
str str

The fully resolved name including all parent names.

description property

description: Optional[str]

Get the description of the data stream.

Returns:

Type Description
Optional[str]

Optional[str]: Description of the data stream, or None if not provided.

parent property

Get the parent data stream.

Returns:

Type Description
Optional[DataStream]

Optional[DataStream]: Parent data stream, or None if this is a root stream.

is_collection property

is_collection: bool

Check if this data stream is a collection of other streams.

Returns:

Name Type Description
bool bool

True if this is a collection stream, False otherwise.

reader_params property

reader_params: TReaderParams

Get the parameters for the data reader.

Returns:

Name Type Description
TReaderParams TReaderParams

Parameters for the data reader.

has_data property

has_data: bool

Check if the data stream has loaded data.

Returns:

Name Type Description
bool bool

True if data has been loaded, False otherwise.

data property

data: TData

Get the loaded data.

Returns:

Name Type Description
TData TData

The loaded data.

Raises:

Type Description
ValueError

If data has not been loaded yet.

read

read(
    reader_params: Optional[TReaderParams] = None,
) -> TData

Read data using the configured reader.

Parameters:

Name Type Description Default
reader_params Optional[TReaderParams]

Optional parameters to override the default reader parameters.

None

Returns:

Name Type Description
TData TData

Data read from the source.

Raises:

Type Description
ValueError

If reader parameters are not set.

Source code in src/contraqctor/contract/base.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def read(self, reader_params: Optional[_typing.TReaderParams] = None) -> _typing.TData:
    """Read and return data from the source.

    Args:
        reader_params: Parameters overriding the ones bound to this stream.
            When None, the stream's own reader parameters are used.

    Returns:
        TData: The data produced by the configured reader.

    Raises:
        ValueError: If no reader parameters are available.
    """
    params = self._reader_params if reader_params is None else reader_params
    if _typing.is_unset(params):
        raise ValueError("Reader parameters are not set. Cannot read data.")
    return self._reader(params)

bind_reader_params

bind_reader_params(params: TReaderParams) -> Self

Bind reader parameters to the data stream.

Parameters:

Name Type Description Default
params TReaderParams

Parameters to bind to the data stream's reader.

required

Returns:

Name Type Description
Self Self

The data stream instance for method chaining.

Raises:

Type Description
ValueError

If reader parameters have already been set.

Source code in src/contraqctor/contract/base.py
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
def bind_reader_params(self, params: _typing.TReaderParams) -> Self:
    """Attach reader parameters to this data stream.

    Args:
        params: Reader parameters to associate with the stream.

    Returns:
        Self: This stream, allowing method chaining.

    Raises:
        ValueError: If parameters were already bound earlier.
    """
    if _typing.is_unset(self._reader_params):
        self._reader_params = params
        return self
    raise ValueError("Reader parameters are already set. Cannot bind again.")

at

at(name: str) -> TDataStream

Get a child data stream by name.

Parameters:

Name Type Description Default
name str

Name of the child data stream to retrieve.

required

Returns:

Name Type Description
TDataStream TDataStream

The child data stream with the given name.

Raises:

Type Description
ValueError

If data has not been loaded yet.

KeyError

If no child stream with the given name exists.

Source code in src/contraqctor/contract/base.py
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
@override
def at(self, name: str) -> TDataStream:
    """Look up a child data stream by its name.

    Args:
        name: Name of the child stream to fetch.

    Returns:
        TDataStream: The matching child data stream.

    Raises:
        ValueError: If the collection has not been loaded yet.
        KeyError: If no child carries the requested name.
    """
    if not self.has_data:
        raise ValueError("data streams have not been read yet. Cannot access data streams.")
    if name not in self._hashmap:
        raise KeyError(f"Stream with name: '{name}' not found in data streams.")
    return self._hashmap[name]

load

load()

Load data for this collection.

Overrides the base method to add validation that loaded data is a list of DataStreams.

Returns:

Name Type Description
Self

The collection instance for method chaining.

Raises:

Type Description
ValueError

If loaded data is not a list of DataStreams.

Source code in src/contraqctor/contract/base.py
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
@override
def load(self):
    """Load the collection and validate its contents.

    Extends the base implementation by verifying that the loaded payload
    is a list of DataStreams and refreshing the name lookup table.

    Returns:
        Self: This collection, allowing method chaining.

    Raises:
        ValueError: If the loaded data is not a list of DataStreams.
    """
    super().load()
    if isinstance(self._data, list):
        self._update_hashmap()
        return self
    # Reset to the unset sentinel so a failed load is not mistaken for valid data.
    self._data = _typing.UnsetData
    raise ValueError("Data must be a list of DataStreams.")

load_all

load_all(
    strict: bool = False,
) -> list[tuple[DataStream, Exception]]

Recursively load this data stream and all child streams.

Performs depth-first traversal to load all streams in the hierarchy.

Parameters:

Name Type Description Default
strict bool

If True, raises exceptions immediately; otherwise collects and returns them.

False

Returns:

Name Type Description
list list[tuple[DataStream, Exception]]

List of tuples containing streams and exceptions that occurred during loading.

Raises:

Type Description
Exception

If strict is True and an exception occurs during loading.

Examples:

# Load all streams and handle errors
errors = collection.load_all(strict=False)

if errors:
    for stream, error in errors:
        print(f"Error loading {stream.name}: {error}")
Source code in src/contraqctor/contract/base.py
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
def load_all(self, strict: bool = False) -> list[tuple["DataStream", Exception]]:
    """Recursively load this data stream and all child streams.

    Performs a depth-first traversal: this stream is loaded first, then
    each child stream is loaded recursively.

    Args:
        strict: If True, re-raise the first exception encountered while
            loading a child; otherwise collect failures and return them.

    Returns:
        list[tuple[DataStream, Exception]]: Streams paired with the exception
        raised while loading them. Empty if everything loaded cleanly.

    Raises:
        Exception: Propagated from a child load when ``strict`` is True.

    Examples:
        ```python
        # Load all streams and handle errors
        errors = collection.load_all(strict=False)

        if errors:
            for stream, error in errors:
                print(f"Error loading {stream.name}: {error}")
        ```
    """
    self.load()
    # Fix: the previous return annotation was an invalid Generator-style
    # parameterization of ``list``; a flat list of (stream, exception) pairs
    # is what this method actually builds and returns.
    exceptions: list[tuple["DataStream", Exception]] = []
    for stream in self:
        if stream is None:
            # Collections may yield placeholder slots; skip them.
            continue
        try:
            exceptions += stream.load_all(strict=strict)
        except Exception as e:
            if strict:
                raise  # bare raise preserves the original traceback
            exceptions.append((stream, e))
    return exceptions

iter_all

iter_all() -> Generator[DataStream, None, None]

Iterator for all child data streams, including nested collections.

Implements a depth-first traversal of the stream hierarchy.

Yields:

Name Type Description
DataStream DataStream

All recursively yielded child data streams.

Source code in src/contraqctor/contract/base.py
436
437
438
439
440
441
442
443
444
445
446
447
448
def iter_all(self) -> Generator[DataStream, None, None]:
    """Depth-first iterator over every child data stream.

    Yields each direct child that is a DataStream, and recurses into any
    child that is itself a collection so nested streams are included.

    Yields:
        DataStream: Each child stream, including recursively nested ones.
    """
    for child in self:
        if isinstance(child, DataStream):
            yield child
        if isinstance(child, DataStreamCollectionBase):
            yield from child.iter_all()