Skip to content

contract.csv

CsvParams dataclass

CsvParams(
    path: PathLike,
    delimiter: Optional[str] = None,
    strict_header: bool = True,
    index: Optional[str] = None,
)

Bases: FilePathBaseParam

Parameters for CSV file processing.

Extends the base file path parameters with CSV-specific options.

Attributes:

Name Type Description
delimiter Optional[str]

Custom delimiter character for CSV parsing. If None, the default comma delimiter is used.

strict_header bool

If True, treats the first row as a header. Otherwise, no header is assumed.

index Optional[str]

Column name to set as the DataFrame index. If None, default numeric indices are used.

Csv

Csv(
    name: str,
    *,
    description: Optional[str] = None,
    reader_params: TReaderParams = UnsetParams,
    **kwargs,
)

Bases: DataStream[DataFrame, CsvParams]

CSV file data stream provider.

A data stream implementation for reading CSV files into pandas DataFrames with configurable parameters for delimiter, header handling, and indexing.

Parameters:

Name Type Description Default
DataStream

Base class for data stream providers.

required

Examples:

from contraqctor.contract.csv import Csv, CsvParams

# Create and load a CSV stream
params = CsvParams(path="data/european_data.csv", delimiter=";")
csv_stream = Csv("measurements", reader_params=params)
csv_stream.load()

# Access the DataFrame
df = csv_stream.data
filtered = df[df["temperature"] > 25]
Source code in src/contraqctor/contract/base.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def __init__(
    self: Self,
    name: str,
    *,
    description: Optional[str] = None,
    reader_params: _typing.TReaderParams = _typing.UnsetParams,
    **kwargs,
) -> None:
    """Initialize the data stream's identity, reader parameters and state.

    Args:
        name: Identifier of the stream. Must not contain "::".
        description: Optional human-readable description of the stream.
        reader_params: Parameters for the reader. Defaults to the
            ``UnsetParams`` sentinel.
        **kwargs: Accepted but not used by this initializer.

    Raises:
        ValueError: If ``name`` contains "::".
    """
    # Validate before storing anything on the instance.
    if "::" in name:
        raise ValueError("Name cannot contain '::' character.")

    self._name = name
    self._description = description
    self._reader_params = reader_params

    # A new stream has no parent and holds no data yet.
    self._parent: Optional["DataStream"] = None
    self._data = _typing.UnsetData

name property

name: str

Get the name of the data stream.

Returns:

Name Type Description
str str

Name identifier of the data stream.

resolved_name property

resolved_name: str

Get the full hierarchical name of the data stream.

Generates a path-like name showing the stream's position in the hierarchy, using '::' as a separator between parent and child names.

Returns:

Name Type Description
str str

The fully resolved name including all parent names.

description property

description: Optional[str]

Get the description of the data stream.

Returns:

Type Description
Optional[str]

Optional[str]: Description of the data stream, or None if not provided.

parent property

Get the parent data stream.

Returns:

Type Description
Optional[DataStream]

Optional[DataStream]: Parent data stream, or None if this is a root stream.

is_collection property

is_collection: bool

Check if this data stream is a collection of other streams.

Returns:

Name Type Description
bool bool

True if this is a collection stream, False otherwise.

reader_params property

reader_params: TReaderParams

Get the parameters for the data reader.

Returns:

Name Type Description
TReaderParams TReaderParams

Parameters for the data reader.

at property

at: _AtProtocol

Get a child data stream by name.

Parameters:

Name Type Description Default
name

Name of the child data stream to retrieve.

required

Returns:

Name Type Description
DataStream _AtProtocol

The child data stream with the given name.

Raises:

Type Description
NotImplementedError

If the data stream does not support child access.

Examples:

# Access stream in a collection
collection = data_collection.load()
temp_stream = collection.at("temperature")

# Or using dictionary-style syntax
humidity_stream = collection["humidity"]

has_data property

has_data: bool

Check if the data stream has loaded data.

Returns:

Name Type Description
bool bool

True if data has been loaded, False otherwise.

has_error property

has_error: bool

Check if the data stream encountered an error during loading.

Returns:

Name Type Description
bool bool

True if an error occurred, False otherwise.

data property

data: TData

Get the loaded data.

Returns:

Name Type Description
TData TData

The loaded data.

Raises:

Type Description
ValueError

If data has not been loaded yet.

set_parent

set_parent(parent: DataStream) -> None

Set the parent data stream.

Parameters:

Name Type Description Default
parent DataStream

The parent data stream to set.

required
Source code in src/contraqctor/contract/base.py
164
165
166
167
168
169
170
def set_parent(self, parent: "DataStream") -> None:
    """Attach this stream to a parent stream.

    Any previously stored parent reference is overwritten.

    Args:
        parent: The data stream to record as this stream's parent.
    """
    self._parent = parent

read

read(
    reader_params: Optional[TReaderParams] = None,
) -> TData

Read data using the configured reader.

Parameters:

Name Type Description Default
reader_params Optional[TReaderParams]

Optional parameters to override the default reader parameters.

None

Returns:

Name Type Description
TData TData

Data read from the source.

Raises:

Type Description
ValueError

If reader parameters are not set.

Source code in src/contraqctor/contract/base.py
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
def read(self, reader_params: Optional[_typing.TReaderParams] = None) -> _typing.TData:
    """Read data from the source through the stream's reader.

    The result is returned to the caller; this method does not store it on
    the stream.

    Args:
        reader_params: Parameters to use for this call. When None, the
            parameters already held by the stream are used instead.

    Returns:
        TData: Whatever the reader produces for the effective parameters.

    Raises:
        ValueError: If the effective reader parameters are unset.
    """
    # An explicit argument wins; otherwise fall back to the stored parameters.
    effective_params = self._reader_params if reader_params is None else reader_params
    if _typing.is_unset(effective_params):
        raise ValueError("Reader parameters are not set. Cannot read data.")
    return self._reader(effective_params)

bind_reader_params

bind_reader_params(params: TReaderParams) -> Self

Bind reader parameters to the data stream.

Parameters:

Name Type Description Default
params TReaderParams

Parameters to bind to the data stream's reader.

required

Returns:

Name Type Description
Self Self

The data stream instance for method chaining.

Raises:

Type Description
ValueError

If reader parameters have already been set.

Source code in src/contraqctor/contract/base.py
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def bind_reader_params(self, params: _typing.TReaderParams) -> Self:
    """Attach reader parameters to a stream that does not have any yet.

    Args:
        params: Reader parameters to store on the stream.

    Returns:
        Self: This stream, so calls can be chained.

    Raises:
        ValueError: If the stream already holds reader parameters.
    """
    # Binding is one-shot: refuse to overwrite parameters that are already set.
    already_bound = not _typing.is_unset(self._reader_params)
    if already_bound:
        raise ValueError("Reader parameters are already set. Cannot bind again.")

    self._reader_params = params
    return self

clear

clear() -> Self

Clear the loaded data from the data stream.

Resets the data to an unset state, allowing for reloading.

Returns:

Name Type Description
Self Self

The data stream instance for method chaining.

Source code in src/contraqctor/contract/base.py
313
314
315
316
317
318
319
320
321
322
def clear(self) -> Self:
    """Drop whatever the stream currently holds as data.

    The stored data is replaced with the ``UnsetData`` sentinel, so the
    stream can be loaded again afterwards.

    Returns:
        Self: This stream, so calls can be chained.
    """
    self._data = _typing.UnsetData
    return self

load

load() -> Self

Load data into the data stream.

Reads data from the source and stores it in the data stream.

Returns:

Name Type Description
Self Self

The data stream instance for method chaining.

Examples:

from contraqctor.contract import csv

# Create and load a CSV stream
params = csv.CsvParams(path="data/measurements.csv")
csv_stream = csv.Csv("measurements", reader_params=params)
csv_stream.load()

# Access the data
df = csv_stream.data
print(f"Loaded {len(df)} rows")
Source code in src/contraqctor/contract/base.py
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
def load(self) -> Self:
    """Load data into the data stream.

    Reads data from the source and stores it in the data stream.

    Returns:
        Self: The data stream instance for method chaining.

    Examples:
        ```python
        from contraqctor.contract import csv

        # Create and load a CSV stream
        params = csv.CsvParams(path="data/measurements.csv")
        csv_stream = csv.Csv("measurements", reader_params=params)
        csv_stream.load()

        # Access the data
        df = csv_stream.data
        print(f"Loaded {len(df)} rows")
        ```
    """
    try:
        self._data = self.read()
    except Exception as e:  # pylint: disable=broad-except
        self._data = _typing.ErrorOnLoad(self, exception=e)
    return self

collect_errors

collect_errors() -> List[ErrorOnLoad]

Collect all errors from this stream and its children.

Performs a depth-first traversal to gather all ErrorOnLoad instances.

Returns:

Type Description
List[ErrorOnLoad]

List[ErrorOnLoad]: List of all errors raised on load encountered in the hierarchy.

Source code in src/contraqctor/contract/base.py
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
def collect_errors(self) -> List[_typing.ErrorOnLoad]:
    """Gather the load errors of this stream and of every stream below it.

    This stream's own error (if any) comes first, followed by the errors of
    each child in iteration order, collected recursively (depth-first).
    Children that are None are skipped.

    Returns:
        List[ErrorOnLoad]: All load errors found in the hierarchy; empty if
            there are none.
    """
    # When has_error is true, _data holds the ErrorOnLoad recorded for this stream.
    collected = [cast(_typing.ErrorOnLoad, self._data)] if self.has_error else []
    for child in self:
        if child is not None:
            collected.extend(child.collect_errors())
    return collected

load_all

load_all(strict: bool = False) -> Self

Recursively load this data stream and all child streams.

Performs depth-first traversal to load all streams in the hierarchy.

Parameters:

Name Type Description Default
strict bool

If True, raises as soon as a child stream reports an error after loading; otherwise errors are left on the failing streams and can be gathered with collect_errors().

False

Returns:

Name Type Description
Self Self

The data stream instance for method chaining.

Raises:

Type Description
Exception

If strict is True and an exception occurs during loading.

Examples:

# Load all streams, then inspect any errors
collection.load_all(strict=False)

for error in collection.collect_errors():
    print(f"Error while loading: {error}")
Source code in src/contraqctor/contract/base.py
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
def load_all(self, strict: bool = False) -> Self:
    """Recursively load this data stream and all child streams.

    Loads this stream first, then visits each child in iteration order and
    calls ``load_all`` on it (depth-first). Children that are None are
    skipped.

    Args:
        strict: If True, raise as soon as a child stream reports an error
            after being loaded. If False, nothing is raised by this check
            and errors can be gathered afterwards with ``collect_errors()``.

    Returns:
        Self: The data stream instance for method chaining.

    Raises:
        Exception: If strict is True and a child stream has an error after
            loading.

    Note:
        The strict check is applied to child streams only. An error on the
        stream this method is called on is not raised by that check.

    Examples:
        ```python
        # Load all streams, then inspect any errors
        collection.load_all(strict=False)

        for error in collection.collect_errors():
            print(f"Error while loading: {error}")
        ```
    """
    self.load()
    for child in self:
        if child is None:
            continue
        child.load_all(strict=strict)
        # Checked after the recursive call so errors on the child are surfaced here.
        if child.has_error and strict:
            cast(_typing.ErrorOnLoad, child.data).raise_from_error()
    return self