Skip to content

Runner

cognite_data_quality.runner

run_validation(*, client, rules_path, rules_format=None, datamodel=None, instance_space=None, db_name=None, table_name=None, verbose=True, limit=None, print_output=True, records_config=None, post_to_records=False, namespace_base='http://purl.org/cognite/')

Run SHACL validation by reading data directly from CDF.

Supports both DMS instance validation and RAW table validation:

- For DMS: provide datamodel and instance_space
- For RAW: provide db_name and table_name

Parameters:

Name Type Description Default
client CogniteClient

CogniteClient instance

required
rules_path Path | str

Path to SHACL rules file (.ttl, .json, or .yaml)

required
rules_format Literal['ttl', 'json', 'yaml'] | None

Format of rules file (auto-detected if None)

None
datamodel DataModelConfig | None

DMS data model config (for DMS validation)

None
instance_space str | None

DMS instance space (for DMS validation)

None
db_name str | None

RAW database name (for RAW validation)

None
table_name str | None

RAW table name (for RAW validation)

None
verbose bool | None

Print validation details

True
limit int | None

Maximum number of rows/instances to validate (None = all)

None
print_output bool

Print per-instance/row validation output

True
records_config RecordsConfig | None

Records API configuration

None
post_to_records bool

Post results to Records API

False
namespace_base str

Base namespace for URIs

'http://purl.org/cognite/'

Returns:

Type Description
ValidationResult

ValidationResult with conforms status and violations

Raises:

Type Description
ValueError

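If neither DMS nor RAW parameters are provided, or both are provided; if only one of db_name/table_name is given for RAW validation; if the datamodel or instance space cannot be resolved for DMS validation; or if post_to_records is True and no records configuration is available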

Examples:

DMS validation:

>>> result = run_validation(
...     client=client,
...     rules_path="rules.ttl",
...     datamodel=DataModelConfig(space="sp", external_id="dm", version="v1"),
...     instance_space="my_space"
... )

RAW validation:

>>> result = run_validation(
...     client=client,
...     rules_path="rules.ttl",
...     db_name="abb",
...     table_name="Example Motor List",
...     namespace_base="http://purl.org/cognite/raw/abb/Example%20Motor%20List/"
... )
Source code in cognite_data_quality/runner.py
def run_validation(
    *,
    client: CogniteClient,
    rules_path: Path | str,
    rules_format: Literal["ttl", "json", "yaml"] | None = None,
    datamodel: DataModelConfig | None = None,
    instance_space: str | None = None,
    db_name: str | None = None,
    table_name: str | None = None,
    verbose: bool | None = True,
    limit: int | None = None,
    print_output: bool = True,
    records_config: RecordsConfig | None = None,
    post_to_records: bool = False,
    namespace_base: str = "http://purl.org/cognite/",
) -> ValidationResult:
    """Validate CDF data against SHACL rules, reading the data straight from CDF.

    Exactly one of two modes is selected from the keyword arguments:

    - DMS mode: chosen when ``datamodel`` and/or ``instance_space`` is given.
      Whichever of the two is missing may still be supplied by the rule
      config loaded from ``rules_path``.
    - RAW mode: chosen when ``db_name`` and/or ``table_name`` is given; both
      are then mandatory.

    Args:
        client: CogniteClient instance used for all CDF access.
        rules_path: Path to the SHACL rules file (.ttl, .json, or .yaml).
        rules_format: Format of the rules file (auto-detected if None). Only
            forwarded to the rule loader in DMS mode.
        datamodel: DMS data model config (DMS mode).
        instance_space: DMS instance space (DMS mode).
        db_name: RAW database name (RAW mode).
        table_name: RAW table name (RAW mode).
        verbose: Print validation details. In RAW mode ``None`` is treated
            as ``False``; in DMS mode the value is handed to the rule-config
            resolver as given.
        limit: Maximum number of rows/instances to validate (None = all).
        print_output: Print per-instance/row validation output. When False,
            a single summary line is printed instead.
        records_config: Records API configuration.
        post_to_records: Post the results to the Records API after validation.
        namespace_base: Base namespace for URIs.

    Returns:
        ValidationResult with conforms status and violations.

    Raises:
        ValueError: If neither or both of the DMS and RAW parameter groups
            are supplied; if only one of ``db_name``/``table_name`` is given;
            if the data model or instance space cannot be resolved in DMS
            mode; or if ``post_to_records`` is True and no records
            configuration is available.

    Examples:
        DMS validation:
        >>> result = run_validation(
        ...     client=client,
        ...     rules_path="rules.ttl",
        ...     datamodel=DataModelConfig(space="sp", external_id="dm", version="v1"),
        ...     instance_space="my_space"
        ... )

        RAW validation:
        >>> result = run_validation(
        ...     client=client,
        ...     rules_path="rules.ttl",
        ...     db_name="abb",
        ...     table_name="Example Motor List",
        ...     namespace_base="http://purl.org/cognite/raw/abb/Example%20Motor%20List/"
        ... )
    """
    # A mode counts as requested as soon as any one of its parameters is set.
    raw_requested = not (db_name is None and table_name is None)
    dms_requested = not (datamodel is None and instance_space is None)

    if raw_requested and dms_requested:
        raise ValueError(
            "Cannot use both DMS (datamodel/instance_space) and RAW (db_name/table_name) parameters. "
            "Choose one validation mode."
        )
    if not (raw_requested or dms_requested):
        raise ValueError(
            "Must provide either DMS parameters (datamodel, instance_space) or RAW parameters (db_name, table_name)"
        )

    # Millisecond-timestamp run identifier; only the RAW validator receives it.
    job_run_id = "run_" + str(int(time.time() * 1000))

    if raw_requested:
        # ---- RAW table validation ----
        if db_name is None or table_name is None:
            raise ValueError("Both db_name and table_name are required for RAW validation")

        result = _validate_raw_table(
            client=client,
            db_name=db_name,
            table_name=table_name,
            rules_path=Path(rules_path),
            namespace_base=namespace_base,
            verbose=bool(verbose),
            limit=limit,
            print_output=print_output,
            records_config=records_config,
            job_run_id=job_run_id,
        )
    else:
        # ---- DMS instance validation ----
        rules_file = Path(rules_path)
        shacl_rules, rule_config = load_rules(
            rules_path,
            format=rules_format,
            client=client,
            shacl_base_dir=rules_file.parent,
        )
        # Merge explicit arguments with whatever the rule config carries.
        resolved = _resolve_rule_config(
            rule_config,
            datamodel=datamodel,
            instance_space=instance_space,
            auto_load_depth=None,
            verbose=verbose,
            records_config=records_config,
        )

        if resolved.datamodel is None:
            raise ValueError("datamodel must be provided or resolved from YAML config")
        if not resolved.instance_space:
            raise ValueError("instance_space must be provided or set in rule config (YAML)")

        # For an existing Turtle file, restrict validation to the views the
        # shapes target; otherwise leave the view filter unset.
        view_external_ids = None
        if rules_file.suffix.lower() == ".ttl" and rules_file.exists():
            target_views = extract_target_views_from_shacl(rules_file)
            view_external_ids = sorted(target_views) if target_views else None

        result = validate_instances(
            client=client,
            shacl_rules=shacl_rules,
            datamodel=resolved.datamodel,
            instance_space=resolved.instance_space,
            view_external_ids=view_external_ids,
            verbose=resolved.verbose,
            limit=limit,
            print_validation_output=print_output,
            records_config=resolved.records,
            namespace_base=namespace_base,
        )

    if not print_output:
        # Per-item output was suppressed, so emit a one-line summary instead.
        # NOTE(review): in RAW mode the "Rows" figure is the number of
        # violations, not the number of rows read - confirm this is intended.
        if raw_requested:
            entity_type, count = "rows", len(result.violations)
        else:
            entity_type, count = "instances", result.instance_count
        print(
            f"Validation complete. Conforms: {result.conforms}, "
            f"{entity_type.capitalize()}: {count}, Violations: {len(result.violations)}"
        )

    if post_to_records:
        # RAW mode uses the caller's config directly; DMS mode uses the
        # config produced by rule-config resolution.
        if raw_requested:
            config_to_use = records_config
        else:
            config_to_use = resolved.records

        if config_to_use is None:
            raise ValueError("records_config must be provided when post_to_records=True")

        ensure_records_infrastructure(client, config_to_use)
        _post_records(client, result, config_to_use)

    return result