dpg.core#

Attributes#

Exceptions#

DPGError

Base exception class for DPG-specific errors

Classes#

Module Contents#

dpg.core.HAS_OMEGACONF = True#
dpg.core.DEFAULT_DPG_CONFIG#
exception dpg.core.DPGError[source]#

Bases: Exception

Base exception class for DPG-specific errors

class dpg.core.DecisionPredicateGraph(model: Any, feature_names: Iterable[str], target_names: Iterable[str] | None = None, config_file: str = 'config.yaml', dpg_config: Dict[str, Any] | None = None)[source]#
SUPPORTED_GRAPH_CONSTRUCTION_MODES#

Main class for converting tree-based ensemble models into interpretable graphs.

Converts the internal decision paths of a tree-based ensemble (Random Forest, AdaBoost, Extra Trees, …) into a compact directed graph — the Decision Predicate Graph — that exposes which feature conditions the model uses, how often, and in what order.

model#
feature_names#
target_names = None#
perc_var#
decimal_threshold#
n_jobs#
graph_construction_mode#
visualization_config#
fit(X_train: Any) Any[source]#

Main pipeline: Extract decision paths → Build graph → Generate visualization.

Parameters:

X_train – Training data of shape (n_samples, n_features).

Returns:

Visualizable graph object

Return type:

graphviz.Digraph

tracing_ensemble(case_id: int, sample: Any) Generator[List[str], None, None][source]#

Extract decision path for a single sample (generator version).

Parameters:
  • case_id – Sample identifier

  • sample – Feature values array, shape (n_features,).

Yields:

List[str] – One path segment per iteration, as a [prefix, event] pair, where the event is either a decision or a prediction.

tracing_ensemble_parallel(case_id: int, sample: Any) List[List[str]][source]#

Extract decision path for a single sample (list version for parallel workers).

Parameters:
  • case_id – Sample identifier used to name each path prefix.

  • sample – Feature values array, shape (n_features,).

Returns:

List of [prefix, event] pairs representing the full decision path across all trees in the ensemble.

filter_log(log: Any) Any[source]#

Filter paths based on frequency threshold.

Parameters:

log – DataFrame of extracted paths

Returns:

Paths that meet the perc_var frequency threshold.

Return type:

pd.DataFrame

discover_dfg(log: Any) Dict[Tuple[str, str], int][source]#

Build directed frequency graph from path logs.

Parameters:

log – DataFrame of decision paths

Returns:

Edge frequencies as {(source, target): count}

Return type:

Dict[Tuple[str, str], int]

discover_dfg_execution_trace(log: Any) Dict[Tuple[str, str], int][source]#

Build a directed frequency graph directly from the raw execution trace.

If perc_var > 0, infrequent edges are removed using a minimum edge count of total_cases * perc_var, where total_cases is the number of unique case IDs in the raw trace log.

Parameters:

log – Raw DataFrame of decision paths

Returns:

Edge frequencies as {(source, target): count}

Return type:

Dict[Tuple[str, str], int]

generate_dot(dfg: Dict[Tuple[str, str], int]) Any[source]#

Convert frequency graph to Graphviz format.

Parameters:

dfg – Directed frequency graph

Returns:

Visualizable graph

Return type:

graphviz.Digraph

to_networkx(graphviz_graph: Any) Tuple[Any, List[List[str]]][source]#

Convert Graphviz graph to NetworkX format.

Parameters:

graphviz_graph – Input graph

Returns:

NetworkX graph and node metadata

Return type:

Tuple[nx.DiGraph, List[List[str]]]