Slide 22
Slide 22 text
def _tree_to_dict(decision_tree, feature_names, fraudulent_class_idx=1):
    """Serialize a fitted scikit decision tree into a plain dictionary.

    The tree's per-node arrays are copied out of ``decision_tree.tree_`` and
    converted to lists of built-in ``int``/``float`` values, so the result
    contains concrete lists rather than lazy iterators or array objects.

    Args:
        decision_tree: A ``t.DecisionTreeClassifier`` or
            ``t.DecisionTreeRegressor`` exposing a ``tree_`` attribute
            (``t`` is presumably ``sklearn.tree`` -- confirm against the
            file's imports).
        feature_names: Feature names; stored unchanged in the result and
            forwarded to ``_features_used``.
        fraudulent_class_idx: For classifiers, the index along the last axis
            of ``tree_.value`` of the class whose per-node probability is
            reported. Defaults to 1 (the second class).

    Returns:
        A dict with keys ``feature_names``, ``features_used``,
        ``node_features``, ``node_thresholds``, ``left_children``,
        ``right_children``, ``probabilities`` and the deprecated, always
        empty ``encodings``.

    Raises:
        ValueError: If ``decision_tree`` is neither of the two supported
            tree types.
    """
    # This is where the internal tree structure lives in an sk DecisionTree
    tree = decision_tree.tree_
    if isinstance(decision_tree, t.DecisionTreeClassifier):
        # Per-node class weights of the first output; only output 0 is read.
        class_weights = tree.value[:, 0, :]
        # Normalize by the total over *all* classes. For a binary tree this
        # is exactly value[:, 0, 0] + value[:, 0, 1]; for more classes it
        # keeps the result a proper probability of the selected class.
        # A node with zero total weight yields 0/0 -> NaN, which is mapped
        # to 0.0 by nan_to_num; the warning for that case is silenced.
        with np.errstate(divide="ignore", invalid="ignore"):
            probs = np.nan_to_num(
                class_weights[:, fraudulent_class_idx]
                / class_weights.sum(axis=1)
            )
    elif isinstance(decision_tree, t.DecisionTreeRegressor):
        # For regressors the first value of the first output is reported
        # as-is under the "probabilities" key.
        probs = [v[0][0] for v in tree.value]
    else:
        raise ValueError("You can only serialize scikit decision trees!")
    return {
        "feature_names": feature_names,
        "features_used": _features_used(tree, feature_names),
        # Materialize as lists: on Python 3 a bare map() is a one-shot
        # iterator that cannot be JSON-serialized or traversed twice.
        "node_features": [int(f) for f in tree.feature],
        "node_thresholds": [float(th) for th in tree.threshold],
        "left_children": [int(c) for c in tree.children_left],
        "right_children": [int(c) for c in tree.children_right],
        "probabilities": [float(p) for p in probs],
        # Deprecated, moving these to Pipeline
        "encodings": {},
    }
Brittle to version changes!