Slide 41
Slide 41 text
# sklearn-pandas
# Build a tunable model: DataFrame featurisation -> feature selection ->
# random forest, with hyper-parameters chosen by grid search.
#
# NOTE(review): `sklearn.preprocessing.Imputer` and the `grid_search` module
# are legacy scikit-learn names (later superseded by
# `sklearn.impute.SimpleImputer` and `sklearn.model_selection`) -- confirm the
# scikit-learn version this snippet targets before reusing it.

# Per-column transformations: the 'age' column is passed through the imputer
# and then the scaler, in that order.
mapper = DataFrameMapper([
    (['age'], [sklearn.preprocessing.Imputer(),
               sklearn.preprocessing.StandardScaler()]),
    ...])  # `...` stands in for further column mappings not shown here

# Chain the steps so the whole flow behaves as a single estimator; the step
# names given here are what the grid-search keys below refer to.
pipeline = sklearn.pipeline.Pipeline([
    ('featurise', mapper),
    ('feature_selection',
     feature_selection.SelectKBest(k=100)),
    ('random_forest', ensemble.RandomForestClassifier())])

# Keys use the `<step name>__<parameter>` form, so `feature_selection__k`
# overrides the `k=100` set on SelectKBest above during the search.
cv_params = dict(
    feature_selection__k=[100, 200],
    random_forest__n_estimators=[50, 100, 200])
cv = grid_search.GridSearchCV(pipeline, param_grid=cv_params)

# `best_estimator_` is an attribute of the search object, not a free-standing
# name. It is only available after `cv.fit(...)` has been called on the
# training data, which this snippet does not show.
best_model = cv.best_estimator_