Slide 34
Slide 34 text
@Giuliabianchl
@Giuliabianchl
#WiDS2021
CLASSE POSITIVE: ~25%, CLASSE NÉGATIVE: ~75%
PIPELINE GÉNÉRIQUE
# Generic text-classification pipeline: bag-of-words counts -> TF-IDF
# weighting -> classifier, followed by evaluation at a custom decision
# threshold applied to the predicted probabilities.

# Tokenise into 1- to 3-grams, keeping at most 500 features and dropping terms
# that appear in fewer than 1% or more than 90% of the documents.
count_vectorizer = CountVectorizer(
    strip_accents="ascii",
    lowercase=True,
    stop_words="english",
    ngram_range=(1, 3),
    max_features=500,
    min_df=0.01,
    max_df=0.90,
)
tfidf_transformer = TfidfTransformer()

# TODO: plug in the classifier to evaluate. It must expose `predict_proba`,
# because the predictions below are thresholded manually.
model = ...

pipeline = Pipeline([
    ("vect", count_vectorizer),
    ("tfidf", tfidf_transformer),
    ("clf", model),
])
pipeline = pipeline.fit(X_train, y_train)

predicted_probabilities = pipeline.predict_proba(X_test)

# TODO: set the probability threshold at or above which a sample is labelled
# positive.
pos_th = ...

# Compare the last probability column against the threshold in one vectorized
# step. NOTE(review): this presumes the last column is the positive class --
# confirm against `pipeline.classes_`.
predicted_classes = np.asarray(predicted_probabilities)[:, -1] >= pos_th

precision, recall, f1, support = precision_recall_fscore_support(
    y_test, predicted_classes
)
acc = accuracy_score(y_test, predicted_classes)

# Report only the metrics of the last class (index -1), rounded to 2 decimals.
perf = {
    "accuracy score": round(acc, 2),
    "precision": round(precision[-1], 2),
    "recall": round(recall[-1], 2),
}
34
Kaggle - Random acts of pizza