Expérimentation avec les fonctions nécessaires

This commit is contained in:
François Pelletier 2019-09-28 00:47:43 -04:00
parent b2b8d26b4f
commit f48c6823b0

View file

@ -2,11 +2,18 @@
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.utils.multiclass import unique_labels
from sklearn.model_selection import cross_validate
# Paths to the labelled question corpora used below:
# one file for training, one held-out file for testing.
# NOTE(review): relative paths — assumes the script is run from the project root.
training_questions_fn = "./data/questions-t3.txt"
test_questions_fn = "./data/test-questions-t3.txt"
def mean(numbers):
    """Return the arithmetic mean of *numbers* as a float.

    An empty sequence yields 0.0 instead of raising ZeroDivisionError.
    """
    count = len(numbers)
    if count == 0:
        return 0.0
    return sum(numbers) / float(count)
def run_question_classification(training_fn, test_fn):
# Train a question classifier on *training_fn*, evaluate it on *test_fn*,
# and print both accuracies. Delegates the actual work to
# train_and_test_classifier; only the reporting happens here.
# NOTE(review): the rest of this function is outside the visible diff hunk.
accuracy_train, accuracy_test = train_and_test_classifier(training_fn, test_fn)
print("Accuracy on training set: {0:.4f}".format(accuracy_train))
@ -22,7 +29,25 @@ def train_and_test_classifier(training_fn, test_fn):
# Insert your question-classification code here.  (original: "Insérer ici
# votre code pour la classification des questions.")
# Your code...
accuracy_train = 0.8 # To be modified
# Grid-search over CountVectorizer pruning thresholds, scoring each
# combination with 10-fold cross-validated Multinomial Naive Bayes.
# NOTE(review): `questions` and `labels` are defined outside this diff hunk —
# presumably parsed from training_fn; verify against the full file.
mnb = MultinomialNB()
avg_score = {}
# min_df in {1..4}: drop terms appearing in fewer than min_df documents.
for min_df in range(1, 5):
# max_df in 0.005..0.05 (step 0.005): drop terms appearing in more than
# that fraction of documents.
for max_df in [x/200.0 for x in range(1, 11)]:
count_vec = CountVectorizer(strip_accents='unicode',
# stop_words='english',
analyzer='word',
min_df=min_df,
max_df=max_df)
count_questions = count_vec.fit_transform(questions)
# 10-fold CV; cross_validate returns a dict of per-fold metric arrays.
mnb_fit = cross_validate(mnb, count_questions, labels, cv=10)
# Average test-fold accuracy for this (min_df, max_df) pair.
avg_score[(min_df, max_df)] = mean(mnb_fit['test_score'])
# BUG(review): cross_validate returns a dict, not a fitted estimator, so
# `mnb_fit.predict(...)` will raise AttributeError.  Either fit `mnb` on
# the full matrix first, pass return_estimator=True and use one of the
# returned estimators, or use cross_val_predict for out-of-fold labels.
labels_predict_train = mnb_fit.predict(count_questions)
cm_predict = confusion_matrix(y_true=labels,
y_pred=labels_predict_train,
labels=unique_labels(labels, labels_predict_train))
# NOTE(review): loop indentation was lost in this diff extract, so it is
# unclear whether the lines below run per-combination or once at the end.
accuracy_train = accuracy_score(y_true=labels, y_pred=labels_predict_train)
accuracy_test = 0.8 # To be modified
return accuracy_train, accuracy_test