From 196b0d964904dc74e0211b3a83234d60f9264645 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Pelletier?=
Date: Fri, 1 Nov 2019 00:14:55 -0400
Subject: [PATCH] =?UTF-8?q?modeles=20termin=C3=A9s,=20pas=20test=C3=A9s?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this section is ignored by `git am`):
- The scoring loop now stores the Naive Bayes models and predictions; before,
  only the logistic regression results were appended, leaving modeles_nb and
  scores_nb filled with empty lists.
- Comments and docstrings were added to the new code, so the hunk sizes and
  the diffstat differ from the original commit; the post-image blob ids on
  the `index` lines were not regenerated.
- Line breaks and indentation were restored from a whitespace-collapsed copy.
  If a context line does not match, apply with `git apply --ignore-whitespace`.

 sentiment_analysis.py           | 43 ++++++++++++++++++++++++++++++++++++++++---
 sentiment_analysis_functions.py | 25 +++++++++++++++++++++----
 2 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/sentiment_analysis.py b/sentiment_analysis.py
index 4847b00..2854d8b 100644
--- a/sentiment_analysis.py
+++ b/sentiment_analysis.py
@@ -13,9 +13,6 @@ from scipy.sparse import csr_matrix, hstack
 
 # nltk.download('sentiwordnet')
 
-# from sklearn.naive_bayes import MultinomialNB
-# from sklearn.linear_model import LogisticRegression
-
 train_pos_reviews_fn = "./data/train-positive-t1.txt"
 train_neg_reviews_fn = "./data/train-negative-t1.txt"
 test_pos_reviews_fn = "./data/test-pos-t1.txt"
@@ -160,3 +157,43 @@ if __name__ == '__main__':
         v_final_test.append(v_select_final_test)
 
     # Scoring des modèles
+
+    # Train one Naive Bayes and one logistic regression model per feature
+    # matrix and predict on the matching test matrix. Results are nested
+    # lists indexed [norm_method][select_method][vector_method], the same
+    # way v_final_train and v_final_test are indexed below.
+    modeles_nb = []
+    scores_nb = []
+    modeles_reg = []
+    scores_reg = []
+    for norm_method in range(0,2):
+        modeles_select_vector_nb = []
+        scores_select_vector_nb = []
+        modeles_select_vector_reg = []
+        scores_select_vector_reg = []
+        for select_method in range(0,3):
+            modeles_vector_nb = []
+            scores_vector_nb = []
+            modeles_vector_reg = []
+            scores_vector_reg = []
+            for vector_method in range(0,3):
+                x_train = v_final_train[norm_method][select_method][vector_method]
+                x_test = v_final_test[norm_method][select_method][vector_method]
+                modele_nb = sfun.train_naive_model(x_train,train_dataset_response)
+                score_nb = modele_nb.predict(x_test)
+                modele_reg = sfun.train_regression_model(x_train,train_dataset_response)
+                score_reg = modele_reg.predict(x_test)
+                # The Naive Bayes results must be stored as well; otherwise
+                # modeles_nb / scores_nb end up holding only empty lists.
+                modeles_vector_nb.append(modele_nb)
+                scores_vector_nb.append(score_nb)
+                modeles_vector_reg.append(modele_reg)
+                scores_vector_reg.append(score_reg)
+            modeles_select_vector_nb.append(modeles_vector_nb)
+            scores_select_vector_nb.append(scores_vector_nb)
+            modeles_select_vector_reg.append(modeles_vector_reg)
+            scores_select_vector_reg.append(scores_vector_reg)
+        modeles_nb.append(modeles_select_vector_nb)
+        scores_nb.append(scores_select_vector_nb)
+        modeles_reg.append(modeles_select_vector_reg)
+        scores_reg.append(scores_select_vector_reg)
\ No newline at end of file
diff --git a/sentiment_analysis_functions.py b/sentiment_analysis_functions.py
index 18e6c5d..aee37b0 100644
--- a/sentiment_analysis_functions.py
+++ b/sentiment_analysis_functions.py
@@ -13,6 +13,8 @@ from collections import defaultdict
 from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 from nltk.corpus import wordnet as wn
 from nltk.corpus import sentiwordnet as swn
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.linear_model import LogisticRegression
 
 
 # Normalisation
@@ -182,9 +184,24 @@ def attribute_polarity_count(norm_reviews):
 
 
 # Training
-def train_naive_model(reviews):
-    return 0
+def train_naive_model(reviews_vectors,reviews_response):
+    """Fit a multinomial Naive Bayes classifier and return it.
+
+    reviews_vectors is the training feature matrix and reviews_response the
+    matching labels; both are passed unchanged to MultinomialNB.fit().
+    """
+    mnb = MultinomialNB()
+    mnb.fit(reviews_vectors,reviews_response)
+    return mnb
 
 
-def train_regression_model(reviews):
-    return 0
+def train_regression_model(reviews_vectors,reviews_response):
+    """Fit a logistic regression classifier and return it.
+
+    Uses the liblinear solver with at most 1000 iterations. reviews_vectors
+    is the training feature matrix and reviews_response the matching labels;
+    both are passed unchanged to LogisticRegression.fit().
+    """
+    lrm = LogisticRegression(solver='liblinear', max_iter=1000)
+    lrm.fit(reviews_vectors,reviews_response)
+    return lrm