diff --git a/README b/README new file mode 100644 index 0000000..9e8e702 --- /dev/null +++ b/README @@ -0,0 +1,13 @@ +Installation des fichiers supplémentaires pour NLTK depuis Python + +import nltk +nltk.download('punkt') +nltk.download('wordnet') +nltk.download('stopwords') +nltk.download('averaged_perceptron_tagger') +nltk.download('universal_tagset') +nltk.download('sentiwordnet') + +Installation de Stanford CoreNLP + +Télécharger et décompresser https://nlp.stanford.edu/software/stanford-corenlp-full-2018-10-05.zip dans le dossier de travail (negation_conversion.py charge stanford-corenlp-3.9.2.jar et stanford-english-corenlp-2018-10-05-models.jar depuis ./stanford-corenlp-full-2018-10-05/) diff --git a/negation_conversion.py b/negation_conversion.py index eed15d0..7953a77 100644 --- a/negation_conversion.py +++ b/negation_conversion.py @@ -15,10 +15,13 @@ sentences = ["This is not a test.", "We do not like washing dishes which lead to the decision of buying a dishwasher." ] +# Source du fichier à télécharger pour Stanford CoreNLP +# https://nlp.stanford.edu/software/stanford-corenlp-full-2018-10-05.zip + from nltk.parse.corenlp import CoreNLPServer from nltk.parse.corenlp import CoreNLPParser set_negatives = set(['no','not','never']) -# https://nlp.stanford.edu/software/stanford-parser-full-2018-10-17.zip + def is_negative_tree(tree): lower_leaves = [x.lower() for x in tree.leaves()] @@ -103,11 +106,11 @@ def convert_negated_words(sentence): if __name__ == '__main__': - #server = CoreNLPServer("/home/francois/stanford-corenlp-full-2018-10-05/stanford-corenlp-3.9.2.jar", - # "/home/francois/stanford-corenlp-full-2018-10-05/stanford-english-corenlp-2018-10-05-models.jar") - #server.start() - #parser = CoreNLPParser() - output_file = open("/home/francois/nlp_a2019_tp2/nlp_a2019_tp2/output_negative.txt","w") + server = CoreNLPServer("./stanford-corenlp-full-2018-10-05/stanford-corenlp-3.9.2.jar", + "./stanford-corenlp-full-2018-10-05/stanford-english-corenlp-2018-10-05-models.jar") + server.start() + parser = CoreNLPParser() + output_file = open("output_negative.txt","w") for sent in sentences: print("\nS:", sent) 
output_file.write("S: "+sent) @@ -116,4 +119,4 @@ if __name__ == '__main__': print("N:", converted) output_file.write("\nN: "+converted+"\n\n") output_file.close() - #server.stop() \ No newline at end of file + server.stop() \ No newline at end of file diff --git a/output_negative.txt b/output_negative.txt new file mode 100644 index 0000000..a63f7e4 --- /dev/null +++ b/output_negative.txt @@ -0,0 +1,45 @@ +S: This is not a test. +N: This is not NOT_a NOT_test . + +S: There is no flowery dialog, and time is not wasted. +N: There is no NOT_flowery NOT_dialog , and time is not NOT_wasted . + +S: She did not promise to help him. +N: She did not NOT_promise NOT_to NOT_help NOT_him . + +S: The King of France is not bald. +N: The King of France is not NOT_bald . + +S: It is not so much a work of entertainment as it is unique study. +N: It is not NOT_so much a work of entertainment as it is unique study . + +S: Mary did not complete the program but Nancy wrote the report. +N: Mary did not NOT_complete NOT_the NOT_program but Nancy wrote the report . + +S: Not an accomplished dancer, he moved rather clumsily. +N: Not NOT_an NOT_accomplished NOT_dancer , he moved rather clumsily . + +S: Not all participants liked this game. +N: Not NOT_all NOT_participants liked this game . + +S: I do not think he is coming. +N: I do not NOT_think NOT_he NOT_is NOT_coming . + +S: Mary did not give the solution to Paul. +N: Mary did not NOT_give NOT_the NOT_solution NOT_to NOT_Paul . + +S: She claimed that Donald had not offered bribes to any official. +N: She claimed that Donald had not NOT_offered NOT_bribes NOT_to NOT_any NOT_official . + +S: Not for the first time, he was surprised by this player. +N: Not NOT_for NOT_the NOT_first NOT_time , he was surprised by this player . + +S: I would never do it even if I can. +N: I would never NOT_do NOT_it even if I can . + +S: A decision is not expected until June. +N: A decision is not NOT_expected NOT_until NOT_June . 
+ +S: We do not like washing dishes which lead to the decision of buying a dishwasher. +N: We do not NOT_like NOT_washing NOT_dishes which lead to the decision of buying a dishwasher . + diff --git a/sentiment_analysis.py b/sentiment_analysis.py index dff2c20..0cd52b0 100644 --- a/sentiment_analysis.py +++ b/sentiment_analysis.py @@ -5,15 +5,6 @@ from sklearn.metrics import accuracy_score, recall_score, precision_score from scipy.sparse import csr_matrix, hstack import pandas as pd -# installation -# import nltk -# nltk.download('punkt') -# nltk.download('wordnet') -# nltk.download('stopwords') -# nltk.download('averaged_perceptron_tagger') -# nltk.download('universal_tagset') -# nltk.download('sentiwordnet') - train_pos_reviews_fn = "./data/train-positive-t1.txt" train_neg_reviews_fn = "./data/train-negative-t1.txt"