commit d3da13c681ff4da406cdc8ee0270ff4f1f584ba1 Author: François Pelletier Date: Sun Oct 27 22:35:21 2019 -0400 initial diff --git a/negation_conversion.py b/negation_conversion.py new file mode 100644 index 0000000..fe49686 --- /dev/null +++ b/negation_conversion.py @@ -0,0 +1,45 @@ +sentences = ["This is not a test.", + "There is no flowery dialog, and time is not wasted.", + "She did not promise to help him.", + "The King of France is not bald.", + "It is not so much a work of entertainment as it is unique study.", + "Mary did not complete the program but Nancy wrote the report.", + "Not an accomplished dancer, he moved rather clumsily.", + "Not all participants liked this game.", + "I do not think he is coming.", + "Mary did not give the solution to Paul.", + "She claimed that Donald had not offered bribes to any official.", + "Not for the first time, he was surprised by this player.", + "I would never do it even if I can.", + "A decision is not expected until June.", + "We do not like washing dishes which lead to the decision of buying a dishwasher." + ] + + +def convert_negated_words(sentence): + # Pour évaluer cette tâche, nous allons utiliser cette fonction pour tester la portée de la négation + # d'autres phrases que celles dans la liste plus haut (sentences). + # SVP ne pas changer la signautre de la fonction. + # + # Pour déterminer la portée d'une négation, utilisez la structure d'un arbre syntaxique (constituants ou dépendances), + # les symboles des consituants ou des dépendance, et les part-of-speech des mots (POS). + # + # Vous pouvez utiliser la libairie de votre choix pour faire l'analyse syntaxique des phrases. + # Et vous DEVEZ utiliser un analyseur syntaxique. + # + # Vous pouvez ajouter autant de fonctions que vous le souhaitez. + # Mais vous ne pouvez pas utiliser de code disponible sur le Web pour déterminer la portée de négations. + # + # Nos solutions pour les 15 phrases sont dans le fichier /data/phrases-references-t2.txt + # + # Mettre votre code ici. Vous pouvez effacer ces commentaires. + # + converted_sentence = "This is not NOT_a NOT_test ." # A MODIFIER + return converted_sentence.strip() + + +if __name__ == '__main__': + for sent in sentences: + print("\nS:", sent) + converted = convert_negated_words(sent) + print("N:", converted) \ No newline at end of file diff --git a/sentiment_analysis.py b/sentiment_analysis.py new file mode 100644 index 0000000..9d114d6 --- /dev/null +++ b/sentiment_analysis.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +import json +import sentiment_analysis_functions as sfun + +# installation +# nltk.download('punkt') + +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.naive_bayes import MultinomialNB +from sklearn.linear_model import LogisticRegression + +train_pos_reviews_fn = "./data/train-positive-t1.txt" +train_neg_reviews_fn = "./data/train-negative-t1.txt" +test_pos_reviews_fn = "./data/test-pos-t1.txt" +test_neg_reviews_fn = "./data/test-neg-t1.txt" + + +# Aucune contrainte pour cette tâche. Vous pouvez structurez votre code comme bon vous semble. +# Expliquez dans votre rapport comment exécuter le code et définir les configurations. + + +def load_reviews(filename): + with open(filename, 'r') as fp: + reviews_list = json.load(fp) + return reviews_list + +if __name__ == '__main__': + train_positive_reviews = load_reviews(train_pos_reviews_fn) + train_negative_reviews = load_reviews(train_neg_reviews_fn) + test_positive_reviews = load_reviews(test_pos_reviews_fn) + test_negative_reviews = load_reviews(test_neg_reviews_fn) + print("Nb of training reviews - positive:", len(train_positive_reviews), "negative:", len(train_negative_reviews)) + print("Nb of test reviews - positive:", len(test_positive_reviews), "negative:", len(test_negative_reviews)) + + xx = sfun.tokenize(train_positive_reviews) + xx[0] + diff --git a/sentiment_analysis_functions.py b/sentiment_analysis_functions.py new file mode 100644 index 0000000..dd3ac92 --- /dev/null +++ b/sentiment_analysis_functions.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Sun Oct 27 17:16:54 2019 + +@author: François Pelletier +""" + +import nltk +import re + +## Normalisation + +def tokenize(reviews): + tokenized_reviews = [] + for review in reviews: + # Plusieurs fin de phrases étaient représentées par deux espaces ou plus. + review = re.sub(r"\s{2,}",". ",review) + tokenized_sentences = [] + sentences = nltk.sent_tokenize(review) + for sentence in sentences: + sentence_tokens = nltk.word_tokenize(sentence) + tokenized_sentences.append(sentence_tokens) + tokenized_reviews.append(tokenized_sentences) + return tokenized_reviews + +def norm_stemming(tokenized_reviews): + return 0 + +def norm_lemmatize(tokenized_reviews): + return 0 + +## Feature selection + +def select_freq(reviews): + return 0 + +def select_rem_stopwords(reviews): + return 0 + +def select_open_class(reviews): + return 0 + +## Attribute value + +def value_count(reviews): + return 0 + +def value_occurence(reviews): + return 0 + +def value_tfidf(reviews): + return 0 + +## Other attributes + +def attribute_polarity_count(reviews): + return 0 + +def attribute_length(reviews): + return 0 + +## Training + +def train_naive_model(reviews): + return 0 + +def train_regression_model(reviews): + return 0