initial

2019-10-27 22:35:21 -04:00 · 2019-10-27 22:35:21 -04:00 · d3da13c681
commit d3da13c681
3 changed files with 151 additions and 0 deletions
--- a/negation_conversion.py
+++ b/negation_conversion.py
@ -0,0 +1,45 @@
+# Example sentences, each containing a negation cue ("not", "no" or "never").
+# The __main__ section of this file passes every one of them to
+# convert_negated_words().
+sentences = [
+    "This is not a test.",
+    "There is no flowery dialog, and time is not wasted.",
+    "She did not promise to help him.",
+    "The King of France is not bald.",
+    "It is not so much a work of entertainment as it is unique study.",
+    "Mary did not complete the program but Nancy wrote the report.",
+    "Not an accomplished dancer, he moved rather clumsily.",
+    "Not all participants liked this game.",
+    "I do not think he is coming.",
+    "Mary did not give the solution to Paul.",
+    "She claimed that Donald had not offered bribes to any official.",
+    "Not for the first time, he was surprised by this player.",
+    "I would never do it even if I can.",
+    "A decision is not expected until June.",
+    "We do not like washing dishes which lead to the decision of buying a dishwasher.",
+]
+
+
+def convert_negated_words(sentence):
+    """Mark the scope of negation in ``sentence`` (placeholder implementation).
+
+    The real conversion has not been written yet: ``sentence`` is currently
+    ignored and the hard-coded example ``"This is not NOT_a NOT_test ."`` is
+    returned for every input.
+
+    Args:
+        sentence: The sentence to convert (unused for now).
+
+    Returns:
+        The converted sentence with surrounding whitespace stripped.
+    """
+    # Assignment notes (translated from the original French):
+    # - To grade this task, this function will be called on sentences other
+    #   than those in the `sentences` list above to test the negation scope.
+    # - Please do not change the function's signature.
+    # - To determine the scope of a negation, use the structure of a syntax
+    #   tree (constituency or dependency), the constituent or dependency
+    #   labels, and the part-of-speech (POS) tags of the words.
+    # - Any library may be used for the syntactic analysis, and a syntactic
+    #   parser MUST be used.
+    # - You may add as many functions as you like, but you may not use code
+    #   available on the Web to determine the scope of negations.
+    # - Our solutions for the 15 sentences are in the file
+    #   /data/phrases-references-t2.txt
+    # - Put your code here. These comments may be deleted.
+    placeholder = "This is not NOT_a NOT_test ."  # TODO: replace with the real conversion
+    return placeholder.strip()
+
+
+# Demo: print every example sentence (S:) followed by its converted form (N:).
+if __name__ == '__main__':
+    for sentence in sentences:
+        print("\nS:", sentence)
+        print("N:", convert_negated_words(sentence))
--- a/sentiment_analysis.py
+++ b/sentiment_analysis.py
@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+import json
+import sentiment_analysis_functions as sfun
+
+# installation
+# nltk.download('punkt')
+
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.linear_model import LogisticRegression
+
+# Relative paths of the four review files (train/test x positive/negative).
+# They are passed to load_reviews() in the __main__ section, which parses
+# each file as JSON.
+train_pos_reviews_fn = "./data/train-positive-t1.txt"
+train_neg_reviews_fn = "./data/train-negative-t1.txt"
+test_pos_reviews_fn = "./data/test-pos-t1.txt"
+test_neg_reviews_fn = "./data/test-neg-t1.txt"
+
+
+# Aucune contrainte pour cette tâche. Vous pouvez structurer votre code comme bon vous semble.
+# Expliquez dans votre rapport comment exécuter le code et définir les configurations.
+
+
+def load_reviews(filename):
+    """Load a JSON-encoded collection of reviews from ``filename``.
+
+    The file is decoded explicitly as UTF-8 rather than with the platform's
+    default encoding, so reviews containing non-ASCII characters load the
+    same way on every system.
+
+    Args:
+        filename: Path of the JSON file to read.
+
+    Returns:
+        The deserialized JSON content (the callers in this file take its
+        ``len()`` and iterate over it, i.e. they expect a list of reviews).
+
+    Raises:
+        OSError: If the file cannot be opened.
+        json.JSONDecodeError: If the file does not contain valid JSON.
+    """
+    with open(filename, 'r', encoding='utf-8') as fp:
+        reviews_list = json.load(fp)
+    return reviews_list
+
+# Script entry point: load the four review sets, report their sizes, then
+# tokenize the positive training reviews.
+if __name__ == '__main__':
+    # Each file is parsed as JSON by load_reviews().
+    train_positive_reviews = load_reviews(train_pos_reviews_fn)
+    train_negative_reviews = load_reviews(train_neg_reviews_fn)
+    test_positive_reviews = load_reviews(test_pos_reviews_fn)
+    test_negative_reviews = load_reviews(test_neg_reviews_fn)
+    print("Nb of training reviews - positive:", len(train_positive_reviews), "negative:", len(train_negative_reviews))
+    print("Nb of test reviews - positive:", len(test_positive_reviews), "negative:", len(test_negative_reviews))
+    
+    # sfun is the sentiment_analysis_functions module imported at the top of this file.
+    xx = sfun.tokenize(train_positive_reviews)
+    # Bare expression: the first element is looked up and discarded, so nothing
+    # is displayed when run as a script (presumably a leftover from interactive
+    # inspection). It still raises IndexError if the result is empty.
+    xx[0]
+
--- a/sentiment_analysis_functions.py
+++ b/sentiment_analysis_functions.py
@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Oct 27 17:16:54 2019
+
+@author: François Pelletier
+"""
+
+import nltk
+import re
+
+## Normalisation
+    
+def tokenize(reviews):
+    """Split every review into sentences and every sentence into word tokens.
+
+    Args:
+        reviews: Iterable of review strings.
+
+    Returns:
+        A list with one entry per review; each entry is a list of sentences,
+        and each sentence is the list of tokens produced by
+        ``nltk.word_tokenize``.
+    """
+    result = []
+    for raw_text in reviews:
+        # In the data, several sentence endings were represented by two or
+        # more whitespace characters, so turn such runs into ". " before
+        # sentence splitting.
+        cleaned = re.sub(r"\s{2,}", ". ", raw_text)
+        result.append(
+            [nltk.word_tokenize(sent) for sent in nltk.sent_tokenize(cleaned)]
+        )
+    return result
+
+def norm_stemming(tokenized_reviews):
+    """Placeholder, not implemented yet: ignores its argument and returns 0.
+
+    Presumably meant to normalise tokens by stemming (inferred from the
+    name) -- TODO confirm and implement.
+    """
+    return 0
+
+def norm_lemmatize(tokenized_reviews):
+    """Placeholder, not implemented yet: ignores its argument and returns 0.
+
+    Presumably meant to normalise tokens by lemmatization (inferred from the
+    name) -- TODO confirm and implement.
+    """
+    return 0
+
+## Feature selection
+
+def select_freq(reviews):
+    """Placeholder, not implemented yet: ignores its argument and returns 0.
+
+    Presumably meant to select features by frequency (inferred from the
+    name) -- TODO confirm and implement.
+    """
+    return 0
+    
+def select_rem_stopwords(reviews):
+    """Placeholder, not implemented yet: ignores its argument and returns 0.
+
+    Presumably meant to remove stop words (inferred from the name) -- TODO
+    confirm and implement.
+    """
+    return 0
+    
+def select_open_class(reviews):
+    """Placeholder, not implemented yet: ignores its argument and returns 0.
+
+    Presumably meant to keep only open-class words (inferred from the name)
+    -- TODO confirm and implement.
+    """
+    return 0
+    
+## Attribute value
+    
+def value_count(reviews):
+    """Placeholder, not implemented yet: ignores its argument and returns 0.
+
+    Presumably meant to compute count-based attribute values (inferred from
+    the name) -- TODO confirm and implement.
+    """
+    return 0
+    
+def value_occurence(reviews):
+    """Placeholder, not implemented yet: ignores its argument and returns 0.
+
+    Presumably meant to compute occurrence (presence/absence) attribute
+    values (inferred from the name) -- TODO confirm and implement.
+    """
+    return 0
+    
+def value_tfidf(reviews):
+    """Placeholder, not implemented yet: ignores its argument and returns 0.
+
+    Presumably meant to compute TF-IDF attribute values (inferred from the
+    name) -- TODO confirm and implement.
+    """
+    return 0
+    
+## Other attributes
+    
+def attribute_polarity_count(reviews):
+    """Placeholder, not implemented yet: ignores its argument and returns 0.
+
+    Presumably meant to count polarity-bearing words as an extra attribute
+    (inferred from the name) -- TODO confirm and implement.
+    """
+    return 0
+    
+def attribute_length(reviews):
+    """Placeholder, not implemented yet: ignores its argument and returns 0.
+
+    Presumably meant to use review length as an extra attribute (inferred
+    from the name) -- TODO confirm and implement.
+    """
+    return 0
+    
+## Training
+    
+def train_naive_model(reviews):
+    """Placeholder, not implemented yet: ignores its argument and returns 0.
+
+    Presumably meant to train a naive Bayes classifier (inferred from the
+    name) -- TODO confirm and implement.
+    """
+    return 0
+    
+def train_regression_model(reviews):
+    """Placeholder, not implemented yet: ignores its argument and returns 0.
+
+    Presumably meant to train a regression-based classifier (inferred from
+    the name) -- TODO confirm and implement.
+    """
+    return 0