This commit is contained in:
François Pelletier 2019-10-27 22:35:21 -04:00
commit d3da13c681
3 changed files with 151 additions and 0 deletions

45
negation_conversion.py Normal file
View file

@ -0,0 +1,45 @@
# Sample English sentences, each containing a negation ("not", "no", "never").
# The __main__ block of this module feeds every one of them to
# convert_negated_words() and prints the result.
sentences = ["This is not a test.",
"There is no flowery dialog, and time is not wasted.",
"She did not promise to help him.",
"The King of France is not bald.",
"It is not so much a work of entertainment as it is unique study.",
"Mary did not complete the program but Nancy wrote the report.",
"Not an accomplished dancer, he moved rather clumsily.",
"Not all participants liked this game.",
"I do not think he is coming.",
"Mary did not give the solution to Paul.",
"She claimed that Donald had not offered bribes to any official.",
"Not for the first time, he was surprised by this player.",
"I would never do it even if I can.",
"A decision is not expected until June.",
"We do not like washing dishes which lead to the decision of buying a dishwasher."
]
def convert_negated_words(sentence):
    """Placeholder for the negation-scope converter.

    The current body ignores ``sentence`` and always returns the same
    hard-coded example string, in which the words following "not" carry a
    ``NOT_`` prefix.

    Assignment notes (translated from the original French comments):

    * To grade this task, this function will be called on sentences other
      than the ones in the module-level ``sentences`` list, in order to test
      the scope of the negation.
    * Please do not change the signature of this function.
    * To determine the scope of a negation, use the structure of a syntactic
      tree (constituency or dependency), the constituent or dependency
      labels, and the part-of-speech (POS) tags of the words.
    * You may use the library of your choice for the syntactic analysis, and
      you MUST use a syntactic parser.
    * You may add as many functions as you like, but you may not use code
      available on the Web to determine the scope of negations.
    * Reference solutions for the 15 sentences are in the file
      /data/phrases-references-t2.txt

    Args:
        sentence: The sentence to convert (currently unused).

    Returns:
        The converted sentence, stripped of leading/trailing whitespace.
    """
    # TODO: replace this fixed example output (marked "A MODIFIER", i.e.
    # "to be modified", by the original author) with a real implementation.
    return "This is not NOT_a NOT_test .".strip()
if __name__ == '__main__':
    # Demo run: show each sample sentence (S) followed by its converted
    # form (N), separated from the previous pair by a blank line.
    for sentence in sentences:
        print("\nS:", sentence)
        print("N:", convert_negated_words(sentence))

37
sentiment_analysis.py Normal file
View file

@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
import json
import sentiment_analysis_functions as sfun
# installation
# nltk.download('punkt')
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
# Paths of the review files, relative to the working directory.
# Each one is read by load_reviews() in the __main__ block.
train_pos_reviews_fn = "./data/train-positive-t1.txt"
train_neg_reviews_fn = "./data/train-negative-t1.txt"
test_pos_reviews_fn = "./data/test-pos-t1.txt"
test_neg_reviews_fn = "./data/test-neg-t1.txt"
# No constraints for this task. You may structure your code as you see fit.
# Explain in your report how to run the code and how to set the configurations.
def load_reviews(filename):
    """Load a JSON review file and return its decoded content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The object the JSON document decodes to (presumably a list of
        review strings -- verify against the data files).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 (the standard JSON encoding) rather than
    # relying on the platform's default locale encoding, which would break
    # or garble non-ASCII reviews on non-UTF-8 systems.
    with open(filename, 'r', encoding='utf-8') as fp:
        return json.load(fp)
if __name__ == '__main__':
    # Read the four review sets (train/test x positive/negative).
    train_positive_reviews = load_reviews(train_pos_reviews_fn)
    train_negative_reviews = load_reviews(train_neg_reviews_fn)
    test_positive_reviews = load_reviews(test_pos_reviews_fn)
    test_negative_reviews = load_reviews(test_neg_reviews_fn)

    # Report how many reviews each set contains.
    n_train_pos = len(train_positive_reviews)
    n_train_neg = len(train_negative_reviews)
    n_test_pos = len(test_positive_reviews)
    n_test_neg = len(test_negative_reviews)
    print("Nb of training reviews - positive:", n_train_pos, "negative:", n_train_neg)
    print("Nb of test reviews - positive:", n_test_pos, "negative:", n_test_neg)

    # Tokenize the positive training reviews with the helper module.
    xx = sfun.tokenize(train_positive_reviews)
    # NOTE(review): bare expression whose value is discarded when run as a
    # script -- looks like a leftover from interactive inspection. It still
    # raises IndexError if no review was tokenized.
    xx[0]

View file

@ -0,0 +1,69 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 27 17:16:54 2019
@author: François Pelletier
"""
import nltk
import re
## Normalisation
def tokenize(reviews):
    """Split every review into sentences and every sentence into tokens.

    Args:
        reviews: Iterable of review texts (strings).

    Returns:
        A list with one entry per review; each entry is a list with one
        entry per sentence, holding the result of ``nltk.word_tokenize``
        for that sentence.
    """
    def split_review(text):
        # Several sentence endings were represented by two or more spaces,
        # so every run of two or more whitespace characters is turned into
        # an explicit sentence break before sentence splitting.
        text = re.sub(r"\s{2,}", ". ", text)
        return [nltk.word_tokenize(part) for part in nltk.sent_tokenize(text)]

    return [split_review(review) for review in reviews]
def norm_stemming(tokenized_reviews):
    """Placeholder, not implemented yet: ignores its argument and returns 0.

    Presumably meant to apply stemming to tokenized reviews -- TODO implement.
    """
    return 0
def norm_lemmatize(tokenized_reviews):
    """Placeholder, not implemented yet: ignores its argument and returns 0.

    Presumably meant to lemmatize tokenized reviews -- TODO implement.
    """
    return 0
## Feature selection
def select_freq(reviews):
    """Placeholder, not implemented yet: ignores its argument and returns 0.

    Presumably meant to select features by frequency -- TODO implement.
    """
    return 0
def select_rem_stopwords(reviews):
    """Placeholder, not implemented yet: ignores its argument and returns 0.

    Presumably meant to remove stop words from the reviews -- TODO implement.
    """
    return 0
def select_open_class(reviews):
    """Placeholder, not implemented yet: ignores its argument and returns 0.

    Presumably meant to keep only open-class words -- TODO implement.
    """
    return 0
## Attribute value
def value_count(reviews):
    """Placeholder, not implemented yet: ignores its argument and returns 0.

    Presumably meant to build count-valued attributes -- TODO implement.
    """
    return 0
def value_occurence(reviews):
    """Placeholder, not implemented yet: ignores its argument and returns 0.

    Presumably meant to build presence/absence attributes -- TODO implement.
    """
    return 0
def value_tfidf(reviews):
    """Placeholder, not implemented yet: ignores its argument and returns 0.

    Presumably meant to build TF-IDF-valued attributes -- TODO implement.
    """
    return 0
## Other attributes
def attribute_polarity_count(reviews):
    """Placeholder, not implemented yet: ignores its argument and returns 0.

    Presumably meant to count polarity-bearing words -- TODO implement.
    """
    return 0
def attribute_length(reviews):
    """Placeholder, not implemented yet: ignores its argument and returns 0.

    Presumably meant to compute a review-length attribute -- TODO implement.
    """
    return 0
## Training
def train_naive_model(reviews):
    """Placeholder, not implemented yet: ignores its argument and returns 0.

    Presumably meant to train a naive Bayes classifier -- TODO implement.
    """
    return 0
def train_regression_model(reviews):
    """Placeholder, not implemented yet: ignores its argument and returns 0.

    Presumably meant to train a logistic regression classifier -- TODO implement.
    """
    return 0