initial
commit d3da13c681
3 changed files with 151 additions and 0 deletions
45  negation_conversion.py  Normal file
@@ -0,0 +1,45 @@
sentences = ["This is not a test.",
             "There is no flowery dialog, and time is not wasted.",
             "She did not promise to help him.",
             "The King of France is not bald.",
             "It is not so much a work of entertainment as it is unique study.",
             "Mary did not complete the program but Nancy wrote the report.",
             "Not an accomplished dancer, he moved rather clumsily.",
             "Not all participants liked this game.",
             "I do not think he is coming.",
             "Mary did not give the solution to Paul.",
             "She claimed that Donald had not offered bribes to any official.",
             "Not for the first time, he was surprised by this player.",
             "I would never do it even if I can.",
             "A decision is not expected until June.",
             "We do not like washing dishes which lead to the decision of buying a dishwasher."
             ]

def convert_negated_words(sentence):
    # To evaluate this task, we will use this function to test negation scope
    # on sentences other than the ones in the list above (sentences).
    # Please do not change the signature of the function.
    #
    # To determine the scope of a negation, use the structure of a syntactic tree (constituents or dependencies),
    # the constituent or dependency labels, and the part-of-speech tags (POS) of the words.
    #
    # You may use the library of your choice to do the syntactic analysis of the sentences.
    # And you MUST use a syntactic parser.
    #
    # You may add as many functions as you wish.
    # But you may not use code available on the Web to determine the scope of negations.
    #
    # Our solutions for the 15 sentences are in the file /data/phrases-references-t2.txt
    #
    # Put your code here. You may delete these comments.
    #
    converted_sentence = "This is not NOT_a NOT_test ."  # TO MODIFY
    return converted_sentence.strip()


if __name__ == '__main__':
    for sent in sentences:
        print("\nS:", sent)
        converted = convert_negated_words(sent)
        print("N:", converted)
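
For illustration only, not part of this commit: one way the negation scope could be read off a dependency parse, assuming spaCy with its en_core_web_sm model is installed (any parser satisfying the assignment's constraint would do); mark_negation_scope is a hypothetical helper, not the reference solution.

import spacy

nlp = spacy.load("en_core_web_sm")

def mark_negation_scope(sentence):
    # Parse the sentence and collect the heads that govern a "neg" dependent.
    doc = nlp(sentence)
    negated_heads = [tok.head for tok in doc if tok.dep_ == "neg"]
    in_scope = set()
    for head in negated_heads:
        neg_tok = next(t for t in head.children if t.dep_ == "neg")
        # Treat the part of the head's subtree that follows the negation word
        # as the scope of that negation (punctuation excluded).
        for tok in head.subtree:
            if tok.i > neg_tok.i and not tok.is_punct:
                in_scope.add(tok.i)
    return " ".join("NOT_" + tok.text if tok.i in in_scope else tok.text
                    for tok in doc)

# mark_negation_scope("This is not a test.") -> "This is not NOT_a NOT_test ."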
37  sentiment_analysis.py  Normal file
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
import json
import sentiment_analysis_functions as sfun

# installation
# nltk.download('punkt')

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression

train_pos_reviews_fn = "./data/train-positive-t1.txt"
train_neg_reviews_fn = "./data/train-negative-t1.txt"
test_pos_reviews_fn = "./data/test-pos-t1.txt"
test_neg_reviews_fn = "./data/test-neg-t1.txt"

# No constraints for this task. You may structure your code as you see fit.
# Explain in your report how to run the code and how to set the configurations.


def load_reviews(filename):
    with open(filename, 'r') as fp:
        reviews_list = json.load(fp)
    return reviews_list


if __name__ == '__main__':
    train_positive_reviews = load_reviews(train_pos_reviews_fn)
    train_negative_reviews = load_reviews(train_neg_reviews_fn)
    test_positive_reviews = load_reviews(test_pos_reviews_fn)
    test_negative_reviews = load_reviews(test_neg_reviews_fn)
    print("Nb of training reviews - positive:", len(train_positive_reviews), "negative:", len(train_negative_reviews))
    print("Nb of test reviews - positive:", len(test_positive_reviews), "negative:", len(test_negative_reviews))

    # Quick sanity check of the tokenizer on the positive training reviews
    xx = sfun.tokenize(train_positive_reviews)
    print(xx[0])
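
For illustration only, not part of this commit: the scikit-learn imports above are not used yet; here is a minimal sketch of how a bag-of-words baseline could be wired up with them, assuming each loaded review is a plain string. train_and_evaluate is a hypothetical helper, not part of the assignment skeleton.

def train_and_evaluate(train_pos, train_neg, test_pos, test_neg):
    # Label positive reviews 1 and negative reviews 0.
    train_texts = train_pos + train_neg
    train_labels = [1] * len(train_pos) + [0] * len(train_neg)
    test_texts = test_pos + test_neg
    test_labels = [1] * len(test_pos) + [0] * len(test_neg)

    # Bag-of-words counts as features, fitted on the training texts only.
    vectorizer = CountVectorizer()
    X_train = vectorizer.fit_transform(train_texts)
    X_test = vectorizer.transform(test_texts)

    for model in (MultinomialNB(), LogisticRegression(max_iter=1000)):
        model.fit(X_train, train_labels)
        print(type(model).__name__, "accuracy:", model.score(X_test, test_labels))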
69  sentiment_analysis_functions.py  Normal file
@@ -0,0 +1,69 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 27 17:16:54 2019

@author: François Pelletier
"""

import nltk
import re

## Normalization

def tokenize(reviews):
    tokenized_reviews = []
    for review in reviews:
        # Several sentence endings were represented by two or more spaces.
        review = re.sub(r"\s{2,}", ". ", review)
        tokenized_sentences = []
        sentences = nltk.sent_tokenize(review)
        for sentence in sentences:
            sentence_tokens = nltk.word_tokenize(sentence)
            tokenized_sentences.append(sentence_tokens)
        tokenized_reviews.append(tokenized_sentences)
    return tokenized_reviews

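For illustration only, not part of this commit: tokenize returns one entry per review, each a list of sentences, each a list of tokens, roughly like this:

# tokenize(["Great movie  Would watch again"])
# -> [[['Great', 'movie', '.'], ['Would', 'watch', 'again']]]
# (the double space is first rewritten to ". " by the regex above)
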
def norm_stemming(tokenized_reviews):
    return 0


def norm_lemmatize(tokenized_reviews):
    return 0


## Feature selection

def select_freq(reviews):
    return 0


def select_rem_stopwords(reviews):
    return 0


def select_open_class(reviews):
    return 0


## Attribute value

def value_count(reviews):
    return 0


def value_occurence(reviews):
    return 0


def value_tfidf(reviews):
    return 0


## Other attributes

def attribute_polarity_count(reviews):
    return 0


def attribute_length(reviews):
    return 0


## Training

def train_naive_model(reviews):
    return 0


def train_regression_model(reviews):
    return 0
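
For illustration only, not part of this commit: the normalization stubs above could be filled in with NLTK along these lines, assuming nltk.download('wordnet') has been run for the lemmatizer; the *_sketch names are hypothetical.

from nltk.stem import PorterStemmer, WordNetLemmatizer

def norm_stemming_sketch(tokenized_reviews):
    # Replace every token with its Porter stem, keeping the
    # review -> sentence -> token nesting produced by tokenize().
    stemmer = PorterStemmer()
    return [[[stemmer.stem(token) for token in sentence]
             for sentence in review]
            for review in tokenized_reviews]

def norm_lemmatize_sketch(tokenized_reviews):
    # Same nesting, but with WordNet lemmas instead of stems.
    lemmatizer = WordNetLemmatizer()
    return [[[lemmatizer.lemmatize(token) for token in sentence]
             for sentence in review]
            for review in tokenized_reviews]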