ajout fichier séparé pour métriques et test pour BayesNaif

2019-03-23 00:15:16 -04:00 · 2019-03-23 00:15:16 -04:00 · 86b2b93483
commit 86b2b93483
parent 0df2b6242b
4 changed files with 77 additions and 60 deletions
--- a/Code/BayesNaif.py
+++ b/Code/BayesNaif.py
@ -12,6 +12,7 @@ je vais avoir besoin de tester les méthodes test, predict et test de votre code

 import numpy as np
 import math
+import metrics


 # le nom de votre classe
@ -125,7 +126,7 @@ class BayesNaif: #nom de la class à changer
        
        return (self.unique_labels[np.where(prob==max(prob))[0]].tolist()[0],label)

-    def test(self, test, test_labels):
+    def test(self, test, test_labels, verbose=True):
        """
        c'est la méthode qui va tester votre modèle sur les données de test
        l'argument test est une matrice de type Numpy et de taille nxm, avec 
@ -146,7 +147,13 @@ class BayesNaif: #nom de la class à changer
        Bien entendu ces tests doivent etre faits sur les données de test seulement
        
        """
-    
+        prediction_test = [self.predict(test[i],test_labels[i])[0] for i in range(len(test_labels))]
+        cm = metrics.confusion_matrix(test_labels,prediction_test)
+        accuracy, precision, recall = metrics.prediction_metrics(cm,test_labels,prediction_test)
+        if (verbose):
+            metrics.print_prediction_metrics(cm,accuracy,precision,recall)
+
+        return cm,accuracy,precision,recall
    
    # Vous pouvez rajouter d'autres méthodes et fonctions,
    # il suffit juste de les commenter.
--- a/Code/Knn.py
+++ b/Code/Knn.py
@ -11,6 +11,7 @@ je vais avoir besoin de tester les méthodes test, predict et test de votre code
 """

 import numpy as np
+import metrics

 def minkowski_distance(x,y,p_value):
    return pow(sum(pow(abs(a-b),p_value) for a,b in zip(x, y)),1/p_value)
@ -126,10 +127,10 @@ class Knn: #nom de la class à changer
        nn_mode_label = np.apply_along_axis(mode,1,nn_labels)
        
        # on construit la matrice de confusion
-        cm = self.confusion_matrix(train_labels,nn_mode_label)
-        accuracy, precision, recall = self.prediction_metrics(cm,train_labels,nn_mode_label)
+        cm = metrics.confusion_matrix(train_labels,nn_mode_label)
+        accuracy, precision, recall = metrics.prediction_metrics(cm,train_labels,nn_mode_label)
        if (verbose):
-            self.print_prediction_metrics(cm,accuracy,precision,recall)
+            metrics.print_prediction_metrics(cm,accuracy,precision,recall)

        return cm,accuracy,precision,recall
        
@ -205,59 +206,15 @@ class Knn: #nom de la class à changer
        nn_labels = self.train_labels[nn[:,:,0].astype(np.int)]
        nn_mode_label = np.apply_along_axis(mode,1,nn_labels)
        # on construit la matrice de confusion
-        cm = self.confusion_matrix(test_labels,nn_mode_label)
-        accuracy, precision, recall = self.prediction_metrics(cm,test_labels,nn_mode_label)
+        cm = metrics.confusion_matrix(test_labels,nn_mode_label)
+        accuracy, precision, recall = metrics.prediction_metrics(cm,test_labels,nn_mode_label)
        if (verbose):
-            self.print_prediction_metrics(cm,accuracy,precision,recall)
+            metrics.print_prediction_metrics(cm,accuracy,precision,recall)

        return cm,accuracy,precision,recall
        
        
-    def confusion_matrix(self,obs_labels,pred_labels):
-        """
-        Retourne la matrice de confusion
-        Prend en entrée deux vecteurs d'étiquettes: observations et prédictions
-        Retourne une matrice NumPy
-        """
-        unique_obs_labels=np.unique(obs_labels)
-        nb_unique_obs_labels=(unique_obs_labels.shape)[0]
-        
-        confusion_matrix = np.zeros((nb_unique_obs_labels,nb_unique_obs_labels))
-        
-        for observed,predicted in zip(obs_labels,pred_labels):
-            confusion_matrix[observed][predicted] += 1
-        return confusion_matrix
-    
-    def prediction_metrics(self,cm,obs_labels,pred_labels):
-        """
-        Cette fonction retourne les métriques accuracy, precision et recall
-        Elle prend en entrée la matrice de confusion et les vecteurs d'étiquettes: observations et prédictions
-        accuracy=(tp+tn)/all
-        precision=tp/(tp+fp)
-        recall=tp/(tp+fn)
-        """
-        
-        accuracy = (obs_labels == pred_labels).sum() / float(len(obs_labels))
-        precision=[]
-        recall=[]
-        for label_num in np.unique(obs_labels):
-            precision.append(cm[label_num,label_num] / sum(cm[:,label_num]))
-            recall.append(cm[label_num,label_num] / sum(cm[label_num,:]))
-        
-        return accuracy, precision, recall
-    
-    def print_prediction_metrics(self,cm,accuracy,precision,recall):
-        """
-        Cette fonction imprime la matrice de confusion et les métriques
-        """
-        print("Matrice de confusion:")
-        print(cm)
-        print("\nAccuracy:")
-        print(accuracy)
-        print("\nPrecision:")
-        print(precision)
-        print("\nRecall")
-        print(recall)
+
    
    # Vous pouvez rajouter d'autres méthodes et fonctions,
    # il suffit juste de les commenter.
--- a/Code/entrainer_tester.py
+++ b/Code/entrainer_tester.py
@ -31,7 +31,7 @@ train, train_labels, test, test_labels = ld.load_monks_dataset(3)

 ## Choix du meilleur K
 findbest_Knn = Knn.Knn()
-meilleur_k = findbest_Knn.set_best_k(train, train_labels, nb_split=5, k_potentiel=range(1,16))
+meilleur_k = findbest_Knn.set_best_k(train, train_labels, nb_split=5, k_potentiel=range(5,15))
 print("Le meilleur K est:"+str(meilleur_k)+"\n\n")

 ## Initializer/instanciez vos classifieurs avec leurs paramètres
@ -40,24 +40,22 @@ myKnn = Knn.Knn(meilleur_k)
 ## Entrainez votre classifieur
 myKnn.train(train, train_labels)

-## Prediction
-
-
-
 ## Tester votre classifieur
 myKnn.test(test, test_labels)

 # Naive Bayes

 ## Initializer/instanciez vos classifieurs avec leurs paramètres
-BN = BayesNaif.BayesNaif(True)
+BN = BayesNaif.BayesNaif(False)

 ## Entrainez votre classifieur
 BN.train(train, train_labels)

 ## Prediction
+BN.test(train, train_labels)

-np.array([BN.predict(train[i],train_labels[i]) for i in range(105)])
+## Test
+BN.test(test,test_labels)



--- a/Code/metrics.py
+++ b/Code/metrics.py
@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Mar 22 23:55:53 2019
+Matrice de confusion et calcul des métriques
+@author: François Pelletier
+"""
+
+import numpy as np
+
+def confusion_matrix(obs_labels,pred_labels):
+    """
+    Build the confusion matrix of a set of predictions.
+
+    Labels are used directly as indices: cell [i, j] counts the samples whose
+    observed label is i and whose predicted label is j. Labels must therefore
+    be non-negative integers.
+
+    Parameters
+    ----------
+    obs_labels : sequence of int
+        Observed (true) labels.
+    pred_labels : sequence of int
+        Predicted labels, paired position by position with obs_labels.
+
+    Returns
+    -------
+    numpy.ndarray
+        Square float matrix. Its side is the number of distinct observed
+        labels, enlarged when needed so that every observed or predicted
+        label value is a valid index.
+
+    Raises
+    ------
+    ValueError
+        If a label is negative (it would silently wrap around to another
+        class when used as an index).
+    """
+    observed = np.asarray(obs_labels).ravel()
+    predicted = np.asarray(pred_labels).ravel()
+
+    nb_classes = np.unique(observed).shape[0]
+    if observed.size and predicted.size:
+        if min(observed.min(), predicted.min()) < 0:
+            raise ValueError("labels must be non-negative integers")
+        # A predicted class missing from the observations, or labels that are
+        # not exactly 0..n-1, would otherwise index outside the matrix.
+        nb_classes = max(nb_classes,
+                         int(observed.max()) + 1,
+                         int(predicted.max()) + 1)
+
+    cm = np.zeros((nb_classes, nb_classes))
+    for obs, pred in zip(observed, predicted):
+        cm[int(obs), int(pred)] += 1
+    return cm
+
+def prediction_metrics(cm,obs_labels,pred_labels):
+    """
+    Compute accuracy, per-class precision and per-class recall.
+
+    accuracy  = correct predictions / all predictions
+    precision = tp / (tp + fp)   (diagonal cell over its column total)
+    recall    = tp / (tp + fn)   (diagonal cell over its row total)
+
+    Parameters
+    ----------
+    cm : array-like
+        Confusion matrix indexed as cm[observed, predicted]; each observed
+        label value is used directly as a row/column index.
+    obs_labels : sequence of int
+        Observed (true) labels.
+    pred_labels : sequence of int
+        Predicted labels, same length as obs_labels.
+
+    Returns
+    -------
+    tuple
+        (accuracy, precision, recall) where precision and recall are lists
+        with one entry per distinct observed label, in sorted label order.
+        A ratio whose denominator is zero is reported as nan.
+
+    Raises
+    ------
+    ValueError
+        If obs_labels and pred_labels do not hold the same number of labels.
+    """
+    # Convert first: comparing two plain lists with == yields a single bool,
+    # and flattening avoids accidental (n,) vs (n,1) broadcasting.
+    observed = np.asarray(obs_labels).ravel()
+    predicted = np.asarray(pred_labels).ravel()
+    if observed.shape != predicted.shape:
+        raise ValueError("obs_labels and pred_labels must have the same length")
+    cm = np.asarray(cm, dtype=float)
+
+    accuracy = np.mean(observed == predicted) if observed.size else float("nan")
+
+    precision = []
+    recall = []
+    # Undefined ratios (class never predicted / never observed) stay nan,
+    # without emitting a RuntimeWarning for each of them.
+    with np.errstate(divide="ignore", invalid="ignore"):
+        for label in np.unique(observed):
+            k = int(label)
+            true_positives = cm[k, k]
+            precision.append(true_positives / cm[:, k].sum())
+            recall.append(true_positives / cm[k, :].sum())
+
+    return accuracy, precision, recall
+
+def print_prediction_metrics(cm,accuracy,precision,recall):
+    """
+    Print the confusion matrix followed by the accuracy, precision and
+    recall, each one preceded by its heading line.
+    """
+    # (heading, value) pairs in display order; headings are emitted verbatim.
+    sections = (
+        ("Matrice de confusion:", cm),
+        ("\nAccuracy:", accuracy),
+        ("\nPrecision:", precision),
+        ("\nRecall", recall),
+    )
+    for heading, value in sections:
+        print(heading)
+        print(value)