From 1e7e355f7e2a9d595e2bcacbcc10d5ae825c459d Mon Sep 17 00:00:00 2001 From: Francois Pelletier Date: Sat, 23 Mar 2019 19:22:19 -0400 Subject: [PATCH] =?UTF-8?q?ajout=20readme=20et=20fichier=20.py=20ex=C3=A9c?= =?UTF-8?q?ution?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Code/README.txt | 16 + Code/Rapport-Partiel.ipynb | 738 +++++++++++++++++++++++++++++---- Code/entrainer_tester.py | 600 ++++++++++++++++++++++++++- Code/export_notebook_script.sh | 2 + 4 files changed, 1252 insertions(+), 104 deletions(-) create mode 100644 Code/README.txt create mode 100644 Code/export_notebook_script.sh diff --git a/Code/README.txt b/Code/README.txt new file mode 100644 index 0000000..8ec9a8d --- /dev/null +++ b/Code/README.txt @@ -0,0 +1,16 @@ +Tâches effectuées entièrement par François Pelletier 908144032 + +. +├── BayesNaif.py (Classe pour le modèle Bayes Naif) +├── classifieur.py (Classe exemple) +├── datasets (Jeux de données) +├── entrainer_tester.py (Code pour exécuter tous les modèles, créé depuis Rapport-Partiel.ipynb avec export_notebook_script.sh) +├── export_notebook_script.sh (Script pour créer entrainer_tester.py depuis Rapport-Partiel.ipynb) +├── Knn.py (Classe pour le modèle KNN) +├── load_datasets.py (Classe pour charger les données) +├── metrics.py (Fonctions pour le calcul des métriques) +├── Rapport-Partiel.ipynb (Code source du rapport) +├── Rapport-Partiel.pdf (Version PDF du rapport) +└── README.txt (Ce fichier-ci) + +Discussion, enjeux et difficultés: voir rapport PDF diff --git a/Code/Rapport-Partiel.ipynb b/Code/Rapport-Partiel.ipynb index 2a57cde..15ad55b 100644 --- a/Code/Rapport-Partiel.ipynb +++ b/Code/Rapport-Partiel.ipynb @@ -6,6 +6,11 @@ "source": [ "# Introduction\n", "\n", + "- Projet partiel présenté par: **François Pelletier**\n", + "- Matricule: **908144032**\n", + "- Dans le cadre du cours: **IFT-7025**\n", + "\n", + "\n", "## Définitions\n", "\n", "La **matrice de confusion** est un 
tableau où on décompte les valeurs réelle O et prédites S pour chaque observation du jeu de données dans un problème de classification. Par sa conception, on peut valider rapidement le nombre d'observations où la prédiction est égale à la valeur réelle sur la diagonale de la matrice. Par convention, on place les valeurs prédites à la verticale et les valeurs observées à l'horizontale. Lorsque le nombre de modalités de la variable prédite est de deux, on a un problème de classification binaire et on obtient alors le décompte des vrais positifs, faux positifs, faux négatifs et vrais négatifs.\n", @@ -30,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": {}, "outputs": [], "source": [ @@ -55,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [ @@ -63,7 +68,7 @@ "train2, train_labels2, test2, test_labels2 = ld.load_monks_dataset(1)\n", "train3, train_labels3, test3, test_labels3 = ld.load_monks_dataset(2)\n", "train4, train_labels4, test4, test_labels4 = ld.load_monks_dataset(3)\n", - "train5, train_labels5, test5, test_labels5 = ld.load_congressional_dataset(0.7)" + "train5, train_labels5, test5, test_labels5 = ld.load_congressional_dataset(train_ratio = 0.7)" ] }, { @@ -169,12 +174,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ + "%%capture --no-display\n", "findbest_Knn = Knn.Knn()\n", - "meilleur_k = findbest_Knn.set_best_k(train1, train_labels1, nb_split=5, k_potentiel=range(2,12))" + "meilleur_k = findbest_Knn.set_best_k(train1, \n", + " train_labels1, \n", + " nb_split=5, \n", + " k_potentiel=range(2,12))" ] }, { @@ -186,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 49, "metadata": {}, "outputs": [], "source": [ @@ -202,9 +211,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 50, 
"metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[35. 0. 0.]\n", + " [ 0. 32. 1.]\n", + " [ 0. 4. 33.]]\n", + "\n", + "Exactitude:\n", + "0.9523809523809523\n", + "\n", + "Précision:\n", + "[1.0, 0.8888888888888888, 0.9705882352941176]\n", + "\n", + "Rappel:\n", + "[1.0, 0.9696969696969697, 0.8918918918918919]\n", + "\n", + "Calculé en:\n", + "0.10393261909484863s\n" + ] + } + ], "source": [ "training_iris_knn = myKnn.train(train1, train_labels1)" ] @@ -218,9 +250,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 51, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[15. 0. 0.]\n", + " [ 0. 15. 2.]\n", + " [ 0. 0. 13.]]\n", + "\n", + "Exactitude:\n", + "0.9555555555555556\n", + "\n", + "Précision:\n", + "[1.0, 1.0, 0.8666666666666667]\n", + "\n", + "Rappel:\n", + "[1.0, 0.8823529411764706, 1.0]\n", + "\n", + "Calculé en:\n", + "0.04592633247375488s\n" + ] + } + ], "source": [ "test_iris_knn = myKnn.test(test1, test_labels1)" ] @@ -250,17 +305,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Le meilleur k est: 4\n", + "Calculé en 0.02272510528564453s\n" + ] + } + ], "source": [ "findbest_Knn = Knn.Knn()\n", - "meilleur_k = findbest_Knn.set_best_k(train2, train_labels2, nb_split=5, k_potentiel=range(2,12))" + "meilleur_k = findbest_Knn.set_best_k(train2, \n", + " train_labels2, \n", + " nb_split=5, \n", + " k_potentiel=range(2,12))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -276,9 +343,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 54, "metadata": {}, - "outputs": [], + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[36. 26.]\n", + " [19. 43.]]\n", + "\n", + "Exactitude:\n", + "0.6370967741935484\n", + "\n", + "Précision:\n", + "[0.6545454545454545, 0.6231884057971014]\n", + "\n", + "Rappel:\n", + "[0.5806451612903226, 0.6935483870967742]\n", + "\n", + "Calculé en:\n", + "0.12871980667114258s\n" + ] + } + ], "source": [ "training_monks1_knn = myKnn2.train(train2, train_labels2)" ] @@ -292,9 +381,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 55, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[139. 77.]\n", + " [ 49. 167.]]\n", + "\n", + "Exactitude:\n", + "0.7083333333333334\n", + "\n", + "Précision:\n", + "[0.7393617021276596, 0.6844262295081968]\n", + "\n", + "Rappel:\n", + "[0.6435185185185185, 0.7731481481481481]\n", + "\n", + "Calculé en:\n", + "0.4427499771118164s\n" + ] + } + ], "source": [ "test_monks1_knn = myKnn2.test(test2, test_labels2)" ] @@ -315,17 +426,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Le meilleur k est: 3\n", + "Calculé en 0.04257488250732422s\n" + ] + } + ], "source": [ "findbest_Knn = Knn.Knn()\n", - "meilleur_k = findbest_Knn.set_best_k(train3, train_labels3, nb_split=5, k_potentiel=range(2,12))" + "meilleur_k = findbest_Knn.set_best_k(train3, \n", + " train_labels3, \n", + " nb_split=5, \n", + " k_potentiel=range(2,12))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ @@ -341,9 +464,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 58, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + 
"[[105. 0.]\n", + " [ 63. 1.]]\n", + "\n", + "Exactitude:\n", + "0.6272189349112426\n", + "\n", + "Précision:\n", + "[0.625, 1.0]\n", + "\n", + "Rappel:\n", + "[1.0, 0.015625]\n", + "\n", + "Calculé en:\n", + "0.2338266372680664s\n" + ] + } + ], "source": [ "training_monks2_knn = myKnn3.train(train3, train_labels3)" ] @@ -357,9 +502,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[286. 4.]\n", + " [129. 13.]]\n", + "\n", + "Exactitude:\n", + "0.6921296296296297\n", + "\n", + "Précision:\n", + "[0.689156626506024, 0.7647058823529411]\n", + "\n", + "Rappel:\n", + "[0.9862068965517241, 0.09154929577464789]\n", + "\n", + "Calculé en:\n", + "0.5991506576538086s\n" + ] + } + ], "source": [ "test_monks2_knn = myKnn3.test(test3, test_labels3)" ] @@ -380,17 +547,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 60, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Le meilleur k est: 3\n", + "Calculé en 0.02377629280090332s\n" + ] + } + ], "source": [ "findbest_Knn = Knn.Knn()\n", - "meilleur_k = findbest_Knn.set_best_k(train4, train_labels4, nb_split=5, k_potentiel=range(2,12))" + "meilleur_k = findbest_Knn.set_best_k(train4, \n", + " train_labels4, \n", + " nb_split=5, \n", + " k_potentiel=range(2,12))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, "metadata": {}, "outputs": [], "source": [ @@ -406,9 +585,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 62, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[40. 22.]\n", + " [16. 
44.]]\n", + "\n", + "Exactitude:\n", + "0.6885245901639344\n", + "\n", + "Précision:\n", + "[0.7142857142857143, 0.6666666666666666]\n", + "\n", + "Rappel:\n", + "[0.6451612903225806, 0.7333333333333333]\n", + "\n", + "Calculé en:\n", + "0.1286787986755371s\n" + ] + } + ], "source": [ "training_monks3_knn = myKnn4.train(train4, train_labels4)" ] @@ -422,9 +623,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 63, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[134. 70.]\n", + " [ 70. 158.]]\n", + "\n", + "Exactitude:\n", + "0.6759259259259259\n", + "\n", + "Précision:\n", + "[0.6568627450980392, 0.6929824561403509]\n", + "\n", + "Rappel:\n", + "[0.6568627450980392, 0.6929824561403509]\n", + "\n", + "Calculé en:\n", + "0.4376678466796875s\n" + ] + } + ], "source": [ "test_monks3_knn = myKnn4.test(test4, test_labels4)" ] @@ -436,13 +659,6 @@ "## Congressional Voting Records Dataset" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -452,17 +668,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 64, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Le meilleur k est: 10\n", + "Calculé en 0.2262570858001709s\n" + ] + } + ], "source": [ "findbest_Knn = Knn.Knn()\n", - "meilleur_k = findbest_Knn.set_best_k(train5, train_labels5, nb_split=5, k_potentiel=range(2,12))" + "meilleur_k = findbest_Knn.set_best_k(train5, \n", + " train_labels5, \n", + " nb_split=5, \n", + " k_potentiel=range(2,12))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 65, "metadata": {}, "outputs": [], "source": [ @@ -478,9 +706,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 66, "metadata": {}, - "outputs": [], + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[ 91. 18.]\n", + " [ 17. 178.]]\n", + "\n", + "Exactitude:\n", + "0.8848684210526315\n", + "\n", + "Précision:\n", + "[0.8425925925925926, 0.9081632653061225]\n", + "\n", + "Rappel:\n", + "[0.8348623853211009, 0.9128205128205128]\n", + "\n", + "Calculé en:\n", + "1.389171838760376s\n" + ] + } + ], "source": [ "training_congres_knn = myKnn5.train(train5, train_labels5)" ] @@ -494,9 +744,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 67, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[50. 9.]\n", + " [ 8. 64.]]\n", + "\n", + "Exactitude:\n", + "0.8702290076335878\n", + "\n", + "Précision:\n", + "[0.8620689655172413, 0.8767123287671232]\n", + "\n", + "Rappel:\n", + "[0.847457627118644, 0.8888888888888888]\n", + "\n", + "Calculé en:\n", + "0.6056292057037354s\n" + ] + } + ], "source": [ "test_congres_knn = myKnn5.test(test5, test_labels5)" ] @@ -603,7 +875,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 68, "metadata": {}, "outputs": [], "source": [ @@ -619,9 +891,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 69, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[35. 0. 0.]\n", + " [ 0. 31. 2.]\n", + " [ 0. 3. 
34.]]\n", + "\n", + "Exactitude:\n", + "0.9523809523809523\n", + "\n", + "Précision:\n", + "[1.0, 0.9117647058823529, 0.9444444444444444]\n", + "\n", + "Rappel:\n", + "[1.0, 0.9393939393939394, 0.918918918918919]\n", + "\n", + "Calculé en:\n", + "0.005628347396850586s\n" + ] + } + ], "source": [ "train_iris_nb = BN.train(train1, train_labels1)" ] @@ -635,9 +930,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 70, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[15. 0. 0.]\n", + " [ 0. 15. 2.]\n", + " [ 0. 0. 13.]]\n", + "\n", + "Exactitude:\n", + "0.9555555555555556\n", + "\n", + "Précision:\n", + "[1.0, 1.0, 0.8666666666666667]\n", + "\n", + "Rappel:\n", + "[1.0, 0.8823529411764706, 1.0]\n", + "\n", + "Calculé en:\n", + "0.0021715164184570312s\n" + ] + } + ], "source": [ "test_iris_nb = BN.test(test1,test_labels1)" ] @@ -660,7 +978,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 71, "metadata": {}, "outputs": [], "source": [ @@ -669,9 +987,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 72, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[56. 6.]\n", + " [37. 25.]]\n", + "\n", + "Exactitude:\n", + "0.6532258064516129\n", + "\n", + "Précision:\n", + "[0.6021505376344086, 0.8064516129032258]\n", + "\n", + "Rappel:\n", + "[0.9032258064516129, 0.4032258064516129]\n", + "\n", + "Calculé en:\n", + "0.0070645809173583984s\n" + ] + } + ], "source": [ "train_monks1_nb = BN2.train(train2, train_labels2)" ] @@ -685,9 +1025,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 73, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[165. 51.]\n", + " [159. 
57.]]\n", + "\n", + "Exactitude:\n", + "0.5138888888888888\n", + "\n", + "Précision:\n", + "[0.5092592592592593, 0.5277777777777778]\n", + "\n", + "Rappel:\n", + "[0.7638888888888888, 0.2638888888888889]\n", + "\n", + "Calculé en:\n", + "0.012335538864135742s\n" + ] + } + ], "source": [ "test_monks1_nb = BN2.test(test2,test_labels2)" ] @@ -701,7 +1063,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 74, "metadata": {}, "outputs": [], "source": [ @@ -710,9 +1072,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 75, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[92. 13.]\n", + " [48. 16.]]\n", + "\n", + "Exactitude:\n", + "0.6390532544378699\n", + "\n", + "Précision:\n", + "[0.6571428571428571, 0.5517241379310345]\n", + "\n", + "Rappel:\n", + "[0.8761904761904762, 0.25]\n", + "\n", + "Calculé en:\n", + "0.011017560958862305s\n" + ] + } + ], "source": [ "train_monks2_nb = BN3.train(train3, train_labels3)" ] @@ -726,9 +1110,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 76, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[244. 46.]\n", + " [120. 
22.]]\n", + "\n", + "Exactitude:\n", + "0.6157407407407407\n", + "\n", + "Précision:\n", + "[0.6703296703296703, 0.3235294117647059]\n", + "\n", + "Rappel:\n", + "[0.8413793103448276, 0.15492957746478872]\n", + "\n", + "Calculé en:\n", + "0.012041330337524414s\n" + ] + } + ], "source": [ "test_monks2_nb = BN3.test(test3,test_labels3)" ] @@ -742,7 +1148,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 77, "metadata": {}, "outputs": [], "source": [ @@ -751,9 +1157,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 78, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[57. 5.]\n", + " [ 3. 57.]]\n", + "\n", + "Exactitude:\n", + "0.9344262295081968\n", + "\n", + "Précision:\n", + "[0.95, 0.9193548387096774]\n", + "\n", + "Rappel:\n", + "[0.9193548387096774, 0.95]\n", + "\n", + "Calculé en:\n", + "0.007179975509643555s\n" + ] + } + ], "source": [ "train_monks3_nb = BN4.train(train4, train_labels4)" ] @@ -767,9 +1195,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 79, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[204. 0.]\n", + " [ 12. 
216.]]\n", + "\n", + "Exactitude:\n", + "0.9722222222222222\n", + "\n", + "Précision:\n", + "[0.9444444444444444, 1.0]\n", + "\n", + "Rappel:\n", + "[1.0, 0.9473684210526315]\n", + "\n", + "Calculé en:\n", + "0.011954069137573242s\n" + ] + } + ], "source": [ "test_monks3_nb = BN4.test(test4,test_labels4)" ] @@ -785,7 +1235,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 80, "metadata": {}, "outputs": [], "source": [ @@ -794,9 +1244,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 81, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[100. 9.]\n", + " [ 20. 175.]]\n", + "\n", + "Exactitude:\n", + "0.9046052631578947\n", + "\n", + "Précision:\n", + "[0.8333333333333334, 0.9510869565217391]\n", + "\n", + "Rappel:\n", + "[0.9174311926605505, 0.8974358974358975]\n", + "\n", + "Calculé en:\n", + "0.034026384353637695s\n" + ] + } + ], "source": [ "train_congres_nb = BN5.train(train5, train_labels5)" ] @@ -810,9 +1282,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 82, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Matrice de confusion:\n", + "[[56. 3.]\n", + " [ 7. 
65.]]\n", + "\n", + "Exactitude:\n", + "0.9236641221374046\n", + "\n", + "Précision:\n", + "[0.8888888888888888, 0.9558823529411765]\n", + "\n", + "Rappel:\n", + "[0.9491525423728814, 0.9027777777777778]\n", + "\n", + "Calculé en:\n", + "0.004875659942626953s\n" + ] + } + ], "source": [ "test_congres_nb = BN5.test(test5, test_labels5)" ] @@ -833,7 +1327,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 83, "metadata": {}, "outputs": [], "source": [ @@ -855,9 +1349,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 84, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['Iris' '0.9555555555555556' '0.9555555555555556' 'NB']\n", + " ['MONKS1' '0.7083333333333334' '0.5138888888888888' 'KNN']\n", + " ['MONKS2' '0.6921296296296297' '0.6157407407407407' 'KNN']\n", + " ['MONKS3' '0.6759259259259259' '0.9722222222222222' 'NB']\n", + " ['Votes' '0.8702290076335878' '0.9236641221374046' 'NB']]\n" + ] + } + ], "source": [ "print(np.array([(i[0],\n", " i[1][1],\n", @@ -876,9 +1382,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 85, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['Iris' list([1.0, 1.0, 0.8666666666666667])\n", + " list([1.0, 1.0, 0.8666666666666667]) 'NB']\n", + " ['MONKS1' list([0.7393617021276596, 0.6844262295081968])\n", + " list([0.5092592592592593, 0.5277777777777778]) 'KNN']\n", + " ['MONKS2' list([0.689156626506024, 0.7647058823529411])\n", + " list([0.6703296703296703, 0.3235294117647059]) 'KNN']\n", + " ['MONKS3' list([0.6568627450980392, 0.6929824561403509])\n", + " list([0.9444444444444444, 1.0]) 'NB']\n", + " ['Votes' list([0.8620689655172413, 0.8767123287671232])\n", + " list([0.8888888888888888, 0.9558823529411765]) 'NB']]\n" + ] + } + ], "source": [ "print(np.array([(i[0],\n", " i[1][2],\n", @@ -892,14 +1415,31 @@ "source": [ 
"### Rappel\n", "\n", - "Pour le rappel, on obtient des résultats différents. Pour une même exactitude, le modele KNN a un meilleur rappel pour la seconde classe. Pour le jeu de données MONKS1, l'ordre des valeurs du rappel est inversé. Ceci nous montre qu'il est important de sélectionner la mesure la plus importante pour le problème que nous voulons résoudre, car il est possible d'obtenir un conclusion erronée en utilisant une autre." + "Pour le rappel, on obtient des résultats différents. Pour le jeu de données MONKS1, l'ordre des valeurs du rappel est inversé. Ceci nous montre qu'il est important de sélectionner la mesure la plus importante pour le problème que nous voulons résoudre, car il est possible d'obtenir une conclusion erronée en utilisant une autre." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 86, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['Iris' list([1.0, 0.8823529411764706, 1.0])\n", + " list([1.0, 0.8823529411764706, 1.0]) 'NB']\n", + " ['MONKS1' list([0.6435185185185185, 0.7731481481481481])\n", + " list([0.7638888888888888, 0.2638888888888889]) 'NB']\n", + " ['MONKS2' list([0.9862068965517241, 0.09154929577464789])\n", + " list([0.8413793103448276, 0.15492957746478872]) 'KNN']\n", + " ['MONKS3' list([0.6568627450980392, 0.6929824561403509])\n", + " list([1.0, 0.9473684210526315]) 'NB']\n", + " ['Votes' list([0.847457627118644, 0.8888888888888888])\n", + " list([0.9491525423728814, 0.9027777777777778]) 'NB']]\n" + ] + } + ], "source": [ "print(np.array([(i[0],\n", " i[1][3],\n", @@ -918,9 +1458,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 87, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['Iris' '0.04592633247375488' '0.0021715164184570312' 'NB']\n", + " ['MONKS1' '0.4427499771118164' '0.012335538864135742' 'NB']\n", + " ['MONKS2' 
'0.5991506576538086' '0.012041330337524414' 'NB']\n", + " ['MONKS3' '0.4376678466796875' '0.011954069137573242' 'NB']\n", + " ['Votes' '0.6056292057037354' '0.004875659942626953' 'NB']]\n" + ] + } + ], "source": [ "print(np.array([(i[0],\n", " i[1][4],\n", @@ -928,6 +1480,17 @@ " \"KNN\" if i[1][4]i[2][1] else "NB") for i in tous_resultats])) + + +# ### Précision +# +# On obtient les mêmes constats que précédemment au niveau de la précision + +# In[85]: + + +print(np.array([(i[0], + i[1][2], + i[2][2], + "KNN" if i[1][2]>i[2][2] else "NB") for i in tous_resultats])) + + +# ### Rappel +# +# Pour le rappel, on obtient des résultats différents. Pour le jeu de données MONKS1, l'ordre des valeurs du rappel est inversé. Ceci nous montre qu'il est important de sélectionner la mesure la plus importante pour le problème que nous voulons résoudre, car il est possible d'obtenir une conclusion erronée en utilisant une autre. + +# In[86]: + + +print(np.array([(i[0], + i[1][3], + i[2][3], + "KNN" if i[1][3]>i[2][3] else "NB") for i in tous_resultats])) + + +# ### Temps d'exécution +# +# Dans tous les cas, le temps de calcul du Naive Bayes est significativement plus court. + +# In[87]: + + +print(np.array([(i[0], + i[1][4], + i[2][4], + "KNN" if i[1][4]