Ajout de predict pour Knn et correction du découpage aléatoire train/test à la lecture des données

2019-03-16 18:40:22 -04:00 · 2019-03-16 18:40:22 -04:00 · 3ddc45508a
commit 3ddc45508a
parent f23034e381
3 changed files with 40 additions and 10 deletions
--- a/Code/Knn.py
+++ b/Code/Knn.py
@ -67,9 +67,10 @@ class Knn: #nom de la class à changer
        self.train_labels=train_labels

        n,m = train.shape
-        p=m
        nn=np.empty((n,self.k,2))
        
+        self.minkowski_p=m
+        
        # On trouve les k plus proches voisins et leur distance pour chacune des observations du training set
        # On enlève la valeur testée de la liste des points pour lesquels on mesure la distance car on sait qu'elle vaut 0. 
        # On veut tester sur les autres points seulement
@ -77,9 +78,9 @@ class Knn: #nom de la class à changer
            i_range = [i for i in range(n)]
            i_range.pop(x)
            nn[x,:,0]=i_range[0:self.k]
-            nn[x,:,1]=np.apply_along_axis(minkowski_distance,1,self.train[i_range[0:self.k]],train[x],p)
+            nn[x,:,1]=np.apply_along_axis(minkowski_distance,1,self.train[i_range[0:self.k]],train[x],self.minkowski_p)
            for i in i_range[self.k:n]:
-                dist = minkowski_distance(self.train[i],train[x],p)
+                dist = minkowski_distance(self.train[i],train[x],self.minkowski_p)
                nn_dist=nn[x,:,1]
                distdiff = nn_dist-dist
                max_distdiff=max(distdiff)
@ -108,9 +109,24 @@ class Knn: #nom de la class à changer
        alors l'exemple est bien classifié, sinon c'est une mauvaise classification

        """
+        n,m = self.train.shape
+        nn=np.empty((self.k,2))
+        nn[:,1]=np.apply_along_axis(minkowski_distance,1,self.train[0:self.k],exemple,self.minkowski_p)
+        for i in range(self.k,n):
+            dist = minkowski_distance(self.train[i],exemple,self.minkowski_p)
+            nn_dist=nn[:,1]
+            distdiff = nn_dist-dist
+            max_distdiff=max(distdiff)
+            if(max_distdiff>0):
+                pos_changement = np.argwhere(nn_dist==max(nn_dist))[0]
+                nn[pos_changement,0]=i
+                nn[pos_changement,1]=max_distdiff
+        nn_labels = self.train_labels[nn[:,0].astype(np.int)]
+        nn_mode_label = mode(nn_labels)
        
+        print("Observé:"+str(label)+" Prédit:"+str(nn_mode_label))
        
-        
+        return nn_mode_label

    def test(self, test, test_labels):
        """
--- a/Code/entrainer_tester.py
+++ b/Code/entrainer_tester.py
@ -37,6 +37,9 @@ train, train_labels, test, test_labels = ld.load_iris_dataset(0.7)

 myKnn.train(train, train_labels)

+for i in range(train.shape[0]):
+    myKnn.predict(train[i],train_labels[i])
+
 # Tester votre classifieur


--- a/Code/load_datasets.py
+++ b/Code/load_datasets.py
@ -51,13 +51,15 @@ def load_iris_dataset(train_ratio=0.7):
    np_features=np.array(features,dtype=np.float)
    np_labels=np.array(labels,dtype=np.int)
    
-    n_train = int(np_features.shape[0]*train_ratio)
+    n_obs = np_features.shape[0]
    
-    all_indices = [i for i in range(np_features.shape[0])]
+    n_train = int(n_obs*train_ratio)
+    
+    all_indices = [i for i in range(n_obs)]
    random.shuffle(all_indices)
    
    train_index = all_indices[0:n_train]
-    test_index = all_indices[n_train:np_features.shape[0]]
+    test_index = all_indices[n_train:n_obs]
    
    train = np_features[train_index]
    train_labels = np_labels[train_index]
@ -127,12 +129,21 @@ def load_congressional_dataset(train_ratio):
    np_features=np.array(features,dtype=np.float)
    np_labels=np.array(labels,dtype=np.int)
    
-    train_index = np.random.rand(np_features.shape[0]) < train_ratio
+    n_obs = np_features.shape[0]
+    
+    
+    n_train = int(n_obs*train_ratio)
+    
+    all_indices = [i for i in range(n_obs)]
+    random.shuffle(all_indices)
+    
+    train_index = all_indices[0:n_train]
+    test_index = all_indices[n_train:n_obs]
    
    train = np_features[train_index]
    train_labels = np_labels[train_index]
-    test =  np_features[~train_index]
-    test_labels = np_labels[~train_index] 
+    test =  np_features[test_index]
+    test_labels = np_labels[test_index]
 	
 	# La fonction doit retourner 4 structures de données de type Numpy.
    return (train, train_labels, test, test_labels)