diff --git a/Code/Knn.py b/Code/Knn.py
index d63f8f5..a7c8112 100644
--- a/Code/Knn.py
+++ b/Code/Knn.py
@@ -67,9 +67,10 @@ class Knn: #nom de la class à changer
         self.train_labels=train_labels
 
         n,m = train.shape
-        p=m
         nn=np.empty((n,self.k,2))
+        self.minkowski_p=m
+
 
         # On trouve les k plus proches voisins et leur distance pour chacunes des observations du training set
         # On enlève la valeur testée de la liste des points pour lesquels on mesure la distance car on sait qu'elle vaut 0.
         # On veut tester sur les autres points seulement
@@ -77,9 +78,9 @@ class Knn: #nom de la class à changer
             i_range = [i for i in range(n)]
             i_range.pop(x)
             nn[x,:,0]=i_range[0:self.k]
-            nn[x,:,1]=np.apply_along_axis(minkowski_distance,1,self.train[i_range[0:self.k]],train[x],p)
+            nn[x,:,1]=np.apply_along_axis(minkowski_distance,1,self.train[i_range[0:self.k]],train[x],self.minkowski_p)
             for i in i_range[self.k:n]:
-                dist = minkowski_distance(self.train[i],train[x],p)
+                dist = minkowski_distance(self.train[i],train[x],self.minkowski_p)
                 nn_dist=nn[x,:,1]
                 distdiff = nn_dist-dist
                 max_distdiff=max(distdiff)
@@ -108,9 +109,24 @@ class Knn: #nom de la class à changer
         alors l'exemple est bien classifié, si non c'est une missclassification
 
         """
+        n,m = self.train.shape
+        nn=np.empty((self.k,2))
+        nn[:,1]=np.apply_along_axis(minkowski_distance,1,self.train[0:self.k],exemple,self.minkowski_p)
+        for i in range(self.k,n):
+            dist = minkowski_distance(self.train[i],exemple,self.minkowski_p)
+            nn_dist=nn[:,1]
+            distdiff = nn_dist-dist
+            max_distdiff=max(distdiff)
+            if(max_distdiff>0):
+                pos_changement = np.argwhere(nn_dist==max(nn_dist))[0]
+                nn[pos_changement,0]=i
+                nn[pos_changement,1]=max_distdiff
+        nn_labels = self.train_labels[nn[:,0].astype(np.int)]
+        nn_mode_label = mode(nn_labels)
+        print("Observé:"+str(label)+" Prédit:"+str(nn_mode_label))
-
+        return nn_mode_label
 
 
 
     def test(self, test, test_labels):
         """
diff --git a/Code/entrainer_tester.py b/Code/entrainer_tester.py
index 6d7623a..4bc132d 100644
--- a/Code/entrainer_tester.py
+++ b/Code/entrainer_tester.py
@@ -37,6 +37,9 @@
 train, train_labels, test, test_labels = ld.load_iris_dataset(0.7)
 
 myKnn.train(train, train_labels)
 
+for i in range(train.shape[0]):
+    myKnn.predict(train[i],train_labels[i])
+
 
 # Tester votre classifieur
diff --git a/Code/load_datasets.py b/Code/load_datasets.py
index 29ae4df..b3cd6ea 100644
--- a/Code/load_datasets.py
+++ b/Code/load_datasets.py
@@ -51,13 +51,15 @@ def load_iris_dataset(train_ratio=0.7):
     np_features=np.array(features,dtype=np.float)
     np_labels=np.array(labels,dtype=np.int)
 
-    n_train = int(np_features.shape[0]*train_ratio)
+    n_obs = np_features.shape[0]
 
-    all_indices = [i for i in range(np_features.shape[0])]
+    n_train = int(n_obs*train_ratio)
+
+    all_indices = [i for i in range(n_obs)]
     random.shuffle(all_indices)
 
     train_index = all_indices[0:n_train]
-    test_index = all_indices[n_train:np_features.shape[0]]
+    test_index = all_indices[n_train:n_obs]
 
     train = np_features[train_index]
     train_labels = np_labels[train_index]
@@ -127,12 +129,21 @@ def load_congressional_dataset(train_ratio):
     np_features=np.array(features,dtype=np.float)
     np_labels=np.array(labels,dtype=np.int)
 
-    train_index = np.random.rand(np_features.shape[0]) < train_ratio
+    n_obs = np_features.shape[0]
+
+
+    n_train = int(n_obs*train_ratio)
+
+    all_indices = [i for i in range(n_obs)]
+    random.shuffle(all_indices)
+
+    train_index = all_indices[0:n_train]
+    test_index = all_indices[n_train:n_obs]
 
     train = np_features[train_index]
     train_labels = np_labels[train_index]
-    test = np_features[~train_index]
-    test_labels = np_labels[~train_index]
+    test = np_features[test_index]
+    test_labels = np_labels[test_index]
 
     # La fonction doit retourner 4 structures de données de type Numpy.
     return (train, train_labels, test, test_labels)