predict knn et correction random lecture

This commit is contained in:
François Pelletier 2019-03-16 18:40:22 -04:00
parent f23034e381
commit 3ddc45508a
3 changed files with 40 additions and 10 deletions

View file

@ -67,9 +67,10 @@ class Knn: #nom de la class à changer
self.train_labels=train_labels
n,m = train.shape
p=m
nn=np.empty((n,self.k,2))
self.minkowski_p=m
# On trouve les k plus proches voisins et leur distance pour chacune des observations du training set
# On enlève la valeur testée de la liste des points pour lesquels on mesure la distance car on sait qu'elle vaut 0.
# On veut tester sur les autres points seulement
@ -77,9 +78,9 @@ class Knn: #nom de la class à changer
i_range = [i for i in range(n)]
i_range.pop(x)
nn[x,:,0]=i_range[0:self.k]
nn[x,:,1]=np.apply_along_axis(minkowski_distance,1,self.train[i_range[0:self.k]],train[x],p)
nn[x,:,1]=np.apply_along_axis(minkowski_distance,1,self.train[i_range[0:self.k]],train[x],self.minkowski_p)
for i in i_range[self.k:n]:
dist = minkowski_distance(self.train[i],train[x],p)
dist = minkowski_distance(self.train[i],train[x],self.minkowski_p)
nn_dist=nn[x,:,1]
distdiff = nn_dist-dist
max_distdiff=max(distdiff)
@ -108,9 +109,24 @@ class Knn: #nom de la class à changer
alors l'exemple est bien classifié, sinon c'est une erreur de classification
"""
n,m = self.train.shape
nn=np.empty((self.k,2))
nn[:,1]=np.apply_along_axis(minkowski_distance,1,self.train[0:self.k],exemple,self.minkowski_p)
for i in range(self.k,n):
dist = minkowski_distance(self.train[i],exemple,self.minkowski_p)
nn_dist=nn[:,1]
distdiff = nn_dist-dist
max_distdiff=max(distdiff)
if(max_distdiff>0):
pos_changement = np.argwhere(nn_dist==max(nn_dist))[0]
nn[pos_changement,0]=i
nn[pos_changement,1]=max_distdiff
nn_labels = self.train_labels[nn[:,0].astype(np.int)]
nn_mode_label = mode(nn_labels)
print("Observé:"+str(label)+" Prédit:"+str(nn_mode_label))
return nn_mode_label
def test(self, test, test_labels):
"""

View file

@ -37,6 +37,9 @@ train, train_labels, test, test_labels = ld.load_iris_dataset(0.7)
myKnn.train(train, train_labels)
for i in range(train.shape[0]):
myKnn.predict(train[i],train_labels[i])
# Tester votre classifieur

View file

@ -51,13 +51,15 @@ def load_iris_dataset(train_ratio=0.7):
np_features=np.array(features,dtype=np.float)
np_labels=np.array(labels,dtype=np.int)
n_train = int(np_features.shape[0]*train_ratio)
n_obs = np_features.shape[0]
all_indices = [i for i in range(np_features.shape[0])]
n_train = int(n_obs*train_ratio)
all_indices = [i for i in range(n_obs)]
random.shuffle(all_indices)
train_index = all_indices[0:n_train]
test_index = all_indices[n_train:np_features.shape[0]]
test_index = all_indices[n_train:n_obs]
train = np_features[train_index]
train_labels = np_labels[train_index]
@ -127,12 +129,21 @@ def load_congressional_dataset(train_ratio):
np_features=np.array(features,dtype=np.float)
np_labels=np.array(labels,dtype=np.int)
train_index = np.random.rand(np_features.shape[0]) < train_ratio
n_obs = np_features.shape[0]
n_train = int(n_obs*train_ratio)
all_indices = [i for i in range(n_obs)]
random.shuffle(all_indices)
train_index = all_indices[0:n_train]
test_index = all_indices[n_train:n_obs]
train = np_features[train_index]
train_labels = np_labels[train_index]
test = np_features[~train_index]
test_labels = np_labels[~train_index]
test = np_features[test_index]
test_labels = np_labels[test_index]
# La fonction doit retourner 4 structures de données de type Numpy.
return (train, train_labels, test, test_labels)