"No one is harder on a talented person than the person themselves" - Linda Wilkinson ; "Trust your guts and don't follow the herd" ; "Validate direction not destination" ;

October 11, 2019

Day #283 - Clustering to group similar Images


For large retail datasets, clustering is an essential step before object detection: it lets us focus on each cluster separately and take it forward. Today's post is about clustering images into similar groups.

  • Generate Feature Data based on VGG / Resnet
  • Cluster them using Kmeans
  • Copy each image to its respective cluster directory

#Base Code - https://medium.com/@franky07724_57962/using-keras-pre-trained-models-for-feature-extraction-in-image-clustering-a142c6cdf5b1
#Modified for our custom needs
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50
import numpy as np
import os
from sklearn.cluster import KMeans
from keras.applications.resnet50 import preprocess_input, decode_predictions
import shutil
# Folder containing the mixed set of input images to be clustered.
datadir = r'E:\Code_Repo\Images'
# Root folder under which one sub-folder per cluster label is created.
output_dir = r'E:\Code_Repo\results'
def createFolder(directory):
    """Create ``directory`` (and any missing parents) if it does not exist.

    This is a best-effort helper: a failure is reported on stdout instead of
    being raised, so the caller keeps running even when the directory could
    not be created.

    Args:
        directory: Path of the directory to create.
    """
    try:
        # exist_ok=True makes the call idempotent and removes the race between
        # testing for the directory and creating it.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print('Error: Creating directory. ' + directory)
def VGG_Cluster(numberofclusters):
    """Cluster the images in ``datadir`` by their VGG16 features.

    Each image is loaded at 224x224, passed through VGG16 (``imagenet``
    weights, ``include_top=False``), and the flattened output is used as its
    feature vector. The vectors are grouped with KMeans and every image is
    copied into ``output_dir/<cluster label>/``. Labels, cluster centers and
    per-file assignments are printed along the way.

    Args:
        numberofclusters: Number of KMeans clusters, which is also the number
            of output folders created.

    Raises:
        ValueError: If ``datadir`` holds fewer files than ``numberofclusters``.
    """
    # Sort so the sample order (and therefore the cluster numbering) is
    # reproducible, and skip sub-directories, which load_img cannot open.
    files = sorted(
        name for name in os.listdir(datadir)
        if os.path.isfile(os.path.join(datadir, name))
    )
    # Fail before the expensive model load; KMeans would reject this anyway.
    if len(files) < numberofclusters:
        raise ValueError(
            'Need at least %d images in %s to form %d clusters, found %d'
            % (numberofclusters, datadir, numberofclusters, len(files))
        )

    model = VGG16(weights='imagenet', include_top=False)
    feature_list = []
    for name in files:
        img = image.load_img(os.path.join(datadir, name), target_size=(224, 224))
        # The model expects a batch axis, so a single image becomes (1, ...).
        img_data = np.expand_dims(image.img_to_array(img), axis=0)
        # NOTE(review): preprocess_input is imported from
        # keras.applications.resnet50 at file level; confirm it matches the
        # preprocessing VGG16 expects.
        img_data = preprocess_input(img_data)
        feature = model.predict(img_data)
        feature_list.append(np.asarray(feature).flatten())

    feature_list_np = np.array(feature_list)
    kmeans = KMeans(n_clusters=numberofclusters, random_state=0).fit(feature_list_np)
    labelresult = kmeans.labels_
    print(labelresult)
    print(kmeans.cluster_centers_)
    print('VGG Results')

    # Create one directory per cluster label.
    for cluster_id in range(numberofclusters):
        createFolder(os.path.join(output_dir, str(cluster_id)))

    # Copy each image into the directory of its assigned cluster. The full
    # destination file path is given so a missing cluster directory raises
    # instead of silently producing a file named after the label.
    for name, label in zip(files, labelresult):
        print(name)
        shutil.copy(
            os.path.join(datadir, name),
            os.path.join(output_dir, str(label), name),
        )
        print(label)
def Resnet_Cluster(numberofclusters):
    """Cluster the images in ``datadir`` by their ResNet50 features.

    Each image is loaded at 224x224, passed through ResNet50 (``imagenet``
    weights, ``include_top=False``), and the flattened output is used as its
    feature vector. The vectors are grouped with KMeans and every image is
    copied into ``output_dir/<cluster label>/``. Labels, cluster centers and
    per-file assignments are printed along the way.

    Args:
        numberofclusters: Number of KMeans clusters, which is also the number
            of output folders created.

    Raises:
        ValueError: If ``datadir`` holds fewer files than ``numberofclusters``.
    """
    # Sort so the sample order (and therefore the cluster numbering) is
    # reproducible, and skip sub-directories, which load_img cannot open.
    files = sorted(
        name for name in os.listdir(datadir)
        if os.path.isfile(os.path.join(datadir, name))
    )
    # Fail before the expensive model load; KMeans would reject this anyway.
    if len(files) < numberofclusters:
        raise ValueError(
            'Need at least %d images in %s to form %d clusters, found %d'
            % (numberofclusters, datadir, numberofclusters, len(files))
        )

    model = ResNet50(weights='imagenet', include_top=False)
    feature_list = []
    for name in files:
        img = image.load_img(os.path.join(datadir, name), target_size=(224, 224))
        # The model expects a batch axis, so a single image becomes (1, ...).
        img_data = np.expand_dims(image.img_to_array(img), axis=0)
        img_data = preprocess_input(img_data)
        feature = model.predict(img_data)
        feature_list.append(np.asarray(feature).flatten())

    feature_list_np = np.array(feature_list)
    kmeans = KMeans(n_clusters=numberofclusters, random_state=0).fit(feature_list_np)
    labelresult = kmeans.labels_
    print(labelresult)
    print(kmeans.cluster_centers_)
    print('Resnet Results')

    # Create one directory per cluster label.
    for cluster_id in range(numberofclusters):
        createFolder(os.path.join(output_dir, str(cluster_id)))

    # Copy each image into the directory of its assigned cluster. The full
    # destination file path is given so a missing cluster directory raises
    # instead of silently producing a file named after the label.
    for name, label in zip(files, labelresult):
        print(name)
        shutil.copy(
            os.path.join(datadir, name),
            os.path.join(output_dir, str(label), name),
        )
        print(label)
# Entry point: cluster the images into 3 groups using ResNet50 features.
# To use VGG16 features instead, call VGG_Cluster(3).
Resnet_Cluster(3)

Input - Mixed Set of Images
Output 
Cluster 1
Cluster 2

Cluster 3
More Reads - Example (in R)

Happy Learning!!!

No comments: