For large retail datasets, clustering the images before running object detection becomes essential: it lets you focus on each cluster separately and take it forward on its own. Today's post is about clustering images into similar groups:
- Generate feature data with a pre-trained VGG16 / ResNet50 model
- Cluster the features using K-means
- Copy each image to its respective cluster directory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Base code: https://medium.com/@franky07724_57962/using-keras-pre-trained-models-for-feature-extraction-in-image-clustering-a142c6cdf5b1
# Modified for our custom needs
import os
import shutil

import numpy as np
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input as vgg16_preprocess_input
from keras.preprocessing import image
from sklearn.cluster import KMeans
# Directory that holds the images to be clustered.
datadir = r'E:\Code_Repo\Images'
# Root directory that receives one numbered sub-directory per cluster.
output_dir = r'E:\Code_Repo\results'
def createFolder(directory):
    """Create ``directory`` (and any missing parents) if it does not exist.

    The call is best-effort: an ``OSError`` raised while creating the
    directory is reported on stdout and not re-raised, so callers must not
    assume the directory exists afterwards.

    Args:
        directory: Path of the directory to create.
    """
    try:
        # exist_ok=True makes this a single call, avoiding the race between a
        # separate os.path.exists() check and the actual creation.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print('Error: Creating directory. ' + directory)
def _extract_features(model, preprocess, names):
    """Run every loadable image in ``names`` through ``model``.

    Args:
        model: Keras model used as the feature extractor.
        preprocess: Input-preprocessing function matching ``model``.
        names: File names, relative to ``datadir``, to process.

    Returns:
        ``(kept_names, features)`` where ``features`` holds one flattened
        feature row per entry of ``kept_names``, in the same order.
    """
    kept_names = []
    rows = []
    for name in names:
        img_path = os.path.join(datadir, name)
        try:
            img = image.load_img(img_path, target_size=(224, 224))
        except OSError as err:
            # Stray non-image files (Thumbs.db, desktop.ini, ...) should not
            # abort the whole run. Assumes load_img surfaces Pillow's
            # OSError-derived errors for unreadable files.
            print('Skipping unreadable file: ' + name + ' (' + str(err) + ')')
            continue
        # The model expects a batch axis, so wrap the single image.
        img_data = np.expand_dims(image.img_to_array(img), axis=0)
        feature = model.predict(preprocess(img_data))
        rows.append(np.asarray(feature).flatten())
        kept_names.append(name)
    return kept_names, np.array(rows)


def _cluster_and_copy(model, preprocess, numberofclusters, banner):
    """Cluster the images in ``datadir`` and copy each into its cluster folder.

    Images are copied to ``output_dir/<label>/<file name>``; ``banner`` is
    printed after the K-means labels and cluster centers.

    Raises:
        ValueError: If there are no readable images, or fewer readable images
            than ``numberofclusters``.
    """
    # Sub-directories cannot be loaded as images. os.listdir returns entries
    # in arbitrary order, so sort to keep the row order fed to the seeded
    # K-means reproducible across runs.
    candidates = sorted(
        name for name in os.listdir(datadir)
        if os.path.isfile(os.path.join(datadir, name))
    )
    names, features = _extract_features(model, preprocess, candidates)
    if not names or len(names) < numberofclusters:
        raise ValueError(
            'Need at least {} readable image(s) in {}, found {}'.format(
                max(numberofclusters, 1), datadir, len(names)))

    kmeans = KMeans(n_clusters=numberofclusters, random_state=0).fit(features)
    labelresult = kmeans.labels_
    print(labelresult)
    print(kmeans.cluster_centers_)
    print(banner)

    # Create one output directory per cluster.
    for i in range(numberofclusters):
        createFolder(os.path.join(output_dir, str(i)))

    # Copy every image into the directory named after its cluster label.
    for name, label in zip(names, labelresult):
        print(name)
        # Spell out the destination file so a missing cluster directory
        # raises instead of silently creating a file named after the label.
        shutil.copy(os.path.join(datadir, name),
                    os.path.join(output_dir, str(label), name))
        print(label)


def VGG_Cluster(numberofclusters):
    """Cluster the images in ``datadir`` using VGG16 features.

    Features come from an ImageNet-pretrained VGG16 without its top layers,
    are grouped with K-means, and each image is copied into
    ``output_dir/<cluster label>``.

    Args:
        numberofclusters: Number of K-means clusters (and output directories).

    Raises:
        ValueError: If there are fewer readable images than clusters.
    """
    model = VGG16(weights='imagenet', include_top=False)
    _cluster_and_copy(model, vgg16_preprocess_input, numberofclusters,
                      'VGG Results')


def Resnet_Cluster(numberofclusters):
    """Cluster the images in ``datadir`` using ResNet50 features.

    Features come from an ImageNet-pretrained ResNet50 without its top
    layers, are grouped with K-means, and each image is copied into
    ``output_dir/<cluster label>``.

    Args:
        numberofclusters: Number of K-means clusters (and output directories).

    Raises:
        ValueError: If there are fewer readable images than clusters.
    """
    model = ResNet50(weights='imagenet', include_top=False)
    _cluster_and_copy(model, preprocess_input, numberofclusters,
                      'Resnet Results')
if __name__ == '__main__':
    # Run only when executed as a script, so importing this module does not
    # start a clustering job. Swap in VGG_Cluster(3) to use VGG16 features.
    Resnet_Cluster(3)
Input - Mixed Set of Images
Output
Cluster 1
Cluster 2
Cluster 3
More Reads - Example (in R)
Happy Learning!!!
No comments:
Post a Comment