Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Brannigan123 committed Aug 30, 2020
0 parents commit 4e359e1
Show file tree
Hide file tree
Showing 16 changed files with 265 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions .idea/reverse image search.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Hy Image Search
An image search implementation in python using tensorflow keras, scikit-learn, scipy and matplotlib.


The image embeddings are generated from Xception imagenet (*can be changed/tuned from [features.py](features.py)*).
Currently the embeddings are stored using pickle, but a database may be used instead.
Image embeddings are compared using cosine similarity and Hamming distance.

## Running search on image(s) from console args
1. Run *[search.py](search.py)* with image path(s) as arguments and the script will display the top matches from the underlying image library.

**Sample**
<img src="single_img_search.png" width="600em" hspace=10 vspace=10/>

## Running search on a custom set of images
1. Transfer the images to the queries folder *[data/queries/](data/queries)* or, alternatively, change `query_images_folder_path` in *[paths.py](paths.py)* to the location of your image set.
2. Run *[search.py](search.py)* and the script will display the top matches from the underlying image library.

**Sample**
<img src="img_search.png" width="600em" hspace=10 vspace=10/>


## Generating embeddings for a new image set
1. Transfer the images to the images folder *[data/images/](data/images)* or, alternatively, change `images_folder_path` in *[paths.py](paths.py)* to the location of your image set.
2. Run *[features.py](features.py)* and you should have the embeddings generated

## Viewing the similarity map of the underlying image library (PCA and t-SNE)
1. Run *[dataset.py](dataset.py)* and you should get a visualization similar to this

<img src="tSNE similarity.png" width="1000em" hspace=10 vspace=10/>


----------

*The sample images used here are from idenprof dataset*
2 changes: 2 additions & 0 deletions data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/features.pickle
/filenames.pickle
6 changes: 6 additions & 0 deletions data/images/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/*.png
/*.PNG
/*.jpg
/*.JPG
/*.jpeg
/*.JPEG
6 changes: 6 additions & 0 deletions data/queries/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/*.png
/*.PNG
/*.jpg
/*.JPG
/*.jpeg
/*.JPEG
68 changes: 68 additions & 0 deletions dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import os
import pickle
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tqdm import tqdm

import paths

# File-name extensions (lower- and upper-case spellings) that get_file_list()
# looks for when deciding whether a file is an image.
extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']


def get_file_list(root_dir):
    """Return the sorted paths of all image files found under ``root_dir``.

    The directory tree is walked recursively with ``os.walk``. A file counts
    as an image when its name ends with one of the module-level
    ``extensions``.

    Args:
        root_dir: Directory to search.

    Returns:
        A sorted list of paths, each built by joining the containing
        directory with the file name.
    """
    # str.endswith accepts a tuple, so every suffix is checked in one call.
    # A suffix test (rather than a substring test) keeps out files such as
    # "photo.jpg.txt" that merely contain an image extension in their name.
    suffixes = tuple(extensions)
    file_list = []

    for root, _dirs, filenames in os.walk(root_dir):
        for filename in tqdm(filenames):
            if filename.endswith(suffixes):
                file_list.append(os.path.join(root, filename))
    return sorted(file_list)


def get_stored_features():
    """Load the pickled file names and feature vectors.

    Returns:
        A ``(stored_filenames, stored_feature_list)`` tuple read from
        ``paths.filenames_path`` and ``paths.features_path`` respectively.
    """
    # NOTE: unpickling can execute arbitrary code; only point these paths at
    # files that this project generated itself.
    # ``with`` closes each file as soon as it has been read.
    with open(paths.filenames_path, 'rb') as filenames_file:
        stored_filenames = pickle.load(filenames_file)
    with open(paths.features_path, 'rb') as features_file:
        stored_feature_list = pickle.load(features_file)
    return stored_filenames, stored_feature_list


def visualize_features():
    """Show a 2-D t-SNE map of the stored features, drawn with thumbnails.

    The stored feature vectors are reduced with PCA (to at most 100
    dimensions), embedded in 2-D with t-SNE using the cosine metric,
    standardized, and then plotted with a 45x45 thumbnail of every image at
    its embedded position.
    """
    filenames, features = get_stored_features()

    # PCA rejects n_components larger than min(n_samples, n_features), so the
    # target dimensionality is capped for image libraries smaller than 100.
    n_samples = len(features)
    n_dims = len(features[0]) if n_samples else 0
    num_feature_dimensions = min(100, n_samples, n_dims)
    pca = PCA(n_components=num_feature_dimensions)
    pca.fit(features)
    feature_list_compressed = pca.transform(features)

    tsne = TSNE(n_components=2, verbose=1, n_iter=4000, metric='cosine', init='pca')
    tsne_results = tsne.fit_transform(feature_list_compressed)
    # Standardize both embedding axes before plotting.
    tsne_results = StandardScaler().fit_transform(tsne_results)

    size = (45, 45)
    # Thumbnails are scaled to [0, 1] floats; ``size`` is reused below as the
    # figure size as well.
    imgs = [img_to_array(load_img(path, target_size=size)) / 255 for path in filenames]
    visualize_scatter_with_images(tsne_results, imgs=imgs, size=size, zoom=0.7)


def visualize_scatter_with_images(data, imgs, size=(28, 28), zoom=1):
    """Plot every image in ``imgs`` at its matching point in ``data``.

    Args:
        data: Sequence of ``(x, y)`` positions, one per image.
        imgs: Image arrays to draw, in the same order as ``data``.
        size: Passed to ``plt.subplots`` as ``figsize``.
        zoom: Zoom factor given to each ``OffsetImage``.
    """
    _, axes = plt.subplots(figsize=size)

    for (x, y), picture in tqdm(zip(data, imgs)):
        thumbnail = OffsetImage(picture, zoom=zoom)
        box = AnnotationBbox(thumbnail, (x, y), xycoords='data', frameon=False)
        axes.add_artist(box)

    # The data limits are fed in explicitly here before autoscaling.
    axes.update_datalim(data)
    axes.autoscale()
    axes.axis('off')

    plt.tight_layout(pad=1.2)
    plt.show()


# Running this file directly displays the similarity map of the stored features.
if __name__ == "__main__":
    visualize_features()
44 changes: 44 additions & 0 deletions features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pickle

import numpy as np
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.xception import preprocess_input
from tensorflow.keras.preprocessing import image
from tqdm import tqdm

import dataset
import paths

# Feature extractor used by extract_features(): Xception with ImageNet
# weights, no classification head (include_top=False), a 224x224x3 input and
# pooling='max'. It is built once, when this module is imported.
model = Xception(weights='imagenet', include_top=False, input_shape=(224, 224, 3), pooling='max')


def extract_features(img_path):
    """Return the flattened output of ``model`` for one image file.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis, passed through Xception's ``preprocess_input`` and then
    through the module-level ``model``.

    Args:
        img_path: Path of the image to embed.

    Returns:
        The model prediction for the image, flattened to one dimension.
    """
    picture = image.load_img(img_path, target_size=(224, 224))
    pixels = image.img_to_array(picture)
    # The model predicts on batches, so wrap the single image in a batch of one.
    batch = preprocess_input(np.expand_dims(pixels, axis=0))
    return model.predict(batch).flatten()


def extract_dataset_features():
    """Compute a feature vector for every image in the images folder.

    Returns:
        A ``(file_list, feature_list)`` tuple: the image paths found under
        ``paths.images_folder_path`` and, in the same order, the result of
        ``extract_features`` for each of them.
    """
    image_files = dataset.get_file_list(paths.images_folder_path)
    embeddings = [extract_features(path) for path in tqdm(image_files)]
    return image_files, embeddings


def update_features():
    """Recompute the dataset features and store them with pickle.

    The file list is written to ``paths.filenames_path`` and the feature
    list to ``paths.features_path``.

    Returns:
        The ``(file_list, features)`` tuple that was written.
    """
    file_list, features = extract_dataset_features()

    # ``with`` guarantees each file is flushed and closed once it is written,
    # instead of relying on garbage collection to do so.
    with open(paths.filenames_path, 'wb') as filenames_file:
        pickle.dump(file_list, filenames_file)
    with open(paths.features_path, 'wb') as features_file:
        pickle.dump(features, features_file)

    return file_list, features


# Running this file directly regenerates and stores the dataset features.
if __name__ == "__main__":
    update_features()
Binary file added img_search.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 5 additions & 0 deletions paths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Pickle file holding the list of image paths that features were computed for.
filenames_path = 'data/filenames.pickle'
# Pickle file holding the feature vectors, in the same order as the file names.
features_path = 'data/features.pickle'
# Folder scanned for the images that make up the searchable library.
images_folder_path = 'data/images'
# Folder scanned for query images when search.py is run without arguments.
query_images_folder_path = 'data/queries'

67 changes: 67 additions & 0 deletions search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import sys
from random import shuffle
import matplotlib.pyplot as plt
from scipy import spatial
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tqdm import tqdm

import dataset
import features
import paths


def visualize_similar_images(img_paths, max_query_imgs=7, max_matches=5):
    """Show each query image next to its closest matches from the library.

    One figure row is drawn per query image: the query itself in the first
    column, followed by up to ``max_matches`` related images.

    Args:
        img_paths: Paths of the query images.
        max_query_imgs: Only this many leading entries of ``img_paths`` are used.
        max_matches: Number of match columns per row.

    Raises:
        ValueError: If there is no query image to show.
    """
    img_paths = img_paths[:max_query_imgs]
    if not img_paths:
        # Fail with a clear message instead of matplotlib's complaint about
        # a grid with zero rows.
        raise ValueError('No query images to search for.')

    # squeeze=False keeps ``axs`` two-dimensional even for a single row or
    # column, so ``axs[i]`` is always a row of axes that can be indexed.
    fig, axs = plt.subplots(len(img_paths), max_matches + 1, figsize=(10, 10), squeeze=False)

    for i, img_path in enumerate(tqdm(img_paths)):
        similar = similar_images_paths(img_path, max_imgs=max_matches)
        __plot_similarities__(axs[i], img_path, similar)

    plt.tight_layout(h_pad=2)
    plt.show()


def __plot_similarities__(ax, img_path, similar):
    """Fill one row of axes with a query image and its related images.

    Args:
        ax: Indexable row of axes; index 0 receives the query image and
            indices 1.. receive the related images.
        img_path: Path of the query image.
        similar: Iterable of ``(path, similarity)`` pairs to draw, in order.
    """
    query_ax = ax[0]
    query_ax.set_title('Query image', size=7)
    query_ax.imshow(img_to_array(load_img(img_path)) / 255)
    query_ax.axis('off')
    query_ax.autoscale()

    # Column 0 is taken by the query, so the matches start at column 1.
    for column, (match_path, score) in enumerate(similar, start=1):
        match_ax = ax[column]
        match_ax.imshow(img_to_array(load_img(match_path)) / 255)
        match_ax.set_title('Related image\n similarity %f' % (score,), size=7)
        match_ax.axis('off')
        match_ax.autoscale()


def similar_images_paths(img_path, max_imgs=4):
    """Return the stored images most similar to the image at ``img_path``.

    Similarity is ``1 - (hamming + cosine) / 2``, where both distances are
    computed by SciPy between the query features and a stored feature vector.

    Args:
        img_path: Path of the query image.
        max_imgs: Upper bound on the number of results.

    Returns:
        A list of ``(filename, similarity)`` tuples, highest similarity first.
    """
    query_vector = features.extract_features(img_path)
    stored_names, stored_vectors = dataset.get_stored_features()

    def score(stored_vector):
        # Average the two distances, then flip so that larger means closer.
        hamming = spatial.distance.hamming(query_vector, stored_vector)
        cosine = spatial.distance.cosine(query_vector, stored_vector)
        return 1 - (hamming + cosine) / 2

    scored = [(name, score(vector)) for name, vector in zip(stored_names, stored_vectors)]
    ranked = sorted(scored, key=lambda pair: -pair[1])

    limit = min(max_imgs, len(stored_names))
    return ranked[:limit]


if __name__ == "__main__":
    # Image paths given on the command line take priority; otherwise the
    # images in the configured queries folder are searched in random order.
    cli_paths = sys.argv[1:]
    if cli_paths:
        visualize_similar_images(cli_paths)
    else:
        # A distinct name is used on purpose: assigning to ``paths`` here
        # would shadow the imported ``paths`` module for the rest of the script.
        query_paths = dataset.get_file_list(paths.query_images_folder_path)
        shuffle(query_paths)
        visualize_similar_images(query_paths)
Binary file added single_img_search.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tSNE similarity.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 4e359e1

Please sign in to comment.