-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 4e359e1
Showing
16 changed files
with
265 additions
and
0 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Hy Image Search | ||
An image search implementation in python using tensorflow keras, scikit-learn, scipy and matplotlib. | ||
|
||
|
||
The image embeddings are generated from Xception imagenet (*can be changed/tuned from [features.py](features.py)*). | ||
Currently the embeddings are stored using pickle, but a database may be used instead. | ||
Image embeddings are compared using cosine similarity and Hamming distance. | ||
|
||
## Running search on image(s) from console args | ||
1. Run *[search.py](search.py)* with image path(s) as arguments and the script will display the top matches from the underlying image library | ||
|
||
**Sample** | ||
<img src="single_img_search.png" width="600em" hspace=10 vspace=10/> | ||
|
||
## Running search on a custom set of images | ||
1. Transfer the images to the queries folder *[data/queries/](data/queries)* or alternatively change `query_images_folder_path` in *[paths.py](paths.py)* to the location of your image set. | ||
2. Run *[search.py](search.py)* and the script will display the top matches from the underlying image library | ||
|
||
**Sample** | ||
<img src="img_search.png" width="600em" hspace=10 vspace=10/> | ||
|
||
|
||
## Generating embeddings for a new image set | ||
1. Transfer the images to the images folder *[data/images/](data/images)* or alternatively change `images_folder_path` in *[paths.py](paths.py)* to the location of your image set. | ||
2. Run *[features.py](features.py)* and you should have the embeddings generated | ||
|
||
## Viewing the similarity map in underlying image library (PCA and t-SNE) | ||
1. Run *[dataset.py](dataset.py)* and you should get a visualization similar to this | ||
|
||
<img src="tSNE similarity.png" width="1000em" hspace=10 vspace=10/> | ||
|
||
|
||
---------- | ||
|
||
*The sample images used here are from idenprof dataset* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
/features.pickle | ||
/filenames.pickle |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
/*.png | ||
/*.PNG | ||
/*.jpg | ||
/*.JPG | ||
/*.jpeg | ||
/*.JPEG |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
/*.png | ||
/*.PNG | ||
/*.jpg | ||
/*.JPG | ||
/*.jpeg | ||
/*.JPEG |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import os | ||
import pickle | ||
import matplotlib.pyplot as plt | ||
from matplotlib.offsetbox import OffsetImage, AnnotationBbox | ||
from sklearn.decomposition import PCA | ||
from sklearn.manifold import TSNE | ||
from sklearn.preprocessing import StandardScaler | ||
from tensorflow.keras.preprocessing.image import load_img | ||
from tensorflow.keras.preprocessing.image import img_to_array | ||
from tqdm import tqdm | ||
|
||
import paths | ||
|
||
# File-name suffixes (case-sensitive) that identify a file as an image.
extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']


def get_file_list(root_dir):
    """Collect the paths of all image files found under ``root_dir``.

    The directory tree is walked recursively with ``os.walk``; a file is kept
    when its name ends with one of ``extensions``.

    Args:
        root_dir: Directory to scan.

    Returns:
        Sorted list of image paths, each joined from the walked directory and
        the file name.
    """
    # str.endswith needs a tuple. Matching the suffix (rather than testing
    # ``ext in filename``) keeps out names such as "photo.jpg.bak".
    image_suffixes = tuple(extensions)
    file_list = []

    for root, _dirs, filenames in os.walk(root_dir):
        for filename in tqdm(filenames):
            if filename.endswith(image_suffixes):
                file_list.append(os.path.join(root, filename))
    return sorted(file_list)
|
||
|
||
def get_stored_features():
    """Load the pickled file names and feature list from disk.

    Returns:
        Tuple ``(stored_filenames, stored_feature_list)`` read from
        ``paths.filenames_path`` and ``paths.features_path``.
    """
    # NOTE: unpickling can execute arbitrary code; only point these paths at
    # files this project generated itself.
    # Context managers make sure both file handles are closed after loading.
    with open(paths.filenames_path, 'rb') as filenames_file:
        stored_filenames = pickle.load(filenames_file)
    with open(paths.features_path, 'rb') as features_file:
        stored_feature_list = pickle.load(features_file)
    return stored_filenames, stored_feature_list
|
||
|
||
def visualize_features():
    """Show a 2-D t-SNE map of the stored embeddings drawn as thumbnails.

    The stored features are reduced with PCA, embedded in two dimensions with
    t-SNE (cosine metric), standardised, and then plotted with every point
    rendered as a 45x45 thumbnail of its image.
    """
    filenames, features = get_stored_features()

    # PCA rejects n_components larger than min(n_samples, n_features), so the
    # target of 100 dimensions is capped for small image libraries.
    n_samples = len(features)
    n_features = len(features[0]) if n_samples else 0
    num_feature_dimensions = min(100, n_samples, n_features)
    pca = PCA(n_components=num_feature_dimensions)
    pca.fit(features)
    feature_list_compressed = pca.transform(features)

    tsne = TSNE(n_components=2, verbose=1, n_iter=4000, metric='cosine', init='pca')
    tsne_results = tsne.fit_transform(feature_list_compressed)
    tsne_results = StandardScaler().fit_transform(tsne_results)

    size = (45, 45)
    # Thumbnails are scaled to the [0, 1] range before being drawn.
    imgs = [img_to_array(load_img(path, target_size=size)) / 255 for path in filenames]
    visualize_scatter_with_images(tsne_results, imgs=imgs, size=size, zoom=0.7)
|
||
|
||
def visualize_scatter_with_images(data, imgs, size=(28, 28), zoom=1):
    """Draw each image at its 2-D coordinate on a single axis-free figure.

    Args:
        data: Sequence of ``(x, y)`` points, one per image.
        imgs: Images to draw, in the same order as ``data``.
        size: Passed to ``plt.subplots`` as ``figsize``.
        zoom: Zoom factor handed to every ``OffsetImage``.
    """
    figure, axes = plt.subplots(figsize=size)
    placed = []
    for (x, y), picture in tqdm(zip(data, imgs)):
        thumbnail = OffsetImage(picture, zoom=zoom)
        box = AnnotationBbox(thumbnail, (x, y), xycoords='data', frameon=False)
        placed.append(axes.add_artist(box))

    # Register the points with the axes' data limits before autoscaling.
    axes.update_datalim(data)
    axes.autoscale()
    axes.axis('off')
    plt.tight_layout(pad=1.2)
    plt.show()
|
||
|
||
# Script entry point: render the similarity map of the stored embeddings.
if __name__ == "__main__":
    visualize_features()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import pickle | ||
|
||
import numpy as np | ||
from tensorflow.keras.applications.xception import Xception | ||
from tensorflow.keras.applications.xception import preprocess_input | ||
from tensorflow.keras.preprocessing import image | ||
from tqdm import tqdm | ||
|
||
import dataset | ||
import paths | ||
|
||
# Feature extractor shared by this module: Xception with ImageNet weights,
# no classification top, max pooling, built for 224x224 three-channel inputs.
model = Xception(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
    pooling='max',
)
|
||
|
||
def extract_features(img_path):
    """Compute the embedding of one image with the module-level ``model``.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The model prediction for the image, flattened to one dimension.
    """
    pixels = image.img_to_array(image.load_img(img_path, target_size=(224, 224)))
    # The model predicts on batches, so wrap the single image in a batch of one.
    batch = preprocess_input(np.expand_dims(pixels, axis=0))
    return model.predict(batch).flatten()
|
||
|
||
def extract_dataset_features():
    """Extract embeddings for every image under ``paths.images_folder_path``.

    Returns:
        Tuple ``(file_list, feature_list)`` where ``feature_list[i]`` is the
        embedding of ``file_list[i]``.
    """
    file_list = dataset.get_file_list(paths.images_folder_path)
    feature_list = [extract_features(image_path) for image_path in tqdm(file_list)]
    return file_list, feature_list
|
||
|
||
def update_features():
    """Recompute the dataset embeddings and persist them with pickle.

    The file list is written to ``paths.filenames_path`` and the features to
    ``paths.features_path``.

    Returns:
        Tuple ``(file_list, features)`` as produced by
        ``extract_dataset_features``.
    """
    file_list, features = extract_dataset_features()

    # Context managers guarantee the pickles are flushed and the handles
    # closed before the function returns.
    with open(paths.filenames_path, 'wb') as filenames_file:
        pickle.dump(file_list, filenames_file)
    with open(paths.features_path, 'wb') as features_file:
        pickle.dump(features, features_file)

    return file_list, features
|
||
|
||
# Script entry point: regenerate and store the embeddings of the image library.
if __name__ == "__main__":
    update_features()
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Pickle files holding the indexed image paths and their embeddings.
filenames_path = 'data/filenames.pickle'
features_path = 'data/features.pickle'

# Folder with the image library to index, and folder with the query images.
images_folder_path = 'data/images'
query_images_folder_path = 'data/queries'
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import sys | ||
from random import shuffle | ||
import matplotlib.pyplot as plt | ||
from scipy import spatial | ||
from tensorflow.keras.preprocessing.image import load_img | ||
from tensorflow.keras.preprocessing.image import img_to_array | ||
from tqdm import tqdm | ||
|
||
import dataset | ||
import features | ||
import paths | ||
|
||
|
||
def visualize_similar_images(img_paths, max_query_imgs=7, max_matches=5):
    """Plot each query image next to its closest matches from the library.

    One row is drawn per query image: the query in the first column, followed
    by up to ``max_matches`` similar images.

    Args:
        img_paths: Paths of the query images.
        max_query_imgs: Maximum number of query images (rows) to show.
        max_matches: Maximum number of matches to show per query image.

    Raises:
        ValueError: If there are no query images to show.
    """
    img_paths = img_paths[:max_query_imgs]
    if not img_paths:
        raise ValueError('No query images to search for.')

    # squeeze=False keeps `axs` two-dimensional even for a single query image
    # (or a single column), so every axs[i] is an indexable row of axes.
    fig, axs = plt.subplots(
        len(img_paths), max_matches + 1, figsize=(10, 10), squeeze=False
    )

    for i, img_path in enumerate(tqdm(img_paths)):
        similar = similar_images_paths(img_path, max_imgs=max_matches)
        __plot_similarities__(axs[i], img_path, similar)

    plt.tight_layout(h_pad=2)
    plt.show()
|
||
|
||
def __plot_similarities__(ax, img_path, similar):
    """Fill one row of axes with a query image and its matches.

    Args:
        ax: Indexable row of axes; index 0 receives the query image and the
            following indices receive the matches in order.
        img_path: Path of the query image.
        similar: Iterable of ``(path, similarity)`` pairs to draw.
    """
    query_ax = ax[0]
    query_ax.set_title('Query image', size=7)
    query_ax.imshow(img_to_array(load_img(img_path)) / 255)
    query_ax.axis('off')
    query_ax.autoscale()

    for column, (path, similarity) in enumerate(similar, start=1):
        match_ax = ax[column]
        match_ax.imshow(img_to_array(load_img(path)) / 255)
        match_ax.set_title('Related image\n similarity %f' % (similarity,), size=7)
        match_ax.axis('off')
        match_ax.autoscale()
|
||
|
||
def similar_images_paths(img_path, max_imgs=4):
    """Rank the stored images by similarity to the image at ``img_path``.

    The similarity of two embeddings is one minus the mean of their Hamming
    and cosine distances.

    Args:
        img_path: Path of the query image.
        max_imgs: Maximum number of results to return.

    Returns:
        List of ``(filename, similarity)`` tuples, most similar first.
    """
    query_features = features.extract_features(img_path)
    stored_filenames, stored_encodings = dataset.get_stored_features()

    def score(encoding):
        # Average the two distances, then turn the result into a similarity.
        h_distance = spatial.distance.hamming(query_features, encoding)
        c_distance = spatial.distance.cosine(query_features, encoding)
        return 1 - (h_distance + c_distance) / 2

    similarities = [
        (filename, score(encoding))
        for filename, encoding in zip(stored_filenames, stored_encodings)
    ]
    similarities.sort(key=lambda pair: -pair[1])

    limit = min(max_imgs, len(stored_filenames))
    return similarities[:limit]
|
||
|
||
if __name__ == "__main__":
    # Query images come from the command line when given; otherwise every
    # image in the configured queries folder is used, in random order.
    if len(sys.argv) > 1:
        visualize_similar_images(sys.argv[1:])
    else:
        # A distinct name keeps the imported `paths` module from being rebound.
        query_paths = dataset.get_file_list(paths.query_images_folder_path)
        shuffle(query_paths)
        visualize_similar_images(query_paths)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.