Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Local Experience #46

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
data/*
!data/db/index.json
!data/images
data/user-images
*/__pycache__/
*/.ipynb_checkpoints/
.DS_Store
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,6 @@ cd ml && python server.py --index-loc ../data/

# 2. Running the notebook
cd nbs && jupyter notebook

# 3. Running the indexer
```
1 change: 1 addition & 0 deletions data/user-images/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Add Your Images in this directory
4 changes: 4 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ services:
context: .
dockerfile: Dockerfile
image: scanpix-server
profiles:
- local
- demo
ports:
- "5001:5001"
volumes:
Expand All @@ -20,6 +23,7 @@ services:
profiles:
- demo
- index
# - local
ports:
- "5000:5000"
volumes:
Expand Down
8 changes: 6 additions & 2 deletions ml/media_processor.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import os

import numpy as np
from transformers import CLIPProcessor, CLIPModel
from helpers import get_image_from_url
import imagehash

from utils.constants import project_root


class ClipModel:
def __init__(self):
self.model = CLIPModel.from_pretrained("models/clip-vit-large-patch14")
self.model = CLIPModel.from_pretrained(f"{project_root}/models/clip-vit-large-patch14")
self.processor = CLIPProcessor.from_pretrained("models/clip-vit-large-patch14")

def get_embedding(self, input, type):
Expand All @@ -24,7 +28,7 @@ def get_embedding(self, input, type):
return embedding.ravel().tolist()


class MediaProcessor():
class MediaProcessor:
def __init__(self):
self.clip_model = ClipModel()

Expand Down
34 changes: 16 additions & 18 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
flask=2.1.2
ipykernel=6.13.0
ipython=8.3.0
ipywidgets=7.7.0
jupyter-client=7.3.1
jupyter-core=4.10.0
jupyterlab-pygments=0.2.2
jupyterlab-widgets=1.1.0
matplotlib=3.5.2
matplotlib-inline=0.1.3
notebook=6.4.11
numpy=1.22.3
numpy-base=1.22.3
pandas=1.4.2
pillow=9.0.1
requests=2.27.1
seaborn=0.11.2
transformers=4.19.2
flask==2.1.2
ipykernel==6.13.0
ipython==8.3.0
ipywidgets==7.7.0
jupyterlab-pygments==0.2.2
matplotlib==3.5.2
matplotlib-inline==0.1.3
notebook==6.4.11
numpy==1.22.3
pandas==1.4.2
pillow==9.0.1
requests==2.27.1
seaborn==0.11.2
transformers==4.19.2
torch
torchvision
tqdm~=4.64.0
imagehash~=4.2.1
5 changes: 3 additions & 2 deletions test/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import unittest
from utils.util import extract_filename,check_if_image
from utils.util import extract_filename, check_if_image


class TestWatcher(unittest.TestCase):

def test_extract_filename(self):
self.assertEqual(extract_filename("/home/what/abcd_efgh.jpg"),"abcd_efgh.jpg")
self.assertEqual(extract_filename("/home/what/abcd_efgh.jpg"), "abcd_efgh.jpg")

def test_check_if_image(self):
self.assertTrue(check_if_image("abcdef.jpg"))
Expand Down
9 changes: 9 additions & 0 deletions utils/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import os

from utils.util import is_docker


project_root = os.path.dirname(os.path.dirname(__file__))
BASEURL = "http://scanpix:5001/process_image" if is_docker() else "http://0.0.0.0:5001/process_image"
data_dir = "/worker-app/data" if is_docker() else f"{project_root}/data"
image_directories = [data_dir + "/images", data_dir + "/user-images"]
20 changes: 17 additions & 3 deletions utils/util.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,34 @@
import os
import re


def check_if_image(file_name):
    """Return True when *file_name* ends in a supported image extension.

    The extension is the text after the last '.' in the name and must be
    exactly one of ``jpg``, ``jpeg`` or ``png`` (the comparison is
    case-sensitive). A name without any '.' has no extension and is
    reported as not-an-image instead of raising.
    """
    # rpartition returns an empty separator when the name contains no '.',
    # which lets us reject extension-less names without a regex match object.
    _, dot, extension = file_name.rpartition(".")
    return bool(dot) and extension in {"jpg", "jpeg", "png"}


'''function to extract filename from path'''


def extract_filename(path):
    """Return the last component of a '/'-separated *path*.

    Everything after the final '/' is returned; a path that ends in '/'
    yields an empty string. A path that contains no '/' at all is already
    a bare file name and is returned unchanged instead of raising.
    """
    # rpartition puts the whole string in the tail slot when '/' is absent.
    return path.rpartition("/")[2]


def check_if_image_in_index(index_list, file_name):
    """Return True if any entry of *index_list* has ``"file_name"`` equal to *file_name*.

    Each entry is looked up by its ``"file_name"`` key; the scan stops at
    the first match.
    """
    return any(entry["file_name"] == file_name for entry in index_list)


def is_docker():
    """Report whether the process appears to be running inside Docker.

    Two checks are made, in order:

    1. the marker file ``/.dockerenv`` exists, or
    2. ``/proc/self/cgroup`` is a regular file and at least one of its
       lines contains the substring ``docker``.

    Returns:
        bool: True if either check succeeds, False otherwise.
    """
    if os.path.exists('/.dockerenv'):
        return True

    cgroup_path = '/proc/self/cgroup'
    if not os.path.isfile(cgroup_path):
        return False

    # Open through a context manager so the handle is closed deterministically
    # rather than left for the garbage collector.
    with open(cgroup_path) as cgroup_file:
        return any('docker' in line for line in cgroup_file)
24 changes: 12 additions & 12 deletions worker/index_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@
import os
from singleton_decorator import singleton

from utils.constants import BASEURL, data_dir

BASEURL = "http://scanpix:5001/process_image"

def append_to_json(filepath, data):

# if file is empty dump to empty file and return
if(os.stat(filepath).st_size == 0):
if os.stat(filepath).st_size == 0:
with open(filepath, "w", encoding='utf-8') as f:
json.dump([data], f)
return
Expand All @@ -20,8 +19,8 @@ def append_to_json(filepath, data):
# edit the file in situ - first open it in read/write mode
with open(filepath, 'r+') as f:

f.seek(0, 2) # move to end of file
index = f.tell() # find index of last byte
f.seek(0, 2) # move to end of file
index = f.tell() # find index of last byte

# walking back from the end of file, find the index
# of the original JSON List's closing ']'
Expand All @@ -36,27 +35,28 @@ def append_to_json(filepath, data):
f.seek(index)
f.write(new_ending)


@singleton
class Indexer:

def index(self, img_name, img_path):
import re
img_path = re.sub("worker-app","scanpix",img_path)
res = requests.get(url=BASEURL, params={'url': img_path }).json()
img_path = re.sub("worker-app", "scanpix", img_path)
res = requests.get(url=BASEURL, params={'url': img_path}).json()
res['file_name'] = img_name
print("got transformed image from server!")
return res

def dump_to_json(self, json_index):
append_to_json("/worker-app/data/db/index.json", json_index)
append_to_json(data_dir + "/db/index.json", json_index)
print("Dumped to index.json!")

def remove_from_json(self, file_name):
with open("/worker-app/data/db/index.json", "r") as f:
with open(data_dir + "/db/index.json", "r") as f:
raw_data = f.read()
index_list = json.loads(raw_data)

index_list = list(filter(lambda x: x["file_name"]!=file_name, index_list))
with open("/worker-app/data/db/index.json", "w", encoding = "utf-8") as f:
json.dump(index_list, f, indent = 4)
index_list = list(filter(lambda x: x["file_name"] != file_name, index_list))
with open(data_dir + "/db/index.json", "w", encoding="utf-8") as f:
json.dump(index_list, f, indent=4)
print(f"removed {file_name} from json")
1 change: 1 addition & 0 deletions worker/start_worker.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash

#TODO (soumyadeep): Sleep only within docker not always
sleep 60
python3 -u watch_and_index.py
30 changes: 18 additions & 12 deletions worker/watch_and_index.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,31 @@
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import re
import os

from index_image import *
from utils.util import *
from utils.constants import data_dir, image_directories

''' function to index all existing files not included in index.json initially'''


def index_unwatched_files():
print("indexing unwatched files....")
indexer = Indexer()
with open("/worker-app/data/db/index.json") as f:
with open(data_dir + "/db/index.json") as f:
raw_data = f.read()
index_list = json.loads(raw_data)

for file_name in os.listdir("/worker-app/data/images"):
if(check_if_image(file_name) and (not check_if_image_in_index(index_list, file_name))):
json_res = indexer.index(file_name,f"/worker-app/data/images/{file_name}")
indexer.dump_to_json(json_res)
for directory in image_directories:
for file_name in os.listdir(directory):
if check_if_image(file_name) and (not check_if_image_in_index(index_list, file_name)):
json_res = indexer.index(file_name, f"{directory}/{file_name}")
indexer.dump_to_json(json_res)


''' watcher class to monitor directories '''


class Watcher:

def __init__(self, directories=["."], handler=FileSystemEventHandler(), recursive=False):
Expand All @@ -44,15 +49,16 @@ def run(self):
print("\nWatcher Terminated\n")



'''Custom Handler class to handle events returned by the watcher'''


class WatchHandler(FileSystemEventHandler):

def on_created(self, event):
file_name = extract_filename(event.src_path)
indexer = Indexer()
print(f"{file_name} created event!")
if(check_if_image(file_name)):
if check_if_image(file_name):
json_res = indexer.index(file_name, event.src_path)
indexer.dump_to_json(json_res)
else:
Expand All @@ -62,13 +68,13 @@ def on_deleted(self, event):
file_name = extract_filename(event.src_path)
indexer = Indexer()
print(f"{file_name} delete event!")
if(check_if_image(file_name)):
if check_if_image(file_name):
indexer.remove_from_json(file_name)
else:
raise Exception("Invalid type of file, cannot be deleted from index.json!")


if __name__ == "__main__":
index_unwatched_files()
paths = ["/worker-app/data/images"]
w = Watcher(paths, WatchHandler(), True)
w = Watcher(image_directories, WatchHandler(), True)
w.run()