Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vision-web worker #33

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
name: Build and Push
strategy:
matrix:
service: [tesseract-lambda-python,keyword-lambda-python,tesseract-web,tesseract-lambda]
service: [tesseract-lambda-python,keyword-lambda-python,tesseract-web,tesseract-lambda,vision-web]
fail-fast: false
runs-on: ubuntu-latest
steps:
Expand Down
10 changes: 10 additions & 0 deletions workers/vision-web/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# true/false. If true, enables some dev helpers
ENABLE_DEV=true
# Points to where the backend is located. This should not have a trailing slash
ASHIRT_BACKEND_URL=http://10.0.0.100:3000
# The access key of a headless user
ASHIRT_ACCESS_KEY=gR6nVtaQmp2SvzIqLUWdedDk
# The secret key (in base64 format -- how it is delivered via the ashirt UI) of a headless user
ASHIRT_SECRET_KEY=WvtvxFaJS0mPs82nCzqamI+bOGXpq7EIQhg4UD8nxS5448XG9N0gNAceJGBLPdCA3kAzC4MdUSHnKCJ/lZD++A==
# Comma-separated list of questions to ask about each image (a question cannot itself contain a comma)
VISION_QUESTIONS="What does the image say?"
25 changes: 25 additions & 0 deletions workers/vision-web/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# compiled output
__pycache__

# OS
.DS_Store

# IDEs and editors
/.idea
.project
.classpath
.c9/
*.launch
.settings/
*.sublime-workspace

# IDE - VSCode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json

# local env files
.env*
!.env.example
37 changes: 37 additions & 0 deletions workers/vision-web/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
FROM --platform=linux/amd64 python:3.12-slim AS builder

WORKDIR /build

COPY Pipfile.lock Pipfile ./
RUN pip install --user pipenv
RUN /root/.local/bin/pipenv requirements > requirements.txt
###

FROM --platform=linux/amd64 python:3.12-slim AS runner

WORKDIR /app
COPY --from=builder /build/requirements.txt .
###################################
# Install other dependencies here #
###################################

RUN pip install -r requirements.txt
RUN pip install --pre onnxruntime-genai numpy huggingface_hub
# Download the required model components
RUN huggingface-cli download microsoft/Phi-3-vision-128k-instruct-onnx-cpu --include cpu-int4-rtn-block-32-acc-level-4/* --local-dir model/

# COPY bin/docker_start.sh ./start.sh
COPY src .

EXPOSE 8080

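# Run as an unprivileged user. UID 405 is Alpine's "guest" account; this image is Debian-based (python:3.12-slim), so the UID has no matching passwd entry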
USER 405

# some guidance on using gunicorn in containers:
# https://pythonspeed.com/articles/gunicorn-in-docker/
CMD ["gunicorn", "--worker-tmp-dir", "/dev/shm", \
"--workers=1", "--worker-class=gthread", \
"--log-file=-", \
"--timeout=600", \
"-b", "0.0.0.0:8080", "wsgi:app"]
30 changes: 30 additions & 0 deletions workers/vision-web/Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
FROM --platform=linux/amd64 python:3.12-slim

# With help from https://pipenv.pypa.io/en/latest/basics/#pipenv-and-docker-containers

WORKDIR /app
ENV PIPENV_VENV_IN_PROJECT=1

RUN pip install --user pipenv huggingface_hub
# Download the required model components
RUN /root/.local/bin/huggingface-cli download microsoft/Phi-3-vision-128k-instruct-onnx-cpu --include cpu-int4-rtn-block-32-acc-level-4/* --local-dir model/

###################################
# Install other dependencies here #
###################################

COPY Pipfile.lock Pipfile ./
RUN /root/.local/bin/pipenv sync
RUN /root/.local/bin/pipenv run pip install --pre onnxruntime-genai numpy

# Flask development settings (key=value form; the space-separated ENV form is legacy)
ENV FLASK_APP=src/wsgi.py
ENV FLASK_DEBUG=1
# NOTE: FLASK_ENV was removed in Flask 2.3; FLASK_DEBUG above is what enables debug mode. Kept for older Flask versions.
ENV FLASK_ENV=development

EXPOSE 8080

COPY src ./src/

CMD ["/root/.local/bin/pipenv", "run", \
"flask", "run", \
"--host=0.0.0.0", "--port=8080"]
30 changes: 30 additions & 0 deletions workers/vision-web/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
imageNameDev = vision-web-dev
imageName = vision-web

.PHONY: build_dev
build_dev:
docker build -t $(imageNameDev) -f Dockerfile.dev .

# Build the production image. The release Dockerfile in this directory is named "Dockerfile"
.PHONY: build_release
build_release:
	docker build -t $(imageName) -f Dockerfile .

# Start the worker through docker compose, rebuilding the image first
.PHONY: run_dev
run_dev:
	docker compose up --build

.PHONY: test-test
test-test:
curl -XPOST "http://localhost:9000/test" -d '{"type": "test"}'

.PHONY: test-process-img
test-process-img:
curl -XPOST "http://localhost:9000/process" \
-H "Content-Type: application/json" \
-d '{"type": "evidence_created", "evidenceUuid": "seed_dursleys", "operationSlug": "HPSS", "contentType": "image"}'

.PHONY: test-unsupported
test-unsupported:
curl -XPOST "http://localhost:9000/process" \
-H "Content-Type: application/json" \
-d '{"type": "unsupported"}'

17 changes: 17 additions & 0 deletions workers/vision-web/Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]
flask = "*"
gunicorn = "*"
requests = "*"
structlog = "*"
python-dotenv = "*"

[dev-packages]
autopep8 = "*"

[requires]
python_version = "3.12"
318 changes: 318 additions & 0 deletions workers/vision-web/Pipfile.lock

Large diffs are not rendered by default.

55 changes: 55 additions & 0 deletions workers/vision-web/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Vision Web Worker

* [Flask](https://flask.palletsprojects.com/en/2.1.x/), to manage the network connection
* [gunicorn](https://gunicorn.org/), for production deployment
* [requests](https://docs.python-requests.org/en/latest/), to handle contacting the ashirt instance
* [structlog](https://www.structlog.org/en/stable/), for structured logging
* [python-dotenv](https://pypi.org/project/python-dotenv/), for environment loading (this is primarily aimed at development)

In addition, this service tries to be as type-safe as possible, so extra effort has been made to specify types wherever possible.

To get up and running, open the project root in a terminal, install pipenv, and run `pipenv shell`, then `pipenv install`

## Deploying to AShirt

The typical configuration for deploying this worker archetype is going to look roughly like this:

```json
{
"type": "web",
"version": 1,
"url": "http://vision-web/process"
}
```

Note the url: this is likely what will change for your version.

## Adding custom logic

Most programs should be able to ignore most of the code and instead focus on the `actions` directory, specifically the handlers for the events you want to target.

## Integrating into AShirt testing environment

Notably, the dev port exposed is port 8080, so all port mapping has to be done with that in mind. When running locally (not via docker), the exposed port is configurable.

This configuration should work for your scenario, though the volumes mapped might need to be different.

```yaml
vision-web:
build:
context: ashirt-workers/workers/vision-web
dockerfile: Dockerfile.dev
ports:
- 3004:8080
restart: on-failure
volumes:
- ./ashirt-workers/workers/vision-web/:/app/
environment:
ENABLE_DEV: true
ASHIRT_BACKEND_URL: http://backend:3000
ASHIRT_ACCESS_KEY: gR6nVtaQmp2SvzIqLUWdedDk
ASHIRT_SECRET_KEY: WvtvxFaJS0mPs82nCzqamI+bOGXpq7EIQhg4UD8nxS5448XG9N0gNAceJGBLPdCA3kAzC4MdUSHnKCJ/lZD++A==
```


Note that the mapped volume overwrites the source files placed in the image. This allows for hot-reloading of the worker when deployed to docker-compose. If you don't want or need hot reloading, then you can simply omit this declaration.
12 changes: 12 additions & 0 deletions workers/vision-web/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
version: '3'
services:
  app:
    build:
      # Use the dev image: it serves the app from /app/src, which is the path the
      # volume below mounts over, so local source changes are picked up.
      dockerfile: Dockerfile.dev
      context: .
    env_file:
      - .env
    ports:
      # host:container -- the Makefile test targets call localhost:9000
      - 9000:8080
    volumes:
      - ./src:/app/src
17 changes: 17 additions & 0 deletions workers/vision-web/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
blinker==1.8.2
certifi==2024.6.2
charset-normalizer==3.3.2
click==8.1.7
Flask==3.0.3
gunicorn==22.0.0
idna==3.7
itsdangerous==2.2.0
Jinja2==3.1.4
MarkupSafe==2.1.5
onnxruntime-genai==0.3.0rc2
packaging==24.1
python-dotenv==1.0.1
requests==2.32.3
structlog==24.2.0
urllib3==2.2.2
Werkzeug==3.0.3
Empty file.
2 changes: 2 additions & 0 deletions workers/vision-web/src/actions/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .process_handler import *
from .types import *
100 changes: 100 additions & 0 deletions workers/vision-web/src/actions/process_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from request_types import EvidenceCreatedBody
from services import AShirtRequestsService
from constants import SupportedContentType
from .types import ProcessResultDTO
import onnxruntime_genai as og
import uuid
import os
import io

# Load the model, its multimodal processor and the token stream decoder once, at import
# time, so every do_ai() call reuses them instead of reloading the model.
# The path is relative to the working directory; it matches the directory the
# Dockerfiles download the model into (--local-dir model/).
model = og.Model('model/cpu-int4-rtn-block-32-acc-level-4/')
processor = model.create_multimodal_processor()
tokenizer_stream = processor.create_stream()

def handle_evidence_created(body: EvidenceCreatedBody) -> ProcessResultDTO:
    """
    Describe an image evidence by asking the vision model a list of questions about it.

    Called when a web request comes in, is validated, and indicates that work needs to be
    done on a piece of evidence.

    The questions are read from the VISION_QUESTIONS environment variable as a
    comma-separated list; a built-in default list is used when the variable is unset or
    contains no questions.

    :param body: the validated "evidence created" request
    :return: ``processed`` with one ``Q:``/``A:`` chunk per question, ``rejected`` when the
        content type is not supported, or ``error`` when the evidence content was not
        returned as bytes
    """
    accepted_types = [
        SupportedContentType.IMAGE
    ]

    # Guard clause: anything that is not an accepted content type is not ours to process
    if body.content_type not in accepted_types:
        return {
            'action': 'rejected'
        }

    ashirt_svc = AShirtRequestsService(
        os.environ.get('ASHIRT_BACKEND_URL', ''),
        os.environ.get('ASHIRT_ACCESS_KEY', ''),
        os.environ.get('ASHIRT_SECRET_KEY', '')
    )
    # Gather content
    evidence_content = ashirt_svc.get_evidence_content(
        body.operation_slug, body.evidence_uuid, 'media'
    )
    if not isinstance(evidence_content, bytes):
        # Only raw bytes can be written out as an image. Report the failure explicitly
        # instead of returning None or failing later while writing the file.
        if isinstance(evidence_content, str):
            print("got a string response")
        return {
            'action': 'error',
            'content': 'Unable to retrieve evidence content'
        }

    default_questions = [
        "What times are shown in the image?",
        "Which applications are open in the image?",
        "Which operating system is being used in the image?",
        "What does the image say?"
    ]
    # Split the configured questions, dropping surrounding whitespace and empty entries
    configured = os.environ.get('VISION_QUESTIONS', '')
    questions = [q.strip() for q in configured.split(',') if q.strip()]
    if not questions:
        questions = default_questions

    # The image is handed to the model by path, so write it to a uniquely named temp file
    temp_image_path = f"/tmp/{uuid.uuid4()}.png"
    try:
        with open(temp_image_path, "wb") as f:
            f.write(evidence_content)
        img = og.Images.open(temp_image_path)
        # Run inference for each question
        resp = [do_ai(question=q, image=img) for q in questions]
    finally:
        # Always delete the image file, even when inference fails
        try:
            os.remove(temp_image_path)
        except FileNotFoundError:
            pass

    chunks = [f'Q:{q}\nA:{a}\n' for q, a in zip(questions, resp)]
    return {
        'action': 'processed',
        'content': '\n'.join(chunks)
    }

def do_ai(question, image=None):
    """
    Ask the model a single question, optionally about an image, and return its answer.

    The prompt is a user turn (with an image placeholder when an image is given)
    followed by the assistant marker. Tokens are decoded and echoed to stdout as they
    are generated.

    :param question: the question text placed in the user turn
    :param image: the image(s) passed to the processor, or None for a text-only prompt
    :return: the generated text with any '</s>' markers removed
    """
    if image:
        print("Loading image...")
        prompt = "<|user|>\n<|image_1|>\n"
    else:
        print("No image provided")
        prompt = "<|user|>\n"
    prompt = f"{prompt}{question}<|end|>\n<|assistant|>\n"

    print("Processing image and prompt...")
    inputs = processor(prompt, images=image)

    print("Generating response...")
    params = og.GeneratorParams(model)
    params.set_inputs(inputs)
    params.set_search_options(max_length=3072)

    generator = og.Generator(model, params)
    pieces = []
    while not generator.is_done():
        generator.compute_logits()
        generator.generate_next_token()

        piece = tokenizer_stream.decode(generator.get_next_tokens()[0])
        pieces.append(piece)
        print(piece, end='', flush=True)

    # Visually separate this answer from whatever is printed next
    for _ in range(3):
        print()

    # Delete the generator to free the captured graph before creating another one
    del generator
    return "".join(pieces).replace('</s>', '')
20 changes: 20 additions & 0 deletions workers/vision-web/src/actions/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from typing import Literal, Optional, TypedDict, Union


class _ProcessResultNormalRequired(TypedDict):
    # Keys every 'rejected' / 'error' result must carry
    action: Literal['rejected', 'error']


class ProcessResultNormal(_ProcessResultNormalRequired, total=False):
    """
    Result for evidence that was rejected or could not be processed.

    ``action`` is required. ``content`` is optional, so a bare ``{'action': 'rejected'}``
    is a valid value; when present it may be a message or None.
    """
    # Optional explanation; may be omitted entirely or set to None
    content: Optional[str]


# Result for evidence that was processed: ``action`` is always 'processed' and
# ``content`` carries the produced text. Both keys are required.
ProcessResultComplete = TypedDict(
    'ProcessResultComplete',
    {
        'action': Literal['processed'],
        'content': str,
    },
)


# Result whose only (required) key is ``action``, always set to 'deferred'.
ProcessResultDeferred = TypedDict(
    'ProcessResultDeferred',
    {
        'action': Literal['deferred'],
    },
)


# Any of the result shapes declared in this module
ProcessResultDTO = Union[
    ProcessResultNormal,
    ProcessResultComplete,
    ProcessResultDeferred,
]
4 changes: 4 additions & 0 deletions workers/vision-web/src/constants/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .supported_content_type import *

# Shared string constants.
# NOTE(review): presumably the keys under which the application state and the logger
# are stored/looked up -- their usage is outside this file; confirm against callers.
STATE_NAME = 'state'
APP_LOGGER = 'logger'
Loading