
Commit 393bb25

Download model from the local source (#19)
Signed-off-by: Adarsh Agrawal <[email protected]>
1 parent 8fafbe8 commit 393bb25

File tree: 5 files changed, +160 −6 lines

examples/utility/download_model.py

Lines changed: 108 additions & 0 deletions
```python
import argparse
import json
import logging
import os
import tarfile
from urllib.parse import urlparse

import requests

LOG_LEVEL = logging.INFO


def get_logger(name):
    logger = logging.getLogger(name)
    logger.setLevel(LOG_LEVEL)
    logger.propagate = False

    console_handler = logging.StreamHandler()
    console_handler.setLevel(LOG_LEVEL)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-18s - %(levelname)-8s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    console_handler.setFormatter(formatter)

    logger.addHandler(console_handler)

    return logger


logger = get_logger("download-manager")


def http_server(url, download_path):
    """Download a model tarball from an HTTP server and extract it in place."""
    path = urlparse(url).path
    file_name = os.path.basename(path)
    file_path = os.path.join(download_path, file_name)

    if os.path.exists(file_path):
        logger.info(f"Skipping download; file is already present in '{download_path}'")
    else:
        download_file_from_url(url, file_path)
        extract_tar_file(file_path, download_path)
        remove_tar_file(file_path)


def download_file_from_url(url, file_path):
    logger.info(f"Downloading file from the url '{url}'")

    try:
        response = requests.get(url, stream=True)
        response.raise_for_status()
        with open(file_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        logger.info(f"File downloaded successfully to '{file_path}'")
    except requests.exceptions.RequestException as e:
        logger.error(f"error during download: {e}")
        raise  # do not fall through to extracting a missing or partial file


def extract_tar_file(file_path, extract_path):
    try:
        with tarfile.open(file_path, "r:gz") as tar:
            tar.extractall(extract_path)
        logger.info(f"File extracted at the location: {extract_path}")
    except Exception as e:
        logger.error(f"failed to untar '{file_path}', error: {e}")
        raise


def remove_tar_file(file_path):
    try:
        logger.info(f"Deleting {file_path} file.")
        os.remove(file_path)
        logger.info(f"File {file_path} deleted.")
    except Exception as e:
        logger.error(f"failed to remove '{file_path}' file. error: {e}")
        raise


def load_config(config_path):
    with open(config_path, "r") as config_file:
        return json.load(config_file)


def download_manager(config, download_path):
    if "modelSource" in config:
        logger.info("Starting model download process.")
        http_server(config["modelSource"]["url"], download_path)
        logger.info("Model download process completed.")
    else:
        logger.info("Source to download model is not specified.")


def main():
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--config", type=str, help="Path to the configuration file")
    parser.add_argument("--downloadPath", type=str, help="Download path of the AI model.")

    args = parser.parse_args()

    try:
        config = load_config(args.config)
        download_manager(config, args.downloadPath)
    except Exception as e:
        logger.error(f"error encountered: {e}")


if __name__ == "__main__":
    main()
```
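
A quick local smoke test of the script might look like the following, assuming a config file that carries the same `modelSource` shape used elsewhere in this commit; the `/tmp` paths and the `<Domain>` placeholder are illustrative:

```shell
# Illustrative smoke test; <Domain> must point at a server set up as in the README below
cat > /tmp/pim_config.json <<'EOF'
{
  "modelSource": { "url": "http://<Domain>/models--ibm-granite--granite-3.2-8b-instruct.tar.gz" }
}
EOF

mkdir -p /tmp/huggingface
python3 examples/utility/download_model.py --config /tmp/pim_config.json --downloadPath /tmp/huggingface
```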

examples/vllm/Containerfile

Lines changed: 4 additions & 3 deletions
```diff
@@ -1,8 +1,9 @@
 FROM na.artifactory.swg-devops.com/sys-pcloud-docker-local/devops/pim/base
 
-COPY llm_config.sh /usr/bin/
-COPY llm_config.service /etc/systemd/system
+COPY vllm/llm_config.sh /usr/bin/
+COPY utility/download_model.py /usr/bin/
+COPY vllm/llm_config.service /etc/systemd/system
 RUN systemctl unmask llm_config.service
 RUN systemctl enable llm_config.service
 
-COPY vllm.container /usr/share/containers/systemd
+COPY vllm/vllm.container /usr/share/containers/systemd
```
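
The `COPY` sources now reference the `vllm/` and `utility/` subdirectories, which suggests the image is built with `examples/` as the build context. A sketch of the corresponding build command, under that assumption:

```shell
# Assumes examples/ is the build context, since COPY uses vllm/... and utility/... paths
cd examples
podman build -t <your_registry>/vllm -f vllm/Containerfile .
```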

examples/vllm/README.md

Lines changed: 41 additions & 1 deletion
Arguments you want to pass to your vLLM inference engine
#### llmEnv
Environment variables that you want to set while running the vLLM inference engine
#### modelSource
A JSON object that specifies the source from which to download the model. Use this parameter to load an offline model served within the local network instead of downloading it from Hugging Face over the internet. This is suitable for environments that restrict outside connections. Follow the steps [here](#steps-to-set-up-a-server-to-host-an-llm-model-locally) to bring up a self-hosted HTTP server that serves the offline models.

**Sample config:**
```ini
config-json = """
{
    "llmImage": "na.artifactory.swg-devops.com/sys-pcloud-docker-local/devops/pim/apps/vllm",
    "llmArgs": "--model ibm-granite/granite-3.2-8b-instruct --max-model-len=26208 --enable-auto-tool-choice --tool-call-parser granite",
    "llmEnv": "OMP_NUM_THREADS=16",
    "modelSource": { "url": "http://<Domain>/models--ibm-granite--granite-3.2-8b-instruct.tar.gz" }
}
"""
```
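
Since `llm_config.sh` parses this JSON with `jq` on the VM (see below), running the deployed file through `jq` is a quick way to catch quoting mistakes in the embedded JSON; the path here is the one the setup script reads:

```shell
# jq exits non-zero and prints a parse error if the JSON is malformed
jq . /etc/pim/pim_config.json
```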
```shell
podman build -t <your_registry>/vllm

podman push <your_registry>/vllm
```

### Steps to Set Up a Server to Host an LLM Model Locally
**Step 1: Download the model using the Hugging Face CLI**
```shell
pip install huggingface_hub

huggingface-cli download <model-id>

# Example:
huggingface-cli download ibm-granite/granite-3.2-8b-instruct
```
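
By default the Hugging Face CLI places the download in the hub cache, typically `~/.cache/huggingface/hub`, under a `models--<account>--<model>` directory; that directory is what gets tarred in the next step:

```shell
# Locate the downloaded snapshot in the default hub cache
ls ~/.cache/huggingface/hub
# models--ibm-granite--granite-3.2-8b-instruct
```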
**Step 2: Create a tarball of the downloaded model folder**
- The `model-id` should follow the `models--<account>--<model>` format, since vLLM expects this format when it loads the model from its cache.
  Example: the `model-id` for ibm-granite/granite-3.2-8b-instruct is `models--ibm-granite--granite-3.2-8b-instruct`
```shell
tar -cvzf <model-id>.tar.gz <path-to-downloaded-model-directory>
```
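
For instance, assuming the model sits in the default cache location from Step 1, `tar -C` keeps the `models--...` directory at the top level of the archive, so extracting it into `HF_HUB_CACHE` (`/var/huggingface` in this commit) recreates the layout vLLM expects:

```shell
# -C switches into the cache directory so the archive root is the models--... folder
tar -cvzf models--ibm-granite--granite-3.2-8b-instruct.tar.gz \
    -C ~/.cache/huggingface/hub models--ibm-granite--granite-3.2-8b-instruct
```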
**Step 3: Start an HTTP service on the server VM**
```shell
sudo yum install httpd
sudo systemctl start httpd
```
**Step 4: Copy the tar file to the web server directory**
```shell
cp <tarball-file-path> /var/www/html

# Example:
cp models--ibm-granite--granite-3.2-8b-instruct.tar.gz /var/www/html
```
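
At this point the tarball should be reachable over HTTP; a quick header check from another machine confirms `httpd` is serving it before wiring the URL into the config:

```shell
# Expect HTTP/1.1 200 OK with the tarball's Content-Length
curl -I http://<ip>/models--ibm-granite--granite-3.2-8b-instruct.tar.gz
```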
**Step 5: Form the URL to access the model tarball**
```shell
http://<ip>/models--ibm-granite--granite-3.2-8b-instruct.tar.gz
```
Use the above URL in the `modelSource.url` parameter to download the model from the local server.

examples/vllm/llm_config.service

Lines changed: 2 additions & 2 deletions
```diff
@@ -1,7 +1,7 @@
 [Unit]
 Description=Mount and setup LLM config
-Requires=base_config.service
-After=base_config.service
+Requires=base_config.service network-online.target cloud-config.target
+After=base_config.service network-online.target cloud-config.target
 
 [Service]
 Type=oneshot
```

examples/vllm/llm_config.sh

Lines changed: 5 additions & 0 deletions
```diff
@@ -25,3 +25,8 @@ mkdir /var/huggingface
 var_to_add=HF_HUB_CACHE=/var/huggingface
 sed -i "/^HF_HUB_CACHE=.*/d" /etc/pim/llm.conf && echo "$var_to_add" >> /etc/pim/llm.conf
 
+# Download model from the self-hosted server
+MODEL_SOURCE=$(jq -r '.modelSource' /etc/pim/pim_config.json)
+if [[ -n "$MODEL_SOURCE" && "$MODEL_SOURCE" != "null" ]]; then
+    python3 /usr/bin/download_model.py --config /etc/pim/pim_config.json --downloadPath /var/huggingface
+fi
```
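
The `!= "null"` guard above is needed because `jq -r` prints the literal string `null` for a missing key rather than an empty string, so a plain `-n` test would always succeed; a minimal demonstration:

```shell
MODEL_SOURCE=$(echo '{}' | jq -r '.modelSource')
echo "$MODEL_SOURCE"          # prints: null
[[ -n "$MODEL_SOURCE" ]] && echo "-n alone would still trigger the download step"
```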
