
Commit 393bb25

Download model from the local source (#19)
Signed-off-by: Adarsh Agrawal <[email protected]>
1 parent 8fafbe8 commit 393bb25

File tree: 5 files changed, +160 −6 lines

examples/utility/download_model.py

Lines changed: 108 additions & 0 deletions
```python
import argparse
import json
import logging
import os
import tarfile
from urllib.parse import urlparse

import requests

LOG_LEVEL = logging.INFO


def get_logger(name):
    logger = logging.getLogger(name)
    logger.setLevel(LOG_LEVEL)
    logger.propagate = False

    console_handler = logging.StreamHandler()
    console_handler.setLevel(LOG_LEVEL)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-18s - %(levelname)-8s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    console_handler.setFormatter(formatter)

    logger.addHandler(console_handler)

    return logger


logger = get_logger("download-manager")


def http_server(url, download_path):
    """Download a model tarball from an HTTP server and extract it in place."""
    path = urlparse(url).path
    file_name = os.path.basename(path)
    file_path = os.path.join(download_path, file_name)

    if os.path.exists(file_path):
        logger.info(f"Skipping download; file is already present in '{download_path}'")
    else:
        download_file_from_url(url, file_path)
        extract_tar_file(file_path, download_path)
        remove_tar_file(file_path)


def download_file_from_url(url, file_path):
    logger.info(f"Downloading file from the url '{url}'")

    try:
        response = requests.get(url, stream=True)
        response.raise_for_status()
        with open(file_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        logger.info(f"File downloaded successfully to '{file_path}'")
    except requests.exceptions.RequestException as e:
        logger.error(f"error during download: {e}")
        raise  # do not fall through to extracting a missing or partial file


def extract_tar_file(file_path, extract_path):
    try:
        with tarfile.open(file_path, "r:gz") as tar:
            tar.extractall(extract_path)
        logger.info(f"File extracted at the location: {extract_path}")
    except Exception as e:
        logger.error(f"failed to untar '{file_path}', error: {e}")
        raise


def remove_tar_file(file_path):
    try:
        logger.info(f"Deleting {file_path} file.")
        os.remove(file_path)
        logger.info(f"File {file_path} deleted.")
    except Exception as e:
        logger.error(f"failed to remove '{file_path}' file. error: {e}")
        raise


def load_config(config_path):
    with open(config_path, "r") as config_file:
        return json.load(config_file)


def download_manager(config, download_path):
    if "modelSource" in config:
        logger.info("Starting model download process.")
        http_server(config["modelSource"]["url"], download_path)
        logger.info("Model download process completed.")
    else:
        logger.info("Source to download model is not specified.")


def main():
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--config", type=str, help="Path to the configuration file")
    parser.add_argument("--downloadPath", type=str, help="Download path of the AI model.")

    args = parser.parse_args()

    try:
        config = load_config(args.config)
        download_manager(config, args.downloadPath)
    except Exception as e:
        logger.error(f"error encountered: {e}")


if __name__ == "__main__":
    main()
```
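
A quick local smoke test of the script might look like the following, assuming a config file that carries the same `modelSource` shape used elsewhere in this commit; the `/tmp` paths and the `<Domain>` placeholder are illustrative:

```shell
# Illustrative smoke test; <Domain> must point at a server set up as in the README below
cat > /tmp/pim_config.json <<'EOF'
{
  "modelSource": { "url": "http://<Domain>/models--ibm-granite--granite-3.2-8b-instruct.tar.gz" }
}
EOF

mkdir -p /tmp/huggingface
python3 examples/utility/download_model.py --config /tmp/pim_config.json --downloadPath /tmp/huggingface
```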

examples/vllm/Containerfile

Lines changed: 4 additions & 3 deletions
```diff
@@ -1,8 +1,9 @@
 FROM na.artifactory.swg-devops.com/sys-pcloud-docker-local/devops/pim/base
 
-COPY llm_config.sh /usr/bin/
-COPY llm_config.service /etc/systemd/system
+COPY vllm/llm_config.sh /usr/bin/
+COPY utility/download_model.py /usr/bin/
+COPY vllm/llm_config.service /etc/systemd/system
 RUN systemctl unmask llm_config.service
 RUN systemctl enable llm_config.service
 
-COPY vllm.container /usr/share/containers/systemd
+COPY vllm/vllm.container /usr/share/containers/systemd
```
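
The `COPY` sources now reference the `vllm/` and `utility/` subdirectories, which suggests the image is built with `examples/` as the build context. A sketch of the corresponding build command, under that assumption:

```shell
# Assumes examples/ is the build context, since COPY uses vllm/... and utility/... paths
cd examples
podman build -t <your_registry>/vllm -f vllm/Containerfile .
```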

examples/vllm/README.md

Lines changed: 41 additions & 1 deletion
Arguments you want to pass to your vLLM inference engine
#### llmEnv
Environment variables that you want to set while running the vLLM inference engine
#### modelSource
A JSON object that specifies the source from which to download the model. Use this parameter to load an offline model served within the local network instead of downloading it from Hugging Face over the internet. This is suitable for environments that restrict outside connections. Follow the steps [here](#steps-to-set-up-a-server-to-host-an-llm-model-locally) to bring up a self-hosted HTTP server that serves the offline models.

**Sample config:**
```ini
config-json = """
{
    "llmImage": "na.artifactory.swg-devops.com/sys-pcloud-docker-local/devops/pim/apps/vllm",
    "llmArgs": "--model ibm-granite/granite-3.2-8b-instruct --max-model-len=26208 --enable-auto-tool-choice --tool-call-parser granite",
    "llmEnv": "OMP_NUM_THREADS=16",
    "modelSource": { "url": "http://<Domain>/models--ibm-granite--granite-3.2-8b-instruct.tar.gz" }
}
"""
```
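
Since `llm_config.sh` parses this JSON with `jq` on the VM (see below), running the deployed file through `jq` is a quick way to catch quoting mistakes in the embedded JSON; the path here is the one the setup script reads:

```shell
# jq exits non-zero and prints a parse error if the JSON is malformed
jq . /etc/pim/pim_config.json
```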
```shell
podman build -t <your_registry>/vllm

podman push <your_registry>/vllm
```

### Steps to Set Up a Server to Host an LLM Model Locally
**Step 1: Download the model using the Hugging Face CLI**
```shell
pip install huggingface_hub

huggingface-cli download <model-id>

# Example:
huggingface-cli download ibm-granite/granite-3.2-8b-instruct
```
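
By default the Hugging Face CLI places the download in the hub cache, typically `~/.cache/huggingface/hub`, under a `models--<account>--<model>` directory; that directory is what gets tarred in the next step:

```shell
# Locate the downloaded snapshot in the default hub cache
ls ~/.cache/huggingface/hub
# models--ibm-granite--granite-3.2-8b-instruct
```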
**Step 2: Create a tarball of the downloaded model folder**
- The `model-id` should follow the `models--<account>--<model>` format, since vLLM expects this format when it loads the model from its cache.
  Example: the `model-id` for ibm-granite/granite-3.2-8b-instruct is `models--ibm-granite--granite-3.2-8b-instruct`
```shell
tar -cvzf <model-id>.tar.gz <path-to-downloaded-model-directory>
```
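
For instance, assuming the model sits in the default cache location from Step 1, `tar -C` keeps the `models--...` directory at the top level of the archive, so extracting it into `HF_HUB_CACHE` (`/var/huggingface` in this commit) recreates the layout vLLM expects:

```shell
# -C switches into the cache directory so the archive root is the models--... folder
tar -cvzf models--ibm-granite--granite-3.2-8b-instruct.tar.gz \
    -C ~/.cache/huggingface/hub models--ibm-granite--granite-3.2-8b-instruct
```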
**Step 3: Start an HTTP service on the server VM**
```shell
sudo yum install httpd
sudo systemctl start httpd
```
**Step 4: Copy the tar file to the web server directory**
```shell
cp <tarball-file-path> /var/www/html

# Example:
cp models--ibm-granite--granite-3.2-8b-instruct.tar.gz /var/www/html
```
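
At this point the tarball should be reachable over HTTP; a quick header check from another machine confirms `httpd` is serving it before wiring the URL into the config:

```shell
# Expect HTTP/1.1 200 OK with the tarball's Content-Length
curl -I http://<ip>/models--ibm-granite--granite-3.2-8b-instruct.tar.gz
```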
**Step 5: Form the URL to access the model tarball**
```shell
http://<ip>/models--ibm-granite--granite-3.2-8b-instruct.tar.gz
```
Use the above URL in the `modelSource.url` parameter to download the model from the local server.

examples/vllm/llm_config.service

Lines changed: 2 additions & 2 deletions
```diff
@@ -1,7 +1,7 @@
 [Unit]
 Description=Mount and setup LLM config
-Requires=base_config.service
-After=base_config.service
+Requires=base_config.service network-online.target cloud-config.target
+After=base_config.service network-online.target cloud-config.target
 
 [Service]
 Type=oneshot
```

examples/vllm/llm_config.sh

Lines changed: 5 additions & 0 deletions
```diff
@@ -25,3 +25,8 @@ mkdir /var/huggingface
 var_to_add=HF_HUB_CACHE=/var/huggingface
 sed -i "/^HF_HUB_CACHE=.*/d" /etc/pim/llm.conf && echo "$var_to_add" >> /etc/pim/llm.conf
 
+# Download model from the self-hosted server
+MODEL_SOURCE=$(jq -r '.modelSource' /etc/pim/pim_config.json)
+if [[ -n "$MODEL_SOURCE" && "$MODEL_SOURCE" != "null" ]]; then
+    python3 /usr/bin/download_model.py --config /etc/pim/pim_config.json --downloadPath /var/huggingface
+fi
```
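
The `!= "null"` guard above is needed because `jq -r` prints the literal string `null` for a missing key rather than an empty string, so a plain `-n` test would always succeed; a minimal demonstration:

```shell
MODEL_SOURCE=$(echo '{}' | jq -r '.modelSource')
echo "$MODEL_SOURCE"          # prints: null
[[ -n "$MODEL_SOURCE" ]] && echo "-n alone would still trigger the download step"
```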
