Skip to content

Commit

Permalink
Merge pull request #126 from kerthcet/cleanup/modelclaims-change
Browse files Browse the repository at this point in the history
Prepare for v0.0.5
  • Loading branch information
InftyAI-Agent authored Sep 5, 2024
2 parents 71a9652 + 68bbf77 commit e6e68ca
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 12 deletions.
2 changes: 0 additions & 2 deletions docs/examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,4 @@ By default, we use [vLLM](https://github.com/vllm-project/vllm) as the inference

### Speculative Decoding with vLLM

> Not supported yet because llama.cpp doesn't support speculative decoding in the server side, see https://github.com/ggerganov/llama.cpp/issues/5877.
[Speculative Decoding](https://arxiv.org/abs/2211.17192) can improve inference performance efficiently; see the [example](./speculative-decoding/vllm/) here.
10 changes: 8 additions & 2 deletions docs/examples/speculative-decoding/vllm/playground.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,11 @@ spec:
backendConfig:
args:
- --use-v2-block-manager
- --num_speculative_tokens 5
- -tp 1
- --num_speculative_tokens
- "5"
- -tp
- "1"
resources:
limits:
cpu: 8
memory: "16Gi"
16 changes: 12 additions & 4 deletions llmaz/model_loader/model_hub/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
ModelHub,
)
from llmaz.util.logger import Logger
from llmaz.model_loader.model_hub.util import get_folder_total_size

from typing import Optional

Expand All @@ -50,14 +51,18 @@ def load_model(
local_dir=MODEL_LOCAL_DIR,
revision=revision,
)
file_size = os.path.getsize(MODEL_LOCAL_DIR + filename) / (1024**3)
Logger.info(
f"The total size of {MODEL_LOCAL_DIR + filename} is {file_size: .2f} GB"
)
return

local_dir = os.path.join(
MODEL_LOCAL_DIR, f"models--{model_id.replace('/','--')}"
)

# # TODO: Should we verify the download is finished?
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
local_dir = os.path.join(
MODEL_LOCAL_DIR, f"models--{model_id.replace('/','--')}"
)

futures = []
for file in list_repo_files(repo_id=model_id):
# TODO: support version management; right now we don't distinguish between versions.
Expand All @@ -71,6 +76,9 @@ def load_model(
).add_done_callback(handle_completion)
)

total_size = get_folder_total_size(local_dir)
Logger.info(f"The total size of {local_dir} is {total_size: .2f} GB")


def handle_completion(future):
filename = future.result()
Expand Down
11 changes: 8 additions & 3 deletions llmaz/model_loader/model_hub/modelscope.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
ModelHub,
)
from llmaz.util.logger import Logger
from llmaz.model_loader.model_hub.util import get_folder_total_size


class ModelScope(ModelHub):
Expand All @@ -43,11 +44,12 @@ def load_model(
f"Start to download, model_id: {model_id}, filename: {filename}, revision: {revision}"
)

local_dir = os.path.join(
MODEL_LOCAL_DIR, f"models--{model_id.replace('/','--')}"
)

with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
futures = []
local_dir = os.path.join(
MODEL_LOCAL_DIR, f"models--{model_id.replace('/','--')}"
)
futures.append(
executor.submit(
snapshot_download,
Expand All @@ -57,6 +59,9 @@ def load_model(
).add_done_callback(handle_completion)
)

total_size = get_folder_total_size(local_dir)
Logger.info(f"The total size of {local_dir} is {total_size:.2f} GB")


def handle_completion(future):
filename = future.result()
Expand Down
30 changes: 30 additions & 0 deletions llmaz/model_loader/model_hub/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import os


def get_folder_total_size(folder_path: str) -> float:
    """Return the combined size of all files under ``folder_path`` in GiB.

    The directory tree is walked recursively with ``os.walk`` and the byte
    size of every file found is summed, then divided by ``1024**3``.

    Args:
        folder_path: Root directory to measure.

    Returns:
        The total size in GiB as a float. A missing or empty directory
        yields ``0.0`` because ``os.walk`` produces no entries for it.
    """
    total_size = 0

    for dirpath, _, filenames in os.walk(folder_path):
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)
            try:
                total_size += os.path.getsize(file_path)
            except OSError:
                # The entry cannot be stat'ed: a dangling symlink, or a file
                # that vanished after os.walk listed it. Skip it instead of
                # checking existence first, which would leave a window where
                # the file can disappear between the check and the stat.
                continue

    return total_size / (1024**3)
2 changes: 1 addition & 1 deletion pkg/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ limitations under the License.
package pkg

const (
LOADER_IMAGE = "inftyai/model-loader:v0.0.7"
LOADER_IMAGE = "inftyai/model-loader:v0.0.8"
)

0 comments on commit e6e68ca

Please sign in to comment.