Skip to content

Commit 5a930ee

Browse files
authored
feat: Binary wheels for CPU, CUDA (12.1 - 12.3), Metal (#1247)
* Generate binary wheel index on release * Add total release downloads badge * Update download label * Use official cibuildwheel action * Add workflows to build CUDA and Metal wheels * Update generate index workflow * Update workflow name
1 parent 8649d76 commit 5a930ee

6 files changed

+330
-5
lines changed

.github/workflows/build-and-release.yaml

+5-5
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
runs-on: ${{ matrix.os }}
1212
strategy:
1313
matrix:
14-
os: [ubuntu-latest, windows-latest, macOS-latest]
14+
os: [ubuntu-20.04, windows-2019, macos-11]
1515

1616
steps:
1717
- uses: actions/checkout@v3
@@ -23,19 +23,19 @@ jobs:
2323
with:
2424
python-version: "3.8"
2525

26-
- name: Install cibuildwheel
27-
run: python -m pip install cibuildwheel==2.12.1
28-
2926
- name: Install dependencies
3027
run: |
3128
python -m pip install --upgrade pip
3229
python -m pip install -e .[all]
3330
3431
- name: Build wheels
35-
run: python -m cibuildwheel --output-dir wheelhouse
32+
uses: pypa/cibuildwheel@v2.16.5
3633
env:
3734
# disable repair
3835
CIBW_REPAIR_WHEEL_COMMAND: ""
36+
with:
37+
package-dir: .
38+
output-dir: wheelhouse
3939

4040
- uses: actions/upload-artifact@v3
4141
with:
+131
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
name: Build Wheels (CUDA)
2+
3+
on: workflow_dispatch
4+
5+
permissions:
6+
contents: write
7+
8+
jobs:
9+
define_matrix:
10+
name: Define Build Matrix
11+
runs-on: ubuntu-latest
12+
outputs:
13+
matrix: ${{ steps.set-matrix.outputs.matrix }}
14+
defaults:
15+
run:
16+
shell: pwsh
17+
18+
steps:
19+
- name: Define Job Output
20+
id: set-matrix
21+
run: |
22+
$matrix = @{
23+
'os' = @('ubuntu-20.04', 'windows-latest')
24+
'pyver' = @("3.10", "3.11", "3.12")
25+
'cuda' = @("12.1.1", "12.2.2", "12.3.2")
26+
'releasetag' = @("basic")
27+
}
28+
29+
$matrixOut = ConvertTo-Json $matrix -Compress
30+
Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
31+
32+
build_wheels:
33+
name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
34+
needs: define_matrix
35+
runs-on: ${{ matrix.os }}
36+
strategy:
37+
matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
38+
defaults:
39+
run:
40+
shell: pwsh
41+
env:
42+
CUDAVER: ${{ matrix.cuda }}
43+
AVXVER: ${{ matrix.releasetag }}
44+
45+
steps:
46+
- uses: actions/checkout@v4
47+
with:
48+
submodules: "recursive"
49+
50+
- uses: actions/setup-python@v4
51+
with:
52+
python-version: ${{ matrix.pyver }}
53+
54+
- name: Setup Mamba
55+
uses: conda-incubator/setup-miniconda@v2.2.0
56+
with:
57+
activate-environment: "build"
58+
python-version: ${{ matrix.pyver }}
59+
miniforge-variant: Mambaforge
60+
miniforge-version: latest
61+
use-mamba: true
62+
add-pip-as-python-dependency: true
63+
auto-activate-base: false
64+
65+
- name: VS Integration Cache
66+
id: vs-integration-cache
67+
if: runner.os == 'Windows'
68+
uses: actions/cache@v3.3.2
69+
with:
70+
path: ./MSBuildExtensions
71+
key: cuda-${{ matrix.cuda }}-vs-integration
72+
73+
- name: Get Visual Studio Integration
74+
if: runner.os == 'Windows' && steps.vs-integration-cache.outputs.cache-hit != 'true'
75+
run: |
76+
if ($env:CUDAVER -eq '12.1.1') {$x = '12.1.0'} else {$x = $env:CUDAVER}
77+
$links = (Invoke-RestMethod 'https://github.com/Jimver/cuda-toolkit/raw/dc0ca7bb29c5a92f7a963d3d5c93f8d59765136a/src/links/windows-links.ts').Trim().split().where({$_ -ne ''})
78+
for ($i=$q=0;$i -lt $links.count -and $q -lt 2;$i++) {if ($links[$i] -eq "'$x',") {$q++}}
79+
Invoke-RestMethod $links[$i].Trim("'") -OutFile 'cudainstaller.zip'
80+
& 'C:\Program Files\7-Zip\7z.exe' e cudainstaller.zip -oMSBuildExtensions -r *\MSBuildExtensions\* > $null
81+
Remove-Item 'cudainstaller.zip'
82+
83+
- name: Install Visual Studio Integration
84+
if: runner.os == 'Windows'
85+
run: |
86+
$y = (gi '.\MSBuildExtensions').fullname + '\*'
87+
(gi 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\*\BuildCustomizations').fullname.foreach({cp $y $_})
88+
$cupath = 'CUDA_PATH_V' + $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','_')
89+
echo "$cupath=$env:CONDA_PREFIX" >> $env:GITHUB_ENV
90+
91+
- name: Install Dependencies
92+
env:
93+
MAMBA_DOWNLOAD_FAILFAST: "0"
94+
MAMBA_NO_LOW_SPEED_LIMIT: "1"
95+
run: |
96+
$cudaVersion = $env:CUDAVER
97+
mamba install -y 'cuda' -c nvidia/label/cuda-$cudaVersion
98+
python -m pip install build wheel
99+
100+
- name: Build Wheel
101+
run: |
102+
$cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')
103+
$env:CUDA_PATH = $env:CONDA_PREFIX
104+
$env:CUDA_HOME = $env:CONDA_PREFIX
105+
$env:CUDA_TOOLKIT_ROOT_DIR = $env:CONDA_PREFIX
106+
if ($IsLinux) {
107+
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
108+
}
109+
$env:VERBOSE = '1'
110+
$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=all'
111+
$env:CMAKE_ARGS = "-DLLAMA_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
112+
if ($env:AVXVER -eq 'AVX') {
113+
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
114+
}
115+
if ($env:AVXVER -eq 'AVX512') {
116+
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX512=on'
117+
}
118+
if ($env:AVXVER -eq 'basic') {
119+
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
120+
}
121+
python -m build --wheel
122+
# write the build tag to the output
123+
Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV
124+
125+
- uses: softprops/action-gh-release@v1
126+
with:
127+
files: dist/*
128+
# Set tag_name to <tag>-cu<cuda_version>
129+
tag_name: ${{ github.ref_name }}-cu${{ env.CUDA_VERSION }}
130+
env:
131+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
name: Build Wheels (Metal)
2+
3+
on: workflow_dispatch
4+
5+
permissions:
6+
contents: write
7+
8+
jobs:
9+
define_matrix:
10+
name: Define Build Matrix
11+
runs-on: ubuntu-latest
12+
outputs:
13+
matrix: ${{ steps.set-matrix.outputs.matrix }}
14+
defaults:
15+
run:
16+
shell: pwsh
17+
18+
steps:
19+
- name: Define Job Output
20+
id: set-matrix
21+
run: |
22+
$matrix = @{
23+
'os' = @('macos-11', 'macos-12', 'macos-13')
24+
'pyver' = @('3.10', '3.11', '3.12')
25+
}
26+
27+
$matrixOut = ConvertTo-Json $matrix -Compress
28+
Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
29+
30+
build_wheels:
31+
name: ${{ matrix.os }} Python ${{ matrix.pyver }}
32+
needs: define_matrix
33+
runs-on: ${{ matrix.os }}
34+
strategy:
35+
matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
36+
env:
37+
OSVER: ${{ matrix.os }}
38+
39+
steps:
40+
- uses: actions/checkout@v4
41+
with:
42+
submodules: "recursive"
43+
44+
- uses: actions/setup-python@v4
45+
with:
46+
python-version: ${{ matrix.pyver }}
47+
48+
- name: Install Dependencies
49+
run: |
50+
python -m pip install build wheel cmake
51+
52+
- name: Build Wheel
53+
run: |
54+
XCODE15PATH="/Applications/Xcode_15.0.app/Contents/Developer"
55+
XCODE15BINPATH="${XCODE15PATH}/Toolchains/XcodeDefault.xctoolchain/usr/bin"
56+
export CMAKE_ARGS="-DLLAMA_NATIVE=off -DLLAMA_METAL=on"
57+
[[ "$OSVER" == "macos-13" ]] && export CC="${XCODE15BINPATH}/cc" && export CXX="${XCODE15BINPATH}/c++" && export MACOSX_DEPLOYMENT_TARGET="13.0"
58+
[[ "$OSVER" == "macos-12" ]] && export MACOSX_DEPLOYMENT_TARGET="12.0"
59+
[[ "$OSVER" == "macos-11" ]] && export MACOSX_DEPLOYMENT_TARGET="11.0"
60+
61+
export CMAKE_OSX_ARCHITECTURES="arm64" && export ARCHFLAGS="-arch arm64"
62+
VERBOSE=1 python -m build --wheel
63+
64+
if [[ "$OSVER" == "macos-13" ]]; then
65+
export SDKROOT="${XCODE15PATH}/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.0.sdk"
66+
export MACOSX_DEPLOYMENT_TARGET="14.0"
67+
VERBOSE=1 python -m build --wheel
68+
fi
69+
70+
for file in ./dist/*.whl; do cp "$file" "${file/arm64.whl/aarch64.whl}"; done
71+
if [[ "$OSVER" == "macos-13" ]]; then unset SDKROOT; export MACOSX_DEPLOYMENT_TARGET="13.0"; fi # undo the 14.0 settings exported by the arm64 pass so the next build targets 13.0
72+
export CMAKE_OSX_ARCHITECTURES="x86_64" && export CMAKE_ARGS="-DLLAMA_NATIVE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_METAL=on" && export ARCHFLAGS="-arch x86_64"
73+
VERBOSE=1 python -m build --wheel
74+
75+
if [[ "$OSVER" == "macos-13" ]]; then
76+
export SDKROOT="${XCODE15PATH}/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.0.sdk"
77+
export MACOSX_DEPLOYMENT_TARGET="14.0"
78+
VERBOSE=1 python -m build --wheel
79+
fi
80+
81+
- uses: softprops/action-gh-release@v1
82+
with:
83+
files: dist/*
84+
# set release name to <tag>-metal
85+
tag_name: ${{ github.ref_name }}-metal
86+
env:
87+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
name: Wheels Index
2+
3+
on:
4+
# Trigger on any new release
5+
release:
6+
types: [published]
7+
8+
# Allows you to run this workflow manually from the Actions tab
9+
workflow_dispatch:
10+
11+
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
12+
permissions:
13+
contents: read
14+
pages: write
15+
id-token: write
16+
17+
# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
18+
# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
19+
concurrency:
20+
group: "pages"
21+
cancel-in-progress: false
22+
23+
jobs:
24+
# Single deploy job since we're just deploying
25+
deploy:
26+
environment:
27+
name: github-pages
28+
url: ${{ steps.deployment.outputs.page_url }}
29+
runs-on: ubuntu-latest
30+
steps:
31+
- name: Checkout
32+
uses: actions/checkout@v4
33+
- name: Setup Pages
34+
uses: actions/configure-pages@v4
35+
- name: Build
36+
run: |
37+
./scripts/releases-to-pep-503.sh index/whl/cpu '^[v]?[0-9]+\.[0-9]+\.[0-9]+$'
38+
for variant in cu121 cu122 cu123 metal; do # one index per "<tag>-<variant>" release family; cu123 comes from the CUDA 12.3.2 build
39+
./scripts/releases-to-pep-503.sh "index/whl/$variant" "^[v]?[0-9]+\.[0-9]+\.[0-9]+-$variant\$"
40+
done
41+
- name: Upload artifact
42+
uses: actions/upload-pages-artifact@v3
43+
with:
44+
# Upload entire repository
45+
path: 'index'
46+
- name: Deploy to GitHub Pages
47+
id: deployment
48+
uses: actions/deploy-pages@v4

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/)
77
[![PyPI - License](https://img.shields.io/pypi/l/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/)
88
[![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/)
9+
[![GitHub All Releases](https://img.shields.io/github/downloads/abetlen/llama-cpp-python/total.svg?label=Github%20Downloads)](https://github.com/abetlen/llama-cpp-python/releases)
910

1011
Simple Python bindings for **@ggerganov's** [`llama.cpp`](https://github.com/ggerganov/llama.cpp) library.
1112
This package provides:

scripts/releases-to-pep-503.sh

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#!/bin/bash
2+
3+
# Builds a PEP 503 style index of release wheels. $1: output directory (default index/whl/cpu)
4+
output_dir=${1:-"index/whl/cpu"}
5+
6+
# Create output directory (quoted so a path with spaces or glob characters stays one argument)
7+
mkdir -p "$output_dir"
8+
9+
# Change to output directory
10+
pushd "$output_dir"
11+
12+
# Create the root index html file, which links to the single project page
13+
echo "<!DOCTYPE html>" > index.html
14+
echo "<html>" >> index.html
15+
echo " <head></head>" >> index.html
16+
echo " <body>" >> index.html
17+
echo " <a href=\"llama-cpp-python/\">llama-cpp-python</a>" >> index.html
18+
echo " <br>" >> index.html
19+
echo " </body>" >> index.html
20+
echo "</html>" >> index.html
21+
echo "" >> index.html
22+
23+
# Create llama-cpp-python directory
24+
mkdir -p llama-cpp-python
25+
26+
# Change to llama-cpp-python directory
27+
pushd llama-cpp-python
28+
29+
# Create the project index html file that will list the wheel links
30+
echo "<!DOCTYPE html>" > index.html
31+
echo "<html>" >> index.html
32+
echo " <body>" >> index.html
33+
echo " <h1>Links for llama-cpp-python</h1>" >> index.html
34+
35+
# Get release tags; ask for 100 per page (the API maximum) instead of the much smaller default page
36+
releases=$(curl -s "https://api.github.com/repos/abetlen/llama-cpp-python/releases?per_page=100" | jq -r '.[].tag_name')
37+
38+
# $2: extended regex selecting release tags; defaults to a plain version tag such as v1.2.3
39+
pattern=${2:-"^[v]?[0-9]+\.[0-9]+\.[0-9]+$"}
40+
41+
# Filter releases by pattern (quoted so the regex is never word-split or glob-expanded by the shell)
42+
releases=$(printf '%s\n' $releases | grep -E "$pattern")
43+
44+
# For each release, get all assets and emit a link for every .whl file
45+
for release in $releases; do
46+
assets=$(curl -s "https://api.github.com/repos/abetlen/llama-cpp-python/releases/tags/$release" | jq -r '.assets')
47+
echo " <h2>$release</h2>" >> index.html
48+
for asset in $(echo "$assets" | jq -r '.[].browser_download_url'); do
49+
if [[ $asset == *".whl" ]]; then
50+
echo " <a href=\"$asset\">$asset</a>" >> index.html
51+
echo " <br>" >> index.html
52+
fi
53+
done
54+
done
55+
56+
echo " </body>" >> index.html
57+
echo "</html>" >> index.html
58+
echo "" >> index.html

0 commit comments

Comments
 (0)