Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
d3b3fd9
hw1 complete
vankari Jan 16, 2026
6479165
hw2 1
vankari Jan 17, 2026
52eda22
correct hw2 1
vankari Jan 17, 2026
34fb7f7
commit rest of hw2
vankari Jan 22, 2026
fcd281c
修下hw1 bug
vankari Jan 22, 2026
cfb95ba
hw3
vankari Jan 24, 2026
9dd9d5e
hw3
vankari Jan 24, 2026
2f34687
调整架构
vankari Jan 25, 2026
d04a350
commit
vankari Jan 25, 2026
b6581b3
commit
vankari Jan 25, 2026
498f921
mlp_layer_load本地被kill 放在线跑一下
vankari Jan 25, 2026
5eecc4f
修正linear
vankari Jan 25, 2026
87625a5
fix in windows
vankari Jan 25, 2026
c5b54ae
fix in windows2
vankari Jan 25, 2026
071f519
尝试兼容
vankari Jan 25, 2026
886d823
Merge branch 'InfiniTensor:main' into main
vankari Mar 2, 2026
0e55eef
finish random sample op and add test for it
vankari Mar 3, 2026
9b02499
调整了项目架构, test_server现在使用llaisys的推理服务
vankari Mar 3, 2026
894a67a
update build.yaml as for the project framework has been changed, besi…
vankari Mar 3, 2026
474d021
Fix indentation in build.yaml for Python scripts
vankari Mar 3, 2026
e286160
Update build.yaml
vankari Mar 3, 2026
75b3ed0
server在线测试修正
vankari Mar 3, 2026
dc86ca8
server测试修正&&增加思考ui
vankari Mar 3, 2026
a981e68
普通模式ui修正,现在生成被maxtoken截断时直接返回带
vankari Mar 7, 2026
1e0929f
kv cache pool实现与测试:目前实现了一个基于trie的kvcache
vankari Mar 8, 2026
859024b
流式生成逻辑修改
vankari Mar 11, 2026
a1073a4
add report
vankari Mar 12, 2026
2a5cc60
修个ui的bug
vankari Mar 12, 2026
dfb9466
cuda support finished & add report
vankari Mar 14, 2026
b823ad6
修下ci字符编码问题
vankari Mar 14, 2026
5117450
update md file and ci
vankari Mar 14, 2026
b2dd19e
modified: .github/workflows/build.yaml
vankari Mar 14, 2026
d0ca60d
修改ci
vankari Mar 14, 2026
5c29ed8
modified: .github/workflows/build.yaml
vankari Mar 14, 2026
80fded7
modified: .github/workflows/build.yaml
vankari Mar 14, 2026
ebcf6e1
modified: .github/workflows/build.yaml
vankari Mar 14, 2026
d8367ba
modified: .github/workflows/build.yaml
vankari Mar 14, 2026
6fc2ce2
modified: .github/workflows/build.yaml
vankari Mar 14, 2026
20ad864
ci暂时删除windows构建
vankari Mar 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 137 additions & 26 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,18 @@ on:
- 'LICENSE'

jobs:
build:
name: Build
build-cuda:
name: Build and test (CUDA, ${{ matrix.target }})
strategy:
fail-fast: false
matrix:
os: [windows-latest, ubuntu-latest]
type: [release]
include:
- target: ubuntu
os: ubuntu-latest
runs-on: ${{ matrix.os }}
env:
TMPDIR: ${{ github.workspace }}/.tmp
PYTHONUTF8: "1"
steps:

- name: checkout code
Expand All @@ -24,37 +28,144 @@ jobs:
uses: xmake-io/github-action-setup-xmake@v1
with:
xmake-version: latest

- name: Xmake Build & Install
run: |
xmake

- name: install cuda toolkit
uses: Jimver/cuda-toolkit@v0.2.24
with:
cuda: "12.8.0"

- name: prepare tmp (linux)
if: matrix.target == 'ubuntu'
run: |
mkdir -p "$TMPDIR"
shell: bash

- name: prepare tmp (windows)
if: matrix.target == 'windows'
run: |
New-Item -ItemType Directory -Force -Path $env:TMPDIR | Out-Null
shell: pwsh

- name: check toolchain (linux)
if: matrix.target == 'ubuntu'
run: |
command -v python
command -v pip
command -v xmake
command -v nvcc
python --version
pip --version
xmake --version
nvcc --version
shell: bash

- name: check toolchain (windows)
if: matrix.target == 'windows'
run: |
Get-Command python
Get-Command pip
Get-Command xmake
if (-not $env:CUDA_PATH) {
$nvccCmd = Get-Command nvcc -ErrorAction SilentlyContinue
if ($nvccCmd) {
$env:CUDA_PATH = Split-Path (Split-Path $nvccCmd.Source -Parent) -Parent
}
}
if (-not $env:CUDA_PATH) {
throw "CUDA_PATH is not set and nvcc not found in PATH"
}
"$env:CUDA_PATH\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
python --version
pip --version
xmake --version
& "$env:CUDA_PATH\bin\nvcc.exe" --version
shell: pwsh

- name: detect nvidia gpu (linux)
if: matrix.target == 'ubuntu'
run: |
if command -v nvidia-smi >/dev/null 2>&1; then
nvidia-smi || true
echo "HAS_NVIDIA_GPU=1" >> "$GITHUB_ENV"
else
echo "HAS_NVIDIA_GPU=0" >> "$GITHUB_ENV"
fi
shell: bash

- name: detect nvidia gpu (windows)
if: matrix.target == 'windows'
run: |
if (Get-Command nvidia-smi -ErrorAction SilentlyContinue) {
nvidia-smi
"HAS_NVIDIA_GPU=1" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
} else {
"HAS_NVIDIA_GPU=0" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
}
shell: pwsh

- name: Xmake CUDA Build & Install (linux)
if: matrix.target == 'ubuntu'
run: |
xmake f --nv-gpu=y -c -v
xmake -v
xmake install
shell: bash

- name: Xmake CUDA Build & Install (windows)
if: matrix.target == 'windows'
run: |
if (-not $env:CUDA_PATH) {
throw "CUDA_PATH is required for windows CUDA build"
}
$env:Path = "$env:CUDA_PATH\bin;$env:Path"
xmake f -p windows -a x64 --toolchain=msvc --cuda="$env:CUDA_PATH" --nv-gpu=y -c -v
xmake -v
xmake install
shell: pwsh

- name: Install Python
- name: Install Python (linux)
if: matrix.target == 'ubuntu'
run: |
cd python
pip install .
pip install ./llaisyscore/
pip install ./server-project/
cd ..
shell: bash

- name: Assignment-0
- name: Install Python (windows)
if: matrix.target == 'windows'
run: |
python test/test_runtime.py --device cpu
Set-Location python
pip install ./llaisyscore/
pip install ./server-project/
Set-Location ..
shell: pwsh

- name: Assignment-1
- name: CUDA runtime api test
if: env.HAS_NVIDIA_GPU == '1'
run: |
python test/test_tensor.py

- name: Assignment-2
python test/test_runtime.py --device nvidia

- name: CUDA ops tests
if: env.HAS_NVIDIA_GPU == '1'
run: |
python test/ops/add.py --device nvidia
python test/ops/argmax.py --device nvidia
python test/ops/embedding.py --device nvidia
python test/ops/linear.py --device nvidia
python test/ops/random_sample.py --device nvidia
python test/ops/rms_norm.py --device nvidia
python test/ops/rope.py --device nvidia
python test/ops/self_attention.py --device nvidia
python test/ops/swiglu.py --device nvidia

- name: CUDA infer test
if: env.HAS_NVIDIA_GPU == '1'
run: |
python test/ops/add.py
python test/ops/argmax.py
python test/ops/embedding.py
python test/ops/linear.py
python test/ops/rms_norm.py
python test/ops/rope.py
python test/ops/self_attention.py
python test/ops/swiglu.py
python test/test_infer.py --device nvidia --test

- name: Assignment-3
- name: skip gpu tests when no gpu
if: env.HAS_NVIDIA_GPU != '1'
run: |
python test/test_infer.py --test
echo "No NVIDIA GPU available on this runner, skipped runtime/ops/infer GPU tests."

3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ lib/
*.dll
*.dylib
*.pyd

# tmpfile
.tmp/
# MacOS Cache
.DS_Store

Expand Down
16 changes: 11 additions & 5 deletions include/llaisys/models/qwen2.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,20 @@ __C {
llaisysTensor_t *mlp_down_w;
};

struct LlaisysQwen2Model;
// Qwen2 model object returned by llaisysQwen2ModelCreate and passed to the
// other llaisysQwen2Model* entry points declared in this header.
// The device, device_ids and ndevice fields match the parameters of
// llaisysQwen2ModelCreate by name and type.
// NOTE(review): ownership/lifetime of the pointed-to meta, weights and
// device_ids is not visible in this header -- confirm against the implementation.
struct LlaisysQwen2Model
{
// Model metadata (see struct LlaisysQwen2Meta).
struct LlaisysQwen2Meta *meta;
// Weight table; same type as the value returned by llaisysQwen2ModelWeights.
struct LlaisysQwen2Weights *weights;
// Device type the model was created for.
llaisysDeviceType_t device;
// Device id array; presumably holds ndevice entries -- TODO confirm.
int *device_ids;
// Device count; presumably the length of device_ids -- TODO confirm.
int ndevice;
};

__export struct LlaisysQwen2Model *llaisysQwen2ModelCreate(const LlaisysQwen2Meta *meta, llaisysDeviceType_t device, int *device_ids, int ndevice);

__export void llaisysQwen2ModelDestroy(struct LlaisysQwen2Model * model);

__export struct LlaisysQwen2Weights *llaisysQwen2ModelWeights(struct LlaisysQwen2Model * model);

__export int64_t llaisysQwen2ModelInfer(struct LlaisysQwen2Model * model, int64_t * token_ids, size_t ntoken);
__export int64_t llaisysQwen2ModelInfer(struct LlaisysQwen2Model * model, int64_t * token_ids, size_t ntoken,
llaisysTensor_t *kcache, llaisysTensor_t *vcache, size_t past_len,
float temperature, int top_k, float top_p, int64_t seed);
}
#endif // LLAISYS_MODELS_QWEN2_H
2 changes: 2 additions & 0 deletions include/llaisys/ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ __C {
__export void llaisysROPE(llaisysTensor_t out, llaisysTensor_t in, llaisysTensor_t pos_ids, float theta);
__export void llaisysSelfAttention(llaisysTensor_t attn_val, llaisysTensor_t q, llaisysTensor_t k, llaisysTensor_t v, float scale);
__export void llaisysSwiGLU(llaisysTensor_t out, llaisysTensor_t gate, llaisysTensor_t up);
__export void llaisysRandomSample(llaisysTensor_t sample_idx, llaisysTensor_t sample_val, llaisysTensor_t logits,
float temperature, int top_k, float top_p, int64_t seed);
}

#endif
10 changes: 10 additions & 0 deletions include/llaisys/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,16 @@ __C {
size_t dim,
size_t start,
size_t end);
__export llaisysTensor_t tensorReshape(
llaisysTensor_t tensor,
size_t * shape,
size_t ndim);

__export llaisysTensor_t tensorTo(
llaisysTensor_t tensor,
llaisysDeviceType_t device_type,
int device_id);

}

#endif // LLAISYS_TENSOR_H
25 changes: 25 additions & 0 deletions python/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# llaisys-server

Standalone server package for Project #3.

## Install order

1. Install core package:

```bash
python3 -m pip install -e /home/vankari/code/llaisys/python/llaisyscore --user --break-system-packages
```

2. Install server package:

```bash
python3 -m pip install -e /home/vankari/code/llaisys/python/server-project --user --break-system-packages
```

## Run server

```bash
cd /home/vankari/code/llaisys/python
python3 -m uvicorn server.app:app --host 0.0.0.0 --port 8000
```
### Attention! The `--break-system-packages` flag was enabled on the public server; it should be disabled on a personal computer.
2 changes: 2 additions & 0 deletions python/llaisys/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .tensor import Tensor
from .ops import Ops
from . import models
from . import backend
from .models import *

__all__ = [
Expand All @@ -17,4 +18,5 @@
"Tensor",
"Ops",
"models",
"backend",
]
3 changes: 3 additions & 0 deletions python/llaisys/backend/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .inference_backend import InferenceBackend, SessionState

__all__ = ["InferenceBackend", "SessionState"]
Loading