
Commit e25c3f6: Initial commit (root commit, 0 parents)

17 files changed, +661 -0 lines changed

Diff for: .gitattributes

+2
@@ -0,0 +1,2 @@
*.dll filter=lfs diff=lfs merge=lfs -text
*.so filter=lfs diff=lfs merge=lfs -text

Diff for: LlamaCpp.asmdef

+14
@@ -0,0 +1,14 @@
{
    "name": "Abuksigun.LlamaCpp",
    "rootNamespace": "",
    "references": [],
    "includePlatforms": [],
    "excludePlatforms": [],
    "allowUnsafeCode": true,
    "overrideReferences": false,
    "precompiledReferences": [],
    "autoReferenced": true,
    "defineConstraints": [],
    "versionDefines": [],
    "noEngineReferences": false
}

Diff for: LlamaCpp.asmdef.meta

+7
Generated file; not rendered by default.

Diff for: LlamaExample.cs

+42
@@ -0,0 +1,42 @@
using System;
using System.IO;
using UnityEngine;

namespace Abuksigun.LlamaCpp
{
    [ExecuteInEditMode]
    public class LlamaExample : MonoBehaviour
    {
        LlamaModel model;

        // Download model here: https://huggingface.co/TheBloke/speechless-mistral-dolphin-orca-platypus-samantha-7B-GGUF/blob/main/speechless-mistral-dolphin-orca-platypus-samantha-7b.Q4_K_M.gguf
        // Path is relative to Application.streamingAssetsPath, which Path.Join prepends below
        [SerializeField] string modelPath = "Models/speechless-mistral-dolphin-orca-platypus-samantha-7b.Q4_K_M.gguf";
        [SerializeField] string systemPrompt = "You are an AI game character";
        [SerializeField] string userPrompt = "You are in a Tavern\nHP:40%\nWhat is your next action:";
        [SerializeField] string assistantPrompt = "I will";

        [ContextMenu("Run")]
        public async void RunAsync()
        {
            // ChatML-style template; placeholders are filled by FormatPrompt below
            const string promptFormat = "<|im_start|>system\n{{system}}\n<|im_end|>\n<|im_start|>user\n{{user}}\n<|im_end|>\n<|im_start|>assistant\n{{assistant}}";

            string fullModelPath = Path.Join(Application.streamingAssetsPath, modelPath);
            model ??= await LlamaModel.LoadModel(fullModelPath, new Progress<float>(x => Debug.Log($"Progress {x}")));
            if (model == null)
            {
                Debug.LogError("Failed to load model");
                return;
            }
            string result = await model.RunAsync(FormatPrompt(promptFormat, systemPrompt, userPrompt, assistantPrompt), 100, new Progress<string>(x => Debug.Log(x)));
            Debug.Log($"Result: {result}");
        }

        public static string FormatPrompt(string promptFormat, string system, string user, string assistant = "")
        {
            return promptFormat
                .Replace("{{system}}", system)
                .Replace("{{user}}", user)
                .Replace("{{assistant}}", assistant);
        }
    }
}
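
For clarity, FormatPrompt is plain placeholder substitution over a ChatML-style template, and pre-seeding the assistant turn steers the output: the prompt ends mid-turn, so the model continues after "I will" rather than opening a fresh reply. A hypothetical standalone check (not part of the commit; it exercises only string substitution, so it needs no native library):

string prompt = Abuksigun.LlamaCpp.LlamaExample.FormatPrompt(
    "<|im_start|>system\n{{system}}\n<|im_end|>\n<|im_start|>user\n{{user}}\n<|im_end|>\n<|im_start|>assistant\n{{assistant}}",
    "You are an AI game character",
    "You are in a Tavern\nHP:40%\nWhat is your next action:",
    "I will");

// prompt now ends with:
//   <|im_start|>assistant
//   I will
// i.e. an unterminated assistant turn for the model to complete.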

Diff for: LlamaExample.cs.meta

+11
Generated file; not rendered by default.

Diff for: LlamaLibrary.cs

+162
@@ -0,0 +1,162 @@
using System;
using System.Runtime.InteropServices;

namespace Abuksigun.LlamaCpp
{
    // Minimal P/Invoke bindings for llama.cpp; struct layouts must match the llama.h of the bundled llama.dll build
    public static unsafe class LlamaLibrary
    {
        private const string DllName = "llama.dll";

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_backend_init(bool numa);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr llama_load_model_from_file(string path_model, LlamaModelParams model_params);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_free_model(IntPtr model);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_n_ctx(IntPtr ctx);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern LlamaBatch llama_batch_init(int n_tokens, int embd, int n_seq_max);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_decode(IntPtr ctx, LlamaBatch batch);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr llama_new_context_with_model(IntPtr model, LlamaContextParams ctx_params);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_free(IntPtr ctx);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_tokenize(IntPtr model, string text, int text_len, int[] tokens, int n_max_tokens, bool add_bos, bool special);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr llama_get_logits(IntPtr ctx);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr llama_get_logits_ith(IntPtr ctx, int i);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_n_vocab(IntPtr model);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_sample_token_greedy(IntPtr ctx, ref LlamaTokenDataArray candidates);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_token_to_piece(IntPtr model, int token, byte* buffer, int length);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_backend_free();

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_token_eos(IntPtr model);

        [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
        public delegate void LlamaProgressCallback(float progress, IntPtr ctx);

        // Mirrors llama_model_params; field order must match the native struct
        [StructLayout(LayoutKind.Sequential)]
        public struct LlamaModelParams
        {
            public int n_gpu_layers;
            public int main_gpu;
            public IntPtr tensor_split;
            public LlamaProgressCallback progress_callback;
            public IntPtr progress_callback_user_data;
            public bool vocab_only;
            public bool use_mmap;
            public bool use_mlock;

            public LlamaModelParams(LlamaProgressCallback progressCallback, IntPtr progressCallbackUserData, int nGpuLayers = 0)
            {
                n_gpu_layers = nGpuLayers;
                main_gpu = 0;
                tensor_split = IntPtr.Zero;
                progress_callback = progressCallback;
                progress_callback_user_data = progressCallbackUserData;
                vocab_only = false;
                use_mmap = true;
                use_mlock = false;
            }
        }

        // Mirrors llama_context_params; field order must match the native struct
        [StructLayout(LayoutKind.Sequential)]
        public struct LlamaContextParams
        {
            public uint seed;
            public uint n_ctx;
            public uint n_batch;
            public uint n_threads;
            public uint n_threads_batch;
            public sbyte rope_scaling_type;
            public float rope_freq_base;
            public float rope_freq_scale;
            public float yarn_ext_factor;
            public float yarn_attn_factor;
            public float yarn_beta_fast;
            public float yarn_beta_slow;
            public uint yarn_orig_ctx;
            public bool mul_mat_q;
            public bool f16_kv;
            public bool logits_all;
            public bool embedding;

            public LlamaContextParams(uint seed, uint nThreads = 1, sbyte ropeScaling = -1)
            {
                this.seed = seed;
                n_ctx = 512;
                n_batch = 512;
                n_threads = nThreads;
                n_threads_batch = nThreads;
                rope_scaling_type = ropeScaling;
                rope_freq_base = 0.0f;
                rope_freq_scale = 0.0f;
                yarn_ext_factor = -1.0f;
                yarn_attn_factor = 1.0f;
                yarn_beta_fast = 32.0f;
                yarn_beta_slow = 1.0f;
                yarn_orig_ctx = 0;
                mul_mat_q = true;
                f16_kv = true;
                logits_all = false;
                embedding = false;
            }
        }

        [StructLayout(LayoutKind.Sequential)]
        public struct LlamaTokenDataArray
        {
            public IntPtr data; // pointer to a native array of LlamaTokenData[size]
            public int size;
            public bool sorted;
        }

        [StructLayout(LayoutKind.Sequential)]
        public struct LlamaTokenData
        {
            public int id;
            public float logit;
            public float p;
        }

        [StructLayout(LayoutKind.Sequential)]
        public unsafe struct LlamaBatch
        {
            public int n_tokens;
            public int* token;
            public float* embd;
            public int* pos;
            public int* n_seq_id;
            public int** seq_id;
            public byte* logits;

            // Legacy, may require removal in future llama.cpp versions
            private int _all_pos_0;
            private int _all_pos_1;
            private int _all_seq_id;
        }
    }
}
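
None of these imports are exercised together in this commit, so a usage sketch may help. Below is a hypothetical greedy decoding loop assembled solely from the declarations above; it is not code from the package. It assumes the struct layouts match the bundled llama.dll, that llama_batch_init allocates per-token seq_id arrays (as llama.cpp did around this time), and it omits error handling and batch cleanup (llama_batch_free is not imported in this file):

using System;
using System.Text;
using static Abuksigun.LlamaCpp.LlamaLibrary;

public static unsafe class GreedyDecodeSketch
{
    public static string Generate(string modelPath, string prompt, int maxNewTokens)
    {
        llama_backend_init(false);
        IntPtr model = llama_load_model_from_file(modelPath, new LlamaModelParams((progress, _) => { }, IntPtr.Zero));
        IntPtr ctx = llama_new_context_with_model(model, new LlamaContextParams(seed: 0, nThreads: 4)); // n_ctx defaults to 512

        // Tokenize the prompt; one token per character plus BOS is a safe upper bound
        int[] tokens = new int[prompt.Length + 1];
        int nPrompt = llama_tokenize(model, prompt, prompt.Length, tokens, tokens.Length, true, false);

        LlamaBatch batch = llama_batch_init(512, 0, 1);
        int nVocab = llama_n_vocab(model);
        var candidates = new LlamaTokenData[nVocab];
        var sb = new StringBuilder();
        byte* piece = stackalloc byte[64];

        // First batch carries the whole prompt; logits are requested only for its last token
        batch.n_tokens = nPrompt;
        for (int i = 0; i < nPrompt; i++)
        {
            batch.token[i] = tokens[i];
            batch.pos[i] = i;
            batch.n_seq_id[i] = 1;
            batch.seq_id[i][0] = 0;
            batch.logits[i] = (byte)(i == nPrompt - 1 ? 1 : 0);
        }

        for (int pos = nPrompt; pos < nPrompt + maxNewTokens; pos++)
        {
            if (llama_decode(ctx, batch) != 0)
                break;

            // Wrap this step's logits in a token-data array and take the argmax
            float* logits = (float*)llama_get_logits_ith(ctx, batch.n_tokens - 1);
            for (int t = 0; t < nVocab; t++)
                candidates[t] = new LlamaTokenData { id = t, logit = logits[t], p = 0f };

            int newToken;
            fixed (LlamaTokenData* data = candidates)
            {
                var array = new LlamaTokenDataArray { data = (IntPtr)data, size = nVocab, sorted = false };
                newToken = llama_sample_token_greedy(ctx, ref array);
            }
            if (newToken == llama_token_eos(model))
                break;

            int len = llama_token_to_piece(model, newToken, piece, 64);
            if (len > 0)
                sb.Append(Encoding.UTF8.GetString(piece, len));

            // Next batch: just the sampled token, appended at the next position
            batch.n_tokens = 1;
            batch.token[0] = newToken;
            batch.pos[0] = pos;
            batch.n_seq_id[0] = 1;
            batch.seq_id[0][0] = 0;
            batch.logits[0] = 1;
        }

        llama_free(ctx);
        llama_free_model(model);
        llama_backend_free();
        return sb.ToString();
    }
}

llama_sample_token_greedy simply takes the argmax over the wrapped logits; any other llama.cpp sampling strategy would require additional imports.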

Diff for: LlamaLibrary.cs.meta

+11
Generated file; not rendered by default.
