using System;
using System.Runtime.InteropServices;

namespace Abuksigun.LlamaCpp
{
    public static unsafe class LlamaLibrary
    {
        // Name of the native llama.cpp library; "llama.dll" targets Windows.
        private const string DllName = "llama.dll";

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_backend_init(bool numa);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr llama_load_model_from_file(string path_model, LlamaModelParams model_params);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_free_model(IntPtr model);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_n_ctx(IntPtr ctx);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern LlamaBatch llama_batch_init(int n_tokens, int embd, int n_seq_max);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_decode(IntPtr ctx, LlamaBatch batch);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr llama_new_context_with_model(IntPtr model, LlamaContextParams ctx_params);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_free(IntPtr ctx);

        // text_len is the byte length of the marshaled string; the return value is the
        // number of tokens written, or a negative count if the tokens buffer is too small.
        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_tokenize(IntPtr model, string text, int text_len, int[] tokens, int n_max_tokens, bool add_bos, bool special);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr llama_get_logits(IntPtr ctx);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr llama_get_logits_ith(IntPtr ctx, int i);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_n_vocab(IntPtr model);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_sample_token_greedy(IntPtr ctx, ref LlamaTokenDataArray candidates);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_token_to_piece(IntPtr model, int token, byte* buffer, int length);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_backend_free();

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_token_eos(IntPtr model);

        [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
        public delegate void LlamaProgressCallback(float progress, IntPtr ctx);

        [StructLayout(LayoutKind.Sequential)]
        public struct LlamaModelParams
        {
            public int n_gpu_layers;
            public int main_gpu;
            public IntPtr tensor_split;
            public LlamaProgressCallback progress_callback;
            public IntPtr progress_callback_user_data;
            // C bool is a single byte; marshal explicitly so the struct tail matches llama.cpp.
            [MarshalAs(UnmanagedType.I1)] public bool vocab_only;
            [MarshalAs(UnmanagedType.I1)] public bool use_mmap;
            [MarshalAs(UnmanagedType.I1)] public bool use_mlock;

            public LlamaModelParams(LlamaProgressCallback progressCallback, IntPtr progressCallbackUserData, int nGpuLayers = 0)
            {
                n_gpu_layers = nGpuLayers;
                main_gpu = 0;
                tensor_split = IntPtr.Zero;
                progress_callback = progressCallback;
                progress_callback_user_data = progressCallbackUserData;
                vocab_only = false;
                use_mmap = true;
                use_mlock = false;
            }
        }
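
        // Illustrative sketch (names below are examples, not part of the binding): building
        // model params with a load-progress callback. The delegate is held in a static field
        // because the GC may otherwise collect it while native code still holds the function
        // pointer, crashing mid-load.
        static readonly LlamaProgressCallback ExampleProgressLogger =
            (progress, userData) => Console.WriteLine($"Model load: {progress:P0}");

        public static LlamaModelParams ExampleModelParams(int nGpuLayers = 0) =>
            new LlamaModelParams(ExampleProgressLogger, IntPtr.Zero, nGpuLayers);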

        [StructLayout(LayoutKind.Sequential)]
        public struct LlamaContextParams
        {
            public uint seed;
            public uint n_ctx;
            public uint n_batch;
            public uint n_threads;
            public uint n_threads_batch;
            public sbyte rope_scaling_type;
            public float rope_freq_base;
            public float rope_freq_scale;
            public float yarn_ext_factor;
            public float yarn_attn_factor;
            public float yarn_beta_fast;
            public float yarn_beta_slow;
            public uint yarn_orig_ctx;
            // C bool is a single byte; marshal explicitly so the struct tail matches llama.cpp.
            [MarshalAs(UnmanagedType.I1)] public bool mul_mat_q;
            [MarshalAs(UnmanagedType.I1)] public bool f16_kv;
            [MarshalAs(UnmanagedType.I1)] public bool logits_all;
            [MarshalAs(UnmanagedType.I1)] public bool embedding;

            public LlamaContextParams(uint seed, uint nThreads = 1, sbyte ropeScaling = -1)
            {
                this.seed = seed;
                n_ctx = 512;
                n_batch = 512;
                n_threads = nThreads;
                n_threads_batch = nThreads;
                rope_scaling_type = ropeScaling;
                rope_freq_base = 0.0f;
                rope_freq_scale = 0.0f;
                yarn_ext_factor = -1.0f;
                yarn_attn_factor = 1.0f;
                yarn_beta_fast = 32.0f;
                yarn_beta_slow = 1.0f;
                yarn_orig_ctx = 0;
                mul_mat_q = true;
                f16_kv = true;
                logits_all = false;
                embedding = false;
            }
        }

        [StructLayout(LayoutKind.Sequential)]
        public struct LlamaTokenDataArray
        {
            public IntPtr data;
            public nuint size; // size_t in the native struct, so pointer-sized rather than int
            [MarshalAs(UnmanagedType.I1)] public bool sorted; // C bool is a single byte
        }

        [StructLayout(LayoutKind.Sequential)]
        public struct LlamaTokenData
        {
            public int id;
            public float logit;
            public float p;
        }

        [StructLayout(LayoutKind.Sequential)]
        public unsafe struct LlamaBatch
        {
            public int n_tokens;
            public int* token;
            public float* embd;
            public int* pos;
            public int* n_seq_id;
            public int** seq_id;
            public byte* logits;

            // Legacy, may require removal in future llama.cpp versions
            private int _all_pos_0;
            private int _all_pos_1;
            private int _all_seq_id;
        }
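
        // End-to-end sketch of the intended call sequence: init backend -> load model ->
        // create context -> tokenize -> decode -> greedy-sample one token -> detokenize.
        // Illustrative only: assumes an ASCII prompt (strings marshal as ANSI here), omits
        // error handling, and leaks the batch because llama_batch_free is not bound in
        // this class.
        public static string ExampleGreedyToken(string modelPath, string prompt)
        {
            llama_backend_init(false);
            IntPtr model = llama_load_model_from_file(modelPath, new LlamaModelParams(null, IntPtr.Zero));
            IntPtr ctx = llama_new_context_with_model(model, new LlamaContextParams(seed: 42, nThreads: 4));

            // Tokenize; over-allocate instead of doing the usual query-then-fill two-pass.
            int[] tokens = new int[prompt.Length + 8];
            int count = llama_tokenize(model, prompt, prompt.Length, tokens, tokens.Length, true, false);

            // One sequence, positions 0..count-1, logits requested only for the last token.
            LlamaBatch batch = llama_batch_init(count, 0, 1);
            for (int i = 0; i < count; i++)
            {
                batch.token[i] = tokens[i];
                batch.pos[i] = i;
                batch.n_seq_id[i] = 1;
                batch.seq_id[i][0] = 0;
                batch.logits[i] = (byte)(i == count - 1 ? 1 : 0);
            }
            batch.n_tokens = count;
            llama_decode(ctx, batch);

            // Wrap the last position's logits as candidates and pick the arg-max token.
            float* logits = (float*)llama_get_logits_ith(ctx, count - 1);
            int vocab = llama_n_vocab(model);
            var candidates = new LlamaTokenData[vocab];
            for (int id = 0; id < vocab; id++)
                candidates[id] = new LlamaTokenData { id = id, logit = logits[id], p = 0f };

            int token;
            fixed (LlamaTokenData* data = candidates)
            {
                var array = new LlamaTokenDataArray { data = (IntPtr)data, size = (nuint)vocab, sorted = false };
                token = llama_sample_token_greedy(ctx, ref array);
            }

            // Convert the sampled token id back into a UTF-8 text piece.
            byte* buffer = stackalloc byte[64];
            int written = llama_token_to_piece(model, token, buffer, 64);
            string piece = written > 0 ? System.Text.Encoding.UTF8.GetString(buffer, written) : string.Empty;

            llama_free(ctx);
            llama_free_model(model);
            llama_backend_free();
            return piece;
        }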
    }
}