@@ -47,7 +47,7 @@ struct socket_t {
47
47
sockfd_t fd;
48
48
socket_t (sockfd_t fd) : fd(fd) {}
49
49
~socket_t () {
50
- GGML_PRINT_DEBUG (" [%s] closing socket %d\n " , __func__, this ->fd );
50
+ GGML_LOG_DEBUG (" [%s] closing socket %d\n " , __func__, this ->fd );
51
51
#ifdef _WIN32
52
52
closesocket (this ->fd );
53
53
#else
@@ -265,14 +265,14 @@ static std::shared_ptr<socket_t> socket_connect(const char * host, int port) {
265
265
return nullptr ;
266
266
}
267
267
if (!set_no_delay (sockfd)) {
268
- fprintf (stderr, " Failed to set TCP_NODELAY\n " );
268
+ GGML_LOG_ERROR ( " Failed to set TCP_NODELAY\n " );
269
269
return nullptr ;
270
270
}
271
271
addr.sin_family = AF_INET;
272
272
addr.sin_port = htons (port);
273
273
struct hostent * server = gethostbyname (host);
274
274
if (server == NULL ) {
275
- fprintf (stderr, " Cannot resolve host '%s'\n " , host);
275
+ GGML_LOG_ERROR ( " Cannot resolve host '%s'\n " , host);
276
276
return nullptr ;
277
277
}
278
278
memcpy (&addr.sin_addr .s_addr , server->h_addr , server->h_length );
@@ -289,7 +289,7 @@ static std::shared_ptr<socket_t> socket_accept(sockfd_t srv_sockfd) {
289
289
return nullptr ;
290
290
}
291
291
if (!set_no_delay (client_socket_fd)) {
292
- fprintf (stderr, " Failed to set TCP_NODELAY\n " );
292
+ GGML_LOG_ERROR ( " Failed to set TCP_NODELAY\n " );
293
293
return nullptr ;
294
294
}
295
295
return client_socket;
@@ -302,11 +302,11 @@ static std::shared_ptr<socket_t> create_server_socket(const char * host, int por
302
302
return nullptr ;
303
303
}
304
304
if (!set_reuse_addr (sockfd)) {
305
- fprintf (stderr, " Failed to set SO_REUSEADDR\n " );
305
+ GGML_LOG_ERROR ( " Failed to set SO_REUSEADDR\n " );
306
306
return nullptr ;
307
307
}
308
308
if (inet_addr (host) == INADDR_NONE) {
309
- fprintf (stderr, " Invalid host address: %s\n " , host);
309
+ GGML_LOG_ERROR ( " Invalid host address: %s\n " , host);
310
310
return nullptr ;
311
311
}
312
312
struct sockaddr_in serv_addr;
@@ -349,7 +349,7 @@ static bool recv_data(sockfd_t sockfd, void * data, size_t size) {
349
349
return false ;
350
350
}
351
351
if (n == 0 ) {
352
- GGML_LOG_ERROR (" recv returned 0 (peer closed?)\n " );
352
+ GGML_LOG_DEBUG (" recv returned 0 (peer closed?)\n " );
353
353
return false ;
354
354
}
355
355
bytes_recv += (size_t )n;
@@ -383,7 +383,7 @@ static bool recv_msg(sockfd_t sockfd, std::vector<uint8_t> & input) {
383
383
try {
384
384
input.resize (size);
385
385
} catch (const std::bad_alloc & e) {
386
- fprintf (stderr, " Failed to allocate input buffer of size %" PRIu64 " \n " , size);
386
+ GGML_LOG_ERROR ( " Failed to allocate input buffer of size %" PRIu64 " \n " , size);
387
387
return false ;
388
388
}
389
389
return recv_data (sockfd, input.data (), size);
@@ -443,11 +443,11 @@ static bool check_server_version(const std::shared_ptr<socket_t> & sock) {
443
443
bool status = send_rpc_cmd (sock, RPC_CMD_HELLO, nullptr , 0 , &response, sizeof (response));
444
444
RPC_STATUS_ASSERT (status);
445
445
if (response.major != RPC_PROTO_MAJOR_VERSION || response.minor > RPC_PROTO_MINOR_VERSION) {
446
- fprintf (stderr, " RPC server version mismatch: %d.%d.%d\n " , response.major , response.minor , response.patch );
446
+ GGML_LOG_ERROR ( " RPC server version mismatch: %d.%d.%d\n " , response.major , response.minor , response.patch );
447
447
return false ;
448
448
}
449
449
if (response.minor != RPC_PROTO_MINOR_VERSION || response.patch != RPC_PROTO_PATCH_VERSION) {
450
- fprintf (stderr, " WARNING: RPC server version mismatch: %d.%d.%d\n " , response.major , response.minor , response.patch );
450
+ GGML_LOG_INFO ( " WARNING: RPC server version mismatch: %d.%d.%d\n " , response.major , response.minor , response.patch );
451
451
}
452
452
return true ;
453
453
}
@@ -488,7 +488,7 @@ static std::shared_ptr<socket_t> get_socket(const std::string & endpoint) {
488
488
if (!check_server_version (sock)) {
489
489
return nullptr ;
490
490
}
491
- GGML_PRINT_DEBUG (" [%s] connected to %s, sockfd=%d\n " , __func__, endpoint.c_str (), sock->fd );
491
+ GGML_LOG_DEBUG (" [%s] connected to %s, sockfd=%d\n " , __func__, endpoint.c_str (), sock->fd );
492
492
sockets[endpoint] = sock;
493
493
return sock;
494
494
}
@@ -809,7 +809,7 @@ ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint) {
809
809
}
810
810
auto sock = get_socket (endpoint);
811
811
if (sock == nullptr ) {
812
- fprintf (stderr, " Failed to connect to %s\n " , endpoint);
812
+ GGML_LOG_ERROR ( " Failed to connect to %s\n " , endpoint);
813
813
return nullptr ;
814
814
}
815
815
size_t alignment = get_alignment (sock);
@@ -871,8 +871,8 @@ void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, si
871
871
872
872
class rpc_server {
873
873
public:
874
- rpc_server (ggml_backend_t backend, const char * cache_dir)
875
- : backend(backend), cache_dir(cache_dir) {
874
+ rpc_server (ggml_backend_t backend, const char * cache_dir, bool verbose )
875
+ : backend(backend), cache_dir(cache_dir), verbose(verbose) {
876
876
}
877
877
~rpc_server ();
878
878
@@ -902,14 +902,18 @@ class rpc_server {
902
902
903
903
ggml_backend_t backend;
904
904
const char * cache_dir;
905
+ bool verbose;
905
906
std::unordered_set<ggml_backend_buffer_t > buffers;
906
907
};
907
908
909
// Conditional debug logging: forwards its printf-style arguments (format string
// first) to GGML_LOG_DEBUG, but only when `verbose` is true.
// `verbose` is not a macro parameter; it is resolved at the expansion site, so the
// macro can only be used where a `verbose` variable is in scope (rpc_server
// declares such a member). The do/while(0) wrapper makes the expansion a single
// statement, safe inside unbraced if/else.
// Standard `__VA_ARGS__` is used instead of the GNU `, ##__VA_ARGS__` extension;
// the format string is simply the first variadic argument.
#define LOG_DBG(...) \
    do { if (verbose) { GGML_LOG_DEBUG(__VA_ARGS__); } } while (0)
911
+
908
912
// Fills `response` with the RPC protocol version constants (major, minor, patch)
// and emits a debug log line with the reported version.
void rpc_server::hello(rpc_msg_hello_rsp & response) {
    // The three fields are independent of each other; assignment order is irrelevant.
    response.patch = RPC_PROTO_PATCH_VERSION;
    response.minor = RPC_PROTO_MINOR_VERSION;
    response.major = RPC_PROTO_MAJOR_VERSION;

    LOG_DBG(" [%s] version: %d.%d.%d\n ",
            __func__,
            response.major,
            response.minor,
            response.patch);
}
914
918
915
919
bool rpc_server::get_alloc_size (const rpc_msg_get_alloc_size_req & request, rpc_msg_get_alloc_size_rsp & response) {
@@ -929,15 +933,15 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_
929
933
GGML_LOG_ERROR (" Null tensor pointer passed to server get_alloc_size function.\n " );
930
934
return false ;
931
935
}
932
-
936
+ LOG_DBG ( " [%s] buffer: %p, data: %p \n " , __func__, ( void *)tensor-> buffer , tensor-> data );
933
937
if (tensor->buffer == nullptr ) {
934
938
// No buffer allocated.
935
939
buft = ggml_backend_get_default_buffer_type (backend);
936
940
} else {
937
941
buft = tensor->buffer ->buft ;
938
942
}
939
943
940
- response.alloc_size = ggml_backend_buft_get_alloc_size (buft,tensor);
944
+ response.alloc_size = ggml_backend_buft_get_alloc_size (buft, tensor);
941
945
942
946
return true ;
943
947
}
@@ -950,29 +954,29 @@ void rpc_server::alloc_buffer(const rpc_msg_alloc_buffer_req & request, rpc_msg_
950
954
if (buffer != nullptr ) {
951
955
response.remote_ptr = reinterpret_cast <uint64_t >(buffer);
952
956
response.remote_size = buffer->size ;
953
- GGML_PRINT_DEBUG (" [%s] size: %" PRIu64 " -> remote_ptr: %" PRIx64 " , remote_size: %" PRIu64 " \n " , __func__, request.size , response.remote_ptr , response.remote_size );
957
+ LOG_DBG (" [%s] size: %" PRIu64 " -> remote_ptr: %" PRIx64 " , remote_size: %" PRIu64 " \n " , __func__, request.size , response.remote_ptr , response.remote_size );
954
958
buffers.insert (buffer);
955
959
} else {
956
- GGML_LOG_ERROR (" [%s] size: %" PRIu64 " -> failed\n " , __func__, request.size );
960
+ LOG_DBG (" [%s] size: %" PRIu64 " -> failed\n " , __func__, request.size );
957
961
}
958
962
}
959
963
960
964
// Queries the alignment of the backend's default buffer type and stores it in
// `response.alignment`; the value is also written to the debug log.
void rpc_server::get_alignment(rpc_msg_get_alignment_rsp & response) {
    ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend);
    const size_t alignment = ggml_backend_buft_get_alignment(buft);
    // size_t must be formatted with %zu: %lu expects unsigned long, which is
    // narrower than size_t on LLP64 targets (e.g. 64-bit Windows).
    LOG_DBG(" [%s] alignment: %zu\n ", __func__, alignment);
    response.alignment = alignment;
}
966
970
967
971
// Queries the maximum size reported for the backend's default buffer type and
// stores it in `response.max_size`; the value is also written to the debug log.
void rpc_server::get_max_size(rpc_msg_get_max_size_rsp & response) {
    ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend);
    const size_t max_size = ggml_backend_buft_get_max_size(buft);
    // size_t must be formatted with %zu: %lu expects unsigned long, which is
    // narrower than size_t on LLP64 targets (e.g. 64-bit Windows).
    LOG_DBG(" [%s] max_size: %zu\n ", __func__, max_size);
    response.max_size = max_size;
}
973
977
974
978
bool rpc_server::buffer_get_base (const rpc_msg_buffer_get_base_req & request, rpc_msg_buffer_get_base_rsp & response) {
975
- GGML_PRINT_DEBUG (" [%s] remote_ptr: %" PRIx64 " \n " , __func__, request.remote_ptr );
979
+ LOG_DBG (" [%s] remote_ptr: %" PRIx64 " \n " , __func__, request.remote_ptr );
976
980
ggml_backend_buffer_t buffer = reinterpret_cast <ggml_backend_buffer_t >(request.remote_ptr );
977
981
if (buffers.find (buffer) == buffers.end ()) {
978
982
GGML_LOG_ERROR (" [%s] buffer not found\n " , __func__);
@@ -984,7 +988,7 @@ bool rpc_server::buffer_get_base(const rpc_msg_buffer_get_base_req & request, rp
984
988
}
985
989
986
990
bool rpc_server::free_buffer (const rpc_msg_free_buffer_req & request) {
987
- GGML_PRINT_DEBUG (" [%s] remote_ptr: %" PRIx64 " \n " , __func__, request.remote_ptr );
991
+ LOG_DBG (" [%s] remote_ptr: %" PRIx64 " \n " , __func__, request.remote_ptr );
988
992
ggml_backend_buffer_t buffer = reinterpret_cast <ggml_backend_buffer_t >(request.remote_ptr );
989
993
if (buffers.find (buffer) == buffers.end ()) {
990
994
GGML_LOG_ERROR (" [%s] buffer not found\n " , __func__);
@@ -996,7 +1000,7 @@ bool rpc_server::free_buffer(const rpc_msg_free_buffer_req & request) {
996
1000
}
997
1001
998
1002
bool rpc_server::buffer_clear (const rpc_msg_buffer_clear_req & request) {
999
- GGML_PRINT_DEBUG (" [%s] remote_ptr: %" PRIx64 " , value: %u\n " , __func__, request.remote_ptr , request.value );
1003
+ LOG_DBG (" [%s] remote_ptr: %" PRIx64 " , value: %u\n " , __func__, request.remote_ptr , request.value );
1000
1004
ggml_backend_buffer_t buffer = reinterpret_cast <ggml_backend_buffer_t >(request.remote_ptr );
1001
1005
if (buffers.find (buffer) == buffers.end ()) {
1002
1006
GGML_LOG_ERROR (" [%s] buffer not found\n " , __func__);
@@ -1073,7 +1077,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
1073
1077
GGML_LOG_ERROR (" [%s] error deserializing tensor\n " , __func__);
1074
1078
return false ;
1075
1079
}
1076
- GGML_PRINT_DEBUG (" [%s] buffer: %p, data: %p, offset: %" PRIu64 " , size: %zu\n " , __func__, (void *)tensor->buffer , tensor->data , offset, size);
1080
+ LOG_DBG (" [%s] buffer: %p, data: %p, offset: %" PRIu64 " , size: %zu\n " , __func__, (void *)tensor->buffer , tensor->data , offset, size);
1077
1081
1078
1082
// sanitize tensor->data
1079
1083
{
@@ -1096,7 +1100,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
1096
1100
fs::path cache_file = fs::path (cache_dir) / hash_str;
1097
1101
std::ofstream ofs (cache_file, std::ios::binary);
1098
1102
ofs.write ((const char *)data, size);
1099
- printf (" [%s] saved to '%s'\n " , __func__, cache_file.c_str ());
1103
+ GGML_LOG_INFO (" [%s] saved to '%s'\n " , __func__, cache_file.c_str ());
1100
1104
}
1101
1105
ggml_backend_tensor_set (tensor, data, offset, size);
1102
1106
return true ;
@@ -1142,8 +1146,8 @@ bool rpc_server::set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rp
1142
1146
GGML_LOG_ERROR (" [%s] error deserializing tensor\n " , __func__);
1143
1147
return false ;
1144
1148
}
1145
- GGML_PRINT_DEBUG (" [%s] buffer: %p, data: %p, offset: %" PRIu64 " , size: %zu, hash: %" PRIx64 " \n " ,
1146
- __func__, (void *)tensor->buffer , tensor->data , request.offset , size, request.hash );
1149
+ LOG_DBG (" [%s] buffer: %p, data: %p, offset: %" PRIu64 " , size: %zu, hash: %" PRIx64 " \n " ,
1150
+ __func__, (void *)tensor->buffer , tensor->data , request.offset , size, request.hash );
1147
1151
1148
1152
// sanitize tensor->data
1149
1153
{
@@ -1177,7 +1181,7 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
1177
1181
GGML_LOG_ERROR (" Null tensor pointer passed to server init_tensor function.\n " );
1178
1182
return false ;
1179
1183
}
1180
-
1184
+ LOG_DBG ( " [%s] buffer: %p, data: %p \n " , __func__, ( void *)tensor-> buffer , tensor-> data );
1181
1185
// Call the backend's buffer_init_tensor function
1182
1186
ggml_backend_buffer_t buffer = tensor->buffer ;
1183
1187
if (buffer && buffer->iface .init_tensor ) {
@@ -1210,7 +1214,7 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
1210
1214
GGML_LOG_ERROR (" [%s] error deserializing tensor\n " , __func__);
1211
1215
return false ;
1212
1216
}
1213
- GGML_PRINT_DEBUG (" [%s] buffer: %p, data: %p, offset: %" PRIu64 " , size: %" PRIu64 " \n " , __func__, (void *)tensor->buffer , tensor->data , request.offset , request.size );
1217
+ LOG_DBG (" [%s] buffer: %p, data: %p, offset: %" PRIu64 " , size: %" PRIu64 " \n " , __func__, (void *)tensor->buffer , tensor->data , request.offset , request.size );
1214
1218
1215
1219
// sanitize tensor->data
1216
1220
{
@@ -1254,7 +1258,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
1254
1258
uint64_t dst_buf_sz = (uint64_t ) ggml_backend_buffer_get_size(dst->buffer);
1255
1259
1256
1260
if (dst_data + src_size > dst_base + dst_buf_sz) {
1257
- GGML_PRINT_DEBUG (" [%s] out-of-bounds write in rpc_server::copy_tensor:\n "
1261
+ GGML_LOG_ERROR (" [%s] out-of-bounds write in rpc_server::copy_tensor:\n "
1258
1262
" write range : [0x%" PRIx64 " , 0x%" PRIx64 " ]\n "
1259
1263
" buffer base: [0x%" PRIx64 " , 0x%" PRIx64 " ]\n " ,
1260
1264
__func__,
@@ -1265,8 +1269,8 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
1265
1269
return false ;
1266
1270
}
1267
1271
1268
- GGML_PRINT_DEBUG (" [%s] src->buffer: %p, dst->buffer: %p\n " ,
1269
- __func__, (void *) src->buffer, (void *) dst->buffer);
1272
+ LOG_DBG (" [%s] src->buffer: %p, dst->buffer: %p\n " ,
1273
+ __func__, (void *) src->buffer, (void *) dst->buffer);
1270
1274
1271
1275
response.result = ggml_backend_buffer_copy_tensor(src, dst);
1272
1276
return true ;
@@ -1342,7 +1346,7 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
1342
1346
return false ;
1343
1347
}
1344
1348
const rpc_tensor * tensors = (const rpc_tensor *)(input.data () + sizeof (n_nodes) + n_nodes*sizeof (uint64_t ) + sizeof (n_tensors));
1345
- GGML_PRINT_DEBUG (" [%s] n_nodes: %u, n_tensors: %u\n " , __func__, n_nodes, n_tensors);
1349
+ LOG_DBG (" [%s] n_nodes: %u, n_tensors: %u\n " , __func__, n_nodes, n_tensors);
1346
1350
1347
1351
size_t buf_size = ggml_tensor_overhead ()*(n_nodes + n_tensors) + ggml_graph_overhead_custom (n_nodes, false );
1348
1352
@@ -1385,16 +1389,16 @@ rpc_server::~rpc_server() {
1385
1389
}
1386
1390
}
1387
1391
1388
- static void rpc_serve_client (ggml_backend_t backend, const char * cache_dir,
1392
+ static void rpc_serve_client (ggml_backend_t backend, const char * cache_dir, bool verbose,
1389
1393
sockfd_t sockfd, size_t free_mem, size_t total_mem) {
1390
- rpc_server server (backend, cache_dir);
1394
+ rpc_server server (backend, cache_dir, verbose );
1391
1395
uint8_t cmd;
1392
1396
if (!recv_data (sockfd, &cmd, 1 )) {
1393
1397
return ;
1394
1398
}
1395
1399
// the first command sent by the client must be HELLO
1396
1400
if (cmd != RPC_CMD_HELLO) {
1397
- fprintf (stderr, " Expected HELLO command, update client\n " );
1401
+ GGML_LOG_ERROR ( " Expected HELLO command, update client\n " );
1398
1402
return ;
1399
1403
}
1400
1404
if (!recv_msg (sockfd, nullptr , 0 )) {
@@ -1411,7 +1415,7 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
1411
1415
}
1412
1416
if (cmd >= RPC_CMD_COUNT) {
1413
1417
// fail fast if the command is invalid
1414
- fprintf (stderr, " Unknown command: %d\n " , cmd);
1418
+ GGML_LOG_ERROR ( " Unknown command: %d\n " , cmd);
1415
1419
break ;
1416
1420
}
1417
1421
switch (cmd) {
@@ -1599,15 +1603,15 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
1599
1603
break ;
1600
1604
}
1601
1605
default : {
1602
- fprintf (stderr, " Unknown command: %d\n " , cmd);
1606
+ GGML_LOG_ERROR ( " Unknown command: %d\n " , cmd);
1603
1607
return ;
1604
1608
}
1605
1609
}
1606
1610
}
1607
1611
}
1608
1612
1609
1613
void ggml_backend_rpc_start_server (ggml_backend_t backend, const char * endpoint,
1610
- const char * cache_dir,
1614
+ const char * cache_dir, bool verbose,
1611
1615
size_t free_mem, size_t total_mem) {
1612
1616
printf (" Starting RPC server v%d.%d.%d\n " ,
1613
1617
RPC_PROTO_MAJOR_VERSION,
@@ -1645,7 +1649,7 @@ void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint
1645
1649
}
1646
1650
printf (" Accepted client connection, free_mem=%zu, total_mem=%zu\n " , free_mem, total_mem);
1647
1651
fflush (stdout);
1648
- rpc_serve_client (backend, cache_dir, client_socket->fd , free_mem, total_mem);
1652
+ rpc_serve_client (backend, cache_dir, verbose, client_socket->fd , free_mem, total_mem);
1649
1653
printf (" Client connection closed\n " );
1650
1654
fflush (stdout);
1651
1655
}
0 commit comments