diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index f7a20b58b..7e5bd81c6 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -174,6 +174,7 @@ file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/cortex_openapi.h" add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc + ${CMAKE_CURRENT_SOURCE_DIR}/utils/hardware/gguf/ggml.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/template_renderer.cc @@ -204,7 +205,7 @@ if(CMAKE_CXX_STANDARD LESS 17) find_package(Boost 1.61.0 REQUIRED) target_include_directories(${TARGET_NAME} PRIVATE ${Boost_INCLUDE_DIRS}) else() - message(STATUS "use c++17") + message(STATUS "use c++${CMAKE_CXX_STANDARD}") endif() aux_source_directory(controllers CTL_SRC) diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt index 4163042d0..586924ca0 100644 --- a/engine/cli/CMakeLists.txt +++ b/engine/cli/CMakeLists.txt @@ -61,6 +61,7 @@ find_package(lfreist-hwinfo CONFIG REQUIRED) add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/../utils/cpuid/cpu_info.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../utils/hardware/gguf/ggml.cc ${CMAKE_CURRENT_SOURCE_DIR}/../utils/normalize_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/../utils/file_logger.cc ${CMAKE_CURRENT_SOURCE_DIR}/../utils/dylib_path_manager.cc diff --git a/engine/cli/command_line_parser.cc b/engine/cli/command_line_parser.cc index 99f51983e..64c221f90 100644 --- a/engine/cli/command_line_parser.cc +++ b/engine/cli/command_line_parser.cc @@ -53,7 +53,7 @@ CommandLineParser::CommandLineParser() engine_service_{std::make_shared( download_service_, dylib_path_manager_, db_service_)} {} -bool CommandLineParser::SetupCommand(int argc, char** argv) { +bool CommandLineParser::SetupCommand() { app_.usage("Usage:\n" + commands::GetCortexBinary() + " [options] [subcommand]"); cml_data_.config = file_manager_utils::GetCortexConfig(); @@ -90,6 +90,10 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { }; app_.add_flag_function("-v,--version", cb, "Get Cortex version"); + return true; +} + +bool CommandLineParser::runCommand(int argc, char** argv) { CLI11_PARSE(app_, argc, argv); if (argc == 1) { CLI_LOG(app_.help()); @@ -138,7 +142,7 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { void CommandLineParser::SetupCommonCommands() { auto model_pull_cmd = app_.add_subcommand( "pull", - "Download models by HuggingFace Repo/ModelID" + "Download models by HuggingFace Repo/ModelID\n" "See built-in models: https://huggingface.co/cortexso"); model_pull_cmd->group(kCommonCommandsGroup); model_pull_cmd->usage("Usage:\n" + commands::GetCortexBinary() + diff --git a/engine/cli/command_line_parser.h b/engine/cli/command_line_parser.h index 9b06d793d..699b85f00 100644 --- a/engine/cli/command_line_parser.h +++ b/engine/cli/command_line_parser.h @@ -10,7 +10,8 @@ class CommandLineParser { public: CommandLineParser(); - bool SetupCommand(int argc, char** argv); + bool SetupCommand(); + bool runCommand(int argc, char** argv); private: void SetupCommonCommands(); @@ -63,6 +64,6 @@ class CommandLineParser { CmlData cml_data_; std::unordered_map config_update_opts_; bool executed_ = false; - commands::HarwareOptions hw_opts_; + commands::HardwareQueryFlags hw_opts_; std::unordered_map run_settings_; }; diff --git a/engine/cli/commands/hardware_list_cmd.cc b/engine/cli/commands/hardware_list_cmd.cc index 6d57c9b53..2a025187e 100644 --- a/engine/cli/commands/hardware_list_cmd.cc +++ b/engine/cli/commands/hardware_list_cmd.cc @@ -9,15 +9,16 @@ #include "utils/logging_utils.h" // clang-format off #include +#include // clang-format on namespace commands { -using namespace tabulate; -using Row_t = - std::vector>; +using Row_t = std::vector< + variant>; -bool HardwareListCmd::Exec(const std::string& host, int port, - const std::optional& ho) { +bool HardwareListCmd::Exec( + const std::string& host, int port, + const std::optional& query_flags) { // Start server if server is not started yet if (!commands::IsServerAlive(host, port)) { CLI_LOG("Starting server ..."); @@ -33,149 +34,146 @@ bool HardwareListCmd::Exec(const std::string& host, int port, /* .pathParams = */ {"v1", "hardware"}, /* .queries = */ {}, }; - auto result = curl_utils::SimpleGetJson(url.ToFullPath()); - if (result.has_error()) { - CTL_ERR(result.error()); + + auto hardware_json_response = curl_utils::SimpleGetJson(url.ToFullPath()); + if (hardware_json_response.has_error()) { + CTL_ERR(hardware_json_response.error()); return false; } - if (!ho.has_value() || ho.value().show_cpu) { + // CPU Section + if (!query_flags.has_value() || query_flags.value().show_cpu) { std::cout << "CPU Information:" << std::endl; - Table table; - std::vector column_headers{"#", "Arch", "Cores", - "Model", "Usage", "Instructions"}; - - Row_t header{column_headers.begin(), column_headers.end()}; - table.add_row(header); - table.format().font_color(Color::green); - std::vector row = {"1"}; - cortex::hw::CPU cpu = cortex::hw::cpu::FromJson(result.value()["cpu"]); - row.emplace_back(cpu.arch); - row.emplace_back(std::to_string(cpu.cores)); - row.emplace_back(cpu.model); - row.emplace_back(std::to_string(cpu.usage)); - std::string insts; - for (auto const& i : cpu.instructions) { - insts += i + " "; - }; - row.emplace_back(insts); - table.add_row({row.begin(), row.end()}); - std::cout << table << std::endl; - std::cout << std::endl; + tabulate::Table cpu_table; + cpu_table.add_row(Row_t(CPU_INFO_HEADERS.begin(), CPU_INFO_HEADERS.end())); + cpu_table.format() + .font_style({tabulate::FontStyle::bold}) + .font_align(tabulate::FontAlign::center) + .padding_left(1) + .padding_right(1); + + cortex::hw::CPU cpu = + cortex::hw::cpu::FromJson(hardware_json_response.value()["cpu"]); + std::vector cpu_row = { + "1", + cpu.arch, + std::to_string(cpu.cores), + cpu.model, + std::to_string(cpu.usage), + std::accumulate(cpu.instructions.begin(), cpu.instructions.end(), + std::string{}, + [](const std::string& a, const std::string& b) { + return a + (a.empty() ? "" : " ") + b; + })}; + cpu_table.add_row(Row_t(cpu_row.begin(), cpu_row.end())); + std::cout << cpu_table << std::endl << std::endl; } - if (!ho.has_value() || ho.value().show_os) { + // OS Section + if (!query_flags.has_value() || query_flags.value().show_os) { std::cout << "OS Information:" << std::endl; - Table table; - std::vector column_headers{"#", "Version", "Name"}; - - Row_t header{column_headers.begin(), column_headers.end()}; - table.add_row(header); - table.format().font_color(Color::green); - std::vector row = {"1"}; - cortex::hw::OS os = cortex::hw::os::FromJson(result.value()["os"]); - row.emplace_back(os.version); - row.emplace_back(os.name); - table.add_row({row.begin(), row.end()}); - std::cout << table << std::endl; - std::cout << std::endl; + tabulate::Table os_table; + os_table.add_row(Row_t(OS_INFO_HEADERS.begin(), OS_INFO_HEADERS.end())); + os_table.format() + .font_style({tabulate::FontStyle::bold}) + .font_align(tabulate::FontAlign::center) + .padding_left(1) + .padding_right(1); + + cortex::hw::OS os = + cortex::hw::os::FromJson(hardware_json_response.value()["os"]); + std::vector os_row = {"1", os.version, os.name}; + os_table.add_row(Row_t(os_row.begin(), os_row.end())); + std::cout << os_table << std::endl << std::endl; } - if (!ho.has_value() || ho.value().show_ram) { + // RAM Section + if (!query_flags.has_value() || query_flags.value().show_ram) { std::cout << "RAM Information:" << std::endl; - Table table; - std::vector column_headers{"#", "Total (MiB)", - "Available (MiB)"}; - - Row_t header{column_headers.begin(), column_headers.end()}; - table.add_row(header); - table.format().font_color(Color::green); - std::vector row = {"1"}; - cortex::hw::Memory m = cortex::hw::memory::FromJson(result.value()["ram"]); - row.emplace_back(std::to_string(m.total_MiB)); - row.emplace_back(std::to_string(m.available_MiB)); - table.add_row({row.begin(), row.end()}); - std::cout << table << std::endl; - std::cout << std::endl; + tabulate::Table ram_table; + ram_table.add_row(Row_t(RAM_INFO_HEADERS.begin(), RAM_INFO_HEADERS.end())); + ram_table.format() + .font_style({tabulate::FontStyle::bold}) + .font_align(tabulate::FontAlign::center) + .padding_left(1) + .padding_right(1); + + cortex::hw::Memory ram = + cortex::hw::memory::FromJson(hardware_json_response.value()["ram"]); + std::vector ram_row = {"1", std::to_string(ram.total_MiB), + std::to_string(ram.available_MiB)}; + ram_table.add_row(Row_t(ram_row.begin(), ram_row.end())); + std::cout << ram_table << std::endl << std::endl; } - if (!ho.has_value() || ho.value().show_gpu) { + // GPU Section + if (!query_flags.has_value() || query_flags.value().show_gpu) { std::cout << "GPU Information:" << std::endl; - Table table; - std::vector column_headers{"#", - "GPU ID", - "Name", - "Version", - "Total (MiB)", - "Available (MiB)", - "Driver Version", - "Compute Capability", - "Activated"}; - - Row_t header{column_headers.begin(), column_headers.end()}; - table.add_row(header); - table.format().font_color(Color::green); - int count = 1; + tabulate::Table gpu_table; + gpu_table.add_row(Row_t(GPU_INFO_HEADERS.begin(), GPU_INFO_HEADERS.end())); + gpu_table.format() + .font_style({tabulate::FontStyle::bold}) + .font_align(tabulate::FontAlign::center) + .padding_left(1) + .padding_right(1); std::vector gpus = - cortex::hw::gpu::FromJson(result.value()["gpus"]); - for (auto const& gpu : gpus) { - std::vector row = {std::to_string(count)}; - row.emplace_back(gpu.id); - row.emplace_back(gpu.name); - row.emplace_back(gpu.version); - row.emplace_back(std::to_string(gpu.total_vram)); - row.emplace_back(std::to_string(gpu.free_vram)); - row.emplace_back( - std::get(gpu.add_info).driver_version); - row.emplace_back( - std::get(gpu.add_info).compute_cap); - row.emplace_back(gpu.is_activated ? "Yes" : "No"); - table.add_row({row.begin(), row.end()}); - count++; + cortex::hw::gpu::FromJson(hardware_json_response.value()["gpus"]); + int gpu_index = 1; + for (const auto& gpu : gpus) { + std::vector gpu_row = { + std::to_string(gpu_index), + gpu.id, + gpu.name, + gpu.version, + std::to_string(gpu.total_vram), + std::to_string(gpu.free_vram), + std::get(gpu.add_info).driver_version, + std::get(gpu.add_info).compute_cap, + gpu.is_activated ? "Yes" : "No"}; + gpu_table.add_row(Row_t(gpu_row.begin(), gpu_row.end())); + gpu_index++; } - - std::cout << table << std::endl; - std::cout << std::endl; + std::cout << gpu_table << std::endl << std::endl; } - - if (!ho.has_value() || ho.value().show_storage) { + + // Storage Section + if (!query_flags.has_value() || query_flags.value().show_storage) { std::cout << "Storage Information:" << std::endl; - Table table; - std::vector column_headers{"#", "Total (GiB)", - "Available (GiB)"}; - - Row_t header{column_headers.begin(), column_headers.end()}; - table.add_row(header); - table.format().font_color(Color::green); - std::vector row = {"1"}; - cortex::hw::StorageInfo si = - cortex::hw::storage::FromJson(result.value()["storage"]); - row.emplace_back(std::to_string(si.total)); - row.emplace_back(std::to_string(si.available)); - table.add_row({row.begin(), row.end()}); - std::cout << table << std::endl; - std::cout << std::endl; + tabulate::Table storage_table; + storage_table.add_row(Row_t(STORAGE_INFO_HEADERS.begin(), STORAGE_INFO_HEADERS.end())); + storage_table.format() + .font_style({tabulate::FontStyle::bold}) + .font_align(tabulate::FontAlign::center) + .padding_left(1) + .padding_right(1); + + cortex::hw::StorageInfo storage = cortex::hw::storage::FromJson( + hardware_json_response.value()["storage"]); + std::vector storage_row = {"1", std::to_string(storage.total), + std::to_string(storage.available)}; + storage_table.add_row(Row_t(storage_row.begin(), storage_row.end())); + std::cout << storage_table << std::endl << std::endl; } - - if (!ho.has_value() || ho.value().show_power) { + + // Power Section + if (!query_flags.has_value() || query_flags.value().show_power) { std::cout << "Power Information:" << std::endl; - Table table; - std::vector column_headers{"#", "Battery Life", - "Charging Status", "Power Saving"}; - - Row_t header{column_headers.begin(), column_headers.end()}; - table.add_row(header); - table.format().font_color(Color::green); - std::vector row = {"1"}; - cortex::hw::PowerInfo pi = - cortex::hw::power::FromJson(result.value()["power"]); - row.emplace_back(std::to_string(pi.battery_life)); - row.emplace_back(pi.charging_status); - row.emplace_back(pi.is_power_saving ? "Yes" : "No"); - table.add_row({row.begin(), row.end()}); - std::cout << table << std::endl; - std::cout << std::endl; + tabulate::Table power_table; + power_table.add_row(Row_t(POWER_INFO_HEADERS.begin(), POWER_INFO_HEADERS.end())); + power_table.format() + .font_style({tabulate::FontStyle::bold}) + .font_align(tabulate::FontAlign::center) + .padding_left(1) + .padding_right(1); + + cortex::hw::PowerInfo power = + cortex::hw::power::FromJson(hardware_json_response.value()["power"]); + std::vector power_row = { + "1", std::to_string(power.battery_life), power.charging_status, + power.is_power_saving ? "Yes" : "No"}; + power_table.add_row(Row_t(power_row.begin(), power_row.end())); + std::cout << power_table << std::endl << std::endl; } return true; diff --git a/engine/cli/commands/hardware_list_cmd.h b/engine/cli/commands/hardware_list_cmd.h index 9344c729c..3059a364b 100644 --- a/engine/cli/commands/hardware_list_cmd.h +++ b/engine/cli/commands/hardware_list_cmd.h @@ -1,9 +1,10 @@ #pragma once #include #include +#include namespace commands { -struct HarwareOptions { +struct HardwareQueryFlags { bool show_cpu = false; bool show_os = false; bool show_ram = false; @@ -20,7 +21,24 @@ struct HarwareOptions { class HardwareListCmd { public: - bool Exec(const std::string& host, int port, - const std::optional& ho); + bool Exec(const std::string& server_host, int server_port, + const std::optional& query_flags); + + private: + // Static constexpr arrays for column headers + static constexpr std::array CPU_INFO_HEADERS = { + "#", "Arch", "Cores", "Model", "Usage", "Instructions"}; + static constexpr std::array OS_INFO_HEADERS = {"#", "Version", + "Name"}; + static constexpr std::array RAM_INFO_HEADERS = {"#", "Total (MiB)", + "Available (MiB)"}; + static constexpr std::array GPU_INFO_HEADERS = { + "#", "GPU ID", "Name", "Version", + "Total (MiB)", "Available (MiB)", "Driver Version", "Compute Capability", + "Activated"}; + static constexpr std::array STORAGE_INFO_HEADERS = { + "#", "Total (GiB)", "Available (GiB)"}; + static constexpr std::array POWER_INFO_HEADERS = { + "#", "Battery Life", "Charging Status", "Power Saving"}; }; } // namespace commands \ No newline at end of file diff --git a/engine/cli/main.cc b/engine/cli/main.cc index a4e6c38cc..9fbb1616a 100644 --- a/engine/cli/main.cc +++ b/engine/cli/main.cc @@ -219,6 +219,10 @@ int main(int argc, char* argv[]) { } CommandLineParser clp; - clp.SetupCommand(argc, argv); + if(!clp.SetupCommand()){ + CTL_ERR("Not Able to set Commands\n"); + exit(1); + } + clp.runCommand(argc, argv); return 0; } diff --git a/engine/utils/hardware/gguf/ggml.cc b/engine/utils/hardware/gguf/ggml.cc new file mode 100644 index 000000000..ed35e875a --- /dev/null +++ b/engine/utils/hardware/gguf/ggml.cc @@ -0,0 +1,184 @@ +#include "ggml.h" + +namespace hardware { + +float GetQuantBit(GGMLType gt) { + switch (gt) { + case GGML_TYPE_I32: + case GGML_TYPE_F32: + return 32.0f; + case GGML_TYPE_I16: + case GGML_TYPE_BF16: + case GGML_TYPE_F16: + return 16.0f; + case GGML_TYPE_IQ2_S: + case GGML_TYPE_IQ2_XXS: + case GGML_TYPE_IQ2_XS: + return 2.31f; + case GGML_TYPE_Q2_K: + return 2.5625f; + case GGML_TYPE_IQ3_XXS: + case GGML_TYPE_IQ3_S: + case GGML_TYPE_Q3_K: + return 3.4375f; + case GGML_TYPE_Q4_0_4_4: + case GGML_TYPE_Q4_0_4_8: + case GGML_TYPE_Q4_0_8_8: + case GGML_TYPE_IQ4_NL: + case GGML_TYPE_IQ4_XS: + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q4_K: + return 4.5f; + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q5_K: + return 5.5f; + case GGML_TYPE_Q6_K: + return 6.5625f; + case GGML_TYPE_I8: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q8_1: + case GGML_TYPE_Q8_K: + return 8.0f; + case GGML_TYPE_I64: + case GGML_TYPE_F64: + return 64.0f; + default: + return 8.0f; + } +} + +std::string to_string(GGMLType t) { + switch (t) { + case GGML_TYPE_F32: + return "F32"; + case GGML_TYPE_F16: + return "F16"; + case GGML_TYPE_Q4_0: + return "Q4_0"; + case GGML_TYPE_Q4_1: + return "Q4_1"; + case GGML_TYPE_Q5_0: + return "Q5_0"; + case GGML_TYPE_Q5_1: + return "Q5_1"; + case GGML_TYPE_Q8_0: + return "Q8_0"; + case GGML_TYPE_Q8_1: + return "Q8_1"; + case GGML_TYPE_Q2_K: + return "Q2_K"; + case GGML_TYPE_Q3_K: + return "Q3_K"; + case GGML_TYPE_Q4_K: + return "Q4_K"; + case GGML_TYPE_Q5_K: + return "Q5_K"; + case GGML_TYPE_Q6_K: + return "Q6_K"; + case GGML_TYPE_Q8_K: + return "Q8_K"; + case GGML_TYPE_IQ2_XXS: + return "IQ2_XXS"; + case GGML_TYPE_IQ2_XS: + return "IQ2_XS"; + case GGML_TYPE_IQ3_XXS: + return "IQ3_XXS"; + case GGML_TYPE_IQ1_S: + return "IQ1_S"; + case GGML_TYPE_IQ4_NL: + return "IQ4_NL"; + case GGML_TYPE_IQ3_S: + return "IQ3_S"; + case GGML_TYPE_IQ2_S: + return "IQ2_S"; + case GGML_TYPE_IQ4_XS: + return "IQ4_XS"; + case GGML_TYPE_I8: + return "I8"; + case GGML_TYPE_I16: + return "I16"; + case GGML_TYPE_I32: + return "I32"; + case GGML_TYPE_I64: + return "I64"; + case GGML_TYPE_F64: + return "F64"; + case GGML_TYPE_IQ1_M: + return "IQ1_M"; + case GGML_TYPE_BF16: + return "BF16"; + case GGML_TYPE_Q4_0_4_4: + return "Q4_0_4_4"; + case GGML_TYPE_Q4_0_4_8: + return "Q4_0_4_8"; + case GGML_TYPE_Q4_0_8_8: + return "Q4_0_8_8"; + case GGML_TYPE_TQ1_0: + return "TQ1_0"; + case GGML_TYPE_TQ2_0: + return "TQ2_0"; + default: + return "Invalid"; + } +} + +const std::unordered_map kGGMLTypeTraits = { + {GGML_TYPE_F32, {.block_size = 1, .type_size = 4}}, + {GGML_TYPE_F16, {.block_size = 1, .type_size = 2}}, + {GGML_TYPE_Q4_0, {.block_size = 32, .type_size = 18, .is_quantized = true}}, + {GGML_TYPE_Q4_1, {.block_size = 32, .type_size = 20, .is_quantized = true}}, + {GGML_TYPE_Q5_0, {.block_size = 32, .type_size = 22, .is_quantized = true}}, + {GGML_TYPE_Q5_1, {.block_size = 32, .type_size = 24, .is_quantized = true}}, + {GGML_TYPE_Q8_0, {.block_size = 32, .type_size = 34, .is_quantized = true}}, + {GGML_TYPE_Q8_1, {.block_size = 32, .type_size = 36, .is_quantized = true}}, + {GGML_TYPE_Q2_K, + {.block_size = 256, .type_size = 84, .is_quantized = true}}, + {GGML_TYPE_Q3_K, + {.block_size = 256, .type_size = 110, .is_quantized = true}}, + {GGML_TYPE_Q4_K, + {.block_size = 256, .type_size = 144, .is_quantized = true}}, + {GGML_TYPE_Q5_K, + {.block_size = 256, .type_size = 176, .is_quantized = true}}, + {GGML_TYPE_Q6_K, + {.block_size = 256, .type_size = 210, .is_quantized = true}}, + {GGML_TYPE_Q8_K, + {.block_size = 256, .type_size = 292, .is_quantized = true}}, + {GGML_TYPE_IQ2_XXS, + {.block_size = 256, .type_size = 66, .is_quantized = true}}, + {GGML_TYPE_IQ2_XS, + {.block_size = 256, .type_size = 74, .is_quantized = true}}, + {GGML_TYPE_IQ3_XXS, + {.block_size = 256, .type_size = 98, .is_quantized = true}}, + {GGML_TYPE_IQ1_S, + {.block_size = 256, .type_size = 50, .is_quantized = true}}, + {GGML_TYPE_IQ4_NL, + {.block_size = 32, .type_size = 18, .is_quantized = true}}, + {GGML_TYPE_IQ3_S, + {.block_size = 256, .type_size = 110, .is_quantized = true}}, + {GGML_TYPE_IQ2_S, + {.block_size = 256, .type_size = 82, .is_quantized = true}}, + {GGML_TYPE_IQ4_XS, + {.block_size = 256, .type_size = 136, .is_quantized = true}}, + {GGML_TYPE_I8, {.block_size = 1, .type_size = 1}}, + {GGML_TYPE_I16, {.block_size = 1, .type_size = 2}}, + {GGML_TYPE_I32, {.block_size = 1, .type_size = 4}}, + {GGML_TYPE_I64, {.block_size = 1, .type_size = 8}}, + {GGML_TYPE_F64, {.block_size = 1, .type_size = 8}}, + {GGML_TYPE_IQ1_M, + {.block_size = 256, .type_size = 56, .is_quantized = true}}, + {GGML_TYPE_BF16, {.block_size = 1, .type_size = 2}}, + {GGML_TYPE_Q4_0_4_4, + {.block_size = 32, .type_size = 18, .is_quantized = true}}, + {GGML_TYPE_Q4_0_4_8, + {.block_size = 32, .type_size = 18, .is_quantized = true}}, + {GGML_TYPE_Q4_0_8_8, + {.block_size = 32, .type_size = 18, .is_quantized = true}}, + {GGML_TYPE_TQ1_0, + {.block_size = 256, .type_size = 54, .is_quantized = true}}, + {GGML_TYPE_TQ2_0, + {.block_size = 256, .type_size = 66, .is_quantized = true}}, +}; + +} // namespace hardware diff --git a/engine/utils/hardware/gguf/ggml.h b/engine/utils/hardware/gguf/ggml.h index 15c068019..b9f9da5c9 100644 --- a/engine/utils/hardware/gguf/ggml.h +++ b/engine/utils/hardware/gguf/ggml.h @@ -2,9 +2,9 @@ #include #include #include -#include "utils/result.hpp" namespace hardware { + enum GGMLType { GGML_TYPE_F32 = 0, GGML_TYPE_F16 = 1, @@ -45,154 +45,15 @@ enum GGMLType { GGML_TYPE_COUNT, }; -inline float GetQuantBit(GGMLType gt) { - switch (gt) { - case GGML_TYPE_I32: - case GGML_TYPE_F32: - return 32.0f; - case GGML_TYPE_I16: - case GGML_TYPE_BF16: - case GGML_TYPE_F16: - return 16.0f; - case GGML_TYPE_IQ2_S: - case GGML_TYPE_IQ2_XXS: - case GGML_TYPE_IQ2_XS: - return 2.31f; - case GGML_TYPE_Q2_K: - return 2.5625f; - case GGML_TYPE_IQ3_XXS: - case GGML_TYPE_IQ3_S: - case GGML_TYPE_Q3_K: - return 3.4375f; - case GGML_TYPE_Q4_0_4_4: - case GGML_TYPE_Q4_0_4_8: - case GGML_TYPE_Q4_0_8_8: - case GGML_TYPE_IQ4_NL: - case GGML_TYPE_IQ4_XS: - case GGML_TYPE_Q4_0: - case GGML_TYPE_Q4_1: - case GGML_TYPE_Q4_K: - return 4.5f; - case GGML_TYPE_Q5_0: - case GGML_TYPE_Q5_1: - case GGML_TYPE_Q5_K: - return 5.5f; - case GGML_TYPE_Q6_K: - return 6.5625f; - case GGML_TYPE_I8: - case GGML_TYPE_Q8_0: - case GGML_TYPE_Q8_1: - case GGML_TYPE_Q8_K: - return 8.0f; - - case GGML_TYPE_I64: - case GGML_TYPE_F64: - return 64.0f; - - default: - return 8.0f; - } -} - -inline std::string to_string(GGMLType t) { - switch (t) { - case GGML_TYPE_F32: - return "F32"; - case GGML_TYPE_F16: - return "F16"; - case GGML_TYPE_Q4_0: - return "Q4_0"; - case GGML_TYPE_Q4_1: - return "Q4_1"; - case GGML_TYPE_Q5_0: - return "Q5_0"; - case GGML_TYPE_Q5_1: - return "Q5_1"; - case GGML_TYPE_Q8_0: - return "Q8_0"; - case GGML_TYPE_Q8_1: - return "Q8_1"; - case GGML_TYPE_Q2_K: - return "Q2_K"; - case GGML_TYPE_Q3_K: - return "Q3_K"; - case GGML_TYPE_Q4_K: - return "Q4_K"; - case GGML_TYPE_Q5_K: - return "Q5_K"; - case GGML_TYPE_Q6_K: - return "Q6_K"; - case GGML_TYPE_Q8_K: - return "Q8_K"; - case GGML_TYPE_IQ2_XXS: - return "IQ2_XXS"; - case GGML_TYPE_IQ2_XS: - return "IQ2_XS"; - case GGML_TYPE_IQ3_XXS: - return "IQ3_XXS"; - case GGML_TYPE_IQ1_S: - return "IQ1_S"; - case GGML_TYPE_IQ4_NL: - return "IQ4_NL"; - case GGML_TYPE_IQ3_S: - return "IQ3_S"; - case GGML_TYPE_IQ2_S: - return "IQ2_S"; - case GGML_TYPE_IQ4_XS: - return "IQ4_XS"; - case GGML_TYPE_I8: - return "I8"; - case GGML_TYPE_I16: - return "I16"; - case GGML_TYPE_I32: - return "I32"; - case GGML_TYPE_I64: - return "I64"; - case GGML_TYPE_F64: - return "F64"; - case GGML_TYPE_IQ1_M: - return "IQ1_M"; - case GGML_TYPE_BF16: - return "BF16"; - case GGML_TYPE_Q4_0_4_4: - return "Q4_0_4_4"; - case GGML_TYPE_Q4_0_4_8: - return "Q4_0_4_8"; - case GGML_TYPE_Q4_0_8_8: - return "Q4_0_8_8"; - case GGML_TYPE_TQ1_0: - return "TQ1_0"; - case GGML_TYPE_TQ2_0: - return "TQ2_0"; - default: - return "Invalid"; - } -} - struct GGMLTypeTrait { uint64_t block_size; uint64_t type_size; - bool is_quantized; + bool is_quantized = false; }; -const std::unordered_map kGGMLTypeTraits = { - {GGML_TYPE_F32, {1, 4, false}}, {GGML_TYPE_F16, {1, 2, false}}, - {GGML_TYPE_Q4_0, {32, 18, true}}, {GGML_TYPE_Q4_1, {32, 20, true}}, - {GGML_TYPE_Q5_0, {32, 22, true}}, {GGML_TYPE_Q5_1, {32, 24, true}}, - {GGML_TYPE_Q8_0, {32, 34, true}}, {GGML_TYPE_Q8_1, {32, 36, true}}, - {GGML_TYPE_Q2_K, {256, 84, true}}, {GGML_TYPE_Q3_K, {256, 110, true}}, - {GGML_TYPE_Q4_K, {256, 144, true}}, {GGML_TYPE_Q5_K, {256, 176, true}}, - {GGML_TYPE_Q6_K, {256, 210, true}}, {GGML_TYPE_Q8_K, {256, 292, true}}, - {GGML_TYPE_IQ2_XXS, {256, 66, true}}, {GGML_TYPE_IQ2_XS, {256, 74, true}}, - {GGML_TYPE_IQ3_XXS, {256, 98, true}}, {GGML_TYPE_IQ1_S, {256, 50, true}}, - {GGML_TYPE_IQ4_NL, {32, 18, true}}, {GGML_TYPE_IQ3_S, {256, 110, true}}, - {GGML_TYPE_IQ2_S, {256, 82, true}}, {GGML_TYPE_IQ4_XS, {256, 136, true}}, - {GGML_TYPE_I8, {1, 1, false}}, {GGML_TYPE_I16, {1, 2, false}}, - {GGML_TYPE_I32, {1, 4, false}}, {GGML_TYPE_I64, {1, 8, false}}, - {GGML_TYPE_F64, {1, 8, false}}, {GGML_TYPE_IQ1_M, {256, 56, true}}, - {GGML_TYPE_BF16, {1, 2, false}}, {GGML_TYPE_Q4_0_4_4, {32, 18, true}}, - {GGML_TYPE_Q4_0_4_8, {32, 18, true}}, {GGML_TYPE_Q4_0_8_8, {32, 18, true}}, - {GGML_TYPE_TQ1_0, {256, 54, true}}, {GGML_TYPE_TQ2_0, {256, 66, true}}, -}; +extern const std::unordered_map kGGMLTypeTraits; + +float GetQuantBit(GGMLType gt); +std::string to_string(GGMLType t); } // namespace hardware diff --git a/engine/utils/hardware/gguf/gguf_file.h b/engine/utils/hardware/gguf/gguf_file.h index 0472b1b10..731f0bf59 100644 --- a/engine/utils/hardware/gguf/gguf_file.h +++ b/engine/utils/hardware/gguf/gguf_file.h @@ -49,6 +49,8 @@ constexpr const GGUFVersion kGGUFVersionV1 = 1; constexpr const GGUFVersion kGGUFVersionV2 = 2; constexpr const GGUFVersion kGGUFVersionV3 = 3; +constexpr std::size_t kMaxElementsToShow = 50; + enum GGUFMetadataValueType : uint32_t { GGUFMetadataValueTypeUint8 = 0, GGUFMetadataValueTypeInt8, @@ -63,7 +65,6 @@ enum GGUFMetadataValueType : uint32_t { GGUFMetadataValueTypeUint64, GGUFMetadataValueTypeInt64, GGUFMetadataValueTypeFloat64, - _GGUFMetadataValueTypeCount // Unknown }; struct GGUFMetadataKV { @@ -130,12 +131,23 @@ inline std::string to_string(GGUFMetadataValueType vt, const std::any& v) { return "array"; } inline std::string to_string(const GGUFMetadataKVArrayValue& arr_v) { - std::string res; - auto num = std::min(size_t(5), arr_v.arr.size()); - for (size_t i = 0; i < num; i++) { - res += to_string(arr_v.type, arr_v.arr[i]) + " "; + std::string result = "["; + size_t array_size = arr_v.arr.size(); + size_t elements_to_show = std::min(kMaxElementsToShow, array_size); + for (size_t i = 0; i < elements_to_show; i++) { + result += to_string(arr_v.type, arr_v.arr[i]) + ", "; + } + if(array_size > 0) { + result.pop_back(); + result.pop_back(); + } + result += "]"; + if(array_size > elements_to_show) { + result += "... ("; + result += std::to_string(array_size - elements_to_show); + result += " more elements)"; } - return res; + return result; } inline std::string to_string(const GGUFMetadataKV& kv) { @@ -194,6 +206,9 @@ struct GGUFTensorInfo { // // The offset is the start of the file. int64_t start_offset; + + GGUFTensorInfo() + : name(""), n_dimensions(0), dimensions(), type(GGMLType{}), offset(0), start_offset(0) {} }; struct GGUFHelper { @@ -293,10 +308,9 @@ struct GGUFHelper { std::string ReadString() { auto l = Read(); - std::string res(reinterpret_cast(data), l); - auto r = res; + std::string result(reinterpret_cast(data), l); data += l; - return r; + return result; } GGUFMetadataKVArrayValue ReadArray() { @@ -423,6 +437,10 @@ struct GGUFHeader { // MetadataKV are the key-value pairs in the metadata, std::vector metadata_kv; + // Constructor to initialize member variables. + GGUFHeader() + : magic{0}, version{0}, tensor_count(0), metadata_kv_count(0), metadata_kv() {} + std::pair Get(const std::string& name) { for (auto const& kv : metadata_kv) { if (kv.key == name) { @@ -482,6 +500,20 @@ struct GGUFFile { // which describes how many bits are used to store a weight, // higher is better. double model_bits_per_weight; + + GGUFFile() + : header(), + tensor_infos(), + padding(0), + split_paddings(), + tensor_data_start_offset(-1), + split_tensor_data_start_offsets(), + size(0), + split_sizes(), + model_size(0), + split_model_sizes(), + model_parameters(0), + model_bits_per_weight(0.0) {} }; inline std::optional ParseGgufFile(const std::string& path) {