diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000..7f162230c5 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,9 @@ +{ + "files.associations": { + "array": "cpp", + "string": "cpp", + "string_view": "cpp", + "vector": "cpp", + "__config": "cpp" + } +} \ No newline at end of file diff --git a/src/video_compress/cmpto_j2k.cpp b/src/video_compress/cmpto_j2k.cpp index 2d6a142212..daf3ab8563 100644 --- a/src/video_compress/cmpto_j2k.cpp +++ b/src/video_compress/cmpto_j2k.cpp @@ -51,18 +51,22 @@ #include "config.h" #endif // HAVE_CONFIG_H -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #ifdef HAVE_CUDA #include "cuda_wrapper.h" #include "cuda_wrapper/kernels.hpp" -#endif +#endif // HAVE_CUDA #include "debug.h" #include "host.h" #include "lib_common.h" @@ -77,34 +81,71 @@ #include "video_compress.h" #include "video_frame.h" // for vf_alloc_desc, vf_free, vf_resto... +using std::condition_variable; +using std::mutex; +using std::shared_ptr; +using std::unique_lock; + #define MOD_NAME "[Cmpto J2K enc.] " +#define ASSIGN_CHECK_VAL(var, str, minval) \ + do { \ + long long val = unit_evaluate_dbl(str, false, nullptr); \ + if (val < (minval) || val > UINT_MAX) { \ + LOG(LOG_LEVEL_ERROR) \ + << "[J2K] Wrong value " << (str) \ + << " for " #var "! Value must be >= " << (minval) \ + << ".\n"; \ + throw InvalidArgument(); \ + } \ + (var) = val; \ + } while (0) + #define CHECK_OK(cmd, err_msg, action_fail) do { \ int j2k_error = cmd; \ if (j2k_error != CMPTO_OK) {\ - log_msg(LOG_LEVEL_ERROR, "[J2K enc.] %s: %s\n", \ - err_msg, cmpto_j2k_enc_get_last_error()); \ + MSG(ERROR, "%s: %s\n", \ + err_msg, cmpto_j2k_enc_get_last_error()); \ action_fail;\ } \ } while(0) +#define HANDLE_ERROR_COMPRESS_POP do { cmpto_j2k_enc_img_destroy(img); goto start; } while (0) + +#define HANDLE_ERROR_COMPRESS_PUSH \ + if (udata != nullptr) { \ + udata->frame.~shared_ptr(); \ + } \ + if (img != nullptr) { \ + cmpto_j2k_enc_img_destroy(img); \ + } \ + return + #define NOOP ((void) 0) -#define DEFAULT_QUALITY 0.7 -/// default max size of state_video_compress_j2k::pool and also value -/// for state_video_compress_j2k::max_in_frames -#define DEFAULT_POOL_SIZE 4 +// Default CPU Settings +#define DEFAULT_CPU_THREAD_COUNT CMPTO_J2K_ENC_CPU_DEFAULT +#define MIN_CPU_THREAD_COUNT CMPTO_J2K_ENC_CPU_NONE +#define DEFAULT_CPU_MEM_LIMIT 0 // DEFAULT_CPU_MEM_LIMIT should always be 0 +#define DEFAULT_CPU_POOL_SIZE 8 +#define DEFAULT_IMG_LIMIT 0 // Default number of images to be decoded by CPU (0 = CMPTO Default) +#define MIN_CPU_IMG_LIMIT 0 // Min number of images decoded by the CPU at once + +// Default CUDA Settings +#define DEFAULT_CUDA_POOL_SIZE 4 /// number of frames that encoder encodes at moment -#define DEFAULT_TILE_LIMIT 1 -#define DEFAULT_MEM_LIMIT 1000000000LLU +#define DEFAULT_CUDA_TILE_LIMIT 1 +#define DEFAULT_CUDA_MEM_LIMIT 1000000000LLU -using std::condition_variable; -using std::mutex; -using std::stod; -using std::shared_ptr; -using std::unique_lock; +// Default General Settings +#define DEFAULT_QUALITY 0.7 + +typedef void (*cuda_convert_func_t)(int width, int height, void *src, void *dst); #ifdef HAVE_CUDA +/// default max size of state_video_compress_j2k::pool and also value +/// for state_video_compress_j2k::max_in_frames +#define DEFAULT_POOL_SIZE DEFAULT_CUDA_POOL_SIZE template struct cmpto_j2k_enc_cuda_buffer_data_allocator @@ -129,64 +170,43 @@ struct cmpto_j2k_enc_cuda_buffer_data_allocator return new cmpto_j2k_enc_cuda_buffer_data_allocator(*this); } }; + +const cmpto_j2k_enc_preprocessor_run_callback_cuda r12l_to_rg48_cuda = preprocess_r12l_to_rg48; +#else +const cmpto_j2k_enc_preprocessor_run_callback_cuda r12l_to_rg48_cuda = nullptr; + +/// default max size of state_video_compress_j2k::pool and also value +/// for state_video_compress_j2k::max_in_frames +#define DEFAULT_POOL_SIZE DEFAULT_CPU_POOL_SIZE #endif +using cpu_allocator = default_data_allocator; -struct state_video_compress_j2k { - struct module module_data{}; - struct cmpto_j2k_enc_ctx *context{}; - struct cmpto_j2k_enc_cfg *enc_settings{}; - long long int rate = 0; ///< bitrate in bits per second - int mct = -1; // force use of mct - -1 means default - bool pool_in_device_memory = false; ///< frames in pool are on GPU - video_frame_pool pool; ///< pool for frames allocated by us but not yet consumed by encoder - - // settings - unsigned int max_in_frames = - DEFAULT_POOL_SIZE; ///< max number of frames between push and pop - double quality = DEFAULT_QUALITY; - long long int mem_limit = DEFAULT_MEM_LIMIT; - unsigned int tile_limit = DEFAULT_TILE_LIMIT; - - unsigned int in_frames{}; ///< number of currently encoding frames - mutex lock; - condition_variable frame_popped; - video_desc saved_desc{}; - codec_t precompress_codec = VC_NONE; - video_desc compressed_desc{}; - - condition_variable configure_cv; - bool configured = false; - bool should_exit = false; -}; -// prototypes +/* + * Function Predeclarations + */ static void j2k_compressed_frame_dispose(struct video_frame *frame); static void j2k_compress_done(struct module *mod); -static void cleanup_common(struct state_video_compress_j2k *s); +static void j2k_compressed_frame_dispose(struct video_frame *frame); +static void release_cstream(void * custom_data, size_t custom_data_size, const void * codestream, size_t codestream_size); +static void release_cstream_cuda(void *img_custom_data, size_t img_custom_data_size, + int /* device_id */, const void *samples, size_t samples_size); +static void do_gpu_copy(std::shared_ptr &ret, video_frame *in_frame); -static void parallel_conv(video_frame *dst, video_frame *src){ - int src_pitch = vc_get_linesize(src->tiles[0].width, src->color_spec); - int dst_pitch = vc_get_linesize(dst->tiles[0].width, dst->color_spec); - decoder_t decoder = - get_decoder_from_to(src->color_spec, dst->color_spec); - assert(decoder != nullptr); - time_ns_t t0 = get_time_in_ns(); - parallel_pix_conv((int) src->tiles[0].height, dst->tiles[0].data, - dst_pitch, src->tiles[0].data, src_pitch, - decoder, 0); - if (log_level >= LOG_LEVEL_DEBUG) { - MSG(DEBUG, "pixfmt conversion duration: %f ms\n", - NS_TO_MS((double) (get_time_in_ns() - t0))); - } -} +/// auxilliary data structure passed with encoded frame +struct custom_data { + custom_data() = delete; + custom_data(custom_data &b) = delete; + custom_data &operator=(const custom_data &) = delete; + ~custom_data() = delete; + shared_ptr frame; + video_desc desc; + // metadata stored separately, frame may have already been deallocated + // by our release_cstream callback + char metadata[VF_METADATA_SIZE]; +}; -const cmpto_j2k_enc_preprocessor_run_callback_cuda r12l_to_rg48_cuda = -#ifdef HAVE_CUDA - preprocess_r12l_to_rg48; -#else - nullptr; -#endif static struct { codec_t ug_codec; @@ -203,240 +223,486 @@ static struct { {R12L, CMPTO_444_U12_MSB16LE_P012, RG48, r12l_to_rg48_cuda}, }; -#define CPU_CONV_PARAM "j2k-enc-cpu-conv" -ADD_TO_PARAM( - CPU_CONV_PARAM, - "* " CPU_CONV_PARAM "\n" - " Enforce CPU conversion instead of CUDA (applicable to R12L now)\n"); -static void -set_pool(struct state_video_compress_j2k *s, bool have_gpu_preprocess) -{ -#ifdef HAVE_CUDA - s->pool_in_device_memory = false; - if (cuda_devices_count > 1) { - MSG(WARNING, "More than 1 CUDA device will use CPU buffers and " - "conversion...\n"); - } else if (s->precompress_codec == VC_NONE || have_gpu_preprocess) { - s->pool_in_device_memory = true; - s->pool = video_frame_pool( - s->max_in_frames, - cmpto_j2k_enc_cuda_buffer_data_allocator< - cuda_wrapper_malloc, cuda_wrapper_free>()); - return; - } - s->pool = video_frame_pool( - s->max_in_frames, - cmpto_j2k_enc_cuda_buffer_data_allocator()); -#else - assert(!have_gpu_preprocess); // if CUDA not found, we shouldn't have - s->pool = video_frame_pool(s->max_in_frames, default_data_allocator()); -#endif +/** + * @brief Platforms available for J2K Compression + */ +enum j2k_compress_platform { + NONE = 0, + CPU = CMPTO_TECHNOLOGY_CPU, + CUDA = CMPTO_TECHNOLOGY_CUDA, +}; + +/** + * @brief Struct to hold Platform Name and j2k_compress_platform Type + */ +struct j2k_compress_platform_info_t { + const char* name; + j2k_compress_platform platform; +}; + +// Supported Platforms for Compressing J2K +constexpr auto compress_platforms = std::array { + j2k_compress_platform_info_t{"none", j2k_compress_platform::NONE}, + j2k_compress_platform_info_t{"cpu", j2k_compress_platform::CPU}, + j2k_compress_platform_info_t{"cuda", j2k_compress_platform::CUDA} +}; + +/** + * @fn get_platform_from_name + * @brief Search for j2k_compress_platform from friendly name + * @param name Friendly name of platform to search for + * @return j2k_compress_platform that corresponds to name. If no match, return j2k_compress_platform::NONE + */ +[[nodiscard]][[maybe_unused]] +static j2k_compress_platform get_platform_from_name(std::string name) { + std::transform(name.cbegin(), name.cend(), name.begin(), [](unsigned char c) { return std::tolower(c); }); + + auto matches = [&name](const auto& p) { return name.compare(p.name) == 0; }; + + if (const auto& it = std::find_if(compress_platforms.begin(), compress_platforms.end(), matches) ; it != compress_platforms.end()) { + return it->platform; + } + + return j2k_compress_platform::NONE; } -static bool configure_with(struct state_video_compress_j2k *s, struct video_desc desc){ - enum cmpto_sample_format_type sample_format; - cmpto_j2k_enc_preprocessor_run_callback_cuda cuda_convert_func = - nullptr; - bool found = false; - - for(const auto &codec : codecs){ - if(codec.ug_codec == desc.color_spec){ - sample_format = codec.cmpto_sf; - s->precompress_codec = codec.convert_codec; - cuda_convert_func = codec.cuda_convert_func; - found = true; - break; - } +/** + * @fn supports_cmpto_technology + * @brief Check if Comprimato supports requested technology type + * @param cmpto_technology_type Technology type to check against + * @return True if supported, False if unsupported + */ +static bool supports_cmpto_technology(int cmpto_technology_type) { + const auto *version = cmpto_j2k_enc_get_version(); + + return (version == nullptr) ? false : (version->technology & cmpto_technology_type); +} + +/** + * Exceptions for state_video_compress_j2k construction + */ + +/// @brief HelpRequested Exception +struct HelpRequested : public std::exception { + HelpRequested() = default; +}; + +/// @brief InvalidArgument Exception +struct InvalidArgument : public std::exception { + InvalidArgument() = default; +}; + +/// @brief NoCmptoTechnologyFound Exception +struct NoCmptoTechnologyFound : public std::exception { + NoCmptoTechnologyFound() = default; +}; + +/// @brief UnableToCreateJ2KEncoderCTX Exception +struct UnableToCreateJ2KEncoderCTX : public std::exception { + UnableToCreateJ2KEncoderCTX() = default; +}; + +/** + * @brief state_video_compress_j2k Class + */ +struct state_video_compress_j2k { + state_video_compress_j2k(struct module *parent, const char* opts); + ~state_video_compress_j2k(); + + bool compare_video_desc(const video_desc& video_desc); + bool compare_video_desc_and_reconfigure(const video_desc& video_desc); + void stop(); + void try_push_image(std::shared_ptr tx); + auto try_pop_image() -> shared_ptr; + + module module_data{}; + struct cmpto_j2k_enc_ctx_cfg* ctx_cfg = nullptr; + struct cmpto_j2k_enc_ctx *context{}; + struct cmpto_j2k_enc_cfg *enc_settings{}; + std::unique_ptr pool; + unsigned int in_frames{}; ///< number of currently encoding frames + mutex lock; + condition_variable frame_popped; + video_desc saved_desc{}; ///< pool properties + codec_t precompress_codec = VC_NONE; + video_desc compressed_desc{}; + condition_variable configure_cv; + bool configured = false; + bool should_exit = false; + + // Generic Parameters + double quality = DEFAULT_QUALITY; // default image quality + long long int rate = 0; // bitrate in bits per second + int mct = -1; // force use of mct - -1 means default + bool lossless = false; // lossless encoding + + // CPU Parameters + int cpu_thread_count = DEFAULT_CPU_THREAD_COUNT; + unsigned int cpu_img_limit = DEFAULT_IMG_LIMIT; + + // CUDA Parameters + bool pool_in_device_memory = false; + cmpto_j2k_enc_preprocessor_run_callback_cuda cuda_convert_func = nullptr; + uint8_t *cuda_conv_tmp_buf = nullptr; + unsigned long long cuda_mem_limit = DEFAULT_CUDA_MEM_LIMIT; + unsigned int cuda_tile_limit = DEFAULT_CUDA_TILE_LIMIT; + + // j2k_compress_platform::NONE by default at initialization + j2k_compress_platform platform = j2k_compress_platform::NONE; + unsigned int max_in_frames = DEFAULT_CPU_POOL_SIZE; ///< max number of frames between push and pop + + private: + bool initialize_j2k_enc_ctx(); + bool set_pool(const video_desc& video_desc); + void parse_opts(const char* opts); + shared_ptr get_copy(video_frame *frame); + + // CPU Parameter + const size_t cpu_mem_limit = 0; // Not yet implemented as of v2.8.1. Must be 0. + const char* configuration_opts; // Original opts passed at initialization saved here for use by reconfigured +}; + +/** + * @brief state_video_compress_j2k constructor to create from opts + * @param parent Base Module Struct + * @param opts Configuration options to construct class + * @throw HelpRequested if help requested + * @throw InvalidArgument if argument provided isn't known + * @throw UnableToCreateJ2KEncoderCTX if failure to create J2K CTX +*/ +state_video_compress_j2k::state_video_compress_j2k(struct module *parent, const char* opts) { + try { + parse_opts(opts); + } catch (...) { + throw; } - if(!found){ - log_msg(LOG_LEVEL_ERROR, "[J2K] Failed to find suitable pixel format\n"); - return false; + if (!initialize_j2k_enc_ctx()) { + throw UnableToCreateJ2KEncoderCTX(); } - if (s->configured) { - unique_lock lk(s->lock); - CHECK_OK(cmpto_j2k_enc_ctx_stop(s->context), "stop", abort()); - s->frame_popped.wait(lk, [s] { return s->in_frames == 0; }); - cleanup_common(s); - s->configured = false; - } - - if (get_commandline_param(CPU_CONV_PARAM) != nullptr) { - cuda_convert_func = nullptr; - } - - struct cmpto_j2k_enc_ctx_cfg *ctx_cfg = nullptr; - CHECK_OK(cmpto_j2k_enc_ctx_cfg_create(&ctx_cfg), - "Context configuration create", return false); - for (unsigned int i = 0; i < cuda_devices_count; ++i) { - CHECK_OK( - cmpto_j2k_enc_ctx_cfg_add_cuda_device( - ctx_cfg, cuda_devices[i], s->mem_limit, s->tile_limit), - "Setting CUDA device", return false); - } - if (cuda_convert_func != nullptr) { - CHECK_OK(cmpto_j2k_enc_ctx_cfg_set_preprocessor_cuda( - ctx_cfg, nullptr, nullptr, cuda_convert_func), - "Setting CUDA preprocess", return false); - } - - CHECK_OK(cmpto_j2k_enc_ctx_create(ctx_cfg, &s->context), - "Context create", return false); - CHECK_OK(cmpto_j2k_enc_ctx_cfg_destroy(ctx_cfg), - "Context configuration destroy", NOOP); - - CHECK_OK(cmpto_j2k_enc_cfg_create(s->context, &s->enc_settings), - "Creating context configuration:", return false); - CHECK_OK(cmpto_j2k_enc_cfg_set_quantization( - s->enc_settings, - s->quality /* 0.0 = poor quality, 1.0 = full quality */ - ), - "Setting quantization", NOOP); - - CHECK_OK(cmpto_j2k_enc_cfg_set_resolutions(s->enc_settings, 6), - "Setting DWT levels", NOOP); - - CHECK_OK(cmpto_j2k_enc_cfg_set_samples_format_type(s->enc_settings, sample_format), - "Setting sample format", return false); - CHECK_OK(cmpto_j2k_enc_cfg_set_size(s->enc_settings, desc.width, desc.height), - "Setting image size", return false); - if (s->rate) { - CHECK_OK(cmpto_j2k_enc_cfg_set_rate_limit(s->enc_settings, - CMPTO_J2K_ENC_COMP_MASK_ALL, - CMPTO_J2K_ENC_RES_MASK_ALL, s->rate / 8 / desc.fps), - "Setting rate limit", - NOOP); + module_init_default(&module_data); + module_data.cls = MODULE_CLASS_DATA; + module_data.priv_data = this; + module_data.deleter = j2k_compress_done; + module_register(&module_data, parent); +} + +state_video_compress_j2k::~state_video_compress_j2k() { + if (enc_settings != nullptr) { + cmpto_j2k_enc_cfg_destroy(enc_settings); } + enc_settings = nullptr; - int mct = s->mct; - if (mct == -1) { - mct = codec_is_a_rgb(desc.color_spec) ? 1 : 0; + if (context != nullptr) { + cmpto_j2k_enc_ctx_destroy(context); } - CHECK_OK(cmpto_j2k_enc_cfg_set_mct(s->enc_settings, mct), + context = nullptr; + + if (ctx_cfg != nullptr) { + cmpto_j2k_enc_ctx_cfg_destroy(ctx_cfg); + } + ctx_cfg = nullptr; +} + +bool state_video_compress_j2k::compare_video_desc(const video_desc& video_desc) { + return video_desc_eq(saved_desc, video_desc); +} + +bool state_video_compress_j2k::compare_video_desc_and_reconfigure(const video_desc& video_desc) { + if (!compare_video_desc(video_desc)) { + enum cmpto_sample_format_type sample_format; + bool found = false; + + for(const auto &codec : codecs){ + if(codec.ug_codec == video_desc.color_spec){ + sample_format = codec.cmpto_sf; + precompress_codec = codec.convert_codec; + cuda_convert_func = codec.cuda_convert_func; + found = true; + break; + } + } + + if (!found){ + MSG(ERROR, "Failed to find suitable pixel format\n"); + return false; + } + + if (configured) { + auto lk = unique_lock(lock); + + CHECK_OK(cmpto_j2k_enc_ctx_stop(context), "stop", abort()); + + frame_popped.wait(lk, [&] { return in_frames == 0; }); + + if (enc_settings != nullptr) { + cmpto_j2k_enc_cfg_destroy(enc_settings); + } + if (context != nullptr) { + cmpto_j2k_enc_ctx_destroy(context); + } + if (ctx_cfg != nullptr) { + cmpto_j2k_enc_ctx_cfg_destroy(ctx_cfg); + } + + enc_settings = nullptr; + context = nullptr; + ctx_cfg = nullptr; + configured = false; + + // Rebuild the encoder ctx + MSG(INFO, "Re-initializating the encoder context\n"); + initialize_j2k_enc_ctx(); + } + + + CHECK_OK(cmpto_j2k_enc_cfg_set_samples_format_type(enc_settings, sample_format), + "Setting sample format", return false); + + CHECK_OK(cmpto_j2k_enc_cfg_set_size(enc_settings, video_desc.width, video_desc.height), + "Setting image size", return false); + + if (rate) { + CHECK_OK(cmpto_j2k_enc_cfg_set_rate_limit(enc_settings, + CMPTO_J2K_ENC_COMP_MASK_ALL, + CMPTO_J2K_ENC_RES_MASK_ALL, rate / 8 / video_desc.fps), + "Setting rate limit", + NOOP); + } + + if (mct == -1) { + mct = codec_is_a_rgb(video_desc.color_spec) ? 1 : 0; + } + + CHECK_OK(cmpto_j2k_enc_cfg_set_mct(enc_settings, mct), "Setting MCT", NOOP); - char rate[100]; - snprintf_ch(rate, "%s", s->rate == 0 ? "unset" : format_in_si_units(s->rate)); - MSG(INFO, - "Using parameters: quality=%.2f, bitrate=%sbps, mem_limit=%sB, " - "tile_limit=%u, pool_size=%u, mct=%d\n", - s->quality, rate, format_in_si_units(s->mem_limit), s->tile_limit, - s->max_in_frames, mct); - - set_pool(s, cuda_convert_func != nullptr); + char rate_ch[100]; + snprintf_ch(rate_ch, "%s", rate == 0 ? "unset" : format_in_si_units(rate)); + + if (j2k_compress_platform::CPU == platform) { + MSG(INFO, + "Using parameters: platform=cpu, quality=%.2f,%s bitrate=%sbps, thread_count=%i, " + "img_limit=%u, pool_size=%u, mct=%d\n", + quality, + lossless ? " lossless," : "", + rate_ch, + cpu_thread_count, + cpu_img_limit, + max_in_frames, + mct); + } else if (j2k_compress_platform::CUDA == platform) { + MSG(INFO, + "Using parameters: platform=cuda, quality=%.2f,%s bitrate=%sbps, mem_limit=%sB, " + "tile_limit=%u, pool_size=%u, mct=%d\n", + quality, + lossless ? " lossless," : "", + rate_ch, + format_in_si_units(cuda_mem_limit), + cuda_tile_limit, + max_in_frames, + mct); + } - s->compressed_desc = desc; - s->compressed_desc.color_spec = codec_is_a_rgb(desc.color_spec) ? J2KR : J2K; - s->compressed_desc.tile_count = 1; + // pool cannot be configured + if (!set_pool(video_desc)) { + return false; + } - s->saved_desc = desc; + compressed_desc = video_desc; + compressed_desc.color_spec = codec_is_a_rgb(video_desc.color_spec) ? J2KR : J2K; + compressed_desc.tile_count = 1; + + saved_desc = video_desc; + configured = true; - s->configured = true; - s->configure_cv.notify_one(); + configure_cv.notify_one(); + } - return true; + return configured; } -/** - * @brief copies frame from RAM to GPU - * - * Does the pixel format conversion as well if specified. - */ -static void -do_gpu_copy(std::shared_ptr &ret, video_frame *in_frame) -{ +bool state_video_compress_j2k::set_pool(const video_desc& video_desc) { + if (j2k_compress_platform::CUDA == platform) { + bool have_gpu_preprocess = cuda_convert_func != nullptr; #ifdef HAVE_CUDA - cuda_wrapper_set_device((int) cuda_devices[0]); - cuda_wrapper_memcpy(ret->tiles[0].data, in_frame->tiles[0].data, - in_frame->tiles[0].data_len, - CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE); -#else - (void) ret, (void) in_frame; - abort(); // must not reach here -#endif + pool_in_device_memory = false; + + if (cuda_devices_count > 1) { + MSG(WARNING, "More than 1 CUDA device will use CPU buffers and " + "conversion...\n"); + } else if (precompress_codec == VC_NONE || have_gpu_preprocess) { + pool_in_device_memory = true; + pool = std::make_unique( + max_in_frames, + cmpto_j2k_enc_cuda_buffer_data_allocator< + cuda_wrapper_malloc, cuda_wrapper_free>()); + } else { + pool = std::make_unique( + max_in_frames, + cmpto_j2k_enc_cuda_buffer_data_allocator< + cuda_wrapper_malloc, cuda_wrapper_free_host>()); + } + + if (cuda_convert_func != nullptr) { + CHECK_OK(cmpto_j2k_enc_ctx_cfg_set_preprocessor_cuda( + ctx_cfg, + nullptr, nullptr, + cuda_convert_func), + "Setting CUDA preprocess", + return false); + } +#else // Compiled without CUDA support + assert(!have_gpu_preprocess); + pool = std::make_unique(max_in_frames, default_data_allocator()); +#endif + } else if (j2k_compress_platform:: CPU == platform) { + pool = std::make_unique(max_in_frames, cpu_allocator()); + } else { + return false; + } + + struct video_desc pool_desc = video_desc; + if (precompress_codec != VC_NONE && + !pool_in_device_memory) { + pool_desc.color_spec = precompress_codec; + } + + pool->reconfigure(pool_desc, (size_t) vc_get_linesize(pool_desc.width, + pool_desc.color_spec) * pool_desc.height); + + return true; } -static shared_ptr get_copy(struct state_video_compress_j2k *s, video_frame *frame){ - std::shared_ptr ret = s->pool.get_frame(); +void state_video_compress_j2k::stop() { + auto lk = unique_lock(lock); + should_exit = true; - if (s->pool_in_device_memory) { - do_gpu_copy(ret, frame); - } else if (s->precompress_codec != VC_NONE) { - parallel_conv(ret.get(), frame); + if (configured) { + CHECK_OK(cmpto_j2k_enc_ctx_stop(context), "stop", NOOP); } else { - memcpy(ret->tiles[0].data, frame->tiles[0].data, - frame->tiles[0].data_len); + configure_cv.notify_one(); } - - return ret; } -/// auxilliary data structure passed with encoded frame -struct custom_data { - custom_data() = delete; - custom_data(custom_data &b) = delete; - custom_data &operator=(const custom_data &) = delete; - ~custom_data() = delete; - shared_ptr frame; - video_desc desc; - // metadata stored separately, frame may have already been deallocated - // by our release_cstream callback - char metadata[VF_METADATA_SIZE]; -}; -/** - * @fn j2k_compress_pop - * @note - * Do not return empty frame in case of error - that would be interpreted - * as a poison pill (see below) and would stop the further processing - * pipeline. Because of that goto + start label is used. - */ -#define HANDLE_ERROR_COMPRESS_POP do { cmpto_j2k_enc_img_destroy(img); goto start; } while (0) -static std::shared_ptr j2k_compress_pop(struct module *state) -{ - auto *s = (struct state_video_compress_j2k *) state; +void state_video_compress_j2k::try_push_image(std::shared_ptr tx) { + struct cmpto_j2k_enc_img *img = NULL; + struct custom_data *udata = nullptr; + + + CHECK_OK(cmpto_j2k_enc_img_create(context, &img), + "Image create", return); + + /* + * Copy video desc to udata (to be able to reconstruct in j2k_compress_pop(). + * Further make a place for a shared pointer of allocated data, deleter + * returns frame to pool in call of release_cstream() callback (called when + * encoder no longer needs the input data). + */ + CHECK_OK(cmpto_j2k_enc_img_allocate_custom_data( + img, + sizeof *udata, + (void **) &udata), + "Allocate custom image data", + HANDLE_ERROR_COMPRESS_PUSH); + + memcpy(&udata->desc, &compressed_desc, sizeof(compressed_desc)); + new (&udata->frame) shared_ptr(get_copy(tx.get())); + vf_store_metadata(tx.get(), udata->metadata); + + if (j2k_compress_platform::CUDA == platform) { + // Fix this for CPU configuration + if (pool_in_device_memory) { + // cmpto_j2k_enc requires the size after postprocess, which + // doesn't equeal the IN frame data_len for R12L + const codec_t device_codec = precompress_codec == VC_NONE + ? udata->frame->color_spec + : precompress_codec; + const size_t data_len = + vc_get_datalen(udata->frame->tiles[0].width, + udata->frame->tiles[0].height, device_codec); + CHECK_OK(cmpto_j2k_enc_img_set_samples_cuda( + img, cuda_devices[0], udata->frame->tiles[0].data, + data_len, release_cstream_cuda), + "Setting image samples", HANDLE_ERROR_COMPRESS_PUSH); + } else { + CHECK_OK(cmpto_j2k_enc_img_set_samples( + img, udata->frame->tiles[0].data, + udata->frame->tiles[0].data_len, release_cstream), + "Setting image samples", HANDLE_ERROR_COMPRESS_PUSH); + } + } else if (j2k_compress_platform::CPU == platform) { + CHECK_OK(cmpto_j2k_enc_img_set_samples( + img, udata->frame->tiles[0].data, + udata->frame->tiles[0].data_len, release_cstream), + "Setting image samples", HANDLE_ERROR_COMPRESS_PUSH); + } + + auto lk = unique_lock(lock); + frame_popped.wait(lk, [this]{return this->in_frames < this->max_in_frames;}); + lk.unlock(); + + auto failed = false; + CHECK_OK(cmpto_j2k_enc_img_encode(img, enc_settings), + "Encode image push", failed = true); + if (failed) { + udata->frame.~shared_ptr(); + cmpto_j2k_enc_img_destroy(img); + return; + } + + lk.lock(); + in_frames++; + lk.unlock(); +} + +shared_ptr state_video_compress_j2k::try_pop_image() { start: { - unique_lock lk(s->lock); - s->configure_cv.wait(lk, [s] { return s->configured || - s->should_exit; }); - if (s->should_exit) { - return {}; // pass poison pill further + auto lk = unique_lock(lock); + configure_cv.wait(lk, [this] { return this->configured || this->should_exit; }); + + if (should_exit) { + return {}; } } struct cmpto_j2k_enc_img *img; - int status; + int encoded_img_status; + CHECK_OK(cmpto_j2k_enc_ctx_get_encoded_img( - s->context, 1, &img /* Set to NULL if encoder stopped */, - &status), - "Encode image pop", HANDLE_ERROR_COMPRESS_POP); + context, 1, &img /* Set to NULL if encoder stopped */, + &encoded_img_status), + "Encode image pop", HANDLE_ERROR_COMPRESS_POP); + if (img == nullptr) { // this happens when cmpto_j2k_enc_ctx_stop() is called goto start; // reconfiguration or exit } else { - unique_lock lk(s->lock); - s->in_frames--; - s->frame_popped.notify_one(); + auto lk = unique_lock(lock); + in_frames--; + frame_popped.notify_one(); } - if (status != CMPTO_J2K_ENC_IMG_OK) { + + if (encoded_img_status != CMPTO_J2K_ENC_IMG_OK) { const char * encoding_error = ""; CHECK_OK(cmpto_j2k_enc_img_get_error(img, &encoding_error), "get error status", encoding_error = "(failed)"); - log_msg(LOG_LEVEL_ERROR, "Image encoding failed: %s\n", encoding_error); + MSG(ERROR, "Image encoding failed: %s\n", encoding_error); goto start; } + struct custom_data *udata = nullptr; size_t len; CHECK_OK(cmpto_j2k_enc_img_get_custom_data(img, (void **) &udata, &len), - "get custom data", HANDLE_ERROR_COMPRESS_POP); + "get custom data", HANDLE_ERROR_COMPRESS_POP); size_t size; void * ptr; CHECK_OK(cmpto_j2k_enc_img_get_cstream(img, &ptr, &size), - "get cstream", HANDLE_ERROR_COMPRESS_POP); + "get cstream", HANDLE_ERROR_COMPRESS_POP); struct video_frame *out = vf_alloc_desc(udata->desc); vf_restore_metadata(out, udata->metadata); @@ -446,126 +712,450 @@ static std::shared_ptr j2k_compress_pop(struct module *state) CHECK_OK(cmpto_j2k_enc_img_destroy(img), "Destroy image", NOOP); out->callbacks.dispose = j2k_compressed_frame_dispose; out->compress_end = get_time_in_ns(); + return shared_ptr(out, out->callbacks.dispose); } -struct { +static void parallel_conv(video_frame *dst, video_frame *src){ + int src_pitch = vc_get_linesize(src->tiles[0].width, src->color_spec); + int dst_pitch = vc_get_linesize(dst->tiles[0].width, dst->color_spec); + + decoder_t decoder = + get_decoder_from_to(src->color_spec, dst->color_spec); + assert(decoder != nullptr); + time_ns_t t0 = get_time_in_ns(); + parallel_pix_conv((int) src->tiles[0].height, dst->tiles[0].data, + dst_pitch, src->tiles[0].data, src_pitch, + decoder, 0); + if (log_level >= LOG_LEVEL_DEBUG) { + MSG(DEBUG, "pixfmt conversion duration: %f ms\n", + NS_TO_MS((double) (get_time_in_ns() - t0))); + } +} + + +/** + * @brief copies frame from RAM to GPU + * + * Does the pixel format conversion as well if specified. + */ +static void +do_gpu_copy(std::shared_ptr &ret, video_frame *in_frame) +{ +#ifdef HAVE_CUDA + cuda_wrapper_set_device((int) cuda_devices[0]); + cuda_wrapper_memcpy(ret->tiles[0].data, in_frame->tiles[0].data, + in_frame->tiles[0].data_len, + CUDA_WRAPPER_MEMCPY_HOST_TO_DEVICE); +#else + (void) ret, (void) in_frame; + abort(); // must not reach here +#endif +} + + +/** + * @fn j2k_compress_pop + * @note + * Do not return empty frame in case of error - that would be interpreted + * as a poison pill (see below) and would stop the further processing + * pipeline. Because of that goto + start label is used. + */ +static std::shared_ptr j2k_compress_pop(struct module *state) +{ + auto *s = (struct state_video_compress_j2k *) state; + return s->try_pop_image(); +} + +/// @brief Struct for options for J2K Compression Usage +struct opts { const char *label; const char *key; const char *description; const char *opt_str; const bool is_boolean; const char *placeholder; -} usage_opts[] = { +}; + +constexpr opts general_opts[5] = { {"Bitrate", "quality", "Target bitrate", ":rate=", false, "70M"}, {"Quality", "quant_coeff", "Quality in range [0-1], default: " TOSTRING(DEFAULT_QUALITY), ":quality=", false, TOSTRING(DEFAULT_QUALITY)}, - {"Mem limit", "mem_limit", "CUDA device memory limit (in bytes), default: " TOSTRING(DEFAULT_MEM_LIMIT), ":mem_limit=", false, TOSTRING(DEFAULT_MEM_LIMIT)}, - {"Tile limit", "tile_limit", "Number of tiles encoded at moment (less to reduce latency, more to increase performance, 0 means infinity), default: " TOSTRING(DEFAULT_TILE_LIMIT), ":tile_limit=", false, TOSTRING(DEFAULT_TILE_LIMIT)}, - {"Pool size", "pool_size", "Total number of tiles encoder can hold at moment (same meaning as above), default: " TOSTRING(DEFAULT_POOL_SIZE) ", should be greater than ", ":pool_size=", false, TOSTRING(DEFAULT_POOL_SIZE)}, - {"Use MCT", "mct", "use MCT", ":mct", true, ""}, + {"Pool size", "pool_size", "Total number of frames encoder can hold at one moment. Must be greater than tile_limit when platform=cuda and img_limit when platform=cpu. "\ + "default: " TOSTRING(DEFAULT_POOL_SIZE), ":pool_size=", false, TOSTRING(DEFAULT_POOL_SIZE)}, + {"Use MCT", "mct", "Use MCT", ":mct", true, ""}, + {"Lossless compression", "lossless", "Enable lossless compression. default: disabled", ":lossless", true, "disabled"} }; -static void usage() { - col() << "J2K compress usage:\n"; - col() << TERM_BOLD << TRED("\t-c cmpto_j2k"); - for(const auto& opt : usage_opts){ - assert(strlen(opt.opt_str) >= 2); - col() << "[" << opt.opt_str; - if (!opt.is_boolean) { - col() << "<" << opt.opt_str[1] << ">"; // :quality -> (first letter used as ":quality=") - } - col() << "]"; - } - col() << "\n\t\t[--cuda-device ] [--param " CPU_CONV_PARAM "]\n" << TERM_RESET; +constexpr opts cuda_opts[2] = { + {"Mem limit", "mem_limit", "CUDA device memory limit (in bytes), default: " TOSTRING(DEFAULT_CUDA_MEM_LIMIT), ":mem_limit=", false, TOSTRING(DEFAULT_CUDA_MEM_LIMIT)}, + {"Tile limit", "tile_limit", "Number of tiles encoded at one moment by GPU (less to reduce latency, more to increase performance, 0 is infinity). "\ + "default: " TOSTRING(DEFAULT_CUDA_TILE_LIMIT), ":tile_limit=", false, TOSTRING(DEFAULT_CUDA_TILE_LIMIT)}, +}; - col() << "where:\n"; - for(const auto& opt : usage_opts){ - if (opt.is_boolean) { - col() << TBOLD("\t" << opt.opt_str + 1 <<); - } else { - col() << TBOLD("\t<" << opt.opt_str[1] << ">"); - } - col() << " - " << opt.description << "\n"; - } - col() << TBOLD("\t") << " - CUDA device(s) to use (comma separated)\n"; - col() << TBOLD("\t--param " CPU_CONV_PARAM) - << " - use CPU for pixfmt conversion (useful if GPU\n\t\tis fully " - "occupied by the encoder; an option for decoder exists as " - "well)\n"; - color_printf("\nOption prefixes (eg. 'q=' for quality) can be used. SI " - "suffixes are recognized (eg. 'r=7.5M').\n"); +constexpr opts platform_opts[1] = { + {"Plaform", "platform", "Platform device for the encoder to use", ":platform=", false, ""}, +}; + +constexpr opts cpu_opts[2] = { + {"Thread count", "thread_count", "Number of threads to use on the CPU. 0 is all available. default: " TOSTRING(DEFAULT_CPU_THREAD_COUNT), ":thread_count=", false, TOSTRING(DEFAULT_CPU_THREAD_COUNT)}, + {"Image limit", "img_limit", "Number of images that can be encoded at one moment by CPU. Max limit is thread_count. 0 is default limit. default: " TOSTRING(DEFAULT_IMG_LIMIT), ":img_limit=", false, TOSTRING(DEFAULT_IMG_LIMIT)}, +}; + +#define CPU_CONV_PARAM "j2k-enc-cpu-conv" +ADD_TO_PARAM( + CPU_CONV_PARAM, + "* " CPU_CONV_PARAM "\n" + " Enforce CPU conversion instead of CUDA (applicable to R12L now)\n"); + +/** + * @fn usage + * @brief Display J2K Compression Usage Information + */ +static void usage() { + col() << "J2K compress platform support:\n"; + const auto supports_cpu = supports_cmpto_technology(CMPTO_TECHNOLOGY_CPU); + const auto supports_cuda = supports_cmpto_technology(CMPTO_TECHNOLOGY_CUDA); + #ifdef HAVE_CUDA constexpr char cuda_supported[] = "YES"; #else constexpr char cuda_supported[] = TRED("NO"); #endif + + col() << "\tCPU .... " << (supports_cpu ? "yes" : "no") + << (supports_cuda ? "\n" : "\t[default]\n"); + col() << "\tCUDA ... " << (supports_cuda ? "yes\t[default]\n" : "no\n"); + color_printf( - "\nUltraGrid compiled with " TBOLD("CUDA") " support: %s\n", - cuda_supported); -} + "\nUltraGrid compiled with " TBOLD("CUDA") " support: %s\n", + cuda_supported); + + auto show_syntax = [](const auto& options) { + for (const auto& opt : options) { + assert(strlen(opt.opt_str) >= 2); + col() << "[" << opt.opt_str; + if (!opt.is_boolean) { + col() << "<" << opt.opt_str[1] << ">"; // :quality -> (first letter used as ":quality=") + } + col() << "]"; + } + }; + + auto show_arguments = [](const auto& options) { + for (const auto& opt : options) { + assert(strlen(opt.opt_str) >= 2); + if (opt.is_boolean) { + col() << TBOLD("\t" << opt.opt_str + 1 <<); + } else { + col() << TBOLD("\t<" << opt.opt_str[1] << ">"); + } + col() << " - " << opt.description << "\n"; + } + }; -#define ASSIGN_CHECK_VAL(var, str, minval) \ - do { \ - long long val = unit_evaluate_dbl(str, false, nullptr); \ - if (val < (minval) || val > UINT_MAX) { \ - LOG(LOG_LEVEL_ERROR) \ - << "[J2K] Wrong value " << (str) \ - << " for " #var "! Value must be >= " << (minval) \ - << ".\n"; \ - return NULL; \ - } \ - (var) = val; \ - } while (0) + col() << "J2K compress usage:\n"; + if (supports_cuda) { + col() << TERM_BOLD << TRED("\t-c cmpto_j2k:platform=cuda"); + show_syntax(cuda_opts); + show_syntax(general_opts); + col() << "\n\t\t[--cuda-device ] [--param " CPU_CONV_PARAM "]\n" << TERM_RESET; + } + if (supports_cpu) { + col() << TERM_BOLD << TRED("\t-c cmpto_j2k:platform=cpu"); + show_syntax(cpu_opts); + show_syntax(general_opts); + } -static struct module * j2k_compress_init(struct module *parent, const char *c_cfg) -{ + col() << "\n" << TERM_RESET; + col() << "where:\n"; + + show_arguments(platform_opts); + + if (supports_cuda) { + col() << "CUDA compress arguments:\n"; + show_arguments(cuda_opts); + col() << TBOLD("\t") << " - CUDA device(s) to use (comma separated)\n"; + col() << TBOLD("\t--param " CPU_CONV_PARAM) + << " - use CPU for pixfmt conversion (useful if GPU\n\t\tis fully " + "occupied by the encoder; an option for decoder exists as " + "well)\n"; + } + + if (supports_cpu) { + col() << "CPU compress arguments:\n"; + show_arguments(cpu_opts); + } + + col() << "General arguments:\n"; + show_arguments(general_opts); + + color_printf("\nOption prefixes (eg. 'q=' for quality) can be used. SI " + "suffixes are recognized (eg. 'r=7.5M').\n"); +} + +/// CUDA opt Syntax +// -c cmpto_j2k:platform=cuda[:mem_limit=][:tile_limit=][:rate=][:lossless][:quality=][:pool_size=

][:mct] [--cuda-device ] +/// CPU opt Syntax +// -c cmpto_j2k:platform=cpu[:thread_count=][:img_limit=][:rate=][:lossless][:quality=][:pool_size=

][:mct] +/** + * @fn parse_opts + * @brief Parse options and configure class members accordingly + * @param opts Configuration options + * @throw HelpRequested if help requested + * @throw InvalidArgument if argument provided isn't known + */ +void state_video_compress_j2k::parse_opts(const char* opts) { const auto *version = cmpto_j2k_enc_get_version(); - LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? "(unknown)" : version->name) << "\n"; - - auto *s = new state_video_compress_j2k(); - - char *tmp = (char *) alloca(strlen(c_cfg) + 1); - strcpy(tmp, c_cfg); - char *save_ptr, *item; - while ((item = strtok_r(tmp, ":", &save_ptr))) { - tmp = NULL; - if (IS_KEY_PREFIX(item, "rate")) { - ASSIGN_CHECK_VAL(s->rate, strchr(item, '=') + 1, 1); - } else if (IS_KEY_PREFIX(item, "quality")) { - s->quality = stod(strchr(item, '=') + 1); - } else if (strcasecmp("mct", item) == 0 || strcasecmp("nomct", item) == 0) { - s->mct = strcasecmp("mct", item) == 0 ? 1 : 0; - } else if (IS_KEY_PREFIX(item, "mem_limit")) { - ASSIGN_CHECK_VAL(s->mem_limit, strchr(item, '=') + 1, 1); - } else if (IS_KEY_PREFIX(item, "tile_limit")) { - ASSIGN_CHECK_VAL(s->tile_limit, strchr(item, '=') + 1, 0); - } else if (IS_KEY_PREFIX(item, "pool_size")) { - ASSIGN_CHECK_VAL(s->max_in_frames, strchr(item, '=') + 1, 1); - } else if (strcasecmp("help", item) == 0) { + MSG(INFO, "Using codec version: %s\n", (version == nullptr ? "(unknown)" : version->name)); + + /** + * Confirm that system has some supported CMPTO_TECHNOLOGY_ type prior to parsing arguments. + * If it does, configure the preferred default platform and max_in_frames using priority below + * 1 - CUDA + * 2 - CPU + * + * If platform is not found, throw NoCmptoTechnologyFound exception + */ + if (supports_cmpto_technology(CMPTO_TECHNOLOGY_CUDA)) { // prefer CUDA compress by default + platform = j2k_compress_platform::CUDA; + max_in_frames = DEFAULT_CUDA_POOL_SIZE; + } else if (supports_cmpto_technology(CMPTO_TECHNOLOGY_CPU)) { // prefer CPU compress by default + platform = j2k_compress_platform::CPU; + max_in_frames = DEFAULT_CPU_POOL_SIZE; + } else { + MSG(ERROR, "Unable to find supported CMPTO_TECHNOLOGY\n"); + throw NoCmptoTechnologyFound(); + } + + auto split_arguments = [](std::string args, std::string delimiter) { + auto token = std::string{}; + auto pos = size_t{0}; + auto vec = std::vector{}; + + if (args == "\0") { + return vec; + } + + while ((pos = args.find(delimiter)) != std::string::npos) { + token = args.substr(0, pos); + vec.emplace_back(std::move(token)); + args.erase(0, pos + delimiter.length()); + } + + vec.emplace_back(std::move(args)); + return vec; + }; + + auto args = split_arguments(opts, ":"); + + // No Arguments provided, return and use defaults + if (args.empty()) { + return; + } + + const char* item = ""; + + /** + * Check if :pool_size= set manually during argument parsing. + * Since max_in_frames is default initialized to match compile time platform default (CUDA or CPU) + * Changing from :platform=cuda default to :platform=cpu default will not automatically + * set :pool_size= during argument parsing because opts can passed be out of order. + * + * To prevent potential for overwriting user's defined default, set is_pool_size_manually_configured=true + * during argument parsing and check before final function return + * + * If pool size is manually configured, do not set to default. + * Otherwise, set max_in_frames = platform default + */ + auto is_pool_size_manually_configured = false; + + for (const auto& arg : args) { + item = arg.c_str(); + if (strcasecmp("help", item) == 0) { // :help usage(); - return static_cast(INIT_NOERR); + throw HelpRequested(); + + } else if (IS_KEY_PREFIX(item, "platform")) { // :platform= + const char *const platform_name = strchr(item, '=') + 1; + platform = get_platform_from_name(platform_name); + if (j2k_compress_platform::NONE == platform) { + MSG(ERROR, "Unable to find requested encoding platform: \"%s\"\n", platform_name); + throw InvalidArgument(); + } + if (!supports_cmpto_technology(platform)) { + MSG(ERROR, "Does not support requested encoding platform: \"%s\"\n", platform_name); + throw InvalidArgument(); + } + + } else if (strcasecmp("lossless", item) == 0) { // :lossless + lossless = true; + + } else if (IS_KEY_PREFIX(item, "mem_limit")) { // :mem_limit= + ASSIGN_CHECK_VAL(cuda_mem_limit, strchr(item, '=') + 1, 1); + + } else if (IS_KEY_PREFIX(item, "thread_count")) { // :thread_count= + cpu_thread_count = atoi(strchr(item, '=') + 1); + ASSIGN_CHECK_VAL(cpu_thread_count, strchr(item, '=') + 1, MIN_CPU_THREAD_COUNT); + + } else if (IS_KEY_PREFIX(item, "tile_limit")) { // :tile_limit= + ASSIGN_CHECK_VAL(cuda_tile_limit, strchr(item, '=') + 1, 0); + + } else if (IS_KEY_PREFIX(item, "img_limit")) { // :img_limit= + ASSIGN_CHECK_VAL(cpu_img_limit, strchr(item, '=') + 1, MIN_CPU_IMG_LIMIT); + + } else if (IS_KEY_PREFIX(item, "rate")) { // :rate= + ASSIGN_CHECK_VAL(rate, strchr(item, '=') + 1, 1); + + } else if (IS_KEY_PREFIX(item, "quality")) { // :quality= + quality = std::stod(strchr(item, '=') + 1); + if (quality < 0.0 || quality > 1.0) { + MSG(ERROR, "Quality should be in interval [0-1]\n"); + throw InvalidArgument(); + } + + } else if (IS_KEY_PREFIX(item, "pool_size")) { // :pool_size= + ASSIGN_CHECK_VAL(max_in_frames, strchr(item, '=') + 1, 1); + is_pool_size_manually_configured = true; + + } else if (strcasecmp("mct", item) == 0) { // :mct + mct = strcasecmp("mct", item) == 0 ? 1 : 0; + } else { - log_msg(LOG_LEVEL_ERROR, "[J2K] Wrong option: %s\n", item); - goto error; + MSG(ERROR, "Unable to find option: \"%s\"\n", item); + throw InvalidArgument(); } } - if (s->quality < 0.0 || s->quality > 1.0) { - LOG(LOG_LEVEL_ERROR) << "[J2K] Quality should be in interval [0-1]!\n"; - goto error; + // If CPU selected + if (j2k_compress_platform::CPU == platform) { + /** + * Confirm thread_count != CMPTO_J2K_ENC_CPU_DEFAULT (0) + * If it does, img_limit can be > thread_count since all threads used + * + * If thread_count is not 0, confirm img_limit doesn't exceed thread_count + * Set img_limit = thread_count if exeeded + */ + if (cpu_thread_count != CMPTO_J2K_ENC_CPU_DEFAULT && cpu_thread_count < static_cast(cpu_img_limit)) { + MSG(INFO, "img_limit (%i) exceeds thread_count. Lowering img_limit to %i to match thread_count.\n", + cpu_img_limit, + cpu_thread_count); + cpu_img_limit = cpu_thread_count; + } + + // If pool_size was manually set, ignore this check. + // Otherwise, if it was not set, confirm that max_in_frames matches DEFAULT_CPU_POOL_SIZE + if (!is_pool_size_manually_configured && max_in_frames != DEFAULT_CPU_POOL_SIZE) { + MSG(DEBUG, "max_in_frames set to CPU default: %i", DEFAULT_CPU_POOL_SIZE); + max_in_frames = DEFAULT_CPU_POOL_SIZE; + } } +} - module_init_default(&s->module_data); - s->module_data.cls = MODULE_CLASS_DATA; - s->module_data.priv_data = s; - s->module_data.deleter = j2k_compress_done; - module_register(&s->module_data, parent); +/** + * @fn initialize_j2k_enc_ctx + * @brief Initialize internal cmpto_j2k_enc_ctx_cfg for requested platform and settings + * @return true if successsfully configured + * @return false if unable to configure + */ +[[nodiscard]] +bool state_video_compress_j2k::initialize_j2k_enc_ctx() { + CHECK_OK(cmpto_j2k_enc_ctx_cfg_create(&ctx_cfg), "Context configuration create", + return false); + + if (j2k_compress_platform::CPU == platform) { + MSG(INFO, "Configuring for CPU\n"); + + CHECK_OK(cmpto_j2k_enc_ctx_cfg_add_cpu( + ctx_cfg, + cpu_thread_count, + cpu_mem_limit, + cpu_img_limit), + "Setting CPU device", + return false); + + MSG(INFO, "Using %s threads on CPU. Thread Count = %i, Image Limit = %i\n", + (cpu_thread_count == 0 ? "all available" : std::to_string(cpu_thread_count).c_str()), + cpu_thread_count, + cpu_img_limit); + } - return &s->module_data; + if (j2k_compress_platform::CUDA == platform) { + MSG(INFO, "Configuring for CUDA\n"); -error: - delete s; - return NULL; + if (get_commandline_param(CPU_CONV_PARAM) != nullptr) { + cuda_convert_func = nullptr; + } + + for (unsigned int i = 0; i < cuda_devices_count; ++i) { + CHECK_OK(cmpto_j2k_enc_ctx_cfg_add_cuda_device( + ctx_cfg, + cuda_devices[i], + cuda_mem_limit, + cuda_tile_limit), + "Setting CUDA device", + return false); + } + } + + CHECK_OK(cmpto_j2k_enc_ctx_create(ctx_cfg, &context), "Context create", + return false); + + CHECK_OK(cmpto_j2k_enc_cfg_create( + context, + &enc_settings), + "Creating context configuration:", + return false); + if (lossless) { + CHECK_OK(cmpto_j2k_enc_cfg_set_lossless( + enc_settings, + lossless ? 1 : 0), + "Enabling lossless", + return false); + } else { + CHECK_OK(cmpto_j2k_enc_cfg_set_quantization( + enc_settings, + quality /* 0.0 = poor quality, 1.0 = full quality */), + "Setting quantization", + NOOP); + } + + CHECK_OK(cmpto_j2k_enc_cfg_set_resolutions(enc_settings, 6), + "Setting DWT levels", + NOOP); + + return true; +} + +shared_ptr state_video_compress_j2k::get_copy(video_frame *frame) { + std::shared_ptr ret = pool->get_frame(); + + if (pool_in_device_memory) { + do_gpu_copy(ret, frame); + } else if (precompress_codec != VC_NONE) { + parallel_conv(ret.get(), frame); + } else { + memcpy(ret->tiles[0].data, frame->tiles[0].data, + frame->tiles[0].data_len); + } + + return ret; +} + +static struct module * j2k_compress_init(struct module *parent, const char *opts) { + try { + auto *s = new state_video_compress_j2k(parent, opts); + return &s->module_data; + } catch (HelpRequested const& e) { + return static_cast(INIT_NOERR); + } catch (InvalidArgument const& e) { + return NULL; + } catch (UnableToCreateJ2KEncoderCTX const& e) { + return NULL; + } catch (NoCmptoTechnologyFound const& e) { + return NULL; + } catch (...) { + return NULL; + } } static void j2k_compressed_frame_dispose(struct video_frame *frame) @@ -589,139 +1179,47 @@ release_cstream_cuda(void *img_custom_data, size_t img_custom_data_size, samples_size); } -#define HANDLE_ERROR_COMPRESS_PUSH \ - if (udata != nullptr) { \ - udata->frame.~shared_ptr(); \ - } \ - if (img != nullptr) { \ - cmpto_j2k_enc_img_destroy(img); \ - } \ - return - -static void j2k_compress_push(struct module *state, std::shared_ptr tx) -{ - struct state_video_compress_j2k *s = - (struct state_video_compress_j2k *) state; - struct cmpto_j2k_enc_img *img = NULL; - struct custom_data *udata = nullptr; +static void j2k_compress_push(struct module *state, std::shared_ptr tx) { + auto *s = (struct state_video_compress_j2k *) state; if (tx == NULL) { // pass poison pill through encoder - unique_lock lk(s->lock); - s->should_exit = true; - if (s->configured) { - CHECK_OK(cmpto_j2k_enc_ctx_stop(s->context), "stop", - NOOP); - } else { - s->configure_cv.notify_one(); - } + s->stop(); return; } const struct video_desc desc = video_desc_from_frame(tx.get()); - if (!video_desc_eq(s->saved_desc, desc)) { - int ret = configure_with(s, desc); - if (!ret) { - return; - } - struct video_desc pool_desc = desc; - if (s->precompress_codec != VC_NONE && - !s->pool_in_device_memory) { - pool_desc.color_spec = s->precompress_codec; - } - s->pool.reconfigure( - pool_desc, (size_t) vc_get_linesize(pool_desc.width, - pool_desc.color_spec) * - pool_desc.height); - } - - assert(tx->tile_count == 1); // TODO - - CHECK_OK(cmpto_j2k_enc_img_create(s->context, &img), - "Image create", return); - - /* - * Copy video desc to udata (to be able to reconstruct in j2k_compress_pop(). - * Further make a place for a shared pointer of allocated data, deleter - * returns frame to pool in call of release_cstream() callback (called when - * encoder no longer needs the input data). - */ - CHECK_OK(cmpto_j2k_enc_img_allocate_custom_data( - img, - sizeof *udata, - (void **) &udata), - "Allocate custom image data", - HANDLE_ERROR_COMPRESS_PUSH); - memcpy(&udata->desc, &s->compressed_desc, sizeof(s->compressed_desc)); - new (&udata->frame) shared_ptr(get_copy(s, tx.get())); - vf_store_metadata(tx.get(), udata->metadata); - - if (s->pool_in_device_memory) { - // cmpto_j2k_enc requires the size after postprocess, which - // doesn't equeal the IN frame data_len for R12L - const codec_t device_codec = s->precompress_codec == VC_NONE - ? udata->frame->color_spec - : s->precompress_codec; - const size_t data_len = - vc_get_datalen(udata->frame->tiles[0].width, - udata->frame->tiles[0].height, device_codec); - CHECK_OK(cmpto_j2k_enc_img_set_samples_cuda( - img, cuda_devices[0], udata->frame->tiles[0].data, - data_len, release_cstream_cuda), - "Setting image samples", HANDLE_ERROR_COMPRESS_PUSH); - } else { - CHECK_OK(cmpto_j2k_enc_img_set_samples( - img, udata->frame->tiles[0].data, - udata->frame->tiles[0].data_len, release_cstream), - "Setting image samples", HANDLE_ERROR_COMPRESS_PUSH); - } - - unique_lock lk(s->lock); - s->frame_popped.wait(lk, [s]{return s->in_frames < s->max_in_frames;}); - lk.unlock(); - bool failed = false; - CHECK_OK(cmpto_j2k_enc_img_encode(img, s->enc_settings), - "Encode image push", failed = true); - if (failed) { - udata->frame.~shared_ptr(); - cmpto_j2k_enc_img_destroy(img); + + if (!s->compare_video_desc_and_reconfigure(desc)) { return; } - lk.lock(); - s->in_frames++; - lk.unlock(); + assert(tx->tile_count == 1); // TODO + + s->try_push_image(tx); } + static void j2k_compress_done(struct module *mod) { - struct state_video_compress_j2k *s = - (struct state_video_compress_j2k *) mod->priv_data; - cleanup_common(s); + auto *s = (struct state_video_compress_j2k *) mod->priv_data; delete s; } -static void -cleanup_common(struct state_video_compress_j2k *s) -{ - - if (s->enc_settings != nullptr) { - cmpto_j2k_enc_cfg_destroy(s->enc_settings); - } - s->enc_settings = nullptr; - if (s->context != nullptr) { - cmpto_j2k_enc_ctx_destroy(s->context); - } - s->context = nullptr; -} static compress_module_info get_cmpto_j2k_module_info(){ compress_module_info module_info; module_info.name = "cmpto_j2k"; - for(const auto& opt : usage_opts){ - module_info.opts.emplace_back(module_option{opt.label, - opt.description, opt.placeholder, opt.key, opt.opt_str, opt.is_boolean}); - } + auto add_module_options = [&](const auto& options) { + for (const auto& opt : options) { + module_info.opts.emplace_back(module_option{opt.label, + opt.description, opt.placeholder, opt.key, opt.opt_str, opt.is_boolean}); + } + }; + + add_module_options(cuda_opts); + add_module_options(cpu_opts); + add_module_options(general_opts); codec codec_info; codec_info.name = "Comprimato jpeg2000"; @@ -746,4 +1244,3 @@ static struct video_compress_info j2k_compress_info = { }; REGISTER_MODULE(cmpto_j2k, &j2k_compress_info, LIBRARY_CLASS_VIDEO_COMPRESS, VIDEO_COMPRESS_ABI_VERSION); - diff --git a/src/video_decompress/cmpto_j2k.cpp b/src/video_decompress/cmpto_j2k.cpp index f39ec0d0d7..a4eb04ffb6 100644 --- a/src/video_decompress/cmpto_j2k.cpp +++ b/src/video_decompress/cmpto_j2k.cpp @@ -76,14 +76,8 @@ #include "utils/parallel_conv.h" #include "video_codec.h" // for vc_get_linesize, codec_is_a_rgb, get_b... #include "video_decompress.h" - -constexpr const int DEFAULT_TILE_LIMIT = 2; -/// maximal size of queue for decompressed frames -constexpr const int DEFAULT_MAX_QUEUE_SIZE = 2; -/// maximal number of concurrently decompressed frames -constexpr const int DEFAULT_MAX_IN_FRAMES = 4; -constexpr const int64_t DEFAULT_MEM_LIMIT = 1000000000LL; -constexpr const char *MOD_NAME = "[J2K dec.] "; +#include "video_codec.h" // for vc_get_linesize, codec_is_a_rgb, get_b... +#include "video_decompress.h" using std::lock_guard; using std::min; @@ -93,14 +87,71 @@ using std::queue; using std::stoi; using std::unique_lock; -static void -j2k_decompress_cleanup_common(struct state_decompress_j2k *s); +constexpr const char *MOD_NAME = "[Cmpto J2K dec.]"; + +// General Parameter Defaults +constexpr int DEFAULT_MAX_QUEUE_SIZE = 2; // maximal size of queue for decompressed frames +constexpr int DEFAULT_MAX_IN_FRAMES = 4; // maximal number of concurrently decompressed frames + +// CPU-specific Defaults +constexpr int DEFAULT_THREAD_COUNT = CMPTO_J2K_DEC_CPU_DEFAULT; // Number of threads equal to all cores +constexpr int MIN_CPU_THREAD_COUNT = CMPTO_J2K_DEC_CPU_NONE; // No threads will be created +constexpr size_t DEFAULT_CPU_MEM_LIMIT = 0; // Should always be 0. Not implemented as of v2.8.1 +constexpr unsigned int DEFAULT_CPU_IMG_LIMIT = 0; // 0 for default, thread_count for max +constexpr unsigned int MIN_CPU_IMG_LIMIT = 0; // Min number of images encoded by the CPU at once + +// CUDA-specific Defaults +constexpr int64_t DEFAULT_CUDA_MEM_LIMIT = 1000000000LL; +constexpr int DEFAULT_CUDA_TILE_LIMIT = 2; + +#define NOOP ((void) 0) +#define CHECK_OK(cmd, err_msg, action_fail) do { \ + int j2k_error = cmd; \ + if (j2k_error != CMPTO_OK) {\ + LOG(LOG_LEVEL_ERROR) << MOD_NAME << (err_msg) << ": " << cmpto_j2k_dec_get_last_error() << "\n"; \ + action_fail;\ + } \ +} while(0) + +/* + * Function Predeclarations + */ +static void *decompress_j2k_worker(void *args); +static void j2k_decompress_cleanup_common(struct state_decompress_j2k *s); + + +/* + * Platform to use for J2K Decompression + */ +enum j2k_decompress_platform { + NONE = 0, + CPU = CMPTO_TECHNOLOGY_CPU, + CUDA = CMPTO_TECHNOLOGY_CUDA +}; + +/** + * @fn supports_cmpto_technology + * @brief Check if Comprimato supports requested technology type + * @param cmpto_technology_type Technology type to check against + * @return True if supported, False if unsupported + */ +static bool supports_cmpto_technology(int cmpto_technology_type) { + const auto *version = cmpto_j2k_dec_get_version(); + + return (version == nullptr) ? false : (version->technology & cmpto_technology_type); +} + +/* + * Exceptions for state_decompress_j2k construction + */ +/// @brief UnableToCreateJ2KDecoderCTX Exception +struct UnableToCreateJ2KDecoderCTX : public std::exception { + UnableToCreateJ2KDecoderCTX() = default; +}; struct state_decompress_j2k { - state_decompress_j2k(unsigned int mqs, unsigned int mif) - : max_queue_size(mqs), max_in_frames(mif) {} - long long int req_mem_limit = DEFAULT_MEM_LIMIT; - unsigned int req_tile_limit = DEFAULT_TILE_LIMIT; + state_decompress_j2k(); + cmpto_j2k_dec_ctx *decoder{}; cmpto_j2k_dec_cfg *settings{}; @@ -111,26 +162,47 @@ struct state_decompress_j2k { queue> decompressed_frames; ///< buffer, length int pitch; pthread_t thread_id{}; - unsigned int max_queue_size; ///< maximal length of @ref decompressed_frames - unsigned int max_in_frames; ///< maximal frames that can be "in progress" unsigned int in_frames{}; ///< actual number of decompressed frames unsigned long long int dropped{}; ///< number of dropped frames because queue was full + // CUDA Defaults + unsigned int cuda_mem_limit = DEFAULT_CUDA_MEM_LIMIT; + unsigned int cuda_tile_limit = DEFAULT_CUDA_TILE_LIMIT; + + // Default Decompression Platform to Use + j2k_decompress_platform platform = j2k_decompress_platform::NONE; + + // CPU Defaults + unsigned int cpu_img_limit = DEFAULT_CPU_IMG_LIMIT; + const size_t cpu_mem_limit = DEFAULT_CPU_MEM_LIMIT; // Should always be 0. Not yet implemented as of Cmpto v2.8.4 + signed int cpu_thread_count = DEFAULT_THREAD_COUNT; + + // General Defaults + unsigned int max_in_frames = DEFAULT_MAX_IN_FRAMES; // maximal frames that can be "in progress" + unsigned int max_queue_size = DEFAULT_MAX_QUEUE_SIZE; // maximal length of @ref decompressed_frames + void (*convert)(unsigned char *dst_buffer, unsigned char *src_buffer, unsigned int width, unsigned int height){nullptr}; + + private: + void parse_params(); + bool initialize_j2k_dec_ctx(); }; -#define CHECK_OK(cmd, err_msg, action_fail) do { \ - int j2k_error = cmd; \ - if (j2k_error != CMPTO_OK) {\ - LOG(LOG_LEVEL_ERROR) << MOD_NAME << (err_msg) << ": " << cmpto_j2k_dec_get_last_error() << "\n"; \ - action_fail;\ - } \ -} while(0) +/** + * @brief Default state_decompress_j2k Constructor + * @throw UnableToCreateJ2KDecoderCTX if unable to create J2K CTX + */ +state_decompress_j2k::state_decompress_j2k() { + parse_params(); + + if (!initialize_j2k_dec_ctx()) { + throw UnableToCreateJ2KDecoderCTX(); + } +} -#define NOOP ((void) 0) static void rg48_to_r12l(unsigned char *dst_buffer, unsigned char *src_buffer, @@ -150,11 +222,21 @@ static void rg48_to_r12l(unsigned char *dst_buffer, } } -static void print_dropped(unsigned long long int dropped) { +static void print_dropped(unsigned long long int dropped, const j2k_decompress_platform& platform) { if (dropped % 10 == 1) { - log_msg(LOG_LEVEL_WARNING, "[J2K dec] Some frames (%llu) dropped.\n", dropped); - log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "[J2K dec] You may try to increase " - "tile limit to increase the throughput by adding parameter: --param j2k-dec-tile-limit=4\n"); + MSG(WARNING, "Some frames (%llu) dropped.\n", dropped); + + if (j2k_decompress_platform::CPU == platform) { + log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "%s You may try to increase " + "image limit to increase the number of images decoded at one moment by adding parameter: --param j2k-dec-img-limit=#\n", + MOD_NAME); + } + + if (j2k_decompress_platform::CUDA == platform) { + log_msg_once(LOG_LEVEL_INFO, to_fourcc('J', '2', 'D', 'W'), "%s You may try to increase " + "tile limit to increase the throughput by adding parameter: --param j2k-dec-tile-limit=#\n", + MOD_NAME); + } } } @@ -187,7 +269,7 @@ static void *decompress_j2k_worker(void *args) const char * decoding_error = ""; CHECK_OK(cmpto_j2k_dec_img_get_error(img, &decoding_error), "get error status", decoding_error = "(failed)"); - log_msg(LOG_LEVEL_ERROR, "Image decoding failed: %s\n", decoding_error); + MSG(ERROR, "Image decoding failed: %s\n", decoding_error); continue; } @@ -208,7 +290,7 @@ static void *decompress_j2k_worker(void *args) "Unable to to return processed image", NOOP); lock_guard lk(s->lock); while (s->decompressed_frames.size() >= s->max_queue_size) { - print_dropped(s->dropped++); + print_dropped(s->dropped++, s->platform); auto decoded = s->decompressed_frames.front(); s->decompressed_frames.pop(); free(decoded.first); @@ -219,41 +301,204 @@ static void *decompress_j2k_worker(void *args) return NULL; } +/* + * Command Line Parameters for state_decompress_j2k + */ +#define CPU_CONV_PARAM "j2k-dec-cpu-conv" + +// CUDA-specific Command Line Parameters +ADD_TO_PARAM("j2k-dec-use-cuda", "* j2k-dec-use-cuda\n" + " use CUDA to decode images\n"); ADD_TO_PARAM("j2k-dec-mem-limit", "* j2k-dec-mem-limit=\n" " J2K max memory usage in bytes.\n"); ADD_TO_PARAM("j2k-dec-tile-limit", "* j2k-dec-tile-limit=\n" " number of tiles decoded at moment (less to reduce latency, more to increase performance, 0 unlimited)\n"); -ADD_TO_PARAM("j2k-dec-queue-len", "* j2k-queue-len=\n" +// CPU-specific Command Line Parameters +ADD_TO_PARAM("j2k-dec-use-cpu", "* j2k-dec-use-cpu\n" + " use the CPU to decode images\n"); +ADD_TO_PARAM("j2k-dec-cpu-thread-count", "* j2k-dec-cpu-thread-count=\n" + " number of threads to use on the CPU (0 means number of threads equal to all cores)\n"); +ADD_TO_PARAM("j2k-dec-img-limit", "* j2k-dec-img-limit=\n" + " number of images which can be decoded at one moment (0 means default, thread-count is maximum limit)\n"); +ADD_TO_PARAM(CPU_CONV_PARAM, "* " CPU_CONV_PARAM "\n" + " Enforce CPU conversion instead of CUDA (applicable to R12L now)\n"); +// General Command Line Parameters +ADD_TO_PARAM("j2k-dec-queue-len", "* j2k-dec-queue-len=\n" " max queue len\n"); -ADD_TO_PARAM("j2k-dec-encoder-queue", "* j2k-encoder-queue=\n" +ADD_TO_PARAM("j2k-dec-encoder-queue", "* j2k-dec-encoder-queue=\n" " max number of frames held by encoder\n"); -static void * j2k_decompress_init(void) -{ - unsigned int queue_len = DEFAULT_MAX_QUEUE_SIZE; - unsigned int encoder_in_frames = DEFAULT_MAX_IN_FRAMES; + +/** + * @fn parse_params + * @brief Parse Command Line Parameters and Initialize Struct Members + */ +void state_decompress_j2k::parse_params() { + /** + * Confirm that system has some supported CMPTO_TECHNOLOGY_ type prior to parsing arguments. + * If it does, configure the preferred default platform and max_in_frames using priority below + * 1 - CUDA + * 2 - CPU + * + * If platform is not found set platform = j2k_decompress_platform::NONE + */ + + const auto supports_cpu = supports_cmpto_technology(CMPTO_TECHNOLOGY_CPU); + const auto supports_cuda = supports_cmpto_technology(CMPTO_TECHNOLOGY_CUDA); + + if (supports_cuda) { // prefer CUDA decompress by default + platform = j2k_decompress_platform::CUDA; + } else if (supports_cpu) { // prefer CPU decompress by default + platform = j2k_decompress_platform::CPU; + } else { + MSG(ERROR, "Unable to find supported CMPTO_TECHNOLOGY\n"); + platform = j2k_decompress_platform::NONE; // default to NONE + } + + // CUDA-specific commandline_params + if (get_commandline_param("j2k-dec-use-cuda")) { + if (supports_cuda) { + platform = j2k_decompress_platform::CUDA; + } else { + MSG(ERROR, "j2k-dec-use-cuda argument provided. CUDA decompress not supported.\n"); + + // Check if CPU is default decompress + // If it is, create a log message to notify this will be used automatically + if (j2k_decompress_platform::CPU == platform) { + MSG(INFO, "Defaulting to CPU decompress\n"); + } + } + } + + if (get_commandline_param("j2k-dec-mem-limit")) { + cuda_mem_limit = unit_evaluate_dbl(get_commandline_param("j2k-dec-mem-limit"), false, nullptr); + } + + if (get_commandline_param("j2k-dec-tile-limit")) { + cuda_tile_limit = stoi(get_commandline_param("j2k-dec-tile-limit")); + } + + // CPU-specific commandline_params + if (get_commandline_param("j2k-dec-use-cpu")) { + if (supports_cpu) { + platform = j2k_decompress_platform::CPU; + } else { + MSG(ERROR, "j2k-dec-use-cpu argument provided. CPU decompress not supported.\n"); + } + } + + if (get_commandline_param("j2k-dec-cpu-thread-count")) { + cpu_thread_count = atoi(get_commandline_param("j2k-dec-cpu-thread-count")); + + // Confirm cpu_thread_count between MIN_CPU_THREAD_COUNT + 1 (0) + if (cpu_thread_count <= MIN_CPU_THREAD_COUNT) { + // Implementing this requires the creation of executor threads. + MSG(ERROR, "j2k-dec-cpu-thread-count must be 0 or higher. Setting to min allowed 0\n"); + cpu_thread_count = 0; + } + } if (get_commandline_param("j2k-dec-queue-len")) { - queue_len = atoi(get_commandline_param("j2k-dec-queue-len")); + max_queue_size = atoi(get_commandline_param("j2k-dec-queue-len")); + } + + if (get_commandline_param("j2k-dec-img-limit")) { + cpu_img_limit = atoi(get_commandline_param("j2k-dec-img-limit")); + + // Confirm cpu_img_limit between MIN_CPU_IMG_LIMIT + if (cpu_img_limit < MIN_CPU_IMG_LIMIT) { + MSG(INFO, "j2k-dec-img-limit below min allowed of %i. Setting to min allowed %i\n", + MIN_CPU_IMG_LIMIT, + MIN_CPU_IMG_LIMIT); + cpu_img_limit = MIN_CPU_IMG_LIMIT; + } } if (get_commandline_param("j2k-dec-encoder-queue")) { - encoder_in_frames = atoi(get_commandline_param("j2k-dec-encoder-queue")); + max_in_frames = atoi(get_commandline_param("j2k-dec-encoder-queue")); + } + + const auto *version = cmpto_j2k_dec_get_version(); + MSG(INFO, "Using code version: %s\n", (version == nullptr ? "(unknown)" : version->name)); +} + +/** + * @fn initialize_j2k_dec_ctx + * @brief Create cmpto_j2k_dec_ctx_cfg based on requested platform and command line arguments + * @return true if cmpto_j2k_dec_ctx_cfg successfully created + * @return false if unable to create cmpto_j2k_dec_ctx_cfg + */ +[[nodiscard]] +bool state_decompress_j2k::initialize_j2k_dec_ctx() { + struct cmpto_j2k_dec_ctx_cfg *ctx_cfg; + CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", return false); + + if (j2k_decompress_platform::NONE == platform) { + MSG(ERROR, "No supported CMPTO_TECHNOLOGY found. Unable to create decompress context.\n"); + return false; } - auto *s = new state_decompress_j2k(queue_len, encoder_in_frames); - if (get_commandline_param("j2k-dec-mem-limit") != nullptr) { - s->req_mem_limit = unit_evaluate_dbl( - get_commandline_param("j2k-dec-mem-limit"), false, nullptr); + if (j2k_decompress_platform::CUDA == platform) { + MSG(INFO, "Using platform CUDA for decompress\n"); + for (unsigned int i = 0; i < cuda_devices_count; ++i) { + CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device(ctx_cfg, cuda_devices[i], cuda_mem_limit, cuda_tile_limit), + "Error setting CUDA device", return false); + MSG(INFO, "Using CUDA Device %s\n", std::to_string(cuda_devices[i]).c_str()); + } } - if (get_commandline_param("j2k-dec-tile-limit") != nullptr) { - s->req_tile_limit = stoi(get_commandline_param("j2k-dec-tile-limit")); + if (j2k_decompress_platform::CPU == platform) { + MSG(INFO, "Using platform CPU for decompress\n"); + // Confirm that cpu_thread_count != 0 (unlimited). If it does, cpu_img_limit can exceed thread_count + if (cpu_thread_count != DEFAULT_THREAD_COUNT && cpu_img_limit > static_cast(cpu_thread_count)) { + MSG(INFO, "j2k-dec-img-limit set to %i. Lowering to match to match j2k-dec-cpu-thread-count (%i)\n", + cpu_img_limit, + cpu_thread_count); + cpu_img_limit = cpu_thread_count; + } + + CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cpu( + ctx_cfg, + cpu_thread_count, + cpu_mem_limit, + cpu_img_limit), + "Error configuring the CPU", + return false); + + MSG(INFO, "Using %s threads on the CPU. Image Limit set to %i.\n", + (cpu_thread_count == 0 ? "all available" : std::to_string(cpu_thread_count).c_str()), + cpu_img_limit); } - const auto *version = cmpto_j2k_dec_get_version(); - LOG(LOG_LEVEL_INFO) << MOD_NAME << "Using codec version: " << (version == nullptr ? "(unknown)" : version->name) << "\n"; + CHECK_OK(cmpto_j2k_dec_ctx_create(ctx_cfg, &this->decoder), + "Error initializing context", + return false); + + CHECK_OK(cmpto_j2k_dec_ctx_cfg_destroy(ctx_cfg), "Destroy cfg", NOOP); + + CHECK_OK(cmpto_j2k_dec_cfg_create(this->decoder, &this->settings), + "Error creating configuration", { + cmpto_j2k_dec_cfg_destroy(this->settings); + cmpto_j2k_dec_ctx_destroy(this->decoder); + return false; + }); + + auto ret = pthread_create(&this->thread_id, NULL, decompress_j2k_worker, static_cast(this)); + assert(ret == 0 && "Unable to create thread"); - return s; + return true; +} + +/** + * @brief Initialize a new instance of state_decompress_j2k + * @return Null or Pointer to state_decompress_j2k + */ +static void * j2k_decompress_init(void) { + try { + auto *s = new state_decompress_j2k(); + return s; + } catch (...) { + return NULL; + } } static void @@ -266,6 +511,7 @@ r12l_postprocessor_get_sz( *temp_buffer_size = 0; // no temp buffer required *output_buffer_size = vc_get_datalen(size_x, size_y, R12L); } + #ifdef HAVE_CUDA const cmpto_j2k_dec_postprocessor_run_callback_cuda r12l_postprocess_cuda = postprocess_rg48_to_r12l; @@ -294,11 +540,6 @@ static const struct conv_props { r12l_postprocessor_get_sz, r12l_postprocess_cuda }, }; -#define CPU_CONV_PARAM "j2k-dec-cpu-conv" -ADD_TO_PARAM( - CPU_CONV_PARAM, - "* " CPU_CONV_PARAM "\n" - " Enforce CPU conversion instead of CUDA (applicable to R12L now)\n"); static bool set_postprocess_convert(struct state_decompress_j2k *s, struct cmpto_j2k_dec_ctx_cfg *ctx_cfg, @@ -344,16 +585,33 @@ static int j2k_decompress_reconfigure(void *state, struct video_desc desc, } enum cmpto_sample_format_type cmpto_sf = (cmpto_sample_format_type) 0; - + struct cmpto_j2k_dec_ctx_cfg *ctx_cfg = nullptr; CHECK_OK(cmpto_j2k_dec_ctx_cfg_create(&ctx_cfg), "Error creating dec cfg", return false); - for (unsigned int i = 0; i < cuda_devices_count; ++i) { - CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device( - ctx_cfg, cuda_devices[i], s->req_mem_limit, - s->req_tile_limit), - "Error setting CUDA device", return false); + if (j2k_decompress_platform::CUDA == s->platform) { + for (unsigned int i = 0; i < cuda_devices_count; ++i) { + CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cuda_device( + ctx_cfg, cuda_devices[i], s->cuda_mem_limit, + s->cuda_tile_limit), + "Error setting CUDA device", return false); + } } + if (j2k_decompress_platform::CPU == s->platform) { + CHECK_OK(cmpto_j2k_dec_ctx_cfg_add_cpu( + ctx_cfg, + s->cpu_thread_count, + s->cpu_mem_limit, + s->cpu_img_limit), + "Error configuring the CPU", + return false); + + MSG(INFO, "Using %s threads on the CPU. Image Limit set to %i.\n", + (s->cpu_thread_count == 0 ? "all available" : std::to_string(s->cpu_thread_count).c_str()), + s->cpu_img_limit); + } + + for(const auto &codec : codecs){ if(codec.ug_codec != out_codec){ continue; @@ -431,7 +689,7 @@ static decompress_status j2k_probe_internal_codec(codec_t in_codec, unsigned cha struct cmpto_j2k_dec_comp_info comp_info[3]; if (cmpto_j2k_dec_cstream_get_img_info(buffer, len, &info) != CMPTO_OK || cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 0, &comp_info[0]) != CMPTO_OK) { - log_msg(LOG_LEVEL_ERROR, "J2K Failed to get image or first component info.\n"); + MSG(ERROR, "J2K Failed to get image or first component info.\n"); return DECODER_NO_FRAME; } @@ -440,7 +698,7 @@ static decompress_status j2k_probe_internal_codec(codec_t in_codec, unsigned cha if (info.comp_count == 3) { if (cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 1, &comp_info[1]) != CMPTO_OK || cmpto_j2k_dec_cstream_get_comp_info(buffer, len, 2, &comp_info[2]) != CMPTO_OK) { - log_msg(LOG_LEVEL_ERROR, "J2K Failed to get componentt 1 or 2 info.\n"); + MSG(ERROR, "J2K Failed to get component 1 or 2 info.\n"); return DECODER_NO_FRAME; } if (comp_info[0].sampling_factor_x == 1 && comp_info[0].sampling_factor_y == 1 && @@ -479,7 +737,7 @@ static decompress_status j2k_decompress(void *state, unsigned char *dst, unsigne } if (s->in_frames >= s->max_in_frames + 1) { - print_dropped(s->dropped++); + print_dropped(s->dropped++, s->platform); goto return_previous; } @@ -547,7 +805,7 @@ j2k_decompress_cleanup_common(struct state_decompress_j2k *s) { cmpto_j2k_dec_ctx_stop(s->decoder); pthread_join(s->thread_id, NULL); - log_msg(LOG_LEVEL_VERBOSE, "[J2K dec.] Decoder stopped.\n"); + MSG(VERBOSE, "Decoder stopped.\n"); if (s->settings != nullptr) { cmpto_j2k_dec_cfg_destroy(s->settings); diff --git a/src/video_display/vulkan/vulkan_context.cpp b/src/video_display/vulkan/vulkan_context.cpp index 60576f1f6d..b0a7d5e3f1 100644 --- a/src/video_display/vulkan/vulkan_context.cpp +++ b/src/video_display/vulkan/vulkan_context.cpp @@ -43,7 +43,7 @@ using namespace vulkan_display_detail; using namespace vulkan_display; namespace { - +#if VK_HEADER_VERSION >= 304 VKAPI_ATTR VkBool32 VKAPI_CALL debug_callback( [[maybe_unused]] vk::DebugUtilsMessageSeverityFlagBitsEXT message_severity, [[maybe_unused]] vk::DebugUtilsMessageTypeFlagsEXT message_type, @@ -64,6 +64,28 @@ VKAPI_ATTR VkBool32 VKAPI_CALL debug_callback( return VK_FALSE; } +#else /// compat (eg. Debian 11) @todo TOREMOVE later + VKAPI_ATTR VkBool32 VKAPI_CALL debug_callback( + [[maybe_unused]] VkDebugUtilsMessageSeverityFlagBitsEXT message_severity, + [[maybe_unused]] VkDebugUtilsMessageTypeFlagsEXT message_type, + const VkDebugUtilsMessengerCallbackDataEXT* callback_data, + [[maybe_unused]] void* user_data) +{ + LogLevel level = LogLevel::notice; + if (VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT & message_severity) level = LogLevel::error; + else if (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT & message_severity) level = LogLevel::warning; + else if (VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT & message_severity) level = LogLevel::info; + else if (VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT & message_severity) level = LogLevel::verbose; + + vulkan_log_msg(level, "validation layer: "s + callback_data->pMessage); + + if (message_type != VkDebugUtilsMessageTypeFlagBitsEXT::VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT){ + //assert(false); + } + + return VK_FALSE; +} +#endif void check_validation_layers(const std::vector& required_layers) { std::vector layers = vk::enumerateInstanceLayerProperties();