From eb11fd9a611f5f7908e2359afa7f5fc67d2af04f Mon Sep 17 00:00:00 2001 From: halx99 Date: Fri, 2 Jul 2021 14:20:54 +0800 Subject: [PATCH] Always decompress astc parallel for sotware decoder [ci build] a. Improve task life cycle management. b. Fix ci --- cocos/base/astc.cpp | 197 +++++++++++++++++++++----------------------- 1 file changed, 93 insertions(+), 104 deletions(-) diff --git a/cocos/base/astc.cpp b/cocos/base/astc.cpp index 3f84a2c776..b39aab13be 100644 --- a/cocos/base/astc.cpp +++ b/cocos/base/astc.cpp @@ -15,6 +15,9 @@ #include #include +#include +#include +#include #include "astc/astcenc.h" #include "astc/astcenc_internal.h" #include "yasio/detail/utils.hpp" @@ -25,49 +28,47 @@ typedef std::mutex astc_decompress_mutex_t; struct astc_decompress_task { - astc_decompress_task( - const uint8_t* in, uint8_t* out, unsigned int dim_x, unsigned int dim_y, int block_x, int block_y); - - ~astc_decompress_task(); - - void retain() { - ++_nref; + static std::shared_ptr allocate(const uint8_t* in, unsigned int inlen, uint8_t* out, + unsigned int dim_x, unsigned int dim_y, int block_x, int block_y); + astc_decompress_task() {} + ~astc_decompress_task() { +#if !ASTCDEC_NO_CONTEXT + if (_context) + astcenc_context_free(this->_context); +#endif } - void release() { - if (--_nref == 0) - delete this; + void wait_done() { +#if ASTCDEC_NO_CONTEXT + _decompress_pm.wait(); +#else + _context->manage_decompress.wait(); +#endif } - astcenc_config _config{}; - const uint8_t* _in_texels = nullptr; void* _out_texels[1]{}; - uint32_t _out_len = 0; + unsigned int _xblocks, _yblocks; #if ASTCDEC_NO_CONTEXT + unsigned int _block_x, _block_y; ParallelManager _decompress_pm{}; block_size_descriptor _bsd{}; #else + astcenc_config _config{}; astcenc_context* _context = nullptr; #endif astcenc_image _image_out{}; - - - std::atomic _nref; }; class astc_decompress_job_manager { public: - static int s_thread_count; - static astc_decompress_job_manager* get_instance() { static astc_decompress_job_manager s_instance; return &s_instance; } astc_decompress_job_manager() { - s_thread_count = std::thread::hardware_concurrency(); - - for (int i = 0; i < s_thread_count; ++i) { + int thread_count = std::thread::hardware_concurrency(); + for (int i = 0; i < thread_count; ++i) { _threads.push_back(std::thread{&astc_decompress_job_manager::run, this}); } } @@ -75,12 +76,10 @@ public: ~astc_decompress_job_manager() { _stopped = true; - _taskQueueMtx.lock(); - for (auto task : _taskQueue) - task->release(); - _taskQueue.clear(); - _taskQueueCV.notify_all(); - _taskQueueMtx.unlock(); + _task_queue_mtx.lock(); + _task_queue.clear(); + _task_queue_cv.notify_all(); + _task_queue_mtx.unlock(); for (auto& t : _threads) { if (t.joinable()) @@ -91,6 +90,31 @@ public: int decompress_parallel_sync(const uint8_t* in, uint32_t inlen, uint8_t* out, unsigned int dim_x, unsigned int dim_y, int block_x, int block_y) { + + auto task = make_task(in, inlen, out, dim_x, dim_y, block_x, block_y); + if (!task) + return ASTCENC_ERR_OUT_OF_MEM; + + _task_queue_mtx.lock(); + _task_queue.push_back(task); + _task_queue_mtx.unlock(); + _task_queue_cv.notify_all(); // notify all thread to process the single decompress task parallel + + task->wait_done(); + + _task_queue_mtx.lock(); + assert(!_task_queue.empty()); + auto t = _task_queue.front(); + assert(t.get() == task.get()); + _task_queue.pop_front(); + _task_queue_mtx.unlock(); + + return ASTCENC_SUCCESS; + } + +private: + std::shared_ptr make_task(const uint8_t* in, unsigned int inlen, + uint8_t* out, unsigned int dim_x, unsigned int dim_y, int block_x, int block_y) { unsigned int xblocks = (dim_x + block_x - 1) / block_x; unsigned int yblocks = (dim_y + block_y - 1) / block_y; unsigned int zblocks = 1; // (dim_z + block_z - 1) / block_z; @@ -98,86 +122,73 @@ public: // Check we have enough output space (16 bytes per block) auto total_blocks = xblocks * yblocks * zblocks; size_t size_needed = total_blocks * 16; - if (inlen < size_needed) { - return ASTCENC_ERR_OUT_OF_MEM; - } + if (inlen < size_needed) + return nullptr; + + auto task = std::make_shared(); + task->_in_texels = in; + task->_out_texels[0] = out; + task->_image_out = {dim_x, dim_y, 1, ASTCENC_TYPE_U8, task->_out_texels}; + + task->_xblocks = xblocks; + task->_yblocks = yblocks; #if ASTCDEC_NO_CONTEXT static std::once_flag once_flag; std::call_once(once_flag, init_quant_mode_table); -#endif - auto task = new astc_decompress_task(in, out, dim_x, dim_y, block_x, block_y); -#if ASTCDEC_NO_CONTEXT + + task->_block_x = block_x; + task->_block_y = block_y; + init_block_size_descriptor(block_x, block_y, 1, false, 0 /*unused for decompress*/, task->_bsd); task->_decompress_pm.init(total_blocks); #else + (void) astcenc_config_init( + ASTCENC_PRF_LDR, block_x, block_y, 1, 0, ASTCENC_FLG_DECOMPRESS_ONLY, &task->_config); + (void) astcenc_context_alloc(&task->_config, (unsigned int)_threads.size(), &task->_context); task->_context->manage_decompress.init(total_blocks); #endif - _taskQueueMtx.lock(); - _taskQueue.push_back(task); - _taskQueueMtx.unlock(); - _taskQueueCV.notify_all(); // notify all thread to process the single decompress task parallel - -#if ASTCDEC_NO_CONTEXT - task->_decompress_pm.wait(); -#else - task->_context->manage_decompress.wait(); -#endif - - _taskQueueMtx.lock(); - assert(!_taskQueue.empty()); - - auto t = _taskQueue.front(); - assert(t == task); - _taskQueue.pop_front(); - _taskQueueMtx.unlock(); - - task->release(); - - return ASTCENC_SUCCESS; + return task; } - -private: void run() { const astcenc_swizzle swz_decode{ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A}; bool no_task_count = false; for (;;) { - std::unique_lock lck(_taskQueueMtx); - if (!_stopped && (_taskQueue.empty() || no_task_count)) - _taskQueueCV.wait(lck); + std::unique_lock lck(_task_queue_mtx); + if (!_stopped && (_task_queue.empty() || no_task_count)) + _task_queue_cv.wait(lck); if (_stopped) break; - if (_taskQueue.empty()) + if (_task_queue.empty()) continue; + auto task = _task_queue.front(); lck.unlock(); // unlock make sure other thread can work for the task - auto task = _taskQueue.front(); auto& image_out = task->_image_out; +#if ASTCDEC_NO_CONTEXT + unsigned int block_x = task->_block_x; + unsigned int block_y = task->_block_y; + unsigned int block_z = 1; // task->block_z; + auto& bsd = task->_bsd; + auto& decompress_pm = task->_decompress_pm; +#else unsigned int block_x = task->_config.block_x; unsigned int block_y = task->_config.block_y; - unsigned int block_z = task->_config.block_z; - - unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x; - unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y; - unsigned int zblocks = 1; // (image_out.dim_z + block_z - 1) / block_z; - - const auto profile = task->_config.profile; - -#if ASTCDEC_NO_CONTEXT - auto& bsd = task->_bsd; - auto& decompress_pm = task->_decompress_pm; -#else - auto& bsd = *task->_context->bsd; + unsigned int block_z = 1; // task->_config.block_z; + auto& bsd = *task->_context->bsd; auto& decompress_pm = task->_context->manage_decompress; #endif + unsigned int xblocks = task->_xblocks; + unsigned int yblocks = task->_yblocks; + unsigned int zblocks = 1; // (image_out.dim_z + block_z - 1) / block_z; + int row_blocks = xblocks; int plane_blocks = xblocks * yblocks; image_block blk; - auto data = task->_in_texels; for (;;) { // process the task unsigned int count = 0; @@ -202,7 +213,7 @@ private: physical_to_symbolic(bsd, pcb, scb); - decompress_symbolic_block(profile, bsd, x * block_x, y * block_y, z * block_z, scb, blk); + decompress_symbolic_block(ASTCENC_PRF_LDR, bsd, x * block_x, y * block_y, z * block_z, scb, blk); write_image_block(image_out, blk, bsd, x * block_x, y * block_y, z * block_z, swz_decode); } @@ -212,36 +223,14 @@ private: } } - std::deque _taskQueue; - astc_decompress_mutex_t _taskQueueMtx; - std::condition_variable_any _taskQueueCV; - std::vector _threads; + + std::deque> _task_queue; + astc_decompress_mutex_t _task_queue_mtx; + std::condition_variable_any _task_queue_cv; + bool _stopped = false; }; -int astc_decompress_job_manager::s_thread_count = 1; - -astc_decompress_task::astc_decompress_task( - const uint8_t* in, uint8_t* out, unsigned int dim_x, unsigned int dim_y, int block_x, int block_y) { - _nref = 1; - - this->_in_texels = in; - this->_out_texels[0] = out; - this->_image_out = {dim_x, dim_y, 1, ASTCENC_TYPE_U8, this->_out_texels}; - - (void) astcenc_config_init(ASTCENC_PRF_LDR, block_x, block_y, 1, 0, ASTCENC_FLG_DECOMPRESS_ONLY, &this->_config); - -#if ASTCDEC_NO_CONTEXT - init_block_size_descriptor(block_x, block_y, 1, false, 0 /*unused for decompress*/, this->_bsd); -#else - (void) astcenc_context_alloc(&this->_config, astc_decompress_job_manager::s_thread_count, &this->_context); -#endif -} -astc_decompress_task::~astc_decompress_task() { -#if !ASTCDEC_NO_CONTEXT - astcenc_context_free(this->_context); -#endif -} int astc_decompress_image(const uint8_t* in, uint32_t inlen, uint8_t* out, uint32_t dim_x, uint32_t dim_y, uint32_t block_x, uint32_t block_y) {