From 069a4fb170f0e875d73068c1e3cd5e8a0777f4d8 Mon Sep 17 00:00:00 2001 From: Ricardo Quesada Date: Fri, 17 Jan 2014 23:10:04 -0800 Subject: [PATCH] Renderer: Don't sort z=0 Commands If Command has z==0, then those elements won't be sorted. Only Z !=0 will be sorted, and it will use `sort` instead of `stable_sort` for z!=0, since it is faster --- CHANGELOG | 7 ++- cocos/2d/renderer/CCBatchCommand.cpp | 2 +- cocos/2d/renderer/CCBatchCommand.h | 2 +- cocos/2d/renderer/CCCustomCommand.cpp | 2 +- cocos/2d/renderer/CCCustomCommand.h | 2 +- cocos/2d/renderer/CCGroupCommand.h | 5 +- cocos/2d/renderer/CCQuadCommand.cpp | 2 +- cocos/2d/renderer/CCQuadCommand.h | 2 +- cocos/2d/renderer/CCRenderCommand.h | 4 +- cocos/2d/renderer/CCRenderer.cpp | 79 ++++++++++++++++++++++----- cocos/2d/renderer/CCRenderer.h | 21 ++++++- 11 files changed, 98 insertions(+), 30 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 8b476b7457..1497c22df8 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -17,10 +17,11 @@ cocos2d-x-3.0final ?.? ? [FIX] ControlSlider doesn't support to set selected thumb sprite. [FIX] ControlButton doesn't support to set scale ratio of touchdown state. [FIX] Particles: Crash was triggered if there is not `textureFileName`section in particle plist file. - [FIX] Renderer: QuadCommand::init() does not copy the Quads, it only store a reference making the code faster - [FIX] Renderer: Performance improved in Sprite and SpriteBatchNode (and subclasses) sprites in about 20% - [FIX] Renderer: When note using VAO, call glBufferData() instead of glBufferSubData(). [FIX] Renderer: Uses a float as key with only the depth. 
Viewport, opaque are not needed now + [FIX] Renderer Performance Fix: QuadCommand::init() does not copy the Quads, it only stores a reference, making the code faster + [FIX] Renderer Performance Fix: Sprite and SpriteBatchNode (and subclasses) have much better performance + [FIX] Renderer Performance Fix: When not using VAO, call glBufferData() instead of glBufferSubData(). + [FIX] Renderer Performance Fix: Doesn't sort z=0 elements. It also uses sort() instead of stable_sort() for z!=0. [FIX] Sprite: removed _hasChildren optimization. It uses !_children.empty() now which is super fast as well [FIX] Tests: TestCpp works with CMake on Windows. [FIX] Tests: Sprites Performance Test has 4 new tests diff --git a/cocos/2d/renderer/CCBatchCommand.cpp b/cocos/2d/renderer/CCBatchCommand.cpp index 817cfb421c..9c60b37455 100644 --- a/cocos/2d/renderer/CCBatchCommand.cpp +++ b/cocos/2d/renderer/CCBatchCommand.cpp @@ -54,7 +54,7 @@ BatchCommand::~BatchCommand() { } -void BatchCommand::execute() +void BatchCommand::execute() const { // Set material _shader->use(); diff --git a/cocos/2d/renderer/CCBatchCommand.h b/cocos/2d/renderer/CCBatchCommand.h index 9ac5b346ee..4ad2e7ae34 100644 --- a/cocos/2d/renderer/CCBatchCommand.h +++ b/cocos/2d/renderer/CCBatchCommand.h @@ -45,7 +45,7 @@ public: void init(float depth, GLuint texutreID, GLProgram* shader, BlendFunc blendType, TextureAtlas *textureAtlas, const kmMat4& modelViewTransform); - void execute(); + void execute() const; protected: int32_t _materialID; diff --git a/cocos/2d/renderer/CCCustomCommand.cpp b/cocos/2d/renderer/CCCustomCommand.cpp index 10992393f7..c521928921 100644 --- a/cocos/2d/renderer/CCCustomCommand.cpp +++ b/cocos/2d/renderer/CCCustomCommand.cpp @@ -42,7 +42,7 @@ CustomCommand::~CustomCommand() } -void CustomCommand::execute() +void CustomCommand::execute() const { if(func) { diff --git a/cocos/2d/renderer/CCCustomCommand.h b/cocos/2d/renderer/CCCustomCommand.h index 03fdead69a..5bf149363e 100644 --- 
a/cocos/2d/renderer/CCCustomCommand.h +++ b/cocos/2d/renderer/CCCustomCommand.h @@ -41,7 +41,7 @@ public: void init(float depth); - void execute(); + void execute() const; inline bool isTranslucent() { return true; } std::function func; diff --git a/cocos/2d/renderer/CCGroupCommand.h b/cocos/2d/renderer/CCGroupCommand.h index 02fe541fd0..42c23d02b9 100644 --- a/cocos/2d/renderer/CCGroupCommand.h +++ b/cocos/2d/renderer/CCGroupCommand.h @@ -56,12 +56,9 @@ public: GroupCommand(); ~GroupCommand(); -public: - void init(float depth); - inline bool isTranslucent() {return true;} - inline int getRenderQueueID() {return _renderQueueID;} + inline int getRenderQueueID() const {return _renderQueueID;} protected: int _renderQueueID; diff --git a/cocos/2d/renderer/CCQuadCommand.cpp b/cocos/2d/renderer/CCQuadCommand.cpp index 0d602cee4e..333fd834d2 100644 --- a/cocos/2d/renderer/CCQuadCommand.cpp +++ b/cocos/2d/renderer/CCQuadCommand.cpp @@ -101,7 +101,7 @@ void QuadCommand::generateMaterialID() | (uint32_t)_textureID << 0; } -void QuadCommand::useMaterial() +void QuadCommand::useMaterial() const { _shader->use(); diff --git a/cocos/2d/renderer/CCQuadCommand.h b/cocos/2d/renderer/CCQuadCommand.h index e6bb2059b3..42411f48e9 100644 --- a/cocos/2d/renderer/CCQuadCommand.h +++ b/cocos/2d/renderer/CCQuadCommand.h @@ -44,7 +44,7 @@ public: void init(float depth, GLuint texutreID, GLProgram* shader, BlendFunc blendType, V3F_C4B_T2F_Quad* quads, ssize_t quadCount, const kmMat4& mv); - void useMaterial(); + void useMaterial() const; //TODO use material to decide if it is translucent inline bool isTranslucent() const { return true; } diff --git a/cocos/2d/renderer/CCRenderCommand.h b/cocos/2d/renderer/CCRenderCommand.h index 9035454bc2..adb47f2f12 100644 --- a/cocos/2d/renderer/CCRenderCommand.h +++ b/cocos/2d/renderer/CCRenderCommand.h @@ -50,10 +50,10 @@ public: }; /** Get Render Command Id */ - inline float getDepth() { return _depth; } + inline float getDepth() const { return _depth; 
} /** Returns the Command type */ - inline Type getType() { return _type; } + inline Type getType() const { return _type; } protected: RenderCommand(); diff --git a/cocos/2d/renderer/CCRenderer.cpp b/cocos/2d/renderer/CCRenderer.cpp index 5b56fc80cd..4a137295fe 100644 --- a/cocos/2d/renderer/CCRenderer.cpp +++ b/cocos/2d/renderer/CCRenderer.cpp @@ -37,9 +37,65 @@ #include <algorithm> // for std::stable_sort NS_CC_BEGIN -using namespace std; + +bool compareRenderCommand(RenderCommand* a, RenderCommand* b) +{ + return a->getDepth() < b->getDepth(); +} + +void RenderQueue::push_back(RenderCommand* command) +{ + float z = command->getDepth(); + if(z < 0) + _queueNegZ.push_back(command); + else if(z > 0) /* 'else if' is required: without it a z<0 command also falls into the final else and is stored in _queue0 as well */ + _queuePosZ.push_back(command); + else + _queue0.push_back(command); +} + +ssize_t RenderQueue::size() const +{ + return _queueNegZ.size() + _queue0.size() + _queuePosZ.size(); +} + +void RenderQueue::sort() +{ + // Don't sort _queue0, it already comes sorted + std::sort(std::begin(_queueNegZ), std::end(_queueNegZ), compareRenderCommand); + std::sort(std::begin(_queuePosZ), std::end(_queuePosZ), compareRenderCommand); +} + +const RenderCommand* RenderQueue::operator[](ssize_t index) const +{ + if(index < _queueNegZ.size()) + return _queueNegZ[index]; + + index -= _queueNegZ.size(); + + if(index < _queue0.size()) + return _queue0[index]; + + index -= _queue0.size(); + + if(index < _queuePosZ.size()) + return _queuePosZ[index]; + + CCASSERT(false, "invalid index"); + return nullptr; +} + +void RenderQueue::clear() +{ + _queueNegZ.clear(); + _queue0.clear(); + _queuePosZ.clear(); +} +// +// +// #define DEFAULT_RENDER_QUEUE 0 Renderer::Renderer() @@ -205,11 +261,6 @@ int Renderer::createRenderQueue() return (int)_renderGroups.size() - 1; } -bool compareRenderCommand(RenderCommand* a, RenderCommand* b) -{ - return a->getDepth() < b->getDepth(); -} - void Renderer::render() { //Uncomment this once everything is rendered by new renderer @@ -221,9 +272,9 @@ void Renderer::render() { //Process 
render commands //1. Sort render commands based on ID - for (auto it = _renderGroups.begin(); it != _renderGroups.end(); ++it) + for (auto &renderqueue : _renderGroups) { - std::stable_sort((*it).begin(), (*it).end(), compareRenderCommand); + renderqueue.sort(); } while(!_renderStack.empty()) @@ -244,7 +295,7 @@ void Renderer::render() if(commandType == RenderCommand::Type::QUAD_COMMAND) { - QuadCommand* cmd = static_cast<QuadCommand*>(command); + auto cmd = static_cast<const QuadCommand*>(command); ssize_t cmdQuadCount = cmd->getQuadCount(); //Batch quads @@ -266,19 +317,19 @@ void Renderer::render() else if(commandType == RenderCommand::Type::CUSTOM_COMMAND) { flush(); - CustomCommand* cmd = static_cast<CustomCommand*>(command); + auto cmd = static_cast<const CustomCommand*>(command); cmd->execute(); } else if(commandType == RenderCommand::Type::BATCH_COMMAND) { flush(); - BatchCommand* cmd = static_cast<BatchCommand*>(command); + auto cmd = static_cast<const BatchCommand*>(command); cmd->execute(); } else if(commandType == RenderCommand::Type::GROUP_COMMAND) { flush(); - GroupCommand* cmd = static_cast<GroupCommand*>(command); + auto cmd = static_cast<const GroupCommand*>(command); _renderStack.top().currentIndex = i + 1; @@ -413,10 +464,10 @@ void Renderer::drawBatchedQuads() //Start drawing verties in batch for(ssize_t i = _firstCommand; i <= _lastCommand; i++) { - RenderCommand* command = _renderGroups[_renderStack.top().renderQueueID][i]; + auto command = _renderGroups[_renderStack.top().renderQueueID][i]; if (command->getType() == RenderCommand::Type::QUAD_COMMAND) { - QuadCommand* cmd = static_cast<QuadCommand*>(command); + auto cmd = static_cast<const QuadCommand*>(command); if(_lastMaterialID != cmd->getMaterialID()) { //Draw quads diff --git a/cocos/2d/renderer/CCRenderer.h b/cocos/2d/renderer/CCRenderer.h index a856d201c3..0921fa6bc7 100644 --- a/cocos/2d/renderer/CCRenderer.h +++ b/cocos/2d/renderer/CCRenderer.h @@ -37,7 +37,26 @@ NS_CC_BEGIN class EventListenerCustom; -typedef std::vector<RenderCommand*> RenderQueue; +/** Class that knows how to sort the Commands. 
+ Since the commands that have z==0 are "pushed back" in + the correct order, the only Commands that need to be sorted, + are the ones that have z <0 and z >0. + And that is what this class does. +*/ +class RenderQueue { + +public: + void push_back(RenderCommand* command); + ssize_t size() const; + void sort(); + const RenderCommand* operator[](ssize_t index) const; + void clear(); + +protected: + std::vector<RenderCommand*> _queueNegZ; + std::vector<RenderCommand*> _queue0; + std::vector<RenderCommand*> _queuePosZ; +}; struct RenderStackElement {