Notes on the CGraph vs. taskflow performance comparison tests
I previously wrote an article, 炸裂!CGraph性能全面超越taskflow之后,作者却说他更想… , which compared CGraph and taskflow in serial, parallel, and DAG scenarios, recorded the measured data, and kept screenshots of the results. Those numbers do not necessarily carry over to every platform, though.
I hope everyone will run the comparison themselves, in their own scenarios and environments. Suggestions and advice on performance optimization are also very welcome at any time.
To restate one point: a stress test differs considerably from real-world usage. In real usage most of the cost sits inside the operators themselves, whereas in a stress-test setup with empty nodes the cost is almost entirely in scheduling. Please weigh the comparison against your actual workload.
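For example, one way to push the numbers closer to real usage is to give each node some actual work to do instead of returning immediately. The snippet below is only an illustrative sketch: the node name, the CGraph.h include path, and the 0.1 ms simulated workload are my own assumptions and are not part of the benchmark code that follows.

#include <chrono>
#include <thread>
#include "CGraph.h"    // assumed to be CGraph's top-level header

// Hypothetical node that simulates operator-internal work, so that the measured
// time is no longer dominated by scheduling overhead alone.
class MyBusyNode : public CGraph::GNode {
public:
    CGraph::CStatus run() override {
        std::this_thread::sleep_for(std::chrono::microseconds(100));    // pretend to do ~0.1 ms of real work
        return CGraph::CStatus();
    }
};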
All of the code below is recommended to be tested against the latest version of each project:
CGraph
#include "MyGAspect/MyTimerAspect.h"
using namespace CGraph;
class MyEmptyNode : public GNode {
public:
CStatus run() override {
return CStatus();
}
};
void tutorial_concurrent_32() {
    // 32 nodes executed in parallel; corresponds to example 1: 8 threads, 32-way concurrency, 500k runs (this loop uses 200,000 iterations)
    GPipelinePtr pipeline = GPipelineFactory::create();
    CStatus status;
    GElementPtr arr[32];

    UThreadPoolConfig config;
    config.default_thread_size_ = 8;    // my laptop is an 8-core MacBook Pro (M1)
    config.secondary_thread_size_ = 0;
    config.max_task_steal_range_ = 7;
    config.max_thread_size_ = 8;
    config.primary_thread_policy_ = CGRAPH_THREAD_SCHED_RR;
    config.primary_thread_priority_ = 10;
    config.primary_thread_empty_interval_ = 1;
    config.primary_thread_busy_epoch_ = 3000;
    config.monitor_enable_ = false;    // disable the thread-pool auto scaling mechanism
    pipeline->setUniqueThreadPoolConfig(config);

    for (auto & i : arr) {
        pipeline->registerGElement<MyEmptyNode>(&i);
    }
    pipeline->setAutoCheck(false);
    status += pipeline->init();

    /** time the run loop below **/
    MyTimerAspect asp;
    asp.beginRun();
    for (int t = 0; t < 200000; t++) {
        pipeline->run();
    }
    asp.finishRun(status);    // prints the timing information
    /*******************/

    status += pipeline->destroy();
    GPipelineFactory::remove(pipeline);
}
void tutorial_serial_32() {
    // 32 nodes executed serially; corresponds to example 2: 1 thread, a 32-node chain, 10 million runs (this loop uses 1,000,000 iterations)
    GPipelinePtr pipeline = GPipelineFactory::create();
    CStatus status;
    GElementPtr arr[32];

    pipeline->registerGElement<MyEmptyNode>(&arr[0]);
    for (int i = 1; i < 32; i++) {
        pipeline->registerGElement<MyEmptyNode>(&arr[i], {arr[i-1]});    // each node depends on the previous one
    }
    pipeline->makeSerial();
    pipeline->setAutoCheck(false);
    status += pipeline->init();

    /** time the run loop below **/
    MyTimerAspect asp;
    asp.beginRun();
    for (int t = 0; t < 1000000; t++) {
        pipeline->run();
    }
    asp.finishRun(status);    // prints the timing information
    /*******************/

    status += pipeline->destroy();
    GPipelineFactory::remove(pipeline);
}
void tutorial_dag() {
    // simple DAG scenario; corresponds to example 3: 2 threads, 1,000,000 runs
    GPipelinePtr pipeline = GPipelineFactory::create();
    CStatus status;
    GElementPtr a, b1, b2, c1, c2, d;

    UThreadPoolConfig config;
    config.default_thread_size_ = 2;    // my laptop is an 8-core MacBook Pro (M1)
    config.secondary_thread_size_ = 0;
    config.max_task_steal_range_ = 1;
    config.max_thread_size_ = 2;
    config.primary_thread_empty_interval_ = 1;
    config.primary_thread_busy_epoch_ = 3000;
    config.monitor_enable_ = false;    // disable the thread-pool auto scaling mechanism
    config.primary_thread_policy_ = CGRAPH_THREAD_SCHED_RR;
    config.primary_thread_priority_ = 10;
    pipeline->setUniqueThreadPoolConfig(config);

    pipeline->setAutoCheck(false);
    // a -> (b1 -> b2) and (c1 -> c2) -> d
    pipeline->registerGElement<MyEmptyNode>(&a);
    pipeline->registerGElement<MyEmptyNode>(&b1, {a});
    pipeline->registerGElement<MyEmptyNode>(&b2, {b1});
    pipeline->registerGElement<MyEmptyNode>(&c1, {a});
    pipeline->registerGElement<MyEmptyNode>(&c2, {c1});
    pipeline->registerGElement<MyEmptyNode>(&d, {b2, c2});
    pipeline->setGEngineType(GEngineType::DYNAMIC);
    status += pipeline->init();

    /** time the run loop below **/
    MyTimerAspect asp;
    asp.beginRun();
    for (int t = 0; t < 1000000; t++) {
        pipeline->run();
    }
    asp.finishRun(status);    // prints the timing information
    /*******************/

    status += pipeline->destroy();
    GPipelineFactory::remove(pipeline);
}
int main() {
    for (int i = 0; i < 5; i++) {
        tutorial_concurrent_32();
        // tutorial_serial_32();
        // tutorial_dag();
    }
    return 0;
}
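Note that MyGAspect/MyTimerAspect.h above comes from the tutorial code rather than the library core. If you do not have that file, a minimal stand-in that supports the same beginRun()/finishRun() calls could look like the sketch below; apart from those two method names, everything here (the class layout, the use of isOK(), the output format) is my own assumption.

#include <chrono>
#include <cstdio>

// Hypothetical replacement for MyTimerAspect: record a start timestamp in
// beginRun() and print the elapsed milliseconds in finishRun().
class MyTimerAspect {
public:
    void beginRun() {
        start_ts_ = std::chrono::high_resolution_clock::now();
    }

    void finishRun(const CGraph::CStatus& status) {
        std::chrono::duration<double, std::milli> span =
                std::chrono::high_resolution_clock::now() - start_ts_;
        printf("----> [CGraph] status is%s ok, time cost is : [%0.2lf] ms \n",
               status.isOK() ? "" : " not", span.count());
    }

private:
    std::chrono::high_resolution_clock::time_point start_ts_;
};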
taskflow
#include <taskflow/taskflow.hpp> // the only include you need
#include <vector>                // used by the serial-chain demo below

// a stand-in for CGraph's CStatus, so each taskflow task returns a comparable
// object and both benchmarks do the same (empty) amount of work per node
class CStatus {
    int code = 0;
    std::string info;
    std::string path;
};
void demo1() {
    tf::Executor executor(8);
    tf::Taskflow taskflow("simple");

    // 32 independent tasks, executed in parallel
    for (int i = 0; i < 32; i++) {
        auto x = taskflow.emplace([] {
            return CStatus();
        });
    }

    auto start_ts_ = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < 200000; i++) {
        executor.run(taskflow).wait();
    }
    std::chrono::duration<double, std::milli> span = std::chrono::high_resolution_clock::now() - start_ts_;
    printf("----> [taskflow] time cost is : [%0.2lf] ms \n", span.count());
}
void demo2() {
    // a serial chain of 32 tasks, executed on 1 thread
    tf::Executor executor(1);
    tf::Taskflow taskflow;

    // create 32 empty tasks and chain them one after another
    // (equivalent to the original task1.precede(task2) ... task31.precede(task32) sequence)
    std::vector<tf::Task> tasks;
    tasks.reserve(32);
    for (int i = 0; i < 32; i++) {
        tasks.push_back(taskflow.emplace([]() { return CStatus(); }));
    }
    for (int i = 0; i + 1 < 32; i++) {
        tasks[i].precede(tasks[i + 1]);
    }

    auto start_ts_ = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < 1000000; i++) {
        executor.run(taskflow).wait();
    }
    std::chrono::duration<double, std::milli> span = std::chrono::high_resolution_clock::now() - start_ts_;
    printf("----> [taskflow] time cost is : [%0.2lf] ms \n", span.count());
}
void demo3() {
    // the simple DAG scenario: A -> (B1 -> B2) and (C1 -> C2) -> D
    tf::Taskflow taskflow;
    auto [A, B1, B2, C1, C2, D] = taskflow.emplace(
        // []() { return std::this_thread::sleep_for(std::chrono::milliseconds(1)); },
        []() { return CStatus(); },
        []() { return CStatus(); },
        []() { return CStatus(); },
        []() { return CStatus(); },
        []() { return CStatus(); },
        []() { return CStatus(); }
    );
    A.precede(B1, C1);
    B1.precede(B2);
    C1.precede(C2);
    D.succeed(B2, C2);

    // execute the workflow with 2 threads
    tf::Executor executor(2);
    auto start_ts_ = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < 1000000; i++) {
        executor.run(taskflow).wait();
    }
    std::chrono::duration<double, std::milli> span = std::chrono::high_resolution_clock::now() - start_ts_;
    printf("----> [taskflow] time cost is : [%0.2lf] ms \n", span.count());
}
int main() {
    for (int i = 0; i < 5; i++) {
        demo1();
        // demo2();
        // demo3();
    }
    return 0;
}