zh-cn/latest/runtime_8h_source.html

 #ifndef _NNDEPLOY_NET_RUNTIME_H_

 #define _NNDEPLOY_NET_RUNTIME_H_


 #include "nndeploy/base/any.h"

 #include "nndeploy/base/common.h"

 #include "nndeploy/base/glic_stl_include.h"

 #include "nndeploy/base/log.h"

 #include "nndeploy/base/macro.h"

 #include "nndeploy/base/object.h"

 #include "nndeploy/base/status.h"

 #include "nndeploy/base/string.h"

 #include "nndeploy/net/tensor_pool.h"

 #include "nndeploy/net/util.h"


 namespace nndeploy {

 namespace net {


 // Forward declaration: PipelineTensor below stores Runtime pointers before
 // the Runtime class itself is defined later in this header.
 class Runtime;


 /**
  * @brief Tensor hand-off channel shared between Runtime instances.
  *
  * Tensors are appended with push() and read in insertion order with pop().
  * Every reading Runtime has its own cursor in current_index_, so tensors are
  * never removed from tensors_; each reader simply advances its own position.
  * push(), setFinish() and pop() all take mutex_, and cv_ is used to wake
  * readers blocked inside pop().
  */
 class PipelineTensor {
  public:
   PipelineTensor() {}
   virtual ~PipelineTensor() {}

   // Every tensor pushed so far, in push order.
   std::vector<device::Tensor *> tensors_;
   // Not touched by any member function of this class.
   // NOTE(review): presumably filled in by the code that wires runtimes
   // together - confirm against the callers.
   std::vector<Runtime *> producers_;
   std::vector<Runtime *> consumers_;

   // Mutex and condition variable used to synchronize data passing between
   // different stages.
   std::mutex mutex_;
   std::condition_variable cv_;
   // Per-reader cursor: index into tensors_ of the next tensor pop() returns.
   std::map<Runtime *, int> current_index_;
   // Set by setFinish(); once true, pop() only ever returns nullptr.
   bool is_finish_ = false;

   /**
    * @brief Appends a tensor and wakes every reader waiting in pop().
    * @param tensor pointer stored as-is in tensors_.
    */
   void push(device::Tensor *tensor) {
     // NNDEPLOY_LOGI("tensor name %s\n", tensor->getName().c_str());
     std::lock_guard<std::mutex> guard(mutex_);
     tensors_.push_back(tensor);
     cv_.notify_all();
   }

   /**
    * @brief Marks the channel as finished and wakes every waiting reader.
    */
   void setFinish() {
     std::lock_guard<std::mutex> guard(mutex_);
     is_finish_ = true;
     cv_.notify_all();
   }

   /**
    * @brief Blocks until the given reader has an unread tensor or the channel
    * has been finished, then returns the reader's next tensor.
    * @param runtime key selecting the reader's cursor in current_index_; a
    * cursor starting at 0 is created on first use.
    * @return the next unread tensor for this reader, or nullptr when
    * is_finish_ is set. The finish flag is checked first, so nullptr is
    * returned even if unread tensors remain.
    */
   device::Tensor *pop(Runtime *runtime) {
     std::unique_lock<std::mutex> guard(mutex_);
     // Explicit form of a predicate wait: sleep until there is data for this
     // reader or the channel has been finished.
     for (;;) {
       const bool has_unread = current_index_[runtime] < tensors_.size();
       if (has_unread || is_finish_) {
         break;
       }
       cv_.wait(guard);
     }
     if (is_finish_) {
       return nullptr;
     }
     int &cursor = current_index_[runtime];
     device::Tensor *next = tensors_[cursor];
     ++cursor;
     return next;
   }
 };


 /**
  * @brief Abstract base class for network runtimes.
  *
  * Holds the state shared by concrete runtimes (device type, stream, tensor
  * pool settings, tensor/op repositories, worker configuration) and declares
  * the lifecycle interface that derived classes implement: init(), reshape(),
  * preRun(), run(), postRun() and deinit().
  */
 class NNDEPLOY_CC_API Runtime : public base::NonCopyable {
  public:
   /**
    * @brief Stores device_type in device_type_.
    */
   Runtime(const base::DeviceType &device_type) : device_type_(device_type) {}

   /**
    * @brief Destroys stream_ through device::destroyStream() unless the stream
    * is flagged as external or is already null.
    */
   virtual ~Runtime() {
     if (!is_external_stream_ && stream_ != nullptr) {
       device::destroyStream(stream_);
       stream_ = nullptr;
     }
   }

   // Stream accessors and synchronization; defined outside this header.
   // NOTE(review): setStream() presumably also sets is_external_stream_, which
   // the destructor relies on - confirm against the implementation.
   void setStream(device::Stream *stream);
   device::Stream *getStream();
   base::Status synchronize();

   // Defined outside this header.
   // NOTE(review): presumably stores its arguments in worker_num_ and
   // device_types_ - confirm against the implementation.
   base::Status setWorkers(int worker_num,
                           std::vector<base::DeviceType> device_types =
                               std::vector<base::DeviceType>());

   /**
    * @brief Initializes the runtime; implemented by derived classes.
    *
    * NOTE(review): the default tensor_pool_type given here differs from the
    * in-class initializer of tensor_pool_type_ below; which one is effective
    * depends on the derived implementation.
    */
   virtual base::Status init(
       std::vector<TensorWrapper *> &tensor_repository,
       std::vector<OpWrapper *> &op_repository,
       std::vector<device::Tensor *> &input_tensors,
       std::vector<device::Tensor *> &output_tensors, bool is_dynamic_shape,
       base::ShapeMap max_shape,
       TensorPoolType tensor_pool_type =
           kTensorPool1DSharedObjectTypeGreedyBySizeImprove,
       bool is_external_tensor_pool_memory = false) = 0;
   virtual base::Status deinit() = 0;

   virtual base::Status reshape(base::ShapeMap &shape_map) = 0;

   /// Gets the memory size required for inference.
   virtual int64_t getMemorySize();
   /// Sets the memory required for inference (the inference memory is
   /// allocated externally).
   virtual base::Status setMemory(device::Buffer *buffer);

   virtual base::Status preRun() = 0;
   virtual base::Status run() = 0;
   virtual base::Status postRun() = 0;

   /// Copies the given tensor into the input tensor.
   virtual base::Status copyToInputTensor(device::Tensor *tensor) = 0;

   /// Gets the output tensor after inference.
   virtual device::Tensor *getOutputTensorAfterRun(
       const std::string &name, base::DeviceType device_type, bool is_copy,
       base::DataFormat data_format) = 0;

  protected:
   base::DeviceType device_type_;
   // When true, the destructor leaves stream_ alone.
   bool is_external_stream_ = false;
   device::Stream *stream_ = nullptr;
   TensorPoolType tensor_pool_type_ =
       kTensorPool1DOffsetCalculateTypeGreedyByBreadth;
   bool is_external_tensor_pool_memory_ = false;
   // Initialized to nullptr so the pointer never holds an indeterminate value
   // before a derived class assigns it.
   TensorPool *tensor_pool_ = nullptr;
   bool is_dynamic_shape_ = false;                // whether shapes are dynamic
   base::ShapeMap max_shape_ = base::ShapeMap();  // max shape for dynamic input
   bool is_pure_dynamic_shape_ = false;
   std::vector<TensorWrapper *> tensor_repository_;
   std::vector<OpWrapper *> op_repository_;
   std::vector<device::Tensor *> input_tensors_;
   std::vector<device::Tensor *> output_tensors_;
   int worker_num_ = 1;
   std::vector<base::DeviceType> device_types_;
 };


 /**
  * @brief Creator (factory) interface for Runtime objects.
  *
  * Implementations are stored as std::shared_ptr<RuntimeCreator> in the map
  * returned by getGlobalRuntimeCreatorMap().
  */
 class RuntimeCreator {
  public:
   virtual ~RuntimeCreator() = default;

   /**
    * @brief Creates a Runtime.
    * @param device_type device type handed to the created runtime.
    * @param parallel_type parallel type requested by the caller.
    * @return pointer to the newly created Runtime.
    */
   virtual Runtime *createRuntime(const base::DeviceType &device_type,
                                  base::ParallelType parallel_type) = 0;
 };


 /**
  * @brief RuntimeCreator implementation that builds a runtime of type T.
  * @tparam T type constructed as `new T(device_type)`; the resulting pointer
  * must be convertible to Runtime *.
  */
 template <typename T>
 class TypeRuntimeCreator : public RuntimeCreator {
   // No access specifier is given, so this override is private in this class
   // and is reached through the RuntimeCreator interface.
   Runtime *createRuntime(const base::DeviceType &device_type,
                          base::ParallelType parallel_type) override {
     // parallel_type is not used here; only device_type is forwarded to T.
     return new T(device_type);
   }
 };


 // Gets the global runtime creator map: a reference to the map from
 // base::ParallelType to the RuntimeCreator registered for it. Declared here,
 // defined elsewhere.
 std::map<base::ParallelType, std::shared_ptr<RuntimeCreator>> &

 getGlobalRuntimeCreatorMap();


 /**
  * @brief Registration helper for runtime creators.
  *
  * Constructing an instance default-constructs a T and stores it in the map
  * returned by getGlobalRuntimeCreatorMap() under the given parallel type,
  * replacing any entry already stored under that key.
  *
  * @tparam T default-constructible type whose std::shared_ptr<T> converts to
  * std::shared_ptr<RuntimeCreator>.
  */
 template <typename T>
 class TypeRuntimeRegister {
  public:
   explicit TypeRuntimeRegister(base::ParallelType parallel_type) {
     // Build the creator first, then install it under its key.
     std::shared_ptr<T> creator(new T());
     getGlobalRuntimeCreatorMap()[parallel_type] = creator;
   }
 };


 // Creates a Runtime for the given device type and parallel type. Declared
 // here, defined elsewhere.
 // NOTE(review): presumably dispatches to the creator registered for
 // parallel_type in getGlobalRuntimeCreatorMap(); ownership of the returned
 // pointer is not visible from this header - confirm against the
 // implementation.
 Runtime *createRuntime(const base::DeviceType &device_type,

                        base::ParallelType parallel_type);


 }  // namespace net

 }  // namespace nndeploy


 #endif

any.h

nndeploy::base::NonCopyable
Definition: object.h:13

nndeploy::base::Status
Definition: status.h:87

nndeploy::device::Buffer
Definition: buffer.h:21

nndeploy::device::Stream
流类
Definition: device.h:387

nndeploy::device::Tensor
Tensor类
Definition: tensor.h:26

nndeploy::net::PipelineTensor
Definition: runtime.h:21

nndeploy::net::PipelineTensor::pop
device::Tensor * pop(Runtime *runtime)
Definition: runtime.h:48

nndeploy::net::PipelineTensor::mutex_
std::mutex mutex_
Definition: runtime.h:30

nndeploy::net::PipelineTensor::cv_
std::condition_variable cv_
Definition: runtime.h:31

nndeploy::net::PipelineTensor::consumers_
std::vector< Runtime * > consumers_
Definition: runtime.h:27

nndeploy::net::PipelineTensor::setFinish
void setFinish()
Definition: runtime.h:42

nndeploy::net::PipelineTensor::producers_
std::vector< Runtime * > producers_
Definition: runtime.h:26

nndeploy::net::PipelineTensor::push
void push(device::Tensor *tensor)
Definition: runtime.h:35

nndeploy::net::PipelineTensor::current_index_
std::map< Runtime *, int > current_index_
Definition: runtime.h:32

nndeploy::net::PipelineTensor::PipelineTensor
PipelineTensor()
Definition: runtime.h:23

nndeploy::net::PipelineTensor::tensors_
std::vector< device::Tensor * > tensors_
Definition: runtime.h:24

nndeploy::net::PipelineTensor::is_finish_
bool is_finish_
Definition: runtime.h:33

nndeploy::net::PipelineTensor::~PipelineTensor
virtual ~PipelineTensor()
Definition: runtime.h:24

nndeploy::net::RuntimeCreator
Runtime的创建类
Definition: runtime.h:160

nndeploy::net::RuntimeCreator::~RuntimeCreator
virtual ~RuntimeCreator()
Definition: runtime.h:162

nndeploy::net::RuntimeCreator::createRuntime
virtual Runtime * createRuntime(const base::DeviceType &device_type, base::ParallelType parallel_type)=0

nndeploy::net::Runtime
Definition: runtime.h:63

nndeploy::net::Runtime::device_types_
std::vector< base::DeviceType > device_types_
Definition: runtime.h:153

nndeploy::net::Runtime::setMemory
virtual base::Status setMemory(device::Buffer *buffer)
设置推理所需的内存（推理内存由外部分配）

nndeploy::net::Runtime::deinit
virtual base::Status deinit()=0

nndeploy::net::Runtime::input_tensors_
std::vector< device::Tensor * > input_tensors_
Definition: runtime.h:150

nndeploy::net::Runtime::tensor_repository_
std::vector< TensorWrapper * > tensor_repository_
Definition: runtime.h:148

nndeploy::net::Runtime::tensor_pool_
TensorPool * tensor_pool_
Definition: runtime.h:144

nndeploy::net::Runtime::setWorkers
base::Status setWorkers(int worker_num, std::vector< base::DeviceType > device_types=std::vector< base::DeviceType >())

nndeploy::net::Runtime::output_tensors_
std::vector< device::Tensor * > output_tensors_
Definition: runtime.h:151

nndeploy::net::Runtime::getStream
device::Stream * getStream()

nndeploy::net::Runtime::setStream
void setStream(device::Stream *stream)

nndeploy::net::Runtime::device_type_
base::DeviceType device_type_
Definition: runtime.h:134

nndeploy::net::Runtime::synchronize
base::Status synchronize()

nndeploy::net::Runtime::postRun
virtual base::Status postRun()=0

nndeploy::net::Runtime::~Runtime
virtual ~Runtime()
Definition: runtime.h:66

nndeploy::net::Runtime::copyToInputTensor
virtual base::Status copyToInputTensor(device::Tensor *tensor)=0
将传入的tensor复制到对应的输入tensor

nndeploy::net::Runtime::getMemorySize
virtual int64_t getMemorySize()
获取推理所需的内存大小

nndeploy::net::Runtime::preRun
virtual base::Status preRun()=0

nndeploy::net::Runtime::init
virtual base::Status init(std::vector< TensorWrapper * > &tensor_repository, std::vector< OpWrapper * > &op_repository, std::vector< device::Tensor * > &input_tensors, std::vector< device::Tensor * > &output_tensors, bool is_dynamic_shape, base::ShapeMap max_shape, TensorPoolType tensor_pool_type=kTensorPool1DSharedObjectTypeGreedyBySizeImprove, bool is_external_tensor_pool_memory=false)=0

nndeploy::net::Runtime::Runtime
Runtime(const base::DeviceType &device_type)
Definition: runtime.h:65

nndeploy::net::Runtime::getOutputTensorAfterRun
virtual device::Tensor * getOutputTensorAfterRun(const std::string &name, base::DeviceType device_type, bool is_copy, base::DataFormat data_format)=0
获取推理后的输出tensor

nndeploy::net::Runtime::reshape
virtual base::Status reshape(base::ShapeMap &shape_map)=0

nndeploy::net::Runtime::run
virtual base::Status run()=0

nndeploy::net::Runtime::op_repository_
std::vector< OpWrapper * > op_repository_
Definition: runtime.h:149

nndeploy::net::TensorPool
Definition: tensor_pool.h:70

nndeploy::net::TypeRuntimeCreator
Runtime的创建类模板
Definition: runtime.h:174

nndeploy::net::TypeRuntimeRegister
Runtime的创建类的注册类模板
Definition: runtime.h:197

nndeploy::net::TypeRuntimeRegister::TypeRuntimeRegister
TypeRuntimeRegister(base::ParallelType parallel_type)
Definition: runtime.h:199

common.h

util.h

glic_stl_include.h

log.h

macro.h

NNDEPLOY_CC_API
#define NNDEPLOY_CC_API
api
Definition: macro.h:29

nndeploy::base::ShapeMap
std::map< std::string, std::vector< int > > ShapeMap
Definition: common.h:381

nndeploy::base::ParallelType
ParallelType
Definition: common.h:353

nndeploy::base::DataFormat
DataFormat
Definition: common.h:130

nndeploy::device::destroyStream
base::Status destroyStream(Stream *stream)
销毁流

nndeploy::net::getGlobalRuntimeCreatorMap
std::map< base::ParallelType, std::shared_ptr< RuntimeCreator > > & getGlobalRuntimeCreatorMap()
Get the Global Runtime Creator Map object.

nndeploy::net::TensorPoolType
TensorPoolType
Definition: tensor_pool.h:28

nndeploy::net::kTensorPool1DOffsetCalculateTypeGreedyByBreadth
@ kTensorPool1DOffsetCalculateTypeGreedyByBreadth
Definition: tensor_pool.h:33

nndeploy::net::kTensorPool1DSharedObjectTypeGreedyBySizeImprove
@ kTensorPool1DSharedObjectTypeGreedyBySizeImprove
Definition: tensor_pool.h:31

nndeploy::net::createRuntime
Runtime * createRuntime(const base::DeviceType &device_type, base::ParallelType parallel_type)

nndeploy
Definition: common.h:10

object.h

status.h

string.h

nndeploy::base::DeviceType
Definition: common.h:109

tensor_pool.h