2 #ifndef _NNDEPLOY_NET_RUNTIME_H_
3 #define _NNDEPLOY_NET_RUNTIME_H_
31 std::condition_variable
cv_;
37 std::lock_guard<std::mutex> lock(
mutex_);
43 std::lock_guard<std::mutex> lock(
mutex_);
49 std::unique_lock<std::mutex> lock(
mutex_);
50 cv_.wait(lock, [
this, runtime]() {
67 if (!is_external_stream_ && stream_ !=
nullptr) {
78 std::vector<base::DeviceType> device_types =
79 std::vector<base::DeviceType>());
82 std::vector<TensorWrapper *> &tensor_repository,
83 std::vector<OpWrapper *> &op_repository,
84 std::vector<device::Tensor *> &input_tensors,
85 std::vector<device::Tensor *> &output_tensors,
bool is_dynamic_shape,
89 bool is_external_tensor_pool_memory =
false) = 0;
139 bool is_external_stream_ =
false;
143 bool is_external_tensor_pool_memory_ =
false;
145 bool is_dynamic_shape_ =
false;
147 bool is_pure_dynamic_shape_ =
false;
173 template <
typename T>
177 auto Runtime =
new T(device_type);
188 std::map<base::ParallelType, std::shared_ptr<RuntimeCreator>> &
196 template <
typename T>
device::Tensor * pop(Runtime *runtime)
std::condition_variable cv_
std::vector< Runtime * > consumers_
std::vector< Runtime * > producers_
void push(device::Tensor *tensor)
std::map< Runtime *, int > current_index_
std::vector< device::Tensor * > tensors_
virtual ~PipelineTensor()
virtual ~RuntimeCreator()
virtual Runtime * createRuntime(const base::DeviceType &device_type, base::ParallelType parallel_type)=0
std::vector< base::DeviceType > device_types_
virtual base::Status setMemory(device::Buffer *buffer)
设置推理所需的内存(推理内存由外部分配)
virtual base::Status deinit()=0
std::vector< device::Tensor * > input_tensors_
std::vector< TensorWrapper * > tensor_repository_
TensorPool * tensor_pool_
base::Status setWorkers(int worker_num, std::vector< base::DeviceType > device_types=std::vector< base::DeviceType >())
std::vector< device::Tensor * > output_tensors_
device::Stream * getStream()
void setStream(device::Stream *stream)
base::DeviceType device_type_
base::Status synchronize()
virtual base::Status postRun()=0
virtual base::Status copyToInputTensor(device::Tensor *tensor)=0
将输入tensor复制到输入tensor
virtual int64_t getMemorySize()
获取推理所需的内存大小
virtual base::Status preRun()=0
virtual base::Status init(std::vector< TensorWrapper * > &tensor_repository, std::vector< OpWrapper * > &op_repository, std::vector< device::Tensor * > &input_tensors, std::vector< device::Tensor * > &output_tensors, bool is_dynamic_shape, base::ShapeMap max_shape, TensorPoolType tensor_pool_type=kTensorPool1DSharedObjectTypeGreedyBySizeImprove, bool is_external_tensor_pool_memory=false)=0
Runtime(const base::DeviceType &device_type)
virtual device::Tensor * getOutputTensorAfterRun(const std::string &name, base::DeviceType device_type, bool is_copy, base::DataFormat data_format)=0
获取推理后的输出tensor
virtual base::Status reshape(base::ShapeMap &shape_map)=0
virtual base::Status run()=0
std::vector< OpWrapper * > op_repository_
TypeRuntimeRegister(base::ParallelType parallel_type)
#define NNDEPLOY_CC_API
api
std::map< std::string, std::vector< int > > ShapeMap
base::Status destroyStream(Stream *stream)
销毁流
std::map< base::ParallelType, std::shared_ptr< RuntimeCreator > > & getGlobalRuntimeCreatorMap()
Get the Global Runtime Creator Map object.
@ kTensorPool1DOffsetCalculateTypeGreedyByBreadth
@ kTensorPool1DSharedObjectTypeGreedyBySizeImprove
Runtime * createRuntime(const base::DeviceType &device_type, base::ParallelType parallel_type)