nndeploy C++ API  0.2.0
nndeploy C++ API
tensor_pool.h
Go to the documentation of this file.
1 
2 #ifndef _NNDEPLOY_NET_TENSOR_POOL_H_
3 #define _NNDEPLOY_NET_TENSOR_POOL_H_
4 
5 #include "nndeploy/base/any.h"
6 #include "nndeploy/base/common.h"
8 #include "nndeploy/base/log.h"
9 #include "nndeploy/base/macro.h"
10 #include "nndeploy/base/object.h"
11 #include "nndeploy/base/status.h"
12 #include "nndeploy/base/string.h"
13 #include "nndeploy/net/util.h"
14 
15 /*
16  * 内存复用
17  * 1. 5种算法的实现
18  * 2. 生命周期的优化
19  * 3. 多模型共享内存的优化
20  * 4. workspace的优化
21  * 5. inplace算子的优化(TODO)
22  * 6. 更精细的内存优化(基于多生命周期的优化)(TODO)
23  */
24 
25 namespace nndeploy {
26 namespace net {
27 
// Selects a tensor pool implementation; the underlying type is int.
// NOTE(review): the enumerator lines (29-34 of tensor_pool.h) are missing from
// this listing. The cross-reference index at the end of this page lists
// kTensorPool1DSharedObjectTypeGreedyByBreadth (29),
// kTensorPool1DSharedObjectTypeGreedyBySize (30),
// kTensorPool1DSharedObjectTypeGreedyBySizeImprove (31),
// kTensorPool1DOffsetCalculateTypeGreedyBySize (32) and
// kTensorPool1DOffsetCalculateTypeGreedyByBreadth (33) — confirm against the header.
28 enum TensorPoolType : int {
35 };
36 
37 // 只有激活值
40  size_t size_;
41  std::array<int, 2> interval_;
42  int offset_ = -1; // 初始化offset为-1
43  bool is_allocated_ = false;
44 
45  bool operator<(const TensorUsageRecord &other) const {
46  return size_ < other.size_;
47  }
48 };
49 
// Groups a list of tensor usage records (breadth_) with a size_ value.
// NOTE(review): line 51 of the header is absent from this listing, so a member
// may be missing here — confirm against tensor_pool.h.
50 struct OpBreadth {
52  std::vector<std::shared_ptr<TensorUsageRecord>> breadth_;
53  size_t size_;  // NOTE(review): presumably the combined size of the records in breadth_ — confirm
54 
55  bool operator<(const OpBreadth &other) const { return size_ < other.size_; }  // compares size_ only
56 };
57 
// A chunk record holding a list of [int, int] intervals.
// NOTE(review): line 60 is missing from this listing; the cross-reference index
// of this page gives it as `device::Buffer *buffer_` — confirm against the header.
58 struct Chunk {
59  // Shared pointer: buffer->getData()
61  std::vector<std::array<int, 2>> intervals_;  // NOTE(review): presumably the intervals during which the chunk is in use — confirm
62 };
63 
// Associates an offset_ and a size_ with a list of tensor usage records.
// NOTE(review): presumably describes one region handed out by the
// offset-calculation pool types — confirm against the implementations.
64 struct Offset {
65  int offset_;  // no in-class initializer, unlike TensorUsageRecord::offset_ (which starts at -1)
66  size_t size_;
67  std::vector<std::shared_ptr<TensorUsageRecord>> tensor_usage_records_;
68 };
69 
// Polymorphic base class for tensor pools. allocate(), deallocate(),
// deallocateTensor(), allocateOp() and deallocateOp() are pure virtual and must
// be provided by concrete pools.
// NOTE(review): this listing omits several lines of the class (the first line
// of the constructor at 72, lines 79-82, 87, 92-96, 98-104 and the members at
// 107-108). The cross-reference index of this page additionally lists
// initTensorUsageRecordMap(), deinitTensorUsageRecordMap(), initOpIndexMap(),
// deinitOpIndexMap(), allocateTensor(), setMemory(), device_ and config_ —
// confirm against tensor_pool.h.
70 class TensorPool {
71  public:
73  std::vector<TensorWrapper *> &tensor_repository,  // tail of the constructor's parameter list (its first line is not shown)
74  std::vector<OpWrapper *> &op_repository);
75  virtual ~TensorPool();
76 
77  virtual base::Status setIsExternal(bool is_external);  // NOTE(review): presumably stores the flag in is_external_ — confirm
78 
83 
84  virtual base::Status allocate() = 0;
85  virtual base::Status deallocate() = 0;
86 
88  virtual base::Status deallocateTensor(device::Tensor *tensor, int op_index = -1) = 0;  // op_index defaults to -1
89  virtual base::Status allocateOp(op::Op *op) = 0;
90  virtual base::Status deallocateOp(op::Op *op) = 0;
91 
97  virtual int64_t getMemorySize();  // per this page's index: gets the memory size required for inference
105 
106  protected:
109  std::vector<TensorWrapper *> tensor_repository_;  // stored by value: a vector of non-owning-looking raw pointers (ownership not visible here)
110  std::vector<OpWrapper *> op_repository_;
111  bool is_external_ = false;  // defaults to false
112 };
113 
119  public:
120  virtual ~TensorPoolCreator() {};
122  device::Device *device, std::vector<TensorWrapper *> &tensor_repository,
123  std::vector<OpWrapper *> &op_repository) = 0;
124 };
125 
131 template <typename T>
133  virtual TensorPool *createTensorPool(
134  device::Device *device, std::vector<TensorWrapper *> &tensor_repository,
135  std::vector<OpWrapper *> &op_repository) {
136  return new T(device, tensor_repository, op_repository);
137  }
138 };
139 
145 std::map<TensorPoolType, std::shared_ptr<TensorPoolCreator>> &
147 
153 template <typename T>
155  public:
157  getGlobalTensorPoolCreatorMap()[type] = std::shared_ptr<T>(new T());
158  }
159 };
160 
171  TensorPoolType type, device::Device *device,
172  std::vector<TensorWrapper *> &tensor_repository,
173  std::vector<OpWrapper *> &op_repository);
174 
// Returns a vector of ints derived from the given producers, consumers and
// op_repository (all taken by non-const reference).
// NOTE(review): presumably the positions of the producer/consumer ops within
// op_repository — confirm against the definition.
175 std::vector<int> getOpOrderIndex(std::vector<OpWrapper *> &producers,
176  std::vector<OpWrapper *> &consumers,
177  std::vector<OpWrapper *> &op_repository);
178 
// Tests `interval` against the list `intervals` and returns a bool.
// NOTE(review): presumably reports whether `interval` overlaps one of
// `intervals` — the exact criterion is not visible here; confirm.
179 bool isInterval(std::array<int, 2> &interval,
180  std::vector<std::array<int, 2>> &intervals);
181 
183  const std::vector<std::shared_ptr<TensorUsageRecord>>
184  &tensor_usage_records);
185 
// Takes the chunk list by const reference and returns nothing.
// NOTE(review): presumably a debugging helper that prints each chunk — confirm.
186 void chunkPrint(const std::vector<std::shared_ptr<Chunk>> &chunks);
187 
190  const std::string &src);
191 
192 } // namespace net
193 } // namespace nndeploy
194 
195 #endif /* _NNDEPLOY_NET_TENSOR_POOL_H_ */
设备抽象基类
Definition: device.h:155
TensorPool的创建类
Definition: tensor_pool.h:118
virtual TensorPool * createTensorPool(device::Device *device, std::vector< TensorWrapper * > &tensor_repository, std::vector< OpWrapper * > &op_repository)=0
virtual base::Status allocate()=0
virtual base::Status deallocateTensor(device::Tensor *tensor, int op_index=-1)=0
base::IntVector config_
Definition: tensor_pool.h:108
std::vector< OpWrapper * > op_repository_
Definition: tensor_pool.h:110
virtual base::Status allocateOp(op::Op *op)=0
virtual base::Status deinitTensorUsageRecordMap()=0
virtual base::Status allocateTensor(device::Tensor *tensor)=0
virtual base::Status deallocate()=0
virtual int64_t getMemorySize()
获取推理所需的内存大小
device::Device * device_
Definition: tensor_pool.h:107
virtual base::Status deinitOpIndexMap()=0
TensorPool(device::Device *device, std::vector< TensorWrapper * > &tensor_repository, std::vector< OpWrapper * > &op_repository)
std::vector< TensorWrapper * > tensor_repository_
Definition: tensor_pool.h:109
virtual base::Status initOpIndexMap()=0
virtual base::Status initTensorUsageRecordMap()=0
virtual base::Status deallocateOp(op::Op *op)=0
virtual base::Status setIsExternal(bool is_external)
virtual base::Status setMemory(device::Buffer *buffer)
设置推理所需的内存(推理内存由外部分配)
TensorPool的创建类模板
Definition: tensor_pool.h:132
TensorPool的创建类的注册类模板
Definition: tensor_pool.h:154
TypeTensorPoolRegister(TensorPoolType type)
Definition: tensor_pool.h:156
Op的基类
Definition: op.h:42
#define NNDEPLOY_CC_API
api
Definition: macro.h:29
std::vector< int > IntVector
Definition: common.h:379
TensorPool * createTensorPool(TensorPoolType type, device::Device *device, std::vector< TensorWrapper * > &tensor_repository, std::vector< OpWrapper * > &op_repository)
Create a TensorPool object.
bool isInterval(std::array< int, 2 > &interval, std::vector< std::array< int, 2 >> &intervals)
std::vector< int > getOpOrderIndex(std::vector< OpWrapper * > &producers, std::vector< OpWrapper * > &consumers, std::vector< OpWrapper * > &op_repository)
void chunkPrint(const std::vector< std::shared_ptr< Chunk >> &chunks)
std::string tensorPoolTypeToString(TensorPoolType type)
@ kTensorPool1DOffsetCalculateTypeGreedyBySize
Definition: tensor_pool.h:32
@ kTensorPool1DOffsetCalculateTypeGreedyByBreadth
Definition: tensor_pool.h:33
@ kTensorPool1DSharedObjectTypeGreedyBySizeImprove
Definition: tensor_pool.h:31
@ kTensorPool1DSharedObjectTypeGreedyByBreadth
Definition: tensor_pool.h:29
@ kTensorPool1DSharedObjectTypeGreedyBySize
Definition: tensor_pool.h:30
TensorPoolType stringToTensorPoolType(const std::string &src)
void tensorUsageRecordPrint(const std::vector< std::shared_ptr< TensorUsageRecord >> &tensor_usage_records)
std::map< TensorPoolType, std::shared_ptr< TensorPoolCreator > > & getGlobalTensorPoolCreatorMap()
Get the Global TensorPool Creator Map object.
std::vector< std::array< int, 2 > > intervals_
Definition: tensor_pool.h:61
device::Buffer * buffer_
Definition: tensor_pool.h:60
std::vector< std::shared_ptr< TensorUsageRecord > > tensor_usage_records_
Definition: tensor_pool.h:67
std::vector< std::shared_ptr< TensorUsageRecord > > breadth_
Definition: tensor_pool.h:52
bool operator<(const OpBreadth &other) const
Definition: tensor_pool.h:55
bool operator<(const TensorUsageRecord &other) const
Definition: tensor_pool.h:45
std::array< int, 2 > interval_
Definition: tensor_pool.h:41