zh-cn/stable/tensor__pool_8h_source.html

 #ifndef _NNDEPLOY_NET_TENSOR_POOL_H_

 #define _NNDEPLOY_NET_TENSOR_POOL_H_


 #include "nndeploy/base/any.h"

 #include "nndeploy/base/common.h"

 #include "nndeploy/base/glic_stl_include.h"

 #include "nndeploy/base/log.h"

 #include "nndeploy/base/macro.h"

 #include "nndeploy/base/object.h"

 #include "nndeploy/base/status.h"

 #include "nndeploy/base/string.h"

 #include "nndeploy/net/util.h"


 /*

  * 内存复用

  * 1. 5种算法的实现

  * 2. 生命周期的优化

  * 3. 多模型共享内存的优化

  * 4. workspace的优化

  * 5. inplace算子的优化（TODO）

  * 6. 更精细的内存优化（基于多生命周期的优化）（TODO）

  */


 namespace nndeploy {

 namespace net {


 // Identifies which tensor-pool strategy to use. The numeric values are spelled
 // out so they stay stable if enumerators are ever inserted or reordered.
 enum TensorPoolType : int {
   // The original author marked the following five strategies as "correct".
   kTensorPool1DSharedObjectTypeGreedyByBreadth = 0,
   kTensorPool1DSharedObjectTypeGreedyBySize = 1,
   kTensorPool1DSharedObjectTypeGreedyBySizeImprove = 2,
   kTensorPool1DOffsetCalculateTypeGreedyBySize = 3,
   kTensorPool1DOffsetCalculateTypeGreedyByBreadth = 4,
   // NOTE(review): presumably "no pooling strategy"; it carries no "correct"
   // mark in the original -- confirm against the implementation.
   kTensorPool1DNone = 5,
 };


 // Usage record for one tensor. Per the original author's note, only
 // activation tensors are tracked.
 //
 // Every member now has a default member initializer, so a default-initialized
 // record (e.g. `TensorUsageRecord r;` or `new TensorUsageRecord`) never exposes
 // indeterminate values. The chosen defaults match what value-initialization
 // already produced for the previously uninitialized members.
 struct TensorUsageRecord {
   // Wrapper of the tensor this record describes; null until assigned.
   TensorWrapper *tensor_wrapper_ = nullptr;
   // Sort key used by operator<.
   // NOTE(review): presumably the tensor size in bytes -- confirm.
   size_t size_ = 0;
   // Two-element int interval.
   // NOTE(review): presumably the [first, last] op index using the tensor --
   // confirm against the pool implementations.
   std::array<int, 2> interval_ = {{0, 0}};
   // Offset assigned to the tensor; -1 means "not assigned yet".
   int offset_ = -1;
   bool is_allocated_ = false;

   // Orders records by ascending size_.
   bool operator<(const TensorUsageRecord &other) const {
     return size_ < other.size_;
   }
 };


 // Pairs an op wrapper with a list of tensor usage records (its "breadth") and
 // a size value used for ordering.
 // NOTE(review): size_ is presumably the summed size of the records in
 // breadth_ -- confirm against the code that fills this struct.
 struct OpBreadth {
   OpWrapper *op_wrapper_;
   std::vector<std::shared_ptr<TensorUsageRecord>> breadth_;
   size_t size_;

   // Ascending order by size_.
   bool operator<(const OpBreadth &other) const {
     return other.size_ > size_;
   }
 };


 // A device buffer together with a list of two-element int intervals.
 struct Chunk {

   // Original note (translated): "shared pointer, buffer->getData()".
   // NOTE(review): the member itself is a raw pointer; who owns the buffer is
   // not visible in this header -- confirm.

   device::Buffer *buffer_;

   // NOTE(review): presumably the usage intervals of the tensors placed in this
   // chunk -- confirm.

   std::vector<std::array<int, 2>> intervals_;

 };


 // An (offset, size) pair plus the tensor usage records associated with it.
 // NOTE(review): presumably describes one region inside a single large buffer
 // that the listed tensors share -- confirm against the offset-calculation
 // pools.
 struct Offset {

   int offset_;

   size_t size_;

   std::vector<std::shared_ptr<TensorUsageRecord>> tensor_usage_records_;

 };


 // Abstract base class for tensor memory pools. Concrete pools implement the
 // pure-virtual init/deinit, allocate/deallocate and per-tensor / per-op hooks
 // declared below.
 class TensorPool {

  public:

   // Builds a pool for `device` over the given tensor and op repositories.
   // NOTE(review): the matching members are vectors held by value, so the pool
   // presumably keeps its own copy of the pointer lists -- confirm in the .cc.

   TensorPool(device::Device *device,

              std::vector<TensorWrapper *> &tensor_repository,

              std::vector<OpWrapper *> &op_repository);

   virtual ~TensorPool();


   // Sets the is-external flag.
   // NOTE(review): presumably stored in is_external_ -- confirm.

   virtual base::Status setIsExternal(bool is_external);


   // Setup / teardown hooks for the tensor-usage-record map and the op-index
   // map; both maps are owned by the concrete pool, not by this base class.

   virtual base::Status initTensorUsageRecordMap() = 0;

   virtual base::Status deinitTensorUsageRecordMap() = 0;

   virtual base::Status initOpIndexMap() = 0;

   virtual base::Status deinitOpIndexMap() = 0;


   // Whole-pool allocation and release.

   virtual base::Status allocate() = 0;

   virtual base::Status deallocate() = 0;


   // Per-tensor and per-op allocation and release.

   virtual base::Status allocateTensor(device::Tensor *tensor) = 0;

   // NOTE(review): a default argument on a virtual function is chosen from the
   // static type at the call site, so overrides should repeat `op_index = -1`.

   virtual base::Status deallocateTensor(device::Tensor *tensor, int op_index = -1) = 0;

   virtual base::Status allocateOp(op::Op *op) = 0;

   virtual base::Status deallocateOp(op::Op *op) = 0;


   // Gets the memory size required for inference.

   virtual int64_t getMemorySize();

   // Sets the memory used for inference; that memory is allocated externally.

   virtual base::Status setMemory(device::Buffer *buffer);


  protected:

   device::Device *device_;

   // Defaults to an empty vector.

   base::IntVector config_ = base::IntVector();

   std::vector<TensorWrapper *> tensor_repository_;

   std::vector<OpWrapper *> op_repository_;

   bool is_external_ = false;

 };


 // Creator (factory) interface for TensorPool objects. Concrete creators
 // implement createTensorPool() to build a specific pool type.
 class TensorPoolCreator {
  public:
   virtual ~TensorPoolCreator() = default;

   // Returns a pointer to a TensorPool built for `device` over the given
   // tensor and op repositories.
   virtual TensorPool *createTensorPool(
       device::Device *device,
       std::vector<TensorWrapper *> &tensor_repository,
       std::vector<OpWrapper *> &op_repository) = 0;
 };


 // Creator template for TensorPool: builds a pool of concrete type T.
 //
 // T must be constructible from (device::Device *,
 // std::vector<TensorWrapper *> &, std::vector<OpWrapper *> &) and convertible
 // to TensorPool *.
 template <typename T>
 class TypeTensorPoolCreator : public TensorPoolCreator {
  public:
   // Returns a T allocated with `new`.
   // NOTE(review): the caller presumably owns the returned pointer and must
   // delete it -- confirm at the call sites.
   //
   // `override` makes the compiler reject any drift from the base-class
   // signature; the method is public to match the interface it implements.
   TensorPool *createTensorPool(
       device::Device *device, std::vector<TensorWrapper *> &tensor_repository,
       std::vector<OpWrapper *> &op_repository) override {
     return new T(device, tensor_repository, op_repository);
   }
 };


 // Gets the global TensorPool creator map: a reference to the map from
 // TensorPoolType to the TensorPoolCreator registered for that type.
 std::map<TensorPoolType, std::shared_ptr<TensorPoolCreator>> &

 getGlobalTensorPoolCreatorMap();


 // Registration helper template: constructing an instance stores a new T in
 // the global creator map under `type`, replacing any entry already there.
 // T must be default-constructible and convertible to TensorPoolCreator.
 template <typename T>
 class TypeTensorPoolRegister {
  public:
   explicit TypeTensorPoolRegister(TensorPoolType type) {
     std::shared_ptr<T> creator(new T());
     getGlobalTensorPoolCreatorMap()[type] = creator;
   }
 };


 // Creates a TensorPool object of the requested `type` for `device` over the
 // given tensor and op repositories.
 // NOTE(review): the result for an unregistered `type` (presumably nullptr)
 // and the ownership of the returned pointer are not visible in this header --
 // confirm in the implementation.
 extern NNDEPLOY_CC_API TensorPool *createTensorPool(

     TensorPoolType type, device::Device *device,

     std::vector<TensorWrapper *> &tensor_repository,

     std::vector<OpWrapper *> &op_repository);


 // Returns a vector of int indices computed from the producer ops, the consumer
 // ops and the op repository.
 // NOTE(review): presumably the positions of `producers` / `consumers` within
 // `op_repository` -- confirm against the implementation.
 std::vector<int> getOpOrderIndex(std::vector<OpWrapper *> &producers,

                                  std::vector<OpWrapper *> &consumers,

                                  std::vector<OpWrapper *> &op_repository);


 // Tests `interval` against the list `intervals` and returns a bool.
 // NOTE(review): the exact predicate (overlap vs. containment, and which result
 // means "free") is not visible in this header -- confirm against the
 // implementation.
 bool isInterval(std::array<int, 2> &interval,

                 std::vector<std::array<int, 2>> &intervals);


 // Debug helper taking a list of tensor usage records.
 // NOTE(review): presumably prints or logs each record; the output sink and
 // format are not visible in this header.
 void tensorUsageRecordPrint(

     const std::vector<std::shared_ptr<TensorUsageRecord>>

         &tensor_usage_records);


 // Debug helper taking a list of chunks.
 // NOTE(review): presumably prints or logs each chunk; the output sink and
 // format are not visible in this header.
 void chunkPrint(const std::vector<std::shared_ptr<Chunk>> &chunks);


 // Converts a TensorPoolType value to a std::string.
 extern NNDEPLOY_CC_API std::string tensorPoolTypeToString(TensorPoolType type);

 // Converts a string to a TensorPoolType value.
 // NOTE(review): the result for an unrecognized string is not visible in this
 // header -- confirm in the implementation.
 extern NNDEPLOY_CC_API TensorPoolType stringToTensorPoolType(

     const std::string &src);


 }  // namespace net

 }  // namespace nndeploy


 #endif /* _NNDEPLOY_NET_TENSOR_POOL_H_ */

any.h

nndeploy::base::Status
Definition: status.h:87

nndeploy::device::Buffer
Definition: buffer.h:21

nndeploy::device::Device
设备抽象基类
Definition: device.h:155

nndeploy::device::Tensor
Tensor类
Definition: tensor.h:26

nndeploy::net::OpWrapper
Definition: util.h:18

nndeploy::net::TensorPoolCreator
TensorPool的创建类
Definition: tensor_pool.h:118

nndeploy::net::TensorPoolCreator::createTensorPool
virtual TensorPool * createTensorPool(device::Device *device, std::vector< TensorWrapper * > &tensor_repository, std::vector< OpWrapper * > &op_repository)=0

nndeploy::net::TensorPoolCreator::~TensorPoolCreator
virtual ~TensorPoolCreator()
Definition: tensor_pool.h:120

nndeploy::net::TensorPool
Definition: tensor_pool.h:70

nndeploy::net::TensorPool::allocate
virtual base::Status allocate()=0

nndeploy::net::TensorPool::deallocateTensor
virtual base::Status deallocateTensor(device::Tensor *tensor, int op_index=-1)=0

nndeploy::net::TensorPool::config_
base::IntVector config_
Definition: tensor_pool.h:108

nndeploy::net::TensorPool::op_repository_
std::vector< OpWrapper * > op_repository_
Definition: tensor_pool.h:110

nndeploy::net::TensorPool::allocateOp
virtual base::Status allocateOp(op::Op *op)=0

nndeploy::net::TensorPool::deinitTensorUsageRecordMap
virtual base::Status deinitTensorUsageRecordMap()=0

nndeploy::net::TensorPool::allocateTensor
virtual base::Status allocateTensor(device::Tensor *tensor)=0

nndeploy::net::TensorPool::deallocate
virtual base::Status deallocate()=0

nndeploy::net::TensorPool::is_external_
bool is_external_
Definition: tensor_pool.h:111

nndeploy::net::TensorPool::getMemorySize
virtual int64_t getMemorySize()
获取推理所需的内存大小

nndeploy::net::TensorPool::device_
device::Device * device_
Definition: tensor_pool.h:107

nndeploy::net::TensorPool::deinitOpIndexMap
virtual base::Status deinitOpIndexMap()=0

nndeploy::net::TensorPool::TensorPool
TensorPool(device::Device *device, std::vector< TensorWrapper * > &tensor_repository, std::vector< OpWrapper * > &op_repository)

nndeploy::net::TensorPool::tensor_repository_
std::vector< TensorWrapper * > tensor_repository_
Definition: tensor_pool.h:109

nndeploy::net::TensorPool::initOpIndexMap
virtual base::Status initOpIndexMap()=0

nndeploy::net::TensorPool::initTensorUsageRecordMap
virtual base::Status initTensorUsageRecordMap()=0

nndeploy::net::TensorPool::deallocateOp
virtual base::Status deallocateOp(op::Op *op)=0

nndeploy::net::TensorPool::setIsExternal
virtual base::Status setIsExternal(bool is_external)

nndeploy::net::TensorPool::~TensorPool
virtual ~TensorPool()

nndeploy::net::TensorPool::setMemory
virtual base::Status setMemory(device::Buffer *buffer)
设置推理所需的内存（推理内存由外部分配）

nndeploy::net::TensorWrapper
Definition: util.h:35

nndeploy::net::TypeTensorPoolCreator
TensorPool的创建类模板
Definition: tensor_pool.h:132

nndeploy::net::TypeTensorPoolRegister
TensorPool的创建类的注册类模板
Definition: tensor_pool.h:154

nndeploy::net::TypeTensorPoolRegister::TypeTensorPoolRegister
TypeTensorPoolRegister(TensorPoolType type)
Definition: tensor_pool.h:156

nndeploy::op::Op
Op的基类
Definition: op.h:42

common.h

util.h

glic_stl_include.h

log.h

macro.h

NNDEPLOY_CC_API
#define NNDEPLOY_CC_API
api
Definition: macro.h:29

nndeploy::base::IntVector
std::vector< int > IntVector
Definition: common.h:379

nndeploy::net::createTensorPool
TensorPool * createTensorPool(TensorPoolType type, device::Device *device, std::vector< TensorWrapper * > &tensor_repository, std::vector< OpWrapper * > &op_repository)
Create a TensorPool object.

nndeploy::net::isInterval
bool isInterval(std::array< int, 2 > &interval, std::vector< std::array< int, 2 >> &intervals)

nndeploy::net::getOpOrderIndex
std::vector< int > getOpOrderIndex(std::vector< OpWrapper * > &producers, std::vector< OpWrapper * > &consumers, std::vector< OpWrapper * > &op_repository)

nndeploy::net::chunkPrint
void chunkPrint(const std::vector< std::shared_ptr< Chunk >> &chunks)

nndeploy::net::tensorPoolTypeToString
std::string tensorPoolTypeToString(TensorPoolType type)

nndeploy::net::TensorPoolType
TensorPoolType
Definition: tensor_pool.h:28

nndeploy::net::kTensorPool1DOffsetCalculateTypeGreedyBySize
@ kTensorPool1DOffsetCalculateTypeGreedyBySize
Definition: tensor_pool.h:32

nndeploy::net::kTensorPool1DNone
@ kTensorPool1DNone
Definition: tensor_pool.h:34

nndeploy::net::kTensorPool1DOffsetCalculateTypeGreedyByBreadth
@ kTensorPool1DOffsetCalculateTypeGreedyByBreadth
Definition: tensor_pool.h:33

nndeploy::net::kTensorPool1DSharedObjectTypeGreedyBySizeImprove
@ kTensorPool1DSharedObjectTypeGreedyBySizeImprove
Definition: tensor_pool.h:31

nndeploy::net::kTensorPool1DSharedObjectTypeGreedyByBreadth
@ kTensorPool1DSharedObjectTypeGreedyByBreadth
Definition: tensor_pool.h:29

nndeploy::net::kTensorPool1DSharedObjectTypeGreedyBySize
@ kTensorPool1DSharedObjectTypeGreedyBySize
Definition: tensor_pool.h:30

nndeploy::net::stringToTensorPoolType
TensorPoolType stringToTensorPoolType(const std::string &src)

nndeploy::net::tensorUsageRecordPrint
void tensorUsageRecordPrint(const std::vector< std::shared_ptr< TensorUsageRecord >> &tensor_usage_records)

nndeploy::net::getGlobalTensorPoolCreatorMap
std::map< TensorPoolType, std::shared_ptr< TensorPoolCreator > > & getGlobalTensorPoolCreatorMap()
Get the Global TensorPool Creator Map object.

nndeploy
Definition: common.h:10

object.h

status.h

string.h

nndeploy::net::Chunk
Definition: tensor_pool.h:58

nndeploy::net::Chunk::intervals_
std::vector< std::array< int, 2 > > intervals_
Definition: tensor_pool.h:61

nndeploy::net::Chunk::buffer_
device::Buffer * buffer_
Definition: tensor_pool.h:60

nndeploy::net::Offset
Definition: tensor_pool.h:64

nndeploy::net::Offset::size_
size_t size_
Definition: tensor_pool.h:66

nndeploy::net::Offset::offset_
int offset_
Definition: tensor_pool.h:65

nndeploy::net::Offset::tensor_usage_records_
std::vector< std::shared_ptr< TensorUsageRecord > > tensor_usage_records_
Definition: tensor_pool.h:67

nndeploy::net::OpBreadth
Definition: tensor_pool.h:50

nndeploy::net::OpBreadth::breadth_
std::vector< std::shared_ptr< TensorUsageRecord > > breadth_
Definition: tensor_pool.h:52

nndeploy::net::OpBreadth::op_wrapper_
OpWrapper * op_wrapper_
Definition: tensor_pool.h:51

nndeploy::net::OpBreadth::size_
size_t size_
Definition: tensor_pool.h:53

nndeploy::net::OpBreadth::operator<
bool operator<(const OpBreadth &other) const
Definition: tensor_pool.h:55

nndeploy::net::TensorUsageRecord
Definition: tensor_pool.h:38

nndeploy::net::TensorUsageRecord::tensor_wrapper_
TensorWrapper * tensor_wrapper_
Definition: tensor_pool.h:39

nndeploy::net::TensorUsageRecord::operator<
bool operator<(const TensorUsageRecord &other) const
Definition: tensor_pool.h:45

nndeploy::net::TensorUsageRecord::is_allocated_
bool is_allocated_
Definition: tensor_pool.h:43

nndeploy::net::TensorUsageRecord::interval_
std::array< int, 2 > interval_
Definition: tensor_pool.h:41

nndeploy::net::TensorUsageRecord::size_
size_t size_
Definition: tensor_pool.h:40

nndeploy::net::TensorUsageRecord::offset_
int offset_
Definition: tensor_pool.h:42