nndeploy C++ API  0.2.0
nndeploy C++ API
tensor.h
Go to the documentation of this file.
1 
2 #ifndef _NNDEPLOY_DEVICE_TENSOR_H_
3 #define _NNDEPLOY_DEVICE_TENSOR_H_
4 
5 #include "nndeploy/base/common.h"
7 #include "nndeploy/base/half.h"
8 #include "nndeploy/base/log.h"
9 #include "nndeploy/base/macro.h"
10 #include "nndeploy/base/object.h"
11 #include "nndeploy/base/status.h"
12 #include "nndeploy/device/buffer.h"
13 #include "nndeploy/device/device.h"
15 #ifdef ENABLE_NNDEPLOY_SAFETENSORS_CPP
16 #include "safetensors.hh"
17 #endif
18 
19 namespace nndeploy {
20 namespace device {
21 
27  public:
28  explicit Tensor();
29  explicit Tensor(const std::string &name);
30  explicit Tensor(const TensorDesc &desc, const std::string &name = "");
31  explicit Tensor(const TensorDesc &desc, Buffer *buffer,
32  const std::string &name = "");
33 
34  explicit Tensor(Device *device, const TensorDesc &desc,
35  const std::string &name = "",
36  const base::IntVector &config = base::IntVector());
37  explicit Tensor(Device *device, const TensorDesc &desc, void *data_ptr,
38  const std::string &name,
39  const base::IntVector &config = base::IntVector());
40 
41  explicit Tensor(MemoryPool *memory_pool, const TensorDesc &desc,
42  const std::string &name = "",
43  const base::IntVector &config = base::IntVector());
44  explicit Tensor(MemoryPool *memory_pool, const TensorDesc &desc,
45  void *data_ptr,
46  const std::string &name,
47  const base::IntVector &config = base::IntVector());
48 
49  Tensor(const Tensor &tensor);
50  Tensor &operator=(const Tensor &tensor);
51 
52  Tensor(Tensor &&tensor) noexcept;
53  Tensor &operator=(Tensor &&tensor) noexcept;
54 
55  virtual ~Tensor();
56 
57  void create(const std::string &name);
58 
59  void create(const TensorDesc &desc, const std::string &name = "");
60  void create(const TensorDesc &desc, Buffer *buffer,
61  const std::string &name = "");
62 
63  void create(Device *device, const TensorDesc &desc,
64  const std::string &name = "",
65  const base::IntVector &config = base::IntVector());
66  void create(Device *device, const TensorDesc &desc, void *data_ptr,
67  const std::string &name = "",
68  const base::IntVector &config = base::IntVector());
69 
70  void create(MemoryPool *memory_pool, const TensorDesc &desc,
71  const std::string &name = "",
72  const base::IntVector &config = base::IntVector());
73  void create(MemoryPool *memory_pool, const TensorDesc &desc, void *data_ptr,
74  const std::string &name = "",
75  const base::IntVector &config = base::IntVector());
76 
77  // clear tensor
78  void clear();
79 
80  // alloc
81  void allocate(Device *device,
82  const base::IntVector &config = base::IntVector());
83  void allocate(MemoryPool *memory_pool,
84  const base::IntVector &config = base::IntVector());
85  void deallocate();
86 
87  template <typename T>
88  base::Status set(T value) {
89  if (buffer_ == nullptr) {
90  NNDEPLOY_LOGE("buffer_ is empty");
92  }
93 
94  return buffer_->set(value);
95  }
96 
97  // modify
110  bool justModify(const TensorDesc &desc);
111  bool justModify(Buffer *buffer, bool is_external = true);
112 
113  // clone and copy
115  // dst must have its memory allocated in advance
117 
118  // Serialize the model weights into a binary file
119  base::Status serialize(std::string &bin_str);
120 
121 #ifdef ENABLE_NNDEPLOY_SAFETENSORS_CPP
122  base::Status serializeToSafetensors(safetensors::safetensors_t &st,
123  bool serialize_buffer = false);
124 #endif
125 
126  // Deserialize the model weights from a binary file
127  base::Status deserialize(const std::string &bin_str);
128 
129 #ifdef ENABLE_NNDEPLOY_SAFETENSORS_CPP
130  base::Status serializeFromSafetensors(const safetensors::safetensors_t &st);
131 #endif
132 
133  // print
134  void print(std::ostream &stream = std::cout) const;
135 
136  // bool
137  bool isSameDevice(Tensor *tensor) const;
138  bool isSameMemoryPool(Tensor *tensor) const;
139  bool isSameDesc(Tensor *tensor) const;
140 
141  // get
142  bool empty() const;
143  bool isContinue() const;
144  bool isExternalBuffer() const;
145 
146  std::string getName() const;
147  base::Status setName(const std::string &);
150  void setDataType(base::DataType data_type);
152  void setDataFormat(base::DataFormat data_format);
154  int getShapeIndex(int index) const;
155  int getBatch() const;
156  int getChannel() const;
157  int getDepth() const;
158  int getHeight() const;
159  int getWidth() const;
161  size_t getStrideIndex(int index) const;
162 
163  Buffer *getBuffer() const;
165  Device *getDevice() const;
167  bool isMemoryPool() const;
169  size_t getSize() const;
171  size_t getRealSize() const;
174  void *getData() const;
176 
177  template <typename T, typename... Args>
178  T *getPtr(Args... args) {
179  uint8_t *ptr = reinterpret_cast<uint8_t *>(getData());
180  if (ptr == nullptr) {
181  NNDEPLOY_LOGE("Tensor data is empty\n");
182  return nullptr;
183  }
184 
185  int coord[] = {std::forward<int>(args)...};
186  int dims = static_cast<int>(sizeof...(args));
187  if (dims > static_cast<int>(desc_.shape_.size())) {
188  NNDEPLOY_LOGE("dimension %d is out of range with shape size %d\n", dims,
189  static_cast<int>(desc_.shape_.size()));
190  return nullptr;
191  }
192 
193  size_t offset = 0;
194  for (int i = 0; i < dims; ++i) {
195  if (coord[i] < 0 || coord[i] >= desc_.shape_[i]) {
196  NNDEPLOY_LOGE("index: %d is out of range for dimension %d with size %d\n",
197  coord[i], i, desc_.shape_[i]);
198  return nullptr;
199  }
200  offset += coord[i] * desc_.stride_[i];
201  }
202 
203  return reinterpret_cast<T *>(ptr + offset);
204  }
205 
206 #ifdef ENABLE_NNDEPLOY_SAFETENSORS_CPP
207  static base::Status dtype2SafetensorsDtype(
208  const base::DataType &data_type,
209  safetensors::dtype &safetensors_data_type);
210  static base::Status shape2SafetensorsShape(
211  const base::IntVector &shape,
212  std::vector<size_t> &safetensors_data_shape);
213 
214  static base::Status safetensorsDtype2Dtype(
215  const safetensors::dtype &safetensors_data_type,
216  base::DataType &data_type);
217 
218  static base::Status safetensorsShape2Shape(
219  const std::vector<size_t> &safetensors_data_shape,
221 #endif
222 
223  inline int addRef() const { return NNDEPLOY_XADD(ref_count_, 1); }
224  inline int subRef() const { return NNDEPLOY_XADD(ref_count_, -1); }
225 
226  private:
227  std::string name_ = ""; // tensor name
228  TensorDesc desc_; // tensor desc
229  bool is_external_ = false; // is external
230  int *ref_count_ = nullptr; // 引用计数
231  Buffer *buffer_ = nullptr; // buffer
232  // bool is_quant_ = false;
233  // Buffer *scale_ = nullptr;
234  // Buffer *zero_point_ = nullptr;
235 };
236 
238  public:
239  virtual ~TensorCreator(){};
240  virtual Tensor *createTensor() = 0;
241 };
242 
243 template <typename T>
245  virtual Tensor *createTensor() { return new T(); }
246 };
247 
248 std::map<base::TensorType, std::shared_ptr<TensorCreator>> &
250 
251 template <typename T>
253  public:
255  getGlobalTensorCreatorMap()[type] = std::shared_ptr<T>(new T());
256  }
257 };
258 
260 
261 template <typename T>
262 base::Status randnTensor(T &generator, float mean, float std, Tensor *tensor,
263  int64_t seed = -1) {
265  if (tensor == nullptr) {
266  NNDEPLOY_LOGE("tensor is empty");
268  }
269  Device *host_device = getDefaultHostDevice();
270  Buffer *host_buffer = nullptr;
271  if (!device::isHostDeviceType(tensor->getDeviceType())) {
272  host_buffer = new Buffer(host_device, tensor->getBufferDesc());
273  if (host_buffer == nullptr) {
274  NNDEPLOY_LOGE("host_buffer is empty");
276  }
277  } else {
278  host_buffer = tensor->getBuffer();
279  }
280  size_t size = host_buffer->getSize();
281  base::DataType data_type = tensor->getDataType();
282  size_t ele_size = data_type.size();
283  size_t ele_count = size / ele_size;
284  void *data = host_buffer->getData();
285  if (seed == -1) {
286  generator.seed(std::random_device()());
287  } else {
288  generator.seed(seed);
289  }
290  std::normal_distribution<float> normal(mean, std);
291  if (data_type.code_ == base::kDataTypeCodeInt && data_type.bits_ == 8 &&
292  data_type.lanes_ == 1) {
293  for (size_t i = 0; i < ele_count; ++i) {
294  ((int8_t *)data)[i] = (int8_t)(normal(generator));
295  }
296  } else if (data_type.code_ == base::kDataTypeCodeInt &&
297  data_type.bits_ == 16 && data_type.lanes_ == 1) {
298  for (size_t i = 0; i < ele_count; ++i) {
299  ((int16_t *)data)[i] = (int16_t)(normal(generator));
300  }
301  } else if (data_type.code_ == base::kDataTypeCodeInt &&
302  data_type.bits_ == 32 && data_type.lanes_ == 1) {
303  for (size_t i = 0; i < ele_count; ++i) {
304  ((int32_t *)data)[i] = (int32_t)(normal(generator));
305  }
306  } else if (data_type.code_ == base::kDataTypeCodeInt &&
307  data_type.bits_ == 64 && data_type.lanes_ == 1) {
308  for (size_t i = 0; i < ele_count; ++i) {
309  ((int64_t *)data)[i] = (int64_t)(normal(generator));
310  }
311  } else if (data_type.code_ == base::kDataTypeCodeUint &&
312  data_type.bits_ == 8 && data_type.lanes_ == 1) {
313  for (size_t i = 0; i < ele_count; ++i) {
314  ((uint8_t *)data)[i] = (uint8_t)(normal(generator));
315  }
316  } else if (data_type.code_ == base::kDataTypeCodeUint &&
317  data_type.bits_ == 16 && data_type.lanes_ == 1) {
318  for (size_t i = 0; i < ele_count; ++i) {
319  ((uint16_t *)data)[i] = (uint16_t)(normal(generator));
320  }
321  } else if (data_type.code_ == base::kDataTypeCodeUint &&
322  data_type.bits_ == 32 && data_type.lanes_ == 1) {
323  for (size_t i = 0; i < ele_count; ++i) {
324  ((uint32_t *)data)[i] = (uint32_t)(normal(generator));
325  }
326  } else if (data_type.code_ == base::kDataTypeCodeUint &&
327  data_type.bits_ == 64 && data_type.lanes_ == 1) {
328  for (size_t i = 0; i < ele_count; ++i) {
329  ((uint64_t *)data)[i] = (uint64_t)(normal(generator));
330  }
331  } else if (data_type.code_ == base::kDataTypeCodeFp &&
332  data_type.bits_ == 32 && data_type.lanes_ == 1) {
333  for (size_t i = 0; i < ele_count; ++i) {
334  ((float *)data)[i] = (float)(normal(generator));
335  }
336  } else if (data_type.code_ == base::kDataTypeCodeFp &&
337  data_type.bits_ == 64 && data_type.lanes_ == 1) {
338  for (size_t i = 0; i < ele_count; ++i) {
339  ((double *)data)[i] = (double)(normal(generator));
340  }
341  } else if (data_type.code_ == base::kDataTypeCodeBFp &&
342  data_type.bits_ == 16 && data_type.lanes_ == 1) {
343  float *fp32 = (float *)malloc(ele_count * sizeof(float));
344  for (size_t i = 0; i < ele_count; ++i) {
345  ((float *)fp32)[i] = (float)(normal(generator));
346  }
347  base::convertFromFloatToBfp16(fp32, (void *)data, ele_count);
348  free(fp32);
349  } else if (data_type.code_ == base::kDataTypeCodeFp &&
350  data_type.bits_ == 16 && data_type.lanes_ == 1) {
351  float *fp32 = (float *)malloc(ele_count * sizeof(float));
352  for (size_t i = 0; i < ele_count; ++i) {
353  ((float *)fp32)[i] = (float)(normal(generator));
354  }
355  base::convertFromFloatToFp16(fp32, (void *)data, ele_count);
356  free(fp32);
357  } else {
358  NNDEPLOY_LOGE("data type is not support");
359  }
360 
361  if (!device::isHostDeviceType(tensor->getDeviceType())) {
362  status = host_buffer->copyTo(tensor->getBuffer());
363  NNDEPLOY_RETURN_ON_NEQ(status, base::kStatusCodeOk, "copyTo failed!");
364  delete host_buffer;
365  }
366  return status;
367 }
368 
369 } // namespace device
370 } // namespace nndeploy
371 
372 #endif
void * getData() const
size_t getSize() const
base::Status copyTo(Buffer *dst)
设备抽象基类
Definition: device.h:155
virtual Tensor * createTensor()=0
base::Status copyTo(Tensor *dst)
Device * getDevice() const
void allocate(MemoryPool *memory_pool, const base::IntVector &config=base::IntVector())
TensorDesc getDesc() const
Buffer * getBuffer() const
T * getPtr(Args... args)
Definition: tensor.h:178
base::MemoryType getMemoryType() const
void print(std::ostream &stream=std::cout) const
int addRef() const
Definition: tensor.h:223
void create(MemoryPool *memory_pool, const TensorDesc &desc, const std::string &name="", const base::IntVector &config=base::IntVector())
Tensor(const TensorDesc &desc, const std::string &name="")
Tensor(MemoryPool *memory_pool, const TensorDesc &desc, void *data_ptr, const std::string &name, const base::IntVector &config=base::IntVector())
size_t getSize() const
Tensor(Device *device, const TensorDesc &desc, void *data_ptr, const std::string &name, const base::IntVector &config=base::IntVector())
void setDataType(base::DataType data_type)
void allocate(Device *device, const base::IntVector &config=base::IntVector())
int subRef() const
Definition: tensor.h:224
bool isExternalBuffer() const
void create(const TensorDesc &desc, const std::string &name="")
void create(const TensorDesc &desc, Buffer *buffer, const std::string &name="")
size_t getRealSize() const
base::Status set(T value)
Definition: tensor.h:88
std::string getName() const
Tensor & operator=(Tensor &&tensor) noexcept
Tensor(MemoryPool *memory_pool, const TensorDesc &desc, const std::string &name="", const base::IntVector &config=base::IntVector())
void setDataFormat(base::DataFormat data_format)
Tensor(const std::string &name)
base::Status reshape(base::IntVector shape)
size_t getStrideIndex(int index) const
MemoryPool * getMemoryPool() const
base::SizeVector getRealSizeVector() const
bool isSameDevice(Tensor *tensor) const
int getShapeIndex(int index) const
bool justModify(const TensorDesc &desc)
bool isSameDesc(Tensor *tensor) const
void create(MemoryPool *memory_pool, const TensorDesc &desc, void *data_ptr, const std::string &name="", const base::IntVector &config=base::IntVector())
BufferDesc getBufferDesc() const
Tensor(const TensorDesc &desc, Buffer *buffer, const std::string &name="")
void * getData() const
base::DataType getDataType() const
void create(Device *device, const TensorDesc &desc, void *data_ptr, const std::string &name="", const base::IntVector &config=base::IntVector())
Tensor(const Tensor &tensor)
void create(Device *device, const TensorDesc &desc, const std::string &name="", const base::IntVector &config=base::IntVector())
base::IntVector getConfig() const
base::DeviceType getDeviceType() const
base::SizeVector getSizeVector() const
base::IntVector getShape() const
base::Status setName(const std::string &)
base::DataFormat getDataFormat() const
Tensor(Device *device, const TensorDesc &desc, const std::string &name="", const base::IntVector &config=base::IntVector())
Tensor & operator=(const Tensor &tensor)
base::SizeVector getStride() const
bool isSameMemoryPool(Tensor *tensor) const
bool justModify(Buffer *buffer, bool is_external=true)
base::Status serialize(std::string &bin_str)
Tensor(Tensor &&tensor) noexcept
base::Status deserialize(const std::string &bin_str)
void create(const std::string &name)
TypeTensorRegister(base::TensorType type)
Definition: tensor.h:254
#define NNDEPLOY_LOGE(fmt,...)
Definition: log.h:59
#define NNDEPLOY_CC_API
api
Definition: macro.h:29
@ kStatusCodeOk
Definition: status.h:13
@ kStatusCodeErrorNullParam
Definition: status.h:22
std::vector< int > IntVector
Definition: common.h:379
bool convertFromFloatToFp16(float *fp32, void *fp16, int count)
@ kDataTypeCodeUint
Definition: common.h:14
@ kDataTypeCodeInt
Definition: common.h:15
@ kDataTypeCodeFp
Definition: common.h:16
@ kDataTypeCodeBFp
Definition: common.h:17
std::vector< size_t > SizeVector
Definition: common.h:380
bool convertFromFloatToBfp16(float *fp32, void *bfp16, int count)
base::Status randnTensor(T &generator, float mean, float std, Tensor *tensor, int64_t seed=-1)
Definition: tensor.h:262
bool isHostDeviceType(base::DeviceType device_type)
判断是否为主机设备类型
Device * getDefaultHostDevice()
获取默认主机设备
Tensor * createTensor(base::TensorType type)
std::map< base::TensorType, std::shared_ptr< TensorCreator > > & getGlobalTensorCreatorMap()
base::Status shape(device::Tensor *input, std::shared_ptr< ir::ShapeParam > param, device::Tensor *output)
#define NNDEPLOY_RETURN_ON_NEQ(status, expected, str)
Definition: status.h:183