2 #ifndef _NNDEPLOY_DEVICE_TENSOR_H_
3 #define _NNDEPLOY_DEVICE_TENSOR_H_
15 #ifdef ENABLE_NNDEPLOY_SAFETENSORS_CPP
16 #include "safetensors.hh"
29 explicit Tensor(
const std::string &name);
32 const std::string &name =
"");
35 const std::string &name =
"",
38 const std::string &name,
42 const std::string &name =
"",
46 const std::string &name,
57 void create(
const std::string &name);
61 const std::string &name =
"");
64 const std::string &name =
"",
67 const std::string &name =
"",
71 const std::string &name =
"",
74 const std::string &name =
"",
89 if (buffer_ ==
nullptr) {
94 return buffer_->set(value);
121 #ifdef ENABLE_NNDEPLOY_SAFETENSORS_CPP
122 base::Status serializeToSafetensors(safetensors::safetensors_t &st,
123 bool serialize_buffer =
false);
129 #ifdef ENABLE_NNDEPLOY_SAFETENSORS_CPP
130 base::Status serializeFromSafetensors(
const safetensors::safetensors_t &st);
134 void print(std::ostream &stream = std::cout)
const;
177 template <
typename T,
typename... Args>
179 uint8_t *ptr =
reinterpret_cast<uint8_t *
>(getData());
180 if (ptr ==
nullptr) {
185 int coord[] = {std::forward<int>(args)...};
186 int dims =
static_cast<int>(
sizeof...(args));
187 if (dims >
static_cast<int>(desc_.shape_.size())) {
188 NNDEPLOY_LOGE(
"dimension %d is out of range with shape size %d\n", dims,
189 static_cast<int>(desc_.shape_.size()));
194 for (
int i = 0; i < dims; ++i) {
195 if (coord[i] < 0 || coord[i] >= desc_.shape_[i]) {
196 NNDEPLOY_LOGE(
"index: %d is out of range for dimension %d with size %d\n",
197 coord[i], i, desc_.shape_[i]);
200 offset += coord[i] * desc_.stride_[i];
203 return reinterpret_cast<T *
>(ptr + offset);
206 #ifdef ENABLE_NNDEPLOY_SAFETENSORS_CPP
209 safetensors::dtype &safetensors_data_type);
212 std::vector<size_t> &safetensors_data_shape);
215 const safetensors::dtype &safetensors_data_type,
219 const std::vector<size_t> &safetensors_data_shape,
223 inline int addRef()
const {
return NNDEPLOY_XADD(ref_count_, 1); }
224 inline int subRef()
const {
return NNDEPLOY_XADD(ref_count_, -1); }
227 std::string name_ =
"";
229 bool is_external_ =
false;
230 int *ref_count_ =
nullptr;
231 Buffer *buffer_ =
nullptr;
243 template <
typename T>
245 virtual Tensor *createTensor() {
return new T(); }
248 std::map<base::TensorType, std::shared_ptr<TensorCreator>> &
251 template <
typename T>
261 template <
typename T>
265 if (tensor ==
nullptr) {
270 Buffer *host_buffer =
nullptr;
273 if (host_buffer ==
nullptr) {
280 size_t size = host_buffer->
getSize();
282 size_t ele_size = data_type.
size();
283 size_t ele_count = size / ele_size;
284 void *data = host_buffer->
getData();
286 generator.seed(std::random_device()());
288 generator.seed(seed);
290 std::normal_distribution<float> normal(mean, std);
293 for (
size_t i = 0; i < ele_count; ++i) {
294 ((int8_t *)data)[i] = (int8_t)(normal(generator));
298 for (
size_t i = 0; i < ele_count; ++i) {
299 ((int16_t *)data)[i] = (int16_t)(normal(generator));
303 for (
size_t i = 0; i < ele_count; ++i) {
304 ((int32_t *)data)[i] = (int32_t)(normal(generator));
308 for (
size_t i = 0; i < ele_count; ++i) {
309 ((int64_t *)data)[i] = (int64_t)(normal(generator));
313 for (
size_t i = 0; i < ele_count; ++i) {
314 ((uint8_t *)data)[i] = (uint8_t)(normal(generator));
318 for (
size_t i = 0; i < ele_count; ++i) {
319 ((uint16_t *)data)[i] = (uint16_t)(normal(generator));
323 for (
size_t i = 0; i < ele_count; ++i) {
324 ((uint32_t *)data)[i] = (uint32_t)(normal(generator));
328 for (
size_t i = 0; i < ele_count; ++i) {
329 ((uint64_t *)data)[i] = (uint64_t)(normal(generator));
333 for (
size_t i = 0; i < ele_count; ++i) {
334 ((
float *)data)[i] = (float)(normal(generator));
338 for (
size_t i = 0; i < ele_count; ++i) {
339 ((
double *)data)[i] = (double)(normal(generator));
343 float *fp32 = (
float *)malloc(ele_count *
sizeof(
float));
344 for (
size_t i = 0; i < ele_count; ++i) {
345 ((
float *)fp32)[i] = (float)(normal(generator));
351 float *fp32 = (
float *)malloc(ele_count *
sizeof(
float));
352 for (
size_t i = 0; i < ele_count; ++i) {
353 ((
float *)fp32)[i] = (float)(normal(generator));
base::Status copyTo(Buffer *dst)
virtual Tensor * createTensor()=0
base::Status copyTo(Tensor *dst)
Device * getDevice() const
void allocate(MemoryPool *memory_pool, const base::IntVector &config=base::IntVector())
TensorDesc getDesc() const
Buffer * getBuffer() const
base::MemoryType getMemoryType() const
void print(std::ostream &stream=std::cout) const
void create(MemoryPool *memory_pool, const TensorDesc &desc, const std::string &name="", const base::IntVector &config=base::IntVector())
Tensor(const TensorDesc &desc, const std::string &name="")
Tensor(MemoryPool *memory_pool, const TensorDesc &desc, void *data_ptr, const std::string &name, const base::IntVector &config=base::IntVector())
Tensor(Device *device, const TensorDesc &desc, void *data_ptr, const std::string &name, const base::IntVector &config=base::IntVector())
void setDataType(base::DataType data_type)
void allocate(Device *device, const base::IntVector &config=base::IntVector())
bool isExternalBuffer() const
void create(const TensorDesc &desc, const std::string &name="")
void create(const TensorDesc &desc, Buffer *buffer, const std::string &name="")
size_t getRealSize() const
base::Status set(T value)
std::string getName() const
Tensor & operator=(Tensor &&tensor) noexcept
Tensor(MemoryPool *memory_pool, const TensorDesc &desc, const std::string &name="", const base::IntVector &config=base::IntVector())
void setDataFormat(base::DataFormat data_format)
Tensor(const std::string &name)
base::Status reshape(base::IntVector shape)
size_t getStrideIndex(int index) const
MemoryPool * getMemoryPool() const
base::SizeVector getRealSizeVector() const
bool isMemoryPool() const
bool isSameDevice(Tensor *tensor) const
int getShapeIndex(int index) const
bool justModify(const TensorDesc &desc)
bool isSameDesc(Tensor *tensor) const
void create(MemoryPool *memory_pool, const TensorDesc &desc, void *data_ptr, const std::string &name="", const base::IntVector &config=base::IntVector())
BufferDesc getBufferDesc() const
Tensor(const TensorDesc &desc, Buffer *buffer, const std::string &name="")
base::DataType getDataType() const
void create(Device *device, const TensorDesc &desc, void *data_ptr, const std::string &name="", const base::IntVector &config=base::IntVector())
Tensor(const Tensor &tensor)
void create(Device *device, const TensorDesc &desc, const std::string &name="", const base::IntVector &config=base::IntVector())
base::IntVector getConfig() const
base::DeviceType getDeviceType() const
base::SizeVector getSizeVector() const
base::IntVector getShape() const
base::Status setName(const std::string &)
base::DataFormat getDataFormat() const
Tensor(Device *device, const TensorDesc &desc, const std::string &name="", const base::IntVector &config=base::IntVector())
Tensor & operator=(const Tensor &tensor)
base::SizeVector getStride() const
bool isSameMemoryPool(Tensor *tensor) const
bool justModify(Buffer *buffer, bool is_external=true)
base::Status serialize(std::string &bin_str)
Tensor(Tensor &&tensor) noexcept
base::Status deserialize(const std::string &bin_str)
void create(const std::string &name)
TypeTensorRegister(base::TensorType type)
#define NNDEPLOY_LOGE(fmt,...)
#define NNDEPLOY_CC_API
api
@ kStatusCodeErrorNullParam
std::vector< int > IntVector
bool convertFromFloatToFp16(float *fp32, void *fp16, int count)
std::vector< size_t > SizeVector
bool convertFromFloatToBfp16(float *fp32, void *bfp16, int count)
base::Status randnTensor(T &generator, float mean, float std, Tensor *tensor, int64_t seed=-1)
bool isHostDeviceType(base::DeviceType device_type)
判断是否为主机设备类型
Device * getDefaultHostDevice()
获取默认主机设备
Tensor * createTensor(base::TensorType type)
std::map< base::TensorType, std::shared_ptr< TensorCreator > > & getGlobalTensorCreatorMap()
base::Status shape(device::Tensor *input, std::shared_ptr< ir::ShapeParam > param, device::Tensor *output)
#define NNDEPLOY_RETURN_ON_NEQ(status, expected, str)