nndeploy C++ API  0.2.0
nndeploy C++ API
Public Member Functions | Protected Attributes | List of all members
nndeploy::op::Op Class Reference (abstract)

Op的基类 More...

#include <op.h>

Inheritance diagram for nndeploy::op::Op:
[legend]
Collaboration diagram for nndeploy::op::Op:
[legend]

Public Member Functions

virtual base::Status allocateWorkspace ()
 
virtual base::Status checkOrAllocOutput ()
 检查输出tensor More...
 
virtual base::Status deinit ()
 
std::vector< device::Tensor * > getAllInput ()
 
std::vector< std::string > getAllInputName ()
 
std::vector< device::Tensor * > getAllOutput ()
 
std::vector< std::string > getAllOutputName ()
 
bool getConstructed ()
 
bool getDebugFlag ()
 
base::DeviceType getDeviceType ()
 
virtual uint64_t getFlops ()
 得到op的flops More...
 
bool getInitialized ()
 
device::Tensor * getInput (int index=0)
 
std::string getInputName (int index=0)
 
device::Tensor * getInputTensor (const std::string &name)
 
std::string getName ()
 
ir::OpType getOpType ()
 
device::Tensor * getOutput (int index=0)
 
std::string getOutputName (int index=0)
 
device::Tensor * getOutputTensor (const std::string &name)
 
base::ParallelType getParallelType ()
 
virtual std::shared_ptr< base::Param > getParam ()
 
base::PrecisionType getPrecisionType ()
 
device::Stream * getStream ()
 
bool getTimeProfileFlag ()
 
virtual uint64_t getWorkspaceSize ()
 得到op的workspace大小 note: op在运行时的workspace大小,在输入确定后调用 eg:例如Conv,当存在padding时,需要分配额外的内存,存放padding后的内存 More...
 
virtual base::Status inferDataFormat ()
 数据格式推理 More...
 
virtual base::Status inferDataType ()
 类型推理 More...
 
virtual base::Status inferShape ()
 形状推理 More...
 
virtual base::Status init ()
 初始化 More...
 
bool isRunning ()
 
 Op ()
 
virtual base::Status postRun ()
 
virtual base::Status preRun ()
 
base::Status replaceInputTensor (const std::string &name, device::Tensor *tensor)
 
base::Status replaceOutputTensor (const std::string &name, device::Tensor *tensor)
 
virtual base::Status reshape (base::ShapeMap &shape_map)
 重新推理形状,通常在初始化之后、preRun前调用 More...
 
base::Status rmInput (device::Tensor *tensor)
 
virtual base::Status run ()=0
 
base::Status setAllInput (std::vector< device::Tensor * > inputs)
 
base::Status setAllInputName (std::initializer_list< std::string >)
 
base::Status setAllInputName (std::vector< std::string > &)
 
base::Status setAllOutput (std::vector< device::Tensor * > outputs)
 
base::Status setAllOutputName (std::initializer_list< std::string >)
 
base::Status setAllOutputName (std::vector< std::string > &)
 
void setDebugFlag (bool flag)
 
base::Status setDeviceType (base::DeviceType device_type)
 
void setInitializedFlag (bool flag)
 
void setInnerFlag (bool flag)
 
virtual base::Status setInput (device::Tensor *input)
 
base::Status setInput (device::Tensor *input, int index)
 
base::Status setName (std::string name)
 
base::Status setOpType (ir::OpType op_type)
 
virtual base::Status setOutput (device::Tensor *output)
 
base::Status setOutput (device::Tensor *output, int index)
 
base::Status setParallelType (const base::ParallelType &paralle_type)
 
virtual base::Status setParam (std::shared_ptr< base::Param > param)
 
virtual base::Status setPrecisionType (base::PrecisionType precision_type)
 设置精度类型 精度不同,计算方式不同,内存分配不同 More...
 
void setRunningFlag (bool flag)
 
void setStream (device::Stream *stream)
 
void setTimeProfileFlag (bool flag)
 
virtual void setWorkspace (void *workspace)
 
virtual ~Op ()
 

Protected Attributes

bool constructed_ = false
 
base::DeviceType device_type_
 op的设备类型 More...
 
uint64_t flops_ = 0
 
bool initialized_ = false
 
std::vector< device::Tensor * > inputs_
 op的输入tensor note: 当权重为tensor时,权重tensor也会在这里 eg: More...
 
bool is_changed_ = false
 
bool is_debug_ = false
 
bool is_external_stream_ = false
 op的stream note: 当stream为外部传入时,is_external_stream_为true More...
 
bool is_inner_ = false
 
bool is_inplace_ = false
 
bool is_running_ = false
 
bool is_time_profile_ = false
 
ir::OpDesc op_desc_
 op的描述 包含op的类型、名称、输入名称、输出名称、参数 More...
 
std::vector< device::Tensor * > outputs_
 op的输出tensor More...
 
base::ParallelType parallel_type_ = base::kParallelTypeNone
 
base::PrecisionType precision_type_ = base::kPrecisionTypeFp32
 op的精度类型 note: 精度类型与输入输出tensor的data_type的不同 More...
 
device::Stream * stream_ = nullptr
 
void * workspace_ = nullptr
 
bool workspace_is_external_ = false
 op的workspace大小 note: op在运行时的workspace大小, 在preRun中确定 eg:例如Conv,当存在padding时,需要分配额外的内存,存放padding后的内存 More...
 
uint64_t workspace_size_ = 0
 

Detailed Description

Op的基类

Note

单算子模式

当输出tensor为空时,内部分配

当输出tensor不为空时,检测当前输出tensor的内存是否足够,如果足够,则直接使用,否则报错

计算图Net模式

静态shape,由tensor pool分配

动态shape

指定了max_shape,则由tensor pool按最大shape分配,调用reshape函数时,只是重新调整了tensor逻辑shape的大小

未指定max_shape,每次调用reshape函数时,在计算图层面都会:先释放上一次分配的内存,再重新分配内存

大语言模型

kvblock的方式

Definition at line 42 of file op.h.

Constructor & Destructor Documentation

◆ Op()

nndeploy::op::Op::Op ( )

◆ ~Op()

virtual nndeploy::op::Op::~Op ( )
virtual

Member Function Documentation

◆ allocateWorkspace()

virtual base::Status nndeploy::op::Op::allocateWorkspace ( )
virtual

◆ checkOrAllocOutput()

virtual base::Status nndeploy::op::Op::checkOrAllocOutput ( )
virtual

检查输出tensor

内存足够

内存不足 - 报错

内存为空 - 分配内存

Returns
base::Status

◆ deinit()

virtual base::Status nndeploy::op::Op::deinit ( )
virtual

Reimplemented in nndeploy::net::Net.

◆ getAllInput()

std::vector<device::Tensor *> nndeploy::op::Op::getAllInput ( )

◆ getAllInputName()

std::vector<std::string> nndeploy::op::Op::getAllInputName ( )

◆ getAllOutput()

std::vector<device::Tensor *> nndeploy::op::Op::getAllOutput ( )

◆ getAllOutputName()

std::vector<std::string> nndeploy::op::Op::getAllOutputName ( )

◆ getConstructed()

bool nndeploy::op::Op::getConstructed ( )

◆ getDebugFlag()

bool nndeploy::op::Op::getDebugFlag ( )

◆ getDeviceType()

base::DeviceType nndeploy::op::Op::getDeviceType ( )

◆ getFlops()

virtual uint64_t nndeploy::op::Op::getFlops ( )
virtual

得到op的flops

Returns
uint64_t

Reimplemented in nndeploy::net::Net.

◆ getInitialized()

bool nndeploy::op::Op::getInitialized ( )

◆ getInput()

device::Tensor* nndeploy::op::Op::getInput ( int  index = 0)

◆ getInputName()

std::string nndeploy::op::Op::getInputName ( int  index = 0)

◆ getInputTensor()

device::Tensor* nndeploy::op::Op::getInputTensor ( const std::string &  name)

◆ getName()

std::string nndeploy::op::Op::getName ( )

◆ getOpType()

ir::OpType nndeploy::op::Op::getOpType ( )

◆ getOutput()

device::Tensor* nndeploy::op::Op::getOutput ( int  index = 0)

◆ getOutputName()

std::string nndeploy::op::Op::getOutputName ( int  index = 0)

◆ getOutputTensor()

device::Tensor* nndeploy::op::Op::getOutputTensor ( const std::string &  name)

◆ getParallelType()

base::ParallelType nndeploy::op::Op::getParallelType ( )

◆ getParam()

virtual std::shared_ptr<base::Param> nndeploy::op::Op::getParam ( )
virtual

◆ getPrecisionType()

base::PrecisionType nndeploy::op::Op::getPrecisionType ( )

◆ getStream()

device::Stream* nndeploy::op::Op::getStream ( )

◆ getTimeProfileFlag()

bool nndeploy::op::Op::getTimeProfileFlag ( )

◆ getWorkspaceSize()

virtual uint64_t nndeploy::op::Op::getWorkspaceSize ( )
virtual

得到op的workspace大小 note: op在运行时的workspace大小,在输入确定后调用 eg:例如Conv,当存在padding时,需要分配额外的内存,存放padding后的内存

◆ inferDataFormat()

virtual base::Status nndeploy::op::Op::inferDataFormat ( )
virtual

数据格式推理

Returns
base::Status
Note
当输入的数据格式确定时,在计算图Net::init中会调用该函数

Reimplemented in nndeploy::op::OpTranspose, nndeploy::op::OpReshape, nndeploy::op::OpMuls, nndeploy::op::OpBinary, and nndeploy::net::Net.

◆ inferDataType()

virtual base::Status nndeploy::op::Op::inferDataType ( )
virtual

类型推理

Returns
base::Status
Note
当输入的data_type确定时,在计算图Net::init中会调用该函数

Reimplemented in nndeploy::op::OpShape, nndeploy::op::OpQuantizeLinear, nndeploy::op::OpMuls, nndeploy::op::OpEqual, nndeploy::op::OpDequantizeLinear, nndeploy::op::OpConstantOfShape, and nndeploy::net::Net.

◆ inferShape()

virtual base::Status nndeploy::op::Op::inferShape ( )
virtual

◆ init()

virtual base::Status nndeploy::op::Op::init ( )
virtual

初始化

Returns
base::Status
Note
功能
  1. 参数
  2. 权重

Reimplemented in nndeploy::net::Net.

◆ isRunning()

bool nndeploy::op::Op::isRunning ( )

◆ postRun()

virtual base::Status nndeploy::op::Op::postRun ( )
virtual

Reimplemented in nndeploy::net::Net.

◆ preRun()

virtual base::Status nndeploy::op::Op::preRun ( )
virtual

Reimplemented in nndeploy::net::Net.

◆ replaceInputTensor()

base::Status nndeploy::op::Op::replaceInputTensor ( const std::string &  name,
device::Tensor * tensor
)

◆ replaceOutputTensor()

base::Status nndeploy::op::Op::replaceOutputTensor ( const std::string &  name,
device::Tensor * tensor
)

◆ reshape()

virtual base::Status nndeploy::op::Op::reshape ( base::ShapeMap & shape_map)
virtual

重新推理形状,通常在初始化之后、preRun前调用

Parameters
shape_map
Returns
base::Status

Reimplemented in nndeploy::net::Net.

◆ rmInput()

base::Status nndeploy::op::Op::rmInput ( device::Tensor * tensor)

◆ run()

virtual base::Status nndeploy::op::Op::run ( )
pure virtual

Implemented in nndeploy::op::OpWhere, nndeploy::op::OpUnsqueeze, nndeploy::op::OpTranspose, nndeploy::op::OpTanh, nndeploy::op::OpTan, nndeploy::op::OpSwiGLU, nndeploy::op::OpSub, nndeploy::op::OpSqrt, nndeploy::op::OpSplit, nndeploy::op::OpSoftmax, nndeploy::op::OpSlice, nndeploy::op::OpSinh, nndeploy::op::OpSin, nndeploy::op::OpSilu, nndeploy::op::OpSign, nndeploy::op::OpSigmoid, nndeploy::op::OpShape, nndeploy::op::OpSelu, nndeploy::op::OpRound, nndeploy::op::OpRMSNorm, nndeploy::op::OpResize, nndeploy::op::OpReshape, nndeploy::op::OpRelu, nndeploy::op::OpReduceMean, nndeploy::op::OpReciprocal, nndeploy::op::OpQuantizeLinear, nndeploy::op::OpQLinearConv, nndeploy::op::OpPow, nndeploy::op::OpNeg, nndeploy::op::OpMuls, nndeploy::op::OpMul, nndeploy::op::OpMaxPool, nndeploy::op::OpMatMul, nndeploy::op::OpLog, nndeploy::op::OpLayerNorm, nndeploy::op::OpHardSigmoid, nndeploy::op::OpGlobalAveragepool, nndeploy::op::OpGemm, nndeploy::op::OpGelu, nndeploy::op::OpGather, nndeploy::op::OpFloor, nndeploy::op::OpFlatten, nndeploy::op::OpExpand, nndeploy::op::OpExp, nndeploy::op::OpErf, nndeploy::op::OpEqual, nndeploy::op::OpEmbedding, nndeploy::op::OpDiv, nndeploy::op::OpDequantizeLinear, nndeploy::op::OpCosh, nndeploy::op::OpCos, nndeploy::op::OpConv, nndeploy::op::OpConstantOfShape, nndeploy::op::OpConcat, nndeploy::op::OpCeil, nndeploy::op::OpCast, nndeploy::op::OpBatchNorm, nndeploy::op::OpAtan, nndeploy::op::OpAsin, nndeploy::op::OpAdd, nndeploy::op::OpAcos, nndeploy::op::OpAbs, and nndeploy::net::Net.

◆ setAllInput()

base::Status nndeploy::op::Op::setAllInput ( std::vector< device::Tensor * >  inputs)

◆ setAllInputName() [1/2]

base::Status nndeploy::op::Op::setAllInputName ( std::initializer_list< std::string >  )

◆ setAllInputName() [2/2]

base::Status nndeploy::op::Op::setAllInputName ( std::vector< std::string > &  )

◆ setAllOutput()

base::Status nndeploy::op::Op::setAllOutput ( std::vector< device::Tensor * >  outputs)

◆ setAllOutputName() [1/2]

base::Status nndeploy::op::Op::setAllOutputName ( std::initializer_list< std::string >  )

◆ setAllOutputName() [2/2]

base::Status nndeploy::op::Op::setAllOutputName ( std::vector< std::string > &  )

◆ setDebugFlag()

void nndeploy::op::Op::setDebugFlag ( bool  flag)

◆ setDeviceType()

base::Status nndeploy::op::Op::setDeviceType ( base::DeviceType  device_type)

◆ setInitializedFlag()

void nndeploy::op::Op::setInitializedFlag ( bool  flag)

◆ setInnerFlag()

void nndeploy::op::Op::setInnerFlag ( bool  flag)

◆ setInput() [1/2]

virtual base::Status nndeploy::op::Op::setInput ( device::Tensor * input)
virtual

◆ setInput() [2/2]

base::Status nndeploy::op::Op::setInput ( device::Tensor * input,
int  index 
)

◆ setName()

base::Status nndeploy::op::Op::setName ( std::string  name)

◆ setOpType()

base::Status nndeploy::op::Op::setOpType ( ir::OpType  op_type)

◆ setOutput() [1/2]

virtual base::Status nndeploy::op::Op::setOutput ( device::Tensor * output)
virtual

◆ setOutput() [2/2]

base::Status nndeploy::op::Op::setOutput ( device::Tensor * output,
int  index 
)

◆ setParallelType()

base::Status nndeploy::op::Op::setParallelType ( const base::ParallelType & paralle_type)

◆ setParam()

virtual base::Status nndeploy::op::Op::setParam ( std::shared_ptr< base::Param > param)
virtual

◆ setPrecisionType()

virtual base::Status nndeploy::op::Op::setPrecisionType ( base::PrecisionType  precision_type)
virtual

设置精度类型 精度不同,计算方式不同,内存分配不同

Parameters
precision_type
Returns
base::Status
Note
当且仅当data_type为浮点数类型时,precision_type_会与data_type一起,共同决定具体调用的kernel函数

◆ setRunningFlag()

void nndeploy::op::Op::setRunningFlag ( bool  flag)

◆ setStream()

void nndeploy::op::Op::setStream ( device::Stream * stream)

◆ setTimeProfileFlag()

void nndeploy::op::Op::setTimeProfileFlag ( bool  flag)

◆ setWorkspace()

virtual void nndeploy::op::Op::setWorkspace ( void *  workspace)
virtual

Member Data Documentation

◆ constructed_

bool nndeploy::op::Op::constructed_ = false
protected

Definition at line 277 of file op.h.

◆ device_type_

base::DeviceType nndeploy::op::Op::device_type_
protected

op的设备类型

Definition at line 208 of file op.h.

◆ flops_

uint64_t nndeploy::op::Op::flops_ = 0
protected

Definition at line 266 of file op.h.

◆ initialized_

bool nndeploy::op::Op::initialized_ = false
protected

Definition at line 278 of file op.h.

◆ inputs_

std::vector<device::Tensor *> nndeploy::op::Op::inputs_
protected

op的输入tensor note: 当权重为tensor时,权重tensor也会在这里 eg:

当op为Conv时,inputs_[0]为输入数据,inputs_[1]为weight, inputs_[2]为bias

内存分配

权重内存:

在初始化时完成,当权重与推理设备要求的权重一致时,浅拷贝即可

当权重与推理设备要求的权重不一致时,需要进行内存迁移

op输入

已经完成分配

Definition at line 237 of file op.h.

◆ is_changed_

bool nndeploy::op::Op::is_changed_ = false
protected

Definition at line 275 of file op.h.

◆ is_debug_

bool nndeploy::op::Op::is_debug_ = false
protected

Definition at line 281 of file op.h.

◆ is_external_stream_

bool nndeploy::op::Op::is_external_stream_ = false
protected

op的stream note: 当stream为外部传入时,is_external_stream_为true

Definition at line 213 of file op.h.

◆ is_inner_

bool nndeploy::op::Op::is_inner_ = false
protected

Definition at line 269 of file op.h.

◆ is_inplace_

bool nndeploy::op::Op::is_inplace_ = false
protected

Definition at line 273 of file op.h.

◆ is_running_

bool nndeploy::op::Op::is_running_ = false
protected

Definition at line 279 of file op.h.

◆ is_time_profile_

bool nndeploy::op::Op::is_time_profile_ = false
protected

Definition at line 280 of file op.h.

◆ op_desc_

ir::OpDesc nndeploy::op::Op::op_desc_
protected

op的描述 包含op的类型、名称、输入名称、输出名称、参数

Definition at line 203 of file op.h.

◆ outputs_

std::vector<device::Tensor *> nndeploy::op::Op::outputs_
protected

op的输出tensor

Note
: outputs_的内存分配

单算子模式

当输出tensor为空时,内部分配

当输出tensor不为空时,检测当前输出tensor的内存是否足够,如果足够,则直接使用,否则报错

计算图Net模式

静态shape,由tensor pool分配

动态shape

指定了max_shape,则由tensor pool按最大shape分配,调用reshape函数时,只是重新调整了tensor逻辑shape的大小

未指定max_shape,每次调用reshape函数时,在计算图层面都会:先释放上一次分配的内存,再重新分配内存

大语言模型

kvblock的方式

Definition at line 256 of file op.h.

◆ parallel_type_

base::ParallelType nndeploy::op::Op::parallel_type_ = base::kParallelTypeNone
protected

Definition at line 271 of file op.h.

◆ precision_type_

base::PrecisionType nndeploy::op::Op::precision_type_ = base::kPrecisionTypeFp32
protected

op的精度类型 note: 精度类型与输入输出tensor的data_type的不同

data_type大部分时候决定具体调用的kernel函数

当且仅当data_type为浮点数类型时,precision_type_会与data_type一起,共同决定具体调用的kernel函数

Definition at line 223 of file op.h.

◆ stream_

device::Stream* nndeploy::op::Op::stream_ = nullptr
protected

Definition at line 214 of file op.h.

◆ workspace_

void* nndeploy::op::Op::workspace_ = nullptr
protected

Definition at line 265 of file op.h.

◆ workspace_is_external_

bool nndeploy::op::Op::workspace_is_external_ = false
protected

op的workspace大小 note: op在运行时的workspace大小, 在preRun中确定 eg:例如Conv,当存在padding时,需要分配额外的内存,存放padding后的内存

Definition at line 263 of file op.h.

◆ workspace_size_

uint64_t nndeploy::op::Op::workspace_size_ = 0
protected

Definition at line 264 of file op.h.


The documentation for this class was generated from the following file: