Op的基类 More...

#include <op.h>

Inheritance diagram for nndeploy::op::Op:

Collaboration diagram for nndeploy::op::Op:

Public Member Functions
virtual base::Status	allocateWorkspace ()

virtual base::Status	checkOrAllocOutput ()
	检查输出tensor More...

virtual base::Status	deinit ()

std::vector< device::Tensor * >	getAllInput ()

std::vector< std::string >	getAllInputName ()

std::vector< device::Tensor * >	getAllOutput ()

std::vector< std::string >	getAllOutputName ()

bool	getConstructed ()

bool	getDebugFlag ()

base::DeviceType	getDeviceType ()

virtual uint64_t	getFlops ()
	得到op的flops More...

bool	getInitialized ()

device::Tensor *	getInput (int index=0)

std::string	getInputName (int index=0)

device::Tensor *	getInputTensor (const std::string &name)

std::string	getName ()

ir::OpType	getOpType ()

device::Tensor *	getOutput (int index=0)

std::string	getOutputName (int index=0)

device::Tensor *	getOutputTensor (const std::string &name)

base::ParallelType	getParallelType ()

virtual std::shared_ptr< base::Param >	getParam ()

base::PrecisionType	getPrecisionType ()

device::Stream *	getStream ()

bool	getTimeProfileFlag ()

virtual uint64_t	getWorkspaceSize ()
	得到op的workspace大小。Note: op在运行时的workspace大小，在输入确定后调用。例如Conv，当存在padding时，需要分配额外的内存，存放padding后的数据 More...

virtual base::Status	inferDataFormat ()
	数据格式推理 More...

virtual base::Status	inferDataType ()
	类型推理 More...

virtual base::Status	inferShape ()
	形状推理 More...

virtual base::Status	init ()
	初始化 More...

bool	isRunning ()

	Op ()

virtual base::Status	postRun ()

virtual base::Status	preRun ()

base::Status	replaceInputTensor (const std::string &name, device::Tensor *tensor)

base::Status	replaceOutputTensor (const std::string &name, device::Tensor *tensor)

virtual base::Status	reshape (base::ShapeMap &shape_map)
	重新推理形状，通常在初始化之后、preRun前调用 More...

base::Status	rmInput (device::Tensor *tensor)

virtual base::Status	run ()=0

base::Status	setAllInput (std::vector< device::Tensor * > inputs)

base::Status	setAllInputName (std::initializer_list< std::string >)

base::Status	setAllInputName (std::vector< std::string > &)

base::Status	setAllOutput (std::vector< device::Tensor * > outputs)

base::Status	setAllOutputName (std::initializer_list< std::string >)

base::Status	setAllOutputName (std::vector< std::string > &)

void	setDebugFlag (bool flag)

base::Status	setDeviceType (base::DeviceType device_type)

void	setInitializedFlag (bool flag)

void	setInnerFlag (bool flag)

virtual base::Status	setInput (device::Tensor *input)

base::Status	setInput (device::Tensor *input, int index)

base::Status	setName (std::string name)

base::Status	setOpType (ir::OpType op_type)

virtual base::Status	setOutput (device::Tensor *output)

base::Status	setOutput (device::Tensor *output, int index)

base::Status	setParallelType (const base::ParallelType &paralle_type)

virtual base::Status	setParam (std::shared_ptr< base::Param > param)

virtual base::Status	setPrecisionType (base::PrecisionType precision_type)
	设置精度类型。精度不同，计算方式不同，内存分配也不同 More...

void	setRunningFlag (bool flag)

void	setStream (device::Stream *stream)

void	setTimeProfileFlag (bool flag)

virtual void	setWorkspace (void *workspace)

virtual	~Op ()

Protected Attributes
bool	constructed_ = false

base::DeviceType	device_type_
	op的设备类型 More...

uint64_t	flops_ = 0

bool	initialized_ = false

std::vector< device::Tensor * >	inputs_
	op的输入tensor note: 当权重为tensor时，权重tensor也会在这里 eg: More...

bool	is_changed_ = false

bool	is_debug_ = false

bool	is_external_stream_ = false
	op的stream note: 当stream为外部传入时，is_external_stream_为true More...

bool	is_inner_ = false

bool	is_inplace_ = false

bool	is_running_ = false

bool	is_time_profile_ = false

ir::OpDesc	op_desc_
	op的描述，包含op的类型、名称、输入名称、输出名称、参数 More...

std::vector< device::Tensor * >	outputs_
	op的输出tensor More...

base::ParallelType	parallel_type_ = base::kParallelTypeNone

base::PrecisionType	precision_type_ = base::kPrecisionTypeFp32
	op的精度类型。Note: 精度类型与输入输出tensor的data_type不同 More...

device::Stream *	stream_ = nullptr

void *	workspace_ = nullptr

bool	workspace_is_external_ = false
	op的workspace大小 note: op在运行时的workspace大小, 在preRun中确定 eg：例如Conv，当存在padding时，需要分配额外的内存，存放padding后的内存 More...

uint64_t	workspace_size_ = 0

Detailed Description

Op的基类

Note

单算子模式

当输出tensor为空时，内部分配

当输出tensor不为空时，检测当前输出tensor的内存是否足够，如果足够，则直接使用，否则报错

计算图Net模式

静态shape，由tensor pool分配

动态shape

指定了max_shape，则由tensor pool按最大shape分配，调用reshape函数时，只是重新调整了tensor逻辑shape的大小

未指定max_shape，每次调用reshape函数时，在计算图层面都会：先释放上一次分配的内存，再重新分配内存

大语言模型

kvblock的方式

Definition at line 42 of file op.h.

Constructor & Destructor Documentation

◆ Op()

nndeploy::op::Op::Op ( )

◆ ~Op()

virtual nndeploy::op::Op::~Op ( )

virtual

Member Function Documentation

◆ allocateWorkspace()

virtual base::Status nndeploy::op::Op::allocateWorkspace ( )

virtual

◆ checkOrAllocOutput()

virtual base::Status nndeploy::op::Op::checkOrAllocOutput ( )

virtual

检查输出tensor

内存足够

内存不足 - 报错

内存为空 - 分配内存

Returns: base::Status

◆ deinit()

virtual base::Status nndeploy::op::Op::deinit ( )

virtual

Reimplemented in nndeploy::net::Net.

◆ getAllInput()

std::vector<device::Tensor *> nndeploy::op::Op::getAllInput ( )

◆ getAllInputName()

std::vector<std::string> nndeploy::op::Op::getAllInputName ( )

◆ getAllOutput()

std::vector<device::Tensor *> nndeploy::op::Op::getAllOutput ( )

◆ getAllOutputName()

std::vector<std::string> nndeploy::op::Op::getAllOutputName ( )

◆ getConstructed()

bool nndeploy::op::Op::getConstructed ( )

◆ getDebugFlag()

bool nndeploy::op::Op::getDebugFlag ( )

◆ getDeviceType()

base::DeviceType nndeploy::op::Op::getDeviceType ( )

◆ getFlops()

virtual uint64_t nndeploy::op::Op::getFlops ( )

virtual

得到op的flops

Returns: uint64_t

Reimplemented in nndeploy::net::Net.

◆ getInitialized()

bool nndeploy::op::Op::getInitialized ( )

◆ getInput()

device::Tensor* nndeploy::op::Op::getInput ( int index = 0 )

◆ getInputName()

std::string nndeploy::op::Op::getInputName ( int index = 0 )

◆ getInputTensor()

device::Tensor* nndeploy::op::Op::getInputTensor ( const std::string & name )

◆ getName()

std::string nndeploy::op::Op::getName ( )

◆ getOpType()

ir::OpType nndeploy::op::Op::getOpType ( )

◆ getOutput()

device::Tensor* nndeploy::op::Op::getOutput ( int index = 0 )

◆ getOutputName()

std::string nndeploy::op::Op::getOutputName ( int index = 0 )

◆ getOutputTensor()

device::Tensor* nndeploy::op::Op::getOutputTensor ( const std::string & name )

◆ getParallelType()

base::ParallelType nndeploy::op::Op::getParallelType ( )

◆ getParam()

virtual std::shared_ptr<base::Param> nndeploy::op::Op::getParam ( )

virtual

◆ getPrecisionType()

base::PrecisionType nndeploy::op::Op::getPrecisionType ( )

◆ getStream()

device::Stream* nndeploy::op::Op::getStream ( )

◆ getTimeProfileFlag()

bool nndeploy::op::Op::getTimeProfileFlag ( )

◆ getWorkspaceSize()

virtual uint64_t nndeploy::op::Op::getWorkspaceSize ( )

virtual

得到op的workspace大小。Note: op在运行时的workspace大小，在输入确定后调用。例如Conv，当存在padding时，需要分配额外的内存，存放padding后的数据

◆ inferDataFormat()

virtual base::Status nndeploy::op::Op::inferDataFormat ( )

virtual

数据格式推理

Returns: base::Status

Note: 当输入的数据格式确定时，在计算图Net::init中调用该函数

Reimplemented in nndeploy::op::OpTranspose, nndeploy::op::OpReshape, nndeploy::op::OpMuls, nndeploy::op::OpBinary, and nndeploy::net::Net.

◆ inferDataType()

virtual base::Status nndeploy::op::Op::inferDataType ( )

virtual

类型推理

Returns: base::Status

Note: 当输入的data_type确定时，在计算图Net::init中会调用该函数

Reimplemented in nndeploy::op::OpShape, nndeploy::op::OpQuantizeLinear, nndeploy::op::OpMuls, nndeploy::op::OpEqual, nndeploy::op::OpDequantizeLinear, nndeploy::op::OpConstantOfShape, and nndeploy::net::Net.

◆ inferShape()

virtual base::Status nndeploy::op::Op::inferShape ( )

virtual

形状推理

Returns: base::Status

Note: 当输入的shape确定时，在计算图Net::init中调用该函数

Reimplemented in nndeploy::op::OpWhere, nndeploy::op::OpUnsqueeze, nndeploy::op::OpUnary, nndeploy::op::OpTranspose, nndeploy::op::OpSwiGLU, nndeploy::op::OpSplit, nndeploy::op::OpSoftmax, nndeploy::op::OpSlice, nndeploy::op::OpShape, nndeploy::op::OpRMSNorm, nndeploy::op::OpResize, nndeploy::op::OpReshape, nndeploy::op::OpReduceMean, nndeploy::op::OpQuantizeLinear, nndeploy::op::OpQLinearConv, nndeploy::op::OpPow, nndeploy::op::OpMuls, nndeploy::op::OpMaxPool, nndeploy::op::OpMatMul, nndeploy::op::OpLayerNorm, nndeploy::op::OpGlobalAveragepool, nndeploy::op::OpGemm, nndeploy::op::OpGather, nndeploy::op::OpFlatten, nndeploy::op::OpExpand, nndeploy::op::OpEqual, nndeploy::op::OpEmbedding, nndeploy::op::OpDequantizeLinear, nndeploy::op::OpConv, nndeploy::op::OpConstantOfShape, nndeploy::op::OpConcat, nndeploy::op::OpBinary, nndeploy::op::OpBatchNorm, and nndeploy::net::Net.

◆ init()

virtual base::Status nndeploy::op::Op::init ( )

virtual

初始化

Returns: base::Status

Note

功能

参数
权重

Reimplemented in nndeploy::net::Net.

◆ isRunning()

bool nndeploy::op::Op::isRunning ( )

◆ postRun()

virtual base::Status nndeploy::op::Op::postRun ( )

virtual

Reimplemented in nndeploy::net::Net.

◆ preRun()

virtual base::Status nndeploy::op::Op::preRun ( )

virtual

Reimplemented in nndeploy::net::Net.

◆ replaceInputTensor()

base::Status nndeploy::op::Op::replaceInputTensor	(	const std::string &	name,
		device::Tensor *	tensor
	)

◆ replaceOutputTensor()

base::Status nndeploy::op::Op::replaceOutputTensor	(	const std::string &	name,
		device::Tensor *	tensor
	)

◆ reshape()

virtual base::Status nndeploy::op::Op::reshape ( base::ShapeMap & shape_map )

virtual

重新推理形状，通常在初始化之后、preRun前调用

Parameters

shape_map

Returns: base::Status

Reimplemented in nndeploy::net::Net.

◆ rmInput()

base::Status nndeploy::op::Op::rmInput ( device::Tensor * tensor )

◆ run()

virtual base::Status nndeploy::op::Op::run ( )

pure virtual

Implemented in nndeploy::op::OpWhere, nndeploy::op::OpUnsqueeze, nndeploy::op::OpTranspose, nndeploy::op::OpTanh, nndeploy::op::OpTan, nndeploy::op::OpSwiGLU, nndeploy::op::OpSub, nndeploy::op::OpSqrt, nndeploy::op::OpSplit, nndeploy::op::OpSoftmax, nndeploy::op::OpSlice, nndeploy::op::OpSinh, nndeploy::op::OpSin, nndeploy::op::OpSilu, nndeploy::op::OpSign, nndeploy::op::OpSigmoid, nndeploy::op::OpShape, nndeploy::op::OpSelu, nndeploy::op::OpRound, nndeploy::op::OpRMSNorm, nndeploy::op::OpResize, nndeploy::op::OpReshape, nndeploy::op::OpRelu, nndeploy::op::OpReduceMean, nndeploy::op::OpReciprocal, nndeploy::op::OpQuantizeLinear, nndeploy::op::OpQLinearConv, nndeploy::op::OpPow, nndeploy::op::OpNeg, nndeploy::op::OpMuls, nndeploy::op::OpMul, nndeploy::op::OpMaxPool, nndeploy::op::OpMatMul, nndeploy::op::OpLog, nndeploy::op::OpLayerNorm, nndeploy::op::OpHardSigmoid, nndeploy::op::OpGlobalAveragepool, nndeploy::op::OpGemm, nndeploy::op::OpGelu, nndeploy::op::OpGather, nndeploy::op::OpFloor, nndeploy::op::OpFlatten, nndeploy::op::OpExpand, nndeploy::op::OpExp, nndeploy::op::OpErf, nndeploy::op::OpEqual, nndeploy::op::OpEmbedding, nndeploy::op::OpDiv, nndeploy::op::OpDequantizeLinear, nndeploy::op::OpCosh, nndeploy::op::OpCos, nndeploy::op::OpConv, nndeploy::op::OpConstantOfShape, nndeploy::op::OpConcat, nndeploy::op::OpCeil, nndeploy::op::OpCast, nndeploy::op::OpBatchNorm, nndeploy::op::OpAtan, nndeploy::op::OpAsin, nndeploy::op::OpAdd, nndeploy::op::OpAcos, nndeploy::op::OpAbs, and nndeploy::net::Net.

◆ setAllInput()

base::Status nndeploy::op::Op::setAllInput ( std::vector< device::Tensor * > inputs )

◆ setAllInputName() [1/2]

base::Status nndeploy::op::Op::setAllInputName ( std::initializer_list< std::string > )

◆ setAllInputName() [2/2]

base::Status nndeploy::op::Op::setAllInputName ( std::vector< std::string > & )

◆ setAllOutput()

base::Status nndeploy::op::Op::setAllOutput ( std::vector< device::Tensor * > outputs )

◆ setAllOutputName() [1/2]

base::Status nndeploy::op::Op::setAllOutputName ( std::initializer_list< std::string > )

◆ setAllOutputName() [2/2]

base::Status nndeploy::op::Op::setAllOutputName ( std::vector< std::string > & )

◆ setDebugFlag()

void nndeploy::op::Op::setDebugFlag ( bool flag )

◆ setDeviceType()

base::Status nndeploy::op::Op::setDeviceType ( base::DeviceType device_type )

◆ setInitializedFlag()

void nndeploy::op::Op::setInitializedFlag ( bool flag )

◆ setInnerFlag()

void nndeploy::op::Op::setInnerFlag ( bool flag )

◆ setInput() [1/2]

virtual base::Status nndeploy::op::Op::setInput ( device::Tensor * input )

virtual

◆ setInput() [2/2]

base::Status nndeploy::op::Op::setInput	(	device::Tensor *	input,
		int	index
	)

◆ setName()

base::Status nndeploy::op::Op::setName ( std::string name )

◆ setOpType()

base::Status nndeploy::op::Op::setOpType ( ir::OpType op_type )

◆ setOutput() [1/2]

virtual base::Status nndeploy::op::Op::setOutput ( device::Tensor * output )

virtual

◆ setOutput() [2/2]

base::Status nndeploy::op::Op::setOutput	(	device::Tensor *	output,
		int	index
	)

◆ setParallelType()

base::Status nndeploy::op::Op::setParallelType ( const base::ParallelType & paralle_type )

◆ setParam()

virtual base::Status nndeploy::op::Op::setParam ( std::shared_ptr< base::Param > param )

virtual

◆ setPrecisionType()

virtual base::Status nndeploy::op::Op::setPrecisionType ( base::PrecisionType precision_type )

virtual

设置精度类型。精度不同，计算方式不同，内存分配也不同

Parameters

precision_type

Returns: base::Status

Note: 当且仅当data_type为浮点数类型时，precision_type_会与data_type一起，共同决定具体调用的kernel函数

◆ setRunningFlag()

void nndeploy::op::Op::setRunningFlag ( bool flag )

◆ setStream()

void nndeploy::op::Op::setStream ( device::Stream * stream )

◆ setTimeProfileFlag()

void nndeploy::op::Op::setTimeProfileFlag ( bool flag )

◆ setWorkspace()

virtual void nndeploy::op::Op::setWorkspace ( void * workspace )

virtual

Member Data Documentation

◆ constructed_

bool nndeploy::op::Op::constructed_ = false

protected

Definition at line 277 of file op.h.

◆ device_type_

base::DeviceType nndeploy::op::Op::device_type_

protected

op的设备类型

Definition at line 208 of file op.h.

◆ flops_

uint64_t nndeploy::op::Op::flops_ = 0

protected

Definition at line 266 of file op.h.

◆ initialized_

bool nndeploy::op::Op::initialized_ = false

protected

Definition at line 278 of file op.h.

◆ inputs_

std::vector<device::Tensor *> nndeploy::op::Op::inputs_

protected

op的输入tensor note: 当权重为tensor时，权重tensor也会在这里 eg:

当op为Conv时，inputs_[0]为输入数据，inputs_[1]为weight, inputs_[2]为bias

内存分配

权重内存：

在初始化时完成，当权重与推理设备要求的权重一致时，浅拷贝即可

当权重与推理设备要求的权重不一致时，需要进行内存迁移

op输入

已经完成分配

Definition at line 237 of file op.h.

◆ is_changed_

bool nndeploy::op::Op::is_changed_ = false

protected

Definition at line 275 of file op.h.

◆ is_debug_

bool nndeploy::op::Op::is_debug_ = false

protected

Definition at line 281 of file op.h.

◆ is_external_stream_

bool nndeploy::op::Op::is_external_stream_ = false

protected

op的stream note: 当stream为外部传入时，is_external_stream_为true

Definition at line 213 of file op.h.

◆ is_inner_

bool nndeploy::op::Op::is_inner_ = false

protected

Definition at line 269 of file op.h.

◆ is_inplace_

bool nndeploy::op::Op::is_inplace_ = false

protected

Definition at line 273 of file op.h.

◆ is_running_

bool nndeploy::op::Op::is_running_ = false

protected

Definition at line 279 of file op.h.

◆ is_time_profile_

bool nndeploy::op::Op::is_time_profile_ = false

protected

Definition at line 280 of file op.h.

◆ op_desc_

ir::OpDesc nndeploy::op::Op::op_desc_

protected

op的描述，包含op的类型、名称、输入名称、输出名称、参数

Definition at line 203 of file op.h.

◆ outputs_

std::vector<device::Tensor *> nndeploy::op::Op::outputs_

protected

op的输出tensor

Note: outputs_的内存分配

单算子模式

当输出tensor为空时，内部分配

当输出tensor不为空时，检测当前输出tensor的内存是否足够，如果足够，则直接使用，否则报错

计算图Net模式

静态shape，由tensor pool分配

动态shape

指定了max_shape，则由tensor pool按最大shape分配，调用reshape函数时，只是重新调整了tensor逻辑shape的大小

未指定max_shape，每次调用reshape函数时，在计算图层面都会：先释放上一次分配的内存，再重新分配内存

大语言模型

kvblock的方式

Definition at line 256 of file op.h.

◆ parallel_type_

base::ParallelType nndeploy::op::Op::parallel_type_ = base::kParallelTypeNone

protected

Definition at line 271 of file op.h.

◆ precision_type_

base::PrecisionType nndeploy::op::Op::precision_type_ = base::kPrecisionTypeFp32

protected

op的精度类型。Note: 精度类型与输入输出tensor的data_type不同

data_type大部分时候决定具体调用的kernel函数

当且仅当data_type为浮点数类型时，precision_type_会与data_type一起，共同决定具体调用的kernel函数

Definition at line 223 of file op.h.

◆ stream_

device::Stream* nndeploy::op::Op::stream_ = nullptr

protected

Definition at line 214 of file op.h.

◆ workspace_

void* nndeploy::op::Op::workspace_ = nullptr

protected

Definition at line 265 of file op.h.

◆ workspace_is_external_

bool nndeploy::op::Op::workspace_is_external_ = false

protected

op的workspace大小 note: op在运行时的workspace大小, 在preRun中确定 eg：例如Conv，当存在padding时，需要分配额外的内存，存放padding后的内存

Definition at line 263 of file op.h.

◆ workspace_size_

uint64_t nndeploy::op::Op::workspace_size_ = 0

protected

Definition at line 264 of file op.h.

The documentation for this class was generated from the following file:

/home/docs/checkouts/readthedocs.org/user_builds/nndeploy-zh/checkouts/latest/framework/include/nndeploy/op/op.h

Public Member Functions

Protected Attributes

Detailed Description

单算子模式

当输出tensor为空时，内部分配

计算图Net模式

静态shape，由tensor pool分配

动态shape

指定了max_shape，则由tensor pool按最大shape分配

大语言模型

kvblock的方式

Constructor & Destructor Documentation

◆ Op()

◆ ~Op()

Member Function Documentation

◆ allocateWorkspace()

◆ checkOrAllocOutput()

内存足够

内存不足 - 报错

内存为空 - 分配内存

◆ deinit()

◆ getAllInput()

◆ getAllInputName()

◆ getAllOutput()

◆ getAllOutputName()

◆ getConstructed()

◆ getDebugFlag()

◆ getDeviceType()

◆ getFlops()

◆ getInitialized()

◆ getInput()

◆ getInputName()

◆ getInputTensor()

◆ getName()

◆ getOpType()

◆ getOutput()

◆ getOutputName()

◆ getOutputTensor()

◆ getParallelType()

◆ getParam()

◆ getPrecisionType()

◆ getStream()

◆ getTimeProfileFlag()

◆ getWorkspaceSize()

◆ inferDataFormat()

◆ inferDataType()

◆ inferShape()

◆ init()

◆ isRunning()

◆ postRun()

◆ preRun()

◆ replaceInputTensor()

◆ replaceOutputTensor()

◆ reshape()

◆ rmInput()

◆ run()

◆ setAllInput()

◆ setAllInputName() [1/2]

◆ setAllInputName() [2/2]

◆ setAllOutput()

◆ setAllOutputName() [1/2]

◆ setAllOutputName() [2/2]

◆ setDebugFlag()

◆ setDeviceType()

◆ setInitializedFlag()

◆ setInnerFlag()

◆ setInput() [1/2]

◆ setInput() [2/2]

◆ setName()

◆ setOpType()

◆ setOutput() [1/2]

◆ setOutput() [2/2]

◆ setParallelType()

◆ setParam()

◆ setPrecisionType()

◆ setRunningFlag()

◆ setStream()

◆ setTimeProfileFlag()