zh-cn/latest/default__llm__infer_8h_source.html

 #ifndef _NNDEPLOY_LLM_INFER_DEFAULT_LLM_INFER_H_

 #define _NNDEPLOY_LLM_INFER_DEFAULT_LLM_INFER_H_


 #include "nndeploy/base/any.h"

 #include "nndeploy/base/common.h"

 #include "nndeploy/base/glic_stl_include.h"

 #include "nndeploy/base/log.h"

 #include "nndeploy/base/macro.h"

 #include "nndeploy/base/object.h"

 #include "nndeploy/base/opencv_include.h"

 #include "nndeploy/base/param.h"

 #include "nndeploy/base/status.h"

 #include "nndeploy/base/string.h"

 #include "nndeploy/dag/composite_node.h"

 #include "nndeploy/dag/edge.h"

 #include "nndeploy/dag/graph.h"

 #include "nndeploy/dag/loop.h"

 #include "nndeploy/dag/node.h"

 #include "nndeploy/device/buffer.h"

 #include "nndeploy/device/device.h"

 #include "nndeploy/device/memory_pool.h"

 #include "nndeploy/device/tensor.h"

 #include "nndeploy/infer/infer.h"

 #include "nndeploy/llm/abstract_llm_infer.h"

 #include "nndeploy/llm/embedding.h"


 namespace nndeploy {

 namespace llm {


 /**
  * @brief Parameters of DefaultLlmInfer.
  *
  * Groups the optional embedding stage configuration, the inference backend
  * selection and a few model-level settings, and implements JSON
  * serialization / deserialization on top of base::Param.
  */
 struct NNDEPLOY_CC_API DefaultLlmInferParam : public base::Param {
   // embedding
   bool is_embedding_ = false;
   std::shared_ptr<EmbeddingParam> embedding_param_ = nullptr;
   // infer
   base::InferenceType inference_type_ = base::kInferenceTypeOnnxRuntime;
   std::shared_ptr<inference::InferenceParam> inference_param_ = nullptr;
   // model
   int layer_nums_ = 24;
   int max_seq_len_ = 2048;  // TODO
   std::vector<int32_t> kv_init_shape_;
   base::DataType attention_mask_data_type_ = base::dataTypeOf<float>();
   std::string attention_type_ = "full";

   using base::Param::serialize;
   /**
    * @brief Write every field of this parameter into `json`.
    *
    * `embedding_param_` is only emitted when `is_embedding_` is true and the
    * pointer is set. `inference_param_` is created on demand (so this call may
    * modify the object) and is always emitted.
    *
    * @param json Destination JSON value (base::Param::serialize is applied to
    * it first).
    * @param allocator rapidjson allocator used for all added members.
    * @return base::kStatusCodeOk on success, otherwise the first failing
    * status reported by the base class or a nested parameter.
    */
   virtual base::Status serialize(
       rapidjson::Value& json,
       rapidjson::Document::AllocatorType& allocator) override {
     base::Status status = base::Param::serialize(json, allocator);
     if (status != base::kStatusCodeOk) {
       NNDEPLOY_LOGE("DefaultLlmInferParam::serialize failed\n");
       return status;
     }

     // embedding
     json.AddMember("is_embedding_", is_embedding_, allocator);
     if (is_embedding_ && embedding_param_ != nullptr) {
       rapidjson::Value embedding_param_value;
       status = embedding_param_->serialize(embedding_param_value, allocator);
       if (status != base::kStatusCodeOk) {
         NNDEPLOY_LOGE("DefaultLlmInferParam: embedding_param_ serialize failed\n");
         return status;
       }
       json.AddMember("embedding_param_", embedding_param_value, allocator);
     }

     // infer
     std::string inference_type_str =
         base::inferenceTypeToString(inference_type_);
     json.AddMember("inference_type_",
                    rapidjson::Value(inference_type_str.c_str(), allocator),
                    allocator);
     ensureInferenceParam();
     rapidjson::Value inference_param_value;
     status = inference_param_->serialize(inference_param_value, allocator);
     if (status != base::kStatusCodeOk) {
       NNDEPLOY_LOGE("DefaultLlmInferParam: inference_param_ serialize failed\n");
       return status;
     }
     json.AddMember("inference_param_", inference_param_value, allocator);

     // model
     json.AddMember("layer_nums_", layer_nums_, allocator);
     json.AddMember("max_seq_len_", max_seq_len_, allocator);
     rapidjson::Value kv_init_shape_array(rapidjson::kArrayType);
     for (const int32_t dim : kv_init_shape_) {
       kv_init_shape_array.PushBack(dim, allocator);
     }
     json.AddMember("kv_init_shape_", kv_init_shape_array, allocator);
     std::string attention_mask_data_type_str =
         base::dataTypeToString(attention_mask_data_type_);
     json.AddMember(
         "attention_mask_data_type_",
         rapidjson::Value(attention_mask_data_type_str.c_str(), allocator),
         allocator);
     json.AddMember("attention_type_",
                    rapidjson::Value(attention_type_.c_str(), allocator),
                    allocator);
     return base::kStatusCodeOk;
   }

   using base::Param::deserialize;
   /**
    * @brief Read the fields of this parameter from `json`.
    *
    * Every key is optional: a field keeps its current value when its key is
    * missing or has an unexpected JSON type. `inference_param_` is created on
    * demand from `inference_type_` and is only filled when the
    * "inference_param_" key holds an object.
    *
    * @param json Source JSON value (base::Param::deserialize is applied to it
    * first).
    * @return base::kStatusCodeOk on success; the failing status of the base
    * class or a nested parameter; base::kStatusCodeErrorInvalidParam when
    * "kv_init_shape_" contains a non-integer element (the stored shape is left
    * unchanged in that case).
    */
   virtual base::Status deserialize(rapidjson::Value& json) override {
     base::Status status = base::Param::deserialize(json);
     if (status != base::kStatusCodeOk) {
       NNDEPLOY_LOGE("DefaultLlmInferParam::deserialize failed\n");
       return status;
     }

     // embedding
     if (json.HasMember("is_embedding_") && json["is_embedding_"].IsBool()) {
       is_embedding_ = json["is_embedding_"].GetBool();
     }
     if (is_embedding_ && json.HasMember("embedding_param_") &&
         json["embedding_param_"].IsObject()) {
       if (embedding_param_ == nullptr) {
         embedding_param_ = std::make_shared<EmbeddingParam>();
       }
       status = embedding_param_->deserialize(json["embedding_param_"]);
       if (status != base::kStatusCodeOk) {
         NNDEPLOY_LOGE(
             "DefaultLlmInferParam: embedding_param_ deserialize failed\n");
         return status;
       }
     }

     // infer
     if (json.HasMember("inference_type_") &&
         json["inference_type_"].IsString()) {
       inference_type_ =
           base::stringToInferenceType(json["inference_type_"].GetString());
     }
     ensureInferenceParam();
     // Looking up a missing member with operator[] is not allowed by rapidjson,
     // so the nested object is only visited when it is actually present.
     if (json.HasMember("inference_param_") &&
         json["inference_param_"].IsObject()) {
       status = inference_param_->deserialize(json["inference_param_"]);
       if (status != base::kStatusCodeOk) {
         NNDEPLOY_LOGE(
             "DefaultLlmInferParam: inference_param_ deserialize failed\n");
         return status;
       }
     }

     // model
     if (json.HasMember("layer_nums_") && json["layer_nums_"].IsInt()) {
       layer_nums_ = json["layer_nums_"].GetInt();
     }
     if (json.HasMember("max_seq_len_") && json["max_seq_len_"].IsInt()) {
       max_seq_len_ = json["max_seq_len_"].GetInt();
     }
     if (json.HasMember("kv_init_shape_") && json["kv_init_shape_"].IsArray()) {
       const rapidjson::Value& kv_init_shape_array = json["kv_init_shape_"];
       // Parse into a temporary so a malformed array cannot leave a
       // half-filled shape behind.
       std::vector<int32_t> parsed_shape;
       parsed_shape.reserve(kv_init_shape_array.Size());
       for (rapidjson::SizeType i = 0; i < kv_init_shape_array.Size(); i++) {
         if (!kv_init_shape_array[i].IsInt()) {
           NNDEPLOY_LOGE("kv_init_shape_[%u] is not an integer\n",
                         static_cast<unsigned int>(i));
           return base::kStatusCodeErrorInvalidParam;
         }
         parsed_shape.push_back(kv_init_shape_array[i].GetInt());
       }
       kv_init_shape_.swap(parsed_shape);
     }
     if (json.HasMember("attention_mask_data_type_") &&
         json["attention_mask_data_type_"].IsString()) {
       attention_mask_data_type_ =
           base::stringToDataType(json["attention_mask_data_type_"].GetString());
     }
     if (json.HasMember("attention_type_") &&
         json["attention_type_"].IsString()) {
       attention_type_ = json["attention_type_"].GetString();
     }
     return base::kStatusCodeOk;
   }

  private:
   // Make sure inference_param_ is non-null: ask the factory for a parameter
   // matching inference_type_ and fall back to a plain InferenceParam when the
   // factory returns nullptr.
   void ensureInferenceParam() {
     if (inference_param_ != nullptr) {
       return;
     }
     inference_param_ = inference::createInferenceParam(inference_type_);
     if (inference_param_ == nullptr) {
       inference_param_ =
           std::make_shared<inference::InferenceParam>(inference_type_);
     }
   }
 };


 class DefaultLlmInfer : public AbstractLlmInfer {

  public:

   DefaultLlmInfer(const std::string& name) : AbstractLlmInfer(name) {

     param_ = std::make_shared<DefaultLlmInferParam>();

     key_ = "nndeploy::llm::DefaultLlmInfer";

     desc_ =

         "LLM default pipeline: input_tokens -> "

         "inference -> [logits]";

   }

   DefaultLlmInfer(const std::string& name, std::vector<dag::Edge*> inputs,

                   std::vector<dag::Edge*> outputs)

       : AbstractLlmInfer(name, inputs, outputs) {

     param_ = std::make_shared<DefaultLlmInferParam>();

     key_ = "nndeploy::llm::DefaultLlmInfer";

     desc_ =

         "LLM default pipeline: input_tokens -> "

         "inference -> [logits]";

   }

   virtual ~DefaultLlmInfer() {}


   virtual base::Status init() {

     // 解析参数

     if (!config_path_.empty()) {

       parseConfig(config_path_[0]);

     }


     // 创建输入边

     input_ids_name_ = model_inputs_[0];

     std::vector<dag::Edge*> input_edges;

     input_ids_edge_ = this->createEdge(input_ids_name_);

     input_edges.push_back(input_ids_edge_);

     if (model_inputs_.size() > 1) {

       attention_mask_name_ = model_inputs_[1];

       attention_mask_edge_ = this->createEdge(attention_mask_name_);

       input_edges.push_back(attention_mask_edge_);

     }

     if (model_inputs_.size() > 2) {

       position_ids_name_ = model_inputs_[2];

       position_ids_edge_ = this->createEdge(position_ids_name_);

       input_edges.push_back(position_ids_edge_);

     }

     if (model_inputs_.size() > 3) {

       past_key_values_name_ = model_inputs_[3];

       past_key_values_edge_ = this->createEdge(past_key_values_name_);

       input_edges.push_back(past_key_values_edge_);

     }


     // 创建输出边

     std::vector<dag::Edge*> output_edges;

     logits_name_ = model_outputs_[0];

     logits_edge_ = outputs_[0];

     output_edges.push_back(logits_edge_);

     if (model_outputs_.size() > 1) {

       presents_name_ = model_outputs_[1];

       presents_edge_ = this->createEdge(presents_name_);

       output_edges.push_back(presents_edge_);

     }


     // 创建embedding节点

     DefaultLlmInferParam* default_llm_infer_param =

         dynamic_cast<DefaultLlmInferParam*>(param_.get());

     if (default_llm_infer_param->is_embedding_) {

       dag::NodeDesc desc("embedding_node", {inputs_[0]->getName()},

                          {input_ids_edge_->getName()});

       embedding_node_ =

           dynamic_cast<Embedding*>(this->createNode<Embedding>(desc));

       // 参数设置开始

       auto embedding_param = default_llm_infer_param->embedding_param_;

       embedding_node_->setParamSharedPtr(embedding_param);

       // 参数设置结束

       embedding_node_->setInitializedFlag(false);

       embedding_node_->init();

       embedding_node_->setInitializedFlag(true);

     } else {

       // TODO

       // tokenizer::TokenizerIds -> device::Tensor

       ;

     }


     // 创建infer节点

     std::vector<std::string> input_names;

     std::vector<std::string> output_names;

     for (auto input : input_edges) {

       input_names.push_back(input->getName());

     }

     for (auto output : output_edges) {

       output_names.push_back(output->getName());

     }

     dag::NodeDesc desc("llm_infer", input_names, output_names);

     std::string share_key = this->getShareKey();

     auto infer = this->getResourceWithoutState<infer::Infer*>(share_key);

     if (infer == nullptr) {

       llm_infer_ = dynamic_cast<infer::Infer*>(this->createInfer<infer::Infer>(

           desc, default_llm_infer_param->inference_type_));

       // 参数设置开始

       llm_infer_->setParamSharedPtr(default_llm_infer_param->inference_param_);

       // 参数设置结束

       llm_infer_->init();

       this->addResourceWithoutState(share_key, llm_infer_);

     } else {

       llm_infer_ =

           dynamic_cast<infer::Infer*>(this->createNode<infer::Infer>(desc));

       infer->shareInference(llm_infer_);

       llm_infer_->setInitializedFlag(false);

       llm_infer_->init();

       llm_infer_->setInitializedFlag(true);

     }

     return base::kStatusCodeOk;

   }


   virtual base::Status run() {

     if (is_prefill_) {

       return prefill();

     } else {

       return decode();

     }

   }


   virtual base::Status prefill() {

     DefaultLlmInferParam* default_llm_infer_param =

         dynamic_cast<DefaultLlmInferParam*>(param_.get());

     // 全局的history_token

     tokenizer::TokenizerIds* ids =

         (tokenizer::TokenizerIds*)inputs_[0]->getParam(this);

     std::vector<int32_t>* history_tokens =

         new std::vector<int32_t>(ids->ids_[0]);

     dag::Edge* history_tokens_edge =

         this->createResourceWithState("history_tokens");

     history_tokens_edge->set<std::vector<int32_t>>(history_tokens, false);


     auto seq_len = ids->ids_[0].size();

     auto all_seq_len = all_seq_len_;

     auto attention_mask_data_type = base::dataTypeOf<float>();

     auto attention_mask_data_format = base::DataFormat::kDataFormatS1D;

     auto position_ids_data_type = base::dataTypeOf<int>();

     auto position_ids_data_format = base::DataFormat::kDataFormatNC;


     // 给输入边数据

     if (attention_mask_edge_ != nullptr) {

       auto attention_mask =

           genAttentionMask(seq_len, all_seq_len, attention_mask_data_type,

                            attention_mask_data_format);

       attention_mask_edge_->set(attention_mask, false);

     }

     if (position_ids_edge_ != nullptr) {

       auto position_ids =

           genPositionIds(seq_len, all_seq_len, position_ids_data_type,

                          position_ids_data_format);

       position_ids_edge_->set(position_ids, false);

     }

     if (past_key_values_edge_ != nullptr) {

       auto kv_init_shape = default_llm_infer_param->kv_init_shape_;

       kv_init_shape.insert(kv_init_shape.begin(), 24);

       auto past_kv = genPastKeyValue(kv_init_shape);

       past_key_values_edge_->set(past_kv, false);

     }


     // 执行embedding节点和infer节点

     if (embedding_node_ != nullptr) {

       auto status = embedding_node_->run();

       NNDEPLOY_RETURN_ON_NEQ(status, base::kStatusCodeOk,

                              "prefill embedding_node_ run failed!");

     }

     if (llm_infer_ != nullptr) {

       auto status = llm_infer_->run();

       NNDEPLOY_RETURN_ON_NEQ(status, base::kStatusCodeOk,

                              "prefill llm_infer_ run failed!");

     }


     // 全局tensor资源

     if (presents_edge_ != nullptr && past_key_values_edge_ != nullptr) {

       device::Tensor* presents =

           (device::Tensor*)presents_edge_->getTensor(llm_infer_);

       presents->setName(past_key_values_edge_->getName());

       dag::Edge* past_key_values_edge =

           this->createResourceWithState(past_key_values_edge_->getName());

       past_key_values_edge->set(presents, true);

     }


     return base::kStatusCodeOk;

   }

   virtual base::Status decode() {  // 执行embedding节点和infer节点

     tokenizer::TokenizerIds* ids = nullptr;

     if (inputs_.size() == 1 || inputs_[1]->empty()) {

       ids = (tokenizer::TokenizerIds*)inputs_[0]->getParam(this);

     } else {

       ids = (tokenizer::TokenizerIds*)inputs_[1]->getParam(this);

     }

     dag::Edge* history_tokens_edge =

         this->getResourceWithState("history_tokens");

     std::vector<int32_t>* history_tokens = nullptr;

     if (history_tokens_edge != nullptr) {

       history_tokens = history_tokens_edge->get<std::vector<int32_t>>(this);

       history_tokens->push_back(ids->ids_[0].back());

     }


     // auto seq_len = ids->ids_[0].size();

     auto seq_len = 1;

     all_seq_len_ = history_tokens->size();

     auto all_seq_len = all_seq_len_;

     auto attention_mask_data_type = base::dataTypeOf<float>();

     auto attention_mask_data_format = base::DataFormat::kDataFormatS1D;

     auto position_ids_data_type = base::dataTypeOf<int>();

     auto position_ids_data_format = base::DataFormat::kDataFormatNC;


     gen_seq_len_++;


     if (attention_mask_edge_ != nullptr) {

       auto attention_mask =

           genAttentionMask(seq_len, all_seq_len, attention_mask_data_type,

                            attention_mask_data_format);

       attention_mask_edge_->set(attention_mask, false);

     }

     if (position_ids_edge_ != nullptr) {

       auto position_ids =

           genPositionIds(seq_len, all_seq_len, position_ids_data_type,

                          position_ids_data_format);

       position_ids_edge_->set(position_ids, false);

     }


     if (past_key_values_edge_ != nullptr) {

       auto past_kv = this->getResourceWithState<device::Tensor>(

           past_key_values_edge_->getName());

       past_key_values_edge_->set(past_kv, true);

     }


     if (embedding_node_ != nullptr) {

       auto status = embedding_node_->run();

       NNDEPLOY_RETURN_ON_NEQ(status, base::kStatusCodeOk,

                              "decode embedding_node_ run failed!");

     }

     if (llm_infer_ != nullptr) {

       auto status = llm_infer_->run();

       NNDEPLOY_RETURN_ON_NEQ(status, base::kStatusCodeOk,

                              "decode llm_infer_ run failed!");

     }


     // 全局tensor资源

     if (presents_edge_ != nullptr && past_key_values_edge_ != nullptr) {

       device::Tensor* presents =

           (device::Tensor*)presents_edge_->getTensor(llm_infer_);

       presents->setName(past_key_values_edge_->getName());

       this->setResourceWithState(past_key_values_edge_->getName(), presents);

     }


     return base::kStatusCodeOk;

   }


   base::Status parseConfig(const std::string& file_path) {

     base::Status status = base::kStatusCodeOk;

     if (param_ != nullptr) {

       DefaultLlmInferParam* default_llm_infer_param =

           dynamic_cast<DefaultLlmInferParam*>(param_.get());

       default_llm_infer_param->loadFile(file_path);

     }

     return status;

   }


   virtual base::Status setIterInput(dag::Edge* input, int index) {

     base::Status status = dag::Node::setIterInput(input, index);

     if (status != base::kStatusCodeOk) {

       NNDEPLOY_LOGE("DefaultLlmInfer::setIterInput failed\n");

       return status;

     }

     if (embedding_node_ != nullptr) {

       embedding_node_->setIterInput(input, 1);

     }

     return base::kStatusCodeOk;

   }


  private:

   Embedding* embedding_node_;

   infer::Infer* llm_infer_;


   // 输入边

   std::string input_ids_name_ = "input_ids";

   std::string attention_mask_name_ = "attention_mask";

   std::string position_ids_name_ = "position_ids";

   std::string past_key_values_name_ = "past_key_values";

   dag::Edge* input_ids_edge_ = nullptr;

   dag::Edge* attention_mask_edge_ = nullptr;

   dag::Edge* position_ids_edge_ = nullptr;

   dag::Edge* past_key_values_edge_ = nullptr;

   // 输出边

   std::string logits_name_ = "logits";

   std::string presents_name_ = "presents";

   dag::Edge* logits_edge_ = nullptr;

   dag::Edge* presents_edge_ = nullptr;


   //

   int all_seq_len_ = 0;

   int gen_seq_len_ = 0;

 };


 }  // namespace llm

 }  // namespace nndeploy


 #endif

abstract_llm_infer.h

any.h

buffer.h

nndeploy::base::Param
Definition: param.h:37

nndeploy::base::Param::deserialize
virtual base::Status deserialize(rapidjson::Value &json)

nndeploy::base::Param::loadFile
virtual base::Status loadFile(const std::string &path)

nndeploy::base::Param::serialize
virtual std::string serialize()

nndeploy::base::Status
Definition: status.h:87

nndeploy::dag::CompositeNode::createEdge
Edge * createEdge(const std::string &name)

nndeploy::dag::Edge
Edge class in DAG graph for connecting nodes and transferring data.
Definition: edge.h:35

nndeploy::dag::Edge::set
base::Status set(device::Buffer *buffer, bool is_external=true)
Set Buffer data to Edge.

nndeploy::dag::Edge::get
T * get(const Node *node)
Get arbitrary type data for specified node (template version)
Definition: edge.h:443

nndeploy::dag::Edge::getTensor
device::Tensor * getTensor(const Node *node)
Get Tensor data for specified node.

nndeploy::dag::Edge::getName
std::string getName()
Get the name of the Edge.

nndeploy::dag::NodeDesc
Node description class.
Definition: node.h:35

nndeploy::dag::Node::desc_
std::string desc_
Node description.
Definition: node.h:1294

nndeploy::dag::Node::setResourceWithState
base::Status setResourceWithState(const std::string &key, T *value, bool is_external=true)
Set stateful resource (template method)
Definition: node.h:533

nndeploy::dag::Node::getParam
virtual base::Param * getParam()
Get parameter.

nndeploy::dag::Node::addResourceWithoutState
virtual base::Status addResourceWithoutState(const std::string &key, const base::Any &value)
Add stateless resource.

nndeploy::dag::Node::setIterInput
virtual base::Status setIterInput(Edge *input, int index=-1)
Set iteration input edge.

nndeploy::dag::Node::createResourceWithState
virtual Edge * createResourceWithState(const std::string &key)
Create stateful resource.

nndeploy::dag::Node::outputs_
std::vector< Edge * > outputs_
Output edge list.
Definition: node.h:1318

nndeploy::dag::Node::setInitializedFlag
void setInitializedFlag(bool flag)
Set initialized flag.

nndeploy::dag::Node::key_
std::string key_
Node key.
Definition: node.h:1290

nndeploy::dag::Node::inputs_
std::vector< Edge * > inputs_
Input edge list.
Definition: node.h:1317

nndeploy::dag::Node::getResourceWithState
virtual Edge * getResourceWithState(const std::string &key)
Get stateful resource.

nndeploy::dag::Node::param_
std::shared_ptr< base::Param > param_
Node parameters.
Definition: node.h:1304

nndeploy::device::Tensor
Tensor class
Definition: tensor.h:26

nndeploy::device::Tensor::setName
base::Status setName(const std::string &)

nndeploy::infer::Infer
Definition: infer.h:12

nndeploy::infer::Infer::shareInference
virtual base::Status shareInference(Infer *infer)

nndeploy::infer::Infer::run
virtual base::Status run()
Run node (pure virtual function)

nndeploy::infer::Infer::setParamSharedPtr
virtual base::Status setParamSharedPtr(std::shared_ptr< base::Param > param)
Set parameter (shared pointer)

nndeploy::infer::Infer::init
virtual base::Status init()
Initialize node.

nndeploy::llm::AbstractLlmInfer
Definition: abstract_llm_infer.h:30

nndeploy::llm::AbstractLlmInfer::genAttentionMask
device::Tensor * genAttentionMask(int seq_len, int all_seq_len, base::DataType data_type, base::DataFormat data_format, base::DeviceType device_type=base::kDeviceTypeCodeCpu)
Definition: abstract_llm_infer.h:115

nndeploy::llm::AbstractLlmInfer::getShareKey
std::string getShareKey()
Definition: abstract_llm_infer.h:256

nndeploy::llm::AbstractLlmInfer::genPositionIds
device::Tensor * genPositionIds(int seq_len, int all_seq_len, base::DataType data_type, base::DataFormat data_format, base::DeviceType device_type=base::kDeviceTypeCodeCpu)
Definition: abstract_llm_infer.h:89

nndeploy::llm::AbstractLlmInfer::model_outputs_
std::vector< std::string > model_outputs_
Definition: abstract_llm_infer.h:280

nndeploy::llm::AbstractLlmInfer::model_inputs_
std::vector< std::string > model_inputs_
Definition: abstract_llm_infer.h:277

nndeploy::llm::AbstractLlmInfer::is_prefill_
bool is_prefill_
Definition: abstract_llm_infer.h:268

nndeploy::llm::AbstractLlmInfer::genPastKeyValue
device::Tensor * genPastKeyValue(const std::vector< int32_t > &kv_init_shape, base::DeviceType device_type=base::kDeviceTypeCodeCpu)
Definition: abstract_llm_infer.h:76

nndeploy::llm::AbstractLlmInfer::config_path_
std::vector< std::string > config_path_
Definition: abstract_llm_infer.h:270

nndeploy::llm::DefaultLlmInfer
Definition: default_llm_infer.h:156

nndeploy::llm::DefaultLlmInfer::~DefaultLlmInfer
virtual ~DefaultLlmInfer()
Definition: default_llm_infer.h:174

nndeploy::llm::DefaultLlmInfer::DefaultLlmInfer
DefaultLlmInfer(const std::string &name)
Definition: default_llm_infer.h:158

nndeploy::llm::DefaultLlmInfer::init
virtual base::Status init()
Initialize node.
Definition: default_llm_infer.h:176

nndeploy::llm::DefaultLlmInfer::setIterInput
virtual base::Status setIterInput(dag::Edge *input, int index)
Set iteration input edge.
Definition: default_llm_infer.h:414

nndeploy::llm::DefaultLlmInfer::run
virtual base::Status run()
Run node (pure virtual function)
Definition: default_llm_infer.h:266

nndeploy::llm::DefaultLlmInfer::parseConfig
base::Status parseConfig(const std::string &file_path)
Definition: default_llm_infer.h:404

nndeploy::llm::DefaultLlmInfer::DefaultLlmInfer
DefaultLlmInfer(const std::string &name, std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs)
Definition: default_llm_infer.h:165

nndeploy::llm::DefaultLlmInfer::decode
virtual base::Status decode()
Definition: default_llm_infer.h:337

nndeploy::llm::DefaultLlmInfer::prefill
virtual base::Status prefill()
Definition: default_llm_infer.h:274

nndeploy::llm::Embedding
Embedding - word embedding node
Definition: embedding.h:120

nndeploy::llm::Embedding::run
virtual base::Status run()
Run node (pure virtual function)

nndeploy::tokenizer::TokenizerIds
Definition: tokenizer.h:239

nndeploy::tokenizer::TokenizerIds::ids_
std::vector< std::vector< int32_t > > ids_
Definition: tokenizer.h:241

common.h

composite_node.h

device.h

edge.h

embedding.h

glic_stl_include.h

graph.h

infer.h

log.h

NNDEPLOY_LOGE
#define NNDEPLOY_LOGE(fmt,...)
Definition: log.h:59

loop.h

macro.h

NNDEPLOY_CC_API
#define NNDEPLOY_CC_API
api
Definition: macro.h:29

memory_pool.h

nndeploy::base::InferenceType
InferenceType
Definition: common.h:284

nndeploy::base::kInferenceTypeOnnxRuntime
@ kInferenceTypeOnnxRuntime
Definition: common.h:293

nndeploy::base::kStatusCodeOk
@ kStatusCodeOk
Definition: status.h:13

nndeploy::base::stringToDataType
DataType stringToDataType(const std::string &str)

nndeploy::base::dataTypeToString
std::string dataTypeToString(DataType data_type)

nndeploy::base::stringToInferenceType
InferenceType stringToInferenceType(const std::string &src)

nndeploy::base::dataTypeOf< float >
DataType dataTypeOf< float >()

nndeploy::base::inferenceTypeToString
std::string inferenceTypeToString(InferenceType src)

nndeploy::base::kDataFormatNC
@ kDataFormatNC
Definition: common.h:135

nndeploy::base::kDataFormatS1D
@ kDataFormatS1D
Definition: common.h:143

nndeploy::inference::createInferenceParam
std::shared_ptr< InferenceParam > createInferenceParam(base::InferenceType type)
Create a Inference Param object.

nndeploy
Definition: common.h:10

node.h

object.h

opencv_include.h

param.h

status.h

NNDEPLOY_RETURN_ON_NEQ
#define NNDEPLOY_RETURN_ON_NEQ(status, expected, str)
Definition: status.h:183

string.h

nndeploy::base::DataType
Definition: common.h:24

nndeploy::llm::DefaultLlmInferParam
Definition: default_llm_infer.h:31

nndeploy::llm::DefaultLlmInferParam::is_embedding_
bool is_embedding_
Definition: default_llm_infer.h:33

nndeploy::llm::DefaultLlmInferParam::inference_type_
base::InferenceType inference_type_
Definition: default_llm_infer.h:36

nndeploy::llm::DefaultLlmInferParam::deserialize
virtual base::Status deserialize(rapidjson::Value &json) override
Definition: default_llm_infer.h:97

nndeploy::llm::DefaultLlmInferParam::inference_param_
std::shared_ptr< inference::InferenceParam > inference_param_
Definition: default_llm_infer.h:37

nndeploy::llm::DefaultLlmInferParam::kv_init_shape_
std::vector< int32_t > kv_init_shape_
Definition: default_llm_infer.h:41

nndeploy::llm::DefaultLlmInferParam::serialize
virtual base::Status serialize(rapidjson::Value &json, rapidjson::Document::AllocatorType &allocator) override
Definition: default_llm_infer.h:46

nndeploy::llm::DefaultLlmInferParam::embedding_param_
std::shared_ptr< EmbeddingParam > embedding_param_
Definition: default_llm_infer.h:34

tensor.h