nndeploy C++ API  0.2.0
nndeploy C++ API
llm_infer.h
Go to the documentation of this file.
1 
10 #ifndef _NNDEPLOY_LLM_LLM_INFER_H_
11 #define _NNDEPLOY_LLM_LLM_INFER_H_
12 
13 #include "nndeploy/base/any.h"
14 #include "nndeploy/base/common.h"
16 #include "nndeploy/base/log.h"
17 #include "nndeploy/base/macro.h"
18 #include "nndeploy/base/object.h"
20 #include "nndeploy/base/param.h"
21 #include "nndeploy/base/status.h"
22 #include "nndeploy/base/string.h"
24 #include "nndeploy/dag/edge.h"
25 #include "nndeploy/dag/graph.h"
26 #include "nndeploy/dag/loop.h"
27 #include "nndeploy/dag/node.h"
28 #include "nndeploy/device/buffer.h"
29 #include "nndeploy/device/device.h"
31 #include "nndeploy/device/tensor.h"
32 #include "nndeploy/infer/infer.h"
34 
35 namespace nndeploy {
36 namespace llm {
37 
48  public:
49  LlmInfer(const std::string& name, std::vector<dag::Edge*> inputs,
50  std::vector<dag::Edge*> outputs);
51  virtual ~LlmInfer();
52 
53  virtual base::Status setPrefill(bool is_prefill);
54  virtual int getMaxSeqLen();
55 
56  virtual base::Status init();
57  virtual base::Status deinit();
58 
59  virtual base::Status run();
60  virtual base::Status setIterInput(dag::Edge* input, int index);
61 
64  rapidjson::Value& json,
65  rapidjson::Document::AllocatorType& allocator) override;
67  virtual base::Status deserialize(rapidjson::Value& json) override;
68 
69  llm::AbstractLlmInfer* createLlmInfer(std::vector<dag::Edge*> inputs,
70  std::vector<dag::Edge*> outputs,
71  const std::string& infer_key,
72  const std::string& model_key,
73  bool is_prefill);
74 
75  private:
76  bool is_prefill_ = true;
77  // config_path
78  std::vector<std::string> config_path_;
79  // qwen or llama...
80  std::string model_key_ = "Qwen";
81  // llm::DefaultLlmInfer or llm::MnnLlmInfer
82  std::string infer_key_ = "DefaultLlmInfer";
83  // llm::AbstractLlmInfer
84  llm::AbstractLlmInfer* llm_infer_ = nullptr;
85 
86  // model inputs
87  std::vector<std::string> model_inputs_ = {"input_ids", "attention_mask",
88  "position_ids", "past_key_values"};
89  // model outputs
90  std::vector<std::string> model_outputs_ = {"logits", "presents"};
91 };
92 
93 } // namespace llm
94 } // namespace nndeploy
95 
96 #endif
Composite node: a composite node is a special type of node in nndeploy that enhances the capabilities of...
virtual base::Status deserialize(rapidjson::Value &json)
Deserialize from JSON.
virtual std::string serialize()
Serialize to JSON string.
Edge class in DAG graph for connecting nodes and transferring data.
Definition: edge.h:35
LlmInfer - LLM inference node (LLM推理节点)
Definition: llm_infer.h:47
virtual base::Status run()
Run node (pure virtual function)
virtual base::Status init()
Initialize node.
virtual int getMaxSeqLen()
llm::AbstractLlmInfer * createLlmInfer(std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs, const std::string &infer_key, const std::string &model_key, bool is_prefill)
virtual base::Status serialize(rapidjson::Value &json, rapidjson::Document::AllocatorType &allocator) override
Serialize to JSON.
virtual base::Status setPrefill(bool is_prefill)
LlmInfer(const std::string &name, std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs)
virtual base::Status deserialize(rapidjson::Value &json) override
Deserialize from JSON.
virtual base::Status deinit()
Deinitialize node.
virtual base::Status setIterInput(dag::Edge *input, int index)
Set iteration input edge.
#define NNDEPLOY_CC_API
api
Definition: macro.h:29