nndeploy C++ API  0.2.0
nndeploy C++ API
qwen.h
Go to the documentation of this file.
1 
2 #ifndef _NNDEPLOY_LLM_QWEN_H_
3 #define _NNDEPLOY_LLM_QWEN_H_
4 
5 #include "nndeploy/base/any.h"
6 #include "nndeploy/base/common.h"
8 #include "nndeploy/base/log.h"
9 #include "nndeploy/base/macro.h"
10 #include "nndeploy/base/object.h"
12 #include "nndeploy/base/param.h"
13 #include "nndeploy/base/status.h"
14 #include "nndeploy/base/string.h"
16 #include "nndeploy/dag/edge.h"
17 #include "nndeploy/dag/graph.h"
18 #include "nndeploy/dag/loop.h"
19 #include "nndeploy/dag/node.h"
20 #include "nndeploy/device/buffer.h"
21 #include "nndeploy/device/device.h"
23 #include "nndeploy/device/tensor.h"
24 #include "nndeploy/infer/infer.h"
26 #include "nndeploy/tokenizer/tokenizer_cpp/tokenizer_cpp.h"
27 
28 namespace nndeploy {
29 namespace qwen {
30 
35  std::string model_value_;
36  std::string embedding_file_;
37  std::string tokenizer_json_, tokenizer_txt_;
38  std::string prompt_template_;
39  std::string prompt_;
40  std::vector<int32_t> kv_init_shape_;
41 };
42 
43 extern NNDEPLOY_CC_API QwenConfig parseConfig(const std::string& file_path);
44 
45 #define NNDEPLOY_LLAMA2 "NNDEPLOY_LLAMA2"
46 #define DELETE_POINTER(ptr) \
47  if (ptr != nullptr) { \
48  delete ptr; \
49  ptr = nullptr; \
50  }
51 
53  public:
54  std::string prompt_template_ =
55  "<|im_start|>user\n%s<|im_end|>\n<|im_start|>assistant\n";
56  std::string user_content_;
57 
58  public:
59  base::Status serialize(rapidjson::Value& json,
60  rapidjson::Document::AllocatorType& allocator);
61  base::Status deserialize(rapidjson::Value& json);
62 };
63 
65  public:
70  int all_seq_len_ = 0;
71  int gen_seq_len_ = 0;
72  std::string embedding_file_;
73 
77  std::vector<int32_t> kv_init_shape_;
79  base::DataType posid_data_type_ = base::dataTypeOf<int>();
81 
82  public:
83  base::Status serialize(rapidjson::Value& json,
84  rapidjson::Document::AllocatorType& allocator);
85  base::Status deserialize(rapidjson::Value& json);
86 };
87 
89  public:
90  PrefillEmbeddingNode(const std::string& name, std::vector<dag::Edge*>& inputs,
91  std::vector<dag::Edge*>& outputs)
92  : Node(name, inputs, outputs), is_first_(true) {
93  key_ = "nndeploy::qwen::PrefillEmbeddingNode";
94  desc_ =
95  "PrefillEmbeddingNode generates model input embeddings including:\n"
96  "1. Token embedding vectors\n"
97  "2. Attention mask matrix\n"
98  "3. Position ids vector\n"
99  "4. Past key values cache\n"
100  "\n"
101  "Inputs:\n"
102  "- inputs[0]: TokenizerIds containing input token sequence\n"
103  "Outputs:\n"
104  "- outputs[0]: Input token embedding tensor\n"
105  "- outputs[1]: Attention mask tensor\n"
106  "- outputs[2]: Position ids tensor\n"
107  "- outputs[3]: Past key values cache tensor";
108  param_ = std::make_shared<PrefillEmbeddingParam>();
109  this->setInputTypeInfo<tokenizer::TokenizerIds>();
110  this->setOutputTypeInfo<device::Tensor>();
111  this->setOutputTypeInfo<device::Tensor>();
112  this->setOutputTypeInfo<device::Tensor>();
113  this->setOutputTypeInfo<device::Tensor>();
114  }
116  virtual base::Status run();
117 
118  protected:
119  device::Tensor* genPastKeyValue(const std::vector<int32_t>& kv_init_shape);
120  device::Tensor* genEmbedding(const std::vector<int32_t>& input_ids,
121  int seq_len, int hidden_size,
122  base::DataType data_type,
123  base::DataFormat data_format,
124  std::string& embedding_file);
125 
126  device::Tensor* genAttentionMask(int seq_len, int all_seq_len,
127  base::DataType data_type,
128  base::DataFormat data_format);
129 
130  device::Tensor* genPositionIds(int seq_len, int all_seq_len,
131  base::DataType data_type,
132  base::DataFormat data_format);
133 
134  protected:
135  bool is_first_;
136  device::Tensor* past_kv_ = nullptr;
137 };
138 
140  public:
145  int all_seq_len_ = 0;
146  int gen_seq_len_ = 0;
147  std::string embedding_file_;
148 
153  base::DataType posid_data_type_ = base::dataTypeOf<int>();
155  std::vector<std::vector<int32_t>> token_ids_;
158 
159  public:
160  base::Status serialize(rapidjson::Value& json,
161  rapidjson::Document::AllocatorType& allocator);
162  base::Status deserialize(rapidjson::Value& json);
163 };
164 
166  public:
169 };
170 
172  public:
173  DecodeEmbeddingNode(const std::string& name, std::vector<dag::Edge*>& inputs,
174  std::vector<dag::Edge*>& outputs)
175  : Node(name, inputs, outputs), is_first_(true) {
176  key_ = "nndeploy::qwen::DecodeEmbeddingNode";
177  desc_ =
178  "DecodeEmbeddingNode generates model input embeddings including:\n"
179  "1. Token embedding vectors\n"
180  "2. Attention mask matrix\n"
181  "3. Position ids vector\n"
182  "4. Past key values cache\n"
183  "\n"
184  "Inputs:\n"
185  "- inputs[0]: TokenizerIds containing input token sequence\n"
186  "- inputs[1]: past kv values\n"
187  "- inputs[2]: history input token sequence\n"
188  "Outputs:\n"
189  "- outputs[0]: Input token embedding tensor\n"
190  "- outputs[1]: Attention mask tensor\n"
191  "- outputs[2]: Position ids tensor\n"
192  "- outputs[3]: Past key values cache tensor";
193  param_ = std::make_shared<DecodeEmbeddingParam>();
194  this->setInputTypeInfo<tokenizer::TokenizerIds>();
195  this->setInputTypeInfo<tokenizer::TokenizerIds>();
196  this->setInputTypeInfo<device::Tensor>();
197  this->setOutputTypeInfo<device::Tensor>();
198  this->setOutputTypeInfo<device::Tensor>();
199  this->setOutputTypeInfo<device::Tensor>();
200  this->setOutputTypeInfo<device::Tensor>();
201  }
202  virtual ~DecodeEmbeddingNode() {}
203 
204  virtual base::Status run();
205 
206  protected:
207  device::Tensor* genEmbedding(const std::vector<int32_t>& input_ids,
208  int seq_len, int hidden_size,
209  base::DataType data_type,
210  base::DataFormat data_format,
211  std::string& embedding_file);
212 
213  device::Tensor* genAttentionMask(int seq_len, int all_seq_len,
214  base::DataType data_type,
215  base::DataFormat data_format);
216 
217  device::Tensor* genPositionIds(int seq_len, int all_seq_len,
218  base::DataType data_type,
219  base::DataFormat data_format);
220 
221  protected:
222  bool is_first_;
224 };
225 
227  public:
228  PrefillSampleNode(const std::string& name, std::vector<dag::Edge*> inputs,
229  std::vector<dag::Edge*> outputs)
230  : Node(name, inputs, outputs), is_first_(true) {
231  key_ = "nndeploy::qwen::PrefillSampleNode";
232  desc_ = "Sample next token IDs from logits during LLM prefill stage.";
233  this->setInputTypeInfo<device::Tensor>();
234  this->setInputTypeInfo<tokenizer::TokenizerIds>();
235  this->setOutputTypeInfo<tokenizer::TokenizerIds>();
236  }
237  virtual ~PrefillSampleNode() {}
238  virtual base::Status run();
239 
240  protected:
241  int32_t sample(device::Tensor* logits, const std::vector<int>& pre_ids);
242 
243  protected:
244  bool is_first_;
245 };
246 
248  public:
249  DecodeSampleNode(const std::string& name, std::vector<dag::Edge*> inputs,
250  std::vector<dag::Edge*> outputs)
251  : Node(name, inputs, outputs), is_first_(true) {
252  key_ = "nndeploy::qwen::DecodeSampleNode";
253  desc_ = "Sample next token IDs from logits during LLM decode stage.";
254  param_ = std::make_shared<DecodeSampleParam>();
255  this->setInputTypeInfo<device::Tensor>();
256  this->setOutputTypeInfo<tokenizer::TokenizerIds>();
257  }
258  virtual ~DecodeSampleNode() {}
259  virtual base::Status run();
260 
261  protected:
262  int32_t sample(device::Tensor* logits, const std::vector<int>& pre_ids);
263 
264  protected:
265  bool is_first_;
266  std::vector<int32_t> history_ids_;
267 };
268 
270  public:
271  PromptNode(const std::string& name, std::vector<dag::Edge*> inputs,
272  std::vector<dag::Edge*> outputs)
273  : Node(name, inputs, outputs) {
274  key_ = "nndeploy::qwen::PromptNode";
275  desc_ =
276  "Generate TokenizerText from prompt string using optional template.";
277  param_ = std::make_shared<PromptParam>();
278  this->setOutputTypeInfo<tokenizer::TokenizerText>();
279  node_type_ = dag::NodeType::kNodeTypeInput;
280  this->setIoType(dag::IOType::kIOTypeString);
281  }
282  virtual ~PromptNode() {}
283  virtual base::Status run();
284 
286  if (index_ < size_) {
288  } else {
289  if (size_ == 0) {
291  } else {
293  }
294  }
295  }
296 
297  void setSize(int size) {
298  if (size > 0) {
299  size_ = size;
300  }
301  }
302  int getSize() { return size_; }
303 
304  protected:
305  std::string applyTemplate(std::string prompt_template,
306  const std::string& content,
307  const std::string& role = "");
308 
309  private:
310  int index_ = 0;
311  int size_ = 1;
312 };
313 
315  public:
316  PrintNode(const std::string& name, std::vector<dag::Edge*> inputs,
317  std::vector<dag::Edge*> outputs)
318  : Node(name, inputs, outputs) {
319  key_ = "nndeploy::qwen::PrintNode";
320  desc_ = "Print TokenizerText content and save to temporary output file.";
321  this->setInputTypeInfo<tokenizer::TokenizerText>();
322  node_type_ = dag::NodeType::kNodeTypeOutput;
323  this->setIoType(dag::IOType::kIOTypeText);
324  }
325  virtual ~PrintNode() {}
326  virtual base::Status run();
327 
328  virtual base::Status serialize(rapidjson::Value& json,
329  rapidjson::Document::AllocatorType& allocator);
330  virtual base::Status deserialize(rapidjson::Value& json);
331 
332  void set_path(std::string path) { path_ = path; }
333 
334  private:
335  std::string path_ = "resources/others/qwen_out.txt";
336 };
337 
339  public:
340  QwenPrefill(const std::string& name, std::vector<dag::Edge*> inputs,
341  std::vector<dag::Edge*> outputs)
342  : CompositeNode(name, inputs, outputs) {
343  key_ = "nndeploy::qwen::QwenPrefill";
344  desc_ =
345  "LLM prefill pipeline: TokenizerText -> token IDs -> embeddings -> "
346  "inference -> sampled token IDs with KV cache.";
347  this->setInputTypeInfo<tokenizer::TokenizerText>();
348  this->setOutputTypeInfo<tokenizer::TokenizerIds>();
349  this->setOutputTypeInfo<device::Tensor>();
350  this->setOutputTypeInfo<tokenizer::TokenizerIds>();
351 
352  prefill_token_node_ = dynamic_cast<tokenizer::TokenizerEncodeCpp*>(
353  this->createNode<tokenizer::TokenizerEncodeCpp>("token_node"));
354  prefill_embedding_node_ = dynamic_cast<PrefillEmbeddingNode*>(
355  this->createNode<PrefillEmbeddingNode>("embedding_node"));
356  prefill_infer_node_ = dynamic_cast<infer::Infer*>(
357  this->createNode<infer::Infer>("prefill_infer"));
358  prefill_sample_node_ = dynamic_cast<PrefillSampleNode*>(
359  this->createNode<PrefillSampleNode>("prefill_sample_node"));
360  }
361 
362  virtual base::Status init();
363  virtual base::Status run();
364  virtual base::Status deinit();
366 
367  void setConfigPath(std::string config_path) { config_path_ = config_path; }
368 
370  base::Status setInferParams(bool is_path, base::ModelType model_type,
371  base::DeviceType device_type);
373 
374  virtual base::Status serialize(rapidjson::Value& json,
375  rapidjson::Document::AllocatorType& allocator);
376  virtual base::Status deserialize(rapidjson::Value& json);
377 
378  private:
379  dag::Node* prefill_token_node_;
380  dag::Node* prefill_embedding_node_;
381  infer::Infer* prefill_infer_node_;
382  dag::Node* prefill_sample_node_;
383 
384  std::string config_path_;
385 };
386 
388  public:
389  QwenDecode(const std::string& name, std::vector<dag::Edge*> inputs,
390  std::vector<dag::Edge*> outputs)
391  : CompositeNode(name, inputs, outputs) {
392  key_ = "nndeploy::qwen::QwenDecode";
393  desc_ =
394  "LLM decode pipeline: token IDs + KV cache -> embeddings -> inference "
395  "-> sampled tokens -> decoded text.";
396  this->setInputTypeInfo<tokenizer::TokenizerIds>();
397  this->setInputTypeInfo<device::Tensor>();
398  this->setInputTypeInfo<tokenizer::TokenizerIds>();
399  this->setOutputTypeInfo<tokenizer::TokenizerText>();
400 
401  decode_embedding_node_ = dynamic_cast<DecodeEmbeddingNode*>(
402  this->createNode<DecodeEmbeddingNode>("embedding_node"));
403  decode_infer_node_ = dynamic_cast<infer::Infer*>(
404  this->createNode<infer::Infer>("decode_infer"));
405  decode_sample_node_ = dynamic_cast<DecodeSampleNode*>(
406  this->createNode<DecodeSampleNode>("sample_node"));
407  decode_node_ = dynamic_cast<tokenizer::TokenizerDecodeCpp*>(
408  this->createNode<tokenizer::TokenizerDecodeCpp>("decode_node"));
409  }
410 
411  virtual base::Status init();
412  virtual base::Status run();
413  virtual base::Status deinit();
416 
417  base::Status setInferParams(bool is_path, base::ModelType model_type,
418  base::DeviceType device_type);
420 
421  void setConfigPath(std::string config_path) { config_path_ = config_path; }
422 
423  virtual base::Status serialize(rapidjson::Value& json,
424  rapidjson::Document::AllocatorType& allocator);
425  virtual base::Status deserialize(rapidjson::Value& json);
426 
427  protected:
428  void getStopTokens(std::string& token_file);
429  int loops() { return max_seq_len_; }
430  inline bool isStop() {
431  tokenizer::TokenizerIds* token_ids;
432  if (is_first_) {
433  token_ids =
434  (tokenizer::TokenizerIds*)(decode_embedding_node_->getInput(0)
435  ->getParam(decode_embedding_node_));
436  } else {
437  token_ids =
438  (tokenizer::TokenizerIds*)(decode_sample_node_->getOutput(0)
439  ->getParam(decode_sample_node_));
440  }
441 
442  int token = token_ids->ids_[0][0];
443  return std::find(stop_tokens_.begin(), stop_tokens_.end(), token) !=
444  stop_tokens_.end();
445  }
446 
447  public:
452 
455  bool is_first_ = true;
456 
457  std::vector<int> stop_tokens_;
458  std::vector<int> special_tokens_;
459 
461 
462  std::string result_;
463  std::string config_path_;
464 };
465 
467  const std::string& name, base::InferenceType inference_type,
468  base::DeviceType device_type, dag::Edge* input, dag::Edge* output,
469  base::ModelType model_type, bool is_path,
470  std::vector<std::string> model_value);
471 
472 } // namespace qwen
473 } // namespace nndeploy
474 
475 #endif
Composite node: a special type of node in nndeploy that enhances the capabilities of...
Edge class in DAG graph for connecting nodes and transferring data.
Definition: edge.h:35
Directed Acyclic Graph Node.
Definition: graph.h:31
Node base class.
Definition: node.h:171
Tensor class.
Definition: tensor.h:26
device::Tensor * genPositionIds(int seq_len, int all_seq_len, base::DataType data_type, base::DataFormat data_format)
device::Tensor * genAttentionMask(int seq_len, int all_seq_len, base::DataType data_type, base::DataFormat data_format)
device::Tensor * past_kv_
Definition: qwen.h:223
DecodeEmbeddingNode(const std::string &name, std::vector< dag::Edge * > &inputs, std::vector< dag::Edge * > &outputs)
Definition: qwen.h:173
virtual base::Status run()
Run node (pure virtual function)
device::Tensor * genEmbedding(const std::vector< int32_t > &input_ids, int seq_len, int hidden_size, base::DataType data_type, base::DataFormat data_format, std::string &embedding_file)
std::vector< std::vector< int32_t > > token_ids_
Definition: qwen.h:155
base::Status serialize(rapidjson::Value &json, rapidjson::Document::AllocatorType &allocator)
tokenizer::TokenizerIds history_ids_
Definition: qwen.h:156
base::Status deserialize(rapidjson::Value &json)
virtual base::Status run()
Run node (pure virtual function)
int32_t sample(device::Tensor *logits, const std::vector< int > &pre_ids)
std::vector< int32_t > history_ids_
Definition: qwen.h:266
DecodeSampleNode(const std::string &name, std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs)
Definition: qwen.h:249
tokenizer::TokenizerIds stop_tokens_
Definition: qwen.h:168
tokenizer::TokenizerIds history_ids_
Definition: qwen.h:167
device::Tensor * genPastKeyValue(const std::vector< int32_t > &kv_init_shape)
device::Tensor * genEmbedding(const std::vector< int32_t > &input_ids, int seq_len, int hidden_size, base::DataType data_type, base::DataFormat data_format, std::string &embedding_file)
device::Tensor * genPositionIds(int seq_len, int all_seq_len, base::DataType data_type, base::DataFormat data_format)
PrefillEmbeddingNode(const std::string &name, std::vector< dag::Edge * > &inputs, std::vector< dag::Edge * > &outputs)
Definition: qwen.h:90
virtual base::Status run()
Run node (pure virtual function)
device::Tensor * genAttentionMask(int seq_len, int all_seq_len, base::DataType data_type, base::DataFormat data_format)
int hidden_size_
Need to serialize.
Definition: qwen.h:69
std::vector< int32_t > kv_init_shape_
Definition: qwen.h:77
base::Status deserialize(rapidjson::Value &json)
base::Status serialize(rapidjson::Value &json, rapidjson::Document::AllocatorType &allocator)
PrefillSampleNode(const std::string &name, std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs)
Definition: qwen.h:228
virtual base::Status run()
Run node (pure virtual function)
int32_t sample(device::Tensor *logits, const std::vector< int > &pre_ids)
virtual base::Status run()
Run node (pure virtual function)
virtual base::Status serialize(rapidjson::Value &json, rapidjson::Document::AllocatorType &allocator)
Serialize to JSON.
virtual ~PrintNode()
Definition: qwen.h:325
virtual base::Status deserialize(rapidjson::Value &json)
Deserialize from JSON.
void set_path(std::string path)
Definition: qwen.h:332
PrintNode(const std::string &name, std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs)
Definition: qwen.h:316
virtual base::EdgeUpdateFlag updateInput()
Update input.
Definition: qwen.h:285
std::string applyTemplate(std::string prompt_template, const std::string &content, const std::string &role="")
void setSize(int size)
Definition: qwen.h:297
virtual ~PromptNode()
Definition: qwen.h:282
virtual base::Status run()
Run node (pure virtual function)
PromptNode(const std::string &name, std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs)
Definition: qwen.h:271
base::Status serialize(rapidjson::Value &json, rapidjson::Document::AllocatorType &allocator)
base::Status deserialize(rapidjson::Value &json)
std::string user_content_
Definition: qwen.h:56
base::Status setInferenceType(base::InferenceType inference_type)
virtual base::Status deinit()
Deinitialize node.
virtual base::Status init()
Initialize node.
base::Status setInferParams(bool is_path, base::ModelType model_type, base::DeviceType device_type)
QwenDecode(const std::string &name, std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs)
Definition: qwen.h:389
virtual base::Status run()
Run node (pure virtual function)
std::vector< int > stop_tokens_
Definition: qwen.h:457
std::string config_path_
Definition: qwen.h:463
void setConfigPath(std::string config_path)
Definition: qwen.h:421
dag::Node * decode_sample_node_
Definition: qwen.h:450
dag::Node * decode_node_
Definition: qwen.h:451
tokenizer::TokenizerIds history_ids_
Definition: qwen.h:460
void getStopTokens(std::string &token_file)
std::string result_
Definition: qwen.h:462
dag::Node * decode_embedding_node_
Definition: qwen.h:448
virtual base::Status defaultParam()
Configure default parameters.
infer::Infer * decode_infer_node_
Definition: qwen.h:449
virtual base::Status deserialize(rapidjson::Value &json)
Deserialize from JSON.
virtual base::Status serialize(rapidjson::Value &json, rapidjson::Document::AllocatorType &allocator)
Serialize to JSON.
std::vector< int > special_tokens_
Definition: qwen.h:458
base::Status setConfigParam()
base::Status setInferenceType(base::InferenceType inference_type)
virtual base::Status serialize(rapidjson::Value &json, rapidjson::Document::AllocatorType &allocator)
Serialize to JSON.
base::Status setConfigParam()
virtual base::Status defaultParam()
Configure default parameters.
void setConfigPath(std::string config_path)
Definition: qwen.h:367
QwenPrefill(const std::string &name, std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs)
Definition: qwen.h:340
virtual base::Status deserialize(rapidjson::Value &json)
Deserialize from JSON.
virtual base::Status deinit()
Deinitialize node.
virtual base::Status init()
Initialize node.
base::Status setInferParams(bool is_path, base::ModelType model_type, base::DeviceType device_type)
virtual base::Status run()
Run node (pure virtual function)
std::vector< std::vector< int32_t > > ids_
Definition: tokenizer.h:241
#define NNDEPLOY_CC_API
api
Definition: macro.h:29
DataType dataTypeOf< float >()
@ kEdgeUpdateFlagComplete
Definition: common.h:366
@ kEdgeUpdateFlagTerminate
Definition: common.h:367
@ kDataFormatS1D
Definition: common.h:143
dag::Graph * createQwenGraph(const std::string &name, base::InferenceType inference_type, base::DeviceType device_type, dag::Edge *input, dag::Edge *output, base::ModelType model_type, bool is_path, std::vector< std::string > model_value)
QwenConfig parseConfig(const std::string &file_path)
std::string embedding_file_
Definition: qwen.h:36
std::string prompt_
Definition: qwen.h:39
std::string model_value_
Definition: qwen.h:35
std::string prompt_template_
Definition: qwen.h:38
std::string tokenizer_json_
Definition: qwen.h:37
std::vector< int32_t > kv_init_shape_
Definition: qwen.h:40