2 #ifndef _NNDEPLOY_LLM_QWEN_H_
3 #define _NNDEPLOY_LLM_QWEN_H_
26 #include "nndeploy/tokenizer/tokenizer_cpp/tokenizer_cpp.h"
45 #define NNDEPLOY_LLAMA2 "NNDEPLOY_LLAMA2"
46 #define DELETE_POINTER(ptr) \
47 if (ptr != nullptr) { \
// Default prompt template: a single user turn wrapped in <|im_start|> /
// <|im_end|> markers, followed by an opened assistant turn. The template
// contains exactly one %s placeholder.
// NOTE(review): presumably %s is replaced with the user's message before
// tokenization — confirm against the code that applies the template.
54 std::string prompt_template_ =
55 "<|im_start|>user\n%s<|im_end|>\n<|im_start|>assistant\n";
60 rapidjson::Document::AllocatorType& allocator);
84 rapidjson::Document::AllocatorType& allocator);
91 std::vector<dag::Edge*>& outputs)
92 : Node(name, inputs, outputs), is_first_(true) {
93 key_ =
"nndeploy::qwen::PrefillEmbeddingNode";
95 "PrefillEmbeddingNode generates model input embeddings including:\n"
96 "1. Token embedding vectors\n"
97 "2. Attention mask matrix\n"
98 "3. Position ids vector\n"
99 "4. Past key values cache\n"
102 "- inputs[0]: TokenizerIds containing input token sequence\n"
104 "- outputs[0]: Input token embedding tensor\n"
105 "- outputs[1]: Attention mask tensor\n"
106 "- outputs[2]: Position ids tensor\n"
107 "- outputs[3]: Past key values cache tensor";
108 param_ = std::make_shared<PrefillEmbeddingParam>();
109 this->setInputTypeInfo<tokenizer::TokenizerIds>();
110 this->setOutputTypeInfo<device::Tensor>();
111 this->setOutputTypeInfo<device::Tensor>();
112 this->setOutputTypeInfo<device::Tensor>();
113 this->setOutputTypeInfo<device::Tensor>();
121 int seq_len,
int hidden_size,
124 std::string& embedding_file);
145 int all_seq_len_ = 0;
146 int gen_seq_len_ = 0;
161 rapidjson::Document::AllocatorType& allocator);
174 std::vector<dag::Edge*>& outputs)
175 : Node(name, inputs, outputs), is_first_(true) {
176 key_ =
"nndeploy::qwen::DecodeEmbeddingNode";
178 "DecodeEmbeddingNode generates model input embeddings including:\n"
179 "1. Token embedding vectors\n"
180 "2. Attention mask matrix\n"
181 "3. Position ids vector\n"
182 "4. Past key values cache\n"
185 "- inputs[0]: TokenizerIds containing input token sequence\n"
186 "- inputs[1]: past kv values\n"
187 "- inputs[2]: history input token sequence\n"
189 "- outputs[0]: Input token embedding tensor\n"
190 "- outputs[1]: Attention mask tensor\n"
191 "- outputs[2]: Position ids tensor\n"
192 "- outputs[3]: Past key values cache tensor";
193 param_ = std::make_shared<DecodeEmbeddingParam>();
194 this->setInputTypeInfo<tokenizer::TokenizerIds>();
195 this->setInputTypeInfo<tokenizer::TokenizerIds>();
196 this->setInputTypeInfo<device::Tensor>();
197 this->setOutputTypeInfo<device::Tensor>();
198 this->setOutputTypeInfo<device::Tensor>();
199 this->setOutputTypeInfo<device::Tensor>();
200 this->setOutputTypeInfo<device::Tensor>();
208 int seq_len,
int hidden_size,
211 std::string& embedding_file);
229 std::vector<dag::Edge*> outputs)
230 : Node(name, inputs, outputs), is_first_(true) {
231 key_ =
"nndeploy::qwen::PrefillSampleNode";
232 desc_ =
"Sample next token IDs from logits during LLM prefill stage.";
233 this->setInputTypeInfo<device::Tensor>();
234 this->setInputTypeInfo<tokenizer::TokenizerIds>();
235 this->setOutputTypeInfo<tokenizer::TokenizerIds>();
250 std::vector<dag::Edge*> outputs)
251 : Node(name, inputs, outputs), is_first_(true) {
252 key_ =
"nndeploy::qwen::DecodeSampleNode";
253 desc_ =
"Sample next token IDs from logits during LLM decode stage.";
254 param_ = std::make_shared<DecodeSampleParam>();
255 this->setInputTypeInfo<device::Tensor>();
256 this->setOutputTypeInfo<tokenizer::TokenizerIds>();
271 PromptNode(
const std::string& name, std::vector<dag::Edge*> inputs,
272 std::vector<dag::Edge*> outputs)
273 : Node(name, inputs, outputs) {
274 key_ =
"nndeploy::qwen::PromptNode";
276 "Generate TokenizerText from prompt string using optional template.";
277 param_ = std::make_shared<PromptParam>();
278 this->setOutputTypeInfo<tokenizer::TokenizerText>();
286 if (index_ < size_) {
306 const std::string& content,
307 const std::string& role =
"");
316 PrintNode(
const std::string& name, std::vector<dag::Edge*> inputs,
317 std::vector<dag::Edge*> outputs)
318 : Node(name, inputs, outputs) {
319 key_ =
"nndeploy::qwen::PrintNode";
320 desc_ =
"Print TokenizerText content and save to temporary output file.";
321 this->setInputTypeInfo<tokenizer::TokenizerText>();
329 rapidjson::Document::AllocatorType& allocator);
335 std::string path_ =
"resources/others/qwen_out.txt";
340 QwenPrefill(
const std::string& name, std::vector<dag::Edge*> inputs,
341 std::vector<dag::Edge*> outputs)
342 : CompositeNode(name, inputs, outputs) {
343 key_ =
"nndeploy::qwen::QwenPrefill";
345 "LLM prefill pipeline: TokenizerText -> token IDs -> embeddings -> "
346 "inference -> sampled token IDs with KV cache.";
347 this->setInputTypeInfo<tokenizer::TokenizerText>();
348 this->setOutputTypeInfo<tokenizer::TokenizerIds>();
349 this->setOutputTypeInfo<device::Tensor>();
350 this->setOutputTypeInfo<tokenizer::TokenizerIds>();
352 prefill_token_node_ =
dynamic_cast<tokenizer::TokenizerEncodeCpp*
>(
353 this->createNode<tokenizer::TokenizerEncodeCpp>(
"token_node"));
355 this->createNode<PrefillEmbeddingNode>(
"embedding_node"));
357 this->createNode<infer::Infer>(
"prefill_infer"));
359 this->createNode<PrefillSampleNode>(
"prefill_sample_node"));
375 rapidjson::Document::AllocatorType& allocator);
384 std::string config_path_;
389 QwenDecode(
const std::string& name, std::vector<dag::Edge*> inputs,
390 std::vector<dag::Edge*> outputs)
391 : CompositeNode(name, inputs, outputs) {
392 key_ =
"nndeploy::qwen::QwenDecode";
394 "LLM decode pipeline: token IDs + KV cache -> embeddings -> inference "
395 "-> sampled tokens -> decoded text.";
396 this->setInputTypeInfo<tokenizer::TokenizerIds>();
397 this->setInputTypeInfo<device::Tensor>();
398 this->setInputTypeInfo<tokenizer::TokenizerIds>();
399 this->setOutputTypeInfo<tokenizer::TokenizerText>();
402 this->createNode<DecodeEmbeddingNode>(
"embedding_node"));
404 this->createNode<infer::Infer>(
"decode_infer"));
406 this->createNode<DecodeSampleNode>(
"sample_node"));
407 decode_node_ =
dynamic_cast<tokenizer::TokenizerDecodeCpp*
>(
408 this->createNode<tokenizer::TokenizerDecodeCpp>(
"decode_node"));
424 rapidjson::Document::AllocatorType& allocator);
// Returns the current value of max_seq_len_.
// NOTE(review): presumably this is the iteration cap for the decode loop
// driven by the enclosing node — confirm against the caller of loops().
429 int loops() {
return max_seq_len_; }
435 ->getParam(decode_embedding_node_));
439 ->getParam(decode_sample_node_));
442 int token = token_ids->
ids_[0][0];
443 return std::find(stop_tokens_.begin(), stop_tokens_.end(), token) !=
455 bool is_first_ =
true;
470 std::vector<std::string> model_value);
Composite node Composite node is a special type of node in nndeploy that enhances the capabilities of...
Edge class in DAG graph for connecting nodes and transferring data.
Directed Acyclic Graph Node.
device::Tensor * genPositionIds(int seq_len, int all_seq_len, base::DataType data_type, base::DataFormat data_format)
device::Tensor * genAttentionMask(int seq_len, int all_seq_len, base::DataType data_type, base::DataFormat data_format)
virtual ~DecodeEmbeddingNode()
device::Tensor * past_kv_
DecodeEmbeddingNode(const std::string &name, std::vector< dag::Edge * > &inputs, std::vector< dag::Edge * > &outputs)
virtual base::Status run()
Run node (pure virtual function)
device::Tensor * genEmbedding(const std::vector< int32_t > &input_ids, int seq_len, int hidden_size, base::DataType data_type, base::DataFormat data_format, std::string &embedding_file)
std::vector< std::vector< int32_t > > token_ids_
base::Status serialize(rapidjson::Value &json, rapidjson::Document::AllocatorType &allocator)
device::Tensor * past_kv_
std::string embedding_file_
tokenizer::TokenizerIds history_ids_
base::Status deserialize(rapidjson::Value &json)
virtual base::Status run()
Run node (pure virtual function)
virtual ~DecodeSampleNode()
int32_t sample(device::Tensor *logits, const std::vector< int > &pre_ids)
std::vector< int32_t > history_ids_
DecodeSampleNode(const std::string &name, std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs)
tokenizer::TokenizerIds stop_tokens_
tokenizer::TokenizerIds history_ids_
device::Tensor * genPastKeyValue(const std::vector< int32_t > &kv_init_shape)
device::Tensor * genEmbedding(const std::vector< int32_t > &input_ids, int seq_len, int hidden_size, base::DataType data_type, base::DataFormat data_format, std::string &embedding_file)
device::Tensor * genPositionIds(int seq_len, int all_seq_len, base::DataType data_type, base::DataFormat data_format)
PrefillEmbeddingNode(const std::string &name, std::vector< dag::Edge * > &inputs, std::vector< dag::Edge * > &outputs)
virtual base::Status run()
Run node (pure virtual function)
device::Tensor * genAttentionMask(int seq_len, int all_seq_len, base::DataType data_type, base::DataFormat data_format)
virtual ~PrefillEmbeddingNode()
int hidden_size_
Need to serialize.
std::vector< int32_t > kv_init_shape_
base::Status deserialize(rapidjson::Value &json)
std::string embedding_file_
base::Status serialize(rapidjson::Value &json, rapidjson::Document::AllocatorType &allocator)
PrefillSampleNode(const std::string &name, std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs)
virtual base::Status run()
Run node (pure virtual function)
int32_t sample(device::Tensor *logits, const std::vector< int > &pre_ids)
virtual ~PrefillSampleNode()
virtual base::Status run()
Run node (pure virtual function)
virtual base::Status serialize(rapidjson::Value &json, rapidjson::Document::AllocatorType &allocator)
Serialize to JSON.
virtual base::Status deserialize(rapidjson::Value &json)
Deserialize from JSON.
void set_path(std::string path)
PrintNode(const std::string &name, std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs)
virtual base::EdgeUpdateFlag updateInput()
Update input.
std::string applyTemplate(std::string prompt_template, const std::string &content, const std::string &role="")
virtual base::Status run()
Run node (pure virtual function)
PromptNode(const std::string &name, std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs)
base::Status serialize(rapidjson::Value &json, rapidjson::Document::AllocatorType &allocator)
base::Status deserialize(rapidjson::Value &json)
std::string user_content_
base::Status setInferenceType(base::InferenceType inference_type)
virtual base::Status deinit()
Deinitialize node.
virtual base::Status init()
Initialize node.
base::Status setInferParams(bool is_path, base::ModelType model_type, base::DeviceType device_type)
QwenDecode(const std::string &name, std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs)
virtual base::Status run()
Run node (pure virtual function)
std::vector< int > stop_tokens_
void setConfigPath(std::string config_path)
dag::Node * decode_sample_node_
tokenizer::TokenizerIds history_ids_
void getStopTokens(std::string &token_file)
dag::Node * decode_embedding_node_
virtual base::Status defaultParam()
Configure default parameters.
infer::Infer * decode_infer_node_
virtual base::Status deserialize(rapidjson::Value &json)
Deserialize from JSON.
virtual base::Status serialize(rapidjson::Value &json, rapidjson::Document::AllocatorType &allocator)
Serialize to JSON.
std::vector< int > special_tokens_
base::Status setConfigParam()
base::Status setInferenceType(base::InferenceType inference_type)
virtual base::Status serialize(rapidjson::Value &json, rapidjson::Document::AllocatorType &allocator)
Serialize to JSON.
base::Status setConfigParam()
virtual base::Status defaultParam()
Configure default parameters.
void setConfigPath(std::string config_path)
QwenPrefill(const std::string &name, std::vector< dag::Edge * > inputs, std::vector< dag::Edge * > outputs)
virtual base::Status deserialize(rapidjson::Value &json)
Deserialize from JSON.
virtual base::Status deinit()
Deinitialize node.
virtual base::Status init()
Initialize node.
base::Status setInferParams(bool is_path, base::ModelType model_type, base::DeviceType device_type)
virtual base::Status run()
Run node (pure virtual function)
std::vector< std::vector< int32_t > > ids_
#define NNDEPLOY_CC_API
api
DataType dataTypeOf< float >()
@ kEdgeUpdateFlagComplete
@ kEdgeUpdateFlagTerminate
dag::Graph * createQwenGraph(const std::string &name, base::InferenceType inference_type, base::DeviceType device_type, dag::Edge *input, dag::Edge *output, base::ModelType model_type, bool is_path, std::vector< std::string > model_value)
QwenConfig parseConfig(const std::string &file_path)
std::string embedding_file_
std::string prompt_template_
std::string tokenizer_json_
std::vector< int32_t > kv_init_shape_