No public description

PiperOrigin-RevId: 576663264
2023-10-25 15:28:42 -07:00 · 2023-10-25 15:28:42 -07:00 · 3017c02d3d
commit 3017c02d3d
parent 06dab1e526
3 changed files with 64 additions and 2 deletions
--- a/mediapipe/tasks/cc/components/processors/proto/BUILD
+++ b/mediapipe/tasks/cc/components/processors/proto/BUILD
@ -98,3 +98,9 @@ mediapipe_proto_library(
    name = "transformer_params_proto",
    srcs = ["transformer_params.proto"],
 )
+
+mediapipe_proto_library(
+    name = "llm_params_proto",
+    srcs = ["llm_params.proto"],
+    deps = [":transformer_params_proto"],
+)
--- a/mediapipe/tasks/cc/components/processors/proto/llm_params.proto
+++ b/mediapipe/tasks/cc/components/processors/proto/llm_params.proto
@ -0,0 +1,41 @@
+/* Copyright 2023 The MediaPipe Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+syntax = "proto3";
+
+package mediapipe.tasks.components.processors.proto;
+
+import "mediapipe/tasks/cc/components/processors/proto/transformer_params.proto";
+
+option java_package = "com.google.mediapipe.tasks.components.processors.proto";
+option java_outer_classname = "LLMParametersProto";
+
+// Parameters for Large Language Models (LLM).
+message LLMParameters {
+  TransformerParameters transformer_parameters = 1;
+
+  // Size of the vocabulary.
+  int32 vocab_size = 2;
+
+  // Whether to disable the KV cache, which is referred to as "state"
+  // elsewhere.
+  bool disable_kv_cache = 3;
+
+  // ID of the start token.
+  int32 start_token_id = 4;
+
+  // Token that marks the end of the output stream.
+  string stop_token = 5;
+}
--- a/mediapipe/tasks/cc/components/processors/proto/transformer_params.proto
+++ b/mediapipe/tasks/cc/components/processors/proto/transformer_params.proto
@ -44,6 +44,21 @@ message TransformerParameters {
  // Number of stacked transformers, `N` in the paper.
  int32 num_stacks = 7;

-  // Whether to use Multi-Query-Attention (MQA).
-  bool use_mqa = 8;
+  // Deprecated: bool use_mqa. Use num_kv_heads below.
+  reserved 8;
+
+  // Number of kv heads. 0 means Multi-Head-Attention (MHA): key and value have
+  // the same number of heads as query; 1 means Multi-Query-Attention (MQA): key
+  // and value have one head; any other value specifies the number of heads for
+  // key and value, and Grouped-Query-Attention (GQA) will be used. See
+  // https://arxiv.org/pdf/2305.13245.pdf for details.
+  int32 num_kv_heads = 9;
+
+  // Supported attention mask types.
+  enum AttentionMaskType {
+    UNSPECIFIED = 0;
+    CAUSAL = 1;
+    PREFIX = 2;
+  }
+  AttentionMaskType attention_mask_type = 10;
 }