From 0fd69e8d838d71e364e019f3eb29eb4389dbec7b Mon Sep 17 00:00:00 2001
From: MediaPipe Team
Date: Mon, 24 Oct 2022 09:09:42 -0700
Subject: [PATCH] Open-source some tokenizer unit tests.

PiperOrigin-RevId: 483399326
---
Reviewer notes (this region, between the "---" separator and the diff, is
not part of the commit message):
  * Adds two cc_test targets to mediapipe/tasks/cc/text/tokenizers/BUILD:
    sentencepiece_tokenizer_test and tokenizer_utils_test.
  * Both targets list model targets under //mediapipe/tasks/testdata/text
    in their `data` attribute and depend on
    //mediapipe/framework/port:gtest_main.
  * tokenizer_utils_test sets linkopts = ["-ldl"]; the reason is not
    visible in this patch. TODO(review): confirm it is required on every
    platform this test is built for.
  * The hunks only add lines (40 insertions, no deletions); existing
    cc_library rules appear as unchanged context.

 mediapipe/tasks/cc/text/tokenizers/BUILD | 40 ++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/mediapipe/tasks/cc/text/tokenizers/BUILD b/mediapipe/tasks/cc/text/tokenizers/BUILD
index e76d943c5..5ce08b2d7 100644
--- a/mediapipe/tasks/cc/text/tokenizers/BUILD
+++ b/mediapipe/tasks/cc/text/tokenizers/BUILD
@@ -73,6 +73,19 @@ cc_library(
     ],
 )
 
+cc_test(
+    name = "sentencepiece_tokenizer_test",
+    srcs = ["sentencepiece_tokenizer_test.cc"],
+    data = [
+        "//mediapipe/tasks/testdata/text:albert_model",
+    ],
+    deps = [
+        ":sentencepiece_tokenizer",
+        "//mediapipe/framework/port:gtest_main",
+        "//mediapipe/tasks/cc/core:utils",
+    ],
+)
+
 cc_library(
     name = "tokenizer_utils",
     srcs = ["tokenizer_utils.cc"],
@@ -95,6 +108,33 @@ cc_library(
     ],
 )
 
+cc_test(
+    name = "tokenizer_utils_test",
+    srcs = ["tokenizer_utils_test.cc"],
+    data = [
+        "//mediapipe/tasks/testdata/text:albert_model",
+        "//mediapipe/tasks/testdata/text:mobile_bert_model",
+        "//mediapipe/tasks/testdata/text:text_classifier_models",
+    ],
+    linkopts = ["-ldl"],
+    deps = [
+        ":bert_tokenizer",
+        ":regex_tokenizer",
+        ":sentencepiece_tokenizer",
+        ":tokenizer_utils",
+        "//mediapipe/framework/port:gtest_main",
+        "//mediapipe/framework/port:status",
+        "//mediapipe/tasks/cc:common",
+        "//mediapipe/tasks/cc/core:utils",
+        "//mediapipe/tasks/cc/metadata:metadata_extractor",
+        "//mediapipe/tasks/metadata:metadata_schema_cc",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/status:statusor",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/strings:cord",
+    ],
+)
+
 cc_library(
     name = "regex_tokenizer",
     srcs = [