Open-source some tokenizer unit tests.

PiperOrigin-RevId: 483399326
This commit is contained in:
MediaPipe Team 2022-10-24 09:09:42 -07:00 committed by Copybara-Service
parent af051dcb62
commit 0fd69e8d83

View File

@@ -73,6 +73,19 @@ cc_library(
],
)
# Unit test target for the :sentencepiece_tokenizer library in this package.
cc_test(
    name = "sentencepiece_tokenizer_test",
    srcs = ["sentencepiece_tokenizer_test.cc"],
    # Test data target made available to the test at runtime.
    data = ["//mediapipe/tasks/testdata/text:albert_model"],
    deps = [
        # Library under test.
        ":sentencepiece_tokenizer",
        # gtest entry point and shared task utilities.
        "//mediapipe/framework/port:gtest_main",
        "//mediapipe/tasks/cc/core:utils",
    ],
)
cc_library(
name = "tokenizer_utils",
srcs = ["tokenizer_utils.cc"],
@@ -95,6 +108,33 @@ cc_library(
],
)
# Unit test target for the :tokenizer_utils library; it also depends on the
# bert, regex and sentencepiece tokenizer libraries from this package.
cc_test(
    name = "tokenizer_utils_test",
    srcs = ["tokenizer_utils_test.cc"],
    # Test data targets made available to the test at runtime.
    data = [
        "//mediapipe/tasks/testdata/text:albert_model",
        "//mediapipe/tasks/testdata/text:mobile_bert_model",
        "//mediapipe/tasks/testdata/text:text_classifier_models",
    ],
    # Links against libdl.
    # NOTE(review): the dependency that requires this is not visible here - confirm.
    linkopts = ["-ldl"],
    deps = [
        # Tokenizer libraries defined in this package.
        ":bert_tokenizer",
        ":regex_tokenizer",
        ":sentencepiece_tokenizer",
        ":tokenizer_utils",
        # MediaPipe framework and task libraries.
        "//mediapipe/framework/port:gtest_main",
        "//mediapipe/framework/port:status",
        "//mediapipe/tasks/cc:common",
        "//mediapipe/tasks/cc/core:utils",
        "//mediapipe/tasks/cc/metadata:metadata_extractor",
        "//mediapipe/tasks/metadata:metadata_schema_cc",
        # Abseil.
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:cord",
    ],
)
cc_library(
name = "regex_tokenizer",
srcs = [