No public description

PiperOrigin-RevId: 572722787
This commit is contained in:
Sebastian Schmidt 2023-10-11 16:38:42 -07:00 committed by Copybara-Service
parent dd29666296
commit a97eaad10f
10 changed files with 922 additions and 59 deletions

View File

@ -244,16 +244,14 @@ http_archive(
# sentencepiece
http_archive(
name = "com_google_sentencepiece",
strip_prefix = "sentencepiece-1.0.0",
sha256 = "c05901f30a1d0ed64cbcf40eba08e48894e1b0e985777217b7c9036cac631346",
strip_prefix = "sentencepiece-0.1.96",
sha256 = "8409b0126ebd62b256c685d5757150cf7fcb2b92a2f2b98efb3f38fc36719754",
urls = [
"https://github.com/google/sentencepiece/archive/1.0.0.zip",
],
patches = [
"@//third_party:com_google_sentencepiece_no_gflag_no_gtest.diff",
"https://github.com/google/sentencepiece/archive/refs/tags/v0.1.96.zip"
],
build_file = "@//third_party:sentencepiece.BUILD",
patches = ["@//third_party:com_google_sentencepiece.diff"],
patch_args = ["-p1"],
repo_mapping = {"@com_google_glog" : "@com_github_glog_glog_no_gflags"},
)
http_archive(

View File

@ -254,7 +254,7 @@ cc_test(
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
"@com_google_sentencepiece//src:sentencepiece_processor", # fixdeps: keep
"@com_google_sentencepiece//:sentencepiece_processor", # fixdeps: keep
],
)
@ -303,7 +303,7 @@ cc_test(
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
"@com_google_sentencepiece//src:sentencepiece_processor", # fixdeps: keep
"@com_google_sentencepiece//:sentencepiece_processor", # fixdeps: keep
],
)

View File

@ -91,6 +91,8 @@ cc_library(
deps = [
"//mediapipe/tasks/cc/audio/audio_classifier:audio_classifier_graph",
"//mediapipe/tasks/cc/audio/audio_embedder:audio_embedder_graph",
"//mediapipe/tasks/cc/text/text_classifier:text_classifier_graph",
"//mediapipe/tasks/cc/text/text_embedder:text_embedder_graph",
"//mediapipe/tasks/cc/vision/face_detector:face_detector_graph",
"//mediapipe/tasks/cc/vision/face_landmarker:face_landmarker_graph",
"//mediapipe/tasks/cc/vision/face_stylizer:face_stylizer_graph",
@ -101,14 +103,7 @@ cc_library(
"//mediapipe/tasks/cc/vision/interactive_segmenter:interactive_segmenter_graph",
"//mediapipe/tasks/cc/vision/object_detector:object_detector_graph",
"//mediapipe/tasks/cc/vision/pose_landmarker:pose_landmarker_graph",
] + select({
# TODO: Build text_classifier_graph and text_embedder_graph on Windows.
"//mediapipe:windows": [],
"//conditions:default": [
"//mediapipe/tasks/cc/text/text_classifier:text_classifier_graph",
"//mediapipe/tasks/cc/text/text_embedder:text_embedder_graph",
],
}),
],
)
py_library(

View File

@ -108,7 +108,7 @@ cc_library(
":sentencepiece_constants",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_sentencepiece//src:sentencepiece_model_cc_proto",
"@com_google_sentencepiece//:sentencepiece_model_cc_proto",
],
)
@ -165,8 +165,8 @@ cc_test(
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings:str_format",
"@com_google_sentencepiece//src:sentencepiece_cc_proto",
"@com_google_sentencepiece//src:sentencepiece_processor",
"@com_google_sentencepiece//:sentencepiece_cc_proto",
"@com_google_sentencepiece//:sentencepiece_processor",
"@org_tensorflow//tensorflow/core:lib",
],
)

View File

@ -88,7 +88,7 @@ cc_test(
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:cord",
"@com_google_sentencepiece//src:sentencepiece_processor", # fixdeps: keep
"@com_google_sentencepiece//:sentencepiece_processor", # fixdeps: keep
"@org_tensorflow//tensorflow/lite:test_util",
],
)

View File

@ -92,7 +92,7 @@ cc_test(
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_sentencepiece//src:sentencepiece_processor",
"@com_google_sentencepiece//:sentencepiece_processor",
"@org_tensorflow//tensorflow/lite:test_util",
],
)

View File

@ -73,7 +73,7 @@ cc_library(
"//mediapipe/framework/port:logging",
"@com_google_absl//absl/log:absl_check",
"@com_google_absl//absl/strings",
"@com_google_sentencepiece//src:sentencepiece_processor",
"@com_google_sentencepiece//:sentencepiece_processor",
],
)
@ -88,7 +88,7 @@ cc_test(
"//mediapipe/framework/port:gtest_main",
"//mediapipe/tasks/cc/core:utils",
"@com_google_absl//absl/log:absl_check",
"@com_google_sentencepiece//src:sentencepiece_processor",
"@com_google_sentencepiece//:sentencepiece_processor",
],
)
@ -140,7 +140,7 @@ cc_test(
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:cord",
"@com_google_sentencepiece//src:sentencepiece_processor",
"@com_google_sentencepiece//:sentencepiece_processor",
],
)

View File

@ -0,0 +1,808 @@
diff --git a/src/bpe_model.cc b/src/bpe_model.cc
index 22cd115..97e0bda 100644
--- a/src/bpe_model.cc
+++ b/src/bpe_model.cc
@@ -21,7 +21,7 @@
#include "bpe_model.h"
#include "freelist.h"
-#include "third_party/absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_map.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/bpe_model_trainer.cc b/src/bpe_model_trainer.cc
index 964d44e..ae8983c 100644
--- a/src/bpe_model_trainer.cc
+++ b/src/bpe_model_trainer.cc
@@ -18,7 +18,7 @@
#include <vector>
#include "bpe_model_trainer.h"
-#include "third_party/absl/container/flat_hash_set.h"
+#include "absl/container/flat_hash_set.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/bpe_model_trainer.h b/src/bpe_model_trainer.h
index e011a37..17f6e06 100644
--- a/src/bpe_model_trainer.h
+++ b/src/bpe_model_trainer.h
@@ -20,7 +20,7 @@
#include <vector>
#include "sentencepiece_model.pb.h"
-#include "third_party/absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_map.h"
#include "trainer_interface.h"
namespace sentencepiece {
diff --git a/src/bpe_model_trainer_test.cc b/src/bpe_model_trainer_test.cc
index 173eb9c..2a43c3a 100644
--- a/src/bpe_model_trainer_test.cc
+++ b/src/bpe_model_trainer_test.cc
@@ -20,8 +20,8 @@
#include "sentencepiece_processor.h"
#include "sentencepiece_trainer.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
-#include "third_party/absl/strings/str_join.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/builder.cc b/src/builder.cc
index 378aaa0..1557a07 100644
--- a/src/builder.cc
+++ b/src/builder.cc
@@ -18,10 +18,10 @@
#include "builder.h"
#include "filesystem.h"
-#include "third_party/absl/strings/str_join.h"
-#include "third_party/absl/strings/str_replace.h"
-#include "third_party/absl/strings/str_split.h"
-#include "third_party/absl/strings/strip.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/str_replace.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/strip.h"
#ifdef ENABLE_NFKC_COMPILE
#include <unicode/errorcode.h>
@@ -36,7 +36,7 @@
#include "normalization_rule.h"
#include "normalizer.h"
-#include "third_party/darts_clone/darts.h"
+#include "include/darts.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/builder.h b/src/builder.h
index 49d2884..289fab6 100644
--- a/src/builder.h
+++ b/src/builder.h
@@ -22,7 +22,7 @@
#include "common.h"
#include "sentencepiece_model.pb.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/strings/string_view.h"
namespace sentencepiece {
namespace normalizer {
diff --git a/src/builder_test.cc b/src/builder_test.cc
index 4acb7b3..1dee5c7 100644
--- a/src/builder_test.cc
+++ b/src/builder_test.cc
@@ -18,7 +18,7 @@
#include "normalizer.h"
#include "sentencepiece_trainer.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
+#include "absl/strings/str_cat.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/char_model_trainer_test.cc b/src/char_model_trainer_test.cc
index 8c2e4b7..e8b4979 100644
--- a/src/char_model_trainer_test.cc
+++ b/src/char_model_trainer_test.cc
@@ -19,8 +19,8 @@
#include "filesystem.h"
#include "sentencepiece_processor.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
-#include "third_party/absl/strings/str_join.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/compile_charsmap_main.cc b/src/compile_charsmap_main.cc
index c5a5188..931028b 100644
--- a/src/compile_charsmap_main.cc
+++ b/src/compile_charsmap_main.cc
@@ -22,8 +22,8 @@
#include "filesystem.h"
#include "init.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/flags/flag.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/flags/flag.h"
+#include "absl/strings/string_view.h"
using sentencepiece::normalizer::Builder;
diff --git a/src/error.cc b/src/error.cc
index a226d98..ab4675d 100644
--- a/src/error.cc
+++ b/src/error.cc
@@ -20,8 +20,8 @@
#ifdef _USE_EXTERNAL_ABSL
// Naive workaround to define minloglevel on external absl package.
// We want to define them in other cc file.
-#include "third_party/absl/flags/flag.h"
-#include "third_party/absl/flags/parse.h"
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
ABSL_FLAG(int32, minloglevel, 0,
"Messages logged at a lower level than this don't actually.");
#endif
diff --git a/src/filesystem.cc b/src/filesystem.cc
index 833c8f7..6a169d9 100644
--- a/src/filesystem.cc
+++ b/src/filesystem.cc
@@ -15,7 +15,7 @@
#include <iostream>
#include "filesystem.h"
-#include "third_party/absl/memory/memory.h"
+#include "absl/memory/memory.h"
#include "util.h"
#if defined(OS_WIN) && defined(UNICODE) && defined(_UNICODE)
diff --git a/src/filesystem.h b/src/filesystem.h
index e572b4b..dbcce48 100644
--- a/src/filesystem.h
+++ b/src/filesystem.h
@@ -23,7 +23,7 @@
#include "common.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/strings/string_view.h"
namespace sentencepiece {
namespace filesystem {
diff --git a/src/filesystem_test.cc b/src/filesystem_test.cc
index 790e756..39ece99 100644
--- a/src/filesystem_test.cc
+++ b/src/filesystem_test.cc
@@ -14,7 +14,7 @@
#include "filesystem.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
+#include "absl/strings/str_cat.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/init.h b/src/init.h
index 090a2d9..acfda8a 100644
--- a/src/init.h
+++ b/src/init.h
@@ -16,8 +16,8 @@
#define INIT_H_
#include "common.h"
-#include "third_party/absl/flags/flag.h"
-#include "third_party/absl/flags/parse.h"
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
ABSL_DECLARE_FLAG(int32, minloglevel);
diff --git a/src/model_factory.cc b/src/model_factory.cc
index be99501..040c00c 100644
--- a/src/model_factory.cc
+++ b/src/model_factory.cc
@@ -15,7 +15,7 @@
#include "bpe_model.h"
#include "char_model.h"
#include "model_factory.h"
-#include "third_party/absl/memory/memory.h"
+#include "absl/memory/memory.h"
#include "unigram_model.h"
#include "word_model.h"
diff --git a/src/model_interface.cc b/src/model_interface.cc
index c49be1e..22c6378 100644
--- a/src/model_interface.cc
+++ b/src/model_interface.cc
@@ -16,8 +16,8 @@
#include "model_interface.h"
#include "sentencepiece_model.pb.h"
-#include "third_party/absl/memory/memory.h"
-#include "third_party/absl/strings/str_format.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/str_format.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/model_interface.h b/src/model_interface.h
index aef5b53..fc14257 100644
--- a/src/model_interface.h
+++ b/src/model_interface.h
@@ -25,9 +25,9 @@
#include "normalizer.h"
#include "sentencepiece_model.pb.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/container/flat_hash_map.h"
-#include "third_party/absl/strings/string_view.h"
-#include "third_party/darts_clone/darts.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/strings/string_view.h"
+#include "include/darts.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/model_interface_test.cc b/src/model_interface_test.cc
index 69ee4e6..26a1e05 100644
--- a/src/model_interface_test.cc
+++ b/src/model_interface_test.cc
@@ -15,7 +15,7 @@
#include "model_factory.h"
#include "model_interface.h"
#include "testharness.h"
-#include "third_party/absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_map.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/normalizer.cc b/src/normalizer.cc
index 100b875..1791bd1 100644
--- a/src/normalizer.cc
+++ b/src/normalizer.cc
@@ -18,11 +18,11 @@
#include <vector>
#include "common.h"
-#include "third_party/absl/memory/memory.h"
-#include "third_party/absl/strings/match.h"
-#include "third_party/absl/strings/string_view.h"
-#include "third_party/absl/strings/strip.h"
-#include "third_party/darts_clone/darts.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/match.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/strip.h"
+#include "include/darts.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/normalizer.h b/src/normalizer.h
index 622bbd2..1326102 100644
--- a/src/normalizer.h
+++ b/src/normalizer.h
@@ -24,8 +24,8 @@
#include "common.h"
#include "sentencepiece_model.pb.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/strings/string_view.h"
-#include "third_party/darts_clone/darts.h"
+#include "absl/strings/string_view.h"
+#include "include/darts.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/pretokenizer_for_training.cc b/src/pretokenizer_for_training.cc
index 049658e..8021511 100644
--- a/src/pretokenizer_for_training.cc
+++ b/src/pretokenizer_for_training.cc
@@ -14,7 +14,7 @@
#include <string>
#include "pretokenizer_for_training.h"
-#include "third_party/absl/strings/str_replace.h"
+#include "absl/strings/str_replace.h"
namespace sentencepiece {
namespace pretokenizer {
diff --git a/src/pretokenizer_for_training.h b/src/pretokenizer_for_training.h
index 2d3bc82..38beaa6 100644
--- a/src/pretokenizer_for_training.h
+++ b/src/pretokenizer_for_training.h
@@ -21,7 +21,7 @@
#include "common.h"
#include "sentencepiece.pb.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/strings/string_view.h"
namespace sentencepiece {
namespace pretokenizer {
diff --git a/src/pretokenizer_for_training_test.cc b/src/pretokenizer_for_training_test.cc
index 80f4787..c559eb8 100644
--- a/src/pretokenizer_for_training_test.cc
+++ b/src/pretokenizer_for_training_test.cc
@@ -13,7 +13,7 @@
// limitations under the License.!
#include "pretokenizer_for_training.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
+#include "absl/strings/str_cat.h"
#include "trainer_interface.h"
namespace sentencepiece {
diff --git a/src/sentencepiece_processor.cc b/src/sentencepiece_processor.cc
index 1e4e7a0..1ff3990 100644
--- a/src/sentencepiece_processor.cc
+++ b/src/sentencepiece_processor.cc
@@ -23,14 +23,14 @@
#include "normalizer.h"
#include "sentencepiece.pb.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/memory/memory.h"
-#include "third_party/absl/strings/numbers.h"
-#include "third_party/absl/strings/str_cat.h"
-#include "third_party/absl/strings/str_join.h"
-#include "third_party/absl/strings/str_replace.h"
-#include "third_party/absl/strings/str_split.h"
-#include "third_party/absl/strings/string_view.h"
-#include "third_party/absl/strings/strip.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/str_replace.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/strip.h"
#include "unigram_model.h"
#include "util.h"
diff --git a/src/sentencepiece_processor.h b/src/sentencepiece_processor.h
index e8bd5f5..e81ebbf 100644
--- a/src/sentencepiece_processor.h
+++ b/src/sentencepiece_processor.h
@@ -22,7 +22,7 @@
#include <vector>
#if defined(_USE_INTERNAL_STRING_VIEW)
-#include "third_party/absl/strings/string_view.h"
+#include "absl/strings/string_view.h"
#elif defined(_USE_TF_STRING_VIEW)
#include "absl/strings/string_view.h"
#else
diff --git a/src/sentencepiece_processor_test.cc b/src/sentencepiece_processor_test.cc
index 373e73e..829c3d4 100644
--- a/src/sentencepiece_processor_test.cc
+++ b/src/sentencepiece_processor_test.cc
@@ -23,10 +23,10 @@
#include "sentencepiece_processor.h"
#include "sentencepiece_trainer.h"
#include "testharness.h"
-#include "third_party/absl/container/flat_hash_map.h"
-#include "third_party/absl/memory/memory.h"
-#include "third_party/absl/strings/str_cat.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/sentencepiece_trainer.cc b/src/sentencepiece_trainer.cc
index b9fe64f..47ef33c 100644
--- a/src/sentencepiece_trainer.cc
+++ b/src/sentencepiece_trainer.cc
@@ -22,12 +22,12 @@
#include "sentencepiece_model.pb.h"
#include "sentencepiece_trainer.h"
#include "spec_parser.h"
-#include "third_party/absl/flags/flag.h"
-#include "third_party/absl/strings/numbers.h"
-#include "third_party/absl/strings/str_cat.h"
-#include "third_party/absl/strings/str_split.h"
-#include "third_party/absl/strings/string_view.h"
-#include "third_party/absl/strings/strip.h"
+#include "absl/flags/flag.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/strip.h"
#include "trainer_factory.h"
#include "util.h"
diff --git a/src/sentencepiece_trainer_test.cc b/src/sentencepiece_trainer_test.cc
index e44e66b..fc73b1d 100644
--- a/src/sentencepiece_trainer_test.cc
+++ b/src/sentencepiece_trainer_test.cc
@@ -16,7 +16,7 @@
#include "sentencepiece_model.pb.h"
#include "sentencepiece_trainer.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
+#include "absl/strings/str_cat.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/spec_parser.h b/src/spec_parser.h
index 2c5a95b..263f2bd 100644
--- a/src/spec_parser.h
+++ b/src/spec_parser.h
@@ -19,8 +19,8 @@
#include <vector>
#include "sentencepiece_processor.h"
-#include "third_party/absl/strings/ascii.h"
-#include "third_party/absl/strings/str_split.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/str_split.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/spm_decode_main.cc b/src/spm_decode_main.cc
index 3382ddc..9dda65c 100644
--- a/src/spm_decode_main.cc
+++ b/src/spm_decode_main.cc
@@ -21,8 +21,8 @@
#include "init.h"
#include "sentencepiece.pb.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/flags/flag.h"
-#include "third_party/absl/strings/str_split.h"
+#include "absl/flags/flag.h"
+#include "absl/strings/str_split.h"
#include "util.h"
ABSL_FLAG(std::string, model, "", "model file name");
diff --git a/src/spm_encode_main.cc b/src/spm_encode_main.cc
index 4d12a38..29b7458 100644
--- a/src/spm_encode_main.cc
+++ b/src/spm_encode_main.cc
@@ -21,10 +21,10 @@
#include "init.h"
#include "sentencepiece.pb.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/container/flat_hash_map.h"
-#include "third_party/absl/flags/flag.h"
-#include "third_party/absl/strings/str_cat.h"
-#include "third_party/absl/strings/str_join.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/flags/flag.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
#include "trainer_interface.h"
ABSL_FLAG(std::string, model, "", "model file name");
diff --git a/src/spm_export_vocab_main.cc b/src/spm_export_vocab_main.cc
index b5d93cb..70a65c1 100644
--- a/src/spm_export_vocab_main.cc
+++ b/src/spm_export_vocab_main.cc
@@ -20,7 +20,7 @@
#include "init.h"
#include "sentencepiece_model.pb.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/flags/flag.h"
+#include "absl/flags/flag.h"
ABSL_FLAG(std::string, output, "", "Output filename");
ABSL_FLAG(std::string, model, "", "input model file name");
diff --git a/src/spm_normalize_main.cc b/src/spm_normalize_main.cc
index 96da360..8c541b8 100644
--- a/src/spm_normalize_main.cc
+++ b/src/spm_normalize_main.cc
@@ -21,7 +21,7 @@
#include "sentencepiece_model.pb.h"
#include "sentencepiece_processor.h"
#include "sentencepiece_trainer.h"
-#include "third_party/absl/flags/flag.h"
+#include "absl/flags/flag.h"
ABSL_FLAG(std::string, model, "", "Model file name");
ABSL_FLAG(bool, use_internal_normalization, false,
diff --git a/src/spm_train_main.cc b/src/spm_train_main.cc
index baf8dbf..ba1e811 100644
--- a/src/spm_train_main.cc
+++ b/src/spm_train_main.cc
@@ -18,10 +18,10 @@
#include "init.h"
#include "sentencepiece_model.pb.h"
#include "sentencepiece_trainer.h"
-#include "third_party/absl/flags/flag.h"
-#include "third_party/absl/strings/ascii.h"
-#include "third_party/absl/strings/str_join.h"
-#include "third_party/absl/strings/str_split.h"
+#include "absl/flags/flag.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/str_split.h"
#include "util.h"
using sentencepiece::NormalizerSpec;
diff --git a/src/testharness.cc b/src/testharness.cc
index f6b1efe..daf2d14 100644
--- a/src/testharness.cc
+++ b/src/testharness.cc
@@ -26,7 +26,7 @@
#include <vector>
#include "common.h"
-#include "third_party/absl/strings/str_cat.h"
+#include "absl/strings/str_cat.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/testharness.h b/src/testharness.h
index 9879b06..98317ad 100644
--- a/src/testharness.h
+++ b/src/testharness.h
@@ -21,9 +21,9 @@
#include <string>
#include "common.h"
-#include "third_party/absl/flags/flag.h"
-#include "third_party/absl/flags/parse.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "absl/strings/string_view.h"
ABSL_DECLARE_FLAG(std::string, test_tmpdir);
ABSL_DECLARE_FLAG(std::string, test_srcdir);
diff --git a/src/trainer_factory.cc b/src/trainer_factory.cc
index d1d2541..ff594d0 100644
--- a/src/trainer_factory.cc
+++ b/src/trainer_factory.cc
@@ -14,7 +14,7 @@
#include "bpe_model_trainer.h"
#include "char_model_trainer.h"
-#include "third_party/absl/memory/memory.h"
+#include "absl/memory/memory.h"
#include "trainer_factory.h"
#include "unigram_model_trainer.h"
#include "word_model_trainer.h"
diff --git a/src/trainer_interface.cc b/src/trainer_interface.cc
index a3a4b74..70f2f72 100644
--- a/src/trainer_interface.cc
+++ b/src/trainer_interface.cc
@@ -26,13 +26,13 @@
#include "normalizer.h"
#include "sentencepiece_processor.h"
#include "sentencepiece_trainer.h"
-#include "third_party/absl/container/flat_hash_map.h"
-#include "third_party/absl/memory/memory.h"
-#include "third_party/absl/strings/numbers.h"
-#include "third_party/absl/strings/str_cat.h"
-#include "third_party/absl/strings/str_format.h"
-#include "third_party/absl/strings/str_join.h"
-#include "third_party/absl/strings/str_split.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/str_split.h"
#include "trainer_interface.h"
#include "unicode_script.h"
#include "util.h"
diff --git a/src/trainer_interface.h b/src/trainer_interface.h
index f66d59a..8a654ec 100644
--- a/src/trainer_interface.h
+++ b/src/trainer_interface.h
@@ -27,7 +27,7 @@
#include "sentencepiece_model.pb.h"
#include "sentencepiece_processor.h"
#include "sentencepiece_trainer.h"
-#include "third_party/absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_map.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/trainer_interface_test.cc b/src/trainer_interface_test.cc
index 70a51ad..d7f3f0c 100644
--- a/src/trainer_interface_test.cc
+++ b/src/trainer_interface_test.cc
@@ -16,8 +16,8 @@
#include "filesystem.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
-#include "third_party/absl/strings/str_format.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
#include "trainer_interface.h"
#include "util.h"
diff --git a/src/unicode_script.cc b/src/unicode_script.cc
index 583dc30..11b24dc 100644
--- a/src/unicode_script.cc
+++ b/src/unicode_script.cc
@@ -14,7 +14,7 @@
#include <unordered_map>
-#include "third_party/absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_map.h"
#include "unicode_script.h"
#include "unicode_script_map.h"
#include "util.h"
diff --git a/src/unicode_script_map.h b/src/unicode_script_map.h
index f2e67e9..f1b8299 100644
--- a/src/unicode_script_map.h
+++ b/src/unicode_script_map.h
@@ -14,7 +14,7 @@
#ifndef UNICODE_SCRIPT_DATA_H_
#define UNICODE_SCRIPT_DATA_H_
-#include "third_party/absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_map.h"
namespace sentencepiece {
namespace unicode_script {
namespace {
diff --git a/src/unicode_script_test.cc b/src/unicode_script_test.cc
index ab33565..e0b1c4d 100644
--- a/src/unicode_script_test.cc
+++ b/src/unicode_script_test.cc
@@ -14,7 +14,7 @@
#include "common.h"
#include "testharness.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/strings/string_view.h"
#include "unicode_script.h"
#include "util.h"
diff --git a/src/unigram_model.cc b/src/unigram_model.cc
index 3b99060..9c72fb9 100644
--- a/src/unigram_model.cc
+++ b/src/unigram_model.cc
@@ -22,9 +22,9 @@
#include <utility>
#include <vector>
-#include "third_party/absl/memory/memory.h"
-#include "third_party/absl/strings/str_split.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
#include "unigram_model.h"
#include "util.h"
diff --git a/src/unigram_model.h b/src/unigram_model.h
index 448e489..9062f12 100644
--- a/src/unigram_model.h
+++ b/src/unigram_model.h
@@ -24,7 +24,7 @@
#include "freelist.h"
#include "model_interface.h"
#include "sentencepiece_model.pb.h"
-#include "third_party/darts_clone/darts.h"
+#include "include/darts.h"
namespace sentencepiece {
namespace unigram {
diff --git a/src/unigram_model_test.cc b/src/unigram_model_test.cc
index f93b21c..808e907 100644
--- a/src/unigram_model_test.cc
+++ b/src/unigram_model_test.cc
@@ -22,8 +22,8 @@
#include "sentencepiece_model.pb.h"
#include "sentencepiece_processor.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
-#include "third_party/absl/strings/str_join.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/unigram_model_trainer.cc b/src/unigram_model_trainer.cc
index 9615040..373ec95 100644
--- a/src/unigram_model_trainer.cc
+++ b/src/unigram_model_trainer.cc
@@ -25,8 +25,8 @@
#include "normalizer.h"
#include "pretokenizer_for_training.h"
#include "sentencepiece_trainer.h"
-#include "third_party/absl/container/flat_hash_map.h"
-#include "third_party/absl/memory/memory.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/memory/memory.h"
#include "third_party/esaxx/esa.hxx" // Suffix array library.
#include "unicode_script.h"
#include "unigram_model_trainer.h"
diff --git a/src/unigram_model_trainer.h b/src/unigram_model_trainer.h
index 91fbeb4..f2d6b36 100644
--- a/src/unigram_model_trainer.h
+++ b/src/unigram_model_trainer.h
@@ -21,7 +21,7 @@
#include <vector>
#include "sentencepiece_model.pb.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/strings/string_view.h"
#include "trainer_interface.h"
#include "unigram_model.h"
#include "util.h"
diff --git a/src/unigram_model_trainer_test.cc b/src/unigram_model_trainer_test.cc
index ffe515e..fdb25f6 100644
--- a/src/unigram_model_trainer_test.cc
+++ b/src/unigram_model_trainer_test.cc
@@ -16,8 +16,8 @@
#include "sentencepiece_processor.h"
#include "sentencepiece_trainer.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
-#include "third_party/absl/strings/str_join.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
#include "unigram_model_trainer.h"
#include "util.h"
diff --git a/src/util.h b/src/util.h
index 0d15863..d4a2d51 100644
--- a/src/util.h
+++ b/src/util.h
@@ -30,7 +30,7 @@
#include "common.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/strings/string_view.h"
#ifdef SPM_NO_THREADLOCAL
#include <pthread.h>
diff --git a/src/util_test.cc b/src/util_test.cc
index 71d006f..231fc96 100644
--- a/src/util_test.cc
+++ b/src/util_test.cc
@@ -16,7 +16,7 @@
#include "filesystem.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
+#include "absl/strings/str_cat.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/word_model_trainer.cc b/src/word_model_trainer.cc
index 0b8b062..b057843 100644
--- a/src/word_model_trainer.cc
+++ b/src/word_model_trainer.cc
@@ -15,8 +15,8 @@
#include <cmath>
#include <string>
-#include "third_party/absl/container/flat_hash_map.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/strings/string_view.h"
#include "util.h"
#include "word_model.h"
#include "word_model_trainer.h"
diff --git a/src/word_model_trainer_test.cc b/src/word_model_trainer_test.cc
index c4a8bc6..366810f 100644
--- a/src/word_model_trainer_test.cc
+++ b/src/word_model_trainer_test.cc
@@ -18,8 +18,8 @@
#include "filesystem.h"
#include "sentencepiece_processor.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
-#include "third_party/absl/strings/str_join.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
#include "util.h"
#include "word_model_trainer.h"

View File

@ -1,34 +0,0 @@
diff --git a/src/BUILD b/src/BUILD
index b4298d2..f3877a3 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -71,9 +71,7 @@ cc_library(
":common",
":sentencepiece_cc_proto",
":sentencepiece_model_cc_proto",
- "@com_github_gflags_gflags//:gflags",
"@com_google_glog//:glog",
- "@com_google_googletest//:gtest",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/container:flat_hash_map",
diff --git a/src/normalizer.h b/src/normalizer.h
index c16ac16..2af58be 100644
--- a/src/normalizer.h
+++ b/src/normalizer.h
@@ -21,7 +21,6 @@
#include <utility>
#include <vector>
-#include "gtest/gtest_prod.h"
#include "absl/strings/string_view.h"
#include "third_party/darts_clone/include/darts.h"
#include "src/common.h"
@@ -97,7 +96,6 @@ class Normalizer {
friend class Builder;
private:
- FRIEND_TEST(NormalizerTest, EncodeDecodePrecompiledCharsMapTest);
void Init();

96
third_party/sentencepiece.BUILD vendored Normal file
View File

@ -0,0 +1,96 @@
# Package-wide defaults for the vendored SentencePiece build file.
# Every target declared here is publicly visible, and the "layering_check"
# and "parse_headers" features are requested for all targets in this package.
package(
default_visibility = ["//visibility:public"],
features = [
"layering_check",
"parse_headers",
],
)
# License classification for the whole package.
licenses(["notice"]) # Apache 2, BSD, MIT
# Proto definition target for src/sentencepiece.proto.
# Consumed by :sentencepiece_cc_proto to produce the C++ bindings.
proto_library(
name = "sentencepiece_proto",
srcs = ["src/sentencepiece.proto"],
)
# C++ proto library generated from :sentencepiece_proto.
cc_proto_library(
name = "sentencepiece_cc_proto",
deps = [":sentencepiece_proto"],
)
# Proto definition target for src/sentencepiece_model.proto.
# Consumed by :sentencepiece_model_cc_proto to produce the C++ bindings.
proto_library(
name = "sentencepiece_model_proto",
srcs = ["src/sentencepiece_model.proto"],
)
# C++ proto library generated from :sentencepiece_model_proto.
cc_proto_library(
name = "sentencepiece_model_cc_proto",
deps = [":sentencepiece_model_proto"],
)
# Produces config.h by copying config.h.in unchanged.
# The command is a plain `cp` of the single input ($<) to the single output
# ($@); no placeholder substitution is performed on the template.
genrule(
name = "config_h",
srcs = ["config.h.in"],
outs = ["config.h"],
cmd = "cp $< $@",
)
# Header-only library exposing the shared SentencePiece headers:
# config.h (the output of the :config_h genrule) and src/common.h.
# It has no srcs of its own and depends only on absl/base.
cc_library(
name = "common",
hdrs = [
"config.h",
"src/common.h",
],
deps = [
"@com_google_absl//absl/base",
],
)
# SentencePiece processor library: compiles the model implementations
# (bpe, char, unigram, word), the normalizer, filesystem/util helpers and
# the SentencePieceProcessor itself into a single target.
cc_library(
name = "sentencepiece_processor",
srcs = [
"src/bpe_model.cc",
"src/char_model.cc",
"src/error.cc",
"src/filesystem.cc",
"src/model_factory.cc",
"src/model_interface.cc",
"src/normalizer.cc",
"src/sentencepiece_processor.cc",
"src/unigram_model.cc",
"src/util.cc",
"src/word_model.cc",
],
# NOTE(review): src/trainer_interface.h is exported below although
# src/trainer_interface.cc is not listed in srcs — confirm that dependents
# only need its declarations.
hdrs = [
"src/bpe_model.h",
"src/char_model.h",
"src/filesystem.h",
"src/freelist.h",
"src/model_factory.h",
"src/model_interface.h",
"src/normalizer.h",
"src/sentencepiece_processor.h",
"src/trainer_interface.h",
"src/unigram_model.h",
"src/util.h",
"src/word_model.h",
],
# _USE_TF_STRING_VIEW selects the branch of src/sentencepiece_processor.h
# that includes "absl/strings/string_view.h".
defines = ["_USE_TF_STRING_VIEW"],
# Adds the repository root and src/ to the include search path, which is
# what lets the sources use unqualified includes such as "common.h" and
# "config.h".
includes = [
".",
"src",
],
# Request static linking of this library.
linkstatic = 1,
deps =
[
":common",
":sentencepiece_cc_proto",
":sentencepiece_model_cc_proto",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
# Presumably provides "include/darts.h", which the accompanying source
# patch switches the darts includes to — verify against the darts_clone
# repository definition.
"@darts_clone",
],
)