diff --git a/WORKSPACE b/WORKSPACE index 4f7bb3b2e..df2c4f93b 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -244,16 +244,14 @@ http_archive( # sentencepiece http_archive( name = "com_google_sentencepiece", - strip_prefix = "sentencepiece-1.0.0", - sha256 = "c05901f30a1d0ed64cbcf40eba08e48894e1b0e985777217b7c9036cac631346", + strip_prefix = "sentencepiece-0.1.96", + sha256 = "8409b0126ebd62b256c685d5757150cf7fcb2b92a2f2b98efb3f38fc36719754", urls = [ - "https://github.com/google/sentencepiece/archive/1.0.0.zip", - ], - patches = [ - "@//third_party:com_google_sentencepiece_no_gflag_no_gtest.diff", + "https://github.com/google/sentencepiece/archive/refs/tags/v0.1.96.zip" ], + build_file = "@//third_party:sentencepiece.BUILD", + patches = ["@//third_party:com_google_sentencepiece.diff"], patch_args = ["-p1"], - repo_mapping = {"@com_google_glog" : "@com_github_glog_glog_no_gflags"}, ) http_archive( diff --git a/mediapipe/calculators/tensor/BUILD b/mediapipe/calculators/tensor/BUILD index 0fb7b2ba4..a83fc7da8 100644 --- a/mediapipe/calculators/tensor/BUILD +++ b/mediapipe/calculators/tensor/BUILD @@ -254,7 +254,7 @@ cc_test( "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", - "@com_google_sentencepiece//src:sentencepiece_processor", # fixdeps: keep + "@com_google_sentencepiece//:sentencepiece_processor", # fixdeps: keep ], ) @@ -303,7 +303,7 @@ cc_test( "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", - "@com_google_sentencepiece//src:sentencepiece_processor", # fixdeps: keep + "@com_google_sentencepiece//:sentencepiece_processor", # fixdeps: keep ], ) diff --git a/mediapipe/python/BUILD b/mediapipe/python/BUILD index b6e1ab945..4c89aa6c1 100644 --- a/mediapipe/python/BUILD +++ b/mediapipe/python/BUILD @@ -91,6 +91,8 @@ cc_library( deps = [ "//mediapipe/tasks/cc/audio/audio_classifier:audio_classifier_graph", "//mediapipe/tasks/cc/audio/audio_embedder:audio_embedder_graph", + "//mediapipe/tasks/cc/text/text_classifier:text_classifier_graph", + "//mediapipe/tasks/cc/text/text_embedder:text_embedder_graph", "//mediapipe/tasks/cc/vision/face_detector:face_detector_graph", "//mediapipe/tasks/cc/vision/face_landmarker:face_landmarker_graph", "//mediapipe/tasks/cc/vision/face_stylizer:face_stylizer_graph", @@ -101,14 +103,7 @@ cc_library( "//mediapipe/tasks/cc/vision/interactive_segmenter:interactive_segmenter_graph", "//mediapipe/tasks/cc/vision/object_detector:object_detector_graph", "//mediapipe/tasks/cc/vision/pose_landmarker:pose_landmarker_graph", - ] + select({ - # TODO: Build text_classifier_graph and text_embedder_graph on Windows. - "//mediapipe:windows": [], - "//conditions:default": [ - "//mediapipe/tasks/cc/text/text_classifier:text_classifier_graph", - "//mediapipe/tasks/cc/text/text_embedder:text_embedder_graph", - ], - }), + ], ) py_library( diff --git a/mediapipe/tasks/cc/text/custom_ops/sentencepiece/BUILD b/mediapipe/tasks/cc/text/custom_ops/sentencepiece/BUILD index 334ed74d4..f4125470f 100644 --- a/mediapipe/tasks/cc/text/custom_ops/sentencepiece/BUILD +++ b/mediapipe/tasks/cc/text/custom_ops/sentencepiece/BUILD @@ -108,7 +108,7 @@ cc_library( ":sentencepiece_constants", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", - "@com_google_sentencepiece//src:sentencepiece_model_cc_proto", + "@com_google_sentencepiece//:sentencepiece_model_cc_proto", ], ) @@ -165,8 +165,8 @@ cc_test( "@com_google_absl//absl/flags:flag", "@com_google_absl//absl/status", "@com_google_absl//absl/strings:str_format", - "@com_google_sentencepiece//src:sentencepiece_cc_proto", - "@com_google_sentencepiece//src:sentencepiece_processor", + "@com_google_sentencepiece//:sentencepiece_cc_proto", + "@com_google_sentencepiece//:sentencepiece_processor", "@org_tensorflow//tensorflow/core:lib", ], ) diff --git a/mediapipe/tasks/cc/text/text_classifier/BUILD b/mediapipe/tasks/cc/text/text_classifier/BUILD index 121b4f5e6..eafdc5c68 100644 --- a/mediapipe/tasks/cc/text/text_classifier/BUILD +++ b/mediapipe/tasks/cc/text/text_classifier/BUILD @@ -88,7 +88,7 @@ cc_test( "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:cord", - "@com_google_sentencepiece//src:sentencepiece_processor", # fixdeps: keep + "@com_google_sentencepiece//:sentencepiece_processor", # fixdeps: keep "@org_tensorflow//tensorflow/lite:test_util", ], ) diff --git a/mediapipe/tasks/cc/text/text_embedder/BUILD b/mediapipe/tasks/cc/text/text_embedder/BUILD index c925abcbd..168bcc5bb 100644 --- a/mediapipe/tasks/cc/text/text_embedder/BUILD +++ b/mediapipe/tasks/cc/text/text_embedder/BUILD @@ -92,7 +92,7 @@ cc_test( "@com_google_absl//absl/flags:flag", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", - "@com_google_sentencepiece//src:sentencepiece_processor", + "@com_google_sentencepiece//:sentencepiece_processor", "@org_tensorflow//tensorflow/lite:test_util", ], ) diff --git a/mediapipe/tasks/cc/text/tokenizers/BUILD b/mediapipe/tasks/cc/text/tokenizers/BUILD index b299f1c73..3a5c21b72 100644 --- a/mediapipe/tasks/cc/text/tokenizers/BUILD +++ b/mediapipe/tasks/cc/text/tokenizers/BUILD @@ -73,7 +73,7 @@ cc_library( "//mediapipe/framework/port:logging", "@com_google_absl//absl/log:absl_check", "@com_google_absl//absl/strings", - "@com_google_sentencepiece//src:sentencepiece_processor", + "@com_google_sentencepiece//:sentencepiece_processor", ], ) @@ -88,7 +88,7 @@ cc_test( "//mediapipe/framework/port:gtest_main", "//mediapipe/tasks/cc/core:utils", "@com_google_absl//absl/log:absl_check", - "@com_google_sentencepiece//src:sentencepiece_processor", + "@com_google_sentencepiece//:sentencepiece_processor", ], ) @@ -140,7 +140,7 @@ cc_test( "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:cord", - "@com_google_sentencepiece//src:sentencepiece_processor", + "@com_google_sentencepiece//:sentencepiece_processor", ], ) diff --git a/third_party/com_google_sentencepiece.diff b/third_party/com_google_sentencepiece.diff new file mode 100644 index 000000000..4534397ef --- /dev/null +++ b/third_party/com_google_sentencepiece.diff @@ -0,0 +1,808 @@ +diff --git a/src/bpe_model.cc b/src/bpe_model.cc +index 22cd115..97e0bda 100644 +--- a/src/bpe_model.cc ++++ b/src/bpe_model.cc +@@ -21,7 +21,7 @@ + + #include "bpe_model.h" + #include "freelist.h" +-#include "third_party/absl/container/flat_hash_map.h" ++#include "absl/container/flat_hash_map.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/bpe_model_trainer.cc b/src/bpe_model_trainer.cc +index 964d44e..ae8983c 100644 +--- a/src/bpe_model_trainer.cc ++++ b/src/bpe_model_trainer.cc +@@ -18,7 +18,7 @@ + #include + + #include "bpe_model_trainer.h" +-#include "third_party/absl/container/flat_hash_set.h" ++#include "absl/container/flat_hash_set.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/bpe_model_trainer.h b/src/bpe_model_trainer.h +index e011a37..17f6e06 100644 +--- a/src/bpe_model_trainer.h ++++ b/src/bpe_model_trainer.h +@@ -20,7 +20,7 @@ + #include + + #include "sentencepiece_model.pb.h" +-#include "third_party/absl/container/flat_hash_map.h" ++#include "absl/container/flat_hash_map.h" + #include "trainer_interface.h" + + namespace sentencepiece { +diff --git a/src/bpe_model_trainer_test.cc b/src/bpe_model_trainer_test.cc +index 173eb9c..2a43c3a 100644 +--- a/src/bpe_model_trainer_test.cc ++++ b/src/bpe_model_trainer_test.cc +@@ -20,8 +20,8 @@ + #include "sentencepiece_processor.h" + #include "sentencepiece_trainer.h" + #include "testharness.h" +-#include "third_party/absl/strings/str_cat.h" +-#include "third_party/absl/strings/str_join.h" ++#include "absl/strings/str_cat.h" ++#include "absl/strings/str_join.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/builder.cc b/src/builder.cc +index 378aaa0..1557a07 100644 +--- a/src/builder.cc ++++ b/src/builder.cc +@@ -18,10 +18,10 @@ + + #include "builder.h" + #include "filesystem.h" +-#include "third_party/absl/strings/str_join.h" +-#include "third_party/absl/strings/str_replace.h" +-#include "third_party/absl/strings/str_split.h" +-#include "third_party/absl/strings/strip.h" ++#include "absl/strings/str_join.h" ++#include "absl/strings/str_replace.h" ++#include "absl/strings/str_split.h" ++#include "absl/strings/strip.h" + + #ifdef ENABLE_NFKC_COMPILE + #include +@@ -36,7 +36,7 @@ + + #include "normalization_rule.h" + #include "normalizer.h" +-#include "third_party/darts_clone/darts.h" ++#include "include/darts.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/builder.h b/src/builder.h +index 49d2884..289fab6 100644 +--- a/src/builder.h ++++ b/src/builder.h +@@ -22,7 +22,7 @@ + #include "common.h" + #include "sentencepiece_model.pb.h" + #include "sentencepiece_processor.h" +-#include "third_party/absl/strings/string_view.h" ++#include "absl/strings/string_view.h" + + namespace sentencepiece { + namespace normalizer { +diff --git a/src/builder_test.cc b/src/builder_test.cc +index 4acb7b3..1dee5c7 100644 +--- a/src/builder_test.cc ++++ b/src/builder_test.cc +@@ -18,7 +18,7 @@ + #include "normalizer.h" + #include "sentencepiece_trainer.h" + #include "testharness.h" +-#include "third_party/absl/strings/str_cat.h" ++#include "absl/strings/str_cat.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/char_model_trainer_test.cc b/src/char_model_trainer_test.cc +index 8c2e4b7..e8b4979 100644 +--- a/src/char_model_trainer_test.cc ++++ b/src/char_model_trainer_test.cc +@@ -19,8 +19,8 @@ + #include "filesystem.h" + #include "sentencepiece_processor.h" + #include "testharness.h" +-#include "third_party/absl/strings/str_cat.h" +-#include "third_party/absl/strings/str_join.h" ++#include "absl/strings/str_cat.h" ++#include "absl/strings/str_join.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/compile_charsmap_main.cc b/src/compile_charsmap_main.cc +index c5a5188..931028b 100644 +--- a/src/compile_charsmap_main.cc ++++ b/src/compile_charsmap_main.cc +@@ -22,8 +22,8 @@ + #include "filesystem.h" + #include "init.h" + #include "sentencepiece_processor.h" +-#include "third_party/absl/flags/flag.h" +-#include "third_party/absl/strings/string_view.h" ++#include "absl/flags/flag.h" ++#include "absl/strings/string_view.h" + + using sentencepiece::normalizer::Builder; + +diff --git a/src/error.cc b/src/error.cc +index a226d98..ab4675d 100644 +--- a/src/error.cc ++++ b/src/error.cc +@@ -20,8 +20,8 @@ + #ifdef _USE_EXTERNAL_ABSL + // Naive workaround to define minloglevel on external absl package. + // We want to define them in other cc file. +-#include "third_party/absl/flags/flag.h" +-#include "third_party/absl/flags/parse.h" ++#include "absl/flags/flag.h" ++#include "absl/flags/parse.h" + ABSL_FLAG(int32, minloglevel, 0, + "Messages logged at a lower level than this don't actually."); + #endif +diff --git a/src/filesystem.cc b/src/filesystem.cc +index 833c8f7..6a169d9 100644 +--- a/src/filesystem.cc ++++ b/src/filesystem.cc +@@ -15,7 +15,7 @@ + #include + + #include "filesystem.h" +-#include "third_party/absl/memory/memory.h" ++#include "absl/memory/memory.h" + #include "util.h" + + #if defined(OS_WIN) && defined(UNICODE) && defined(_UNICODE) +diff --git a/src/filesystem.h b/src/filesystem.h +index e572b4b..dbcce48 100644 +--- a/src/filesystem.h ++++ b/src/filesystem.h +@@ -23,7 +23,7 @@ + + #include "common.h" + #include "sentencepiece_processor.h" +-#include "third_party/absl/strings/string_view.h" ++#include "absl/strings/string_view.h" + + namespace sentencepiece { + namespace filesystem { +diff --git a/src/filesystem_test.cc b/src/filesystem_test.cc +index 790e756..39ece99 100644 +--- a/src/filesystem_test.cc ++++ b/src/filesystem_test.cc +@@ -14,7 +14,7 @@ + + #include "filesystem.h" + #include "testharness.h" +-#include "third_party/absl/strings/str_cat.h" ++#include "absl/strings/str_cat.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/init.h b/src/init.h +index 090a2d9..acfda8a 100644 +--- a/src/init.h ++++ b/src/init.h +@@ -16,8 +16,8 @@ + #define INIT_H_ + + #include "common.h" +-#include "third_party/absl/flags/flag.h" +-#include "third_party/absl/flags/parse.h" ++#include "absl/flags/flag.h" ++#include "absl/flags/parse.h" + + ABSL_DECLARE_FLAG(int32, minloglevel); + +diff --git a/src/model_factory.cc b/src/model_factory.cc +index be99501..040c00c 100644 +--- a/src/model_factory.cc ++++ b/src/model_factory.cc +@@ -15,7 +15,7 @@ + #include "bpe_model.h" + #include "char_model.h" + #include "model_factory.h" +-#include "third_party/absl/memory/memory.h" ++#include "absl/memory/memory.h" + #include "unigram_model.h" + #include "word_model.h" + +diff --git a/src/model_interface.cc b/src/model_interface.cc +index c49be1e..22c6378 100644 +--- a/src/model_interface.cc ++++ b/src/model_interface.cc +@@ -16,8 +16,8 @@ + + #include "model_interface.h" + #include "sentencepiece_model.pb.h" +-#include "third_party/absl/memory/memory.h" +-#include "third_party/absl/strings/str_format.h" ++#include "absl/memory/memory.h" ++#include "absl/strings/str_format.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/model_interface.h b/src/model_interface.h +index aef5b53..fc14257 100644 +--- a/src/model_interface.h ++++ b/src/model_interface.h +@@ -25,9 +25,9 @@ + #include "normalizer.h" + #include "sentencepiece_model.pb.h" + #include "sentencepiece_processor.h" +-#include "third_party/absl/container/flat_hash_map.h" +-#include "third_party/absl/strings/string_view.h" +-#include "third_party/darts_clone/darts.h" ++#include "absl/container/flat_hash_map.h" ++#include "absl/strings/string_view.h" ++#include "include/darts.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/model_interface_test.cc b/src/model_interface_test.cc +index 69ee4e6..26a1e05 100644 +--- a/src/model_interface_test.cc ++++ b/src/model_interface_test.cc +@@ -15,7 +15,7 @@ + #include "model_factory.h" + #include "model_interface.h" + #include "testharness.h" +-#include "third_party/absl/container/flat_hash_map.h" ++#include "absl/container/flat_hash_map.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/normalizer.cc b/src/normalizer.cc +index 100b875..1791bd1 100644 +--- a/src/normalizer.cc ++++ b/src/normalizer.cc +@@ -18,11 +18,11 @@ + #include + + #include "common.h" +-#include "third_party/absl/memory/memory.h" +-#include "third_party/absl/strings/match.h" +-#include "third_party/absl/strings/string_view.h" +-#include "third_party/absl/strings/strip.h" +-#include "third_party/darts_clone/darts.h" ++#include "absl/memory/memory.h" ++#include "absl/strings/match.h" ++#include "absl/strings/string_view.h" ++#include "absl/strings/strip.h" ++#include "include/darts.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/normalizer.h b/src/normalizer.h +index 622bbd2..1326102 100644 +--- a/src/normalizer.h ++++ b/src/normalizer.h +@@ -24,8 +24,8 @@ + #include "common.h" + #include "sentencepiece_model.pb.h" + #include "sentencepiece_processor.h" +-#include "third_party/absl/strings/string_view.h" +-#include "third_party/darts_clone/darts.h" ++#include "absl/strings/string_view.h" ++#include "include/darts.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/pretokenizer_for_training.cc b/src/pretokenizer_for_training.cc +index 049658e..8021511 100644 +--- a/src/pretokenizer_for_training.cc ++++ b/src/pretokenizer_for_training.cc +@@ -14,7 +14,7 @@ + #include + + #include "pretokenizer_for_training.h" +-#include "third_party/absl/strings/str_replace.h" ++#include "absl/strings/str_replace.h" + + namespace sentencepiece { + namespace pretokenizer { +diff --git a/src/pretokenizer_for_training.h b/src/pretokenizer_for_training.h +index 2d3bc82..38beaa6 100644 +--- a/src/pretokenizer_for_training.h ++++ b/src/pretokenizer_for_training.h +@@ -21,7 +21,7 @@ + #include "common.h" + #include "sentencepiece.pb.h" + #include "sentencepiece_processor.h" +-#include "third_party/absl/strings/string_view.h" ++#include "absl/strings/string_view.h" + + namespace sentencepiece { + namespace pretokenizer { +diff --git a/src/pretokenizer_for_training_test.cc b/src/pretokenizer_for_training_test.cc +index 80f4787..c559eb8 100644 +--- a/src/pretokenizer_for_training_test.cc ++++ b/src/pretokenizer_for_training_test.cc +@@ -13,7 +13,7 @@ + // limitations under the License.! + #include "pretokenizer_for_training.h" + #include "testharness.h" +-#include "third_party/absl/strings/str_cat.h" ++#include "absl/strings/str_cat.h" + #include "trainer_interface.h" + + namespace sentencepiece { +diff --git a/src/sentencepiece_processor.cc b/src/sentencepiece_processor.cc +index 1e4e7a0..1ff3990 100644 +--- a/src/sentencepiece_processor.cc ++++ b/src/sentencepiece_processor.cc +@@ -23,14 +23,14 @@ + #include "normalizer.h" + #include "sentencepiece.pb.h" + #include "sentencepiece_processor.h" +-#include "third_party/absl/memory/memory.h" +-#include "third_party/absl/strings/numbers.h" +-#include "third_party/absl/strings/str_cat.h" +-#include "third_party/absl/strings/str_join.h" +-#include "third_party/absl/strings/str_replace.h" +-#include "third_party/absl/strings/str_split.h" +-#include "third_party/absl/strings/string_view.h" +-#include "third_party/absl/strings/strip.h" ++#include "absl/memory/memory.h" ++#include "absl/strings/numbers.h" ++#include "absl/strings/str_cat.h" ++#include "absl/strings/str_join.h" ++#include "absl/strings/str_replace.h" ++#include "absl/strings/str_split.h" ++#include "absl/strings/string_view.h" ++#include "absl/strings/strip.h" + #include "unigram_model.h" + #include "util.h" + +diff --git a/src/sentencepiece_processor.h b/src/sentencepiece_processor.h +index e8bd5f5..e81ebbf 100644 +--- a/src/sentencepiece_processor.h ++++ b/src/sentencepiece_processor.h +@@ -22,7 +22,7 @@ + #include + + #if defined(_USE_INTERNAL_STRING_VIEW) +-#include "third_party/absl/strings/string_view.h" ++#include "absl/strings/string_view.h" + #elif defined(_USE_TF_STRING_VIEW) + #include "absl/strings/string_view.h" + #else +diff --git a/src/sentencepiece_processor_test.cc b/src/sentencepiece_processor_test.cc +index 373e73e..829c3d4 100644 +--- a/src/sentencepiece_processor_test.cc ++++ b/src/sentencepiece_processor_test.cc +@@ -23,10 +23,10 @@ + #include "sentencepiece_processor.h" + #include "sentencepiece_trainer.h" + #include "testharness.h" +-#include "third_party/absl/container/flat_hash_map.h" +-#include "third_party/absl/memory/memory.h" +-#include "third_party/absl/strings/str_cat.h" +-#include "third_party/absl/strings/string_view.h" ++#include "absl/container/flat_hash_map.h" ++#include "absl/memory/memory.h" ++#include "absl/strings/str_cat.h" ++#include "absl/strings/string_view.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/sentencepiece_trainer.cc b/src/sentencepiece_trainer.cc +index b9fe64f..47ef33c 100644 +--- a/src/sentencepiece_trainer.cc ++++ b/src/sentencepiece_trainer.cc +@@ -22,12 +22,12 @@ + #include "sentencepiece_model.pb.h" + #include "sentencepiece_trainer.h" + #include "spec_parser.h" +-#include "third_party/absl/flags/flag.h" +-#include "third_party/absl/strings/numbers.h" +-#include "third_party/absl/strings/str_cat.h" +-#include "third_party/absl/strings/str_split.h" +-#include "third_party/absl/strings/string_view.h" +-#include "third_party/absl/strings/strip.h" ++#include "absl/flags/flag.h" ++#include "absl/strings/numbers.h" ++#include "absl/strings/str_cat.h" ++#include "absl/strings/str_split.h" ++#include "absl/strings/string_view.h" ++#include "absl/strings/strip.h" + #include "trainer_factory.h" + #include "util.h" + +diff --git a/src/sentencepiece_trainer_test.cc b/src/sentencepiece_trainer_test.cc +index e44e66b..fc73b1d 100644 +--- a/src/sentencepiece_trainer_test.cc ++++ b/src/sentencepiece_trainer_test.cc +@@ -16,7 +16,7 @@ + #include "sentencepiece_model.pb.h" + #include "sentencepiece_trainer.h" + #include "testharness.h" +-#include "third_party/absl/strings/str_cat.h" ++#include "absl/strings/str_cat.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/spec_parser.h b/src/spec_parser.h +index 2c5a95b..263f2bd 100644 +--- a/src/spec_parser.h ++++ b/src/spec_parser.h +@@ -19,8 +19,8 @@ + #include + + #include "sentencepiece_processor.h" +-#include "third_party/absl/strings/ascii.h" +-#include "third_party/absl/strings/str_split.h" ++#include "absl/strings/ascii.h" ++#include "absl/strings/str_split.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/spm_decode_main.cc b/src/spm_decode_main.cc +index 3382ddc..9dda65c 100644 +--- a/src/spm_decode_main.cc ++++ b/src/spm_decode_main.cc +@@ -21,8 +21,8 @@ + #include "init.h" + #include "sentencepiece.pb.h" + #include "sentencepiece_processor.h" +-#include "third_party/absl/flags/flag.h" +-#include "third_party/absl/strings/str_split.h" ++#include "absl/flags/flag.h" ++#include "absl/strings/str_split.h" + #include "util.h" + + ABSL_FLAG(std::string, model, "", "model file name"); +diff --git a/src/spm_encode_main.cc b/src/spm_encode_main.cc +index 4d12a38..29b7458 100644 +--- a/src/spm_encode_main.cc ++++ b/src/spm_encode_main.cc +@@ -21,10 +21,10 @@ + #include "init.h" + #include "sentencepiece.pb.h" + #include "sentencepiece_processor.h" +-#include "third_party/absl/container/flat_hash_map.h" +-#include "third_party/absl/flags/flag.h" +-#include "third_party/absl/strings/str_cat.h" +-#include "third_party/absl/strings/str_join.h" ++#include "absl/container/flat_hash_map.h" ++#include "absl/flags/flag.h" ++#include "absl/strings/str_cat.h" ++#include "absl/strings/str_join.h" + #include "trainer_interface.h" + + ABSL_FLAG(std::string, model, "", "model file name"); +diff --git a/src/spm_export_vocab_main.cc b/src/spm_export_vocab_main.cc +index b5d93cb..70a65c1 100644 +--- a/src/spm_export_vocab_main.cc ++++ b/src/spm_export_vocab_main.cc +@@ -20,7 +20,7 @@ + #include "init.h" + #include "sentencepiece_model.pb.h" + #include "sentencepiece_processor.h" +-#include "third_party/absl/flags/flag.h" ++#include "absl/flags/flag.h" + + ABSL_FLAG(std::string, output, "", "Output filename"); + ABSL_FLAG(std::string, model, "", "input model file name"); +diff --git a/src/spm_normalize_main.cc b/src/spm_normalize_main.cc +index 96da360..8c541b8 100644 +--- a/src/spm_normalize_main.cc ++++ b/src/spm_normalize_main.cc +@@ -21,7 +21,7 @@ + #include "sentencepiece_model.pb.h" + #include "sentencepiece_processor.h" + #include "sentencepiece_trainer.h" +-#include "third_party/absl/flags/flag.h" ++#include "absl/flags/flag.h" + + ABSL_FLAG(std::string, model, "", "Model file name"); + ABSL_FLAG(bool, use_internal_normalization, false, +diff --git a/src/spm_train_main.cc b/src/spm_train_main.cc +index baf8dbf..ba1e811 100644 +--- a/src/spm_train_main.cc ++++ b/src/spm_train_main.cc +@@ -18,10 +18,10 @@ + #include "init.h" + #include "sentencepiece_model.pb.h" + #include "sentencepiece_trainer.h" +-#include "third_party/absl/flags/flag.h" +-#include "third_party/absl/strings/ascii.h" +-#include "third_party/absl/strings/str_join.h" +-#include "third_party/absl/strings/str_split.h" ++#include "absl/flags/flag.h" ++#include "absl/strings/ascii.h" ++#include "absl/strings/str_join.h" ++#include "absl/strings/str_split.h" + #include "util.h" + + using sentencepiece::NormalizerSpec; +diff --git a/src/testharness.cc b/src/testharness.cc +index f6b1efe..daf2d14 100644 +--- a/src/testharness.cc ++++ b/src/testharness.cc +@@ -26,7 +26,7 @@ + #include + + #include "common.h" +-#include "third_party/absl/strings/str_cat.h" ++#include "absl/strings/str_cat.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/testharness.h b/src/testharness.h +index 9879b06..98317ad 100644 +--- a/src/testharness.h ++++ b/src/testharness.h +@@ -21,9 +21,9 @@ + #include + + #include "common.h" +-#include "third_party/absl/flags/flag.h" +-#include "third_party/absl/flags/parse.h" +-#include "third_party/absl/strings/string_view.h" ++#include "absl/flags/flag.h" ++#include "absl/flags/parse.h" ++#include "absl/strings/string_view.h" + + ABSL_DECLARE_FLAG(std::string, test_tmpdir); + ABSL_DECLARE_FLAG(std::string, test_srcdir); +diff --git a/src/trainer_factory.cc b/src/trainer_factory.cc +index d1d2541..ff594d0 100644 +--- a/src/trainer_factory.cc ++++ b/src/trainer_factory.cc +@@ -14,7 +14,7 @@ + + #include "bpe_model_trainer.h" + #include "char_model_trainer.h" +-#include "third_party/absl/memory/memory.h" ++#include "absl/memory/memory.h" + #include "trainer_factory.h" + #include "unigram_model_trainer.h" + #include "word_model_trainer.h" +diff --git a/src/trainer_interface.cc b/src/trainer_interface.cc +index a3a4b74..70f2f72 100644 +--- a/src/trainer_interface.cc ++++ b/src/trainer_interface.cc +@@ -26,13 +26,13 @@ + #include "normalizer.h" + #include "sentencepiece_processor.h" + #include "sentencepiece_trainer.h" +-#include "third_party/absl/container/flat_hash_map.h" +-#include "third_party/absl/memory/memory.h" +-#include "third_party/absl/strings/numbers.h" +-#include "third_party/absl/strings/str_cat.h" +-#include "third_party/absl/strings/str_format.h" +-#include "third_party/absl/strings/str_join.h" +-#include "third_party/absl/strings/str_split.h" ++#include "absl/container/flat_hash_map.h" ++#include "absl/memory/memory.h" ++#include "absl/strings/numbers.h" ++#include "absl/strings/str_cat.h" ++#include "absl/strings/str_format.h" ++#include "absl/strings/str_join.h" ++#include "absl/strings/str_split.h" + #include "trainer_interface.h" + #include "unicode_script.h" + #include "util.h" +diff --git a/src/trainer_interface.h b/src/trainer_interface.h +index f66d59a..8a654ec 100644 +--- a/src/trainer_interface.h ++++ b/src/trainer_interface.h +@@ -27,7 +27,7 @@ + #include "sentencepiece_model.pb.h" + #include "sentencepiece_processor.h" + #include "sentencepiece_trainer.h" +-#include "third_party/absl/container/flat_hash_map.h" ++#include "absl/container/flat_hash_map.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/trainer_interface_test.cc b/src/trainer_interface_test.cc +index 70a51ad..d7f3f0c 100644 +--- a/src/trainer_interface_test.cc ++++ b/src/trainer_interface_test.cc +@@ -16,8 +16,8 @@ + + #include "filesystem.h" + #include "testharness.h" +-#include "third_party/absl/strings/str_cat.h" +-#include "third_party/absl/strings/str_format.h" ++#include "absl/strings/str_cat.h" ++#include "absl/strings/str_format.h" + #include "trainer_interface.h" + #include "util.h" + +diff --git a/src/unicode_script.cc b/src/unicode_script.cc +index 583dc30..11b24dc 100644 +--- a/src/unicode_script.cc ++++ b/src/unicode_script.cc +@@ -14,7 +14,7 @@ + + #include + +-#include "third_party/absl/container/flat_hash_map.h" ++#include "absl/container/flat_hash_map.h" + #include "unicode_script.h" + #include "unicode_script_map.h" + #include "util.h" +diff --git a/src/unicode_script_map.h b/src/unicode_script_map.h +index f2e67e9..f1b8299 100644 +--- a/src/unicode_script_map.h ++++ b/src/unicode_script_map.h +@@ -14,7 +14,7 @@ + + #ifndef UNICODE_SCRIPT_DATA_H_ + #define UNICODE_SCRIPT_DATA_H_ +-#include "third_party/absl/container/flat_hash_map.h" ++#include "absl/container/flat_hash_map.h" + namespace sentencepiece { + namespace unicode_script { + namespace { +diff --git a/src/unicode_script_test.cc b/src/unicode_script_test.cc +index ab33565..e0b1c4d 100644 +--- a/src/unicode_script_test.cc ++++ b/src/unicode_script_test.cc +@@ -14,7 +14,7 @@ + + #include "common.h" + #include "testharness.h" +-#include "third_party/absl/strings/string_view.h" ++#include "absl/strings/string_view.h" + #include "unicode_script.h" + #include "util.h" + +diff --git a/src/unigram_model.cc b/src/unigram_model.cc +index 3b99060..9c72fb9 100644 +--- a/src/unigram_model.cc ++++ b/src/unigram_model.cc +@@ -22,9 +22,9 @@ + #include + #include + +-#include "third_party/absl/memory/memory.h" +-#include "third_party/absl/strings/str_split.h" +-#include "third_party/absl/strings/string_view.h" ++#include "absl/memory/memory.h" ++#include "absl/strings/str_split.h" ++#include "absl/strings/string_view.h" + #include "unigram_model.h" + #include "util.h" + +diff --git a/src/unigram_model.h b/src/unigram_model.h +index 448e489..9062f12 100644 +--- a/src/unigram_model.h ++++ b/src/unigram_model.h +@@ -24,7 +24,7 @@ + #include "freelist.h" + #include "model_interface.h" + #include "sentencepiece_model.pb.h" +-#include "third_party/darts_clone/darts.h" ++#include "include/darts.h" + + namespace sentencepiece { + namespace unigram { +diff --git a/src/unigram_model_test.cc b/src/unigram_model_test.cc +index f93b21c..808e907 100644 +--- a/src/unigram_model_test.cc ++++ b/src/unigram_model_test.cc +@@ -22,8 +22,8 @@ + #include "sentencepiece_model.pb.h" + #include "sentencepiece_processor.h" + #include "testharness.h" +-#include "third_party/absl/strings/str_cat.h" +-#include "third_party/absl/strings/str_join.h" ++#include "absl/strings/str_cat.h" ++#include "absl/strings/str_join.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/unigram_model_trainer.cc b/src/unigram_model_trainer.cc +index 9615040..373ec95 100644 +--- a/src/unigram_model_trainer.cc ++++ b/src/unigram_model_trainer.cc +@@ -25,8 +25,8 @@ + #include "normalizer.h" + #include "pretokenizer_for_training.h" + #include "sentencepiece_trainer.h" +-#include "third_party/absl/container/flat_hash_map.h" +-#include "third_party/absl/memory/memory.h" ++#include "absl/container/flat_hash_map.h" ++#include "absl/memory/memory.h" + #include "third_party/esaxx/esa.hxx" // Suffix array library. + #include "unicode_script.h" + #include "unigram_model_trainer.h" +diff --git a/src/unigram_model_trainer.h b/src/unigram_model_trainer.h +index 91fbeb4..f2d6b36 100644 +--- a/src/unigram_model_trainer.h ++++ b/src/unigram_model_trainer.h +@@ -21,7 +21,7 @@ + #include + + #include "sentencepiece_model.pb.h" +-#include "third_party/absl/strings/string_view.h" ++#include "absl/strings/string_view.h" + #include "trainer_interface.h" + #include "unigram_model.h" + #include "util.h" +diff --git a/src/unigram_model_trainer_test.cc b/src/unigram_model_trainer_test.cc +index ffe515e..fdb25f6 100644 +--- a/src/unigram_model_trainer_test.cc ++++ b/src/unigram_model_trainer_test.cc +@@ -16,8 +16,8 @@ + #include "sentencepiece_processor.h" + #include "sentencepiece_trainer.h" + #include "testharness.h" +-#include "third_party/absl/strings/str_cat.h" +-#include "third_party/absl/strings/str_join.h" ++#include "absl/strings/str_cat.h" ++#include "absl/strings/str_join.h" + #include "unigram_model_trainer.h" + #include "util.h" + +diff --git a/src/util.h b/src/util.h +index 0d15863..d4a2d51 100644 +--- a/src/util.h ++++ b/src/util.h +@@ -30,7 +30,7 @@ + + #include "common.h" + #include "sentencepiece_processor.h" +-#include "third_party/absl/strings/string_view.h" ++#include "absl/strings/string_view.h" + + #ifdef SPM_NO_THREADLOCAL + #include +diff --git a/src/util_test.cc b/src/util_test.cc +index 71d006f..231fc96 100644 +--- a/src/util_test.cc ++++ b/src/util_test.cc +@@ -16,7 +16,7 @@ + + #include "filesystem.h" + #include "testharness.h" +-#include "third_party/absl/strings/str_cat.h" ++#include "absl/strings/str_cat.h" + #include "util.h" + + namespace sentencepiece { +diff --git a/src/word_model_trainer.cc b/src/word_model_trainer.cc +index 0b8b062..b057843 100644 +--- a/src/word_model_trainer.cc ++++ b/src/word_model_trainer.cc +@@ -15,8 +15,8 @@ + #include + #include + +-#include "third_party/absl/container/flat_hash_map.h" +-#include "third_party/absl/strings/string_view.h" ++#include "absl/container/flat_hash_map.h" ++#include "absl/strings/string_view.h" + #include "util.h" + #include "word_model.h" + #include "word_model_trainer.h" +diff --git a/src/word_model_trainer_test.cc b/src/word_model_trainer_test.cc +index c4a8bc6..366810f 100644 +--- a/src/word_model_trainer_test.cc ++++ b/src/word_model_trainer_test.cc +@@ -18,8 +18,8 @@ + #include "filesystem.h" + #include "sentencepiece_processor.h" + #include "testharness.h" +-#include "third_party/absl/strings/str_cat.h" +-#include "third_party/absl/strings/str_join.h" ++#include "absl/strings/str_cat.h" ++#include "absl/strings/str_join.h" + #include "util.h" + #include "word_model_trainer.h" + \ No newline at end of file diff --git a/third_party/com_google_sentencepiece_no_gflag_no_gtest.diff b/third_party/com_google_sentencepiece_no_gflag_no_gtest.diff deleted file mode 100644 index a084d9262..000000000 --- a/third_party/com_google_sentencepiece_no_gflag_no_gtest.diff +++ /dev/null @@ -1,34 +0,0 @@ -diff --git a/src/BUILD b/src/BUILD -index b4298d2..f3877a3 100644 ---- a/src/BUILD -+++ b/src/BUILD -@@ -71,9 +71,7 @@ cc_library( - ":common", - ":sentencepiece_cc_proto", - ":sentencepiece_model_cc_proto", -- "@com_github_gflags_gflags//:gflags", - "@com_google_glog//:glog", -- "@com_google_googletest//:gtest", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/container:flat_hash_map", -diff --git a/src/normalizer.h b/src/normalizer.h -index c16ac16..2af58be 100644 ---- a/src/normalizer.h -+++ b/src/normalizer.h -@@ -21,7 +21,6 @@ - #include - #include - --#include "gtest/gtest_prod.h" - #include "absl/strings/string_view.h" - #include "third_party/darts_clone/include/darts.h" - #include "src/common.h" -@@ -97,7 +96,6 @@ class Normalizer { - friend class Builder; - - private: -- FRIEND_TEST(NormalizerTest, EncodeDecodePrecompiledCharsMapTest); - - void Init(); - diff --git a/third_party/sentencepiece.BUILD b/third_party/sentencepiece.BUILD new file mode 100644 index 000000000..19ec771a1 --- /dev/null +++ b/third_party/sentencepiece.BUILD @@ -0,0 +1,96 @@ +package( + default_visibility = ["//visibility:public"], + features = [ + "layering_check", + "parse_headers", + ], +) + +licenses(["notice"]) # Apache 2, BSD, MIT + +proto_library( + name = "sentencepiece_proto", + srcs = ["src/sentencepiece.proto"], +) + +cc_proto_library( + name = "sentencepiece_cc_proto", + deps = [":sentencepiece_proto"], +) + +proto_library( + name = "sentencepiece_model_proto", + srcs = ["src/sentencepiece_model.proto"], +) + +cc_proto_library( + name = "sentencepiece_model_cc_proto", + deps = [":sentencepiece_model_proto"], +) + +genrule( + name = "config_h", + srcs = ["config.h.in"], + outs = ["config.h"], + cmd = "cp $< $@", +) + +cc_library( + name = "common", + hdrs = [ + "config.h", + "src/common.h", + ], + deps = [ + "@com_google_absl//absl/base", + ], +) + +cc_library( + name = "sentencepiece_processor", + srcs = [ + "src/bpe_model.cc", + "src/char_model.cc", + "src/error.cc", + "src/filesystem.cc", + "src/model_factory.cc", + "src/model_interface.cc", + "src/normalizer.cc", + "src/sentencepiece_processor.cc", + "src/unigram_model.cc", + "src/util.cc", + "src/word_model.cc", + ], + hdrs = [ + "src/bpe_model.h", + "src/char_model.h", + "src/filesystem.h", + "src/freelist.h", + "src/model_factory.h", + "src/model_interface.h", + "src/normalizer.h", + "src/sentencepiece_processor.h", + "src/trainer_interface.h", + "src/unigram_model.h", + "src/util.h", + "src/word_model.h", + ], + defines = ["_USE_TF_STRING_VIEW"], + includes = [ + ".", + "src", + ], + linkstatic = 1, + deps = + [ + ":common", + ":sentencepiece_cc_proto", + ":sentencepiece_model_cc_proto", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@darts_clone", + ], +)