From f49fb9ad5770a1eb9d5252169e1226cab3324c36 Mon Sep 17 00:00:00 2001 From: Yuqi Li Date: Thu, 6 Oct 2022 10:18:59 -0700 Subject: [PATCH] move unzip logic from metadata_extractor to zip_utils. PiperOrigin-RevId: 479346238 --- mediapipe/tasks/cc/common.h | 2 + mediapipe/tasks/cc/metadata/BUILD | 3 +- .../tasks/cc/metadata/metadata_extractor.cc | 129 +------------- mediapipe/tasks/cc/metadata/utils/BUILD | 16 ++ .../tasks/cc/metadata/utils/zip_utils.cc | 167 ++++++++++++++++++ mediapipe/tasks/cc/metadata/utils/zip_utils.h | 41 +++++ 6 files changed, 234 insertions(+), 124 deletions(-) create mode 100644 mediapipe/tasks/cc/metadata/utils/zip_utils.cc create mode 100644 mediapipe/tasks/cc/metadata/utils/zip_utils.h diff --git a/mediapipe/tasks/cc/common.h b/mediapipe/tasks/cc/common.h index 62656b7b3..1295177df 100644 --- a/mediapipe/tasks/cc/common.h +++ b/mediapipe/tasks/cc/common.h @@ -65,6 +65,8 @@ enum class MediaPipeTasksStatus { kFileReadError, // I/O error when mmap-ing file. kFileMmapError, + // ZIP I/O error when unpacking the zip file. + kFileZipError, // TensorFlow Lite metadata error codes. 
diff --git a/mediapipe/tasks/cc/metadata/BUILD b/mediapipe/tasks/cc/metadata/BUILD index c19450448..ef32dd184 100644 --- a/mediapipe/tasks/cc/metadata/BUILD +++ b/mediapipe/tasks/cc/metadata/BUILD @@ -19,7 +19,7 @@ cc_library( deps = [ "//mediapipe/framework/port:status", "//mediapipe/tasks/cc:common", - "//mediapipe/tasks/cc/metadata/utils:zip_readonly_mem_file", + "//mediapipe/tasks/cc/metadata/utils:zip_utils", "//mediapipe/tasks/metadata:metadata_schema_cc", "@com_google_absl//absl/cleanup", "@com_google_absl//absl/container:flat_hash_map", @@ -30,7 +30,6 @@ cc_library( "@com_google_absl//absl/strings:str_format", "@flatbuffers//:runtime_cc", "@org_tensorflow//tensorflow/lite/schema:schema_fbs", - "@zlib//:zlib_minizip", ], ) diff --git a/mediapipe/tasks/cc/metadata/metadata_extractor.cc b/mediapipe/tasks/cc/metadata/metadata_extractor.cc index 31e59ea9f..4bc3e8ba0 100644 --- a/mediapipe/tasks/cc/metadata/metadata_extractor.cc +++ b/mediapipe/tasks/cc/metadata/metadata_extractor.cc @@ -20,14 +20,13 @@ limitations under the License. #include "absl/cleanup/cleanup.h" #include "absl/memory/memory.h" #include "absl/status/status.h" +#include "absl/strings/match.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" -#include "contrib/minizip/ioapi.h" -#include "contrib/minizip/unzip.h" #include "flatbuffers/flatbuffers.h" #include "mediapipe/framework/port/status_macros.h" #include "mediapipe/tasks/cc/common.h" -#include "mediapipe/tasks/cc/metadata/utils/zip_readonly_mem_file.h" +#include "mediapipe/tasks/cc/metadata/utils/zip_utils.h" #include "mediapipe/tasks/metadata/metadata_schema_generated.h" #include "tensorflow/lite/schema/schema_generated.h" @@ -54,80 +53,6 @@ const T* GetItemFromVector( } return src_vector->Get(index); } - -// Wrapper function around calls to unzip to avoid repeating conversion logic -// from error code to Status. 
-absl::Status UnzipErrorToStatus(int error) { - if (error != UNZ_OK) { - return CreateStatusWithPayload( - StatusCode::kUnknown, "Unable to read associated file in zip archive.", - MediaPipeTasksStatus::kMetadataAssociatedFileZipError); - } - return absl::OkStatus(); -} - -// Stores a file name, position in zip buffer and size. -struct ZipFileInfo { - std::string name; - ZPOS64_T position; - ZPOS64_T size; -}; - -// Returns the ZipFileInfo corresponding to the current file in the provided -// unzFile object. -absl::StatusOr<ZipFileInfo> GetCurrentZipFileInfo(const unzFile& zf) { - // Open file in raw mode, as data is expected to be uncompressed. - int method; - MP_RETURN_IF_ERROR(UnzipErrorToStatus( - unzOpenCurrentFile2(zf, &method, /*level=*/nullptr, /*raw=*/1))); - absl::Cleanup unzipper_closer = [zf]() { - auto status = UnzipErrorToStatus(unzCloseCurrentFile(zf)); - if (!status.ok()) { - LOG(ERROR) << "Failed to close the current zip file: " << status; - } - }; - if (method != Z_NO_COMPRESSION) { - return CreateStatusWithPayload( - StatusCode::kUnknown, "Expected uncompressed zip archive.", - MediaPipeTasksStatus::kMetadataAssociatedFileZipError); - } - - // Get file info a first time to get filename size. - unz_file_info64 file_info; - MP_RETURN_IF_ERROR(UnzipErrorToStatus(unzGetCurrentFileInfo64( - zf, &file_info, /*szFileName=*/nullptr, /*szFileNameBufferSize=*/0, - /*extraField=*/nullptr, /*extraFieldBufferSize=*/0, - /*szComment=*/nullptr, /*szCommentBufferSize=*/0))); - - // Second call to get file name. - auto file_name_size = file_info.size_filename; - char* c_file_name = (char*)malloc(file_name_size); - MP_RETURN_IF_ERROR(UnzipErrorToStatus(unzGetCurrentFileInfo64( - zf, &file_info, c_file_name, file_name_size, - /*extraField=*/nullptr, /*extraFieldBufferSize=*/0, - /*szComment=*/nullptr, /*szCommentBufferSize=*/0))); - std::string file_name = std::string(c_file_name, file_name_size); - free(c_file_name); - - // Get position in file. 
- auto position = unzGetCurrentFileZStreamPos64(zf); - if (position == 0) { - return CreateStatusWithPayload( - StatusCode::kUnknown, "Unable to read file in zip archive.", - MediaPipeTasksStatus::kMetadataAssociatedFileZipError); - } - - // Perform the cleanup manually for error propagation. - std::move(unzipper_closer).Cancel(); - // Close file and return. - MP_RETURN_IF_ERROR(UnzipErrorToStatus(unzCloseCurrentFile(zf))); - - ZipFileInfo result{}; - result.name = file_name; - result.position = position; - result.size = file_info.uncompressed_size; - return result; -} } // namespace /* static */ @@ -247,55 +172,15 @@ absl::Status ModelMetadataExtractor::InitFromModelBuffer( absl::Status ModelMetadataExtractor::ExtractAssociatedFiles( const char* buffer_data, size_t buffer_size) { - // Create in-memory read-only zip file. - ZipReadOnlyMemFile mem_file = ZipReadOnlyMemFile(buffer_data, buffer_size); - // Open zip. - unzFile zf = unzOpen2_64(/*path=*/nullptr, &mem_file.GetFileFunc64Def()); - if (zf == nullptr) { + auto status = + ExtractFilesfromZipFile(buffer_data, buffer_size, &associated_files_); + if (!status.ok() && + absl::StrContains(status.message(), "Unable to open zip archive.")) { // It's OK if it fails: this means there are no associated files with this // model. return absl::OkStatus(); } - absl::Cleanup unzipper_closer = [zf]() { - if (unzClose(zf) != UNZ_OK) { - LOG(ERROR) << "Unable to close zip archive."; - } - }; - // Get number of files. - unz_global_info global_info; - if (unzGetGlobalInfo(zf, &global_info) != UNZ_OK) { - return CreateStatusWithPayload( - StatusCode::kUnknown, "Unable to get zip archive info.", - MediaPipeTasksStatus::kMetadataAssociatedFileZipError); - } - - // Browse through files in archive. - if (global_info.number_entry > 0) { - int error = unzGoToFirstFile(zf); - while (error == UNZ_OK) { - ASSIGN_OR_RETURN(auto zip_file_info, GetCurrentZipFileInfo(zf)); - // Store result in map. 
- associated_files_[zip_file_info.name] = absl::string_view( - buffer_data + zip_file_info.position, zip_file_info.size); - error = unzGoToNextFile(zf); - } - if (error != UNZ_END_OF_LIST_OF_FILE) { - return CreateStatusWithPayload( - StatusCode::kUnknown, - "Unable to read associated file in zip archive.", - MediaPipeTasksStatus::kMetadataAssociatedFileZipError); - } - } - - // Perform the cleanup manually for error propagation. - std::move(unzipper_closer).Cancel(); - // Close zip. - if (unzClose(zf) != UNZ_OK) { - return CreateStatusWithPayload( - StatusCode::kUnknown, "Unable to close zip archive.", - MediaPipeTasksStatus::kMetadataAssociatedFileZipError); - } - return absl::OkStatus(); + return status; } absl::StatusOr<absl::string_view> ModelMetadataExtractor::GetAssociatedFile( diff --git a/mediapipe/tasks/cc/metadata/utils/BUILD b/mediapipe/tasks/cc/metadata/utils/BUILD index b595eb10f..bc11f9d38 100644 --- a/mediapipe/tasks/cc/metadata/utils/BUILD +++ b/mediapipe/tasks/cc/metadata/utils/BUILD @@ -24,3 +24,19 @@ cc_library( "@zlib//:zlib_minizip", ], ) + +cc_library( + name = "zip_utils", + srcs = ["zip_utils.cc"], + hdrs = ["zip_utils.h"], + deps = [ + ":zip_readonly_mem_file", + "//mediapipe/framework/port:status", + "//mediapipe/tasks/cc:common", + "@com_google_absl//absl/cleanup", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@zlib//:zlib_minizip", + ], +) diff --git a/mediapipe/tasks/cc/metadata/utils/zip_utils.cc b/mediapipe/tasks/cc/metadata/utils/zip_utils.cc new file mode 100644 index 000000000..b703fccea --- /dev/null +++ b/mediapipe/tasks/cc/metadata/utils/zip_utils.cc @@ -0,0 +1,167 @@ +/* Copyright 2022 The MediaPipe Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/cc/metadata/utils/zip_utils.h" + +#include "absl/cleanup/cleanup.h" +#include "absl/container/flat_hash_map.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "contrib/minizip/ioapi.h" +#include "contrib/minizip/unzip.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/tasks/cc/common.h" +#include "mediapipe/tasks/cc/metadata/utils/zip_readonly_mem_file.h" + +namespace mediapipe { +namespace tasks { +namespace metadata { + +namespace { + +using ::absl::StatusCode; + +// Wrapper function around calls to unzip to avoid repeating conversion logic +// from error code to Status. +absl::Status UnzipErrorToStatus(int error) { + if (error != UNZ_OK) { + return CreateStatusWithPayload(StatusCode::kUnknown, + "Unable to read the file in zip archive.", + MediaPipeTasksStatus::kFileZipError); + } + return absl::OkStatus(); +} + +// Stores a file name, position in zip buffer and size. +struct ZipFileInfo { + std::string name; + ZPOS64_T position; + ZPOS64_T size; +}; + +// Returns the ZipFileInfo corresponding to the current file in the provided +// unzFile object. +absl::StatusOr<ZipFileInfo> GetCurrentZipFileInfo(const unzFile& zf) { + // Open file in raw mode, as data is expected to be uncompressed. 
+ int method; + MP_RETURN_IF_ERROR(UnzipErrorToStatus( + unzOpenCurrentFile2(zf, &method, /*level=*/nullptr, /*raw=*/1))); + absl::Cleanup unzipper_closer = [zf]() { + auto status = UnzipErrorToStatus(unzCloseCurrentFile(zf)); + if (!status.ok()) { + LOG(ERROR) << "Failed to close the current zip file: " << status; + } + }; + if (method != Z_NO_COMPRESSION) { + return CreateStatusWithPayload(StatusCode::kUnknown, + "Expected uncompressed zip archive.", + MediaPipeTasksStatus::kFileZipError); + } + + // Get file info a first time to get filename size. + unz_file_info64 file_info; + MP_RETURN_IF_ERROR(UnzipErrorToStatus(unzGetCurrentFileInfo64( + zf, &file_info, /*szFileName=*/nullptr, /*szFileNameBufferSize=*/0, + /*extraField=*/nullptr, /*extraFieldBufferSize=*/0, + /*szComment=*/nullptr, /*szCommentBufferSize=*/0))); + + // Second call to get file name. + auto file_name_size = file_info.size_filename; + char* c_file_name = (char*)malloc(file_name_size); + MP_RETURN_IF_ERROR(UnzipErrorToStatus(unzGetCurrentFileInfo64( + zf, &file_info, c_file_name, file_name_size, + /*extraField=*/nullptr, /*extraFieldBufferSize=*/0, + /*szComment=*/nullptr, /*szCommentBufferSize=*/0))); + std::string file_name = std::string(c_file_name, file_name_size); + free(c_file_name); + + // Get position in file. + auto position = unzGetCurrentFileZStreamPos64(zf); + if (position == 0) { + return CreateStatusWithPayload(StatusCode::kUnknown, + "Unable to read file in zip archive.", + MediaPipeTasksStatus::kFileZipError); + } + + // Perform the cleanup manually for error propagation. + std::move(unzipper_closer).Cancel(); + // Close file and return. 
+ MP_RETURN_IF_ERROR(UnzipErrorToStatus(unzCloseCurrentFile(zf))); + + ZipFileInfo result{}; + result.name = file_name; + result.position = position; + result.size = file_info.uncompressed_size; + return result; +} + +} // namespace + +absl::Status ExtractFilesfromZipFile( + const char* buffer_data, const size_t buffer_size, + absl::flat_hash_map<std::string, absl::string_view>* files) { + // Create in-memory read-only zip file. + ZipReadOnlyMemFile mem_file = ZipReadOnlyMemFile(buffer_data, buffer_size); + // Open zip. + unzFile zf = unzOpen2_64(/*path=*/nullptr, &mem_file.GetFileFunc64Def()); + if (zf == nullptr) { + return CreateStatusWithPayload(StatusCode::kUnknown, + "Unable to open zip archive.", + MediaPipeTasksStatus::kFileZipError); + } + absl::Cleanup unzipper_closer = [zf]() { + if (unzClose(zf) != UNZ_OK) { + LOG(ERROR) << "Unable to close zip archive."; + } + }; + // Get number of files. + unz_global_info global_info; + if (unzGetGlobalInfo(zf, &global_info) != UNZ_OK) { + return CreateStatusWithPayload(StatusCode::kUnknown, + "Unable to get zip archive info.", + MediaPipeTasksStatus::kFileZipError); + } + + // Browse through files in archive. + if (global_info.number_entry > 0) { + int error = unzGoToFirstFile(zf); + while (error == UNZ_OK) { + ASSIGN_OR_RETURN(auto zip_file_info, GetCurrentZipFileInfo(zf)); + // Store result in map. + (*files)[zip_file_info.name] = absl::string_view( + buffer_data + zip_file_info.position, zip_file_info.size); + error = unzGoToNextFile(zf); + } + if (error != UNZ_END_OF_LIST_OF_FILE) { + return CreateStatusWithPayload( + StatusCode::kUnknown, + "Unable to read associated file in zip archive.", + MediaPipeTasksStatus::kFileZipError); + } + } + // Perform the cleanup manually for error propagation. + std::move(unzipper_closer).Cancel(); + // Close zip. 
+ if (unzClose(zf) != UNZ_OK) { + return CreateStatusWithPayload(StatusCode::kUnknown, + "Unable to close zip archive.", + MediaPipeTasksStatus::kFileZipError); + } + return absl::OkStatus(); +} + +} // namespace metadata +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/metadata/utils/zip_utils.h b/mediapipe/tasks/cc/metadata/utils/zip_utils.h new file mode 100644 index 000000000..52ccf17dc --- /dev/null +++ b/mediapipe/tasks/cc/metadata/utils/zip_utils.h @@ -0,0 +1,41 @@ +/* Copyright 2022 The MediaPipe Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_CC_METADATA_UTILS_ZIP_UTILS_H_ +#define MEDIAPIPE_TASKS_CC_METADATA_UTILS_ZIP_UTILS_H_ + +#include <string> + +#include "absl/container/flat_hash_map.h" +#include "absl/status/status.h" + +namespace mediapipe { +namespace tasks { +namespace metadata { + +// Extract files from the zip file. +// Input: Pointer and length of the zip file in memory. +// Outputs: A map with the filename as key and a pointer to the file contents +// as value. The file contents returned by this function are only guaranteed to +// stay valid while buffer_data is alive. 
+absl::Status ExtractFilesfromZipFile( + const char* buffer_data, const size_t buffer_size, + absl::flat_hash_map<std::string, absl::string_view>* files); + +} // namespace metadata +} // namespace tasks +} // namespace mediapipe + +#endif // MEDIAPIPE_TASKS_CC_METADATA_UTILS_ZIP_UTILS_H_