Move unzip logic from metadata_extractor to zip_utils.

PiperOrigin-RevId: 479346238
This commit is contained in:
Yuqi Li 2022-10-06 10:18:59 -07:00 committed by Copybara-Service
parent e270ef660d
commit f49fb9ad57
6 changed files with 234 additions and 124 deletions

View File

@ -65,6 +65,8 @@ enum class MediaPipeTasksStatus {
kFileReadError,
// I/O error when mmap-ing file.
kFileMmapError,
// ZIP I/O error when unpacking the zip file.
kFileZipError,
// TensorFlow Lite metadata error codes.

View File

@ -19,7 +19,7 @@ cc_library(
deps = [
"//mediapipe/framework/port:status",
"//mediapipe/tasks/cc:common",
"//mediapipe/tasks/cc/metadata/utils:zip_readonly_mem_file",
"//mediapipe/tasks/cc/metadata/utils:zip_utils",
"//mediapipe/tasks/metadata:metadata_schema_cc",
"@com_google_absl//absl/cleanup",
"@com_google_absl//absl/container:flat_hash_map",
@ -30,7 +30,6 @@ cc_library(
"@com_google_absl//absl/strings:str_format",
"@flatbuffers//:runtime_cc",
"@org_tensorflow//tensorflow/lite/schema:schema_fbs",
"@zlib//:zlib_minizip",
],
)

View File

@ -20,14 +20,13 @@ limitations under the License.
#include "absl/cleanup/cleanup.h"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/strings/match.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "contrib/minizip/ioapi.h"
#include "contrib/minizip/unzip.h"
#include "flatbuffers/flatbuffers.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/tasks/cc/common.h"
#include "mediapipe/tasks/cc/metadata/utils/zip_readonly_mem_file.h"
#include "mediapipe/tasks/cc/metadata/utils/zip_utils.h"
#include "mediapipe/tasks/metadata/metadata_schema_generated.h"
#include "tensorflow/lite/schema/schema_generated.h"
@ -54,80 +53,6 @@ const T* GetItemFromVector(
}
return src_vector->Get(index);
}
// Converts a minizip return code into an absl::Status so call sites do not
// have to repeat the translation: UNZ_OK becomes OkStatus, every other code
// becomes a kUnknown status with the kMetadataAssociatedFileZipError payload.
absl::Status UnzipErrorToStatus(int error) {
  if (error == UNZ_OK) {
    return absl::OkStatus();
  }
  return CreateStatusWithPayload(
      StatusCode::kUnknown, "Unable to read associated file in zip archive.",
      MediaPipeTasksStatus::kMetadataAssociatedFileZipError);
}
// Stores a file name, position in zip buffer and size.
struct ZipFileInfo {
// Name of the file inside the archive.
std::string name;
// Offset of the file's data; added to the zip buffer pointer to locate it.
ZPOS64_T position;
// Size of the file's data (filled from the entry's uncompressed size).
ZPOS64_T size;
};
// Returns the ZipFileInfo (name, position of the data in the zip buffer and
// size) corresponding to the current file in the provided unzFile object.
//
// Returns a kUnknown status with the kMetadataAssociatedFileZipError payload
// if a minizip call fails, if the entry is stored compressed, or if the
// position of its data cannot be determined.
absl::StatusOr<ZipFileInfo> GetCurrentZipFileInfo(const unzFile& zf) {
  // Open file in raw mode, as data is expected to be uncompressed.
  int method = -1;
  MP_RETURN_IF_ERROR(UnzipErrorToStatus(
      unzOpenCurrentFile2(zf, &method, /*level=*/nullptr, /*raw=*/1)));
  // Makes sure the entry gets closed on every early return below.
  absl::Cleanup unzipper_closer = [zf]() {
    auto status = UnzipErrorToStatus(unzCloseCurrentFile(zf));
    if (!status.ok()) {
      LOG(ERROR) << "Failed to close the current zip file: " << status;
    }
  };
  if (method != Z_NO_COMPRESSION) {
    return CreateStatusWithPayload(
        StatusCode::kUnknown, "Expected uncompressed zip archive.",
        MediaPipeTasksStatus::kMetadataAssociatedFileZipError);
  }

  // Get file info a first time to get filename size.
  unz_file_info64 file_info;
  MP_RETURN_IF_ERROR(UnzipErrorToStatus(unzGetCurrentFileInfo64(
      zf, &file_info, /*szFileName=*/nullptr, /*szFileNameBufferSize=*/0,
      /*extraField=*/nullptr, /*extraFieldBufferSize=*/0,
      /*szComment=*/nullptr, /*szCommentBufferSize=*/0)));

  // Second call to get file name. The name is written directly into a
  // std::string: the buffer is then released on every return path, including
  // the early return taken when this call fails (a malloc'ed buffer would
  // leak there).
  const auto file_name_size = file_info.size_filename;
  std::string file_name(file_name_size, '\0');
  MP_RETURN_IF_ERROR(UnzipErrorToStatus(unzGetCurrentFileInfo64(
      zf, &file_info, &file_name[0], file_name_size,
      /*extraField=*/nullptr, /*extraFieldBufferSize=*/0,
      /*szComment=*/nullptr, /*szCommentBufferSize=*/0)));

  // Get position in file.
  auto position = unzGetCurrentFileZStreamPos64(zf);
  if (position == 0) {
    return CreateStatusWithPayload(
        StatusCode::kUnknown, "Unable to read file in zip archive.",
        MediaPipeTasksStatus::kMetadataAssociatedFileZipError);
  }

  // Perform the cleanup manually for error propagation.
  std::move(unzipper_closer).Cancel();
  // Close file and return.
  MP_RETURN_IF_ERROR(UnzipErrorToStatus(unzCloseCurrentFile(zf)));

  ZipFileInfo result{};
  result.name = std::move(file_name);
  result.position = position;
  result.size = file_info.uncompressed_size;
  return result;
}
} // namespace
/* static */
@ -247,55 +172,15 @@ absl::Status ModelMetadataExtractor::InitFromModelBuffer(
// Fills associated_files_ with the files found in the zip archive held in
// [buffer_data, buffer_data + buffer_size). A buffer that cannot be opened as
// a zip archive is not an error: it is taken to mean that the model has no
// associated files, and OkStatus is returned.
absl::Status ModelMetadataExtractor::ExtractAssociatedFiles(
const char* buffer_data, size_t buffer_size) {
// Create in-memory read-only zip file.
ZipReadOnlyMemFile mem_file = ZipReadOnlyMemFile(buffer_data, buffer_size);
// Open zip.
unzFile zf = unzOpen2_64(/*path=*/nullptr, &mem_file.GetFileFunc64Def());
if (zf == nullptr) {
auto status =
ExtractFilesfromZipFile(buffer_data, buffer_size, &associated_files_);
if (!status.ok() &&
absl::StrContains(status.message(), "Unable to open zip archive.")) {
// It's OK if it fails: this means there are no associated files with this
// model.
return absl::OkStatus();
}
absl::Cleanup unzipper_closer = [zf]() {
if (unzClose(zf) != UNZ_OK) {
LOG(ERROR) << "Unable to close zip archive.";
}
};
// Get number of files.
unz_global_info global_info;
if (unzGetGlobalInfo(zf, &global_info) != UNZ_OK) {
return CreateStatusWithPayload(
StatusCode::kUnknown, "Unable to get zip archive info.",
MediaPipeTasksStatus::kMetadataAssociatedFileZipError);
}
// Browse through files in archive.
if (global_info.number_entry > 0) {
int error = unzGoToFirstFile(zf);
while (error == UNZ_OK) {
ASSIGN_OR_RETURN(auto zip_file_info, GetCurrentZipFileInfo(zf));
// Store result in map.
associated_files_[zip_file_info.name] = absl::string_view(
buffer_data + zip_file_info.position, zip_file_info.size);
error = unzGoToNextFile(zf);
}
if (error != UNZ_END_OF_LIST_OF_FILE) {
return CreateStatusWithPayload(
StatusCode::kUnknown,
"Unable to read associated file in zip archive.",
MediaPipeTasksStatus::kMetadataAssociatedFileZipError);
}
}
// Perform the cleanup manually for error propagation.
std::move(unzipper_closer).Cancel();
// Close zip.
if (unzClose(zf) != UNZ_OK) {
return CreateStatusWithPayload(
StatusCode::kUnknown, "Unable to close zip archive.",
MediaPipeTasksStatus::kMetadataAssociatedFileZipError);
}
return absl::OkStatus();
return status;
}
absl::StatusOr<absl::string_view> ModelMetadataExtractor::GetAssociatedFile(

View File

@ -24,3 +24,19 @@ cc_library(
"@zlib//:zlib_minizip",
],
)
# Utilities for extracting the files of an in-memory zip archive.
cc_library(
name = "zip_utils",
srcs = ["zip_utils.cc"],
hdrs = ["zip_utils.h"],
deps = [
":zip_readonly_mem_file",
"//mediapipe/framework/port:status",
"//mediapipe/tasks/cc:common",
"@com_google_absl//absl/cleanup",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@zlib//:zlib_minizip",
],
)

View File

@ -0,0 +1,167 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "mediapipe/tasks/cc/metadata/utils/zip_utils.h"
#include "absl/cleanup/cleanup.h"
#include "absl/container/flat_hash_map.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "contrib/minizip/ioapi.h"
#include "contrib/minizip/unzip.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/tasks/cc/common.h"
#include "mediapipe/tasks/cc/metadata/utils/zip_readonly_mem_file.h"
namespace mediapipe {
namespace tasks {
namespace metadata {
namespace {
using ::absl::StatusCode;
// Converts a minizip return code into an absl::Status so call sites do not
// have to repeat the translation: UNZ_OK becomes OkStatus, every other code
// becomes a kUnknown status with the kFileZipError payload.
absl::Status UnzipErrorToStatus(int error) {
  if (error == UNZ_OK) {
    return absl::OkStatus();
  }
  return CreateStatusWithPayload(StatusCode::kUnknown,
                                 "Unable to read the file in zip archive.",
                                 MediaPipeTasksStatus::kFileZipError);
}
// Stores a file name, position in zip buffer and size.
struct ZipFileInfo {
// Name of the file inside the archive.
std::string name;
// Offset of the file's data; added to the zip buffer pointer to locate it.
ZPOS64_T position;
// Size of the file's data (filled from the entry's uncompressed size).
ZPOS64_T size;
};
// Returns the ZipFileInfo (name, position of the data in the zip buffer and
// size) corresponding to the current file in the provided unzFile object.
//
// Returns a kUnknown status with the kFileZipError payload if a minizip call
// fails, if the entry is stored compressed, or if the position of its data
// cannot be determined.
absl::StatusOr<ZipFileInfo> GetCurrentZipFileInfo(const unzFile& zf) {
  // Open file in raw mode, as data is expected to be uncompressed.
  int method = -1;
  MP_RETURN_IF_ERROR(UnzipErrorToStatus(
      unzOpenCurrentFile2(zf, &method, /*level=*/nullptr, /*raw=*/1)));
  // Makes sure the entry gets closed on every early return below.
  absl::Cleanup unzipper_closer = [zf]() {
    auto status = UnzipErrorToStatus(unzCloseCurrentFile(zf));
    if (!status.ok()) {
      LOG(ERROR) << "Failed to close the current zip file: " << status;
    }
  };
  if (method != Z_NO_COMPRESSION) {
    return CreateStatusWithPayload(StatusCode::kUnknown,
                                   "Expected uncompressed zip archive.",
                                   MediaPipeTasksStatus::kFileZipError);
  }

  // Get file info a first time to get filename size.
  unz_file_info64 file_info;
  MP_RETURN_IF_ERROR(UnzipErrorToStatus(unzGetCurrentFileInfo64(
      zf, &file_info, /*szFileName=*/nullptr, /*szFileNameBufferSize=*/0,
      /*extraField=*/nullptr, /*extraFieldBufferSize=*/0,
      /*szComment=*/nullptr, /*szCommentBufferSize=*/0)));

  // Second call to get file name. The name is written directly into a
  // std::string: the buffer is then released on every return path, including
  // the early return taken when this call fails (a malloc'ed buffer would
  // leak there).
  const auto file_name_size = file_info.size_filename;
  std::string file_name(file_name_size, '\0');
  MP_RETURN_IF_ERROR(UnzipErrorToStatus(unzGetCurrentFileInfo64(
      zf, &file_info, &file_name[0], file_name_size,
      /*extraField=*/nullptr, /*extraFieldBufferSize=*/0,
      /*szComment=*/nullptr, /*szCommentBufferSize=*/0)));

  // Get position in file.
  auto position = unzGetCurrentFileZStreamPos64(zf);
  if (position == 0) {
    return CreateStatusWithPayload(StatusCode::kUnknown,
                                   "Unable to read file in zip archive.",
                                   MediaPipeTasksStatus::kFileZipError);
  }

  // Perform the cleanup manually for error propagation.
  std::move(unzipper_closer).Cancel();
  // Close file and return.
  MP_RETURN_IF_ERROR(UnzipErrorToStatus(unzCloseCurrentFile(zf)));

  ZipFileInfo result{};
  result.name = std::move(file_name);
  result.position = position;
  result.size = file_info.uncompressed_size;
  return result;
}
} // namespace
// Extracts the files of the zip archive held in
// [buffer_data, buffer_data + buffer_size) into `files`.
//
// Each archive entry is stored under its file name as a string_view over the
// entry's bytes inside buffer_data (nothing is copied); an existing map entry
// with the same name is overwritten. Entries inserted before a failure are
// left in `files`.
//
// Returns a kUnknown status with the kFileZipError payload if the archive
// cannot be opened, inspected, iterated or closed, if an entry is stored
// compressed, or if an entry's data does not lie inside the buffer.
absl::Status ExtractFilesfromZipFile(
    const char* buffer_data, const size_t buffer_size,
    absl::flat_hash_map<std::string, absl::string_view>* files) {
  // Create in-memory read-only zip file.
  ZipReadOnlyMemFile mem_file = ZipReadOnlyMemFile(buffer_data, buffer_size);
  // Open zip.
  unzFile zf = unzOpen2_64(/*path=*/nullptr, &mem_file.GetFileFunc64Def());
  if (zf == nullptr) {
    return CreateStatusWithPayload(StatusCode::kUnknown,
                                   "Unable to open zip archive.",
                                   MediaPipeTasksStatus::kFileZipError);
  }
  // Makes sure the archive gets closed on every early return below.
  absl::Cleanup unzipper_closer = [zf]() {
    if (unzClose(zf) != UNZ_OK) {
      LOG(ERROR) << "Unable to close zip archive.";
    }
  };
  // Get number of files.
  unz_global_info global_info;
  if (unzGetGlobalInfo(zf, &global_info) != UNZ_OK) {
    return CreateStatusWithPayload(StatusCode::kUnknown,
                                   "Unable to get zip archive info.",
                                   MediaPipeTasksStatus::kFileZipError);
  }

  // Browse through files in archive.
  if (global_info.number_entry > 0) {
    int error = unzGoToFirstFile(zf);
    while (error == UNZ_OK) {
      ASSIGN_OR_RETURN(auto zip_file_info, GetCurrentZipFileInfo(zf));
      // The position and size come from the archive itself, so make sure the
      // range they describe lies inside the buffer before exposing a view on
      // it. The subtraction form avoids overflowing position + size.
      if (zip_file_info.position > buffer_size ||
          zip_file_info.size > buffer_size - zip_file_info.position) {
        return CreateStatusWithPayload(
            StatusCode::kUnknown,
            "Zip archive entry is out of the bounds of the zip buffer.",
            MediaPipeTasksStatus::kFileZipError);
      }
      // Store result in map.
      (*files)[zip_file_info.name] = absl::string_view(
          buffer_data + zip_file_info.position, zip_file_info.size);
      error = unzGoToNextFile(zf);
    }
    if (error != UNZ_END_OF_LIST_OF_FILE) {
      return CreateStatusWithPayload(
          StatusCode::kUnknown,
          "Unable to read associated file in zip archive.",
          MediaPipeTasksStatus::kFileZipError);
    }
  }

  // Perform the cleanup manually for error propagation.
  std::move(unzipper_closer).Cancel();
  // Close zip.
  if (unzClose(zf) != UNZ_OK) {
    return CreateStatusWithPayload(StatusCode::kUnknown,
                                   "Unable to close zip archive.",
                                   MediaPipeTasksStatus::kFileZipError);
  }
  return absl::OkStatus();
}
} // namespace metadata
} // namespace tasks
} // namespace mediapipe

View File

@ -0,0 +1,41 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef MEDIAPIPE_TASKS_CC_METADATA_UTILS_ZIP_UTILS_H_
#define MEDIAPIPE_TASKS_CC_METADATA_UTILS_ZIP_UTILS_H_
#include <string>
#include "absl/container/flat_hash_map.h"
#include "absl/status/status.h"
namespace mediapipe {
namespace tasks {
namespace metadata {
// Extracts the files stored in an in-memory zip archive.
// Input: Pointer and length of the zip file in memory.
// Outputs: `files` receives one entry per file of the archive, with the
// filename as key and a string_view over the file contents inside buffer_data
// as value (the contents are not copied). An entry already present under the
// same filename is overwritten. The file contents returned by this function
// are only guaranteed to stay valid while buffer_data is alive.
// Returns a non-OK status if the archive cannot be opened or read, or if a
// file in it is stored compressed.
absl::Status ExtractFilesfromZipFile(
const char* buffer_data, const size_t buffer_size,
absl::flat_hash_map<std::string, absl::string_view>* files);
} // namespace metadata
} // namespace tasks
} // namespace mediapipe
#endif // MEDIAPIPE_TASKS_CC_METADATA_UTILS_ZIP_UTILS_H_