Clean up tarball creation with try/catch, and remove the duplicate-path handling

This commit is contained in:
Simon Quigley 2025-02-07 17:12:26 -06:00
parent f08b3ee199
commit c8889e4b51

View File

@ -419,184 +419,183 @@ std::vector<std::string> extract_files_excluded(const std::string& filepath) {
void create_tarball(const std::string& tarballPath, const std::string& directory, const std::vector<std::string>& exclusions, std::shared_ptr<Log> log) { void create_tarball(const std::string& tarballPath, const std::string& directory, const std::vector<std::string>& exclusions, std::shared_ptr<Log> log) {
log->append("Creating tarball: " + tarballPath); log->append("Creating tarball: " + tarballPath);
struct archive* a = archive_write_new(); try {
if (!a) { struct archive* a = archive_write_new();
throw std::runtime_error("Failed to create a new archive."); if (!a) throw std::runtime_error("Failed to create a new archive.");
}
if (archive_write_add_filter_gzip(a) != ARCHIVE_OK) { if (archive_write_add_filter_gzip(a) != ARCHIVE_OK) {
std::string err = "Failed to add gzip filter: "; std::string err = "Failed to add gzip filter: ";
err += archive_error_string(a); err += archive_error_string(a);
archive_write_free(a);
throw std::runtime_error(err);
}
if (archive_write_set_format_pax_restricted(a) != ARCHIVE_OK) {
std::string err = "Failed to set format: ";
err += archive_error_string(a);
archive_write_free(a);
throw std::runtime_error(err);
}
if (archive_write_open_filename(a, tarballPath.c_str()) != ARCHIVE_OK) {
std::string err = "Could not open tarball for writing: ";
err += archive_error_string(a);
archive_write_free(a);
throw std::runtime_error(err);
}
// Initialize a set to track added relative paths to prevent duplication
std::unordered_set<std::string> added_paths;
// Iterate through the directory recursively without following symlinks
for (auto it = fs::recursive_directory_iterator(
directory,
fs::directory_options::skip_permission_denied);
it != fs::recursive_directory_iterator(); ++it) {
const auto& path = it->path();
std::error_code ec;
fs::path relative_path = fs::relative(path, directory, ec);
if (ec) {
log->append("Failed to compute relative path for: " + path.string() + " Error: " + ec.message());
continue;
}
// Normalize the relative path to avoid discrepancies
fs::path normalized_relative_path = relative_path.lexically_normal();
std::string relative_path_str = normalized_relative_path.string();
// Check if this path has already been added
if (!added_paths.insert(relative_path_str).second) {
log->append("Duplicate path detected and skipped: " + relative_path_str);
continue; // Skip adding this duplicate path
}
// Exclusion logic (if any exclusions are provided)
bool excluded = std::any_of(exclusions.begin(), exclusions.end(), [&relative_path_str](const std::string& exclusion) {
return relative_path_str.find(exclusion) != std::string::npos;
});
if (excluded) { continue; }
fs::file_status fstatus = it->symlink_status(ec);
if (ec) {
log->append("Failed to get file status for: " + path.string() + " Error: " + ec.message());
continue;
}
struct archive_entry* entry = archive_entry_new();
if (!entry) {
log->append("Failed to create archive entry for: " + path.string());
archive_write_free(a); archive_write_free(a);
throw std::runtime_error("Failed to create archive entry."); throw std::runtime_error(err);
} }
std::string entry_path = relative_path_str; if (archive_write_set_format_pax_restricted(a) != ARCHIVE_OK) {
if (fs::is_directory(fstatus)) { std::string err = "Failed to set format: ";
// Ensure the directory pathname ends with '/' err += archive_error_string(a);
if (!entry_path.empty() && entry_path.back() != '/') { archive_write_free(a);
entry_path += '/'; throw std::runtime_error(err);
}
archive_entry_set_pathname(entry, entry_path.c_str());
} else {
archive_entry_set_pathname(entry, entry_path.c_str());
} }
// Set file type, permissions, and size if (archive_write_open_filename(a, tarballPath.c_str()) != ARCHIVE_OK) {
if (fs::is_regular_file(fstatus)) { std::string err = "Could not open tarball for writing: ";
// Regular file err += archive_error_string(a);
uintmax_t filesize = fs::file_size(path, ec); archive_write_free(a);
if (ec) { throw std::runtime_error(err);
log->append("Cannot get file size for: " + path.string() + " Error: " + ec.message());
archive_entry_free(entry);
continue;
}
archive_entry_set_size(entry, static_cast<off_t>(filesize));
archive_entry_set_filetype(entry, AE_IFREG);
archive_entry_set_perm(entry, static_cast<mode_t>(fstatus.permissions()));
}
else if (fs::is_symlink(fstatus)) {
fs::path target = fs::read_symlink(path, ec);
if (ec) {
log->append("Cannot read symlink for: " + path.string() + " Error: " + ec.message());
archive_entry_free(entry);
continue;
}
archive_entry_set_symlink(entry, target.c_str());
archive_entry_set_filetype(entry, AE_IFLNK);
archive_entry_set_perm(entry, static_cast<mode_t>(fstatus.permissions()));
}
else if (fs::is_directory(fstatus)) {
archive_entry_set_size(entry, 0);
archive_entry_set_filetype(entry, AE_IFDIR);
archive_entry_set_perm(entry, static_cast<mode_t>(fstatus.permissions()));
}
else {
log->append("Unsupported file type for: " + path.string());
archive_entry_free(entry);
continue;
} }
// Retrieve and set the modification time // Iterate through the directory recursively without following symlinks
fs::file_time_type ftime = fs::last_write_time(path, ec); for (auto it = fs::recursive_directory_iterator(
std::time_t mtime; directory,
if (ec) { fs::directory_options::skip_permission_denied);
log->append("Failed to get last write time for: " + path.string() + " Error: " + ec.message()); it != fs::recursive_directory_iterator(); ++it) {
// Obtain current UTC time as fallback const auto& path = it->path();
auto now = std::chrono::system_clock::now(); try {
mtime = std::chrono::system_clock::to_time_t(now); std::error_code ec;
log->append("Setting default mtime (current UTC time) for: " + path.string());
} else {
mtime = to_time_t(ftime);
}
archive_entry_set_mtime(entry, mtime, 0);
if (archive_write_header(a, entry) != ARCHIVE_OK) { fs::path relative_path = fs::relative(path, directory, ec);
log->append("Failed to write header for: " + path.string() + " Error: " + archive_error_string(a)); if (ec) {
archive_entry_free(entry); log->append("Failed to compute relative path for: " + path.string() + " Error: " + ec.message());
continue; continue;
} }
if (fs::is_regular_file(fstatus)) { // Normalize the relative path to avoid discrepancies
std::ifstream fileStream(path, std::ios::binary); fs::path normalized_relative_path = relative_path.lexically_normal();
if (!fileStream) { std::string relative_path_str = normalized_relative_path.string();
log->append("Failed to open file for reading: " + path.string());
archive_entry_free(entry);
continue;
}
const std::size_t bufferSize = 8192; // Exclusion logic (if any exclusions are provided)
char buffer[bufferSize]; bool excluded = std::any_of(exclusions.begin(), exclusions.end(), [&relative_path_str](const std::string& exclusion) {
while (fileStream) { return relative_path_str.find(exclusion) != std::string::npos;
fileStream.read(buffer, bufferSize); });
std::streamsize bytesRead = fileStream.gcount(); if (excluded) { continue; }
if (bytesRead > 0) {
if (archive_write_data(a, buffer, static_cast<size_t>(bytesRead)) < 0) { fs::file_status fstatus = it->symlink_status(ec);
log->append("Failed to write data for: " + path.string() + " Error: " + archive_error_string(a)); if (ec) {
break; log->append("Failed to get file status for: " + path.string() + " Error: " + ec.message());
continue;
}
struct archive_entry* entry = archive_entry_new();
if (!entry) {
log->append("Failed to create archive entry for: " + path.string());
archive_write_free(a);
throw std::runtime_error("Failed to create archive entry.");
}
std::string entry_path = relative_path_str;
if (fs::is_directory(fstatus)) {
// Ensure the directory pathname ends with '/'
if (!entry_path.empty() && entry_path.back() != '/') {
entry_path += '/';
}
archive_entry_set_pathname(entry, entry_path.c_str());
} else {
archive_entry_set_pathname(entry, entry_path.c_str());
}
// Set file type, permissions, and size
if (fs::is_regular_file(fstatus)) {
// Regular file
uintmax_t filesize = fs::file_size(path, ec);
if (ec) {
log->append("Cannot get file size for: " + path.string() + " Error: " + ec.message());
archive_entry_free(entry);
continue;
}
archive_entry_set_size(entry, static_cast<off_t>(filesize));
archive_entry_set_filetype(entry, AE_IFREG);
archive_entry_set_perm(entry, static_cast<mode_t>(fstatus.permissions()));
}
else if (fs::is_symlink(fstatus)) {
fs::path target = fs::read_symlink(path, ec);
if (ec) {
log->append("Cannot read symlink for: " + path.string() + " Error: " + ec.message());
archive_entry_free(entry);
continue;
}
archive_entry_set_symlink(entry, target.c_str());
archive_entry_set_filetype(entry, AE_IFLNK);
archive_entry_set_perm(entry, static_cast<mode_t>(fstatus.permissions()));
}
else if (fs::is_directory(fstatus)) {
archive_entry_set_size(entry, 0);
archive_entry_set_filetype(entry, AE_IFDIR);
archive_entry_set_perm(entry, static_cast<mode_t>(fstatus.permissions()));
}
else {
log->append("Unsupported file type for: " + path.string());
archive_entry_free(entry);
continue;
}
// Retrieve and set the modification time
fs::file_time_type ftime = fs::last_write_time(path, ec);
std::time_t mtime;
if (ec) {
log->append("Failed to get last write time for: " + path.string() + " Error: " + ec.message());
// Obtain current UTC time as fallback
auto now = std::chrono::system_clock::now();
mtime = std::chrono::system_clock::to_time_t(now);
log->append("Setting default mtime (current UTC time) for: " + path.string());
} else {
mtime = to_time_t(ftime);
}
archive_entry_set_mtime(entry, mtime, 0);
if (archive_write_header(a, entry) != ARCHIVE_OK) {
log->append("Failed to write header for: " + path.string() + " Error: " + archive_error_string(a));
archive_entry_free(entry);
continue;
}
if (fs::is_regular_file(fstatus)) {
std::ifstream fileStream(path, std::ios::binary);
if (!fileStream) {
log->append("Failed to open file for reading: " + path.string());
archive_entry_free(entry);
continue;
}
const std::size_t bufferSize = 8192;
char buffer[bufferSize];
while (fileStream) {
fileStream.read(buffer, bufferSize);
std::streamsize bytesRead = fileStream.gcount();
if (bytesRead > 0) {
if (archive_write_data(a, buffer, static_cast<size_t>(bytesRead)) < 0) {
log->append("Failed to write data for: " + path.string() + " Error: " + archive_error_string(a));
break;
}
}
}
if (fileStream.bad()) {
log->append("Error reading file: " + path.string());
} }
} }
}
if (fileStream.bad()) { archive_entry_free(entry);
log->append("Error reading file: " + path.string()); } catch (const std::exception &e) {
log->append("Failed to add the following file to the tarball: " + path.string());
log->append(e.what());
} }
} }
archive_entry_free(entry); if (archive_write_close(a) != ARCHIVE_OK) {
} std::string err = "Failed to close archive: ";
err += archive_error_string(a);
archive_write_free(a);
throw std::runtime_error(err);
}
if (archive_write_close(a) != ARCHIVE_OK) { if (archive_write_free(a) != ARCHIVE_OK) {
std::string err = "Failed to close archive: "; std::string err = "Failed to free archive: ";
err += archive_error_string(a); err += archive_error_string(a);
archive_write_free(a); throw std::runtime_error(err);
throw std::runtime_error(err); }
} } catch (const std::exception &e) {
log->append("Failed to create tarball: " + tarballPath);
if (archive_write_free(a) != ARCHIVE_OK) { log->append(e.what());
std::string err = "Failed to free archive: ";
err += archive_error_string(a);
throw std::runtime_error(err);
} }
log->append("Tarball created and compressed: " + tarballPath); log->append("Tarball created and compressed: " + tarballPath);