Performance optimization

Avoid copying the entire file table on each access, so that operations do not slow down as the number of files in the archive grows.
This commit is contained in:
2025-03-27 01:11:17 -04:00
parent 021ca575d1
commit 79b9e546ee
5 changed files with 20 additions and 35 deletions

View File

@@ -4,6 +4,7 @@
#include <vector> #include <vector>
// TODO compression. // TODO compression.
// TODO allow the user to pass in the file table if they already have it to avoid de-serializing it over and over.
namespace ReArchive { namespace ReArchive {
/// Creates a new empty archive. /// Creates a new empty archive.
/// @param filesystem_path where the archive is to be created. /// @param filesystem_path where the archive is to be created.

View File

@@ -16,7 +16,8 @@ public:
void Append(const FileEntry& file_entry); void Append(const FileEntry& file_entry);
void Remove(const FileEntry& file_entry); void Remove(const FileEntry& file_entry);
[[nodiscard]] bool Contains(const std::filesystem::path& entry ) const { return entries.contains(entry); } [[nodiscard]] bool Contains(const std::filesystem::path& entry ) const { return entries.contains(entry); }
[[nodiscard]] std::unordered_map<std::filesystem::path, FileEntry> GetEntries() const { return entries; } [[nodiscard]] std::unordered_map<std::filesystem::path, FileEntry>* GetEntries() { return &entries; }
[[nodiscard]] const std::unordered_map<std::filesystem::path, FileEntry>* GetEntries() const { return &entries; }
[[nodiscard]] int64_t Count() const { return entries.size(); } [[nodiscard]] int64_t Count() const { return entries.size(); }
public: public:
[[nodiscard]] static std::vector<unsigned char> Serialize(const FileTable& file_table); [[nodiscard]] static std::vector<unsigned char> Serialize(const FileTable& file_table);

View File

@@ -85,7 +85,7 @@ void DisplayArchiveContents(const std::filesystem::path& archive) {
auto file_table = result.second; auto file_table = result.second;
std::cout << "path" << " | " << "size (bytes)" << std::endl; std::cout << "path" << " | " << "size (bytes)" << std::endl;
for (const auto& e : file_table.GetEntries()) for (const auto& e : *file_table.GetEntries())
std::cout << e.second.Path() << " " << e.second.Size() << std::endl; std::cout << e.second.Path() << " " << e.second.Size() << std::endl;
} }
@@ -104,23 +104,11 @@ void AddFileToArchive(const std::filesystem::path& file_to_add, const std::files
return; return;
} }
auto file_table_result = ReArchive::ReadFileTable(archive);
if (!file_table_result.first) {
std::cerr << "The specified path is inaccessible or not a valid archive." << std::endl;
return;
}
auto file_table = file_table_result.second;
if (file_table.Contains(file_to_add)) {
std::cerr << "The specified path for the file to be added already exists within the archive." << std::endl;
return;
}
auto file_data = ReadFileFromDisk(file_to_add); auto file_data = ReadFileFromDisk(file_to_add);
auto result = ReArchive::WriteFile(archive, file_to_add, file_data.data(), file_data.size()); auto result = ReArchive::WriteFile(archive, file_to_add, file_data.data(), file_data.size());
if (!result) if (!result)
std::cerr << "The specified path is inaccessible or not a valid archive." << std::endl; std::cerr << "The specified path for the file to be added already exists within the archive." << std::endl;
} }
void AddDirectoryToArchive(const std::filesystem::path& directory_to_add, const std::filesystem::path& archive) { void AddDirectoryToArchive(const std::filesystem::path& directory_to_add, const std::filesystem::path& archive) {
@@ -171,7 +159,7 @@ void ExtractArchive(const std::filesystem::path& archive) {
return; return;
} }
for (const auto& entry : file_table_result.second.GetEntries()) { for (const auto& entry : *file_table_result.second.GetEntries()) {
if (std::filesystem::exists(entry.first)) if (std::filesystem::exists(entry.first))
if (!GetConfirmation("File " + entry.first.string() + " already exists, overwrite?")) if (!GetConfirmation("File " + entry.first.string() + " already exists, overwrite?"))
continue; continue;

View File

@@ -49,7 +49,7 @@ FileTable GetFileTable(const Header& header, std::ifstream& in) {
int64_t data_offset = be64toh(*reinterpret_cast<const int64_t*>(ptr)); int64_t data_offset = be64toh(*reinterpret_cast<const int64_t*>(ptr));
ptr += sizeof(int64_t); ptr += sizeof(int64_t);
result.Append(FileEntry(data_size, data_offset, path)); result.Append({ data_size, data_offset, path });
} }
return result; return result;
} }
@@ -122,9 +122,6 @@ bool ReArchive::WriteFile(const std::filesystem::path& archive, const std::files
if (!std::filesystem::exists(archive)) if (!std::filesystem::exists(archive))
return false; return false;
if (std::filesystem::is_directory(archive))
return false;
// Busy-wait. // Busy-wait.
while (locked.contains(archive)) {} while (locked.contains(archive)) {}
locked.insert(archive); locked.insert(archive);
@@ -146,9 +143,7 @@ bool ReArchive::WriteFile(const std::filesystem::path& archive, const std::files
auto header = GetHeader(buffer.data()); auto header = GetHeader(buffer.data());
auto file_table = GetFileTable(header, in); auto file_table = GetFileTable(header, in);
auto file_entries = file_table.GetEntries(); if (file_table.Contains(file_path))
auto value = file_entries.find(file_path);
if (value != file_entries.end())
return false; return false;
in.close(); in.close();
@@ -210,10 +205,10 @@ bool ReArchive::OverwriteFile(const std::filesystem::path& archive, const std::f
const FileEntry* target = nullptr; const FileEntry* target = nullptr;
auto file_entries = file_table.GetEntries(); auto file_entries = file_table.GetEntries();
auto value = file_entries.find(file_path); auto value = file_entries->find(file_path);
if (value != file_entries.end()) if (value != file_entries->end())
target = &value->second; target = &value->second;
if (!target) if (!target)
return false; return false;
@@ -265,8 +260,8 @@ std::vector<unsigned char> ReArchive::ReadFile(const std::filesystem::path& arch
const FileEntry* target = nullptr; const FileEntry* target = nullptr;
auto file_entries = file_table.GetEntries(); auto file_entries = file_table.GetEntries();
auto value = file_entries.find(file_path); auto value = file_entries->find(file_path);
if (value != file_entries.end()) if (value != file_entries->end())
target = &value->second; target = &value->second;
if (!target) if (!target)
@@ -317,11 +312,11 @@ bool ReArchive::EraseFile(const std::filesystem::path& archive, const std::files
return false; return false;
auto file_entries = current_file_table.GetEntries(); auto file_entries = current_file_table.GetEntries();
auto value = file_entries.find(file_path); auto value = file_entries->find(file_path);
if (value != file_entries.end()) if (value != file_entries->end())
file_entries.erase(value); file_entries->erase(value);
for (auto& e : file_entries) { for (auto& e : *file_entries) {
auto file_buffer = ReadFile(archive, e.first); auto file_buffer = ReadFile(archive, e.first);
if (!WriteFile(archive.string() + ".tmp", e.first, file_buffer.data(), (int64_t) file_buffer.size())) { if (!WriteFile(archive.string() + ".tmp", e.first, file_buffer.data(), (int64_t) file_buffer.size())) {
std::filesystem::remove(archive.string() + ".tmp"); std::filesystem::remove(archive.string() + ".tmp");

View File

@@ -18,14 +18,14 @@ void FileTable::Remove(const FileEntry& file_entry) {
std::vector<unsigned char> FileTable::Serialize(const FileTable& file_table) { std::vector<unsigned char> FileTable::Serialize(const FileTable& file_table) {
auto files = file_table.GetEntries(); auto files = file_table.GetEntries();
int64_t count = files.size(); int64_t count = files->size();
auto network_count = htobe64(count); auto network_count = htobe64(count);
std::vector<unsigned char> result(reinterpret_cast<unsigned char*>(&network_count), reinterpret_cast<unsigned char*>(&network_count) + sizeof(network_count)); std::vector<unsigned char> result(reinterpret_cast<unsigned char*>(&network_count), reinterpret_cast<unsigned char*>(&network_count) + sizeof(network_count));
if (files.empty()) if (files->empty())
return result; return result;
for (const auto& file : files) { for (const auto& file : *files) {
size_t current_size = result.size(); size_t current_size = result.size();
auto serialization = FileEntry::Serialize(file.second); auto serialization = FileEntry::Serialize(file.second);