Performance optimization

Avoid copying the entire file table each time it is accessed, so that operations do not slow down as the archive accumulates more files.
This commit is contained in:
2025-03-27 01:11:17 -04:00
parent 021ca575d1
commit 79b9e546ee
5 changed files with 20 additions and 35 deletions

View File

@@ -4,6 +4,7 @@
#include <vector>
// TODO compression.
// TODO allow the user to pass in the file table if they already have it to avoid de-serializing it over and over.
namespace ReArchive {
/// Creates a new empty archive.
/// @param filesystem_path where the archive is to be created.

View File

@@ -16,7 +16,8 @@ public:
void Append(const FileEntry& file_entry);
void Remove(const FileEntry& file_entry);
[[nodiscard]] bool Contains(const std::filesystem::path& entry ) const { return entries.contains(entry); }
[[nodiscard]] std::unordered_map<std::filesystem::path, FileEntry> GetEntries() const { return entries; }
[[nodiscard]] std::unordered_map<std::filesystem::path, FileEntry>* GetEntries() { return &entries; }
[[nodiscard]] const std::unordered_map<std::filesystem::path, FileEntry>* GetEntries() const { return &entries; }
[[nodiscard]] int64_t Count() const { return entries.size(); }
public:
[[nodiscard]] static std::vector<unsigned char> Serialize(const FileTable& file_table);

View File

@@ -85,7 +85,7 @@ void DisplayArchiveContents(const std::filesystem::path& archive) {
auto file_table = result.second;
std::cout << "path" << " | " << "size (bytes)" << std::endl;
for (const auto& e : file_table.GetEntries())
for (const auto& e : *file_table.GetEntries())
std::cout << e.second.Path() << " " << e.second.Size() << std::endl;
}
@@ -104,23 +104,11 @@ void AddFileToArchive(const std::filesystem::path& file_to_add, const std::files
return;
}
auto file_table_result = ReArchive::ReadFileTable(archive);
if (!file_table_result.first) {
std::cerr << "The specified path is inaccessible or not a valid archive." << std::endl;
return;
}
auto file_table = file_table_result.second;
if (file_table.Contains(file_to_add)) {
std::cerr << "The specified path for the file to be added already exists within the archive." << std::endl;
return;
}
auto file_data = ReadFileFromDisk(file_to_add);
auto result = ReArchive::WriteFile(archive, file_to_add, file_data.data(), file_data.size());
if (!result)
std::cerr << "The specified path is inaccessible or not a valid archive." << std::endl;
std::cerr << "The specified path for the file to be added already exists within the archive." << std::endl;
}
void AddDirectoryToArchive(const std::filesystem::path& directory_to_add, const std::filesystem::path& archive) {
@@ -171,7 +159,7 @@ void ExtractArchive(const std::filesystem::path& archive) {
return;
}
for (const auto& entry : file_table_result.second.GetEntries()) {
for (const auto& entry : *file_table_result.second.GetEntries()) {
if (std::filesystem::exists(entry.first))
if (!GetConfirmation("File " + entry.first.string() + " already exists, overwrite?"))
continue;

View File

@@ -49,7 +49,7 @@ FileTable GetFileTable(const Header& header, std::ifstream& in) {
int64_t data_offset = be64toh(*reinterpret_cast<const int64_t*>(ptr));
ptr += sizeof(int64_t);
result.Append(FileEntry(data_size, data_offset, path));
result.Append({ data_size, data_offset, path });
}
return result;
}
@@ -122,9 +122,6 @@ bool ReArchive::WriteFile(const std::filesystem::path& archive, const std::files
if (!std::filesystem::exists(archive))
return false;
if (std::filesystem::is_directory(archive))
return false;
// Busy-wait.
while (locked.contains(archive)) {}
locked.insert(archive);
@@ -146,9 +143,7 @@ bool ReArchive::WriteFile(const std::filesystem::path& archive, const std::files
auto header = GetHeader(buffer.data());
auto file_table = GetFileTable(header, in);
auto file_entries = file_table.GetEntries();
auto value = file_entries.find(file_path);
if (value != file_entries.end())
if (file_table.Contains(file_path))
return false;
in.close();
@@ -210,10 +205,10 @@ bool ReArchive::OverwriteFile(const std::filesystem::path& archive, const std::f
const FileEntry* target = nullptr;
auto file_entries = file_table.GetEntries();
auto value = file_entries.find(file_path);
if (value != file_entries.end())
auto value = file_entries->find(file_path);
if (value != file_entries->end())
target = &value->second;
if (!target)
return false;
@@ -265,8 +260,8 @@ std::vector<unsigned char> ReArchive::ReadFile(const std::filesystem::path& arch
const FileEntry* target = nullptr;
auto file_entries = file_table.GetEntries();
auto value = file_entries.find(file_path);
if (value != file_entries.end())
auto value = file_entries->find(file_path);
if (value != file_entries->end())
target = &value->second;
if (!target)
@@ -317,11 +312,11 @@ bool ReArchive::EraseFile(const std::filesystem::path& archive, const std::files
return false;
auto file_entries = current_file_table.GetEntries();
auto value = file_entries.find(file_path);
if (value != file_entries.end())
file_entries.erase(value);
auto value = file_entries->find(file_path);
if (value != file_entries->end())
file_entries->erase(value);
for (auto& e : file_entries) {
for (auto& e : *file_entries) {
auto file_buffer = ReadFile(archive, e.first);
if (!WriteFile(archive.string() + ".tmp", e.first, file_buffer.data(), (int64_t) file_buffer.size())) {
std::filesystem::remove(archive.string() + ".tmp");

View File

@@ -18,14 +18,14 @@ void FileTable::Remove(const FileEntry& file_entry) {
std::vector<unsigned char> FileTable::Serialize(const FileTable& file_table) {
auto files = file_table.GetEntries();
int64_t count = files.size();
int64_t count = files->size();
auto network_count = htobe64(count);
std::vector<unsigned char> result(reinterpret_cast<unsigned char*>(&network_count), reinterpret_cast<unsigned char*>(&network_count) + sizeof(network_count));
if (files.empty())
if (files->empty())
return result;
for (const auto& file : files) {
for (const auto& file : *files) {
size_t current_size = result.size();
auto serialization = FileEntry::Serialize(file.second);