Performance optimization

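Switched the file table from std::vector to std::unordered_map (keyed by file path) so that looking up a file takes constant time on average, regardless of how many entries the table holds or where the entry sits in it.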
This commit is contained in:
2025-03-19 21:00:25 -04:00
parent df1fa92a68
commit 4ebdb726be
5 changed files with 77 additions and 41 deletions

View File

@@ -2,8 +2,8 @@
#include <string>
#include <cstdint>
#include <utility>
#include <vector>
#include <filesystem>
#include <vector>
#include <array>
namespace ReArchive {
@@ -31,4 +31,15 @@ public:
static std::vector<unsigned char> Serialize(const FileEntry& file);
public:
bool operator ==(const FileEntry& other) const;
};
/// std::hash specialization for ReArchive::FileEntry, so entries can serve as
/// keys in the standard unordered containers.
///
/// The digest is std::hash<std::string> applied to the entry's path string
/// immediately followed by the decimal text of Size() and then of Offset().
template<>
struct std::hash<ReArchive::FileEntry> {
    std::size_t operator()(const ReArchive::FileEntry & k) const {
        // Assemble the key text in one expression: path, then size, then offset.
        const std::string key_text =
            k.Path().string() + std::to_string(k.Size()) + std::to_string(k.Offset());
        return std::hash<std::string>{}(key_text);
    }
};

View File

@@ -1,7 +1,7 @@
#pragma once
#include <vector>
#include <cstdint>
#include <unordered_map>
#include <ReArchive/types/FileEntry.h>
namespace ReArchive {
@@ -11,15 +11,17 @@ namespace ReArchive {
// Table of the file entries tracked for an archive. Offers insertion and
// removal of entries, read access to the whole table, an entry count, and a
// static Serialize that returns a byte vector for a given table.
// NOTE(review): this listing looks like a diff whose +/- markers were
// stripped, so `entries` and `GetEntries` each appear twice. Presumably the
// std::vector forms are the removed lines and the std::unordered_map forms the
// added ones — confirm against the repository before relying on either.
class ReArchive::FileTable {
protected:
// count
// Sequence-based storage (presumably the pre-change declaration).
std::vector<FileEntry> entries;
// TODO unordered_set so time to find a particular entry doesn't depend on the length.
// Hash-map storage from std::filesystem::path to FileEntry.
std::unordered_map<std::filesystem::path, FileEntry> entries;
public:
void Append(const FileEntry& file_entry);
void Remove(const FileEntry& file_entry);
// Both forms return the container by value: the caller receives a copy, so
// changes made to the returned container do not affect this table.
[[nodiscard]] std::vector<FileEntry> GetEntries() const { return entries; }
[[nodiscard]] std::unordered_map<std::filesystem::path, FileEntry> GetEntries() const { return entries; }
// Number of stored entries: entries.size() converted to int64_t.
[[nodiscard]] int64_t Count() const { return entries.size(); }
public:
// Static: produces a byte vector from the given table.
[[nodiscard]] static std::vector<unsigned char> Serialize(const FileTable& file_table);
public:
FileTable() = default;
~FileTable() = default;
};

View File

@@ -28,5 +28,5 @@ int main() {
std::cout << std::string( retrieved2.begin(), retrieved2.end()) << std::endl;
for (auto& e : running_tally.GetEntries())
std::cout << e.Path() << std::endl;
std::cout << e.second.Path() << std::endl;
}

View File

@@ -4,6 +4,7 @@
#include <ReArchive/types/FileTable.h>
#include <ReArchive/types/FileEntry.h>
#include <fstream>
#include <iostream>
using ReArchive::Header;
using ReArchive::FileTable;
@@ -74,9 +75,7 @@ bool ReArchive::CreateArchive(const std::filesystem::path& filesystem_path, bool
if (std::filesystem::exists(filesystem_path))
return false;
if (locked.contains(filesystem_path))
return false;
while (locked.contains(filesystem_path)) {}
locked.insert(filesystem_path);
std::ofstream file(filesystem_path, std::ios::binary);
@@ -127,9 +126,11 @@ bool ReArchive::WriteFile(const std::filesystem::path& archive, const std::files
auto header = GetHeader(buffer.data());
auto file_table = GetFileTable(header, in);
for (const auto& e : file_table.GetEntries())
if (e.Path() == file_path)
return false;
auto file_entries = file_table.GetEntries();
auto value = file_entries.find(file_path);
if (value != file_entries.end())
return false;
in.close();
std::ofstream out(archive, std::ios::binary | std::ios::out | std::ios::in);
@@ -187,9 +188,13 @@ bool ReArchive::OverwriteFile(const std::filesystem::path& archive, const std::f
auto file_table = GetFileTable(header, in);
const FileEntry* target = nullptr;
for (const auto& e : file_table.GetEntries())
if (e.Path() == file_path)
target = &e;
auto file_entries = file_table.GetEntries();
auto value = file_entries.find(file_path);
if (value != file_entries.end())
target = &value->second;
if (!target)
return false;
@@ -218,6 +223,10 @@ std::vector<unsigned char> ReArchive::ReadFile(const std::filesystem::path& arch
if (!std::filesystem::exists(archive))
return {};
// Busy-wait.
while (locked.contains(archive)) {}
locked.insert(archive);
std::ifstream in(archive, std::ios::binary);
if (!in)
return {};
@@ -235,10 +244,20 @@ std::vector<unsigned char> ReArchive::ReadFile(const std::filesystem::path& arch
auto header = GetHeader(buffer.data());
auto file_table = GetFileTable(header, in);
/*
for (const auto& e : file_table.GetEntries())
if (e.Path() == file_path)
target = &e;
*/
const FileEntry* target = nullptr;
for (const auto& e : file_table.GetEntries())
if (e.Path() == file_path)
target = &e;
auto file_entries = file_table.GetEntries();
auto value = file_entries.find(file_path);
if (value != file_entries.end())
target = &value->second;
if (!target)
return {};
@@ -248,17 +267,22 @@ std::vector<unsigned char> ReArchive::ReadFile(const std::filesystem::path& arch
in.read(reinterpret_cast<char*>(result.data()), (int64_t) result.size());
in.close();
// Remove lock.
auto position = locked.find(archive);
if (position != locked.end())
locked.erase(position);
return result;
}
// I tried to do this several different ways but this seems to be the best approach - Redacted.
bool ReArchive::EraseFile(const std::filesystem::path& archive, const std::filesystem::path& file_path, FileTable* running_tally) {
if (!std::filesystem::exists(archive))
return false;
// Busy-wait.
while (locked.contains(archive)) {}
locked.insert(archive);
std::ifstream in(archive, std::ios::binary);
if (!in)
@@ -281,31 +305,34 @@ bool ReArchive::EraseFile(const std::filesystem::path& archive, const std::files
if (!CreateArchive(archive.string() + ".tmp", current_header.Compressed()))
return false;
for (auto& e : current_file_table.GetEntries())
if (e.Path() == file_path) {
current_file_table.Remove(e);
break;
}
auto file_entries = current_file_table.GetEntries();
auto value = file_entries.find(file_path);
if (value != file_entries.end())
file_entries.erase(value);
for (auto& e : current_file_table.GetEntries()) {
auto file_buffer = ReadFile(archive, e.Path());
if (!WriteFile(archive.string() + ".tmp", e.Path(), file_buffer.data(), (int64_t) file_buffer.size())) {
for (auto& e : file_entries) {
auto file_buffer = ReadFile(archive, e.first);
if (!WriteFile(archive.string() + ".tmp", e.first, file_buffer.data(), (int64_t) file_buffer.size())) {
std::filesystem::remove(archive.string() + ".tmp");
return false;
}
}
// Busy-wait.
while (locked.contains(archive)) {}
locked.insert(archive);
std::filesystem::remove(archive);
std::filesystem::rename(archive.string() + ".tmp", archive);
// TODO read the header from the file we just wrote.
if (running_tally)
*running_tally = current_file_table;
// Remove lock.
auto position = locked.find(archive);
if (position != locked.end())
locked.erase(position);
// TODO read the header from the file we just wrote.
if (running_tally)
*running_tally = current_file_table;
return true;
}

View File

@@ -4,23 +4,19 @@
using namespace ReArchive;
// Adds file_entry to the table unless an entry with the same path is already
// present, in which case the call returns without changing anything.
// NOTE(review): this listing looks like a diff whose +/- markers were
// stripped. Presumably the range-for scan and push_back are the removed
// vector-based lines, and contains() + insert() are the added map-based
// lines — confirm against the repository.
void FileTable::Append(const FileEntry& file_entry) {
// Linear duplicate check: compares each stored entry's path to the new one.
for (const auto& e : entries)
if (e.Path() == file_entry.Path())
return;
// Keyed duplicate check: the entry's path is used as the lookup key.
if (entries.contains(file_entry.Path()))
return;
entries.push_back(file_entry);
// Stores the entry under its own path.
entries.insert(std::make_pair(file_entry.Path(), file_entry));
}
// Removes the stored entry that matches file_entry. Does nothing when the
// table is empty or when no match is found.
// NOTE(review): this listing looks like a diff whose +/- markers were
// stripped. Presumably the index loop is the removed vector-based code and
// find() + erase() the added map-based code — confirm against the repository.
// The two forms match differently: the loop compares whole entries with
// operator==, while the find() form matches on Path() alone; verify that the
// narrower match is intended.
void FileTable::Remove(const FileEntry& file_entry) {
if (entries.empty())
return;
// Erases the first element equal to file_entry, then stops.
for (int64_t i = 0; i < entries.size(); i++) {
if (entries[i] == file_entry) {
entries.erase(entries.begin() + i);
break;
}
}
// Looks the entry up by its path and erases it if present.
auto position = entries.find(file_entry.Path());
if (position != entries.end())
entries.erase(position);
}
std::vector<unsigned char> FileTable::Serialize(const FileTable& file_table) {
@@ -35,7 +31,7 @@ std::vector<unsigned char> FileTable::Serialize(const FileTable& file_table) {
for (const auto& file : files) {
size_t current_size = result.size();
auto serialization = FileEntry::Serialize(file);
auto serialization = FileEntry::Serialize(file.second);
result.resize(current_size + serialization.size());
memcpy(result.data() + current_size, serialization.data(), serialization.size());