Performance optimization
Use an unordered_map for the file table so that looking up a file takes the same time regardless of where its entry sits in the table.
This commit is contained in:
@@ -2,8 +2,8 @@
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <filesystem>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
|
||||
namespace ReArchive {
|
||||
@@ -32,3 +32,14 @@ public:
|
||||
public:
|
||||
bool operator ==(const FileEntry& other) const;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct std::hash<ReArchive::FileEntry> {
|
||||
std::size_t operator()(const ReArchive::FileEntry & k) const {
|
||||
std::string hash_string = k.Path().string();
|
||||
hash_string.append(std::to_string(k.Size()));
|
||||
hash_string.append(std::to_string(k.Offset()));
|
||||
|
||||
return std::hash<std::string>()(hash_string);
|
||||
}
|
||||
};
|
@@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
#include <unordered_map>
|
||||
#include <ReArchive/types/FileEntry.h>
|
||||
|
||||
namespace ReArchive {
|
||||
@@ -11,15 +11,17 @@ namespace ReArchive {
|
||||
/// Table of the file entries stored in an archive, indexed by file path.
class ReArchive::FileTable {
protected:
    /// Entries keyed by their path, so that finding a particular entry does
    /// not depend on how many entries the table holds.
    std::unordered_map<std::filesystem::path, FileEntry> entries;

public:
    /// Adds `file_entry` to the table.
    void Append(const FileEntry& file_entry);

    /// Removes `file_entry` from the table.
    void Remove(const FileEntry& file_entry);

    /// Returns a copy of the path -> entry map. Because the result is a copy,
    /// modifying it does not change this table.
    [[nodiscard]] std::unordered_map<std::filesystem::path, FileEntry> GetEntries() const { return entries; }

    /// Number of entries currently in the table.
    [[nodiscard]] int64_t Count() const { return static_cast<int64_t>(entries.size()); }

public:
    /// Serializes `file_table` into a byte buffer.
    [[nodiscard]] static std::vector<unsigned char> Serialize(const FileTable& file_table);

public:
    FileTable() = default;
    ~FileTable() = default;
};
|
2
main.cpp
2
main.cpp
@@ -28,5 +28,5 @@ int main() {
|
||||
std::cout << std::string( retrieved2.begin(), retrieved2.end()) << std::endl;
|
||||
|
||||
for (auto& e : running_tally.GetEntries())
|
||||
std::cout << e.Path() << std::endl;
|
||||
std::cout << e.second.Path() << std::endl;
|
||||
}
|
@@ -4,6 +4,7 @@
|
||||
#include <ReArchive/types/FileTable.h>
|
||||
#include <ReArchive/types/FileEntry.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
using ReArchive::Header;
|
||||
using ReArchive::FileTable;
|
||||
@@ -74,9 +75,7 @@ bool ReArchive::CreateArchive(const std::filesystem::path& filesystem_path, bool
|
||||
if (std::filesystem::exists(filesystem_path))
|
||||
return false;
|
||||
|
||||
if (locked.contains(filesystem_path))
|
||||
return false;
|
||||
|
||||
while (locked.contains(filesystem_path)) {}
|
||||
locked.insert(filesystem_path);
|
||||
|
||||
std::ofstream file(filesystem_path, std::ios::binary);
|
||||
@@ -127,9 +126,11 @@ bool ReArchive::WriteFile(const std::filesystem::path& archive, const std::files
|
||||
auto header = GetHeader(buffer.data());
|
||||
auto file_table = GetFileTable(header, in);
|
||||
|
||||
for (const auto& e : file_table.GetEntries())
|
||||
if (e.Path() == file_path)
|
||||
auto file_entries = file_table.GetEntries();
|
||||
auto value = file_entries.find(file_path);
|
||||
if (value != file_entries.end())
|
||||
return false;
|
||||
|
||||
in.close();
|
||||
|
||||
std::ofstream out(archive, std::ios::binary | std::ios::out | std::ios::in);
|
||||
@@ -187,9 +188,13 @@ bool ReArchive::OverwriteFile(const std::filesystem::path& archive, const std::f
|
||||
auto file_table = GetFileTable(header, in);
|
||||
|
||||
const FileEntry* target = nullptr;
|
||||
for (const auto& e : file_table.GetEntries())
|
||||
if (e.Path() == file_path)
|
||||
target = &e;
|
||||
auto file_entries = file_table.GetEntries();
|
||||
|
||||
auto value = file_entries.find(file_path);
|
||||
if (value != file_entries.end())
|
||||
target = &value->second;
|
||||
|
||||
|
||||
|
||||
if (!target)
|
||||
return false;
|
||||
@@ -218,6 +223,10 @@ std::vector<unsigned char> ReArchive::ReadFile(const std::filesystem::path& arch
|
||||
if (!std::filesystem::exists(archive))
|
||||
return {};
|
||||
|
||||
// Busy-wait.
|
||||
while (locked.contains(archive)) {}
|
||||
locked.insert(archive);
|
||||
|
||||
std::ifstream in(archive, std::ios::binary);
|
||||
if (!in)
|
||||
return {};
|
||||
@@ -235,10 +244,20 @@ std::vector<unsigned char> ReArchive::ReadFile(const std::filesystem::path& arch
|
||||
auto header = GetHeader(buffer.data());
|
||||
auto file_table = GetFileTable(header, in);
|
||||
|
||||
const FileEntry* target = nullptr;
|
||||
/*
|
||||
for (const auto& e : file_table.GetEntries())
|
||||
if (e.Path() == file_path)
|
||||
target = &e;
|
||||
*/
|
||||
|
||||
const FileEntry* target = nullptr;
|
||||
auto file_entries = file_table.GetEntries();
|
||||
|
||||
auto value = file_entries.find(file_path);
|
||||
if (value != file_entries.end())
|
||||
target = &value->second;
|
||||
|
||||
|
||||
|
||||
if (!target)
|
||||
return {};
|
||||
@@ -248,17 +267,22 @@ std::vector<unsigned char> ReArchive::ReadFile(const std::filesystem::path& arch
|
||||
in.read(reinterpret_cast<char*>(result.data()), (int64_t) result.size());
|
||||
in.close();
|
||||
|
||||
// Remove lock.
|
||||
auto position = locked.find(archive);
|
||||
if (position != locked.end())
|
||||
locked.erase(position);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// I tried to do this several different ways but this seems to be the best approach - Redacted.
|
||||
bool ReArchive::EraseFile(const std::filesystem::path& archive, const std::filesystem::path& file_path, FileTable* running_tally) {
|
||||
|
||||
if (!std::filesystem::exists(archive))
|
||||
return false;
|
||||
|
||||
// Busy-wait.
|
||||
while (locked.contains(archive)) {}
|
||||
locked.insert(archive);
|
||||
|
||||
std::ifstream in(archive, std::ios::binary);
|
||||
if (!in)
|
||||
@@ -281,31 +305,34 @@ bool ReArchive::EraseFile(const std::filesystem::path& archive, const std::files
|
||||
if (!CreateArchive(archive.string() + ".tmp", current_header.Compressed()))
|
||||
return false;
|
||||
|
||||
for (auto& e : current_file_table.GetEntries())
|
||||
if (e.Path() == file_path) {
|
||||
current_file_table.Remove(e);
|
||||
break;
|
||||
}
|
||||
auto file_entries = current_file_table.GetEntries();
|
||||
auto value = file_entries.find(file_path);
|
||||
if (value != file_entries.end())
|
||||
file_entries.erase(value);
|
||||
|
||||
for (auto& e : current_file_table.GetEntries()) {
|
||||
auto file_buffer = ReadFile(archive, e.Path());
|
||||
if (!WriteFile(archive.string() + ".tmp", e.Path(), file_buffer.data(), (int64_t) file_buffer.size())) {
|
||||
for (auto& e : file_entries) {
|
||||
auto file_buffer = ReadFile(archive, e.first);
|
||||
if (!WriteFile(archive.string() + ".tmp", e.first, file_buffer.data(), (int64_t) file_buffer.size())) {
|
||||
std::filesystem::remove(archive.string() + ".tmp");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Busy-wait.
|
||||
while (locked.contains(archive)) {}
|
||||
locked.insert(archive);
|
||||
|
||||
std::filesystem::remove(archive);
|
||||
std::filesystem::rename(archive.string() + ".tmp", archive);
|
||||
|
||||
// TODO read the header from the file we just wrote.
|
||||
if (running_tally)
|
||||
*running_tally = current_file_table;
|
||||
|
||||
// Remove lock.
|
||||
auto position = locked.find(archive);
|
||||
if (position != locked.end())
|
||||
locked.erase(position);
|
||||
|
||||
// TODO read the header from the file we just wrote.
|
||||
if (running_tally)
|
||||
*running_tally = current_file_table;
|
||||
|
||||
return true;
|
||||
}
|
@@ -4,23 +4,19 @@
|
||||
using namespace ReArchive;
|
||||
|
||||
/// Adds `file_entry` to the table, keyed by its path.
///
/// If the table already holds an entry with the same path, the table is left
/// unchanged; the existing entry is not overwritten.
void FileTable::Append(const FileEntry& file_entry) {
    // try_emplace does the "already present?" check and the insertion with a
    // single lookup, and only copies the entry when it is actually inserted.
    entries.try_emplace(file_entry.Path(), file_entry);
}
|
||||
|
||||
void FileTable::Remove(const FileEntry& file_entry) {
|
||||
if (entries.empty())
|
||||
return;
|
||||
|
||||
for (int64_t i = 0; i < entries.size(); i++) {
|
||||
if (entries[i] == file_entry) {
|
||||
entries.erase(entries.begin() + i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
auto position = entries.find(file_entry.Path());
|
||||
if (position != entries.end())
|
||||
entries.erase(position);
|
||||
}
|
||||
|
||||
std::vector<unsigned char> FileTable::Serialize(const FileTable& file_table) {
|
||||
@@ -35,7 +31,7 @@ std::vector<unsigned char> FileTable::Serialize(const FileTable& file_table) {
|
||||
|
||||
for (const auto& file : files) {
|
||||
size_t current_size = result.size();
|
||||
auto serialization = FileEntry::Serialize(file);
|
||||
auto serialization = FileEntry::Serialize(file.second);
|
||||
|
||||
result.resize(current_size + serialization.size());
|
||||
memcpy(result.data() + current_size, serialization.data(), serialization.size());
|
||||
|
Reference in New Issue
Block a user