6 Commits

Author SHA1 Message Date
8cab591f98 Update FileTable.h
copy constructor
2025-03-20 20:03:42 -04:00
0e17d02451 Cleanup & update 2025-03-20 19:49:34 -04:00
44c2ea3a3a Update FileTable.h
Remove TODO I forgot to remove 🤷
2025-03-19 22:11:28 -04:00
895db6c071 Remove unnecessary includes. 2025-03-19 22:08:18 -04:00
4ebdb726be Performance optimization
Switched the file table to std::unordered_map so that looking up any file takes the same time regardless of where it sits in the table.
2025-03-19 22:01:52 -04:00
df1fa92a68 Update ReArchive.cpp
busy-wait in the event of a thread collision.
2025-03-19 20:06:00 -04:00
6 changed files with 159 additions and 53 deletions

View File

@@ -13,6 +13,11 @@ namespace ReArchive {
/// @returns True if success.
[[nodiscard]] bool CreateArchive(const std::filesystem::path& archive, bool use_compression = false, FileTable* running_tally = nullptr);
/// @param archive The archive on the disk.
/// @returns A std::pair<bool, FileTable>: the bool reports success, and the FileTable is only valid when the bool is true.
/// @note *Always* check that the bool is true before using the file table for anything.
[[nodiscard]] std::pair<bool, FileTable> ReadFileTable(const std::filesystem::path& archive);
/// Add a file to an archive.
/// @param archive The archive on the disk.
/// @param file_data The raw data of the file to be written.

View File

@@ -1,10 +1,8 @@
#pragma once
#include <string>
#include <cstdint>
#include <utility>
#include <vector>
#include <filesystem>
#include <array>
namespace ReArchive {
class FileEntry;
@@ -31,4 +29,15 @@ public:
static std::vector<unsigned char> Serialize(const FileEntry& file);
public:
bool operator ==(const FileEntry& other) const;
};
/// Hash support so that a FileEntry can be used as a key in unordered containers.
/// The hash is derived from the entry's path, size and offset.
template<>
struct std::hash<ReArchive::FileEntry> {
    /// @param k The entry to hash.
    /// @returns Hash of the entry's path, size and offset joined into one string.
    std::size_t operator()(const ReArchive::FileEntry& k) const {
        // The fields are joined with a NUL separator. Without one, distinct
        // entries such as ("a", 12, 3) and ("a1", 2, 3) would both produce the
        // string "a123" and therefore always collide. The two trailing fields
        // contain only what std::to_string emits, so the split is unambiguous.
        std::string hash_string = k.Path().string();
        hash_string.push_back('\0');
        hash_string.append(std::to_string(k.Size()));
        hash_string.push_back('\0');
        hash_string.append(std::to_string(k.Offset()));
        return std::hash<std::string>()(hash_string);
    }
};

View File

@@ -1,7 +1,7 @@
#pragma once
#include <vector>
#include <cstdint>
#include <unordered_map>
#include <ReArchive/types/FileEntry.h>
namespace ReArchive {
@@ -11,15 +11,19 @@ namespace ReArchive {
/// The table of files stored in an archive, keyed by each entry's path.
class ReArchive::FileTable {
protected:
    // count
    /// Entries keyed by path, so a lookup does not depend on insertion order.
    std::unordered_map<std::filesystem::path, FileEntry> entries;
public:
    /// Adds an entry to the table.
    void Append(const FileEntry& file_entry);
    /// Removes an entry from the table.
    void Remove(const FileEntry& file_entry);
    /// @param entry Path to look up. Taken by const reference so that const
    ///              paths and temporaries can be passed as well.
    /// @returns True if an entry is stored under that path.
    [[nodiscard]] bool Contains(const std::filesystem::path& entry) const { return entries.contains(entry); }
    /// @returns A copy of the whole table; changes to it do not affect this object.
    [[nodiscard]] std::unordered_map<std::filesystem::path, FileEntry> GetEntries() const { return entries; }
    /// @returns The number of entries in the table.
    [[nodiscard]] int64_t Count() const { return static_cast<int64_t>(entries.size()); }
public:
    /// @returns The serialized bytes of the given table.
    [[nodiscard]] static std::vector<unsigned char> Serialize(const FileTable& file_table);
public:
    FileTable() = default;
    // The only member is a standard container, so the compiler-generated
    // copy/move operations are correct; a separate FileTable(FileTable&)
    // overload is not needed for copying from a non-const object.
    FileTable(const FileTable& rhs) = default;
    FileTable& operator=(const FileTable& rhs) = default;
    FileTable(FileTable&& rhs) = default;
    FileTable& operator=(FileTable&& rhs) = default;
    ~FileTable() = default;
};

View File

@@ -28,5 +28,7 @@ int main() {
std::cout << std::string( retrieved2.begin(), retrieved2.end()) << std::endl;
for (auto& e : running_tally.GetEntries())
std::cout << e.Path() << std::endl;
std::cout << e.second.Path() << std::endl;
ReArchive::FileTable copy = running_tally;
}

View File

@@ -1,32 +1,21 @@
#include <fstream>
#include <unordered_set>
#include <ReArchive/ReArchive.h>
#include <ReArchive/types/Header.h>
#include <ReArchive/types/FileTable.h>
#include <ReArchive/types/FileEntry.h>
#include <fstream>
using ReArchive::Header;
using ReArchive::FileTable;
using ReArchive::FileEntry;
// Paths of archives that an operation in this file has currently claimed.
// Functions below spin while their archive is in this set, insert it, and
// erase it again when they finish.
// NOTE(review): this is a plain std::unordered_set with no mutex or atomic
// visible here, so the contains()/insert() pair is presumably not safe when
// called from several threads at once — confirm and consider a std::mutex.
std::unordered_set<std::filesystem::path> locked {};
/// Builds a Header from raw bytes.
/// @param archive Pointer to the bytes that are handed to Header::DeSerialize.
/// @returns The deserialized header.
Header GetHeader(const unsigned char* archive) {
    return ReArchive::Header::DeSerialize(archive);
}
/// Reads the header from the start of an archive file.
/// @param archive The archive on the disk.
/// @returns The header deserialized from the first Header::Size() bytes.
/// @throws std::runtime_error if the file cannot be opened.
Header GetHeader(const std::filesystem::path& archive) {
    std::ifstream stream(archive, std::ios::binary);
    if (!stream) {
        throw std::runtime_error("Trying to get the header of an archive which doesn't exist?");
    }

    // Buffer is sized to exactly one header.
    std::vector<unsigned char> header_bytes(ReArchive::Header::Size());
    const auto byte_count = static_cast<int64_t>(header_bytes.size());
    stream.read(reinterpret_cast<char*>(header_bytes.data()), byte_count);
    stream.close();

    return GetHeader(header_bytes.data());
}
/// @param header our header.
/// @param in Our input stream to the file.
/// @note Does not close the input stream.
@@ -68,10 +57,47 @@ FileTable GetFileTable(const Header& header, std::ifstream& in) {
return result;
}
std::pair<bool, FileTable> ReArchive::ReadFileTable(const std::filesystem::path& archive) {
if (!std::filesystem::exists(archive))
return {false, {}};
// Busy-wait.
while (locked.contains(archive)) {}
locked.insert(archive);
std::ifstream in(archive, std::ios::binary);
if (!in)
return {false, {}};
in.seekg(0, std::ios::end);
if (in.tellg() < Header::Size())
return {false, {}};
in.seekg(0, std::ios::beg);
std::vector<unsigned char> buffer (Header::Size());
in.read(reinterpret_cast<char *>(buffer.data()), (int64_t) buffer.size());
if (buffer[0] != 'R' || buffer[1] != 'S' || buffer[2] != 'A')
return {false, {}};
auto header = GetHeader(buffer.data());
auto file_table = GetFileTable(header, in);
in.close();
// Remove lock.
auto position = locked.find(archive);
if (position != locked.end())
locked.erase(position);
return {true, file_table};
}
bool ReArchive::CreateArchive(const std::filesystem::path& filesystem_path, bool use_compression, FileTable* running_tally) {
if (std::filesystem::exists(filesystem_path))
return false;
while (locked.contains(filesystem_path)) {}
locked.insert(filesystem_path);
std::ofstream file(filesystem_path, std::ios::binary);
if (!file)
return false;
@@ -86,6 +112,12 @@ bool ReArchive::CreateArchive(const std::filesystem::path& filesystem_path, bool
if (running_tally)
*running_tally = file_table;
// Remove lock.
auto position = locked.find(filesystem_path);
if (position != locked.end())
locked.erase(position);
return true;
}
@@ -93,6 +125,10 @@ bool ReArchive::WriteFile(const std::filesystem::path& archive, const std::files
if (!std::filesystem::exists(archive))
return false;
// Busy-wait.
while (locked.contains(archive)) {}
locked.insert(archive);
std::ifstream in(archive, std::ios::binary);
if (!in)
return false;
@@ -110,9 +146,11 @@ bool ReArchive::WriteFile(const std::filesystem::path& archive, const std::files
auto header = GetHeader(buffer.data());
auto file_table = GetFileTable(header, in);
for (const auto& e : file_table.GetEntries())
if (e.Path() == file_path)
return false;
auto file_entries = file_table.GetEntries();
auto value = file_entries.find(file_path);
if (value != file_entries.end())
return false;
in.close();
std::ofstream out(archive, std::ios::binary | std::ios::out | std::ios::in);
@@ -133,8 +171,14 @@ bool ReArchive::WriteFile(const std::filesystem::path& archive, const std::files
out.write(reinterpret_cast<const char *>(new_header.data()), (int64_t) new_header.size());
out.close();
// Remove lock.
auto position = locked.find(archive);
if (position != locked.end())
locked.erase(position);
if (running_tally)
*running_tally = file_table;
return true;
}
@@ -142,6 +186,10 @@ bool ReArchive::OverwriteFile(const std::filesystem::path& archive, const std::f
if (!std::filesystem::exists(archive))
return false;
// Busy-wait.
while (locked.contains(archive)) {}
locked.insert(archive);
std::ifstream in(archive, std::ios::binary);
if (!in)
return false;
@@ -160,9 +208,13 @@ bool ReArchive::OverwriteFile(const std::filesystem::path& archive, const std::f
auto file_table = GetFileTable(header, in);
const FileEntry* target = nullptr;
for (const auto& e : file_table.GetEntries())
if (e.Path() == file_path)
target = &e;
auto file_entries = file_table.GetEntries();
auto value = file_entries.find(file_path);
if (value != file_entries.end())
target = &value->second;
if (!target)
return false;
@@ -179,6 +231,11 @@ bool ReArchive::OverwriteFile(const std::filesystem::path& archive, const std::f
out.write(reinterpret_cast<const char *>(file_data), byte_count);
out.close();
// Remove lock.
auto position = locked.find(archive);
if (position != locked.end())
locked.erase(position);
return true;
}
@@ -186,6 +243,10 @@ std::vector<unsigned char> ReArchive::ReadFile(const std::filesystem::path& arch
if (!std::filesystem::exists(archive))
return {};
// Busy-wait.
while (locked.contains(archive)) {}
locked.insert(archive);
std::ifstream in(archive, std::ios::binary);
if (!in)
return {};
@@ -203,10 +264,20 @@ std::vector<unsigned char> ReArchive::ReadFile(const std::filesystem::path& arch
auto header = GetHeader(buffer.data());
auto file_table = GetFileTable(header, in);
/*
for (const auto& e : file_table.GetEntries())
if (e.Path() == file_path)
target = &e;
*/
const FileEntry* target = nullptr;
for (const auto& e : file_table.GetEntries())
if (e.Path() == file_path)
target = &e;
auto file_entries = file_table.GetEntries();
auto value = file_entries.find(file_path);
if (value != file_entries.end())
target = &value->second;
if (!target)
return {};
@@ -216,14 +287,23 @@ std::vector<unsigned char> ReArchive::ReadFile(const std::filesystem::path& arch
in.read(reinterpret_cast<char*>(result.data()), (int64_t) result.size());
in.close();
// Remove lock.
auto position = locked.find(archive);
if (position != locked.end())
locked.erase(position);
return result;
}
// I tried to do this several different ways but this seems to be the best approach - Redacted.
bool ReArchive::EraseFile(const std::filesystem::path& archive, const std::filesystem::path& file_path, FileTable* running_tally) {
if (!std::filesystem::exists(archive))
return false;
// Busy-wait.
while (locked.contains(archive)) {}
std::ifstream in(archive, std::ios::binary);
if (!in)
return false;
@@ -242,29 +322,39 @@ bool ReArchive::EraseFile(const std::filesystem::path& archive, const std::files
auto current_header = GetHeader(buffer.data());
auto current_file_table = GetFileTable(current_header, in);
// TODO randomize the name more than that.
if (!CreateArchive(archive.string() + ".tmp", current_header.Compressed()))
return false;
for (auto& e : current_file_table.GetEntries())
if (e.Path() == file_path) {
current_file_table.Remove(e);
break;
}
auto file_entries = current_file_table.GetEntries();
auto value = file_entries.find(file_path);
if (value != file_entries.end())
file_entries.erase(value);
for (auto& e : current_file_table.GetEntries()) {
auto file_buffer = ReadFile(archive, e.Path());
if (!WriteFile(archive.string() + ".tmp", e.Path(), file_buffer.data(), (int64_t) file_buffer.size())) {
for (auto& e : file_entries) {
auto file_buffer = ReadFile(archive, e.first);
if (!WriteFile(archive.string() + ".tmp", e.first, file_buffer.data(), (int64_t) file_buffer.size())) {
std::filesystem::remove(archive.string() + ".tmp");
return false;
}
}
// Busy-wait.
while (locked.contains(archive)) {}
locked.insert(archive);
std::filesystem::remove(archive);
std::filesystem::rename(archive.string() + ".tmp", archive);
// TODO read the header from the file we just wrote.
if (running_tally)
*running_tally = current_file_table;
// Remove lock.
auto position = locked.find(archive);
if (position != locked.end())
locked.erase(position);
if (running_tally) {
auto result = ReadFileTable(archive);
if (result.first)
*running_tally = result.second;
}
return true;
}

View File

@@ -4,23 +4,19 @@
using namespace ReArchive;
/// Adds an entry to the table, keyed by its path.
/// @param file_entry The entry to add. If an entry with the same path is
///                   already present, the table is left unchanged.
void FileTable::Append(const FileEntry& file_entry) {
    // try_emplace does nothing when the key already exists, so the duplicate
    // check and the insertion happen in a single lookup.
    entries.try_emplace(file_entry.Path(), file_entry);
}
/// Removes the entry stored under the given entry's path.
/// @param file_entry The entry whose path identifies what to remove. Nothing
///                   happens if no entry is stored under that path.
void FileTable::Remove(const FileEntry& file_entry) {
    // Erasing by key is already a no-op on an empty table or a missing key,
    // so no separate empty()/find() checks are needed.
    entries.erase(file_entry.Path());
}
std::vector<unsigned char> FileTable::Serialize(const FileTable& file_table) {
@@ -35,7 +31,7 @@ std::vector<unsigned char> FileTable::Serialize(const FileTable& file_table) {
for (const auto& file : files) {
size_t current_size = result.size();
auto serialization = FileEntry::Serialize(file);
auto serialization = FileEntry::Serialize(file.second);
result.resize(current_size + serialization.size());
memcpy(result.data() + current_size, serialization.data(), serialization.size());