From 9d12909bca32d30c31f50b5a6f201be5c4fc4198 Mon Sep 17 00:00:00 2001
From: Redacted
Date: Sun, 16 Feb 2025 22:39:39 -0500
Subject: [PATCH] initial commit :sunglasses:

---
Review notes (git am ignores everything between the "---" line and the first diff):
- This copy of the patch had lost its line breaks and every <...> token
  (include targets, template arguments, cast targets). They are restored here;
  include targets and element types were inferred from how the code uses them.
- OverwriteFile/ReadFile no longer keep a pointer into the temporary vector
  returned by FileTable::GetEntries().
- The file table is validated against the archive size while it is parsed.
- EraseFile hands back the rebuilt file table and swaps the archive in with a
  single rename.
- The blob ids on the "index" lines were not recomputed for the files whose
  contents changed.

 .gitignore                          |   6 +
 CMakeLists.txt                      |  15 ++
 include/ReArchive/ReArchive.h       |  48 +++++
 include/ReArchive/types/FileEntry.h |  33 ++++
 include/ReArchive/types/FileTable.h |  27 +++
 include/ReArchive/types/Header.h    |  41 +++++
 main.cpp                            |  32 ++++
 src/ReArchive.cpp                   | 272 ++++++++++++++++++++++++++++
 src/types/FileEntry.cpp             |  32 ++++
 src/types/FileTable.cpp             |  35 ++++
 src/types/Header.cpp                |  25 +++
 11 files changed, 566 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 CMakeLists.txt
 create mode 100644 include/ReArchive/ReArchive.h
 create mode 100644 include/ReArchive/types/FileEntry.h
 create mode 100644 include/ReArchive/types/FileTable.h
 create mode 100644 include/ReArchive/types/Header.h
 create mode 100644 main.cpp
 create mode 100644 src/ReArchive.cpp
 create mode 100644 src/types/FileEntry.cpp
 create mode 100644 src/types/FileTable.cpp
 create mode 100644 src/types/Header.cpp

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..52c505d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+/.idea
+/.cache
+/.ccls-cache
+/compile_commands.json
+/cmake-build-debug
+/build
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..8042fee
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 3.18..3.28)
+project(ReArchive)
+
+set(CMAKE_CXX_STANDARD 20)
+
+file(GLOB_RECURSE HEADERS "include/*.h" "include/*.hpp")
+file(GLOB_RECURSE SOURCES "src/*.c" "src/*.cpp")
+
+add_library(ReArchive SHARED ${SOURCES})
+
+set_target_properties(ReArchive PROPERTIES LINKER_LANGUAGE CXX)
+target_include_directories(ReArchive PUBLIC ${PROJECT_SOURCE_DIR}/include)
+
+add_executable(ReArchive_Demo main.cpp)
+target_link_libraries(ReArchive_Demo PUBLIC ReArchive)
\ No newline at end of file
diff --git a/include/ReArchive/ReArchive.h b/include/ReArchive/ReArchive.h
new file mode 100644
index 0000000..4acf1b3
--- /dev/null
+++ b/include/ReArchive/ReArchive.h
@@ -0,0 +1,48 @@
+#pragma once
+#include <cstdint>
+#include <filesystem>
+#include <vector>
+#include <ReArchive/types/FileTable.h>
+
+// TODO compression.
+namespace ReArchive {
+    /// Creates a new empty archive.
+    /// @param archive Where the archive is to be created. Fails if something already exists there.
+    /// @param use_compression Whether you'd like the file to use compression.
+    /// @param running_tally If you're keeping track of the file table in your program, providing it here will update it to reflect our changes.
+    /// @note use_compression currently does nothing.
+    /// @returns True if success.
+    [[nodiscard]] bool CreateArchive(const std::filesystem::path& archive, bool use_compression = false, FileTable* running_tally = nullptr);
+
+    /// Add a file to an archive.
+    /// @param archive The archive on the disk.
+    /// @param file_path The std::filesystem::path you would use to retrieve the file from the archive.
+    /// @param file_data The raw data of the file to be written.
+    /// @param byte_count The length of the file in bytes.
+    /// @param running_tally If you're keeping track of the file table in your program, providing it here will update it to reflect our changes.
+    /// @note Fails if file_path is already in the archive.
+    /// @returns True if success.
+    [[nodiscard]] bool WriteFile(const std::filesystem::path& archive, const std::filesystem::path& file_path, const unsigned char* file_data, const int64_t& byte_count, FileTable* running_tally = nullptr);
+
+    /// Overwrite a file which already exists in the archive.
+    /// @param archive The archive on the disk.
+    /// @param file_path The std::filesystem::path file in the archive to be overwritten.
+    /// @param file_data The raw data of the file to be written.
+    /// @param byte_count The length of the file in bytes.
+    /// @note byte_count must be the same as the stored file's size, otherwise nothing is written.
+    /// @returns True if success.
+    [[nodiscard]] bool OverwriteFile(const std::filesystem::path& archive, const std::filesystem::path& file_path, const unsigned char* file_data, const int64_t& byte_count);
+
+    /// Remove a file from the archive.
+    /// @param archive The archive on the disk.
+    /// @param file_path The std::filesystem::path file in the archive to be removed.
+    /// @param running_tally If you're keeping track of the file table in your program, providing it here will update it to reflect our changes.
+    /// @returns True if success.
+    [[nodiscard]] bool EraseFile(const std::filesystem::path& archive, const std::filesystem::path& file_path, FileTable* running_tally = nullptr);
+
+    /// Read a file from a given archive.
+    /// @param archive The archive on the disk.
+    /// @param file_path The std::filesystem::path you specified for the given file.
+    /// @note An empty vector is returned in the event that no such file exists or there was an error reading it back.
+    std::vector<unsigned char> ReadFile(const std::filesystem::path& archive, const std::filesystem::path& file_path);
+}
diff --git a/include/ReArchive/types/FileEntry.h b/include/ReArchive/types/FileEntry.h
new file mode 100644
index 0000000..948b7bc
--- /dev/null
+++ b/include/ReArchive/types/FileEntry.h
@@ -0,0 +1,33 @@
+#pragma once
+#include <cstdint>
+#include <filesystem>
+#include <utility>
+#include <vector>
+
+namespace ReArchive {
+    class FileEntry;
+}
+
+/// One row of the archive's file table: the name of a stored file and where its bytes live.
+class ReArchive::FileEntry {
+protected:
+    // On disk the path is preceded by its length as a big-endian int64.
+    std::filesystem::path path;
+    int64_t data_size;
+    int64_t data_offset;
+public:
+    [[nodiscard]] std::filesystem::path Path() const { return path; }
+    [[nodiscard]] int64_t Size() const { return data_size; }
+    [[nodiscard]] int64_t Offset() const { return data_offset; }
+public:
+    /// @param data_size The number of bytes long the file is.
+    /// @param data_offset How many bytes away from the beginning of the archive this file is stored.
+    /// @param path The file name including any path you want to be included in the archive.
+    FileEntry(const int64_t& data_size, const int64_t& data_offset, std::filesystem::path path) : path(std::move(path)), data_size(data_size), data_offset(data_offset) {}
+    ~FileEntry() = default;
+public:
+    /// @returns The entry in its on-disk form: path length, path bytes, data size, data offset.
+    static std::vector<unsigned char> Serialize(const FileEntry& file);
+public:
+    bool operator ==(const FileEntry& other) const;
+};
diff --git a/include/ReArchive/types/FileTable.h b/include/ReArchive/types/FileTable.h
new file mode 100644
index 0000000..f8dcc45
--- /dev/null
+++ b/include/ReArchive/types/FileTable.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <cstdint>
+#include <vector>
+#include <ReArchive/types/FileEntry.h>
+
+namespace ReArchive {
+    class FileTable;
+}
+
+/// The archive's index: one FileEntry per stored file.
+class ReArchive::FileTable {
+protected:
+    // On disk the entries are preceded by their count as a big-endian int64.
+    std::vector<FileEntry> entries;
+public:
+    /// Adds file_entry unless an entry with the same path is already present.
+    void Append(const FileEntry& file_entry);
+    /// Removes the first entry equal to file_entry, if there is one.
+    void Remove(const FileEntry& file_entry);
+    [[nodiscard]] std::vector<FileEntry> GetEntries() const { return entries; }
+    [[nodiscard]] int64_t Count() const { return static_cast<int64_t>(entries.size()); }
+public:
+    [[nodiscard]] static std::vector<unsigned char> Serialize(const FileTable& file_table);
+public:
+    FileTable() = default;
+};
diff --git a/include/ReArchive/types/Header.h b/include/ReArchive/types/Header.h
new file mode 100644
index 0000000..2819d0f
--- /dev/null
+++ b/include/ReArchive/types/Header.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include <array>
+#include <cstdint>
+#include <vector>
+
+namespace ReArchive {
+    class Header;
+    inline constexpr std::array<unsigned char, 3> magic { 'R', 'S', 'A' };
+}
+
+/// The fixed-size record at the very start of every archive.
+class ReArchive::Header {
+protected:
+    // The first 3 bytes of the file would always be this.
+    std::array<unsigned char, 3> magic { 'R', 'S', 'A' };
+
+    // Whether the file is compressed.
+    bool use_compression = false;
+
+    //TODO Whether the archive is currently in a state that we have to wait to read or write.
+    //bool locked = false;
+
+    // The distance from the beginning of the file to the "file table".
+    int64_t file_table_offset = 0;
+public:
+    [[nodiscard]] bool Compressed() const { return use_compression; }
+    [[nodiscard]] int64_t FileTableOffset() const { return file_table_offset; }
+    void FileTableOffset(const int64_t& offset) { file_table_offset = offset; }
+public:
+    Header(bool use_compression, int64_t file_table_offset) : use_compression(use_compression), file_table_offset(file_table_offset) {}
+    ~Header() = default;
+public:
+    /// @returns The header in its on-disk form: magic, compression flag, big-endian file table offset.
+    static std::vector<unsigned char> Serialize(const Header& header);
+    /// @param serialized_header At least Size() bytes. The magic is not checked here.
+    static Header DeSerialize(const unsigned char* serialized_header);
+
+    // The size of the serialized header. sizeof(Header) can't be used because the in-memory layout may contain padding.
+    static int64_t Size() { return static_cast<int64_t>(sizeof(int64_t) + sizeof(bool) + ReArchive::magic.size()); }
+};
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..66b7b24
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,32 @@
+#include <iostream>
+#include <string>
+#include <ReArchive/ReArchive.h>
+
+int main() {
+    ReArchive::FileTable running_tally;
+
+    if(std::filesystem::exists("test.rsa"))
+        std::filesystem::remove("test.rsa");
+
+    if (!ReArchive::CreateArchive("test.rsa"))
+        return -1;
+    std::string some_string = "some other string0.";
+    if (!ReArchive::WriteFile("test.rsa", "assets/test0.png", reinterpret_cast<const unsigned char*>(some_string.data()), static_cast<int64_t>(some_string.size()), &running_tally))
+        return -1;
+
+    some_string = "some other string1.";
+    if (!ReArchive::WriteFile("test.rsa", "assets/test1.png", reinterpret_cast<const unsigned char*>(some_string.data()), static_cast<int64_t>(some_string.size()), &running_tally))
+        return -1;
+
+    auto retrieved = ReArchive::ReadFile("test.rsa", "assets/test0.png");
+    std::cout << std::string(retrieved.begin(), retrieved.end()) << '\n';
+
+    if (!ReArchive::EraseFile("test.rsa", "assets/test0.png", &running_tally))
+        return -1;
+
+    auto retrieved2 = ReArchive::ReadFile("test.rsa", "assets/test1.png");
+    std::cout << std::string(retrieved2.begin(), retrieved2.end()) << '\n';
+
+    for (const auto& e : running_tally.GetEntries())
+        std::cout << e.Path() << '\n';
+}
diff --git a/src/ReArchive.cpp b/src/ReArchive.cpp
new file mode 100644
index 0000000..30c765b
--- /dev/null
+++ b/src/ReArchive.cpp
@@ -0,0 +1,272 @@
+#include <ReArchive/ReArchive.h>
+#include <ReArchive/types/Header.h>
+
+#include <algorithm>
+#include <endian.h>
+#include <fstream>
+#include <optional>
+#include <stdexcept>
+#include <string>
+#include <system_error>
+#include <vector>
+
+using ReArchive::Header;
+using ReArchive::FileTable;
+using ReArchive::FileEntry;
+
+namespace {
+    /// Reads one big-endian int64 from the stream.
+    /// @throws std::runtime_error if the stream runs out first.
+    int64_t ReadInt64(std::ifstream& in) {
+        uint64_t network = 0;
+        // Read straight into an integer; casting a byte buffer to int64_t* would break alignment and aliasing rules.
+        if (!in.read(reinterpret_cast<char*>(&network), sizeof(network)))
+            throw std::runtime_error("Unexpected end of archive while reading the file table.");
+        return static_cast<int64_t>(be64toh(network));
+    }
+
+    /// Writes every byte of the buffer to the stream.
+    void WriteBytes(std::ostream& out, const std::vector<unsigned char>& bytes) {
+        out.write(reinterpret_cast<const char*>(bytes.data()), static_cast<std::streamsize>(bytes.size()));
+    }
+}
+
+Header GetHeader(const unsigned char* archive) {
+    return ReArchive::Header::DeSerialize(archive);
+}
+
+Header GetHeader(const std::filesystem::path& archive) {
+    std::ifstream file(archive, std::ios::binary);
+
+    if (!file)
+        throw std::runtime_error("Trying to get the header of an archive which doesn't exist?");
+
+    std::vector<unsigned char> buffer(ReArchive::Header::Size());
+    if (!file.read(reinterpret_cast<char*>(buffer.data()), static_cast<std::streamsize>(buffer.size())))
+        throw std::runtime_error("Archive is too small to contain a header.");
+
+    return GetHeader(buffer.data());
+}
+
+/// @param header our header.
+/// @param in Our input stream to the file.
+/// @note Does not close the input stream.
+/// @throws std::runtime_error if the file table is truncated or describes data outside the archive.
+FileTable GetFileTable(const Header& header, std::ifstream& in) {
+    FileTable result;
+
+    // Everything read below is untrusted, so it is checked against the real size of the archive.
+    in.seekg(0, std::ios::end);
+    const int64_t archive_size = in.tellg();
+    if (archive_size < 0 || header.FileTableOffset() < Header::Size() || header.FileTableOffset() > archive_size)
+        throw std::runtime_error("Archive header points outside of the archive.");
+
+    in.seekg(header.FileTableOffset(), std::ios::beg);
+    const int64_t file_table_entry_count = ReadInt64(in);
+    if (file_table_entry_count < 0)
+        throw std::runtime_error("Archive file table has a negative entry count.");
+
+    // for each file entry,
+    for (int64_t i = 0; i < file_table_entry_count; i++) {
+        const int64_t string_size = ReadInt64(in);
+        if (string_size < 0 || string_size > archive_size)
+            throw std::runtime_error("Archive file table contains an invalid path length.");
+
+        std::string path(static_cast<size_t>(string_size), '\0');
+        if (!in.read(path.data(), string_size))
+            throw std::runtime_error("Unexpected end of archive while reading the file table.");
+
+        const int64_t data_size = ReadInt64(in);
+        const int64_t data_offset = ReadInt64(in);
+        if (data_size < 0 || data_offset < 0 || data_offset > archive_size || data_size > archive_size - data_offset)
+            throw std::runtime_error("Archive file table describes data outside of the archive.");
+
+        result.Append(FileEntry(data_size, data_offset, path));
+    }
+    return result;
+}
+
+namespace {
+    /// A validated archive: its header plus the parsed file table.
+    struct OpenedArchive {
+        Header header;
+        FileTable table;
+    };
+
+    /// Opens the archive, checks the magic bytes, and parses the header and file table.
+    /// @param in Left open on success so that the caller can go on to read file data.
+    /// @returns std::nullopt if the archive can't be opened, is truncated, lacks the magic bytes, or has a malformed file table.
+    std::optional<OpenedArchive> OpenArchive(const std::filesystem::path& archive, std::ifstream& in) {
+        in.open(archive, std::ios::binary);
+        if (!in)
+            return std::nullopt;
+
+        std::vector<unsigned char> buffer(Header::Size());
+        if (!in.read(reinterpret_cast<char*>(buffer.data()), static_cast<std::streamsize>(buffer.size())))
+            return std::nullopt;
+        if (!std::equal(ReArchive::magic.begin(), ReArchive::magic.end(), buffer.begin()))
+            return std::nullopt;
+
+        try {
+            const Header header = GetHeader(buffer.data());
+            return OpenedArchive{ header, GetFileTable(header, in) };
+        } catch (const std::exception&) {
+            // The public functions report a bad archive through their return value.
+            return std::nullopt;
+        }
+    }
+
+    /// @returns A copy of the entry stored under file_path, or std::nullopt if there is none.
+    /// @note Returns by value on purpose: FileTable::GetEntries() hands out a temporary copy, so a pointer into it would dangle.
+    std::optional<FileEntry> FindEntry(const FileTable& table, const std::filesystem::path& file_path) {
+        for (const auto& entry : table.GetEntries())
+            if (entry.Path() == file_path)
+                return entry;
+        return std::nullopt;
+    }
+}
+
+bool ReArchive::CreateArchive(const std::filesystem::path& filesystem_path, bool use_compression, FileTable* running_tally) {
+    if (std::filesystem::exists(filesystem_path))
+        return false;
+
+    std::ofstream file(filesystem_path, std::ios::binary);
+    if (!file)
+        return false;
+
+    // A new archive is a header followed immediately by an empty file table.
+    const FileTable file_table{};
+    WriteBytes(file, Header::Serialize(Header(use_compression, Header::Size())));
+    WriteBytes(file, FileTable::Serialize(file_table));
+    file.close();
+    if (!file)
+        return false;
+
+    if (running_tally)
+        *running_tally = file_table;
+    return true;
+}
+
+bool ReArchive::WriteFile(const std::filesystem::path& archive, const std::filesystem::path& file_path, const unsigned char* file_data, const int64_t& byte_count, FileTable* running_tally) {
+    if (byte_count < 0 || (byte_count > 0 && !file_data))
+        return false;
+
+    std::ifstream in;
+    auto opened = OpenArchive(archive, in);
+    if (!opened || FindEntry(opened->table, file_path))
+        return false;
+    in.close();
+
+    std::ofstream out(archive, std::ios::binary | std::ios::out | std::ios::in);
+    if (!out)
+        return false;
+
+    // The new data goes where the file table used to start; the table is rewritten behind it.
+    const int64_t data_offset = opened->header.FileTableOffset();
+    out.seekp(data_offset, std::ios::beg);
+    if (byte_count > 0)
+        out.write(reinterpret_cast<const char*>(file_data), byte_count);
+
+    opened->table.Append(FileEntry(byte_count, data_offset, file_path));
+    opened->header.FileTableOffset(data_offset + byte_count);
+    WriteBytes(out, FileTable::Serialize(opened->table));
+
+    out.seekp(0, std::ios::beg);
+    WriteBytes(out, Header::Serialize(opened->header));
+    out.close();
+    if (!out)
+        return false;
+
+    if (running_tally)
+        *running_tally = opened->table;
+    return true;
+}
+
+bool ReArchive::OverwriteFile(const std::filesystem::path& archive, const std::filesystem::path& file_path, const unsigned char* file_data, const int64_t& byte_count) {
+    if (byte_count < 0 || (byte_count > 0 && !file_data))
+        return false;
+
+    std::ifstream in;
+    const auto opened = OpenArchive(archive, in);
+    if (!opened)
+        return false;
+    in.close();
+
+    const auto target = FindEntry(opened->table, file_path);
+    if (!target || byte_count != target->Size())
+        return false;
+
+    std::ofstream out(archive, std::ios::binary | std::ios::out | std::ios::in);
+    if (!out)
+        return false;
+
+    out.seekp(target->Offset(), std::ios::beg);
+    if (byte_count > 0)
+        out.write(reinterpret_cast<const char*>(file_data), byte_count);
+    out.close();
+
+    return !out.fail();
+}
+
+std::vector<unsigned char> ReArchive::ReadFile(const std::filesystem::path& archive, const std::filesystem::path& file_path) {
+    std::ifstream in;
+    const auto opened = OpenArchive(archive, in);
+    if (!opened)
+        return {};
+
+    const auto target = FindEntry(opened->table, file_path);
+    if (!target)
+        return {};
+
+    std::vector<unsigned char> result(static_cast<size_t>(target->Size()));
+    in.seekg(target->Offset(), std::ios::beg);
+    if (!in.read(reinterpret_cast<char*>(result.data()), static_cast<std::streamsize>(result.size())))
+        return {};
+
+    return result;
+}
+
+// I tried to do this several different ways but this seems to be the best approach - Redacted.
+bool ReArchive::EraseFile(const std::filesystem::path& archive, const std::filesystem::path& file_path, FileTable* running_tally) {
+    std::ifstream in;
+    const auto opened = OpenArchive(archive, in);
+    if (!opened)
+        return false;
+    // Let go of the archive before it gets replaced on disk.
+    in.close();
+
+    std::filesystem::path temporary = archive;
+    temporary += ".tmp";
+
+    // A temporary left behind by an interrupted erase would make CreateArchive refuse to run.
+    std::error_code ignored;
+    std::filesystem::remove(temporary, ignored);
+
+    // Rebuild the archive without the erased file, tracking the new file table as we go.
+    FileTable rebuilt;
+    bool ok = CreateArchive(temporary, opened->header.Compressed(), &rebuilt);
+    for (const auto& entry : opened->table.GetEntries()) {
+        if (!ok)
+            break;
+        if (entry.Path() == file_path)
+            continue;
+
+        const auto file_buffer = ReadFile(archive, entry.Path());
+        // ReadFile signals failure with an empty vector, so compare against the recorded size.
+        ok = static_cast<int64_t>(file_buffer.size()) == entry.Size()
+            && WriteFile(temporary, entry.Path(), file_buffer.data(), entry.Size(), &rebuilt);
+    }
+
+    // rename() replaces the old archive in one step, so it is never missing from the disk.
+    std::error_code rename_error;
+    if (ok)
+        std::filesystem::rename(temporary, archive, rename_error);
+    if (!ok || rename_error) {
+        std::filesystem::remove(temporary, ignored);
+        return false;
+    }
+
+    if (running_tally)
+        *running_tally = rebuilt;
+    return true;
+}
diff --git a/src/types/FileEntry.cpp b/src/types/FileEntry.cpp
new file mode 100644
index 0000000..fb848da
--- /dev/null
+++ b/src/types/FileEntry.cpp
@@ -0,0 +1,32 @@
+#include <ReArchive/types/FileEntry.h>
+
+#include <endian.h>
+#include <string>
+
+using ReArchive::FileEntry;
+
+namespace {
+    /// Appends value to out as eight big-endian bytes.
+    void AppendBigEndian(std::vector<unsigned char>& out, int64_t value) {
+        const uint64_t network = htobe64(static_cast<uint64_t>(value));
+        const auto* raw = reinterpret_cast<const unsigned char*>(&network);
+        out.insert(out.end(), raw, raw + sizeof(network));
+    }
+}
+
+std::vector<unsigned char> FileEntry::Serialize(const FileEntry& file) {
+    const std::string path_string = file.path.string();
+
+    std::vector<unsigned char> result;
+    result.reserve(3 * sizeof(int64_t) + path_string.size());
+
+    AppendBigEndian(result, static_cast<int64_t>(path_string.size()));
+    result.insert(result.end(), path_string.begin(), path_string.end());
+    AppendBigEndian(result, file.data_size);
+    AppendBigEndian(result, file.data_offset);
+    return result;
+}
+
+bool FileEntry::operator==(const FileEntry& rhs) const {
+    return data_offset == rhs.data_offset && data_size == rhs.data_size && path == rhs.path;
+}
diff --git a/src/types/FileTable.cpp b/src/types/FileTable.cpp
new file mode 100644
index 0000000..f8aad72
--- /dev/null
+++ b/src/types/FileTable.cpp
@@ -0,0 +1,35 @@
+#include <ReArchive/types/FileTable.h>
+
+#include <algorithm>
+#include <cstring>
+#include <endian.h>
+
+using ReArchive::FileEntry;
+using ReArchive::FileTable;
+
+void FileTable::Append(const FileEntry& file_entry) {
+    const auto wanted = file_entry.Path();
+    const auto same_path = [&wanted](const FileEntry& e) { return e.Path() == wanted; };
+    if (std::any_of(entries.begin(), entries.end(), same_path))
+        return;
+
+    entries.push_back(file_entry);
+}
+
+void FileTable::Remove(const FileEntry& file_entry) {
+    const auto match = std::find(entries.begin(), entries.end(), file_entry);
+    if (match != entries.end())
+        entries.erase(match);
+}
+
+std::vector<unsigned char> FileTable::Serialize(const FileTable& file_table) {
+    const uint64_t network_count = htobe64(static_cast<uint64_t>(file_table.entries.size()));
+    std::vector<unsigned char> result(sizeof(network_count));
+    std::memcpy(result.data(), &network_count, sizeof(network_count));
+
+    for (const auto& file : file_table.entries) {
+        const auto serialization = FileEntry::Serialize(file);
+        result.insert(result.end(), serialization.begin(), serialization.end());
+    }
+    return result;
+}
diff --git a/src/types/Header.cpp b/src/types/Header.cpp
new file mode 100644
index 0000000..2223fd1
--- /dev/null
+++ b/src/types/Header.cpp
@@ -0,0 +1,25 @@
+#include <ReArchive/types/Header.h>
+
+#include <cstring>
+#include <endian.h>
+
+std::vector<unsigned char> ReArchive::Header::Serialize(const ReArchive::Header& header) {
+    std::vector<unsigned char> result(ReArchive::magic.begin(), ReArchive::magic.end());
+    result.push_back(header.use_compression ? 1 : 0);
+
+    const uint64_t network_file_table_offset = htobe64(static_cast<uint64_t>(header.file_table_offset));
+    const auto* raw = reinterpret_cast<const unsigned char*>(&network_file_table_offset);
+    result.insert(result.end(), raw, raw + sizeof(network_file_table_offset));
+
+    return result;
+}
+
+ReArchive::Header ReArchive::Header::DeSerialize(const unsigned char* serialized_header) {
+    const bool use_c = serialized_header[ReArchive::magic.size()] != 0;
+
+    // memcpy because the offset field isn't guaranteed to be suitably aligned for a direct int64 read.
+    uint64_t file_table_off = 0;
+    std::memcpy(&file_table_off, serialized_header + ReArchive::Header::Size() - sizeof(int64_t), sizeof(file_table_off));
+
+    return { use_c, static_cast<int64_t>(be64toh(file_table_off)) };
+}