From fdda938352a177bb717db7fe78219a32be07a919 Mon Sep 17 00:00:00 2001 From: josh Date: Sat, 31 May 2025 01:35:33 -0500 Subject: [PATCH] Refactoring more. --- CMakeLists.txt | 10 +- include/JJX/JJX.hpp | 5 - include/error_handling.hpp | 6 ++ include/{JJX/JSON.hpp => json.hpp} | 5 +- include/lexer.hpp | 27 +++++ main.cpp | 3 +- samples/test.xml | 6 -- src/{JSON.cpp => json.cpp} | 154 ++++++----------------------- src/lexer.cpp | 134 +++++++++++++++++++++++++ 9 files changed, 205 insertions(+), 145 deletions(-) delete mode 100644 include/JJX/JJX.hpp create mode 100644 include/error_handling.hpp rename include/{JJX/JSON.hpp => json.hpp} (99%) create mode 100644 include/lexer.hpp delete mode 100644 samples/test.xml rename src/{JSON.cpp => json.cpp} (81%) create mode 100644 src/lexer.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index d633117..b2cd79f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.18...3.28) -project(jjx +project(json VERSION 1.0 LANGUAGES CXX) @@ -21,13 +21,13 @@ include_directories("include") if (UNIX) - add_library(jjx SHARED ${SOURCES}) + add_library(json SHARED ${SOURCES}) endif() if (WIN32) - add_library(jjx STATIC ${SOURCES}) + add_library(json STATIC ${SOURCES}) endif() -add_executable(jjx_demo main.cpp) +add_executable(json_demo main.cpp) -target_link_libraries(jjx_demo PUBLIC jjx) \ No newline at end of file +target_link_libraries(json_demo PUBLIC json) \ No newline at end of file diff --git a/include/JJX/JJX.hpp b/include/JJX/JJX.hpp deleted file mode 100644 index 4339572..0000000 --- a/include/JJX/JJX.hpp +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - - - -#include \ No newline at end of file diff --git a/include/error_handling.hpp b/include/error_handling.hpp new file mode 100644 index 0000000..c6b83b9 --- /dev/null +++ b/include/error_handling.hpp @@ -0,0 +1,6 @@ +#pragma once + +namespace json +{ + +} \ No newline at end of file diff --git a/include/JJX/JSON.hpp b/include/json.hpp similarity index 99% rename from include/JJX/JSON.hpp rename to include/json.hpp index 4fdff43..a3bf173 100644 --- a/include/JJX/JSON.hpp +++ b/include/json.hpp @@ -18,7 +18,10 @@ // TODO: Auto-conversion from json::value *to* represented type. /// Redacted Software JSON API - Parse, Serialize, Validate, and Build JSON files. -namespace JJX::json { +namespace json { + + + std::string format_error(std::string base, std::string source, int index); /// An enumeration that represents the different types of tokens that exist. /// A token is an atomic chunk of a JSON file's text, which is the smallest piece of syntax that can be considered in isolation. diff --git a/include/lexer.hpp b/include/lexer.hpp new file mode 100644 index 0000000..a7c8c6c --- /dev/null +++ b/include/lexer.hpp @@ -0,0 +1,27 @@ +#include + +#pragma once + +/// @namespace json::lexers +/// @brief This namespace provides functions for lexical analysis (tokenizing) +/// +/// Each function in this namespace is responsible for "lexing" a specific type of token, +/// from a raw JSON string, +namespace json::lexers { + + int lex_whitespace(std::string raw_json, int index); + + std::tuple lex_syntax(std::string raw_json, int index); + + std::tuple lex_string(std::string raw_json, int original_index); + + std::tuple lex_number(std::string raw_json, int original_index); + + std::tuple lex_keyword(std::string raw_json, std::string keyword, json::token_type type, int original_index); + + std::tuple lex_null(std::string raw_json, int index); + + std::tuple lex_true(std::string raw_json, int index); + + std::tuple lex_false(std::string raw_json, int index); +} diff --git a/main.cpp b/main.cpp index dbbf2e7..2766a69 100644 --- a/main.cpp +++ b/main.cpp @@ -1,8 +1,7 @@ -#include +#include #include #include -using namespace JJX; /// Open a text file and return the contents. std::string read_file(const std::string& file_path) diff --git a/samples/test.xml b/samples/test.xml deleted file mode 100644 index 90f0a1b..0000000 --- a/samples/test.xml +++ /dev/null @@ -1,6 +0,0 @@ - - Tove - Jani - Reminder - Don't forget me this weekend! - \ No newline at end of file diff --git a/src/JSON.cpp b/src/json.cpp similarity index 81% rename from src/JSON.cpp rename to src/json.cpp index ce1a576..c0cbd50 100644 --- a/src/JSON.cpp +++ b/src/json.cpp @@ -1,8 +1,30 @@ -#include -#include +#include +#include #include +#include <__msvc_ranges_tuple_formatter.hpp> + +namespace json { + + class file_error : public std::runtime_error + { + + }; + + class io_error : public std::runtime_error + { + + }; + + class parse_error : public std::runtime_error + { + + }; + + class json_runtime_error : public std::runtime_error + { + + }; -namespace JJX::json { std::string format_error(std::string base, std::string source, int index) { std::ostringstream s; int counter = 0; @@ -48,130 +70,9 @@ namespace JJX::json { s << whitespace << "^"; return s.str(); - } - int lex_whitespace(std::string raw_json, int index) { - while (std::isspace(raw_json[index])) { - if (index == raw_json.length()) { - break; - } - index++; - } - return index; - } - std::tuple lex_syntax(std::string raw_json, int index) - { - json::token token{"", token_type::syntax, index}; - std::string value = ""; - auto c = raw_json[index]; - if (c == '[' || c == ']' || c == '{' || c == '}' || c == ':' || c == ',') { - token.value += c; - index++; - } - - return {token, index, ""}; - } - - std::tuple lex_string(std::string raw_json, int original_index) { - int index = original_index; - json::token token {"", token_type::string, index}; - std::string value = ""; - auto c = raw_json[index]; - if (c != '"') { - return {token, original_index, ""}; - } - index++; - - // TODO: handle nested quotes - while (c = raw_json[index], c != '"') { - if (index == raw_json.length()) { - return {token, index, format_error("Unexpected EOF while lexing string", raw_json, index)}; - } - - token.value += c; - index++; - } - index++; - - return {token, index, ""}; - } - - std::tuple lex_number(std::string raw_json, int original_index) { - int index = original_index; - json::token token {"", token_type::number, index}; - std::string value = ""; - bool decimal_present = false; - - // Negative numbers. - if (raw_json[index] == '-') { - token.value += raw_json[index]; - index++; - } - - while(true) { - if (index == raw_json.length()) - break; - - auto c = raw_json[index]; - if (c == '.') { - if (decimal_present) - break; - decimal_present = true; - //token.value += c; // This caused two dots to be inserted. - } - - // Scientific notation. - else if (c == 'E' || c == 'e') { - token.value += c; - index++; - if (raw_json[index] == '-') { - token.value += raw_json[index]; - index++; - } - continue; // Loop early. - } - - // Only regex non-numeric values if we didn't catch it earlier. - else if (!(c >= '0' && c <= '9')) - break; - - token.value += c; - index++; - } - return {token, index, ""}; - } - - std::tuple lex_keyword(std::string raw_json, std::string keyword, json::token_type type, int original_index) { - int index = original_index; - json::token token{"", type, index}; - - while (keyword[index - original_index] == raw_json[index]) { - if (index == raw_json.length()) { - break; - } - index++; - } - if (index - original_index == keyword.length()) { - token.value = keyword; - } - return {token, index, ""}; - } - - std::tuple lex_null(std::string raw_json, int index) - { - return lex_keyword(raw_json, "null", token_type::null, index); - } - - std::tuple lex_true(std::string raw_json, int index) - { - return lex_keyword(raw_json, "true", token_type::boolean, index); - } - - std::tuple lex_false(std::string raw_json, int index) { - return lex_keyword(raw_json, "false", token_type::boolean, index); - } json::value& value::operator=(double in) { @@ -333,6 +234,7 @@ namespace JJX::json { } std::tuple, std::string> lex(std::string raw_json) { + using namespace lexers; std::vector tokens; // All tokens will embed a pointer to the raw JSON for debugging purposes auto original_copy = std::make_shared(raw_json); @@ -351,7 +253,7 @@ namespace JJX::json { if (auto [token, new_index, error] = lexer(raw_json, i); i != new_index) { // Error while lexing, return early if (error.length()) { - return std::make_tuple(std::vector{}, error); + return std::make_tuple(std::vector{}, error); } // Store reference to the original source @@ -366,7 +268,7 @@ namespace JJX::json { if (found) { continue; } - return std::make_tuple(std::vector{}, format_error("Unable to lex", raw_json, i)); + return std::make_tuple(std::vector{}, format_error("Unable to lex", raw_json, i)); } return {tokens, ""}; } diff --git a/src/lexer.cpp b/src/lexer.cpp new file mode 100644 index 0000000..32ca1f2 --- /dev/null +++ b/src/lexer.cpp @@ -0,0 +1,134 @@ +#include +#include +#include +#include + +namespace json { + int lexers::lex_whitespace(std::string raw_json, int index) + { + while (std::isspace(raw_json[index])) { + if (index == raw_json.length()) { + break; + } + index++; + } + return index; + } + + std::tuple lexers::lex_syntax(std::string raw_json, int index) + { + json::token token{"", token_type::syntax, index}; + std::string value = ""; + auto c = raw_json[index]; + if (c == '[' || c == ']' || c == '{' || c == '}' || c == ':' || c == ',') { + token.value += c; + index++; + } + + return {token, index, ""}; + } + + std::tuple lexers::lex_string(std::string raw_json, int original_index) + { + int index = original_index; + json::token token {"", token_type::string, index}; + std::string value = ""; + auto c = raw_json[index]; + if (c != '"') { + return {token, original_index, ""}; + } + index++; + + // TODO: handle nested quotes + while (c = raw_json[index], c != '"') { + if (index == raw_json.length()) { + return {token, index, format_error("Unexpected EOF while lexing string", raw_json, index)}; + } + + token.value += c; + index++; + } + index++; + + return {token, index, ""}; + } + + std::tuple lexers::lex_number(std::string raw_json, int original_index) + { + int index = original_index; + json::token token {"", token_type::number, index}; + std::string value = ""; + bool decimal_present = false; + + // Negative numbers. + if (raw_json[index] == '-') { + token.value += raw_json[index]; + index++; + } + + while(true) { + if (index == raw_json.length()) + break; + + auto c = raw_json[index]; + if (c == '.') { + if (decimal_present) + break; + decimal_present = true; + //token.value += c; // This caused two dots to be inserted. + } + + // Scientific notation. + else if (c == 'E' || c == 'e') { + token.value += c; + index++; + if (raw_json[index] == '-') { + token.value += raw_json[index]; + index++; + } + continue; // Loop early. + } + + // Only regex non-numeric values if we didn't catch it earlier. + else if (!(c >= '0' && c <= '9')) + break; + + token.value += c; + index++; + } + return {token, index, ""}; + } + + std::tuple lexers::lex_keyword(std::string raw_json, std::string keyword, + json::token_type type, int original_index) + { + int index = original_index; + json::token token{"", type, index}; + + while (keyword[index - original_index] == raw_json[index]) { + if (index == raw_json.length()) { + break; + } + index++; + } + if (index - original_index == keyword.length()) { + token.value = keyword; + } + return {token, index, ""}; + } + + std::tuple lexers::lex_null(std::string raw_json, int index) + { + return lex_keyword(raw_json, "null", token_type::null, index); + } + + std::tuple lexers::lex_true(std::string raw_json, int index) + { + return lex_keyword(raw_json, "true", token_type::boolean, index); + } + + std::tuple lexers::lex_false(std::string raw_json, int index) + { + return lex_keyword(raw_json, "false", token_type::boolean, index); + } +}