Refactoring more.

2025-05-31 01:35:33 -05:00
parent 933d988cf6
commit fdda938352
9 changed files with 205 additions and 145 deletions

View File

@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.18...3.28)
-project(jjx
+project(json
VERSION 1.0
LANGUAGES CXX)
@@ -21,13 +21,13 @@ include_directories("include")
if (UNIX)
-add_library(jjx SHARED ${SOURCES})
+add_library(json SHARED ${SOURCES})
endif()
if (WIN32)
-add_library(jjx STATIC ${SOURCES})
+add_library(json STATIC ${SOURCES})
endif()
-add_executable(jjx_demo main.cpp)
+add_executable(json_demo main.cpp)
-target_link_libraries(jjx_demo PUBLIC jjx)
+target_link_libraries(json_demo PUBLIC json)

View File

@@ -1,5 +0,0 @@
#pragma once
#include <JJX/JSON.hpp>

View File

@@ -0,0 +1,6 @@
#pragma once
namespace json
{
}

View File

@@ -18,7 +18,10 @@
// TODO: Auto-conversion from json::value *to* represented type.
/// Redacted Software JSON API - Parse, Serialize, Validate, and Build JSON files.
-namespace JJX::json {
+namespace json {
std::string format_error(std::string base, std::string source, int index);
/// An enumeration that represents the different types of tokens that exist.
/// A token is an atomic chunk of a JSON file's text, which is the smallest piece of syntax that can be considered in isolation.
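format_error is now exposed from the public header. A minimal usage sketch, not taken from this commit: it assumes json.hpp declares the function exactly as above, and the commented output is only the assumed shape of the diagnostic (the implementation later in this diff echoes the source text and places a caret under the reported index).

#include <json.hpp>
#include <iostream>
#include <string>

int main() {
    std::string raw = R"({"key": tru})";
    // Hypothetical call; the exact message layout may differ, but it ends with
    // the source echoed back and a '^' under the offending index.
    std::cout << json::format_error("Unable to lex", raw, 8) << "\n";
}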

include/lexer.hpp (new file, 27 lines added)
View File

@@ -0,0 +1,27 @@
#include <json.hpp>
#pragma once
/// @namespace json::lexers
/// @brief This namespace provides functions for lexical analysis (tokenizing)
///
/// Each function in this namespace is responsible for "lexing" a specific type of token,
/// from a raw JSON string,
namespace json::lexers {
int lex_whitespace(std::string raw_json, int index);
std::tuple<json::token, int, std::string> lex_syntax(std::string raw_json, int index);
std::tuple<json::token, int, std::string> lex_string(std::string raw_json, int original_index);
std::tuple<json::token, int, std::string> lex_number(std::string raw_json, int original_index);
std::tuple<json::token, int, std::string> lex_keyword(std::string raw_json, std::string keyword, json::token_type type, int original_index);
std::tuple<json::token, int, std::string> lex_null(std::string raw_json, int index);
std::tuple<json::token, int, std::string> lex_true(std::string raw_json, int index);
std::tuple<json::token, int, std::string> lex_false(std::string raw_json, int index);
}
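Each declaration above follows the same shape: the token produced, the index just past the consumed text, and an error string. A minimal usage sketch, not taken from this commit; it assumes only the signatures above plus the token::value member used elsewhere in this diff.

#include <lexer.hpp>
#include <iostream>
#include <string>
#include <tuple>

int main() {
    std::string raw = R"("hello" : 42)";
    // A lexer signals "no match" by returning the index unchanged.
    auto [tok, next, err] = json::lexers::lex_string(raw, 0);
    if (next != 0 && err.empty())
        std::cout << "string token: " << tok.value
                  << ", resume lexing at index " << next << "\n";
}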

View File

@@ -1,8 +1,7 @@
-#include <JJX/JJX.hpp>
+#include <JSON.hpp>
#include <iostream>
#include <fstream>
using namespace JJX;
/// Open a text file and return the contents.
std::string read_file(const std::string& file_path)

View File

@@ -1,6 +0,0 @@
<note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
<body>Don't forget me this weekend!</body>
</note>

View File

@@ -1,8 +1,30 @@
-#include <JJX/JJX.hpp>
+#include <json.hpp>
-#include <JJX/JSON.hpp>
+#include <lexer.hpp>
#include <sstream>
#include <__msvc_ranges_tuple_formatter.hpp>
namespace json {
class file_error : public std::runtime_error
{
};
class io_error : public std::runtime_error
{
};
class parse_error : public std::runtime_error
{
};
class json_runtime_error : public std::runtime_error
{
};
namespace JJX::json {
std::string format_error(std::string base, std::string source, int index) {
std::ostringstream s;
int counter = 0;
@@ -48,130 +70,9 @@ namespace JJX::json {
s << whitespace << "^";
return s.str();
}
int lex_whitespace(std::string raw_json, int index) {
while (std::isspace(raw_json[index])) {
if (index == raw_json.length()) {
break;
}
index++;
}
return index;
}
std::tuple<json::token, int, std::string> lex_syntax(std::string raw_json, int index)
{
json::token token{"", token_type::syntax, index};
std::string value = "";
auto c = raw_json[index];
if (c == '[' || c == ']' || c == '{' || c == '}' || c == ':' || c == ',') {
token.value += c;
index++;
}
return {token, index, ""};
}
std::tuple<json::token, int, std::string> lex_string(std::string raw_json, int original_index) {
int index = original_index;
json::token token {"", token_type::string, index};
std::string value = "";
auto c = raw_json[index];
if (c != '"') {
return {token, original_index, ""};
}
index++;
// TODO: handle nested quotes
while (c = raw_json[index], c != '"') {
if (index == raw_json.length()) {
return {token, index, format_error("Unexpected EOF while lexing string", raw_json, index)};
}
token.value += c;
index++;
}
index++;
return {token, index, ""};
}
std::tuple<json::token, int, std::string> lex_number(std::string raw_json, int original_index) {
int index = original_index;
json::token token {"", token_type::number, index};
std::string value = "";
bool decimal_present = false;
// Negative numbers.
if (raw_json[index] == '-') {
token.value += raw_json[index];
index++;
}
while(true) {
if (index == raw_json.length())
break;
auto c = raw_json[index];
if (c == '.') {
if (decimal_present)
break;
decimal_present = true;
//token.value += c; // This caused two dots to be inserted.
}
// Scientific notation.
else if (c == 'E' || c == 'e') {
token.value += c;
index++;
if (raw_json[index] == '-') {
token.value += raw_json[index];
index++;
}
continue; // Loop early.
}
// Only regex non-numeric values if we didn't catch it earlier.
else if (!(c >= '0' && c <= '9'))
break;
token.value += c;
index++;
}
return {token, index, ""};
}
std::tuple<json::token, int, std::string> lex_keyword(std::string raw_json, std::string keyword, json::token_type type, int original_index) {
int index = original_index;
json::token token{"", type, index};
while (keyword[index - original_index] == raw_json[index]) {
if (index == raw_json.length()) {
break;
}
index++;
}
if (index - original_index == keyword.length()) {
token.value = keyword;
}
return {token, index, ""};
}
std::tuple<json::token, int, std::string> lex_null(std::string raw_json, int index)
{
return lex_keyword(raw_json, "null", token_type::null, index);
}
std::tuple<json::token, int, std::string> lex_true(std::string raw_json, int index)
{
return lex_keyword(raw_json, "true", token_type::boolean, index);
}
std::tuple<json::token, int, std::string> lex_false(std::string raw_json, int index) {
return lex_keyword(raw_json, "false", token_type::boolean, index);
}
json::value& value::operator=(double in)
{
@@ -333,6 +234,7 @@ namespace JJX::json {
}
std::tuple<std::vector<json::token>, std::string> lex(std::string raw_json) {
using namespace lexers;
std::vector<json::token> tokens;
// All tokens will embed a pointer to the raw JSON for debugging purposes
auto original_copy = std::make_shared<std::string>(raw_json);
@@ -351,7 +253,7 @@ namespace JJX::json {
if (auto [token, new_index, error] = lexer(raw_json, i); i != new_index) {
// Error while lexing, return early
if (error.length()) {
-return std::make_tuple(std::vector<JJX::json::token>{}, error);
+return std::make_tuple(std::vector<json::token>{}, error);
}
// Store reference to the original source
@@ -366,7 +268,7 @@ namespace JJX::json {
if (found) {
continue;
}
-return std::make_tuple(std::vector<JJX::json::token>{}, format_error("Unable to lex", raw_json, i));
+return std::make_tuple(std::vector<json::token>{}, format_error("Unable to lex", raw_json, i));
}
return {tokens, ""};
}
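lex() now pulls the individual lexers in from json::lexers and tries each one in turn, accepting whichever advances the index. A minimal end-to-end sketch, not taken from this commit; it assumes json::lex is declared in json.hpp and uses only the token::value member that appears in this diff.

#include <json.hpp>
#include <iostream>

int main() {
    auto [tokens, error] = json::lex(R"({"count": [1, true, null]})");
    if (!error.empty()) {
        std::cerr << error << "\n"; // diagnostic built by format_error()
        return 1;
    }
    // Prints each token's text on its own line: {, count, :, [, 1, ...
    for (const auto& t : tokens)
        std::cout << t.value << "\n";
}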

src/lexer.cpp (new file, 134 lines added)
View File

@@ -0,0 +1,134 @@
#include <lexer.hpp>
#include <json.hpp>
#include <tuple>
#include <vector>
namespace json {
int lexers::lex_whitespace(std::string raw_json, int index)
{
while (std::isspace(raw_json[index])) {
if (index == raw_json.length()) {
break;
}
index++;
}
return index;
}
std::tuple<json::token, int, std::string> lexers::lex_syntax(std::string raw_json, int index)
{
json::token token{"", token_type::syntax, index};
std::string value = "";
auto c = raw_json[index];
if (c == '[' || c == ']' || c == '{' || c == '}' || c == ':' || c == ',') {
token.value += c;
index++;
}
return {token, index, ""};
}
std::tuple<json::token, int, std::string> lexers::lex_string(std::string raw_json, int original_index)
{
int index = original_index;
json::token token {"", token_type::string, index};
std::string value = "";
auto c = raw_json[index];
if (c != '"') {
return {token, original_index, ""};
}
index++;
// TODO: handle nested quotes
while (c = raw_json[index], c != '"') {
if (index == raw_json.length()) {
return {token, index, format_error("Unexpected EOF while lexing string", raw_json, index)};
}
token.value += c;
index++;
}
index++;
return {token, index, ""};
}
std::tuple<json::token, int, std::string> lexers::lex_number(std::string raw_json, int original_index)
{
int index = original_index;
json::token token {"", token_type::number, index};
std::string value = "";
bool decimal_present = false;
// Negative numbers.
if (raw_json[index] == '-') {
token.value += raw_json[index];
index++;
}
while(true) {
if (index == raw_json.length())
break;
auto c = raw_json[index];
if (c == '.') {
if (decimal_present)
break;
decimal_present = true;
//token.value += c; // This caused two dots to be inserted.
}
// Scientific notation.
else if (c == 'E' || c == 'e') {
token.value += c;
index++;
if (raw_json[index] == '-') {
token.value += raw_json[index];
index++;
}
continue; // Loop early.
}
// Only regex non-numeric values if we didn't catch it earlier.
else if (!(c >= '0' && c <= '9'))
break;
token.value += c;
index++;
}
return {token, index, ""};
}
std::tuple<json::token, int, std::string> lexers::lex_keyword(std::string raw_json, std::string keyword,
json::token_type type, int original_index)
{
int index = original_index;
json::token token{"", type, index};
while (keyword[index - original_index] == raw_json[index]) {
if (index == raw_json.length()) {
break;
}
index++;
}
if (index - original_index == keyword.length()) {
token.value = keyword;
}
return {token, index, ""};
}
std::tuple<json::token, int, std::string> lexers::lex_null(std::string raw_json, int index)
{
return lex_keyword(raw_json, "null", token_type::null, index);
}
std::tuple<json::token, int, std::string> lexers::lex_true(std::string raw_json, int index)
{
return lex_keyword(raw_json, "true", token_type::boolean, index);
}
std::tuple<json::token, int, std::string> lexers::lex_false(std::string raw_json, int index)
{
return lex_keyword(raw_json, "false", token_type::boolean, index);
}
}
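The number lexer above accepts an optional leading minus, at most one decimal point, and an exponent with an optional negative sign. A small sketch of that behaviour, not taken from this commit:

#include <lexer.hpp>
#include <iostream>
#include <string>
#include <tuple>

int main() {
    std::string raw = "-12.5e-3,";
    auto [tok, next, err] = json::lexers::lex_number(raw, 0);
    std::cout << tok.value << "\n"; // -12.5e-3
    std::cout << next << "\n";      // 8: the index of the trailing comma
}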