Refactoring more.
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
cmake_minimum_required(VERSION 3.18...3.28)
|
cmake_minimum_required(VERSION 3.18...3.28)
|
||||||
project(jjx
|
project(json
|
||||||
VERSION 1.0
|
VERSION 1.0
|
||||||
LANGUAGES CXX)
|
LANGUAGES CXX)
|
||||||
|
|
||||||
@@ -21,13 +21,13 @@ include_directories("include")
|
|||||||
|
|
||||||
|
|
||||||
if (UNIX)
|
if (UNIX)
|
||||||
add_library(jjx SHARED ${SOURCES})
|
add_library(json SHARED ${SOURCES})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
add_library(jjx STATIC ${SOURCES})
|
add_library(json STATIC ${SOURCES})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_executable(jjx_demo main.cpp)
|
add_executable(json_demo main.cpp)
|
||||||
|
|
||||||
target_link_libraries(jjx_demo PUBLIC jjx)
|
target_link_libraries(json_demo PUBLIC json)
|
@@ -1,5 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include <JJX/JSON.hpp>
|
|
6
include/error_handling.hpp
Normal file
6
include/error_handling.hpp
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
namespace json
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
@@ -18,7 +18,10 @@
|
|||||||
// TODO: Auto-conversion from json::value *to* represented type.
|
// TODO: Auto-conversion from json::value *to* represented type.
|
||||||
|
|
||||||
/// Redacted Software JSON API - Parse, Serialize, Validate, and Build JSON files.
|
/// Redacted Software JSON API - Parse, Serialize, Validate, and Build JSON files.
|
||||||
namespace JJX::json {
|
namespace json {
|
||||||
|
|
||||||
|
|
||||||
|
std::string format_error(std::string base, std::string source, int index);
|
||||||
|
|
||||||
/// An enumeration that represents the different types of tokens that exist.
|
/// An enumeration that represents the different types of tokens that exist.
|
||||||
/// A token is an atomic chunk of a JSON file's text, which is the smallest piece of syntax that can be considered in isolation.
|
/// A token is an atomic chunk of a JSON file's text, which is the smallest piece of syntax that can be considered in isolation.
|
27
include/lexer.hpp
Normal file
27
include/lexer.hpp
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
#include <json.hpp>
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
/// @namespace json::lexers
|
||||||
|
/// @brief This namespace provides functions for lexical analysis (tokenizing)
|
||||||
|
///
|
||||||
|
/// Each function in this namespace is responsible for "lexing" a specific type of token,
|
||||||
|
/// from a raw JSON string.
|
||||||
|
namespace json::lexers {
|
||||||
|
|
||||||
|
int lex_whitespace(std::string raw_json, int index);
|
||||||
|
|
||||||
|
std::tuple<json::token, int, std::string> lex_syntax(std::string raw_json, int index);
|
||||||
|
|
||||||
|
std::tuple<json::token, int, std::string> lex_string(std::string raw_json, int original_index);
|
||||||
|
|
||||||
|
std::tuple<json::token, int, std::string> lex_number(std::string raw_json, int original_index);
|
||||||
|
|
||||||
|
std::tuple<json::token, int, std::string> lex_keyword(std::string raw_json, std::string keyword, json::token_type type, int original_index);
|
||||||
|
|
||||||
|
std::tuple<json::token, int, std::string> lex_null(std::string raw_json, int index);
|
||||||
|
|
||||||
|
std::tuple<json::token, int, std::string> lex_true(std::string raw_json, int index);
|
||||||
|
|
||||||
|
std::tuple<json::token, int, std::string> lex_false(std::string raw_json, int index);
|
||||||
|
}
|
3
main.cpp
3
main.cpp
@@ -1,8 +1,7 @@
|
|||||||
#include <JJX/JJX.hpp>
|
#include <JSON.hpp>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
|
||||||
using namespace JJX;
|
|
||||||
|
|
||||||
/// Open a text file and return the contents.
|
/// Open a text file and return the contents.
|
||||||
std::string read_file(const std::string& file_path)
|
std::string read_file(const std::string& file_path)
|
||||||
|
@@ -1,6 +0,0 @@
|
|||||||
<note>
|
|
||||||
<to>Tove</to>
|
|
||||||
<from>Jani</from>
|
|
||||||
<heading>Reminder</heading>
|
|
||||||
<body>Don't forget me this weekend!</body>
|
|
||||||
</note>
|
|
@@ -1,8 +1,30 @@
|
|||||||
#include <JJX/JJX.hpp>
|
#include <json.hpp>
|
||||||
#include <JJX/JSON.hpp>
|
#include <lexer.hpp>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
#include <__msvc_ranges_tuple_formatter.hpp>
|
||||||
|
|
||||||
|
namespace json {
|
||||||
|
|
||||||
|
/// Exception type named for file failures.
/// NOTE(review): presumably thrown when a file cannot be opened or read — confirm against call sites.
class file_error : public std::runtime_error
{
public:
    // std::runtime_error has no default constructor, so an empty derived class
    // can never be instantiated. Inherit the message-taking constructors.
    using std::runtime_error::runtime_error;
};
|
||||||
|
|
||||||
|
/// Exception type named for input/output failures.
/// NOTE(review): presumably thrown when reading or writing a stream fails — confirm against call sites.
class io_error : public std::runtime_error
{
public:
    // std::runtime_error has no default constructor, so an empty derived class
    // can never be instantiated. Inherit the message-taking constructors.
    using std::runtime_error::runtime_error;
};
|
||||||
|
|
||||||
|
/// Exception type named for parsing failures.
/// NOTE(review): presumably thrown when JSON text cannot be parsed — confirm against call sites.
class parse_error : public std::runtime_error
{
public:
    // std::runtime_error has no default constructor, so an empty derived class
    // can never be instantiated. Inherit the message-taking constructors.
    using std::runtime_error::runtime_error;
};
|
||||||
|
|
||||||
|
/// General-purpose runtime exception for the json library.
/// NOTE(review): intended use is not visible here — confirm against call sites.
class json_runtime_error : public std::runtime_error
{
public:
    // std::runtime_error has no default constructor, so an empty derived class
    // can never be instantiated. Inherit the message-taking constructors.
    using std::runtime_error::runtime_error;
};
|
||||||
|
|
||||||
namespace JJX::json {
|
|
||||||
std::string format_error(std::string base, std::string source, int index) {
|
std::string format_error(std::string base, std::string source, int index) {
|
||||||
std::ostringstream s;
|
std::ostringstream s;
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
@@ -48,130 +70,9 @@ namespace JJX::json {
|
|||||||
s << whitespace << "^";
|
s << whitespace << "^";
|
||||||
|
|
||||||
return s.str();
|
return s.str();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int lex_whitespace(std::string raw_json, int index) {
|
|
||||||
while (std::isspace(raw_json[index])) {
|
|
||||||
if (index == raw_json.length()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
return index;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<json::token, int, std::string> lex_syntax(std::string raw_json, int index)
|
|
||||||
{
|
|
||||||
json::token token{"", token_type::syntax, index};
|
|
||||||
std::string value = "";
|
|
||||||
auto c = raw_json[index];
|
|
||||||
if (c == '[' || c == ']' || c == '{' || c == '}' || c == ':' || c == ',') {
|
|
||||||
token.value += c;
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
|
|
||||||
return {token, index, ""};
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<json::token, int, std::string> lex_string(std::string raw_json, int original_index) {
|
|
||||||
int index = original_index;
|
|
||||||
json::token token {"", token_type::string, index};
|
|
||||||
std::string value = "";
|
|
||||||
auto c = raw_json[index];
|
|
||||||
if (c != '"') {
|
|
||||||
return {token, original_index, ""};
|
|
||||||
}
|
|
||||||
index++;
|
|
||||||
|
|
||||||
// TODO: handle nested quotes
|
|
||||||
while (c = raw_json[index], c != '"') {
|
|
||||||
if (index == raw_json.length()) {
|
|
||||||
return {token, index, format_error("Unexpected EOF while lexing string", raw_json, index)};
|
|
||||||
}
|
|
||||||
|
|
||||||
token.value += c;
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
index++;
|
|
||||||
|
|
||||||
return {token, index, ""};
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<json::token, int, std::string> lex_number(std::string raw_json, int original_index) {
|
|
||||||
int index = original_index;
|
|
||||||
json::token token {"", token_type::number, index};
|
|
||||||
std::string value = "";
|
|
||||||
bool decimal_present = false;
|
|
||||||
|
|
||||||
// Negative numbers.
|
|
||||||
if (raw_json[index] == '-') {
|
|
||||||
token.value += raw_json[index];
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
|
|
||||||
while(true) {
|
|
||||||
if (index == raw_json.length())
|
|
||||||
break;
|
|
||||||
|
|
||||||
auto c = raw_json[index];
|
|
||||||
if (c == '.') {
|
|
||||||
if (decimal_present)
|
|
||||||
break;
|
|
||||||
decimal_present = true;
|
|
||||||
//token.value += c; // This caused two dots to be inserted.
|
|
||||||
}
|
|
||||||
|
|
||||||
// Scientific notation.
|
|
||||||
else if (c == 'E' || c == 'e') {
|
|
||||||
token.value += c;
|
|
||||||
index++;
|
|
||||||
if (raw_json[index] == '-') {
|
|
||||||
token.value += raw_json[index];
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
continue; // Loop early.
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only regex non-numeric values if we didn't catch it earlier.
|
|
||||||
else if (!(c >= '0' && c <= '9'))
|
|
||||||
break;
|
|
||||||
|
|
||||||
token.value += c;
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
return {token, index, ""};
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<json::token, int, std::string> lex_keyword(std::string raw_json, std::string keyword, json::token_type type, int original_index) {
|
|
||||||
int index = original_index;
|
|
||||||
json::token token{"", type, index};
|
|
||||||
|
|
||||||
while (keyword[index - original_index] == raw_json[index]) {
|
|
||||||
if (index == raw_json.length()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
if (index - original_index == keyword.length()) {
|
|
||||||
token.value = keyword;
|
|
||||||
}
|
|
||||||
return {token, index, ""};
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<json::token, int, std::string> lex_null(std::string raw_json, int index)
|
|
||||||
{
|
|
||||||
return lex_keyword(raw_json, "null", token_type::null, index);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<json::token, int, std::string> lex_true(std::string raw_json, int index)
|
|
||||||
{
|
|
||||||
return lex_keyword(raw_json, "true", token_type::boolean, index);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<json::token, int, std::string> lex_false(std::string raw_json, int index) {
|
|
||||||
return lex_keyword(raw_json, "false", token_type::boolean, index);
|
|
||||||
}
|
|
||||||
|
|
||||||
json::value& value::operator=(double in)
|
json::value& value::operator=(double in)
|
||||||
{
|
{
|
||||||
@@ -333,6 +234,7 @@ namespace JJX::json {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::tuple<std::vector<json::token>, std::string> lex(std::string raw_json) {
|
std::tuple<std::vector<json::token>, std::string> lex(std::string raw_json) {
|
||||||
|
using namespace lexers;
|
||||||
std::vector<json::token> tokens;
|
std::vector<json::token> tokens;
|
||||||
// All tokens will embed a pointer to the raw JSON for debugging purposes
|
// All tokens will embed a pointer to the raw JSON for debugging purposes
|
||||||
auto original_copy = std::make_shared<std::string>(raw_json);
|
auto original_copy = std::make_shared<std::string>(raw_json);
|
||||||
@@ -351,7 +253,7 @@ namespace JJX::json {
|
|||||||
if (auto [token, new_index, error] = lexer(raw_json, i); i != new_index) {
|
if (auto [token, new_index, error] = lexer(raw_json, i); i != new_index) {
|
||||||
// Error while lexing, return early
|
// Error while lexing, return early
|
||||||
if (error.length()) {
|
if (error.length()) {
|
||||||
return std::make_tuple(std::vector<JJX::json::token>{}, error);
|
return std::make_tuple(std::vector<json::token>{}, error);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store reference to the original source
|
// Store reference to the original source
|
||||||
@@ -366,7 +268,7 @@ namespace JJX::json {
|
|||||||
if (found) {
|
if (found) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
return std::make_tuple(std::vector<JJX::json::token>{}, format_error("Unable to lex", raw_json, i));
|
return std::make_tuple(std::vector<json::token>{}, format_error("Unable to lex", raw_json, i));
|
||||||
}
|
}
|
||||||
return {tokens, ""};
|
return {tokens, ""};
|
||||||
}
|
}
|
134
src/lexer.cpp
Normal file
134
src/lexer.cpp
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
#include <lexer.hpp>
|
||||||
|
#include <json.hpp>
|
||||||
|
#include <tuple>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace json {
|
||||||
|
/// Skips over a run of whitespace characters.
///
/// @param raw_json The JSON text being lexed.
/// @param index    Position at which to start scanning.
/// @return The position of the first non-whitespace character at or after
///         @p index, or the length of @p raw_json when only whitespace remains.
///         A negative or past-the-end @p index is returned unchanged.
int lexers::lex_whitespace(std::string raw_json, int index)
{
    // A negative position can never address a character; hand it back untouched.
    if (index < 0) {
        return index;
    }

    const std::size_t length = raw_json.length();
    std::size_t position = static_cast<std::size_t>(index);

    // Test the bound before reading, and widen through unsigned char:
    // handing std::isspace a negative char (e.g. a UTF-8 byte) is undefined.
    while (position < length && std::isspace(static_cast<unsigned char>(raw_json[position]))) {
        ++position;
    }

    return static_cast<int>(position);
}
|
||||||
|
|
||||||
|
std::tuple<json::token, int, std::string> lexers::lex_syntax(std::string raw_json, int index)
|
||||||
|
{
|
||||||
|
json::token token{"", token_type::syntax, index};
|
||||||
|
std::string value = "";
|
||||||
|
auto c = raw_json[index];
|
||||||
|
if (c == '[' || c == ']' || c == '{' || c == '}' || c == ':' || c == ',') {
|
||||||
|
token.value += c;
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return {token, index, ""};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::tuple<json::token, int, std::string> lexers::lex_string(std::string raw_json, int original_index)
|
||||||
|
{
|
||||||
|
int index = original_index;
|
||||||
|
json::token token {"", token_type::string, index};
|
||||||
|
std::string value = "";
|
||||||
|
auto c = raw_json[index];
|
||||||
|
if (c != '"') {
|
||||||
|
return {token, original_index, ""};
|
||||||
|
}
|
||||||
|
index++;
|
||||||
|
|
||||||
|
// TODO: handle nested quotes
|
||||||
|
while (c = raw_json[index], c != '"') {
|
||||||
|
if (index == raw_json.length()) {
|
||||||
|
return {token, index, format_error("Unexpected EOF while lexing string", raw_json, index)};
|
||||||
|
}
|
||||||
|
|
||||||
|
token.value += c;
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
index++;
|
||||||
|
|
||||||
|
return {token, index, ""};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::tuple<json::token, int, std::string> lexers::lex_number(std::string raw_json, int original_index)
|
||||||
|
{
|
||||||
|
int index = original_index;
|
||||||
|
json::token token {"", token_type::number, index};
|
||||||
|
std::string value = "";
|
||||||
|
bool decimal_present = false;
|
||||||
|
|
||||||
|
// Negative numbers.
|
||||||
|
if (raw_json[index] == '-') {
|
||||||
|
token.value += raw_json[index];
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
|
||||||
|
while(true) {
|
||||||
|
if (index == raw_json.length())
|
||||||
|
break;
|
||||||
|
|
||||||
|
auto c = raw_json[index];
|
||||||
|
if (c == '.') {
|
||||||
|
if (decimal_present)
|
||||||
|
break;
|
||||||
|
decimal_present = true;
|
||||||
|
//token.value += c; // This caused two dots to be inserted.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scientific notation.
|
||||||
|
else if (c == 'E' || c == 'e') {
|
||||||
|
token.value += c;
|
||||||
|
index++;
|
||||||
|
if (raw_json[index] == '-') {
|
||||||
|
token.value += raw_json[index];
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
continue; // Loop early.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only regex non-numeric values if we didn't catch it earlier.
|
||||||
|
else if (!(c >= '0' && c <= '9'))
|
||||||
|
break;
|
||||||
|
|
||||||
|
token.value += c;
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
return {token, index, ""};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::tuple<json::token, int, std::string> lexers::lex_keyword(std::string raw_json, std::string keyword,
|
||||||
|
json::token_type type, int original_index)
|
||||||
|
{
|
||||||
|
int index = original_index;
|
||||||
|
json::token token{"", type, index};
|
||||||
|
|
||||||
|
while (keyword[index - original_index] == raw_json[index]) {
|
||||||
|
if (index == raw_json.length()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
if (index - original_index == keyword.length()) {
|
||||||
|
token.value = keyword;
|
||||||
|
}
|
||||||
|
return {token, index, ""};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::tuple<json::token, int, std::string> lexers::lex_null(std::string raw_json, int index)
|
||||||
|
{
|
||||||
|
return lex_keyword(raw_json, "null", token_type::null, index);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::tuple<json::token, int, std::string> lexers::lex_true(std::string raw_json, int index)
|
||||||
|
{
|
||||||
|
return lex_keyword(raw_json, "true", token_type::boolean, index);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::tuple<json::token, int, std::string> lexers::lex_false(std::string raw_json, int index)
|
||||||
|
{
|
||||||
|
return lex_keyword(raw_json, "false", token_type::boolean, index);
|
||||||
|
}
|
||||||
|
}
|
Reference in New Issue
Block a user