JSON parser, lexer, reassembler initial test
This commit is contained in:
@@ -1,6 +1,33 @@
|
||||
cmake_minimum_required(VERSION 3.28)
|
||||
project(jjx)
|
||||
cmake_minimum_required(VERSION 3.18...3.28)
|
||||
project(jjx
|
||||
VERSION 1.0
|
||||
LANGUAGES CXX)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
if (PROJECT_SOURCE_DIR STREQUAL PROJECT_BINARY_DIR)
|
||||
message(FATAL_ERROR "In-source builds are not allowed!")
|
||||
endif()
|
||||
|
||||
add_executable(jjx main.cpp)
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
|
||||
#include(cmake/CPM.cmake)
|
||||
|
||||
file(GLOB_RECURSE HEADERS "include/*.hpp")
|
||||
|
||||
file(GLOB_RECURSE SOURCES "src/*.cpp")
|
||||
|
||||
include_directories("include")
|
||||
|
||||
|
||||
if (UNIX)
|
||||
add_library(jjx SHARED ${SOURCES})
|
||||
endif()
|
||||
|
||||
if (WIN32)
|
||||
add_library(jjx STATIC ${SOURCES})
|
||||
endif()
|
||||
|
||||
add_executable(jjx_demo main.cpp)
|
||||
|
||||
target_link_libraries(jjx_demo PUBLIC jjx)
|
35
include/jjx.hpp
Normal file
35
include/jjx.hpp
Normal file
@@ -0,0 +1,35 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <tuple>
#include <vector>
|
||||
|
||||
namespace jjx
{
    namespace json {
        // Category assigned to each token produced by the lexer.
        enum class token_type { string, number, syntax, boolean, null };

        // Kind of JSON value held by a `value` node.
        enum class value_type { string, number, object, array, boolean, null};

        // One lexed token together with where it came from.
        struct token {
            // Token text; for string tokens this is the text between the quotes.
            std::string value;
            // Token category. Defaulted so a default-initialized token is never
            // read with an indeterminate type.
            token_type type = token_type::null;
            // Index into the source text at which the token starts.
            int location = 0;
            // Copy of the complete source text, attached by lex() to every token
            // so parse errors can quote the offending line.
            std::shared_ptr<std::string> full_source;
        };

        // A JSON value. `type` says which of the optional members is meaningful;
        // the parser leaves the others disengaged.
        struct value {
            std::optional<std::string> string;
            std::optional<double> number;
            std::optional<bool> boolean;
            std::optional<std::vector<value>> array;
            std::optional<std::map<std::string, value>> object;
            // Defaults to null so that an empty `value{}` (as returned alongside
            // an error) is a self-consistent JSON null rather than a "string"
            // with no string engaged.
            value_type type = value_type::null;
        };

        // Splits raw JSON text into tokens.
        // Returns {tokens, error}; `error` is empty on success.
        std::tuple<std::vector<json::token>, std::string> lex(std::string);

        // Parses the value starting at `tokens[index]`.
        // Returns {value, index of the first token after the value, error};
        // `error` is empty on success.
        std::tuple<json::value, int, std::string> parse(std::vector<json::token>, int index = 0);

        // Lexes and parses JSON text. Returns {value, error}; `error` is empty
        // on success.
        std::tuple<json::value, std::string> parse(std::string);

        // Serializes a value back to JSON text. `whitespace` is the indentation
        // prefix of the current nesting level.
        std::string deparse(json::value, std::string whitespace = "");
    }

    namespace xml {}
}
|
20
main.cpp
20
main.cpp
@@ -1,6 +1,22 @@
|
||||
#include <jjx.hpp>
|
||||
#include <iostream>
|
||||
|
||||
int main() {
|
||||
std::cout << "Hello, World!" << std::endl;
|
||||
using namespace jjx;
|
||||
|
||||
// Parses the JSON text passed as the first command-line argument and writes
// the reassembled JSON to stdout.
// Returns 1 (with a message on stderr) when the argument is missing or the
// input does not parse, 0 otherwise.
int main(int argc, char *argv[]) {
    // argc may legally be 0, so test for "fewer than two" rather than
    // "exactly one" before touching argv[1].
    if (argc < 2) {
        std::cerr << "Expected JSON input argument to parse" << std::endl;
        return 1;
    }

    std::string in{argv[1]};

    auto [ast, error] = json::parse(in);
    if (error.size()) {
        std::cerr << error << std::endl;
        return 1;
    }

    std::cout << json::deparse(ast);
    return 0;
}
|
||||
|
26
samples/widgets.json
Normal file
26
samples/widgets.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{"widget": {
|
||||
"debug": "on",
|
||||
"window": {
|
||||
"title": "Sample Konfabulator Widget",
|
||||
"name": "main_window",
|
||||
"width": 500,
|
||||
"height": 500
|
||||
},
|
||||
"image": {
|
||||
"src": "Images/Sun.png",
|
||||
"name": "sun1",
|
||||
"hOffset": 250,
|
||||
"vOffset": 250,
|
||||
"alignment": "center"
|
||||
},
|
||||
"text": {
|
||||
"data": "Click Here",
|
||||
"size": 36,
|
||||
"style": "bold",
|
||||
"name": "text1",
|
||||
"hOffset": 250,
|
||||
"vOffset": 100,
|
||||
"alignment": "center",
|
||||
"onMouseUp": "sun1.opacity = (sun1.opacity / 100) * 90;"
|
||||
}
|
||||
}}
|
3
src/jjx.cpp
Normal file
3
src/jjx.cpp
Normal file
@@ -0,0 +1,3 @@
|
||||
//
|
||||
// Created by josh on 8/19/24.
|
||||
//
|
378
src/json.cpp
Normal file
378
src/json.cpp
Normal file
@@ -0,0 +1,378 @@
|
||||
#include <jjx.hpp>
|
||||
#include <sstream>
|
||||
|
||||
namespace jjx::json {
|
||||
// Builds a three-line error report for position `index` of `source`:
//   "<base> at line L, column C"
//   the complete text of the line containing `index`
//   padding followed by '^' under the offending character
// Lines are counted from 1; the column is the number of characters that
// precede `index` on its line.
std::string format_error(std::string base, std::string source, int index) {
    int line_number = 1;
    int column_number = 0;
    std::string line_text;
    std::string caret_padding;

    // Consume everything in front of the error position, restarting the
    // per-line state at each newline.
    const std::size_t total = source.size();
    std::size_t pos = 0;
    while (pos < total && static_cast<int>(pos) != index) {
        const char ch = source[pos];
        ++pos;
        switch (ch) {
            case '\n':
                ++line_number;
                column_number = 0;
                line_text.clear();
                caret_padding.clear();
                break;
            case '\t':
                // Tabs are shown as a space so the caret stays aligned.
                ++column_number;
                line_text += " ";
                caret_padding += " ";
                break;
            default:
                ++column_number;
                line_text += ch;
                caret_padding += " ";
                break;
        }
    }

    // Append the remainder of the current line (up to, not including, '\n').
    const std::size_t line_end = source.find('\n', pos);
    const std::size_t remaining =
        line_end == std::string::npos ? std::string::npos : line_end - pos;
    line_text.append(source, pos, remaining);

    // TODO: Migrate the below code bits to std::format
    std::ostringstream report;
    report << base << " at line " << line_number << ", column " << column_number << '\n'
           << line_text << '\n'
           << caret_padding << "^";
    return report.str();
}
|
||||
|
||||
// Skips a run of whitespace.
// Returns the index of the first non-whitespace character at or after `index`
// (or the string length if only whitespace remains). An `index` outside the
// string is returned unchanged.
int lex_whitespace(const std::string& raw_json, int index) {
    if (index < 0) {
        return index;
    }

    const std::size_t length = raw_json.length();
    std::size_t pos = static_cast<std::size_t>(index);

    // The bounds test comes first, and the character is widened through
    // unsigned char because std::isspace has undefined behaviour for negative
    // values (e.g. bytes of a UTF-8 sequence stored in a signed char).
    while (pos < length && std::isspace(static_cast<unsigned char>(raw_json[pos]))) {
        ++pos;
    }
    return static_cast<int>(pos);
}
|
||||
|
||||
std::tuple<json::token, int, std::string> lex_syntax(std::string raw_json, int index)
|
||||
{
|
||||
json::token token{"", token_type::syntax, index};
|
||||
std::string value = "";
|
||||
auto c = raw_json[index];
|
||||
if (c == '[' || c == ']' || c == '{' || c == '}' || c == ':' || c == ',') {
|
||||
token.value += c;
|
||||
index++;
|
||||
}
|
||||
|
||||
return {token, index, ""};
|
||||
}
|
||||
|
||||
std::tuple<json::token, int, std::string> lex_string(std::string raw_json, int original_index) {
|
||||
int index = original_index;
|
||||
json::token token {"", token_type::string, index};
|
||||
std::string value = "";
|
||||
auto c = raw_json[index];
|
||||
if (c != '"') {
|
||||
return {token, original_index, ""};
|
||||
}
|
||||
index++;
|
||||
|
||||
// TODO: handle nested quotes
|
||||
while (c = raw_json[index], c != '"') {
|
||||
if (index == raw_json.length()) {
|
||||
return {token, index, format_error("Unexpected EOF while lexing string", raw_json, index)};
|
||||
}
|
||||
|
||||
token.value += c;
|
||||
index++;
|
||||
}
|
||||
index++;
|
||||
|
||||
return {token, index, ""};
|
||||
}
|
||||
|
||||
std::tuple<json::token, int, std::string> lex_number(std::string raw_json, int original_index) {
|
||||
int index = original_index;
|
||||
json::token token {"", token_type::number, index};
|
||||
std::string value = "";
|
||||
// TODO: handle not just integers
|
||||
while(true) {
|
||||
if (index == raw_json.length()) {
|
||||
break;
|
||||
}
|
||||
|
||||
auto c = raw_json[index];
|
||||
if (!(c >= '0' && c <= '9')) {
|
||||
break;
|
||||
}
|
||||
|
||||
token.value += c;
|
||||
index++;
|
||||
}
|
||||
return {token, index, ""};
|
||||
}
|
||||
|
||||
std::tuple<json::token, int, std::string> lex_keyword(std::string raw_json, std::string keyword, json::token_type type, int original_index) {
|
||||
int index = original_index;
|
||||
json::token token{"", type, index};
|
||||
|
||||
while (keyword[index - original_index] == raw_json[index]) {
|
||||
if (index == raw_json.length()) {
|
||||
break;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
if (index - original_index == keyword.length()) {
|
||||
token.value = keyword;
|
||||
}
|
||||
return {token, index, ""};
|
||||
}
|
||||
|
||||
std::tuple<json::token, int, std::string> lex_null(std::string raw_json, int index)
|
||||
{
|
||||
return lex_keyword(raw_json, "null", token_type::null, index);
|
||||
}
|
||||
|
||||
std::tuple<json::token, int, std::string> lex_true(std::string raw_json, int index)
|
||||
{
|
||||
return lex_keyword(raw_json, "true", token_type::boolean, index);
|
||||
}
|
||||
|
||||
std::tuple<json::token, int, std::string> lex_false(std::string raw_json, int index) {
|
||||
return lex_keyword(raw_json, "false", token_type::boolean, index);
|
||||
}
|
||||
|
||||
std::tuple<std::vector<json::token>, std::string> lex(std::string raw_json) {
|
||||
std::vector<json::token> tokens;
|
||||
// All tokens will embed a pointer to the raw JSON for debugging purposes
|
||||
auto original_copy = std::make_shared<std::string>(raw_json);
|
||||
|
||||
auto generic_lexers = {lex_syntax, lex_string, lex_number, lex_null, lex_true, lex_false};
|
||||
|
||||
for (int i = 0; i < raw_json.length(); i++) {
|
||||
// Skip past whitespace
|
||||
if (auto new_index = lex_whitespace(raw_json, i); i != new_index) {
|
||||
i = new_index - 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto found = false;
|
||||
for (auto lexer: generic_lexers) {
|
||||
if (auto [token, new_index, error] = lexer(raw_json, i); i != new_index) {
|
||||
// Error while lexing, return early
|
||||
if (error.length()) {
|
||||
return {{}, error};
|
||||
}
|
||||
|
||||
// Store reference to the original source
|
||||
token.full_source = original_copy;
|
||||
tokens.push_back(token);
|
||||
i = new_index - 1;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found) {
|
||||
continue;
|
||||
}
|
||||
return {{}, format_error("Unable to lex", raw_json, i)};
|
||||
}
|
||||
return {tokens, ""};
|
||||
}
|
||||
|
||||
// It's very annoying when languages doesn't give you
|
||||
// stringifier methods for enums by default for debugging.
|
||||
// There are ways to do this with reflection but it seems hairy.
|
||||
// There's a better procedure IIRC.
|
||||
std::string token_type_tostring(token_type tok)
|
||||
{
|
||||
switch(tok) {
|
||||
case token_type::string: return "String";
|
||||
case token_type::number: return "Number";
|
||||
case token_type::syntax: return "Syntax";
|
||||
case token_type::boolean: return "Boolean";
|
||||
case token_type::null: return "Null";
|
||||
}
|
||||
}
|
||||
|
||||
std::string format_parse_error(std::string base, json::token token)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << "Unexpected token '" << token.value << "', type' '"
|
||||
<< token_type_tostring(token.type) << "', index'";
|
||||
s << std::endl << base;
|
||||
return format_error(s.str(), *token.full_source, token.location);
|
||||
}
|
||||
|
||||
std::tuple<std::vector<json::value>, int, std::string> parse_array(std::vector<json::token> tokens, int index) {
|
||||
std::vector<json::value> children = {};
|
||||
while (index < tokens.size()) {
|
||||
auto t = tokens[index];
|
||||
if (t.type == token_type::syntax) {
|
||||
if (t.value == "]") {
|
||||
return {children, index + 1, ""};
|
||||
}
|
||||
if (t.value == ",") {
|
||||
index++;
|
||||
t = tokens[index];
|
||||
} else if (children.size() > 0) {
|
||||
return {{}, index,
|
||||
format_parse_error("Expected comma after element in array", t)};
|
||||
}
|
||||
}
|
||||
|
||||
auto [child, new_index, error] = parse(tokens, index);
|
||||
if (error.size()) { return {{}, index, error}; }
|
||||
children.push_back(child);
|
||||
index = new_index;
|
||||
}
|
||||
return {
|
||||
{}, index,
|
||||
format_parse_error("Unexpected EOF while parsing array", tokens[index])};
|
||||
|
||||
}
|
||||
|
||||
std::tuple<std::map<std::string, value>, int, std::string> parse_object(std::vector<token> tokens, int index) {
|
||||
std::map<std::string, value> values = {};
|
||||
while (index < tokens.size()) {
|
||||
auto t = tokens[index];
|
||||
if (t.type == token_type::syntax) {
|
||||
if (t.value == "}") {
|
||||
return {values, index + 1, ""};
|
||||
}
|
||||
if (t.value == ",") {
|
||||
index++;
|
||||
t = tokens[index];
|
||||
} else if (values.size() > 0) {
|
||||
return {{}, index,
|
||||
format_parse_error("Expected comma after element in object", t)
|
||||
};
|
||||
} else {
|
||||
return {{}, index,
|
||||
format_parse_error("Expected key-value pair or closing brace in object", t)
|
||||
};
|
||||
}
|
||||
}
|
||||
auto [key, new_index, error] = parse(tokens, index);
|
||||
if (error.size())
|
||||
{
|
||||
return {{}, index, error};
|
||||
}
|
||||
|
||||
if (key.type != value_type::string) {
|
||||
return {{}, index,
|
||||
format_parse_error("Expected string key in object", t)};
|
||||
}
|
||||
|
||||
index = new_index;
|
||||
t = tokens[index];
|
||||
|
||||
if (!(t.type == token_type::syntax && t.value == ":")) {
|
||||
return {{}, index,
|
||||
format_parse_error("Expected colon after key in object", t)};
|
||||
}
|
||||
index++;
|
||||
t = tokens[index];
|
||||
|
||||
auto [value, new_index1, error1] = parse(tokens, index);
|
||||
if (error1.size()) {
|
||||
return {{}, index, error1};
|
||||
}
|
||||
|
||||
values[key.string.value()] = value;
|
||||
index = new_index1;
|
||||
}
|
||||
return {values, index+1, ""};
|
||||
}
|
||||
|
||||
// Parses the single JSON value that starts at `tokens[index]`.
// Returns {value, index of the first token after the value, error}; `error`
// is empty on success. Arrays and objects are delegated to parse_array /
// parse_object, whose error (if any) is passed through.
// An `index` outside the token list yields an error instead of reading out
// of bounds (this covers empty and whitespace-only input).
std::tuple<json::value, int, std::string> parse(std::vector<json::token> tokens, int index) {
    if (index < 0 || static_cast<std::size_t>(index) >= tokens.size()) {
        if (tokens.empty()) {
            return {{}, index, "Unexpected end of input: expected a JSON value"};
        }
        return {{}, index, format_parse_error("Unexpected end of input", tokens.back())};
    }

    const auto& current = tokens[index];
    json::value result{};

    switch(current.type) {
        case token_type::number:
            // NOTE(review): std::stod throws for text it cannot represent
            // (e.g. an extremely long digit run); that exception is not
            // handled here.
            result.number = std::stod(current.value);
            result.type = value_type::number;
            return {result, index + 1, ""};
        case token_type::boolean:
            result.boolean = current.value == "true";
            result.type = value_type::boolean;
            return {result, index + 1, ""};
        case token_type::null:
            result.type = value_type::null;
            return {result, index + 1, ""};
        case token_type::string:
            result.string = current.value;
            result.type = value_type::string;
            return {result, index + 1, ""};
        case token_type::syntax:
            if (current.value == "[") {
                auto [array, new_index, error] = parse_array(tokens, index + 1);
                result.array = array;
                result.type = value_type::array;
                return {result, new_index, error};
            }
            if (current.value == "{") {
                auto [object, new_index, error] = parse_object(tokens, index + 1);
                result.object = object;
                result.type = value_type::object;
                return {result, new_index, error};
            }
            // Any other structural character cannot start a value.
            break;
    }

    return {{}, index, format_parse_error("Failed to parse", current)};
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
std::tuple<json::value, std::string> parse(std::string source) {
|
||||
auto [tokens, error] = json::lex(source);
|
||||
if (error.size())
|
||||
{
|
||||
return {{}, error};
|
||||
}
|
||||
|
||||
auto [ast, _, error1] = json::parse(tokens);
|
||||
return {ast, error1};
|
||||
}
|
||||
|
||||
// Serializes `v` back to JSON text. `whitespace` is the indentation prefix of
// the enclosing level; nested elements are indented one step further and each
// array element / object member is written on its own line.
// String contents are emitted exactly as stored (no escaping is applied).
// Throws std::bad_optional_access if the member selected by `v.type` is not
// engaged.
std::string deparse(json::value v, std::string whitespace) {
    const std::string inner = whitespace + " ";

    switch(v.type) {
        case json::value_type::string:
            return "\"" + v.string.value() + "\"";
        case json::value_type::boolean:
            return (v.boolean.value() ? "true" : "false");
        case json::value_type::number: {
            // std::to_string always writes six decimals ("500.000000") and
            // truncates small magnitudes to "0.000000". Instead use the
            // shortest of 15 or 17 significant digits that reads back to the
            // same double.
            const double n = v.number.value();
            std::ostringstream out;
            out.precision(15);
            out << n;

            double read_back = 0;
            std::istringstream check(out.str());
            check >> read_back;
            if (!check || read_back != n) {
                out.str("");
                out.precision(17);
                out << n;
            }
            return out.str();
        }
        case json::value_type::null:
            return "null";
        case json::value_type::array: {
            const auto& items = v.array.value();
            std::string s = "[\n";
            for (std::size_t i = 0; i < items.size(); i++) {
                s += inner + deparse(items[i], inner);
                if (i + 1 < items.size()) {
                    s += ",";
                }
                s += "\n";
            }
            return s + whitespace + "]";
        }
        case json::value_type::object: {
            const auto& members = v.object.value();
            std::string s = "{\n";
            std::size_t written = 0;
            for (auto const &[key, member] : members) {
                s += inner + "\"" + key + "\":" + deparse(member, inner);
                written++;
                if (written < members.size()) {
                    s += ",";
                }
                s += "\n";
            }
            return s + whitespace + "}";
        }
    }

    // Reached only if `v.type` is not a declared enumerator; emit a valid
    // JSON token rather than flowing off the end of the function.
    return "null";
}
|
||||
}
|
1
src/xml.cpp
Normal file
1
src/xml.cpp
Normal file
@@ -0,0 +1 @@
|
||||
#include "../include/jjx.hpp"
|
Reference in New Issue
Block a user