#include <JJX/JJX.hpp>
#include <JJX/JSON.hpp>

#include <cctype>
#include <memory>
#include <sstream>

namespace JJX::json {
// Builds a human-readable error message that echoes the offending line of
// `source` and places a caret under the column that `index` points at.
std::string format_error(std::string base, std::string source, int index) {
    std::ostringstream s;
    int counter = 0;
    int line = 1;
    int column = 0;
    std::string lastline = "";
    std::string whitespace = "";

    for (auto c: source) {
        if (counter == index) {
            break;
        }

        if (c == '\n') {
            line++;
            column = 0;
            lastline = "";
            whitespace = "";
        } else if (c == '\t') {
            column++;
            lastline += " ";
            whitespace += " ";
        } else {
            column++;
            lastline += c;
            whitespace += " ";
        }
        counter++;
    }

    // Collect the remainder of the offending line so it can be echoed in full.
    while (counter < source.size()) {
        auto c = source[counter];
        if (c == '\n') {
            break;
        }
        lastline += c;
        counter++;
    }

    // TODO: Migrate the below code bits to std::format

    s << base << " at line " << line << ", column " << column << std::endl;
    s << lastline << std::endl;
    s << whitespace << "^";

    return s.str();
}
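
// Illustrative example (added for clarity; not part of the original source).
// For a malformed document such as "{\n  \"a\" 1\n}" and index 8,
// format_error("Unexpected token", source, 8) yields roughly:
//
//   Unexpected token at line 2, column 6
//     "a" 1
//         ^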

// Advances past any run of whitespace and returns the index of the first
// non-whitespace character.
int lex_whitespace(std::string raw_json, int index) {
    while (std::isspace(raw_json[index])) {
        if (index == raw_json.length()) {
            break;
        }
        index++;
    }
    return index;
}

// Lexes a single structural character: one of [ ] { } : ,
std::tuple<json::token, int, std::string> lex_syntax(std::string raw_json, int index)
{
    json::token token{"", token_type::syntax, index};
    auto c = raw_json[index];
    if (c == '[' || c == ']' || c == '{' || c == '}' || c == ':' || c == ',') {
        token.value += c;
        index++;
    }

    return {token, index, ""};
}

// Lexes a double-quoted string literal.
std::tuple<json::token, int, std::string> lex_string(std::string raw_json, int original_index) {
    int index = original_index;
    json::token token {"", token_type::string, index};
    auto c = raw_json[index];
    if (c != '"') {
        return {token, original_index, ""};
    }
    index++;

    // TODO: handle nested (escaped) quotes
    while (c = raw_json[index], c != '"') {
        if (index == raw_json.length()) {
            return {token, index, format_error("Unexpected EOF while lexing string", raw_json, index)};
        }

        token.value += c;
        index++;
    }
    index++;

    return {token, index, ""};
}

// Lexes a number: an optional leading minus, digits, at most one decimal
// point, and scientific notation with an optional negative exponent.
std::tuple<json::token, int, std::string> lex_number(std::string raw_json, int original_index) {
    int index = original_index;
    json::token token {"", token_type::number, index};
    bool decimal_present = false;

    // Negative numbers.
    if (raw_json[index] == '-') {
        token.value += raw_json[index];
        index++;
    }

    while (true) {
        if (index == raw_json.length())
            break;

        auto c = raw_json[index];
        if (c == '.') {
            if (decimal_present)
                break;
            decimal_present = true;
            // The '.' itself is appended by the shared code at the bottom of the loop.
        }

        // Scientific notation.
        else if (c == 'E' || c == 'e') {
            token.value += c;
            index++;
            if (raw_json[index] == '-') {
                token.value += raw_json[index];
                index++;
            }
            continue; // Loop early.
        }

        // Stop at the first character that isn't part of the number.
        else if (!(c >= '0' && c <= '9'))
            break;

        token.value += c;
        index++;
    }
    return {token, index, ""};
}
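
// Illustrative usage (added by the editor; not part of the original source):
//
//   auto [tok, next, err] = lex_number("-12.5e-3,", 0);
//   // tok.value == "-12.5e-3", next == 8, err == ""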

// Lexes a fixed keyword such as "null", "true", or "false".
std::tuple<json::token, int, std::string> lex_keyword(std::string raw_json, std::string keyword, json::token_type type, int original_index) {
    int index = original_index;
    json::token token{"", type, index};

    while (keyword[index - original_index] == raw_json[index]) {
        if (index == raw_json.length()) {
            break;
        }
        index++;
    }
    // Only a complete match produces a token value.
    if (index - original_index == keyword.length()) {
        token.value = keyword;
    }
    return {token, index, ""};
}

std::tuple<json::token, int, std::string> lex_null(std::string raw_json, int index)
{
    return lex_keyword(raw_json, "null", token_type::null, index);
}

std::tuple<json::token, int, std::string> lex_true(std::string raw_json, int index)
{
    return lex_keyword(raw_json, "true", token_type::boolean, index);
}

std::tuple<json::token, int, std::string> lex_false(std::string raw_json, int index) {
    return lex_keyword(raw_json, "false", token_type::boolean, index);
}

void value::operator=(double in)
{
    type = value_type::number;
    number = in;
}

void value::operator=(const std::string& in)
{
    type = value_type::string;
    string = in;
}

void value::operator=(bool in)
{
    type = value_type::boolean;
    boolean = in;
}

void value::operator=(std::vector<value> in)
{
    type = value_type::array;
    array = in;
}

void value::operator=(std::map<std::string, value> in)
{
    type = value_type::object;
    object = in;
}
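
// Illustrative usage (added by the editor; not part of the original source):
//
//   value v;
//   v = 42.0;               // v.type == value_type::number
//   v = std::string("hi");  // v.type == value_type::string (note: a bare
//                           // string literal would bind to operator=(bool))
//   v = true;               // v.type == value_type::boolean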

void object_val::add(const std::string& key, value val)
{
    object.value().emplace(key, val);
}

void object_val::add(const std::string& key, const std::string& val)
{
    object.value().emplace(key, json::string(val));
}

value& object_val::operator[](const std::string& key)
{
    return object.value()[key];
}

void array_val::add(value val)
{
    array.value().push_back(val);
}

value& array_val::operator[](int key)
{
    return array.value()[key];
}

// Tokenizes raw_json, returning the token stream and an error string
// (empty on success).
std::tuple<std::vector<json::token>, std::string> lex(std::string raw_json) {
    std::vector<json::token> tokens;
    // All tokens will embed a pointer to the raw JSON for debugging purposes.
    auto original_copy = std::make_shared<std::string>(raw_json);

    auto generic_lexers = {lex_syntax, lex_string, lex_number, lex_null, lex_true, lex_false};

    for (int i = 0; i < raw_json.length(); i++) {
        // Skip past whitespace.
        if (auto new_index = lex_whitespace(raw_json, i); i != new_index) {
            i = new_index - 1;
            continue;
        }

        auto found = false;
        for (auto lexer: generic_lexers) {
            if (auto [token, new_index, error] = lexer(raw_json, i); i != new_index) {
                // Error while lexing, return early.
                if (error.length()) {
                    return std::make_tuple(std::vector<JJX::json::token>{}, error);
                }

                // Store reference to the original source.
                token.full_source = original_copy;
                tokens.push_back(token);
                i = new_index - 1;
                found = true;
                break;
            }
        }

        if (found) {
            continue;
        }
        return std::make_tuple(std::vector<JJX::json::token>{}, format_error("Unable to lex", raw_json, i));
    }
    return {tokens, ""};
}
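
// Illustrative usage (added by the editor; not part of the original source):
//
//   auto [tokens, err] = lex("{\"n\": 1}");
//   // On success err is empty and tokens holds: '{', "n", ':', 1, '}'.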

// It's very annoying when languages don't give you stringifier methods for
// enums by default for debugging. There are ways to do this with reflection,
// but it seems hairy; there's probably a better approach (IIRC).
std::string token_type_tostring(token_type tok)
{
    switch (tok) {
    case token_type::string: return "String";
    case token_type::number: return "Number";
    case token_type::syntax: return "Syntax";
    case token_type::boolean: return "Boolean";
    case token_type::null: return "Null";
    }
    return "Unknown";
}

std::string format_parse_error(std::string base, json::token token)
{
    std::ostringstream s;
    s << "Unexpected token '" << token.value << "', type '"
      << token_type_tostring(token.type) << "'";
    s << std::endl << base;
    return format_error(s.str(), *token.full_source, token.location);
}

std::tuple<std::vector<json::value>, int, std::string> parse_array(std::vector<json::token> tokens, int index) {
    std::vector<json::value> children = {};
    while (index < tokens.size()) {
        auto t = tokens[index];
        if (t.type == token_type::syntax) {
            if (t.value == "]") {
                return {children, index + 1, ""};
            }
            if (t.value == ",") {
                index++;
                t = tokens[index];
            } else if (children.size() > 0) {
                return {{}, index,
                    format_parse_error("Expected comma after element in array", t)};
            }
        }

        auto [child, new_index, error] = parse(tokens, index);
        if (error.size()) {
            return {{}, index, error};
        }
        children.push_back(child);
        index = new_index;
    }
    // `index` is past the end here, so report the error at the last token seen.
    return {
        {}, index,
        format_parse_error("Unexpected EOF while parsing array", tokens[index - 1])};
}

std::tuple<std::map<std::string, value>, int, std::string> parse_object(std::vector<token> tokens, int index) {
    std::map<std::string, value> values = {};
    while (index < tokens.size()) {
        auto t = tokens[index];
        if (t.type == token_type::syntax) {
            if (t.value == "}") {
                return {values, index + 1, ""};
            }
            if (t.value == ",") {
                index++;
                t = tokens[index];
            } else if (values.size() > 0) {
                return {{}, index,
                    format_parse_error("Expected comma after element in object", t)};
            } else {
                return {{}, index,
                    format_parse_error("Expected key-value pair or closing brace in object", t)};
            }
        }

        auto [key, new_index, error] = parse(tokens, index);
        if (error.size()) {
            return {{}, index, error};
        }

        if (key.type != value_type::string) {
            return {{}, index,
                format_parse_error("Expected string key in object", t)};
        }

        index = new_index;
        t = tokens[index];

        if (!(t.type == token_type::syntax && t.value == ":")) {
            return {{}, index,
                format_parse_error("Expected colon after key in object", t)};
        }
        index++;
        t = tokens[index];

        auto [value, new_index1, error1] = parse(tokens, index);
        if (error1.size()) {
            return {{}, index, error1};
        }

        values[key.string.value()] = value;
        index = new_index1;
    }
    return {values, index + 1, ""};
}

std::tuple<json::value, int, std::string> parse(std::vector<json::token> tokens, int index) {
    auto token = tokens[index];
    switch (token.type) {
    case token_type::number: {
        auto n = std::stod(token.value);
        return {json::value{.number = n, .type = value_type::number}, index + 1, ""};
    }
    case token_type::boolean:
        return {json::value{.boolean = token.value == "true", .type = value_type::boolean}, index + 1, ""};
    case token_type::null:
        return {json::value{.type = value_type::null}, index + 1, ""};
    case token_type::string:
        return {json::value{.string = token.value, .type = value_type::string}, index + 1, ""};
    case token_type::syntax: {
        if (token.value == "[") {
            auto [array, new_index, error] = parse_array(tokens, index + 1);
            return {json::value{.array = array, .type = value_type::array}, new_index, error};
        }
        if (token.value == "{") {
            auto [object, new_index, error] = parse_object(tokens, index + 1);
            return {json::value{.object = std::optional(object), .type = value_type::object}, new_index, error};
        }
        // Any other syntax token falls through to the error case below.
    }
    default:
        return {{}, index, format_parse_error("Failed to parse", token)};
    }
}

// Convenience overload: lex `source` and parse the resulting token stream.
std::tuple<json::value, std::string> parse(std::string source) {
    auto [tokens, error] = json::lex(source);
    if (error.size()) {
        return std::make_tuple(JJX::json::value{}, error);
    }

    auto [ast, _, error1] = json::parse(tokens);
    return {ast, error1};
}
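
// Illustrative usage (added by the editor; not part of the original source):
//
//   auto [doc, err] = JJX::json::parse(R"({"name": "JJX", "tags": [1, 2]})");
//   if (err.empty()) {
//       auto name = doc.object.value()["name"].string.value(); // "JJX"
//   }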

// Serializes a value back to JSON text; `whitespace` is the indentation
// accumulated so far for nested containers.
std::string deparse(json::value v, std::string whitespace) {
    switch (v.type) {
    case json::value_type::string:
        return "\"" + v.string.value() + "\"";
    case json::value_type::boolean:
        return (v.boolean.value() ? "true" : "false");
    case json::value_type::number:
        return std::to_string(v.number.value());
    case json::value_type::null:
        return "null";
    case json::value_type::array: {
        std::string s = "[\n";
        auto a = v.array.value();
        for (int i = 0; i < a.size(); i++) {
            auto value = a[i];
            s += whitespace + " " + deparse(value, whitespace + " ");
            if (i < a.size() - 1) {
                s += ",";
            }
            s += "\n";
        }
        return s + whitespace + "]";
    }
    case json::value_type::object: {
        std::string s = "{\n";
        auto values = v.object.value();
        auto i = 0;
        for (auto const &[key, value] : values) {
            s += whitespace + " " + "\"" + key + "\":" + deparse(value, whitespace + " ");

            if (i < values.size() - 1) {
                s += ",";
            }
            s += "\n";
            i++;
        }
        return s + whitespace + "}";
    }
    }
}
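
// Illustrative usage (added by the editor; not part of the original source).
// Note that numbers are emitted via std::to_string, so 1 serializes as
// "1.000000":
//
//   auto [doc, err] = parse("[1, true, null]");
//   std::string text = deparse(doc, "");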

string_val string(const std::string &text) {
    string_val out;
    out.type = value_type::string;
    out.string = text;

    return out;
}

value number(double input) {
    return {
        .number = input,
        .type = value_type::number
    };
}

value boolean(bool input) {
    return {
        .boolean = input,
        .type = value_type::boolean
    };
}

array_val array(std::vector<value> input)
{
    array_val arr;
    arr.type = value_type::array;
    arr.array = std::vector<value>();

    for (auto& elem: input)
    {
        arr.array->push_back(elem);
    }

    return arr;
}

array_val array()
{
    array_val out;
    out.type = value_type::array;
    out.array = std::vector<value>();
    return out;
}

object_val object(std::map<std::string, value> input)
{
    object_val out;
    out.type = value_type::object;
    out.object = input;
    return out;
}

object_val object()
{
    object_val out;
    out.type = value_type::object;
    out.object = std::map<std::string, value>();
    return out;
}
} // namespace JJX::json
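
// Illustrative usage of the builder helpers (added by the editor; not part of
// the original source; behaviour as implied by the definitions above):
//
//   auto obj = JJX::json::object();
//   obj.add("name", std::string("JJX"));
//   obj.add("version", JJX::json::number(1.0));
//   auto& name = obj["name"];  // operator[] returns a value&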