Files
json/src/JSON.cpp
2024-11-04 19:20:14 -06:00

519 lines
16 KiB
C++

#include <JJX/JJX.hpp>
#include <JJX/JSON.hpp>
#include <cctype>
#include <exception>
#include <sstream>
#include <utility>
namespace JJX::json {
// Builds a three-line diagnostic for a position inside `source`:
//   "<base> at line <L>, column <C>"
//   the full text of the line that contains `index`
//   a caret ('^') placed under the character at `index`
//
// `line` starts at 1; `column` is the number of characters that precede
// `index` on its line. Tabs before `index` are shown as a single space so
// the caret stays aligned with the printed line.
std::string format_error(std::string base, std::string source, int index) {
    int line = 1;
    int column = 0;
    std::string shown_line;  // text of the current line, up to `index`
    std::string caret_pad;   // one space per character shown so far
    std::string::size_type pos = 0;

    // Phase 1: walk the characters in front of `index`, tracking line/column.
    for (; pos < source.size() && static_cast<int>(pos) != index; ++pos) {
        const char ch = source[pos];
        if (ch == '\n') {
            // A new line starts: everything collected so far is discarded.
            ++line;
            column = 0;
            shown_line.clear();
            caret_pad.clear();
            continue;
        }
        ++column;
        shown_line += (ch == '\t') ? ' ' : ch;
        caret_pad += ' ';
    }

    // Phase 2: append the remainder of the line (up to, not including, '\n').
    const std::string::size_type line_end = source.find('\n', pos);
    const std::string::size_type tail_length =
        (line_end == std::string::npos) ? std::string::npos : line_end - pos;
    shown_line += source.substr(pos, tail_length);

    // TODO: Migrate the below code bits to std::format
    std::ostringstream out;
    out << base << " at line " << line << ", column " << column << '\n'
        << shown_line << '\n'
        << caret_pad << "^";
    return out.str();
}
// Skips whitespace starting at `index` and returns the index of the first
// non-whitespace character (or the string length if only whitespace remains).
// An index outside the string is returned unchanged.
int lex_whitespace(std::string raw_json, int index) {
    if (index < 0) {
        return index;
    }
    const std::string::size_type length = raw_json.length();
    // The bound is tested before the character is read, and the character is
    // converted to unsigned char because std::isspace has undefined behaviour
    // for negative values other than EOF (e.g. bytes of UTF-8 text).
    while (static_cast<std::string::size_type>(index) < length &&
           std::isspace(static_cast<unsigned char>(raw_json[index]))) {
        index++;
    }
    return index;
}
// Lexes a single structural character: one of [ ] { } : ,
//
// Returns {token, next_index, error}. On a match token.value holds the
// character and next_index is index + 1. Otherwise next_index equals
// `index`, which signals "nothing lexed". The error string is always empty.
std::tuple<json::token, int, std::string> lex_syntax(std::string raw_json, int index)
{
    json::token token{"", token_type::syntax, index};
    // A position outside the text cannot match anything.
    if (index < 0 || static_cast<std::string::size_type>(index) >= raw_json.length()) {
        return {token, index, ""};
    }
    const char c = raw_json[index];
    switch (c) {
    case '[':
    case ']':
    case '{':
    case '}':
    case ':':
    case ',':
        token.value += c;
        index++;
        break;
    default:
        break;
    }
    return {token, index, ""};
}
// Lexes a double-quoted string starting at `original_index`.
//
// Returns {token, next_index, error}:
//  - no opening quote at `original_index`: next_index == original_index
//    (nothing lexed), empty error;
//  - success: token.value holds the text between the quotes and next_index
//    points just past the closing quote;
//  - closing quote missing: next_index is the end of the text and error holds
//    an "Unexpected EOF while lexing string" message.
//
// Escape sequences are not decoded: a backslash and the character after it
// are copied into token.value verbatim. The pair is consumed together so that
// an escaped quote (\") no longer terminates the string.
std::tuple<json::token, int, std::string> lex_string(std::string raw_json, int original_index) {
    json::token token {"", token_type::string, original_index};
    const int length = static_cast<int>(raw_json.length());
    if (original_index < 0 || original_index >= length || raw_json[original_index] != '"') {
        return {token, original_index, ""};
    }
    int index = original_index + 1;
    while (true) {
        if (index >= length) {
            return {token, index, format_error("Unexpected EOF while lexing string", raw_json, index)};
        }
        char c = raw_json[index];
        if (c == '"') {
            break;
        }
        if (c == '\\' && index + 1 < length) {
            // Keep the backslash and step onto the escaped character, which is
            // appended below without being interpreted.
            token.value += c;
            index++;
            c = raw_json[index];
        }
        token.value += c;
        index++;
    }
    // Step over the closing quote.
    index++;
    return {token, index, ""};
}
// Lexes a number literal starting at `original_index`: an optional leading
// '-', digits with at most one '.', and an optional exponent ('e' or 'E',
// optionally followed by '+' or '-', then digits).
//
// Returns {token, next_index, error}. token.value holds the literal text as
// written. If no digit was consumed, next_index == original_index (nothing
// lexed), so a lone "-", "." or "e" is not turned into a number token.
// The error string is always empty.
std::tuple<json::token, int, std::string> lex_number(std::string raw_json, int original_index) {
    json::token token {"", token_type::number, original_index};
    const int length = static_cast<int>(raw_json.length());
    if (original_index < 0 || original_index >= length) {
        return {token, original_index, ""};
    }
    int index = original_index;
    bool digit_seen = false;
    bool decimal_present = false;
    bool exponent_present = false;
    // Negative numbers.
    if (raw_json[index] == '-') {
        token.value += raw_json[index];
        index++;
    }
    while (index < length) {
        const char c = raw_json[index];
        if (c >= '0' && c <= '9') {
            digit_seen = true;
        } else if (c == '.') {
            // Only one decimal point, and never inside the exponent.
            if (decimal_present || exponent_present)
                break;
            decimal_present = true;
        } else if ((c == 'E' || c == 'e') && digit_seen && !exponent_present) {
            // Scientific notation: consume the marker and an optional sign.
            exponent_present = true;
            token.value += c;
            index++;
            if (index < length && (raw_json[index] == '-' || raw_json[index] == '+')) {
                token.value += raw_json[index];
                index++;
            }
            continue; // Loop early.
        } else {
            break;
        }
        // Each accepted character is appended exactly once.
        token.value += c;
        index++;
    }
    if (!digit_seen) {
        // Nothing numeric was found; report "no match" instead of a token
        // that std::stod would later reject.
        token.value.clear();
        return {token, original_index, ""};
    }
    return {token, index, ""};
}
// Lexes the fixed word `keyword` at `original_index`.
//
// Returns {token, next_index, error}. On a full match token.value is set to
// `keyword` and next_index points just past it. On anything else, including
// a partial match such as "nul", next_index == original_index (nothing
// lexed), so the caller never receives a token with an empty value.
// The error string is always empty.
std::tuple<json::token, int, std::string> lex_keyword(std::string raw_json, std::string keyword, json::token_type type, int original_index) {
    json::token token{"", type, original_index};
    // compare() throws if the start position is past the end, so check first.
    const bool in_range = original_index >= 0 &&
        static_cast<std::string::size_type>(original_index) <= raw_json.length();
    if (keyword.empty() || !in_range ||
        raw_json.compare(original_index, keyword.length(), keyword) != 0) {
        return {token, original_index, ""};
    }
    token.value = keyword;
    return {token, original_index + static_cast<int>(keyword.length()), ""};
}
std::tuple<json::token, int, std::string> lex_null(std::string raw_json, int index)
{
return lex_keyword(raw_json, "null", token_type::null, index);
}
std::tuple<json::token, int, std::string> lex_true(std::string raw_json, int index)
{
return lex_keyword(raw_json, "true", token_type::boolean, index);
}
std::tuple<json::token, int, std::string> lex_false(std::string raw_json, int index) {
return lex_keyword(raw_json, "false", token_type::boolean, index);
}
// Turns this value into a number: tags it as value_type::number and stores
// `in` in the `number` member. No other member is modified.
void value::operator=(double in)
{
    this->type = value_type::number;
    this->number = in;
}
// Turns this value into a string: tags it as value_type::string and stores a
// copy of `in` in the `string` member. No other member is modified.
void value::operator=(const std::string& in)
{
    this->type = value_type::string;
    this->string = in;
}
// Turns this value into a boolean: tags it as value_type::boolean and stores
// `in` in the `boolean` member. No other member is modified.
void value::operator=(bool in)
{
    this->type = value_type::boolean;
    this->boolean = in;
}
// Turns this value into an array: tags it as value_type::array and stores the
// given elements in the `array` member. No other member is modified.
void value::operator=(std::vector<value> in)
{
    type = value_type::array;
    // `in` is already a private copy (passed by value), so hand its storage
    // over instead of copying every element a second time.
    array = std::move(in);
}
// Turns this value into an object: tags it as value_type::object and stores
// the given key/value pairs in the `object` member. No other member is
// modified.
void value::operator=(std::map<std::string, value> in)
{
    type = value_type::object;
    // `in` is already a private copy (passed by value), so hand its storage
    // over instead of copying the whole map a second time.
    object = std::move(in);
}
// Inserts `val` under `key`. Because emplace() is used, an existing entry
// with the same key is left unchanged. Calls object.value(), so `object`
// must already hold a map.
void object_val::add(const std::string& key, value val)
{
    // `val` is a by-value parameter; move it into the map rather than copy it.
    object.value().emplace(key, std::move(val));
}
// Convenience overload: wraps `val` with json::string() and inserts it under
// `key`. Because emplace() is used, an existing entry with the same key is
// left unchanged.
void object_val::add(const std::string& key, const std::string& val)
{
    auto& members = object.value();
    members.emplace(key, json::string(val));
}
// Returns a reference to the member stored under `key`. The lookup goes
// through the map's operator[], so a missing key is default-inserted first.
value& object_val::operator[](const std::string& key)
{
    auto& members = object.value();
    return members[key];
}
// Appends `val` to the end of the array. Calls array.value(), so `array`
// must already hold a vector.
void array_val::add(value val)
{
    // `val` is a by-value parameter; move it into the vector rather than copy it.
    array.value().push_back(std::move(val));
}
// Returns a reference to the element at position `key`. The index is passed
// straight to the vector's operator[], which performs no bounds check.
value& array_val::operator[](int key)
{
    auto& elements = array.value();
    return elements[key];
}
std::tuple<std::vector<json::token>, std::string> lex(std::string raw_json) {
std::vector<json::token> tokens;
// All tokens will embed a pointer to the raw JSON for debugging purposes
auto original_copy = std::make_shared<std::string>(raw_json);
auto generic_lexers = {lex_syntax, lex_string, lex_number, lex_null, lex_true, lex_false};
for (int i = 0; i < raw_json.length(); i++) {
// Skip past whitespace
if (auto new_index = lex_whitespace(raw_json, i); i != new_index) {
i = new_index - 1;
continue;
}
auto found = false;
for (auto lexer: generic_lexers) {
if (auto [token, new_index, error] = lexer(raw_json, i); i != new_index) {
// Error while lexing, return early
if (error.length()) {
return std::make_tuple(std::vector<JJX::json::token>{}, error);
}
// Store reference to the original source
token.full_source = original_copy;
tokens.push_back(token);
i = new_index - 1;
found = true;
break;
}
}
if (found) {
continue;
}
return std::make_tuple(std::vector<JJX::json::token>{}, format_error("Unable to lex", raw_json, i));
}
return {tokens, ""};
}
// It's very annoying when a language doesn't give you stringifier methods
// for enums by default for debugging. There are ways to do this with
// reflection, but they seem hairy. There's a better procedure IIRC.
//
// Returns a display name for `tok`, used when building parse error messages.
// A value that matches none of the listed enumerators yields "Unknown".
std::string token_type_tostring(token_type tok)
{
    switch(tok) {
    case token_type::string: return "String";
    case token_type::number: return "Number";
    case token_type::syntax: return "Syntax";
    case token_type::boolean: return "Boolean";
    case token_type::null: return "Null";
    }
    // Without this, control would flow off the end of a non-void function
    // (undefined behaviour) for any value not handled above.
    return "Unknown";
}
// Builds a parse error message for `token`:
//   "Unexpected token '<value>', type '<type>', index <location>"
//   "<base>"
// and, when the token carries its source text, passes it through
// format_error so the line/column and a caret marker are appended.
std::string format_parse_error(std::string base, json::token token)
{
    std::ostringstream s;
    // The stray apostrophes in the previous message are removed and the token
    // index, which was announced but never printed, is now included.
    s << "Unexpected token '" << token.value << "', type '"
      << token_type_tostring(token.type) << "', index " << token.location;
    s << '\n' << base;
    if (!token.full_source) {
        // No source attached to this token: return the message without
        // location context rather than dereferencing a null pointer.
        return s.str();
    }
    return format_error(s.str(), *token.full_source, token.location);
}
// Parses the elements of an array. `index` must point at the first token
// after the opening '['.
//
// Returns {elements, next_index, error}:
//  - success: next_index points just past the closing ']' and error is empty;
//  - failure: the element list is empty and error describes the problem
//    (missing comma, error from a nested value, or end of input before ']').
//
// A comma is required between elements, whatever the type of the next token.
// A comma before the first element is still tolerated, as before.
std::tuple<std::vector<json::value>, int, std::string> parse_array(std::vector<json::token> tokens, int index) {
    std::vector<json::value> children = {};
    const int token_count = static_cast<int>(tokens.size());
    while (index >= 0 && index < token_count) {
        auto t = tokens[index];
        const bool is_syntax = t.type == token_type::syntax;
        if (is_syntax && t.value == "]") {
            return {children, index + 1, ""};
        }
        if (is_syntax && t.value == ",") {
            index++;
            if (index >= token_count) {
                // Input ended right after the comma.
                break;
            }
        } else if (children.size() > 0) {
            return {{}, index,
                format_parse_error("Expected comma after element in array", t)};
        }
        auto [child, new_index, error] = parse(tokens, index);
        if (error.size()) { return {{}, index, error}; }
        children.push_back(child);
        index = new_index;
    }
    // The closing ']' was never found. `index` is past the end here, so the
    // last existing token is used to locate the error.
    if (tokens.empty()) {
        return {{}, index, std::string("Unexpected EOF while parsing array")};
    }
    return {
        {}, index,
        format_parse_error("Unexpected EOF while parsing array", tokens.back())};
}
std::tuple<std::map<std::string, value>, int, std::string> parse_object(std::vector<token> tokens, int index) {
std::map<std::string, value> values = {};
while (index < tokens.size()) {
auto t = tokens[index];
if (t.type == token_type::syntax) {
if (t.value == "}") {
return {values, index + 1, ""};
}
if (t.value == ",") {
index++;
t = tokens[index];
} else if (values.size() > 0) {
return {{}, index,
format_parse_error("Expected comma after element in object", t)
};
} else {
return {{}, index,
format_parse_error("Expected key-value pair or closing brace in object", t)
};
}
}
auto [key, new_index, error] = parse(tokens, index);
if (error.size())
{
return {{}, index, error};
}
if (key.type != value_type::string) {
return {{}, index,
format_parse_error("Expected string key in object", t)};
}
index = new_index;
t = tokens[index];
if (!(t.type == token_type::syntax && t.value == ":")) {
return {{}, index,
format_parse_error("Expected colon after key in object", t)};
}
index++;
t = tokens[index];
auto [value, new_index1, error1] = parse(tokens, index);
if (error1.size()) {
return {{}, index, error1};
}
values[key.string.value()] = value;
index = new_index1;
}
return {values, index+1, ""};
}
// Parses one JSON value starting at tokens[index].
//
// Returns {value, next_index, error}. On success error is empty and
// next_index points at the first token after the value. On failure error is
// non-empty; this covers an index outside the token list (for example when
// the input produced no tokens), a number token std::stod cannot convert,
// and a token that cannot start a value. Arrays and objects are delegated to
// parse_array / parse_object and their index and error are passed through.
std::tuple<json::value, int, std::string> parse(std::vector<json::token> tokens, int index) {
    if (index < 0 || index >= static_cast<int>(tokens.size())) {
        return {{}, index, std::string("Unexpected end of input while parsing")};
    }
    auto token = tokens[index];
    switch(token.type) {
    case token_type::number: {
        double n = 0;
        try {
            n = std::stod(token.value);
        } catch (const std::exception&) {
            // std::stod throws std::invalid_argument / std::out_of_range;
            // report those through the error string like every other failure.
            return {{}, index, format_parse_error("Invalid number", token)};
        }
        return {json::value{.number = n, .type = value_type::number}, index+1, ""};
    }
    case token_type::boolean:
        return {json::value{.boolean = token.value == "true", .type = value_type::boolean}, index + 1, ""};
    case token_type::null:
        return {json::value{.type = value_type::null}, index+1, ""};
    case token_type::string:
        return {json::value{.string = token.value, .type = value_type::string}, index+1, ""};
    case token_type::syntax:
        if (token.value == "[") {
            auto [elements, new_index, error] = parse_array(tokens, index + 1);
            return {json::value{.array = elements, .type = value_type::array}, new_index, error};
        }
        if (token.value == "{") {
            auto [members, new_index, error] = parse_object(tokens, index + 1);
            return {json::value{.object = std::optional(members), .type = value_type::object}, new_index, error};
        }
        // Any other structural character cannot start a value.
        break;
    default:
        break;
    }
    return {{}, index, format_parse_error("Failed to parse", token)};
}
// Lexes and parses `source` in one step.
//
// Returns {value, error}. If lexing fails, a default-constructed value and
// the lexer's error are returned. Otherwise the result of parsing the token
// list is returned; the index reported by the token-level parse is discarded,
// so tokens after the first complete value are not inspected here.
std::tuple<json::value, std::string> parse(std::string source) {
    auto lexed = json::lex(source);
    const std::string& lex_error = std::get<1>(lexed);
    if (!lex_error.empty()) {
        return std::make_tuple(JJX::json::value{}, lex_error);
    }

    auto parsed = json::parse(std::get<0>(lexed));
    return {std::get<0>(parsed), std::get<2>(parsed)};
}
// Serialises `v` to JSON text.
//
// `whitespace` is the indentation prefix of the current nesting level; each
// nested level adds one more space. Arrays and objects put every element on
// its own line. Strings and object keys are written between double quotes
// exactly as stored (no escaping is applied), and numbers are formatted with
// std::to_string. A value whose type matches none of the cases is written
// as "null".
std::string deparse(json::value v, std::string whitespace) {
    const std::string inner = whitespace + " ";
    switch(v.type) {
    case json::value_type::string:
        return "\"" + v.string.value() + "\"";
    case json::value_type::boolean:
        return (v.boolean.value() ? "true" : "false");
    case json::value_type::number:
        return std::to_string(v.number.value());
    case json::value_type::null:
        return "null";
    case json::value_type::array: {
        std::string s = "[\n";
        // Bind by reference: the elements are only read, not copied.
        const auto& elements = v.array.value();
        auto remaining = elements.size();
        for (const auto& element : elements) {
            s += inner + deparse(element, inner);
            // Separator after every element except the last.
            if (--remaining > 0) {
                s += ",";
            }
            s += "\n";
        }
        return s + whitespace + "]";
    }
    case json::value_type::object: {
        std::string s = "{\n";
        const auto& members = v.object.value();
        auto remaining = members.size();
        for (const auto& [key, member] : members) {
            s += inner + "\"" + key + "\":" + deparse(member, inner);
            if (--remaining > 0) {
                s += ",";
            }
            s += "\n";
        }
        return s + whitespace + "}";
    }
    }
    // Without this, control would flow off the end of a non-void function
    // (undefined behaviour) for a type not handled above.
    return "null";
}
// Creates a string value holding a copy of `text`, tagged value_type::string.
string_val string(const std::string &text) {
    string_val result;
    result.string = text;
    result.type = value_type::string;
    return result;
}
// Creates a number value holding `input`, tagged value_type::number.
value number(double input) {
    value result{
        .number = input,
        .type = value_type::number
    };
    return result;
}
// Creates a boolean value holding `input`, tagged value_type::boolean.
value boolean(bool input) {
    value result{
        .boolean = input,
        .type = value_type::boolean
    };
    return result;
}
// Creates an array value containing the elements of `input`, in order,
// tagged value_type::array.
array_val array(std::vector<value> input)
{
    array_val arr;
    arr.type = value_type::array;
    // `input` is already a private copy (passed by value); take over its
    // storage instead of copying the elements one by one.
    arr.array = std::move(input);
    return arr;
}
// Creates an array value with no elements, tagged value_type::array.
array_val array()
{
    array_val result;
    result.array = std::vector<value>{};
    result.type = value_type::array;
    return result;
}
// Creates an object value holding the key/value pairs of `input`, tagged
// value_type::object.
object_val object(std::map<std::string, value> input)
{
    object_val out;
    out.type = value_type::object;
    // `input` is already a private copy (passed by value); take over its
    // storage instead of copying the whole map again.
    out.object = std::move(input);
    return out;
}
// Creates an object value with no members, tagged value_type::object.
object_val object()
{
    object_val result;
    result.object = std::map<std::string, value>{};
    result.type = value_type::object;
    return result;
}
}