123 lines
6.7 KiB
C++
123 lines
6.7 KiB
C++
#include <json.hpp>
|
|
|
|
#pragma once
|
|
|
|
/// @namespace json::lexers
///
/// @brief This namespace provides functions for lexical analysis (tokenizing).
///
/// Each function in this namespace is responsible for "lexing" a specific type of token
/// from a raw JSON string, starting from a given index. They typically return a tuple
/// containing the recognized token, the new index in the string after consuming the token,
/// and the string value of the 'lexeme'.
namespace json::lexers {
|
|
|
|
/// Lexes and skips over whitespace characters in a JSON string.
|
|
///
|
|
/// This function consumes all consecutive whitespace characters (space, tab, newline,
|
|
/// carriage return) starting from the given index.
|
|
/// @param raw_json The complete JSON string to be lexed.
|
|
/// @param index The starting index in `raw_json` from which to begin lexing.
|
|
/// @return The index in `raw_json` immediately after the last consumed whitespace character.
|
|
/// If no whitespace is found at the starting index, the original index is returned.
|
|
int lex_whitespace(std::string raw_json, int index);
|
|
|
|
|
|
/// Lexes a single JSON syntax character.
|
|
///
|
|
/// This function expects to find a specific JSON structural character (`{`, `}`, `[`, `]`,
|
|
/// `:`, or `,`) at the given index and tokenizes it.
|
|
///
|
|
/// @param raw_json The complete JSON string to be lexed.
|
|
/// @param index The starting point in `raw_json` where the syntax character is expected.
|
|
/// @return A `std::tuple` containing:
|
|
/// - `json::token`: The token representing the lexed syntax character.
|
|
/// - `int`: The index in `raw_json` immediately after the consumed character/
|
|
/// - `std::string`: The string value of the lexed syntax character (e.g. "{", ":").
|
|
/// @throws std::runtime_error if an unexpected character is encountered at the given index.
|
|
std::tuple<json::token, int, std::string> lex_syntax(std::string raw_json, int index);
|
|
|
|
/// Lexes a JSON string literal.
|
|
///
|
|
/// This function expects a double-quoted string (`"..."`) at the given index, including handling of escape sequences.
|
|
///
|
|
/// @param raw_json The complete JSON string to be lexed.
|
|
/// @param original_index The starting index in `raw_json` where the string literal is expected to begin (at the opening quote).
|
|
/// @return A `std::tuple` containing:
|
|
/// - `json::token`: The token representing the lexed string literal.
|
|
/// - `int`: The index in `raw_json` immediately after the closing quote of the string.
|
|
/// - `std::string`: The unescaped string value of the literal.
|
|
/// @throws std::runtime_error if the string is malformed (e.g., unclosed quote, invalid escape sequence).
|
|
std::tuple<json::token, int, std::string> lex_string(std::string raw_json, int original_index);
|
|
|
|
/// Lexes a JSON number literal.
|
|
///
|
|
/// This function expects a valid JSON number (integer, float, scientific notation)
|
|
/// at the given index and tokenizes it.
|
|
///
|
|
/// @param raw_json The complete JSON string to be lexed.
|
|
/// @param original_index The starting index in `raw_json` where the number literal is expected to begin.
|
|
/// @return A `std::tuple` containing:
|
|
/// - `json::token` The token representing the lexed number literal.
|
|
/// - `int`: The index in `raw_json` immediately after the last digit or part of the number.
|
|
/// - `std::string`: The string representation of the lexed number (e.g., "123", "3.14", "1e-6").
|
|
/// @throws std::runtime_error if the number if malformed.
|
|
std::tuple<json::token, int, std::string> lex_number(std::string raw_json, int original_index);
|
|
|
|
/// Lexes a specific keyword in the JSON string.
|
|
///
|
|
/// This is a general-purpose function for lexing fixed-string keywords like "true", "false, or "null"
|
|
/// It checks if the `keyword` matches the substring at `original_index`.
|
|
///
|
|
/// @param raw_json The complete JSON string to be lexed.
|
|
/// @param keyword The specific keyword string to match (e.g., "true", "false", "null").
|
|
/// @param type The `json::token_type` to assign if the keyword is successfully matched.
|
|
/// @param original_index The starting index in `raw_json` where the keyword is expected to begin.
|
|
/// @return A `std::tuple` containing:
|
|
/// - `json::token`: The token representing the lexed keyword.
|
|
/// - `int`: The index in `raw_json` immediately after the consumed keyword.
|
|
/// - `std::string`: The string value of the matched keyword.
|
|
/// @throws std::runtime_error if the expected keyword is not found at the given index.
|
|
std::tuple<json::token, int, std::string> lex_keyword(std::string raw_json, std::string keyword, json::token_type type, int original_index);
|
|
|
|
/// Lexes the JSON `null` literal.
|
|
///
|
|
/// This is a specialized version of `lex_keyword` for the "null" literal.
|
|
///
|
|
/// @param raw_json The complete JSON string to be lexed.
|
|
/// @param index The starting index in `raw_json` where "null" is expected to begin.
|
|
/// @return A `std::tuple` containing:
|
|
/// - `json::token`: The token representing the lexed `null` literal.
|
|
/// - `int`: The index in `raw_json` immediately after "null".
|
|
/// - `std::string`: The string value "null".
|
|
/// @throws std::runtime_error if "null" is not found at the given index.
|
|
std::tuple<json::token, int, std::string> lex_null(std::string raw_json, int index);
|
|
|
|
|
|
/// Lexes the JSON `true` literal.
|
|
///
|
|
/// This is a specialized version of `lex_keyword` for the "true" literal.
|
|
///
|
|
/// @param raw_json The complete JSON string to be lexed.
|
|
/// @param index The starting index in `raw_json` where "true" is expected to begin.
|
|
/// @return A `std::tuple` containing:
|
|
/// - `json::token`: The token representing the lexed `true` literal.
|
|
/// - `int`: The index in `raw_json` immediately after "true".
|
|
/// - `std::string`: The string value "true".
|
|
/// @throws std::runtime_error if "true" is not found at the given index.
|
|
std::tuple<json::token, int, std::string> lex_true(std::string raw_json, int index);
|
|
|
|
/// Lexes the JSON `false` literal.
|
|
///
|
|
/// This is a specialized version of `lex_keyword` for the "false" literal.
|
|
///
|
|
/// @param raw_json The complete JSON string to be lexed.
|
|
/// @param index The starting index in `raw_json` where "false" is expected to begin.
|
|
/// @return A `std::tuple` containing:
|
|
/// - `json::token`: The token representing the lexed `false` literal.
|
|
/// - `int`: The index in `raw_json` immediately after "false".
|
|
/// - `std::string`: The string value "false".
|
|
/// @throws std::runtime_error if "false: is not found at the given index.
|
|
std::tuple<json::token, int, std::string> lex_false(std::string raw_json, int index);
|
|
}
|