From 02cdf697fc8c1de715e8e967c47fc3f2dd6aa086 Mon Sep 17 00:00:00 2001 From: josh Date: Mon, 2 Jun 2025 15:01:24 -0500 Subject: [PATCH] Added documentation to json::lexer namespace. --- include/lexer.hpp | 97 ++++++++++++++++++++++++++++++++++++++++++++++- src/json.cpp | 2 +- src/lexer.cpp | 1 + 3 files changed, 98 insertions(+), 2 deletions(-) diff --git a/include/lexer.hpp b/include/lexer.hpp index a7c8c6c..494c660 100644 --- a/include/lexer.hpp +++ b/include/lexer.hpp @@ -3,25 +3,120 @@ #pragma once /// @namespace json::lexers +/// /// @brief This namespace provides functions for lexical analysis (tokenizing) /// /// Each function in this namespace is responsible for "lexing" a specific type of token, -/// from a raw JSON string, +/// from a raw JSON string, starting from a given index. They typically return a tuple +/// containing the recognized token, the new index in the string after consuming the token, +/// and the string value of the 'lexeme'. namespace json::lexers { + /// Lexes and skips over whitespace characters in a JSON string. + /// + /// This function consumes all consecutive whitespace characters (space, tab, newline, + /// carriage return) starting from the given index. + /// @param raw_json The complete JSON string to be lexed. + /// @param index The starting index in `raw_json` from which to begin lexing. + /// @return The index in `raw_json` immediately after the last consumed whitespace character. + /// If no whitespace is found at the starting index, the original index is returned. int lex_whitespace(std::string raw_json, int index); + + /// Lexes a single JSON syntax character. + /// + /// This function expects to find a specific JSON structural character (`{`, `}`, `[`, `]`, + /// `:`, or `,`) at the given index and tokenizes it. + /// + /// @param raw_json The complete JSON string to be lexed. + /// @param index The starting point in `raw_json` where the syntax character is expected. 
+ /// @return A `std::tuple` containing: + /// - `json::token`: The token representing the lexed syntax character. + /// - `int`: The index in `raw_json` immediately after the consumed character. + /// - `std::string`: The string value of the lexed syntax character (e.g. "{", ":"). + /// @throws std::runtime_error if an unexpected character is encountered at the given index. std::tuple lex_syntax(std::string raw_json, int index); + /// Lexes a JSON string literal. + /// + /// This function expects a double-quoted string (`"..."`) at the given index, including handling of escape sequences. + /// + /// @param raw_json The complete JSON string to be lexed. + /// @param original_index The starting index in `raw_json` where the string literal is expected to begin (at the opening quote). + /// @return A `std::tuple` containing: + /// - `json::token`: The token representing the lexed string literal. + /// - `int`: The index in `raw_json` immediately after the closing quote of the string. + /// - `std::string`: The unescaped string value of the literal. + /// @throws std::runtime_error if the string is malformed (e.g., unclosed quote, invalid escape sequence). std::tuple lex_string(std::string raw_json, int original_index); + /// Lexes a JSON number literal. + /// + /// This function expects a valid JSON number (integer, float, scientific notation) + /// at the given index and tokenizes it. + /// + /// @param raw_json The complete JSON string to be lexed. + /// @param original_index The starting index in `raw_json` where the number literal is expected to begin. + /// @return A `std::tuple` containing: + /// - `json::token`: The token representing the lexed number literal. + /// - `int`: The index in `raw_json` immediately after the last digit or part of the number. + /// - `std::string`: The string representation of the lexed number (e.g., "123", "3.14", "1e-6"). + /// @throws std::runtime_error if the number is malformed. 
std::tuple lex_number(std::string raw_json, int original_index); + /// Lexes a specific keyword in the JSON string. + /// + /// This is a general-purpose function for lexing fixed-string keywords like "true", "false", or "null". + /// It checks if the `keyword` matches the substring at `original_index`. + /// + /// @param raw_json The complete JSON string to be lexed. + /// @param keyword The specific keyword string to match (e.g., "true", "false", "null"). + /// @param type The `json::token_type` to assign if the keyword is successfully matched. + /// @param original_index The starting index in `raw_json` where the keyword is expected to begin. + /// @return A `std::tuple` containing: + /// - `json::token`: The token representing the lexed keyword. + /// - `int`: The index in `raw_json` immediately after the consumed keyword. + /// - `std::string`: The string value of the matched keyword. + /// @throws std::runtime_error if the expected keyword is not found at the given index. std::tuple lex_keyword(std::string raw_json, std::string keyword, json::token_type type, int original_index); + /// Lexes the JSON `null` literal. + /// + /// This is a specialized version of `lex_keyword` for the "null" literal. + /// + /// @param raw_json The complete JSON string to be lexed. + /// @param index The starting index in `raw_json` where "null" is expected to begin. + /// @return A `std::tuple` containing: + /// - `json::token`: The token representing the lexed `null` literal. + /// - `int`: The index in `raw_json` immediately after "null". + /// - `std::string`: The string value "null". + /// @throws std::runtime_error if "null" is not found at the given index. std::tuple lex_null(std::string raw_json, int index); + + /// Lexes the JSON `true` literal. + /// + /// This is a specialized version of `lex_keyword` for the "true" literal. + /// + /// @param raw_json The complete JSON string to be lexed. 
+ /// @param index The starting index in `raw_json` where "true" is expected to begin. + /// @return A `std::tuple` containing: + /// - `json::token`: The token representing the lexed `true` literal. + /// - `int`: The index in `raw_json` immediately after "true". + /// - `std::string`: The string value "true". + /// @throws std::runtime_error if "true" is not found at the given index. std::tuple lex_true(std::string raw_json, int index); + /// Lexes the JSON `false` literal. + /// + /// This is a specialized version of `lex_keyword` for the "false" literal. + /// + /// @param raw_json The complete JSON string to be lexed. + /// @param index The starting index in `raw_json` where "false" is expected to begin. + /// @return A `std::tuple` containing: + /// - `json::token`: The token representing the lexed `false` literal. + /// - `int`: The index in `raw_json` immediately after "false". + /// - `std::string`: The string value "false". + /// @throws std::runtime_error if "false" is not found at the given index. std::tuple lex_false(std::string raw_json, int index); } diff --git a/src/json.cpp b/src/json.cpp index c0cbd50..1365b40 100644 --- a/src/json.cpp +++ b/src/json.cpp @@ -410,7 +410,7 @@ namespace json { auto [tokens, error] = json::lex(source); if (error.size()) { - return std::make_tuple(JJX::json::value{}, error); + return std::make_tuple(json::value{}, error); } auto [ast, _, error1] = json::parse(tokens); diff --git a/src/lexer.cpp b/src/lexer.cpp index 32ca1f2..04c6e48 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -17,6 +17,7 @@ namespace json { std::tuple lexers::lex_syntax(std::string raw_json, int index) { + json::token token{"", token_type::syntax, index}; std::string value = ""; auto c = raw_json[index];