Skip to content

Commit

Permalink
Merge branch 'clang-tidy-pre-tdfa' into rename-vars
Browse files Browse the repository at this point in the history
  • Loading branch information
SharafMohamed committed Feb 14, 2025
2 parents 2a4a842 + 1749138 commit 2ff9f3c
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 103 deletions.
2 changes: 0 additions & 2 deletions src/log_surgeon/Lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
#include <log_surgeon/finite_automata/DfaState.hpp>
#include <log_surgeon/finite_automata/NfaState.hpp>
#include <log_surgeon/finite_automata/RegexAST.hpp>
#include <log_surgeon/finite_automata/TagOperation.hpp>
#include <log_surgeon/finite_automata/RegexAST.hpp>
#include <log_surgeon/LexicalRule.hpp>
#include <log_surgeon/ParserInputBuffer.hpp>
#include <log_surgeon/Token.hpp>
Expand Down
44 changes: 21 additions & 23 deletions src/log_surgeon/finite_automata/Nfa.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,39 +40,37 @@ class Nfa {
explicit Nfa(std::vector<LexicalRule<TypedNfaState>> const& rules);

/**
* Creates a unique_ptr for an NFA state with no tagged transitions and adds it to `m_states`.
* @return TypedNfaState*
* @return A pointer to the newly created NFA state with no spontaneous transitions.
*/
[[nodiscard]] auto new_state() -> TypedNfaState*;

/**
* Creates a unique_ptr for an NFA state that is accepting and adds it to `m_states`.
* @param matching_variable_id The variable id that the NFA state accepts.
* @return TypedNfaState*
/**
* @param matching_variable_id The id for the variable matched by this state.
* @return A pointer to the newly created accepting NFA state.
*/
[[nodiscard]] auto new_accepting_state(uint32_t matching_variable_id) -> TypedNfaState*;

/**
* Creates a unique_ptr for an NFA state with a negative tagged transition and adds it to
* `m_states`.
* @param captures
* @param dest_state
* @return TypedNfaState*
* @param captures A vector containing the captures of all alternate paths.
* @param dest_state The destination state to arrive at after negating the captures.
* @return A pointer to the newly created NFA state with a spontaneous transition to
* `dest_state` negating all the tags associated with `captures`.
*/
[[nodiscard]] auto new_state_for_negative_captures(
[[nodiscard]] auto new_state_from_negative_captures(
std::vector<Capture const*> const& captures,
TypedNfaState const* dest_state
) -> TypedNfaState*;

/**
* Creates the start and end states for a capture group.
* @param capture The capture associated with the capture group.
* @param dest_state
* @return A pair of states:
* - A state from `m_root` with an outgoing transition that sets the start tag for the capture.
* - A state with an outgoing transition to `dest_state` that sets the end tag for the capture.
* @param capture The positive capture to be tracked.
* @param dest_state The destination state to arrive at after tracking the capture.
* @return A pair of pointers to the two newly created NFA states:
* - A state arrived at from a spontaneous transition out of `m_root` that sets a tag to track
* the capture's start position.
* - A state with a spontaneous transition to `dest_state` that sets a tag to track the
* capture's end position.
*/
[[nodiscard]] auto new_start_and_end_states_for_capture(
[[nodiscard]] auto new_start_and_end_states_from_positive_capture(
Capture const* capture,
TypedNfaState const* dest_state
) -> std::pair<TypedNfaState*, TypedNfaState*>;
Expand All @@ -84,7 +82,7 @@ class Nfa {
[[nodiscard]] auto get_bfs_traversal_order() const -> std::vector<TypedNfaState const*>;

/**
* @return A string representation of the NFA.
* @return A string representation of the NFA on success.
* @return Forwards `NfaState::serialize`'s return value (std::nullopt) on failure.
*/
[[nodiscard]] auto serialize() const -> std::optional<std::string>;
Expand Down Expand Up @@ -154,7 +152,7 @@ auto Nfa<TypedNfaState>::new_accepting_state(uint32_t const matching_variable_id
}

template <typename TypedNfaState>
auto Nfa<TypedNfaState>::new_state_for_negative_captures(
auto Nfa<TypedNfaState>::new_state_from_negative_captures(
std::vector<Capture const*> const& captures,
TypedNfaState const* dest_state
) -> TypedNfaState* {
Expand All @@ -172,12 +170,12 @@ auto Nfa<TypedNfaState>::new_state_for_negative_captures(
}

template <typename TypedNfaState>
auto Nfa<TypedNfaState>::new_start_and_end_states_for_capture(
auto Nfa<TypedNfaState>::new_start_and_end_states_from_positive_capture(
Capture const* capture,
TypedNfaState const* dest_state
) -> std::pair<TypedNfaState*, TypedNfaState*> {
auto const [start_tag, end_tag]{get_or_create_capture_tag_pair(capture)};
auto* start_state = new_state();
auto* start_state{new_state()};
m_root->add_spontaneous_transition(TagOperationType::Set, {start_tag}, start_state);
m_states.emplace_back(
std::make_unique<TypedNfaState>(TagOperationType::Set, std::vector{end_tag}, dest_state)
Expand Down
18 changes: 9 additions & 9 deletions src/log_surgeon/finite_automata/NfaState.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <cstdint>
#include <memory>
#include <optional>
#include <set>
#include <stack>
#include <string>
#include <tuple>
Expand All @@ -20,7 +21,9 @@
#include <log_surgeon/Constants.hpp>
#include <log_surgeon/finite_automata/SpontaneousTransition.hpp>
#include <log_surgeon/finite_automata/StateType.hpp>
#include <log_surgeon/finite_automata/TagOperation.hpp>
#include <log_surgeon/finite_automata/UnicodeIntervalTree.hpp>
#include <log_surgeon/types.hpp>

namespace log_surgeon::finite_automata {
template <StateType state_type>
Expand All @@ -42,14 +45,10 @@ class NfaState {

NfaState(
TagOperationType const op_type,
std::vector<tag_id_t> tag_ids,
std::vector<tag_id_t> const& tag_ids,
NfaState const* dest_state
) {
add_spontaneous_transition(op_type, std::move(tag_ids), dest_state);
}

auto add_spontaneous_transition(NfaState* dest_state) -> void {
m_spontaneous_transitions.emplace_back(dest_state);
add_spontaneous_transition(op_type, tag_ids, dest_state);
}

auto add_spontaneous_transition(
Expand Down Expand Up @@ -186,8 +185,9 @@ auto NfaState<state_type>::epsilon_closure() -> std::set<NfaState const*> {
template <StateType state_type>
auto NfaState<state_type>::serialize(std::unordered_map<NfaState const*, uint32_t> const& state_ids
) const -> std::optional<std::string> {
auto const accepting_tag_string
= m_accepting ? fmt::format("accepting_tag={},", m_matching_variable_id) : "";
auto const accepting_tag_string{
m_accepting ? fmt::format("accepting_tag={},", m_matching_variable_id) : ""
};

std::vector<std::string> byte_transitions;
for (uint32_t idx{0}; idx < cSizeOfByte; ++idx) {
Expand All @@ -200,7 +200,7 @@ auto NfaState<state_type>::serialize(std::unordered_map<NfaState const*, uint32_

std::vector<std::string> serialized_spontaneous_transitions;
for (auto const& spontaneous_transition : m_spontaneous_transitions) {
auto const optional_serialized_transition = spontaneous_transition.serialize(state_ids);
auto const optional_serialized_transition{spontaneous_transition.serialize(state_ids)};
if (false == optional_serialized_transition.has_value()) {
return std::nullopt;
}
Expand Down
22 changes: 12 additions & 10 deletions src/log_surgeon/finite_automata/RegexAST.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include <log_surgeon/Constants.hpp>
#include <log_surgeon/finite_automata/Capture.hpp>
#include <log_surgeon/finite_automata/TagOperation.hpp>
#include <log_surgeon/finite_automata/UnicodeIntervalTree.hpp>

namespace log_surgeon::finite_automata {
Expand Down Expand Up @@ -113,12 +114,12 @@ class RegexAST {
auto add_to_nfa_with_negative_captures(Nfa<TypedNfaState>* nfa, TypedNfaState* end_state) const
-> void {
// Handle negative captures as:
// root --(regex)--> state_with_spontaenous_transition --(negate tags)--> end_state
// root --(regex)--> state_with_spontaneous_transition --(negate tags)--> end_state
if (false == m_negative_captures.empty()) {
auto* state_with_spontaenous_transition{
nfa->new_state_for_negative_captures(m_negative_captures, end_state)
auto* state_with_spontaneous_transition{
nfa->new_state_from_negative_captures(m_negative_captures, end_state)
};
add_to_nfa(nfa, state_with_spontaenous_transition);
add_to_nfa(nfa, state_with_spontaneous_transition);
} else {
add_to_nfa(nfa, end_state);
}
Expand Down Expand Up @@ -860,9 +861,9 @@ void RegexASTMultiplication<TypedNfaState>::add_to_nfa(
) const {
TypedNfaState* saved_root = nfa->get_root();
if (m_min == 0) {
nfa->get_root()->add_spontaneous_transition(end_state);
nfa->get_root()->add_spontaneous_transition(TagOperationType::None, {}, end_state);
} else {
for (uint32_t i = 1; i < m_min; i++) {
for (uint32_t i{1}; i < m_min; ++i) {
TypedNfaState* intermediate_state = nfa->new_state();
m_operand->add_to_nfa_with_negative_captures(nfa, intermediate_state);
nfa->set_root(intermediate_state);
Expand Down Expand Up @@ -907,8 +908,8 @@ template <typename TypedNfaState>
auto RegexASTCapture<TypedNfaState>::add_to_nfa(Nfa<TypedNfaState>* nfa, TypedNfaState* dest_state)
const -> void {
// TODO: move this into a documentation file in the future, and reference it here.
// The NFA constructed for a capture group follows the structure below, with tagged transitions
// explicitly labeled for clarity:
// The NFA constructed for a capture group follows the structure below, with spontaneous
// transitions explicitly labeled for clarity:
// +---------------------+
// | `m_root` |
// +---------------------+
Expand Down Expand Up @@ -937,8 +938,9 @@ auto RegexASTCapture<TypedNfaState>::add_to_nfa(Nfa<TypedNfaState>* nfa, TypedNf
// +---------------------+
// | `dest_state` |
// +---------------------+
auto [capture_start_state, capture_end_state]
= nfa->new_start_and_end_states_for_capture(m_capture.get(), dest_state);
auto [capture_start_state, capture_end_state]{
nfa->new_start_and_end_states_from_positive_capture(m_capture.get(), dest_state)
};

auto* initial_root = nfa->get_root();
nfa->set_root(capture_start_state);
Expand Down
16 changes: 9 additions & 7 deletions src/log_surgeon/finite_automata/SpontaneousTransition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include <optional>
#include <ranges>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>
Expand All @@ -16,7 +15,9 @@

namespace log_surgeon::finite_automata {
/**
* Represents an NFA transition indicating a tag and an operation to perform on the tag.
* Represents an NFA transition with a collection of tag operations to be performed during the
* transition.
*
* @tparam TypedNfaState Specifies the type of transition (bytes or UTF-8 characters).
*/
template <typename TypedNfaState>
Expand All @@ -28,7 +29,7 @@ class SpontaneousTransition {
: m_tag_ops{std::move(tag_ops)},
m_dest_state{dest_state} {}

[[nodiscard]] auto get_tag_ops() const -> std::vector<TagOperation> { return m_tag_ops; }
[[nodiscard]] auto get_tag_ops() const -> std::vector<TagOperation> const& { return m_tag_ops; }

[[nodiscard]] auto get_dest_state() const -> TypedNfaState const* { return m_dest_state; }

Expand All @@ -52,10 +53,11 @@ auto SpontaneousTransition<TypedNfaState>::serialize(
if (false == state_ids.contains(m_dest_state)) {
return std::nullopt;
}
auto transformed_operations
= m_tag_ops | std::ranges::views::transform([](TagOperation const& tag_op) {
return tag_op.serialize();
});
auto transformed_operations{
m_tag_ops | std::ranges::views::transform([](TagOperation const& tag_op) {
return tag_op.serialize();
})
};

return fmt::format(
"{}[{}]",
Expand Down
11 changes: 6 additions & 5 deletions src/log_surgeon/finite_automata/TagOperation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@

#include <fmt/core.h>

namespace log_surgeon::finite_automata {
using tag_id_t = uint32_t;
#include <log_surgeon/types.hpp>

namespace log_surgeon::finite_automata {
enum class TagOperationType : uint8_t {
Set,
Negate
Negate,
None
};

class TagOperation {
Expand Down Expand Up @@ -42,8 +43,8 @@ class TagOperation {
return fmt::format("{}{}", m_tag_id, "p");
case TagOperationType::Negate:
return fmt::format("{}{}", m_tag_id, "n");
default:
return fmt::format("{}{}", m_tag_id, "?");
case TagOperationType::None:
return "none";
}
}

Expand Down
18 changes: 8 additions & 10 deletions tests/test-lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,12 @@ auto test_regex_ast(string_view var_schema, u32string const& expected_serialized
[[nodiscard]] auto u32string_to_string(u32string const& u32_str) -> string;

/**
* Initializes the lexer with the constant delimiters and the given schema.
* The constant delimiters (space and newline) are used to separate tokens in the input.
* The lexer's symbol mappings are initialized based on the schema variables.
* Assumes the lexer is in a clean state before initialization.
* @param schema_ast Contains the variables to add to the lexer.
* @return The initialized lexer.
* Creates a lexer with a constant set of delimiters (space and newline) and the given schema.
* The delimiters are used to separate tokens in the input.
* @param schema_ast The schema variables are used to set the lexer's symbol mappings.
* @return The lexer.
*/
auto initialize_lexer(std::unique_ptr<SchemaAST> schema_ast) -> ByteLexer;
[[nodiscard]] auto create_lexer(std::unique_ptr<SchemaAST> schema_ast) -> ByteLexer;

/**
* Lexes the given input and verifies the output is a token for the given rule name, followed by the
Expand Down Expand Up @@ -99,7 +97,7 @@ auto u32string_to_string(u32string const& u32_str) -> string {
return converter.to_bytes(u32_str.data(), u32_str.data() + u32_str.size());
}

auto initialize_lexer(std::unique_ptr<SchemaAST> schema_ast) -> ByteLexer {
auto create_lexer(std::unique_ptr<SchemaAST> schema_ast) -> ByteLexer {
vector<uint32_t> const delimiters{' ', '\n'};

ByteLexer lexer;
Expand Down Expand Up @@ -319,7 +317,7 @@ TEST_CASE("Test basic Lexer", "[Lexer]") {
Schema schema;
schema.add_variable(cVarSchema, -1);

ByteLexer lexer{initialize_lexer(std::move(schema.release_schema_ast_ptr()))};
ByteLexer lexer{create_lexer(std::move(schema.release_schema_ast_ptr()))};

test_scanning_input(lexer, cTokenString1, cVarName);
test_scanning_input(lexer, cTokenString2, log_surgeon::cTokenUncaughtString);
Expand All @@ -336,7 +334,7 @@ TEST_CASE("Test Lexer with capture groups", "[Lexer]") {
Schema schema;
schema.add_variable(cVarSchema, -1);

ByteLexer lexer{initialize_lexer(std::move(schema.release_schema_ast_ptr()))};
ByteLexer lexer{create_lexer(std::move(schema.release_schema_ast_ptr()))};

string const var_name{cVarName};
REQUIRE(lexer.m_symbol_id.contains(var_name));
Expand Down
Loading

0 comments on commit 2ff9f3c

Please sign in to comment.