Skip to content

refactor: Unify all types of NFA transitions into NfaSpontaneousTransition. #76

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 27 commits into from
Feb 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
ad30714
Change concept of tagged transitions to spontaneous transitions.
SharafMohamed Feb 13, 2025
8d8e5ee
Fix typo.
SharafMohamed Feb 13, 2025
5c01aa3
Return by reference; Fix docstring.
SharafMohamed Feb 13, 2025
f695c56
Rename method to new_state_from_negative_captures for clarity.
SharafMohamed Feb 13, 2025
fc1ef67
Update docstring of new_state_from_negative_captures.
SharafMohamed Feb 13, 2025
dbd6d2c
Rename to new_start_and_end_states_from_positive_capture.
SharafMohamed Feb 13, 2025
cd7e3a9
Update spontaneous doc strings.
SharafMohamed Feb 13, 2025
43c1764
Update Nfa::Serialize docstring.
SharafMohamed Feb 13, 2025
f42e341
Clean up NfaState; Fix double declaration of tag_id_t.
SharafMohamed Feb 13, 2025
563a619
Clean up Nfa.hpp.
SharafMohamed Feb 13, 2025
c55b5b3
Remove overload of add_spontaneous_transitions; Fix compiler errors t…
SharafMohamed Feb 13, 2025
eb10e95
Use uniform initialization.
SharafMohamed Feb 13, 2025
ad23402
Clang-tidy.
SharafMohamed Feb 13, 2025
452b6b4
Add const.
SharafMohamed Feb 13, 2025
04a65c2
Fix typo.
SharafMohamed Feb 13, 2025
abc1006
Add noexcept; Add None case.
SharafMohamed Feb 13, 2025
7967082
Fix docstring.
SharafMohamed Feb 13, 2025
9cafc39
Remove noexcept.
SharafMohamed Feb 13, 2025
01b4698
Rename to NfaSpontaneousTransition.hpp.
SharafMohamed Feb 14, 2025
2a0d573
Removing unused constructor.
SharafMohamed Feb 14, 2025
a454838
Add ticks for var in return field.
SharafMohamed Feb 14, 2025
472b674
Add [nodiscard] to comparator methods.
SharafMohamed Feb 14, 2025
35fbcbd
Refactor serialize.
SharafMohamed Feb 14, 2025
8a1fe9e
Remove None from TagOperationType; Add default case.
SharafMohamed Feb 14, 2025
3f8e6fb
Change find to at.
SharafMohamed Feb 14, 2025
3c611ef
Fix at call.
SharafMohamed Feb 14, 2025
9ccd118
Fix cmake order.
SharafMohamed Feb 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,14 @@ set(SOURCE_FILES
src/log_surgeon/finite_automata/DfaState.hpp
src/log_surgeon/finite_automata/DfaStatePair.hpp
src/log_surgeon/finite_automata/Nfa.hpp
src/log_surgeon/finite_automata/NfaSpontaneousTransition.hpp
src/log_surgeon/finite_automata/NfaState.hpp
src/log_surgeon/finite_automata/PrefixTree.cpp
src/log_surgeon/finite_automata/PrefixTree.hpp
src/log_surgeon/finite_automata/RegexAST.hpp
src/log_surgeon/finite_automata/RegisterHandler.hpp
src/log_surgeon/finite_automata/StateType.hpp
src/log_surgeon/finite_automata/TaggedTransition.hpp
src/log_surgeon/finite_automata/TagOperation.hpp
src/log_surgeon/finite_automata/UnicodeIntervalTree.hpp
src/log_surgeon/finite_automata/UnicodeIntervalTree.tpp
src/log_surgeon/Lalr1Parser.cpp
Expand Down
1 change: 1 addition & 0 deletions src/log_surgeon/Lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <log_surgeon/Constants.hpp>
#include <log_surgeon/finite_automata/Dfa.hpp>
#include <log_surgeon/finite_automata/DfaState.hpp>
#include <log_surgeon/finite_automata/NfaState.hpp>
#include <log_surgeon/finite_automata/RegexAST.hpp>
#include <log_surgeon/LexicalRule.hpp>
#include <log_surgeon/ParserInputBuffer.hpp>
Expand Down
1 change: 1 addition & 0 deletions src/log_surgeon/finite_automata/Dfa.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <map>
#include <memory>
#include <set>
#include <stack>
#include <vector>

#include <log_surgeon/finite_automata/DfaStatePair.hpp>
Expand Down
113 changes: 42 additions & 71 deletions src/log_surgeon/finite_automata/Nfa.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <cstdint>
#include <memory>
#include <optional>
#include <queue>
#include <string>
#include <unordered_map>
Expand All @@ -11,9 +12,12 @@
#include <vector>

#include <fmt/core.h>
#include <fmt/format.h>

#include <log_surgeon/Constants.hpp>
#include <log_surgeon/finite_automata/NfaState.hpp>
#include <log_surgeon/finite_automata/Capture.hpp>
#include <log_surgeon/finite_automata/TagOperation.hpp>
#include <log_surgeon/finite_automata/UnicodeIntervalTree.hpp>
#include <log_surgeon/LexicalRule.hpp>
#include <log_surgeon/types.hpp>
#include <log_surgeon/UniqueIdGenerator.hpp>
Expand All @@ -35,32 +39,31 @@ class Nfa {
explicit Nfa(std::vector<LexicalRule<TypedNfaState>> const& rules);

/**
* Creates a unique_ptr for an NFA state with no tagged transitions and adds it to `m_states`.
* @return TypedNfaState*
* @return A pointer to the newly created NFA state with no spontaneous transitions.
*/
[[nodiscard]] auto new_state() -> TypedNfaState*;

/**
* Creates a unique_ptr for an NFA state with a negative tagged transition and adds it to
* `m_states`.
* @param captures
* @param dest_state
* @return TypedNfaState*
* @param captures A vector containing the captures of all alternate paths.
* @param dest_state The destination state to arrive at after negating the captures.
* @return A pointer to the newly created NFA state with a spontaneous transition to
* `dest_state` negating all the tags associated with `captures`.
*/
[[nodiscard]] auto new_state_with_negative_tagged_transition(
[[nodiscard]] auto new_state_from_negative_captures(
std::vector<Capture const*> const& captures,
TypedNfaState const* dest_state
) -> TypedNfaState*;

/**
* Creates the start and end states for a capture group.
* @param capture The capture associated with the capture group.
* @param dest_state
* @return A pair of states:
* - A new state with a positive tagged start transition from `m_root`.
* - A new state with a positive tagged end transition to `dest_state`.
* @param capture The positive capture to be tracked.
* @param dest_state The destination state to arrive at after tracking the capture.
* @return A pair of pointers to the two newly created NFA states:
* - A state arrived at from a spontaneous transition out of `m_root` that sets a tag to track
* the capture's start position.
* - A state with a spontaneous transition to `dest_state` that sets a tag to track the
* capture's end position.
*/
[[nodiscard]] auto new_start_and_end_states_with_positive_tagged_transitions(
[[nodiscard]] auto new_start_and_end_states_from_positive_capture(
Capture const* capture,
TypedNfaState const* dest_state
) -> std::pair<TypedNfaState*, TypedNfaState*>;
Expand All @@ -72,9 +75,10 @@ class Nfa {
[[nodiscard]] auto get_bfs_traversal_order() const -> std::vector<TypedNfaState const*>;

/**
* @return A string representation of the NFA.
* @return A string representation of the NFA on success.
* @return Forwards `NfaState::serialize`'s return value (`std::nullopt`) on failure.
*/
[[nodiscard]] auto serialize() const -> std::string;
[[nodiscard]] auto serialize() const -> std::optional<std::string>;

auto add_root_interval(Interval interval, TypedNfaState* dest_state) -> void {
m_root->add_interval(interval, dest_state);
Expand All @@ -100,18 +104,6 @@ class Nfa {
[[nodiscard]] auto get_or_create_capture_tag_pair(Capture const* capture
) -> std::pair<tag_id_t, tag_id_t>;

/**
* Creates a `unique_ptr` for an NFA state with a positive tagged end transition and adds it to
* `m_states`.
* @param tag_id
* @param dest_state
* @return A new state with a positive tagged end transition to `dest_state`.
*/
[[nodiscard]] auto new_state_with_positive_tagged_end_transition(
tag_id_t tag_id,
TypedNfaState const* dest_state
) -> TypedNfaState*;

std::vector<std::unique_ptr<TypedNfaState>> m_states;
// TODO: Lexer currently enforces unique naming across capture groups. However, this limits use
// cases. Possibly initialize this in the lexer and pass it in during construction.
Expand Down Expand Up @@ -146,39 +138,35 @@ auto Nfa<TypedNfaState>::new_state() -> TypedNfaState* {
}

template <typename TypedNfaState>
auto Nfa<TypedNfaState>::new_state_with_positive_tagged_end_transition(
tag_id_t const tag_id,
TypedNfaState const* dest_state
) -> TypedNfaState* {
m_states.emplace_back(std::make_unique<TypedNfaState>(tag_id, dest_state));
return m_states.back().get();
}

template <typename TypedNfaState>
auto Nfa<TypedNfaState>::new_state_with_negative_tagged_transition(
auto Nfa<TypedNfaState>::new_state_from_negative_captures(
std::vector<Capture const*> const& captures,
TypedNfaState const* dest_state
) -> TypedNfaState* {
std::vector<tag_id_t> tags;
for (auto const capture : captures) {
for (auto const* capture : captures) {
auto const [start_tag, end_tag]{get_or_create_capture_tag_pair(capture)};
tags.push_back(start_tag);
tags.push_back(end_tag);
}

m_states.emplace_back(std::make_unique<TypedNfaState>(std::move(tags), dest_state));
m_states.emplace_back(
std::make_unique<TypedNfaState>(TagOperationType::Negate, std::move(tags), dest_state)
);
return m_states.back().get();
}

template <typename TypedNfaState>
auto Nfa<TypedNfaState>::new_start_and_end_states_with_positive_tagged_transitions(
auto Nfa<TypedNfaState>::new_start_and_end_states_from_positive_capture(
Capture const* capture,
TypedNfaState const* dest_state
) -> std::pair<TypedNfaState*, TypedNfaState*> {
auto const [start_tag, end_tag]{get_or_create_capture_tag_pair(capture)};
auto* start_state = new_state();
m_root->add_positive_tagged_start_transition(start_tag, start_state);
auto* end_state{new_state_with_positive_tagged_end_transition(end_tag, dest_state)};
auto* start_state{new_state()};
m_root->add_spontaneous_transition(TagOperationType::Set, {start_tag}, start_state);
m_states.emplace_back(
std::make_unique<TypedNfaState>(TagOperationType::Set, std::vector{end_tag}, dest_state)
);
auto* end_state{m_states.back().get()};
return {start_state, end_state};
}

Expand Down Expand Up @@ -208,33 +196,15 @@ auto Nfa<TypedNfaState>::get_bfs_traversal_order() const -> std::vector<TypedNfa
add_to_queue_and_visited(dest_state);
}
}
for (auto const* dest_state : current_state->get_epsilon_transitions()) {
add_to_queue_and_visited(dest_state);
}
for (auto const& positive_tagged_start_transition :
current_state->get_positive_tagged_start_transitions())
{
add_to_queue_and_visited(positive_tagged_start_transition.get_dest_state());
}

auto const& optional_positive_tagged_end_transition
= current_state->get_positive_tagged_end_transition();
if (optional_positive_tagged_end_transition.has_value()) {
add_to_queue_and_visited(optional_positive_tagged_end_transition.value().get_dest_state(
));
}

auto const& optional_negative_tagged_transition
= current_state->get_negative_tagged_transition();
if (optional_negative_tagged_transition.has_value()) {
add_to_queue_and_visited(optional_negative_tagged_transition.value().get_dest_state());
for (auto const& spontaneous_transition : current_state->get_spontaneous_transitions()) {
add_to_queue_and_visited(spontaneous_transition.get_dest_state());
}
}
return visited_order;
}

template <typename TypedNfaState>
auto Nfa<TypedNfaState>::serialize() const -> std::string {
auto Nfa<TypedNfaState>::serialize() const -> std::optional<std::string> {
auto const traversal_order = get_bfs_traversal_order();

std::unordered_map<TypedNfaState const*, uint32_t> state_ids;
Expand All @@ -244,10 +214,11 @@ auto Nfa<TypedNfaState>::serialize() const -> std::string {

std::vector<std::string> serialized_states;
for (auto const* state : traversal_order) {
// `state_ids` is well-formed as its generated from `get_bfs_traversal_order` so we can
// safely assume `state->serialize(state_ids)` will return a valid value.
// NOLINTNEXTLINE(bugprone-unchecked-optional-access)
serialized_states.emplace_back(state->serialize(state_ids).value());
auto const optional_serialized_state{state->serialize(state_ids)};
if (false == optional_serialized_state.has_value()) {
return std::nullopt;
}
serialized_states.emplace_back(optional_serialized_state.value());
}
return fmt::format("{}\n", fmt::join(serialized_states, "\n"));
}
Expand Down
65 changes: 65 additions & 0 deletions src/log_surgeon/finite_automata/NfaSpontaneousTransition.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#ifndef LOG_SURGEON_FINITE_AUTOMATA_NFASPONTANEOUSTRANSITION_HPP
#define LOG_SURGEON_FINITE_AUTOMATA_NFASPONTANEOUSTRANSITION_HPP

#include <cstdint>
#include <optional>
#include <ranges>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <fmt/format.h>

#include <log_surgeon/finite_automata/TagOperation.hpp>

namespace log_surgeon::finite_automata {
/**
 * An NFA transition that carries a collection of tag operations to be performed when the
 * transition is taken.
 *
 * The destination state is held as a raw pointer; this class never deletes it.
 *
 * @tparam TypedNfaState Specifies the type of transition (bytes or UTF-8 characters).
 */
template <typename TypedNfaState>
class NfaSpontaneousTransition {
public:
    /**
     * @param tag_ops The tag operations to perform during the transition (moved into the
     * transition).
     * @param dest_state The state the transition leads to.
     */
    NfaSpontaneousTransition(std::vector<TagOperation> tag_ops, TypedNfaState const* dest_state)
            : m_tag_ops{std::move(tag_ops)},
              m_dest_state{dest_state} {}

    /**
     * @return A reference to the tag operations stored in this transition.
     */
    [[nodiscard]] auto get_tag_ops() const -> std::vector<TagOperation> const& {
        return m_tag_ops;
    }

    /**
     * @return The destination state pointer given at construction.
     */
    [[nodiscard]] auto get_dest_state() const -> TypedNfaState const* {
        return m_dest_state;
    }

    /**
     * @param state_ids A map of states to their unique identifiers.
     * @return A string representation of the spontaneous transition on success.
     * @return std::nullopt if `m_dest_state` is not in `state_ids`.
     */
    [[nodiscard]] auto serialize(
            std::unordered_map<TypedNfaState const*, uint32_t> const& state_ids
    ) const -> std::optional<std::string>;

private:
    // Tag operations to perform during the transition.
    std::vector<TagOperation> m_tag_ops;
    // Destination of the transition.
    TypedNfaState const* m_dest_state;
};

/**
 * Serializes the transition as `<dest_state_id>[<tag_op>,<tag_op>,...]`, where each `<tag_op>` is
 * the result of `TagOperation::serialize` on the corresponding element of `m_tag_ops`.
 *
 * @param state_ids A map of states to their unique identifiers.
 * @return A string representation of the spontaneous transition on success.
 * @return std::nullopt if `m_dest_state` is not in `state_ids`.
 */
template <typename TypedNfaState>
auto NfaSpontaneousTransition<TypedNfaState>::serialize(
        std::unordered_map<TypedNfaState const*, uint32_t> const& state_ids
) const -> std::optional<std::string> {
    // A single `find` both checks for membership and yields the ID, avoiding a second hash
    // lookup (and the throwing `at` call) for the same key.
    auto const dest_state_it{state_ids.find(m_dest_state)};
    if (state_ids.end() == dest_state_it) {
        return std::nullopt;
    }

    // Lazily serialize each tag operation; `fmt::join` consumes the view while formatting.
    auto transformed_operations
            = m_tag_ops | std::ranges::views::transform(&TagOperation::serialize);

    return fmt::format(
            "{}[{}]",
            dest_state_it->second,
            fmt::join(transformed_operations, ",")
    );
}
} // namespace log_surgeon::finite_automata

#endif // LOG_SURGEON_FINITE_AUTOMATA_NFASPONTANEOUSTRANSITION_HPP
Loading
Loading