mirror of
https://github.com/Z3Prover/z3
synced 2026-07-03 13:56:08 +00:00
Use lookahead for regex decomposition
Make snode const
This commit is contained in:
parent
671dfedebe
commit
be627007e1
22 changed files with 1868 additions and 2066 deletions
File diff suppressed because it is too large
Load diff
|
|
@ -27,202 +27,6 @@ Abstract:
|
|||
-- nielsen_node: graph node with constraint set and outgoing edges
|
||||
-- nielsen_graph: the overall Nielsen transformation graph
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
ZIPT PORT COMPARISON SUMMARY
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
The ZIPT reference is organized as follows (all under ZIPT/Constraints/):
|
||||
NielsenGraph.cs -- the graph manager class
|
||||
NielsenNode.cs -- node class + BacktrackReasons enum
|
||||
NielsenEdge.cs -- edge class with string and character substitutions
|
||||
ConstraintElement/
|
||||
Constraint.cs -- abstract base for all constraints
|
||||
StrEqBase.cs -- abstract base for StrEq and StrMem
|
||||
StrEq.cs -- string equality with full simplification/splitting
|
||||
StrMem.cs -- regex membership with Brzozowski derivatives
|
||||
IntEq.cs -- integer equality over length polynomials
|
||||
IntLe.cs -- integer inequality over length polynomials
|
||||
Modifier/ -- ~15 modifier types driving graph expansion
|
||||
|
||||
A. PORTED FAITHFULLY
|
||||
--------------------
|
||||
1. backtrack_reason enum (BacktrackReasons): all eleven values (Unevaluated,
|
||||
Extended, SymbolClash, ParikhImage, Subsumption, Arithmetic, Regex,
|
||||
RegexWidening, CharacterRange, SMT, ChildrenFailed) are present with
|
||||
identical semantics.
|
||||
|
||||
2. simplify_result enum (SimplifyResult): all five values (Proceed, Conflict,
|
||||
Satisfied, Restart, RestartAndSatisfied) are present with identical semantics.
|
||||
Note: RestartAndSatisfied is declared but not yet exercised in this port.
|
||||
|
||||
3. nielsen_node status fields and accessors: m_is_general_conflict,
|
||||
m_is_extended, m_reason, m_eval_idx map directly to IsGeneralConflict,
|
||||
IsExtended, CurrentReason, evalIdx. The is_currently_conflict() predicate
|
||||
faithfully mirrors IsCurrentlyConflict (GeneralConflict || (reason !=
|
||||
Unevaluated && IsExtended)).
|
||||
|
||||
4. nielsen_node::reset_counter() mirrors NielsenNode.ResetCounter() exactly.
|
||||
|
||||
5. nielsen_node::clone_from() mirrors the copy constructor
|
||||
NielsenNode(graph, parent) for str_eq and str_mem constraints.
|
||||
|
||||
6. nielsen_edge identity (operator==) mirrors NielsenEdge.Equals(): both
|
||||
compare by source and target node pointer identity.
|
||||
|
||||
7. nielsen_graph::inc_run_idx() mirrors the RunIdx increment in NielsenGraph.
|
||||
Check(), including the UINT_MAX overflow guard that calls reset_counter()
|
||||
on all nodes.
|
||||
|
||||
8. str_eq::sort() mirrors StrEqBase.SortStr(): swaps lhs/rhs when lhs > rhs.
|
||||
(Z3 compares by snode id; ZIPT compares Str lexicographically.)
|
||||
|
||||
9. str_eq::is_trivial() mirrors the trivially-satisfied check when both sides
|
||||
are empty.
|
||||
|
||||
10. str_mem fields (m_str, m_regex, m_history, m_id, m_dep) mirror StrMem
|
||||
fields (Str, Regex, History, Id, Reason) faithfully, including the unique
|
||||
identifier used for cycle tracking.
|
||||
|
||||
11. str_mem::is_primitive() mirrors StrMem.IsPrimitiveRegex(): single variable
|
||||
on the left side of the membership constraint.
|
||||
|
||||
12. nielsen_subst::is_eliminating() mirrors the logic behind
|
||||
NielsenEdge.BumpedModCount: a substitution is non-eliminating (bumps the
|
||||
modification counter) when the substituted variable appears in the
|
||||
replacement.
|
||||
|
||||
13. nielsen_graph::mk_edge() faithfully mirrors NielsenEdge construction: it
|
||||
links src to tgt and registers the outgoing edge.
|
||||
|
||||
B. PORTED WITH ALGORITHMIC CHANGES
|
||||
------------------------------------
|
||||
1. dep_tracker (DependencyTracker): ZIPT's DependencyTracker is a .NET
|
||||
class using a BitArray-like structure for tracking constraint origins.
|
||||
Z3 uses scoped_dependency_manager<dep_source> (an arena-based binary
|
||||
join tree from util/dependency.h) where each leaf carries a dep_source
|
||||
value identifying the originating eq or mem constraint by kind and index.
|
||||
|
||||
2. Substitution application (nielsen_node::apply_subst): ZIPT uses an
|
||||
immutable, functional style -- Apply() returns a new constraint if
|
||||
changed, using C# reference equality to detect no-ops. Z3 uses
|
||||
in-place mutation via sgraph::subst(), modifying the constraint vectors
|
||||
directly. The functional change also propagates the substitution's
|
||||
dependency to the merged constraint.
|
||||
|
||||
3. Node constraint containers: ZIPT's NielsenNode stores str_eq constraints
|
||||
in NList<StrEq> (a sorted list for O(log n) subsumption lookup) and str_mem
|
||||
constraints in Dictionary<uint, StrMem> (keyed by id for O(1) cycle lookup).
|
||||
Z3 uses plain vector<str_eq> and vector<str_mem>, which is simpler.
|
||||
|
||||
4. nielsen_edge substitution list: ZIPT's NielsenEdge carries two substitution
|
||||
lists -- Subst (string-level, mapping string variables to strings) and
|
||||
SubstC (character-level, mapping symbolic character variables to concrete
|
||||
characters). Z3's nielsen_edge carries a single vector<nielsen_subst>,
|
||||
covering only string-level substitutions; character substitutions are not
|
||||
represented.
|
||||
|
||||
5. nielsen_graph node registry: ZIPT keeps nodes in a HashSet<NielsenNode> plus
|
||||
a Dictionary<NList<StrEq>, List<NielsenNode>> for subsumption candidate
|
||||
lookup. Z3 uses a ptr_vector<nielsen_node>, simplifying memory management.
|
||||
|
||||
6. nielsen_graph::display() vs NielsenGraph.ToDot(): ZIPT outputs a DOT-format
|
||||
graph with color highlighting for the current satisfying path. Z3 outputs
|
||||
plain human-readable text with node/edge details but no DOT syntax or path
|
||||
highlighting.
|
||||
|
||||
7. str_eq::contains_var() / str_mem::contains_var(): ZIPT performs occurrence
|
||||
checks through StrManager.Subst() (which uses hash-consing and reference
|
||||
equality). Z3 walks the snode tree via collect_tokens(), which is correct
|
||||
but re-traverses the DAG on every call.
|
||||
|
||||
C. NOT PORTED
|
||||
-------------
|
||||
The following ZIPT components are absent from this implementation.
|
||||
They represent the algorithmic core of the search procedure and
|
||||
are expected to be ported in subsequent work.
|
||||
|
||||
Constraint simplification and propagation:
|
||||
- Constraint.SimplifyAndPropagate() / SimplifyAndPropagateInternal(): the
|
||||
main constraint-driven simplification loop is not ported. str_eq and
|
||||
str_mem have no Simplify methods.
|
||||
- StrEq.SimplifyDir() / SimplifyFinal() / AddDefinition(): forward/backward
|
||||
simplification passes, including Makanin-style prefix cancellation, power
|
||||
token handling, and variable definition propagation.
|
||||
- StrEq.GetNielsenDep() / SplitEq(): the Nielsen dependency analysis and
|
||||
equation-splitting heuristic used to choose the best split point.
|
||||
- StrMem.SimplifyCharRegex() / SimplifyDir(): Brzozowski derivative-based
|
||||
simplification consuming ground prefixes/suffixes of the string.
|
||||
- StrMem.TrySubsume(): stabilizer-based subsumption (not ported, not needed).
|
||||
- StrMem.ExtractCycle() / StabilizerFromCycle(): cycle detection over the
|
||||
search path and extraction of a Kleene-star stabilizer to generalize the
|
||||
cycle. This is the key termination argument for regex membership.
|
||||
- StrMem.Extend(): the splitting driver that produces the next modifier
|
||||
(RegexVarSplitModifier, RegexCharSplitModifier, StarIntrModifier, etc.).
|
||||
|
||||
Integer constraints:
|
||||
- IntEq / IntLe: integer equality and inequality constraints over Presburger
|
||||
arithmetic polynomials (PDD<BigInteger>) are entirely absent. The Z3 port
|
||||
has no ConstraintsIntEq or ConstraintsIntLe in nielsen_node.
|
||||
- IntBounds / VarBoundWatcher: ZIPT-style cached interval maps and eager
|
||||
watcher propagation are not stored in nielsen_node; bounds are queried
|
||||
from the arithmetic subsolver on demand.
|
||||
- AddLowerIntBound() / AddHigherIntBound(): incremental interval tightening
|
||||
— PORTED as the add_lower/upper_int_bound methods.
|
||||
|
||||
Character-level handling:
|
||||
- CharSubst: character-level variable substitution (symbolic char -> concrete
|
||||
char) is absent. ZIPT uses this to handle symbolic character tokens
|
||||
(SymCharToken) that represent a single unknown character.
|
||||
- SymCharToken / CharacterSet: symbolic character tokens with associated
|
||||
character range constraints (CharRanges) are not ported.
|
||||
- DisEqualities: per-node character disequality constraints used for conflict
|
||||
detection during character substitution are not ported.
|
||||
|
||||
Modifier hierarchy (Constraints/Modifier/):
|
||||
- 13 Modifier subclasses driving graph expansion are ported as
|
||||
apply_* methods in generate_extensions, matching ZIPT's TypeOrder
|
||||
priority: DetModifier(1), PowerEpsilonModifier(2), NumCmpModifier(3),
|
||||
ConstNumUnwindingModifier(4), EqSplitModifier(5), StarIntrModifier(6),
|
||||
GPowerIntrModifier(7), ConstNielsenModifier(8), RegexCharSplitModifier(9),
|
||||
RegexVarSplitModifier(10), PowerSplitModifier(11), VarNielsenModifier(12),
|
||||
VarNumUnwindingModifier(13).
|
||||
- NOT PORTED: DirectedNielsenModifier, DecomposeModifier, CombinedModifier.
|
||||
- NumCmp, ConstNumUnwinding, VarNumUnwinding are approximated (no PDD
|
||||
integer polynomial infrastructure; power tokens are replaced with ε
|
||||
or peeled with fresh variables instead of exact exponent arithmetic).
|
||||
|
||||
Search procedure:
|
||||
- NielsenGraph.Check() / NielsenNode.GraphExpansion(): ported as
|
||||
nielsen_graph::solve() (iterative deepening, starting at depth 3,
|
||||
incrementing by 1 per failure, bounded by smt.nseq.max_depth) and
|
||||
search_dfs() (depth-bounded DFS with eval_idx cycle detection and
|
||||
node status tracking).
|
||||
- NielsenNode.SimplifyAndInit(): ported as
|
||||
nielsen_node::simplify_and_init() with prefix matching, symbol clash,
|
||||
empty propagation, and Brzozowski derivative consumption.
|
||||
- NielsenGraph.FindExisting() / subsumption cache lookup: not ported,
|
||||
not needed.
|
||||
|
||||
Auxiliary infrastructure:
|
||||
- LocalInfo: thread-local search bookkeeping (current path, modification
|
||||
counts, regex occurrence cache for cycle detection, current node pointer)
|
||||
is not ported.
|
||||
- NielsenGraph.SubSolver / InnerStringPropagator: the auxiliary Z3 solver
|
||||
for arithmetic lemma generation and the inner string propagator for
|
||||
model-based refinement are not ported.
|
||||
- PowerToken: word-repetition tokens of the form u^n (distinct from regex
|
||||
Kleene star) are not represented in Z3's snode.
|
||||
- GetSignature(): the constraint-pair signature used for subsumption
|
||||
candidate matching is not ported.
|
||||
- Constraint.Shared: the flag indicating whether a constraint should be
|
||||
forwarded to the outer solver — PORTED as
|
||||
nielsen_graph::assert_root_constraints_to_solver(), called at the start
|
||||
of solve() to make all root-level length/Parikh constraints immediately
|
||||
visible to m_solver.
|
||||
- Interpretation: the model-extraction class mapping string and integer
|
||||
variables to concrete values is not ported.
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
Author:
|
||||
|
||||
Clemens Eisenhofer 2026-03-02
|
||||
|
|
@ -316,8 +120,6 @@ namespace seq {
|
|||
using enode_pair_vector = svector<enode_pair>;
|
||||
using dep_source = std::variant<sat::literal, enode_pair>;
|
||||
|
||||
|
||||
|
||||
// Arena-based dependency manager: builds an immutable tree of dep_source
|
||||
// leaves joined by binary join nodes. Memory is managed via a region;
|
||||
// call dep_manager::reset() to release all allocations at once.
|
||||
|
|
@ -375,14 +177,26 @@ namespace seq {
|
|||
// and arithmetic <= dependencies.
|
||||
void deps_to_lits(dep_manager &dep_mgr, dep_tracker deps, svector<enode_pair> &eqs, svector<sat::literal> &lits);
|
||||
|
||||
// decompose a membership constraint into a set of pairs of regex splits
|
||||
std::pair<euf::snode const*, euf::snode const*> split_membership(euf::snode const* str, euf::snode const* regex, euf::sgraph& sg, unsigned threshold, split_set& result);
|
||||
|
||||
// Lookahead oracle for the split engine: is the split's right component
|
||||
// `n_regex` prefix-compatible with the constant character sequence `c`?
|
||||
// The factorization picks a boundary so the tail starts with c, hence the
|
||||
// tail-regex N (`n_regex`) must be able to match c as a prefix. We use a *prefix* test
|
||||
// (not strict "starts-with"): we accept as soon as N accepts a prefix of c
|
||||
// (a suffix appended downstream can complete it). This is sound to apply
|
||||
// during split generation — it never drops a viable split.
|
||||
bool split_lookahead_viable(expr* n_regex, euf::sgraph& sg, zstring const& c);
|
||||
|
||||
// string equality constraint: lhs = rhs
|
||||
// mirrors ZIPT's StrEq (both sides are regex-free snode trees)
|
||||
struct str_eq {
|
||||
euf::snode* m_lhs; // assumed to be non-null
|
||||
euf::snode* m_rhs; // assumed to be non-null
|
||||
euf::snode const* m_lhs; // assumed to be non-null
|
||||
euf::snode const* m_rhs; // assumed to be non-null
|
||||
dep_tracker m_dep;
|
||||
|
||||
str_eq(euf::snode* lhs, euf::snode* rhs, dep_tracker const& dep):
|
||||
str_eq(euf::snode const* lhs, euf::snode const* rhs, dep_tracker const& dep):
|
||||
m_lhs(lhs), m_rhs(rhs), m_dep(dep) {
|
||||
SASSERT(well_formed());
|
||||
}
|
||||
|
|
@ -398,7 +212,7 @@ namespace seq {
|
|||
bool is_trivial() const;
|
||||
|
||||
// check if the constraint contains a given variable
|
||||
bool contains_var(euf::snode* var) const;
|
||||
bool contains_var(euf::snode const* var) const;
|
||||
|
||||
bool well_formed() const {
|
||||
// assumed to be always true
|
||||
|
|
@ -420,11 +234,11 @@ namespace seq {
|
|||
|
||||
// string disequality constraint: lhs != rhs
|
||||
struct str_deq {
|
||||
euf::snode* m_lhs; // assumed to be non-null
|
||||
euf::snode* m_rhs; // assumed to be non-null
|
||||
euf::snode const* m_lhs; // assumed to be non-null
|
||||
euf::snode const* m_rhs; // assumed to be non-null
|
||||
dep_tracker m_dep;
|
||||
|
||||
str_deq(euf::snode* lhs, euf::snode* rhs, dep_tracker const& dep):
|
||||
str_deq(euf::snode const* lhs, euf::snode const* rhs, dep_tracker const& dep):
|
||||
m_lhs(lhs), m_rhs(rhs), m_dep(dep) {
|
||||
SASSERT(well_formed());
|
||||
}
|
||||
|
|
@ -439,7 +253,7 @@ namespace seq {
|
|||
}
|
||||
}
|
||||
|
||||
bool contains_var(euf::snode* var) const {
|
||||
bool contains_var(euf::snode const* var) const {
|
||||
return m_lhs->collect_tokens().contains(var) || m_rhs->collect_tokens().contains(var);
|
||||
}
|
||||
|
||||
|
|
@ -464,11 +278,11 @@ namespace seq {
|
|||
// regex membership constraint: str in regex
|
||||
// mirrors ZIPT's StrMem
|
||||
struct str_mem {
|
||||
euf::snode* m_str; // assumed to be non-null
|
||||
euf::snode* m_regex; // assumed to be non-null
|
||||
euf::snode const* m_str; // assumed to be non-null
|
||||
euf::snode const* m_regex; // assumed to be non-null
|
||||
dep_tracker m_dep;
|
||||
|
||||
str_mem(euf::snode* str, euf::snode* regex, dep_tracker const& dep):
|
||||
str_mem(euf::snode const* str, euf::snode const* regex, dep_tracker const& dep):
|
||||
m_str(str), m_regex(regex), m_dep(dep) {}
|
||||
|
||||
bool operator==(str_mem const& other) const {
|
||||
|
|
@ -484,7 +298,7 @@ namespace seq {
|
|||
bool is_contradiction(nielsen_node const* n) const;
|
||||
|
||||
// check if the constraint contains a given variable
|
||||
bool contains_var(euf::snode* var) const;
|
||||
bool contains_var(euf::snode const* var) const;
|
||||
|
||||
bool well_formed() const {
|
||||
// assumed to be always true
|
||||
|
|
@ -508,12 +322,12 @@ namespace seq {
|
|||
// (can be used as well to substitute arbitrary nodes - like powers)
|
||||
// mirrors ZIPT's Subst
|
||||
struct nielsen_subst {
|
||||
euf::snode* m_var;
|
||||
euf::snode* m_replacement;
|
||||
euf::snode const* m_var;
|
||||
euf::snode const* m_replacement;
|
||||
dep_tracker m_dep;
|
||||
|
||||
nielsen_subst(): m_var(nullptr), m_replacement(nullptr), m_dep(nullptr) {}
|
||||
nielsen_subst(euf::snode* var, euf::snode* repl, dep_tracker const& dep):
|
||||
nielsen_subst(euf::snode const* var, euf::snode const* repl, dep_tracker const& dep):
|
||||
m_var(var), m_replacement(repl), m_dep(dep) {
|
||||
SASSERT(var != nullptr);
|
||||
SASSERT(repl != nullptr);
|
||||
|
|
@ -718,7 +532,7 @@ namespace seq {
|
|||
|
||||
// add a character range constraint for a symbolic char.
|
||||
// intersects with existing range; sets conflict if result is empty.
|
||||
void add_char_range(euf::snode* sym_char, char_set const& range, dep_tracker dep);
|
||||
void add_char_range(euf::snode const* sym_char, char_set const& range, dep_tracker dep);
|
||||
|
||||
// edge access
|
||||
ptr_vector<nielsen_edge> const& outgoing() const { return m_outgoing; }
|
||||
|
|
@ -817,7 +631,7 @@ namespace seq {
|
|||
// Collects tokens from non_empty_side; if any token causes a conflict
|
||||
// (is a concrete character or an unexpected kind), sets conflict flags
|
||||
// and returns true. Otherwise returns false.
|
||||
bool check_empty_side_conflict(euf::sgraph& sg, euf::snode* non_empty_side,
|
||||
bool check_empty_side_conflict(euf::sgraph& sg, euf::snode const* non_empty_side,
|
||||
dep_tracker const& dep);
|
||||
|
||||
// Length bounds are queried from the arithmetic subsolver when needed.
|
||||
|
|
@ -868,7 +682,7 @@ namespace seq {
|
|||
friend class nielsen_node;
|
||||
friend class nielsen_edge;
|
||||
|
||||
// Edge endpoints are stored as expr* (not snode*) because the cache
|
||||
// Edge endpoints are stored as expr* (not snode const*) because the cache
|
||||
// must survive sgraph pops. snodes are allocated in a region that is
|
||||
// never freed, but their m_expr field is owned by the egraph trail and
|
||||
// becomes dangling on pop. We pin the referenced expressions via
|
||||
|
|
@ -1036,14 +850,14 @@ namespace seq {
|
|||
ptr_vector<nielsen_edge> const& sat_path() const { return m_sat_path; }
|
||||
|
||||
// add constraints to the root node from external solver
|
||||
void add_str_eq(euf::snode* lhs, euf::snode* rhs, smt::enode* l, smt::enode* r) const;
|
||||
void add_str_deq(euf::snode* lhs, euf::snode* rhs, sat::literal l) const;
|
||||
void add_str_mem(euf::snode* str, euf::snode* regex, sat::literal l) const;
|
||||
void add_str_eq(euf::snode const* lhs, euf::snode const* rhs, smt::enode* l, smt::enode* r) const;
|
||||
void add_str_deq(euf::snode const* lhs, euf::snode const* rhs, sat::literal l) const;
|
||||
void add_str_mem(euf::snode const* str, euf::snode const* regex, sat::literal l) const;
|
||||
|
||||
// test-friendly overloads (no external dependency tracking)
|
||||
void add_str_eq(euf::snode* lhs, euf::snode* rhs);
|
||||
void add_str_deq(euf::snode* lhs, euf::snode* rhs);
|
||||
void add_str_mem(euf::snode* str, euf::snode* regex);
|
||||
void add_str_eq(euf::snode const* lhs, euf::snode const* rhs);
|
||||
void add_str_deq(euf::snode const* lhs, euf::snode const* rhs);
|
||||
void add_str_mem(euf::snode const* str, euf::snode const* regex);
|
||||
|
||||
// access all nodes
|
||||
ptr_vector<nielsen_node> const& nodes() const { return m_nodes; }
|
||||
|
|
@ -1075,9 +889,9 @@ namespace seq {
|
|||
|
||||
std::string to_dot() const;
|
||||
|
||||
std::ostream& partial_dfa_to_dot(std::ostream& out, euf::snode* start_state, bool keep_names) const;
|
||||
std::ostream& partial_dfa_to_dot(std::ostream& out, euf::snode const* start_state, bool keep_names) const;
|
||||
|
||||
std::string partial_dfa_to_dot(euf::snode* start_state, bool keep_names) const;
|
||||
std::string partial_dfa_to_dot(euf::snode const* start_state, bool keep_names) const;
|
||||
|
||||
// reset all nodes and state
|
||||
void reset();
|
||||
|
|
@ -1118,12 +932,12 @@ namespace seq {
|
|||
// build an arithmetic expression representing the length of an snode tree.
|
||||
// concatenations are expanded to sums, chars to 1, empty to 0,
|
||||
// variables to (str.len var_expr).
|
||||
expr_ref compute_length_expr(euf::snode* n);
|
||||
expr_ref compute_length_expr(euf::snode const* n);
|
||||
|
||||
// compute Parikh length interval [min_len, max_len] for a regex snode.
|
||||
// uses seq_util::rex min_length/max_length on the underlying expression.
|
||||
// max_len == UINT_MAX means unbounded.
|
||||
void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len) const;
|
||||
void compute_regex_length_interval(euf::snode const* regex, unsigned& min_len, unsigned& max_len) const;
|
||||
|
||||
// accessor for the seq_regex module
|
||||
seq_regex* seq_regex_module() const { return m_seq_regex; }
|
||||
|
|
@ -1175,15 +989,15 @@ namespace seq {
|
|||
// Record a discovered derivative edge in the global partial DFA.
|
||||
// The `label` may be a concrete string token (converted to a to_re regex)
|
||||
// or a minterm that is already a regular expression.
|
||||
void record_partial_derivative_edge(euf::snode* src_re, euf::snode* label, euf::snode* dst_re);
|
||||
void record_partial_derivative_edge(euf::snode const* src_re, euf::snode const* label, euf::snode const* dst_re);
|
||||
|
||||
// Convert a transition label (string token or regex minterm) into a
|
||||
// one-character regex snode used by the partial DFA.
|
||||
euf::snode* to_partial_label_regex(euf::snode* label) const;
|
||||
euf::snode const* to_partial_label_regex(euf::snode const* label) const;
|
||||
|
||||
// Collect the SCC containing root_re in the current partial DFA.
|
||||
// Returns false if no cyclic SCC containing root_re exists.
|
||||
bool collect_scc_for_projection(euf::snode* root_re, uint_set& scc) const;
|
||||
bool collect_scc_for_projection(euf::snode const* root_re, uint_set& scc) const;
|
||||
|
||||
// Mark SCC edges with a monotone extraction index and return the
|
||||
// currently covered edge count for this extraction.
|
||||
|
|
@ -1194,17 +1008,17 @@ namespace seq {
|
|||
// snapshot index nu. This is the stabilizer of root_re kept symbolically
|
||||
// (the projection's derivative/nullability are evaluated lazily by the
|
||||
// sgraph consulting projection_state_in_Q).
|
||||
euf::snode* mk_projection_term(euf::snode* root_re, unsigned nu);
|
||||
euf::snode const* mk_projection_term(euf::snode const* root_re, unsigned nu);
|
||||
|
||||
// Try to extract a stronger projection for root_re. Returns true and
|
||||
// stores it in projection_re iff SCC coverage has grown.
|
||||
bool try_extract_partial_projection(euf::snode* root_re, euf::snode*& projection_re);
|
||||
bool try_extract_partial_projection(euf::snode const* root_re, euf::snode const*& projection_re);
|
||||
|
||||
euf::snode* get_slice(euf::snode* v, expr* left, expr* right);
|
||||
euf::snode const* get_slice(euf::snode const* v, expr* left, expr* right);
|
||||
|
||||
euf::snode* get_tail(euf::snode* v, expr* cnt, bool fwd = true);
|
||||
euf::snode const* get_tail(euf::snode const* v, expr* cnt, bool fwd = true);
|
||||
|
||||
euf::snode* get_tail(euf::snode* v, unsigned cnt, bool fwd = true);
|
||||
euf::snode const* get_tail(euf::snode const* v, unsigned cnt, bool fwd = true);
|
||||
|
||||
// Apply the Parikh image filter to a node: generate modular length
|
||||
// constraints from regex memberships and append them to the node's
|
||||
|
|
@ -1217,10 +1031,10 @@ namespace seq {
|
|||
void apply_parikh_to_node(nielsen_node& node) const;
|
||||
|
||||
// simplify expression and create a node from simplified expression.
|
||||
euf::snode *mk_rewrite(expr *e) const;
|
||||
euf::snode const* mk_rewrite(expr *e) const;
|
||||
|
||||
// create a fresh variable with a unique name and the given sequence sort
|
||||
euf::snode* mk_fresh_var(sort* s);
|
||||
euf::snode const* mk_fresh_var(sort* s);
|
||||
|
||||
// deterministic modifier: var = ε, same-head cancel
|
||||
bool apply_det_modifier(nielsen_node* node);
|
||||
|
|
@ -1238,10 +1052,10 @@ namespace seq {
|
|||
|
||||
// helper: classify whether a token has variable (symbolic) length
|
||||
// returns true for variables, powers, etc.; false for chars, units, string literals
|
||||
bool token_has_variable_length(euf::snode* tok) const;
|
||||
bool token_has_variable_length(euf::snode const* tok) const;
|
||||
|
||||
// helper: get the constant length of a token (only valid when !token_has_variable_length)
|
||||
unsigned token_const_length(euf::snode* tok) const;
|
||||
unsigned token_const_length(euf::snode const* tok) const;
|
||||
|
||||
// helper: find a split point in a regex-free equation.
|
||||
// ports ZIPT's StrEq.SplitEq algorithm.
|
||||
|
|
@ -1293,19 +1107,19 @@ namespace seq {
|
|||
|
||||
// Return the current stabilizer s* for root_re from the partial DFA
|
||||
// (bypasses the novelty guard used by try_extract_partial_projection).
|
||||
euf::snode* get_current_stabilizer(euf::snode* root_re);
|
||||
euf::snode const* get_current_stabilizer(euf::snode const* root_re);
|
||||
|
||||
// BFS of Brzozowski derivatives from root_re up to `depth` steps,
|
||||
// eagerly recording concrete minterm edges in the partial DFA so that
|
||||
// collect_scc_for_projection can find cycles without first waiting for
|
||||
// concrete children to record them one level at a time.
|
||||
void precompute_partial_dfa(euf::snode* root_re, unsigned depth);
|
||||
void precompute_partial_dfa(euf::snode const* root_re, unsigned depth);
|
||||
|
||||
// Walk an ite-structured symbolic derivative expression and record
|
||||
// concrete DFA edges for each non-fail branch.
|
||||
// Called from simplify_and_init when a symbolic character is consumed,
|
||||
// so that cycle_decomp can detect SCCs lazily (as with concrete chars).
|
||||
void record_dfa_edges_from_ite(euf::snode* src_re, expr* ite_deriv);
|
||||
void record_dfa_edges_from_ite(euf::snode const* src_re, expr* ite_deriv);
|
||||
|
||||
// generalized power introduction: for an equation where one head is
|
||||
// a variable v and the other side has ground prefix + a variable x
|
||||
|
|
@ -1316,18 +1130,11 @@ namespace seq {
|
|||
// generalized regex factorization (Boolean closure derivation rule)
|
||||
bool apply_regex_factorization(nielsen_node* node);
|
||||
|
||||
// Lookahead oracle for apply_regex_factorization's split() call: returns
|
||||
// true iff the split's right component `n_regex` is prefix-compatible with
|
||||
// the constant character sequence `c` (the tail of the factorization starts
|
||||
// with c). Prunes splits whose tail-regex can never match c. Sound to
|
||||
// apply during split generation (prefix-, not strict-, match).
|
||||
bool split_lookahead_viable(expr* n_regex, zstring const& c);
|
||||
|
||||
// helper for apply_gpower_intr: fires the substitution.
|
||||
// `fwd=true` uses left-to-right decomposition; `fwd=false` mirrors ZIPT's
|
||||
// backward (right-to-left) direction.
|
||||
bool fire_gpower_intro(nielsen_node* node, str_eq const& eq,
|
||||
euf::snode* var, euf::snode_vector const& ground_prefix_orig, bool fwd);
|
||||
euf::snode const* var, euf::snode_vector const& ground_prefix_orig, bool fwd);
|
||||
|
||||
// heuristic string equation splitting. Left to right scanning for shortest prefix with matching variables.
|
||||
bool apply_signature_split(nielsen_node* node);
|
||||
|
|
@ -1353,17 +1160,17 @@ namespace seq {
|
|||
bool axiomatize_diseq(nielsen_node* node);
|
||||
|
||||
// find the first power token in any str_eq at this node
|
||||
static euf::snode* find_power_token(nielsen_node* node);
|
||||
static euf::snode const* find_power_token(nielsen_node* node);
|
||||
|
||||
// find a power token facing a constant (char/non-var) token at either end
|
||||
// of an equation; returns orientation via `fwd` (true=head, false=tail).
|
||||
static bool find_power_vs_non_var(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out, bool& fwd);
|
||||
static bool find_power_vs_non_var(nielsen_node* node, euf::snode const*& power, euf::snode const*& other_head, str_eq const*& eq_out, bool& fwd);
|
||||
|
||||
// find a power token facing a variable token at either end of an
|
||||
// equation; returns orientation via `fwd` (true=head, false=tail).
|
||||
static bool find_power_vs_var(nielsen_node* node, euf::snode*& power, euf::snode*& var_head, str_eq const*& eq_out, bool& fwd);
|
||||
static bool find_power_vs_var(nielsen_node* node, euf::snode const*& power, euf::snode const*& var_head, str_eq const*& eq_out, bool& fwd);
|
||||
|
||||
static bool find_power_vs_var(nielsen_node* node, euf::snode*& power, str_mem const*& mem_out, bool& fwd);
|
||||
static bool find_power_vs_var(nielsen_node* node, euf::snode const*& power, str_mem const*& mem_out, bool& fwd);
|
||||
|
||||
// -----------------------------------------------
|
||||
// Integer feasibility subsolver methods
|
||||
|
|
@ -1404,7 +1211,7 @@ namespace seq {
|
|||
constraint mk_constraint(expr *fml, dep_tracker const &dep) const;
|
||||
|
||||
// get the exponent expression from a power snode (arg(1))
|
||||
static expr * get_power_exponent(euf::snode* power);
|
||||
static expr * get_power_exponent(euf::snode const* power);
|
||||
|
||||
// -----------------------------------------------
|
||||
// Modification counter methods for substitution length tracking.
|
||||
|
|
@ -1413,13 +1220,13 @@ namespace seq {
|
|||
// -----------------------------------------------
|
||||
|
||||
// Get or create a fresh symbolic character variable for the given variable
|
||||
expr_ref get_or_create_char_var(euf::snode* var);
|
||||
expr_ref get_or_create_char_var(euf::snode const* var);
|
||||
|
||||
// Get or create a fresh integer variable for gpower n (full exponent) for the given variable
|
||||
expr_ref get_or_create_gpower_n_var(euf::snode* var);
|
||||
expr_ref get_or_create_gpower_n_var(euf::snode const* var);
|
||||
|
||||
// Get or create a fresh integer variable for gpower m (partial exponent) for the given variable
|
||||
expr_ref get_or_create_gpower_m_var(euf::snode* var);
|
||||
expr_ref get_or_create_gpower_m_var(euf::snode const* var);
|
||||
|
||||
// Compute and add |x| = |u| length constraints to an edge for all
|
||||
// its non-eliminating substitutions. Uses current m_mod_cnt.
|
||||
|
|
|
|||
|
|
@ -627,12 +627,12 @@ namespace seq {
|
|||
return;
|
||||
|
||||
// collect the original variables present in the root node's constraints
|
||||
ptr_vector<euf::snode> vars;
|
||||
euf::snode_vector vars;
|
||||
uint_set seen;
|
||||
auto add_vars = [&](euf::snode* s) {
|
||||
auto add_vars = [&](euf::snode const* s) {
|
||||
if (!s)
|
||||
return;
|
||||
for (euf::snode* t : s->collect_tokens())
|
||||
for (euf::snode const* t : s->collect_tokens())
|
||||
if (t->is_var() && !seen.contains(t->id())) {
|
||||
seen.insert(t->id());
|
||||
vars.push_back(t);
|
||||
|
|
@ -657,12 +657,14 @@ namespace seq {
|
|||
}
|
||||
|
||||
bool any = false;
|
||||
for (euf::snode* var : vars) {
|
||||
euf::snode* val = var;
|
||||
for (euf::snode const* var : vars) {
|
||||
euf::snode const* val = var;
|
||||
// apply substitutions in root-to-node order (path is node-to-root)
|
||||
for (unsigned i = path.size(); i-- > 0; )
|
||||
for (nielsen_subst const& s : path[i]->subst())
|
||||
for (unsigned i = path.size(); i-- > 0; ) {
|
||||
for (nielsen_subst const& s : path[i]->subst()) {
|
||||
val = sg.subst(val, s.m_var, s.m_replacement);
|
||||
}
|
||||
}
|
||||
if (val == var)
|
||||
continue; // unchanged: variable is still free at this node
|
||||
if (!any) { out << "<br/>Subst:<br/>"; any = true; }
|
||||
|
|
@ -763,7 +765,7 @@ namespace seq {
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
std::ostream& nielsen_graph::partial_dfa_to_dot(std::ostream& out, euf::snode* start_state, bool keep_names) const {
|
||||
std::ostream& nielsen_graph::partial_dfa_to_dot(std::ostream& out, euf::snode const* start_state, bool keep_names) const {
|
||||
out << "digraph G {\n";
|
||||
out << " node [shape=box];\n";
|
||||
|
||||
|
|
@ -833,8 +835,8 @@ namespace seq {
|
|||
|
||||
bool accepting = false;
|
||||
if (node_expr) {
|
||||
euf::snode* sn = m_sg.mk(node_expr);
|
||||
accepting = (const_cast<euf::sgraph&>(m_sg).re_nullable(sn) == l_true);
|
||||
euf::snode const* sn = m_sg.mk(node_expr);
|
||||
accepting = m_sg.re_nullable(sn) == l_true;
|
||||
}
|
||||
|
||||
out << " N" << node_id << " [";
|
||||
|
|
@ -873,7 +875,7 @@ namespace seq {
|
|||
return out;
|
||||
}
|
||||
|
||||
std::string nielsen_graph::partial_dfa_to_dot(euf::snode* start_state, bool keep_names) const {
|
||||
std::string nielsen_graph::partial_dfa_to_dot(euf::snode const* start_state, bool keep_names) const {
|
||||
std::stringstream ss;
|
||||
partial_dfa_to_dot(ss, start_state, keep_names);
|
||||
return ss.str();
|
||||
|
|
|
|||
|
|
@ -87,22 +87,22 @@ namespace seq {
|
|||
m_self_stabilizing.reset();
|
||||
}
|
||||
|
||||
void seq_regex::add_stabilizer(euf::snode* regex, euf::snode* stabilizer) {
|
||||
void seq_regex::add_stabilizer(euf::snode const* regex, euf::snode const* stabilizer) {
|
||||
if (!regex || !stabilizer)
|
||||
return;
|
||||
|
||||
unsigned id = regex->id();
|
||||
auto& stabs = m_stabilizers.insert_if_not_there(id, ptr_vector<euf::snode>());
|
||||
const unsigned id = regex->id();
|
||||
auto& stabs = m_stabilizers.insert_if_not_there(id, euf::snode_vector());
|
||||
|
||||
// De-duplicate by pointer equality (mirrors ZIPT Environment.AddStabilizer
|
||||
// which checks reference equality before adding).
|
||||
for (euf::snode* s : stabs)
|
||||
for (euf::snode const* s : stabs)
|
||||
if (s == stabilizer)
|
||||
return;
|
||||
stabs.push_back(stabilizer);
|
||||
}
|
||||
|
||||
euf::snode* seq_regex::get_stabilizer_union(euf::snode* regex) {
|
||||
euf::snode const* seq_regex::get_stabilizer_union(euf::snode const* regex) {
|
||||
if (!regex)
|
||||
return nullptr;
|
||||
|
||||
|
|
@ -119,7 +119,7 @@ namespace seq {
|
|||
|
||||
// Multiple stabilizers: build re.union chain.
|
||||
// union(s1, union(s2, ... union(sN-1, sN)...))
|
||||
euf::snode* result = stabs[stabs.size() - 1];
|
||||
euf::snode const* result = stabs[stabs.size() - 1];
|
||||
for (unsigned i = stabs.size() - 1; i-- > 0; ) {
|
||||
expr* lhs = stabs[i]->get_expr();
|
||||
expr* rhs = result->get_expr();
|
||||
|
|
@ -128,7 +128,7 @@ namespace seq {
|
|||
return result;
|
||||
}
|
||||
|
||||
bool seq_regex::has_stabilizers(euf::snode* regex) const {
|
||||
bool seq_regex::has_stabilizers(euf::snode const* regex) const {
|
||||
if (!regex)
|
||||
return false;
|
||||
if (!m_stabilizers.contains(regex->id()))
|
||||
|
|
@ -136,7 +136,7 @@ namespace seq {
|
|||
return !m_stabilizers[regex->id()].empty();
|
||||
}
|
||||
|
||||
ptr_vector<euf::snode> const* seq_regex::get_stabilizers(euf::snode* regex) const {
|
||||
euf::snode_vector const* seq_regex::get_stabilizers(euf::snode const* regex) const {
|
||||
if (!regex)
|
||||
return nullptr;
|
||||
if (!m_stabilizers.contains(regex->id()))
|
||||
|
|
@ -144,12 +144,12 @@ namespace seq {
|
|||
return &m_stabilizers[regex->id()];
|
||||
}
|
||||
|
||||
void seq_regex::set_self_stabilizing(euf::snode* regex) {
|
||||
void seq_regex::set_self_stabilizing(euf::snode const* regex) {
|
||||
if (regex)
|
||||
m_self_stabilizing.insert(regex->id());
|
||||
}
|
||||
|
||||
bool seq_regex::is_self_stabilizing(euf::snode* regex) const {
|
||||
bool seq_regex::is_self_stabilizing(euf::snode const* regex) const {
|
||||
return regex && m_self_stabilizing.contains(regex->id());
|
||||
}
|
||||
|
||||
|
|
@ -157,7 +157,7 @@ namespace seq {
|
|||
// Self-stabilizing auto-detection
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
bool seq_regex::compute_self_stabilizing(euf::snode* regex) const {
|
||||
bool seq_regex::compute_self_stabilizing(euf::snode const* regex) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -165,7 +165,7 @@ namespace seq {
|
|||
// Self-stabilizing propagation through derivatives
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
void seq_regex::propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv) {
|
||||
void seq_regex::propagate_self_stabilizing(euf::snode const* parent, euf::snode const* deriv) {
|
||||
if (!parent || !deriv)
|
||||
return;
|
||||
|
||||
|
|
@ -205,7 +205,7 @@ namespace seq {
|
|||
// If S is self-stabilizing, the D(c,R)·S branch inherits it.
|
||||
// If the whole parent R·S is self-stabilizing, the derivative is too.
|
||||
if (parent->is_concat() && parent->num_args() == 2) {
|
||||
euf::snode* tail = parent->arg(1);
|
||||
euf::snode const* tail = parent->arg(1);
|
||||
bool tail_ss = is_self_stabilizing(tail) || compute_self_stabilizing(tail);
|
||||
if (tail_ss || parent_ss) {
|
||||
set_self_stabilizing(deriv);
|
||||
|
|
@ -217,8 +217,8 @@ namespace seq {
|
|||
// D(c, R|S) = D(c,R) | D(c,S).
|
||||
// Self-stabilizing if both children are self-stabilizing.
|
||||
if (parent->is_union() && parent->num_args() == 2) {
|
||||
euf::snode* lhs = parent->arg(0);
|
||||
euf::snode* rhs = parent->arg(1);
|
||||
euf::snode const* lhs = parent->arg(0);
|
||||
euf::snode const* rhs = parent->arg(1);
|
||||
bool lhs_ss = is_self_stabilizing(lhs) || compute_self_stabilizing(lhs);
|
||||
bool rhs_ss = is_self_stabilizing(rhs) || compute_self_stabilizing(rhs);
|
||||
if (lhs_ss && rhs_ss) {
|
||||
|
|
@ -231,8 +231,8 @@ namespace seq {
|
|||
// D(c, R∩S) = D(c,R) ∩ D(c,S).
|
||||
// Self-stabilizing if both children are self-stabilizing.
|
||||
if (parent->is_intersect() && parent->num_args() == 2) {
|
||||
euf::snode* lhs = parent->arg(0);
|
||||
euf::snode* rhs = parent->arg(1);
|
||||
euf::snode const* lhs = parent->arg(0);
|
||||
euf::snode const* rhs = parent->arg(1);
|
||||
bool lhs_ss = is_self_stabilizing(lhs) || compute_self_stabilizing(lhs);
|
||||
bool rhs_ss = is_self_stabilizing(rhs) || compute_self_stabilizing(rhs);
|
||||
if (lhs_ss && rhs_ss) {
|
||||
|
|
@ -245,7 +245,7 @@ namespace seq {
|
|||
// D(c, ~R) = ~D(c, R).
|
||||
// Preserves self-stabilizing from R.
|
||||
if (parent->is_complement() && parent->num_args() == 1) {
|
||||
euf::snode* inner = parent->arg(0);
|
||||
euf::snode const* inner = parent->arg(0);
|
||||
bool inner_ss = is_self_stabilizing(inner) || compute_self_stabilizing(inner);
|
||||
if (inner_ss) {
|
||||
set_self_stabilizing(deriv);
|
||||
|
|
@ -266,10 +266,10 @@ namespace seq {
|
|||
// Derivative with propagation
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
euf::snode* seq_regex::derivative_with_propagation(euf::snode* re, euf::snode* elem) {
|
||||
euf::snode const* seq_regex::derivative_with_propagation(euf::snode const* re, euf::snode const* elem) {
|
||||
if (!re || !elem)
|
||||
return nullptr;
|
||||
euf::snode* deriv = derivative(re, elem);
|
||||
euf::snode const* deriv = derivative(re, elem);
|
||||
if (deriv)
|
||||
propagate_self_stabilizing(re, deriv);
|
||||
return deriv;
|
||||
|
|
@ -279,7 +279,7 @@ namespace seq {
|
|||
// Uniform derivative (symbolic character consumption)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
euf::snode* seq_regex::try_uniform_derivative(euf::snode* regex) {
|
||||
euf::snode const* seq_regex::try_uniform_derivative(euf::snode const* regex) const {
|
||||
if (!regex)
|
||||
return nullptr;
|
||||
|
||||
|
|
@ -303,11 +303,11 @@ namespace seq {
|
|||
// Compute the derivative for each non-empty minterm. If all produce
|
||||
// the same result, the derivative is independent of the character
|
||||
// value and we can consume a symbolic character deterministically.
|
||||
euf::snode* uniform = nullptr;
|
||||
for (euf::snode* mt : minterms) {
|
||||
euf::snode const* uniform = nullptr;
|
||||
for (euf::snode const* mt : minterms) {
|
||||
if (!mt || mt->is_fail())
|
||||
continue; // empty character class — no character belongs to it
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(regex, mt);
|
||||
euf::snode const* deriv = m_sg.brzozowski_deriv(regex, mt);
|
||||
if (!deriv)
|
||||
return nullptr; // derivative computation failed
|
||||
if (!uniform) {
|
||||
|
|
@ -323,7 +323,7 @@ namespace seq {
|
|||
// Ground prefix consumption
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
bool seq_regex::is_empty_regex(euf::snode* re) const {
|
||||
bool seq_regex::is_empty_regex(euf::snode const* re) const {
|
||||
SASSERT(re);
|
||||
// direct empty language constant
|
||||
if (re->is_fail())
|
||||
|
|
@ -369,7 +369,7 @@ namespace seq {
|
|||
// BFS regex emptiness check — helper: collect character boundaries
|
||||
// This is faster than computing the actual minterms but probably not minimal
|
||||
// -----------------------------------------------------------------------
|
||||
void seq_regex::collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const {
|
||||
void seq_regex::collect_char_boundaries(euf::snode const* re, unsigned_vector& bounds) const {
|
||||
SASSERT(re && re->get_expr());
|
||||
|
||||
expr* e = re->get_expr();
|
||||
|
|
@ -425,7 +425,7 @@ namespace seq {
|
|||
// BFS regex emptiness check — helper: alphabet representatives
|
||||
// Faster alternative to computing all minterms and taking representatives of them
|
||||
// -----------------------------------------------------------------------
|
||||
bool seq_regex::get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps) {
|
||||
bool seq_regex::get_alphabet_representatives(euf::snode const* re, euf::snode_vector& reps) {
|
||||
if (!re || !re->get_expr())
|
||||
return false;
|
||||
|
||||
|
|
@ -466,7 +466,7 @@ namespace seq {
|
|||
|
||||
// NSB review: we have similar functionality in seq_rewriter::some_seq_in_re
|
||||
// currently both these versions only really work for strings, not general sequences
|
||||
lbool seq_regex::is_empty_bfs(euf::snode* re, unsigned max_states) {
|
||||
lbool seq_regex::is_empty_bfs(euf::snode const* re, unsigned max_states) {
|
||||
SASSERT(re);
|
||||
const expr* e = re->get_expr();
|
||||
SASSERT(e);
|
||||
|
|
@ -517,7 +517,7 @@ namespace seq {
|
|||
if (states_explored >= max_states)
|
||||
return l_undef; // also don't cache
|
||||
|
||||
euf::snode* current = worklist.back();
|
||||
euf::snode const* current = worklist.back();
|
||||
worklist.pop_back();
|
||||
++states_explored;
|
||||
|
||||
|
|
@ -533,11 +533,11 @@ namespace seq {
|
|||
// Nothing found = dead-end
|
||||
continue;
|
||||
|
||||
for (euf::snode* ch : reps) {
|
||||
for (euf::snode const* ch : reps) {
|
||||
if (!m.inc())
|
||||
return l_undef; // don't cache
|
||||
// std::cout << "Deriving by " << snode_label_html(ch, sg().get_manager()) << std::endl;
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(current, ch);
|
||||
euf::snode const* deriv = m_sg.brzozowski_deriv(current, ch);
|
||||
SASSERT(deriv);
|
||||
if (is_nullable(deriv))
|
||||
return cache_and_return(l_false); // found an accepting state
|
||||
|
|
@ -560,7 +560,7 @@ namespace seq {
|
|||
// Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
lbool seq_regex::check_intersection_emptiness(ptr_vector<euf::snode> const& regexes, unsigned max_states) {
|
||||
lbool seq_regex::check_intersection_emptiness(euf::snode_vector const& regexes, unsigned max_states) {
|
||||
|
||||
if (regexes.empty())
|
||||
return l_false; // empty intersection = full language (vacuously non-empty)
|
||||
|
|
@ -569,7 +569,7 @@ namespace seq {
|
|||
if (regexes.size() == 1)
|
||||
return is_empty_bfs(regexes[0], max_states);
|
||||
|
||||
euf::snode* result = regexes[0];
|
||||
euf::snode const* result = regexes[0];
|
||||
for (unsigned i = 1; i < regexes.size(); ++i) {
|
||||
expr* r1 = result->get_expr();
|
||||
expr* r2 = regexes[i]->get_expr();
|
||||
|
|
@ -587,7 +587,7 @@ namespace seq {
|
|||
// Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
lbool seq_regex::is_language_subset(euf::snode* subset_re, euf::snode* superset_re) {
|
||||
lbool seq_regex::is_language_subset(euf::snode const* subset_re, euf::snode const* superset_re) {
|
||||
if (!subset_re || !superset_re)
|
||||
return l_undef;
|
||||
|
||||
|
|
@ -601,13 +601,13 @@ namespace seq {
|
|||
|
||||
// Build complement(superset)
|
||||
expr* sup_expr = superset_re->get_expr();
|
||||
euf::snode *comp_sn = m_sg.mk(seq.re.mk_complement(sup_expr));
|
||||
euf::snode const* comp_sn = m_sg.mk(seq.re.mk_complement(sup_expr));
|
||||
|
||||
// Build intersection and check emptiness
|
||||
// subset ∩ complement(superset) should be empty for subset relation
|
||||
expr* sub_expr = subset_re->get_expr();
|
||||
auto inter = seq.re.mk_inter(sub_expr, comp_sn->get_expr());
|
||||
euf::snode* inter_sn = m_sg.mk(inter);
|
||||
euf::snode const* inter_sn = m_sg.mk(inter);
|
||||
return is_empty_bfs(inter_sn);
|
||||
}
|
||||
|
||||
|
|
@ -615,17 +615,17 @@ namespace seq {
|
|||
// Collect primitive regex intersection for a variable
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
euf::snode* seq_regex::collect_primitive_regex_intersection(
|
||||
euf::snode* var, nielsen_node const& node, dep_manager& dep_mgr, dep_tracker& dep) const {
|
||||
euf::snode const* seq_regex::collect_primitive_regex_intersection(
|
||||
euf::snode const* var, nielsen_node const& node, dep_manager& dep_mgr, dep_tracker& dep) const {
|
||||
SASSERT(var);
|
||||
|
||||
euf::snode* result = nullptr;
|
||||
euf::snode const* result = nullptr;
|
||||
|
||||
for (auto const& mem : node.str_mems()) {
|
||||
// Primitive constraint: str is a single variable
|
||||
if (!mem.is_primitive())
|
||||
continue;
|
||||
euf::snode *first = mem.m_str->first();
|
||||
euf::snode const* first = mem.m_str->first();
|
||||
// NSB review: why is this "first" and not mem.m_str?
|
||||
SASSERT(first);
|
||||
if (first != var)
|
||||
|
|
@ -666,11 +666,11 @@ namespace seq {
|
|||
return simplify_status::ok;
|
||||
|
||||
while (mem.m_str && !mem.m_str->is_empty()) {
|
||||
euf::snode* first = mem.m_str->first();
|
||||
euf::snode const* first = mem.m_str->first();
|
||||
if (!first || !first->is_char())
|
||||
break;
|
||||
euf::snode* parent_re = mem.m_regex;
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(parent_re, first);
|
||||
euf::snode const* parent_re = mem.m_regex;
|
||||
euf::snode const* deriv = m_sg.brzozowski_deriv(parent_re, first);
|
||||
if (!deriv)
|
||||
break;
|
||||
if (deriv->is_fail())
|
||||
|
|
@ -732,7 +732,7 @@ namespace seq {
|
|||
// Minterm computation with filtering
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
void seq_regex::get_minterms(euf::snode* regex, euf::snode_vector& minterms) {
|
||||
void seq_regex::get_minterms(euf::snode const* regex, euf::snode_vector& minterms) {
|
||||
if (!regex)
|
||||
return;
|
||||
|
||||
|
|
@ -744,7 +744,7 @@ namespace seq {
|
|||
// note: minterms are regex character-class expressions, not concrete
|
||||
// characters, so we cannot compute Brzozowski derivatives with them.
|
||||
// callers should compute derivatives using concrete or fresh chars.
|
||||
for (euf::snode* mt : raw) {
|
||||
for (euf::snode const* mt : raw) {
|
||||
if (!mt || mt->is_fail())
|
||||
continue;
|
||||
minterms.push_back(mt);
|
||||
|
|
@ -763,8 +763,8 @@ namespace seq {
|
|||
return is_nullable(mem.m_regex);
|
||||
|
||||
// consume ground prefix: derive regex by each leading concrete char
|
||||
seq::str_mem working = mem;
|
||||
simplify_status st = simplify_ground_prefix(working);
|
||||
str_mem working = mem;
|
||||
const simplify_status st = simplify_ground_prefix(working);
|
||||
if (st == simplify_status::conflict)
|
||||
return false;
|
||||
if (st == simplify_status::satisfied)
|
||||
|
|
@ -773,9 +773,9 @@ namespace seq {
|
|||
// after ground prefix consumption, if the front is still a concrete
|
||||
// character we can take one more step (shouldn't happen after
|
||||
// simplify_ground_prefix, but guard defensively)
|
||||
euf::snode* first = working.m_str->first();
|
||||
euf::snode const* first = working.m_str->first();
|
||||
if (first && first->is_char()) {
|
||||
seq::str_mem derived = derive(working, first);
|
||||
const str_mem derived = derive(working, first);
|
||||
if (is_empty_regex(derived.m_regex))
|
||||
return false;
|
||||
out_mems.push_back(derived);
|
||||
|
|
@ -793,7 +793,7 @@ namespace seq {
|
|||
// History recording
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
seq::str_mem seq_regex::record_history(seq::str_mem const& mem, euf::snode* history_re) {
|
||||
seq::str_mem seq_regex::record_history(seq::str_mem const& mem, euf::snode const* history_re) {
|
||||
|
||||
return str_mem(mem.m_str, mem.m_regex, mem.m_dep);
|
||||
}
|
||||
|
|
@ -802,15 +802,15 @@ namespace seq {
|
|||
// Cycle detection
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
euf::snode* seq_regex::extract_cycle(seq::str_mem const& mem) const {
|
||||
euf::snode const* seq_regex::extract_cycle(seq::str_mem const& mem) const {
|
||||
#if 0
|
||||
// Walk the history chain looking for a repeated regex.
|
||||
// A cycle exists when the current regex matches a regex in the history.
|
||||
if (!mem.m_regex || !mem.m_history)
|
||||
return nullptr;
|
||||
|
||||
euf::snode* current = mem.m_regex;
|
||||
euf::snode* hist = mem.m_history;
|
||||
euf::snode const* current = mem.m_regex;
|
||||
euf::snode const* hist = mem.m_history;
|
||||
|
||||
// Walk the history chain up to a bounded depth.
|
||||
// The history is structured as a chain of regex snapshots connected
|
||||
|
|
@ -818,8 +818,8 @@ namespace seq {
|
|||
// and arg(1) is the tail. A leaf (non-concat) is a terminal entry.
|
||||
unsigned bound = 1000;
|
||||
while (hist && bound-- > 0) {
|
||||
euf::snode* entry = hist;
|
||||
euf::snode* tail = nullptr;
|
||||
euf::snode const* entry = hist;
|
||||
euf::snode const* tail = nullptr;
|
||||
|
||||
// If the history node is a regex concat, decompose it:
|
||||
// arg(0) is the regex snapshot, arg(1) is the rest of the chain
|
||||
|
|
@ -842,8 +842,8 @@ namespace seq {
|
|||
// Stabilizer from cycle
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
euf::snode* seq_regex::stabilizer_from_cycle(euf::snode* cycle_regex,
|
||||
euf::snode* current_regex) {
|
||||
euf::snode const* seq_regex::stabilizer_from_cycle(euf::snode const* cycle_regex,
|
||||
euf::snode const* current_regex) {
|
||||
if (!cycle_regex || !current_regex)
|
||||
return nullptr;
|
||||
|
||||
|
|
@ -856,7 +856,7 @@ namespace seq {
|
|||
// Extract cycle history tokens
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
euf::snode* seq_regex::extract_cycle_history(seq::str_mem const& current,
|
||||
euf::snode const* seq_regex::extract_cycle_history(seq::str_mem const& current,
|
||||
seq::str_mem const& ancestor) {
|
||||
// The history is built by simplify_and_init as a left-associative
|
||||
// string concat chain: concat(concat(concat(nil, c1), c2), c3).
|
||||
|
|
@ -869,21 +869,21 @@ namespace seq {
|
|||
// Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
euf::snode* seq_regex::get_filtered_stabilizer_star(euf::snode* re,
|
||||
euf::snode* excluded_char) {
|
||||
euf::snode const* seq_regex::get_filtered_stabilizer_star(euf::snode const* re,
|
||||
euf::snode const* excluded_char) const {
|
||||
if (!re)
|
||||
return nullptr;
|
||||
|
||||
ptr_vector<euf::snode> const* stabs = get_stabilizers(re);
|
||||
euf::snode_vector const* stabs = get_stabilizers(re);
|
||||
if (!stabs || stabs->empty())
|
||||
return nullptr;
|
||||
euf::snode* filtered_union = nullptr;
|
||||
euf::snode const* filtered_union = nullptr;
|
||||
|
||||
for (euf::snode* s : *stabs) {
|
||||
for (euf::snode const* s : *stabs) {
|
||||
if (!s)
|
||||
continue;
|
||||
// Keep only stabilizers whose language cannot start with excluded_char
|
||||
euf::snode* d = m_sg.brzozowski_deriv(s, excluded_char);
|
||||
euf::snode const* d = m_sg.brzozowski_deriv(s, excluded_char);
|
||||
if (d && d->is_fail()) {
|
||||
if (!filtered_union) {
|
||||
filtered_union = s;
|
||||
|
|
@ -913,8 +913,8 @@ namespace seq {
|
|||
// Mirrors ZIPT StrMem.StabilizerFromCycle (StrMem.cs:163-225)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
euf::snode* seq_regex::strengthened_stabilizer(euf::snode* cycle_regex,
|
||||
euf::snode* cycle_history) {
|
||||
euf::snode const* seq_regex::strengthened_stabilizer(euf::snode const* cycle_regex,
|
||||
euf::snode const* cycle_history) {
|
||||
if (!cycle_regex || !cycle_history)
|
||||
return nullptr;
|
||||
|
||||
|
|
@ -929,14 +929,14 @@ namespace seq {
|
|||
// A sub-cycle is detected when the derivative returns to cycle_regex.
|
||||
svector<std::pair<unsigned, unsigned>> sub_cycles;
|
||||
unsigned cycle_start = 0;
|
||||
euf::snode* current_re = cycle_regex;
|
||||
euf::snode const* current_re = cycle_regex;
|
||||
|
||||
for (unsigned i = 0; i < tokens.size(); ++i) {
|
||||
euf::snode* tok = tokens[i];
|
||||
euf::snode const* tok = tokens[i];
|
||||
if (!tok)
|
||||
return nullptr;
|
||||
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(current_re, tok);
|
||||
euf::snode const* deriv = m_sg.brzozowski_deriv(current_re, tok);
|
||||
if (!deriv)
|
||||
return nullptr;
|
||||
|
||||
|
|
@ -961,7 +961,7 @@ namespace seq {
|
|||
|
||||
// Build a stabilizer body for each sub-cycle.
|
||||
// body = to_re(t0) · [filteredStar(R1, t1)] · to_re(t1) · ... · to_re(t_{n-1})
|
||||
euf::snode* overall_union = nullptr;
|
||||
euf::snode const* overall_union = nullptr;
|
||||
|
||||
for (auto const& sc : sub_cycles) {
|
||||
unsigned start = sc.first;
|
||||
|
|
@ -969,17 +969,17 @@ namespace seq {
|
|||
if (start >= end)
|
||||
continue;
|
||||
|
||||
euf::snode* re_state = cycle_regex;
|
||||
euf::snode* body = nullptr;
|
||||
euf::snode const* re_state = cycle_regex;
|
||||
euf::snode const* body = nullptr;
|
||||
|
||||
for (unsigned i = start; i < end; ++i) {
|
||||
euf::snode* tok = tokens[i];
|
||||
euf::snode const* tok = tokens[i];
|
||||
if (!tok)
|
||||
break;
|
||||
|
||||
// Insert filtered stabilizer star before each token after the first
|
||||
if (i > start) {
|
||||
euf::snode* filtered = get_filtered_stabilizer_star(re_state, tok);
|
||||
euf::snode const* filtered = get_filtered_stabilizer_star(re_state, tok);
|
||||
if (filtered) {
|
||||
expr* fe = filtered->get_expr();
|
||||
if (fe) {
|
||||
|
|
@ -998,7 +998,7 @@ namespace seq {
|
|||
|
||||
expr_ref unit_str(seq.str.mk_unit(tok_expr), m);
|
||||
expr_ref tok_re(seq.re.mk_to_re(unit_str), m);
|
||||
euf::snode* tok_re_sn = m_sg.mk(tok_re);
|
||||
euf::snode const* tok_re_sn = m_sg.mk(tok_re);
|
||||
|
||||
if (!body) {
|
||||
body = tok_re_sn;
|
||||
|
|
@ -1012,7 +1012,7 @@ namespace seq {
|
|||
}
|
||||
|
||||
// Advance the regex state
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(re_state, tok);
|
||||
euf::snode const* deriv = m_sg.brzozowski_deriv(re_state, tok);
|
||||
if (!deriv)
|
||||
break;
|
||||
re_state = deriv;
|
||||
|
|
@ -1046,7 +1046,7 @@ namespace seq {
|
|||
SASSERT(mem.m_str && mem.m_regex);
|
||||
|
||||
// 1. Leading token must be a variable
|
||||
euf::snode* first = mem.m_str->first();
|
||||
euf::snode const* first = mem.m_str->first();
|
||||
if (!first || !first->is_var())
|
||||
return false;
|
||||
|
||||
|
|
@ -1055,16 +1055,16 @@ namespace seq {
|
|||
return false;
|
||||
|
||||
// 3. Build stabStar = star(union(all stabilizers for this regex))
|
||||
euf::snode* stab_union = get_stabilizer_union(mem.m_regex);
|
||||
euf::snode const* stab_union = get_stabilizer_union(mem.m_regex);
|
||||
if (!stab_union)
|
||||
return false;
|
||||
|
||||
expr* su_expr = stab_union->get_expr();
|
||||
expr_ref stab_star(seq.re.mk_star(su_expr), m);
|
||||
euf::snode* stab_star_sn = m_sg.mk(stab_star);
|
||||
euf::snode const* stab_star_sn = m_sg.mk(stab_star);
|
||||
|
||||
// 4. Collect all primitive regex constraints on variable `first`
|
||||
euf::snode* x_range = collect_primitive_regex_intersection(first, node, dep);
|
||||
euf::snode const* x_range = collect_primitive_regex_intersection(first, node, dep);
|
||||
if (!x_range)
|
||||
return false;
|
||||
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ namespace seq {
|
|||
// Maps regex snode id → list of stabilizer snodes.
|
||||
// Each regex may accumulate multiple stabilizers from different
|
||||
// cycle detections. The list is deduplicated by pointer equality.
|
||||
u_map<ptr_vector<euf::snode>> m_stabilizers;
|
||||
u_map<euf::snode_vector> m_stabilizers;
|
||||
|
||||
// Set of regex snode ids that are self-stabilizing, i.e., the
|
||||
// stabilizer for the regex is the regex itself (e.g., r*).
|
||||
|
|
@ -69,12 +69,12 @@ namespace seq {
|
|||
// to_re string literals in a regex. Boundaries partition the
|
||||
// alphabet into equivalence classes where all characters in
|
||||
// the same class produce identical derivatives.
|
||||
void collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const;
|
||||
void collect_char_boundaries(euf::snode const* re, unsigned_vector& bounds) const;
|
||||
|
||||
// Build a set of representative character snodes, one per
|
||||
// alphabet equivalence class, derived from the boundary points
|
||||
// of the given regex.
|
||||
bool get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps);
|
||||
bool get_alphabet_representatives(euf::snode const* re, euf::snode_vector& reps);
|
||||
|
||||
public:
|
||||
|
||||
|
|
@ -114,28 +114,28 @@ namespace seq {
|
|||
|
||||
// Add a stabilizer for a regex. De-duplicates by pointer equality.
|
||||
// Mirrors ZIPT Environment.AddStabilizer (Environment.cs:114-123).
|
||||
void add_stabilizer(euf::snode* regex, euf::snode* stabilizer);
|
||||
void add_stabilizer(euf::snode const* regex, euf::snode const* stabilizer);
|
||||
|
||||
// Get the union of all stabilizers registered for a regex.
|
||||
// Returns a single re.union snode combining all stabilizers,
|
||||
// or nullptr if no stabilizers exist for the regex.
|
||||
// Mirrors ZIPT Environment.GetStabilizerUnion (Environment.cs:125-128).
|
||||
euf::snode* get_stabilizer_union(euf::snode* regex);
|
||||
euf::snode const* get_stabilizer_union(euf::snode const* regex);
|
||||
|
||||
// Check if any stabilizers have been registered for a regex.
|
||||
bool has_stabilizers(euf::snode* regex) const;
|
||||
bool has_stabilizers(euf::snode const* regex) const;
|
||||
|
||||
// Get raw stabilizer list for a regex (read-only).
|
||||
// Returns nullptr if no stabilizers exist.
|
||||
ptr_vector<euf::snode> const* get_stabilizers(euf::snode* regex) const;
|
||||
euf::snode_vector const* get_stabilizers(euf::snode const* regex) const;
|
||||
|
||||
// Mark a regex as self-stabilizing (stabilizer == regex itself).
|
||||
// Mirrors ZIPT Environment.SetSelfStabilizing (Environment.cs:143-146).
|
||||
void set_self_stabilizing(euf::snode* regex);
|
||||
void set_self_stabilizing(euf::snode const* regex);
|
||||
|
||||
// Check if a regex is marked as self-stabilizing.
|
||||
// Mirrors ZIPT Environment.IsSelfStabilizing (Environment.cs:134-141).
|
||||
bool is_self_stabilizing(euf::snode* regex) const;
|
||||
bool is_self_stabilizing(euf::snode const* regex) const;
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Self-stabilizing auto-detection and propagation through derivatives
|
||||
|
|
@ -149,7 +149,7 @@ namespace seq {
|
|||
// - ∅ (fail/empty language): no live derivatives, trivially stable.
|
||||
// - Complement of full_seq (~Σ* = ∅): also trivially stable.
|
||||
// Does NOT mark the snode; call set_self_stabilizing to persist.
|
||||
bool compute_self_stabilizing(euf::snode* regex) const;
|
||||
bool compute_self_stabilizing(euf::snode const* regex) const;
|
||||
|
||||
// After computing a derivative of parent, propagate the self-
|
||||
// stabilizing flag to the derivative result if warranted.
|
||||
|
|
@ -162,12 +162,12 @@ namespace seq {
|
|||
// - If parent is R∩S and both are self-stabilizing → derivative is.
|
||||
// - If parent is ~R and R is self-stabilizing → derivative is.
|
||||
// Updates the internal self-stabilizing set for the derivative.
|
||||
void propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv);
|
||||
void propagate_self_stabilizing(euf::snode const* parent, euf::snode const* deriv);
|
||||
|
||||
// Convenience: compute derivative and propagate self-stabilizing flags.
|
||||
// Equivalent to calling derivative() followed by
|
||||
// propagate_self_stabilizing().
|
||||
euf::snode* derivative_with_propagation(euf::snode* re, euf::snode* elem);
|
||||
euf::snode const* derivative_with_propagation(euf::snode const* re, euf::snode const* elem);
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Basic regex predicates
|
||||
|
|
@ -176,7 +176,7 @@ namespace seq {
|
|||
// check if regex is the empty language (∅ / re.empty).
|
||||
// performs structural analysis beyond is_fail() to detect
|
||||
// derived emptiness (e.g., union of empties, concat with empty).
|
||||
bool is_empty_regex(euf::snode* re) const;
|
||||
bool is_empty_regex(euf::snode const* re) const;
|
||||
|
||||
// BFS emptiness check over the Brzozowski derivative automaton.
|
||||
// Explores reachable derivative states using representative
|
||||
|
|
@ -185,7 +185,7 @@ namespace seq {
|
|||
// l_false — regex is definitely non-empty (found a nullable state)
|
||||
// l_undef — inconclusive (hit exploration bound or failed derivative)
|
||||
// max_states caps the number of explored states to prevent blowup.
|
||||
lbool is_empty_bfs(euf::snode* re, unsigned max_states = 10000);
|
||||
lbool is_empty_bfs(euf::snode const* re, unsigned max_states = 10000);
|
||||
|
||||
// Check emptiness of the intersection of multiple regexes.
|
||||
// Uses BFS over the product of Brzozowski derivative automata.
|
||||
|
|
@ -193,33 +193,33 @@ namespace seq {
|
|||
// l_false — intersection is definitely non-empty
|
||||
// l_undef — inconclusive (hit exploration bound)
|
||||
// Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469)
|
||||
lbool check_intersection_emptiness(ptr_vector<euf::snode> const& regexes, unsigned max_states = UINT_MAX);
|
||||
lbool check_intersection_emptiness(euf::snode_vector const& regexes, unsigned max_states = UINT_MAX);
|
||||
|
||||
// Check if L(subset_re) ⊆ L(superset_re).
|
||||
// Computed as: subset_re ∩ complement(superset_re) = ∅.
|
||||
// Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385)
|
||||
lbool is_language_subset(euf::snode* subset_re, euf::snode* superset_re);
|
||||
lbool is_language_subset(euf::snode const* subset_re, euf::snode const* superset_re);
|
||||
|
||||
// Collect all primitive regex constraints on variable `var` from
|
||||
// the node's str_mem list and return their intersection as a
|
||||
// single regex snode (using re.inter).
|
||||
// Returns nullptr if no primitive constraints found.
|
||||
euf::snode* collect_primitive_regex_intersection(
|
||||
euf::snode* var, nielsen_node const& node, dep_manager& dep_mgr, dep_tracker& dep) const;
|
||||
euf::snode const* collect_primitive_regex_intersection(
|
||||
euf::snode const* var, nielsen_node const& node, dep_manager& dep_mgr, dep_tracker& dep) const;
|
||||
|
||||
// check if regex is the full language (Σ* / re.all)
|
||||
bool is_full_regex(euf::snode* re) const {
|
||||
static bool is_full_regex(euf::snode const* re) {
|
||||
return re && re->is_full_seq();
|
||||
}
|
||||
|
||||
// check if regex accepts the empty string
|
||||
// (projection-aware: re may contain re.proj operators)
|
||||
bool is_nullable(euf::snode* re) const {
|
||||
bool is_nullable(euf::snode const* re) const {
|
||||
return re && m_sg.re_nullable(re) == l_true;
|
||||
}
|
||||
|
||||
// check if regex is ground (no string variables)
|
||||
bool is_ground(euf::snode* re) const {
|
||||
bool is_ground(euf::snode const* re) const {
|
||||
return re && re->is_ground();
|
||||
}
|
||||
|
||||
|
|
@ -229,7 +229,7 @@ namespace seq {
|
|||
|
||||
// compute Brzozowski derivative of regex w.r.t. character element.
|
||||
// returns nullptr on failure.
|
||||
euf::snode* derivative(euf::snode* re, euf::snode* elem) {
|
||||
euf::snode const* derivative(euf::snode const* re, euf::snode const* elem) {
|
||||
return m_sg.brzozowski_deriv(re, elem);
|
||||
}
|
||||
|
||||
|
|
@ -240,17 +240,17 @@ namespace seq {
|
|||
// of symbolic (variable) characters without branching.
|
||||
// Returns the uniform derivative if found, nullptr otherwise.
|
||||
// Mirrors ZIPT's SimplifyCharRegex uniform-derivative fast path.
|
||||
euf::snode* try_uniform_derivative(euf::snode* regex);
|
||||
euf::snode const* try_uniform_derivative(euf::snode const* regex) const;
|
||||
|
||||
// compute derivative of a str_mem constraint: advance past one character.
|
||||
// the string side is shortened by drop_first and the regex is derived.
|
||||
// Propagates self-stabilizing flags from the parent regex to the derivative.
|
||||
str_mem derive(str_mem const& mem, euf::snode* elem) {
|
||||
euf::snode* parent_re = mem.m_regex;
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(parent_re, elem);
|
||||
str_mem derive(str_mem const& mem, euf::snode const* elem) {
|
||||
euf::snode const* parent_re = mem.m_regex;
|
||||
euf::snode const* deriv = m_sg.brzozowski_deriv(parent_re, elem);
|
||||
if (deriv)
|
||||
propagate_self_stabilizing(parent_re, deriv);
|
||||
euf::snode* new_str = m_sg.drop_first(mem.m_str);
|
||||
euf::snode const* new_str = m_sg.drop_first(mem.m_str);
|
||||
return str_mem(new_str, deriv, mem.m_dep);
|
||||
}
|
||||
|
||||
|
|
@ -268,13 +268,13 @@ namespace seq {
|
|||
// - the string becomes empty and regex is nullable (satisfied)
|
||||
// - the string becomes empty and regex is not nullable (conflict)
|
||||
// modifies mem in-place.
|
||||
simplify_status simplify_ground_prefix(seq::str_mem& mem);
|
||||
simplify_status simplify_ground_prefix(str_mem& mem);
|
||||
|
||||
// consume ground characters from the back of mem.m_str by computing
|
||||
// reverse derivatives. modifies mem in-place.
|
||||
// (reverse derivatives require regex reversal; this is a best-effort
|
||||
// simplification that handles the common case of trailing constants.)
|
||||
simplify_status simplify_ground_suffix(seq::str_mem& mem);
|
||||
simplify_status simplify_ground_suffix(str_mem& mem);
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Trivial checks
|
||||
|
|
@ -284,14 +284,14 @@ namespace seq {
|
|||
// returns 1 if satisfied (empty string in nullable regex, or full regex)
|
||||
// returns -1 if conflicting (empty string in non-nullable, or ∅ regex)
|
||||
// returns 0 if undetermined
|
||||
int check_trivial(seq::str_mem const& mem) const;
|
||||
int check_trivial(str_mem const& mem) const;
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Minterm and character computation
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
// compute minterms (character class partition) from regex
|
||||
void compute_minterms(euf::snode* re, euf::snode_vector& minterms) {
|
||||
void compute_minterms(euf::snode const* re, euf::snode_vector& minterms) {
|
||||
m_sg.compute_minterms(re, minterms);
|
||||
}
|
||||
|
||||
|
|
@ -299,7 +299,7 @@ namespace seq {
|
|||
// (fail) minterms. Minterms are regex character-class expressions
|
||||
// forming a partition of the alphabet; callers use them to drive
|
||||
// fresh-variable creation in character-split modifiers.
|
||||
void get_minterms(euf::snode* regex, euf::snode_vector& minterms);
|
||||
void get_minterms(euf::snode const* regex, euf::snode_vector& minterms);
|
||||
|
||||
// collect concrete first-position characters from a regex.
|
||||
// extracts characters reachable from to_re leaves and simple ranges.
|
||||
|
|
@ -314,8 +314,8 @@ namespace seq {
|
|||
// for the Nielsen graph to expand via character-split modifiers.
|
||||
// returns false if the constraint is immediately conflicting
|
||||
// (empty string in non-nullable regex, or derivative yields ∅).
|
||||
bool process_str_mem(seq::str_mem const& mem,
|
||||
vector<seq::str_mem>& out_mems);
|
||||
bool process_str_mem(str_mem const& mem,
|
||||
vector<str_mem>& out_mems);
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Cycle detection and stabilizers
|
||||
|
|
@ -324,24 +324,24 @@ namespace seq {
|
|||
// record current regex in the derivation history of a str_mem.
|
||||
// the history tracks a chain of (regex, id) pairs for cycle detection.
|
||||
// returns the updated str_mem.
|
||||
seq::str_mem record_history(seq::str_mem const& mem, euf::snode* history_re);
|
||||
str_mem record_history(str_mem const& mem, euf::snode const* history_re);
|
||||
|
||||
// check if the derivation history of mem contains a cycle, i.e.,
|
||||
// the same regex id appears twice in the history chain.
|
||||
// if found, returns the cycle entry point regex; nullptr otherwise.
|
||||
euf::snode* extract_cycle(seq::str_mem const& mem) const;
|
||||
euf::snode const* extract_cycle(str_mem const& mem) const;
|
||||
|
||||
// check if the derivation history exhibits a cycle.
|
||||
// returns true when the current regex matches a previously seen regex
|
||||
// in the history chain. used to trigger stabilizer introduction.
|
||||
bool detect_cycle(seq::str_mem const& mem) const;
|
||||
bool detect_cycle(str_mem const& mem) const;
|
||||
|
||||
// compute a Kleene star stabilizer from a cycle.
|
||||
// given the regex at the cycle point and the current regex,
|
||||
// builds r* that over-approximates any number of cycle iterations.
|
||||
// returns nullptr if no stabilizer can be computed.
|
||||
euf::snode* stabilizer_from_cycle(euf::snode* cycle_regex,
|
||||
euf::snode* current_regex);
|
||||
euf::snode const* stabilizer_from_cycle(euf::snode const* cycle_regex,
|
||||
euf::snode const* current_regex);
|
||||
|
||||
// Strengthened stabilizer construction with sub-cycle detection.
|
||||
// Replays the consumed character tokens from cycle_history on the
|
||||
|
|
@ -352,8 +352,8 @@ namespace seq {
|
|||
// Returns a union of all sub-cycle stabilizer bodies, or nullptr
|
||||
// if no non-trivial stabilizer can be built.
|
||||
// Mirrors ZIPT StrMem.StabilizerFromCycle (StrMem.cs:163-225).
|
||||
euf::snode* strengthened_stabilizer(euf::snode* cycle_regex,
|
||||
euf::snode* cycle_history);
|
||||
euf::snode const* strengthened_stabilizer(euf::snode const* cycle_regex,
|
||||
euf::snode const* cycle_history);
|
||||
|
||||
// Get filtered stabilizer star: for regex state re, retrieve
|
||||
// existing stabilizers, filter out those whose language can
|
||||
|
|
@ -361,15 +361,15 @@ namespace seq {
|
|||
// remaining in star(union(...)).
|
||||
// Returns nullptr (or an empty-equivalent result) if no valid stabilizers remain.
|
||||
// Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243).
|
||||
euf::snode* get_filtered_stabilizer_star(euf::snode* re,
|
||||
euf::snode* excluded_char);
|
||||
euf::snode const* get_filtered_stabilizer_star(euf::snode const* re,
|
||||
euf::snode const* excluded_char) const;
|
||||
|
||||
// Extract the cycle portion of a str_mem's history by comparing
|
||||
// the current history with an ancestor's history length.
|
||||
// Returns the sub-sequence of tokens consumed since the ancestor,
|
||||
// or nullptr if the history did not advance.
|
||||
euf::snode* extract_cycle_history(seq::str_mem const& current,
|
||||
seq::str_mem const& ancestor);
|
||||
euf::snode const* extract_cycle_history(str_mem const& current,
|
||||
str_mem const& ancestor);
|
||||
|
||||
// try to subsume a str_mem constraint using stabilizer-based
|
||||
// reasoning. Enhanced version: checks if the leading variable's
|
||||
|
|
@ -378,7 +378,7 @@ namespace seq {
|
|||
// Falls back to a cycle-based pointer-equality check.
|
||||
// returns true if the constraint can be dropped.
|
||||
// Mirrors ZIPT StrMem.TrySubsume (StrMem.cs:354-386).
|
||||
bool try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node);
|
||||
bool try_subsume(str_mem const& mem, nielsen_node const& node);
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,19 +29,19 @@ namespace smt {
|
|||
|
||||
struct tracked_str_eq : seq::str_eq {
|
||||
enode *m_l, *m_r;
|
||||
tracked_str_eq(euf::snode *lhs, euf::snode *rhs, enode *l, enode *r, seq::dep_tracker const &dep)
|
||||
tracked_str_eq(euf::snode const* lhs, euf::snode const* rhs, enode* l, enode* r, seq::dep_tracker const &dep)
|
||||
: str_eq(lhs, rhs, dep), m_l(l), m_r(r) {}
|
||||
};
|
||||
|
||||
struct tracked_str_deq : seq::str_deq {
|
||||
sat::literal lit;
|
||||
tracked_str_deq(euf::snode *lhs, euf::snode *rhs, const sat::literal lit, seq::dep_tracker const &dep)
|
||||
tracked_str_deq(euf::snode const* lhs, euf::snode const* rhs, const sat::literal lit, seq::dep_tracker const &dep)
|
||||
: str_deq(lhs, rhs, dep), lit(lit) {}
|
||||
};
|
||||
|
||||
struct tracked_str_mem : seq::str_mem {
|
||||
sat::literal lit;
|
||||
tracked_str_mem(euf::snode *str, euf::snode *regex, const sat::literal lit, seq::dep_tracker const &dep)
|
||||
tracked_str_mem(euf::snode const* str, euf::snode const* regex, const sat::literal lit, seq::dep_tracker const &dep)
|
||||
: str_mem(str, regex, dep), lit(lit) {}
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue