3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-17 18:43:45 +00:00
z3/src/smt/nseq_regex.h
Nikolaj Bjorner 5aa3713d19 first end-pass. Atomic
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
2026-03-04 02:05:26 -08:00

185 lines
8 KiB
C++

/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_regex.h
Abstract:
Lazy regex membership processing for the Nielsen-based string solver.
Provides Brzozowski derivative computation, ground prefix/suffix
consumption, cycle detection in derivation histories, and
stabilizer-based subsumption for regex membership constraints.
Ports the following ZIPT StrMem operations:
- SimplifyCharRegex / SimplifyDir (ground prefix/suffix consumption)
- ExtractCycle / StabilizerFromCycle (cycle detection and widening)
- TrySubsume (stabilizer-based subsumption)
The class wraps sgraph operations (brzozowski_deriv, compute_minterms,
drop_first, etc.) and provides a higher-level interface for
nielsen_graph and theory_nseq.
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#pragma once
#include "ast/euf/euf_sgraph.h"
#include "smt/seq/seq_nielsen.h"
namespace smt {

    // Higher-level regex interface layered on top of euf::sgraph.
    // Holds a reference to the sgraph (not owned) and forwards the
    // primitive operations (brzozowski_deriv, compute_minterms,
    // drop_first) to it. The out-of-line members are only declared
    // here; their definitions live outside this header.
    class nseq_regex {
        euf::sgraph& m_sg;   // borrowed; must outlive this object

    public:
        // explicit: a bare sgraph reference should never silently
        // convert into an nseq_regex wrapper.
        explicit nseq_regex(euf::sgraph& sg) : m_sg(sg) {}

        // access the underlying sgraph.
        euf::sgraph& sg() { return m_sg; }

        // -----------------------------------------------------------------
        // Basic regex predicates
        // -----------------------------------------------------------------

        // check if regex is the empty language (∅ / re.empty).
        // performs structural analysis beyond is_fail() to detect
        // derived emptiness (e.g., union of empties, concat with empty).
        bool is_empty_regex(euf::snode* re) const;

        // check if regex is the full language (Σ* / re.all).
        // a null node is reported as not full.
        bool is_full_regex(euf::snode* re) const {
            return re && re->is_full_seq();
        }

        // check if regex accepts the empty string.
        // a null node is reported as not nullable.
        bool is_nullable(euf::snode* re) const {
            return re && re->is_nullable();
        }

        // check if regex is ground (no string variables).
        // a null node is reported as not ground.
        bool is_ground(euf::snode* re) const {
            return re && re->is_ground();
        }

        // -----------------------------------------------------------------
        // Derivative computation
        // -----------------------------------------------------------------

        // compute Brzozowski derivative of regex w.r.t. character element.
        // forwards to sgraph::brzozowski_deriv.
        // returns nullptr on failure.
        euf::snode* derivative(euf::snode* re, euf::snode* elem) {
            return m_sg.brzozowski_deriv(re, elem);
        }

        // compute derivative of a str_mem constraint: advance past one character.
        // the string side is shortened by drop_first and the regex is derived
        // w.r.t. elem; history, id and dependency are carried over from mem
        // unchanged. mem itself is not modified.
        // NOTE(review): brzozowski_deriv is documented above as possibly
        // returning nullptr; that result is passed to the new str_mem
        // without a check here - confirm callers handle it.
        seq::str_mem derive(seq::str_mem const& mem, euf::snode* elem) {
            euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, elem);
            euf::snode* new_str = m_sg.drop_first(mem.m_str);
            return seq::str_mem(new_str, deriv, mem.m_history, mem.m_id, mem.m_dep);
        }

        // -----------------------------------------------------------------
        // Ground prefix/suffix consumption
        // -----------------------------------------------------------------

        // outcome of a ground prefix/suffix simplification pass.
        enum class simplify_status { ok, conflict, satisfied };

        // consume ground characters from the front of mem.m_str by computing
        // Brzozowski derivatives against mem.m_regex.
        // stops when:
        //   - the string front is not a concrete character (ok)
        //   - a derivative produces ∅ (conflict)
        //   - the string becomes empty and regex is nullable (satisfied)
        //   - the string becomes empty and regex is not nullable (conflict)
        // modifies mem in-place.
        simplify_status simplify_ground_prefix(seq::str_mem& mem);

        // consume ground characters from the back of mem.m_str by computing
        // reverse derivatives. modifies mem in-place.
        // (reverse derivatives require regex reversal; this is a best-effort
        // simplification that handles the common case of trailing constants.)
        simplify_status simplify_ground_suffix(seq::str_mem& mem);

        // -----------------------------------------------------------------
        // Trivial checks
        // -----------------------------------------------------------------

        // quick check for trivially sat/unsat membership.
        // returns  1 if satisfied (empty string in nullable regex, or full regex)
        // returns -1 if conflicting (empty string in non-nullable, or ∅ regex)
        // returns  0 if undetermined
        int check_trivial(seq::str_mem const& mem) const;

        // -----------------------------------------------------------------
        // Minterm and character computation
        // -----------------------------------------------------------------

        // compute minterms (character class partition) from regex.
        // forwards to sgraph::compute_minterms; results go to minterms.
        void compute_minterms(euf::snode* re, euf::snode_vector& minterms) {
            m_sg.compute_minterms(re, minterms);
        }

        // compute minterms for character splitting, filtering out empty
        // (fail) minterms. Minterms are regex character-class expressions
        // forming a partition of the alphabet; callers use them to drive
        // fresh-variable creation in character-split modifiers.
        void get_minterms(euf::snode* regex, euf::snode_vector& minterms);

        // collect concrete first-position characters from a regex.
        // extracts characters reachable from to_re leaves and simple ranges.
        void collect_first_chars(euf::snode* re, euf::snode_vector& chars);

        // -----------------------------------------------------------------
        // Membership processing
        // -----------------------------------------------------------------

        // process a str_mem constraint by consuming ground characters from
        // the string front via Brzozowski derivatives. If the entire ground
        // prefix is consumed and the constraint is neither satisfied nor
        // conflicting, the (simplified) constraint is pushed to out_mems
        // for the Nielsen graph to expand via character-split modifiers.
        // returns false if the constraint is immediately conflicting
        // (empty string in non-nullable regex, or derivative yields ∅).
        bool process_str_mem(seq::str_mem const& mem,
                             vector<seq::str_mem>& out_mems);

        // -----------------------------------------------------------------
        // Cycle detection and stabilizers
        // -----------------------------------------------------------------

        // record current regex in the derivation history of a str_mem.
        // the history tracks a chain of (regex, id) pairs for cycle detection.
        // returns the updated str_mem.
        seq::str_mem record_history(seq::str_mem const& mem, euf::snode* history_re);

        // check if the derivation history of mem contains a cycle, i.e.,
        // the same regex id appears twice in the history chain.
        // if found, returns the cycle entry point regex; nullptr otherwise.
        euf::snode* extract_cycle(seq::str_mem const& mem) const;

        // check if the derivation history exhibits a cycle.
        // returns true when the current regex matches a previously seen regex
        // in the history chain. used to trigger stabilizer introduction.
        bool detect_cycle(seq::str_mem const& mem) const;

        // compute a Kleene star stabilizer from a cycle.
        // given the regex at the cycle point and the current regex,
        // builds r* that over-approximates any number of cycle iterations.
        // returns nullptr if no stabilizer can be computed.
        euf::snode* stabilizer_from_cycle(euf::snode* cycle_regex,
                                          euf::snode* current_regex);

        // try to subsume a str_mem constraint using stabilizer-based
        // reasoning: if extract_cycle finds a cycle, check whether
        // the current regex is already covered by the stabilizer.
        // returns true if the constraint can be dropped.
        bool try_subsume(seq::str_mem const& mem);
    };

}