mirror of
https://github.com/Z3Prover/z3
synced 2026-03-17 18:43:45 +00:00
Merge pull request #8938 from Z3Prover/copilot/add-parikh-filter-implementation-again
Add seq_parikh.h and seq_parikh.cpp: Parikh image filter for ZIPT Nielsen solver
This commit is contained in:
commit
acbf5c6e4a
8 changed files with 1480 additions and 2 deletions
|
|
@ -1,5 +1,6 @@
|
|||
z3_add_component(smt_seq
|
||||
SOURCES
|
||||
seq_parikh.cpp
|
||||
seq_nielsen.cpp
|
||||
COMPONENT_DEPENDENCIES
|
||||
euf
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ Author:
|
|||
--*/
|
||||
|
||||
#include "smt/seq/seq_nielsen.h"
|
||||
#include "smt/seq/seq_parikh.h"
|
||||
#include "ast/arith_decl_plugin.h"
|
||||
#include "ast/ast_pp.h"
|
||||
#include "ast/rewriter/th_rewriter.h"
|
||||
|
|
@ -443,10 +444,12 @@ namespace seq {
|
|||
nielsen_graph::nielsen_graph(euf::sgraph& sg, simple_solver& solver):
|
||||
m_sg(sg),
|
||||
m_solver(solver),
|
||||
m_parikh(alloc(seq_parikh, sg)) {
|
||||
m_len_vars(sg.get_manager()) {
|
||||
}
|
||||
|
||||
nielsen_graph::~nielsen_graph() {
|
||||
dealloc(m_parikh);
|
||||
reset();
|
||||
}
|
||||
|
||||
|
|
@ -1864,6 +1867,23 @@ namespace seq {
|
|||
// nielsen_graph: search
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
void nielsen_graph::apply_parikh_to_node(nielsen_node& node) {
|
||||
if (node.m_parikh_applied) return;
|
||||
node.m_parikh_applied = true;
|
||||
|
||||
// Generate modular length constraints (len(str) = min_len + stride·k, etc.)
|
||||
// and append them to the node's integer constraint list.
|
||||
m_parikh->apply_to_node(node);
|
||||
|
||||
// Lightweight feasibility pre-check: does the Parikh modular constraint
|
||||
// contradict the variable's current integer bounds? If so, mark this
|
||||
// node as a Parikh-image conflict immediately (avoids a solver call).
|
||||
if (!node.is_currently_conflict() && m_parikh->check_parikh_conflict(node)) {
|
||||
node.set_general_conflict(true);
|
||||
node.set_reason(backtrack_reason::parikh_image);
|
||||
}
|
||||
}
|
||||
|
||||
void nielsen_graph::assert_root_constraints_to_solver() {
|
||||
if (m_root_constraints_asserted) return;
|
||||
m_root_constraints_asserted = true;
|
||||
|
|
@ -1970,6 +1990,17 @@ namespace seq {
|
|||
return search_result::unsat;
|
||||
}
|
||||
|
||||
// Apply Parikh image filter: generate modular length constraints and
|
||||
// perform a lightweight feasibility pre-check. The filter is guarded
|
||||
// internally (m_parikh_applied) so it only runs once per node.
|
||||
// Note: Parikh filtering is skipped for satisfied nodes (returned above);
|
||||
// a fully satisfied node has no remaining memberships to filter.
|
||||
apply_parikh_to_node(*node);
|
||||
if (node->is_currently_conflict()) {
|
||||
++m_stats.m_num_simplify_conflict;
|
||||
return search_result::unsat;
|
||||
}
|
||||
|
||||
// Assert any new int_constraints added during simplify_and_init for this
|
||||
// node into the current solver scope. Constraints inherited from the parent
|
||||
// (indices 0..m_parent_ic_count-1) are already present at the enclosing
|
||||
|
|
@ -3333,8 +3364,15 @@ namespace seq {
|
|||
nielsen_subst s(first, replacement, mem.m_dep);
|
||||
e->add_subst(s);
|
||||
child->apply_subst(m_sg, s);
|
||||
// TODO: derive char_set from minterm and add as range constraint
|
||||
// child->add_char_range(fresh_char, minterm_to_char_set(mt));
|
||||
// Constrain fresh_char to the character class of this minterm.
|
||||
// This is the key Parikh pruning step: when x → ?c · x' is
|
||||
// generated from minterm m_i, ?c must belong to the character
|
||||
// class described by m_i so that str ∈ derivative(R, m_i).
|
||||
if (mt->get_expr()) {
|
||||
char_set cs = m_parikh->minterm_to_char_set(mt->get_expr());
|
||||
if (!cs.is_empty())
|
||||
child->add_char_range(fresh_char, cs);
|
||||
}
|
||||
created = true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -250,6 +250,7 @@ namespace seq {
|
|||
class nielsen_node;
|
||||
class nielsen_edge;
|
||||
class nielsen_graph;
|
||||
class seq_parikh; // Parikh image filter (see seq_parikh.h)
|
||||
|
||||
/**
|
||||
* Abstract interface for an incremental solver used by nielsen_graph
|
||||
|
|
@ -528,6 +529,9 @@ namespace seq {
|
|||
// evaluation index for run tracking
|
||||
unsigned m_eval_idx = 0;
|
||||
|
||||
// Parikh filter: set to true once apply_parikh_to_node has been applied
|
||||
// to this node. Prevents duplicate constraint generation across DFS runs.
|
||||
bool m_parikh_applied = false;
|
||||
// number of int_constraints inherited from the parent node at clone time.
|
||||
// int_constraints[0..m_parent_ic_count) are already asserted at the
|
||||
// parent's solver scope; only [m_parent_ic_count..end) need to be
|
||||
|
|
@ -731,6 +735,9 @@ namespace seq {
|
|||
// Set to true after assert_root_constraints_to_solver() is first called.
|
||||
bool m_root_constraints_asserted = false;
|
||||
|
||||
// Parikh image filter: generates modular length constraints from regex
|
||||
// memberships. Allocated in the constructor; owned by this graph.
|
||||
seq_parikh* m_parikh = nullptr;
|
||||
// -----------------------------------------------
|
||||
// Modification counter for substitution length tracking.
|
||||
// mirrors ZIPT's LocalInfo.CurrentModificationCnt
|
||||
|
|
@ -863,6 +870,16 @@ namespace seq {
|
|||
private:
|
||||
search_result search_dfs(nielsen_node* node, unsigned depth, svector<nielsen_edge*>& cur_path);
|
||||
|
||||
// Apply the Parikh image filter to a node: generate modular length
|
||||
// constraints from regex memberships and append them to the node's
|
||||
// int_constraints. Also performs a lightweight feasibility pre-check;
|
||||
// if a Parikh conflict is detected the node's conflict flag is set with
|
||||
// backtrack_reason::parikh_image.
|
||||
//
|
||||
// Guarded by node.m_parikh_applied so that constraints are generated
|
||||
// only once per node across DFS iterations.
|
||||
void apply_parikh_to_node(nielsen_node& node);
|
||||
|
||||
// create a fresh variable with a unique name
|
||||
euf::snode* mk_fresh_var();
|
||||
|
||||
|
|
|
|||
387
src/smt/seq/seq_parikh.cpp
Normal file
387
src/smt/seq/seq_parikh.cpp
Normal file
|
|
@ -0,0 +1,387 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
seq_parikh.cpp
|
||||
|
||||
Abstract:
|
||||
|
||||
Parikh image filter implementation for the ZIPT-based Nielsen string
|
||||
solver. See seq_parikh.h for the full design description.
|
||||
|
||||
The key operation is compute_length_stride(re), which performs a
|
||||
structural traversal of the regex to find the period k such that all
|
||||
string lengths in L(re) are congruent to min_length(re) modulo k.
|
||||
The stride is used to generate modular length constraints that help
|
||||
the integer subsolver prune infeasible Nielsen graph nodes.
|
||||
|
||||
Author:
|
||||
|
||||
Clemens Eisenhofer 2026-03-10
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-10
|
||||
|
||||
--*/
|
||||
|
||||
#include "smt/seq/seq_parikh.h"
|
||||
#include "util/mpz.h"
|
||||
#include <string>
|
||||
|
||||
namespace seq {
|
||||
|
||||
seq_parikh::seq_parikh(euf::sgraph& sg)
|
||||
: m(sg.get_manager()), seq(m), a(m), m_fresh_cnt(0) {}
|
||||
|
||||
expr_ref seq_parikh::mk_fresh_int_var() {
|
||||
std::string name = "pk!" + std::to_string(m_fresh_cnt++);
|
||||
return expr_ref(m.mk_fresh_const(name.c_str(), a.mk_int()), m);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Stride computation
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// compute_length_stride: structural traversal of regex expression.
|
||||
//
|
||||
// Return value semantics:
|
||||
// 0 — fixed length (or empty language): no modular constraint needed
|
||||
// beyond the min == max bounds.
|
||||
// 1 — all integer lengths ≥ min_len are achievable: no useful modular
|
||||
// constraint.
|
||||
// k > 1 — all lengths in L(re) satisfy len ≡ min_len (mod k):
|
||||
// modular constraint len(str) = min_len + k·j is useful.
|
||||
unsigned seq_parikh::compute_length_stride(expr* re) {
|
||||
if (!re) return 1;
|
||||
|
||||
expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
|
||||
unsigned lo = 0, hi = 0;
|
||||
|
||||
// Empty language: no strings exist; stride is irrelevant.
|
||||
if (seq.re.is_empty(re))
|
||||
return 0;
|
||||
|
||||
// Epsilon regex {""}: single fixed length 0.
|
||||
if (seq.re.is_epsilon(re))
|
||||
return 0;
|
||||
|
||||
// to_re(concrete_string): fixed-length, no modular constraint needed.
|
||||
if (seq.re.is_to_re(re, s)) {
|
||||
// min_length == max_length, covered by bounds.
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Single character: range, full_char — fixed length 1.
|
||||
if (seq.re.is_range(re) || seq.re.is_full_char(re))
|
||||
return 0;
|
||||
|
||||
// full_seq (.* / Σ*): every length ≥ 0 is possible.
|
||||
if (seq.re.is_full_seq(re))
|
||||
return 1;
|
||||
|
||||
// r* — Kleene star.
|
||||
// L(r*) = {ε} ∪ L(r) ∪ L(r)·L(r) ∪ ...
|
||||
// If all lengths in L(r) are congruent to c modulo s (c = min_len, s = stride),
|
||||
// then L(r*) includes lengths {0, c, c+s, 2c, 2c+s, 2c+2s, ...} and
|
||||
// the overall GCD is gcd(c, s). This is strictly more accurate than
|
||||
// the previous gcd(min, max) approximation, which can be unsound when
|
||||
// the body contains lengths whose GCD is smaller than gcd(min, max).
|
||||
if (seq.re.is_star(re, r1)) {
|
||||
unsigned mn = seq.re.min_length(r1);
|
||||
unsigned inner = compute_length_stride(r1);
|
||||
// stride(r*) = gcd(min_length(r), stride(r))
|
||||
// when inner=0 (fixed-length body), gcd(mn, 0) = mn → stride = mn
|
||||
return u_gcd(mn, inner);
|
||||
}
|
||||
|
||||
// r+ — one or more: same stride analysis as r*.
|
||||
if (seq.re.is_plus(re, r1)) {
|
||||
unsigned mn = seq.re.min_length(r1);
|
||||
unsigned inner = compute_length_stride(r1);
|
||||
return u_gcd(mn, inner);
|
||||
}
|
||||
|
||||
// r? — zero or one: lengths = {0} ∪ lengths(r)
|
||||
// stride = GCD(mn_r, stride(r)) unless stride(r) is 0 (fixed length).
|
||||
if (seq.re.is_opt(re, r1)) {
|
||||
unsigned mn = seq.re.min_length(r1);
|
||||
unsigned inner = compute_length_stride(r1);
|
||||
// L(r?) includes length 0 and all lengths of L(r).
|
||||
// GCD(stride(r), min_len(r)) is a valid stride because:
|
||||
// - the gap from 0 to min_len(r) is min_len(r) itself, and
|
||||
// - subsequent lengths grow in steps governed by stride(r).
|
||||
// A result > 1 gives a useful modular constraint; result == 1
|
||||
// means every non-negative integer is achievable (no constraint).
|
||||
if (inner == 0)
|
||||
return u_gcd(mn, 0); // gcd(mn, 0) = mn; useful when mn > 1
|
||||
return u_gcd(inner, mn);
|
||||
}
|
||||
|
||||
// concat(r1, r2): lengths add → stride = GCD(stride(r1), stride(r2)).
|
||||
if (seq.re.is_concat(re, r1, r2)) {
|
||||
unsigned s1 = compute_length_stride(r1);
|
||||
unsigned s2 = compute_length_stride(r2);
|
||||
return u_gcd(s1, s2);
|
||||
}
|
||||
|
||||
// union(r1, r2): lengths from either branch → need GCD of both
|
||||
// strides and the difference between their minimum lengths.
|
||||
if (seq.re.is_union(re, r1, r2)) {
|
||||
unsigned s1 = compute_length_stride(r1);
|
||||
unsigned s2 = compute_length_stride(r2);
|
||||
unsigned m1 = seq.re.min_length(r1);
|
||||
unsigned m2 = seq.re.min_length(r2);
|
||||
unsigned d = (m1 >= m2) ? (m1 - m2) : (m2 - m1);
|
||||
// Replace 0-strides with d for GCD computation:
|
||||
// a fixed-length branch only introduces constraint via its offset.
|
||||
unsigned g = u_gcd(s1 == 0 ? d : s1, s2 == 0 ? d : s2);
|
||||
g = u_gcd(g, d);
|
||||
return g;
|
||||
}
|
||||
|
||||
// loop(r, lo, hi): the length of any word is a sum of lo..hi copies of
|
||||
// lengths from L(r). Since all lengths in L(r) are ≡ min_len(r) mod
|
||||
// stride(r), the overall stride is gcd(min_len(r), stride(r)).
|
||||
if (seq.re.is_loop(re, r1, lo, hi)) {
|
||||
unsigned mn = seq.re.min_length(r1);
|
||||
unsigned inner = compute_length_stride(r1);
|
||||
return u_gcd(mn, inner);
|
||||
}
|
||||
if (seq.re.is_loop(re, r1, lo)) {
|
||||
unsigned mn = seq.re.min_length(r1);
|
||||
unsigned inner = compute_length_stride(r1);
|
||||
return u_gcd(mn, inner);
|
||||
}
|
||||
|
||||
// intersection(r1, r2): lengths must be in both languages.
|
||||
// A conservative safe choice: GCD(stride(r1), stride(r2)) is a valid
|
||||
// stride for the intersection (every length in the intersection is
|
||||
// also in r1 and in r2).
|
||||
if (seq.re.is_intersection(re, r1, r2)) {
|
||||
unsigned s1 = compute_length_stride(r1);
|
||||
unsigned s2 = compute_length_stride(r2);
|
||||
return u_gcd(s1, s2);
|
||||
}
|
||||
|
||||
// For complement, diff, reverse, derivative, of_pred, and anything
|
||||
// else we cannot analyse statically: be conservative and return 1
|
||||
// (no useful modular constraint rather than an unsound one).
|
||||
return 1;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Constraint generation
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
void seq_parikh::generate_parikh_constraints(str_mem const& mem,
|
||||
vector<int_constraint>& out) {
|
||||
if (!mem.m_regex || !mem.m_str)
|
||||
return;
|
||||
|
||||
expr* re_expr = mem.m_regex->get_expr();
|
||||
if (!re_expr || !seq.is_re(re_expr))
|
||||
return;
|
||||
|
||||
// Length bounds from the regex.
|
||||
unsigned min_len = seq.re.min_length(re_expr);
|
||||
unsigned max_len = seq.re.max_length(re_expr);
|
||||
|
||||
// If min_len >= max_len the bounds already pin the length exactly
|
||||
// (or the language is empty — empty language is detected by simplify_and_init
|
||||
// via Brzozowski derivative / is_empty checks, not here).
|
||||
// We only generate modular constraints when the length is variable.
|
||||
if (min_len >= max_len)
|
||||
return;
|
||||
|
||||
unsigned stride = compute_length_stride(re_expr);
|
||||
|
||||
// stride == 1: every integer length is possible — no useful constraint.
|
||||
// stride == 0: fixed length or empty — handled by bounds.
|
||||
if (stride <= 1)
|
||||
return;
|
||||
|
||||
// Build len(str) as an arithmetic expression.
|
||||
expr_ref len_str(seq.str.mk_length(mem.m_str->get_expr()), m);
|
||||
|
||||
// Introduce fresh integer variable k ≥ 0.
|
||||
expr_ref k_var = mk_fresh_int_var();
|
||||
|
||||
// Constraint 1: len(str) = min_len + stride · k
|
||||
expr_ref min_expr(a.mk_int(min_len), m);
|
||||
expr_ref stride_expr(a.mk_int(stride), m);
|
||||
expr_ref stride_k(a.mk_mul(stride_expr, k_var), m);
|
||||
expr_ref rhs(a.mk_add(min_expr, stride_k), m);
|
||||
out.push_back(int_constraint(len_str, rhs,
|
||||
int_constraint_kind::eq, mem.m_dep, m));
|
||||
|
||||
// Constraint 2: k ≥ 0
|
||||
expr_ref zero(a.mk_int(0), m);
|
||||
out.push_back(int_constraint(k_var, zero,
|
||||
int_constraint_kind::ge, mem.m_dep, m));
|
||||
|
||||
// Constraint 3 (optional): k ≤ max_k when max_len is bounded.
|
||||
// max_k = floor((max_len - min_len) / stride)
|
||||
// This gives the solver an explicit upper bound on k.
|
||||
// The subtraction is safe because min_len < max_len is guaranteed
|
||||
// by the early return above.
|
||||
if (max_len != UINT_MAX) {
|
||||
SASSERT(max_len > min_len);
|
||||
unsigned range = max_len - min_len;
|
||||
unsigned max_k = range / stride;
|
||||
expr_ref max_k_expr(a.mk_int(max_k), m);
|
||||
out.push_back(int_constraint(k_var, max_k_expr,
|
||||
int_constraint_kind::le, mem.m_dep, m));
|
||||
}
|
||||
}
|
||||
|
||||
void seq_parikh::apply_to_node(nielsen_node& node) {
|
||||
vector<int_constraint> constraints;
|
||||
for (str_mem const& mem : node.str_mems())
|
||||
generate_parikh_constraints(mem, constraints);
|
||||
for (auto& ic : constraints)
|
||||
node.add_int_constraint(ic);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Quick Parikh feasibility check (no solver call)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Returns true if a Parikh conflict is detected: there exists a membership
|
||||
// str ∈ re for a single-variable str where the modular length constraint
|
||||
// len(str) = min_len + stride * k (k ≥ 0)
|
||||
// is inconsistent with the variable's current integer bounds [lb, ub].
|
||||
//
|
||||
// This check is lightweight — it uses only modular arithmetic on the already-
|
||||
// known regex min/max lengths and the per-variable bounds stored in the node.
|
||||
bool seq_parikh::check_parikh_conflict(nielsen_node& node) {
|
||||
for (str_mem const& mem : node.str_mems()) {
|
||||
if (!mem.m_str || !mem.m_regex || !mem.m_str->is_var())
|
||||
continue;
|
||||
|
||||
expr* re_expr = mem.m_regex->get_expr();
|
||||
if (!re_expr || !seq.is_re(re_expr))
|
||||
continue;
|
||||
|
||||
unsigned min_len = seq.re.min_length(re_expr);
|
||||
unsigned max_len = seq.re.max_length(re_expr);
|
||||
if (min_len >= max_len) continue; // fixed or empty — no stride constraint
|
||||
|
||||
unsigned stride = compute_length_stride(re_expr);
|
||||
if (stride <= 1) continue; // no useful modular constraint
|
||||
// stride > 1 guaranteed from here onward.
|
||||
SASSERT(stride > 1);
|
||||
|
||||
unsigned lb = node.var_lb(mem.m_str);
|
||||
unsigned ub = node.var_ub(mem.m_str);
|
||||
|
||||
// Check: ∃k ≥ 0 such that lb ≤ min_len + stride * k ≤ ub ?
|
||||
//
|
||||
// First find the smallest k satisfying the lower bound:
|
||||
// k_min = 0 if min_len ≥ lb
|
||||
// k_min = ⌈(lb - min_len) / stride⌉ otherwise
|
||||
//
|
||||
// Then verify min_len + stride * k_min ≤ ub.
|
||||
unsigned k_min = 0;
|
||||
if (lb > min_len) {
|
||||
unsigned gap = lb - min_len;
|
||||
// Ceiling division: k_min = ceil(gap / stride).
|
||||
// Guard: (gap + stride - 1) may overflow if gap is close to UINT_MAX.
|
||||
// In that case k_min would be huge, and min_len + stride*k_min would
|
||||
// also overflow ub → treat as a conflict immediately.
|
||||
if (gap > UINT_MAX - (stride - 1)) {
|
||||
return true; // ceiling division would overflow → k_min too large
|
||||
}
|
||||
k_min = (gap + stride - 1) / stride;
|
||||
}
|
||||
|
||||
// Overflow guard: stride * k_min may overflow unsigned.
|
||||
unsigned len_at_k_min;
|
||||
if (k_min > (UINT_MAX - min_len) / stride) {
|
||||
// Overflow: min_len + stride * k_min > UINT_MAX ≥ ub → conflict.
|
||||
return true;
|
||||
}
|
||||
len_at_k_min = min_len + stride * k_min;
|
||||
|
||||
if (ub != UINT_MAX && len_at_k_min > ub)
|
||||
return true; // no valid k exists → Parikh conflict
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// minterm → char_set conversion
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Convert a regex minterm expression to a char_set.
|
||||
//
|
||||
// Minterms are Boolean combinations of character-class predicates
|
||||
// (re.range, re.full_char, complement, intersection) produced by
|
||||
// sgraph::compute_minterms(). This function converts them to a
|
||||
// concrete char_set so that fresh character variables introduced by
|
||||
// apply_regex_var_split can be constrained with add_char_range().
|
||||
//
|
||||
// The implementation is recursive and handles:
|
||||
// re.full_char → [0, max_char] (full alphabet)
|
||||
// re.range(lo, hi) → [lo, hi+1) (hi inclusive in Z3)
|
||||
// re.complement(r) → alphabet \ minterm_to_char_set(r)
|
||||
// re.inter(r1, r2) → intersection of both sides
|
||||
// re.diff(r1, r2) → r1 ∩ complement(r2)
|
||||
// re.to_re(str.unit(c)) → singleton {c}
|
||||
// re.empty → empty set
|
||||
// anything else → [0, max_char] (conservative)
|
||||
char_set seq_parikh::minterm_to_char_set(expr* re_expr) {
|
||||
unsigned max_c = seq.max_char();
|
||||
|
||||
if (!re_expr)
|
||||
return char_set::full(max_c);
|
||||
|
||||
// full_char: the whole alphabet [0, max_char]
|
||||
if (seq.re.is_full_char(re_expr))
|
||||
return char_set::full(max_c);
|
||||
|
||||
// range [lo, hi] (hi inclusive in Z3's regex representation)
|
||||
unsigned lo = 0, hi = 0;
|
||||
if (seq.re.is_range(re_expr, lo, hi)) {
|
||||
// lo > hi is a degenerate range; should not arise from well-formed minterms
|
||||
SASSERT(lo <= hi);
|
||||
if (lo > hi) return char_set();
|
||||
// char_range uses exclusive upper bound; Z3 hi is inclusive
|
||||
return char_set(char_range(lo, hi + 1));
|
||||
}
|
||||
|
||||
// complement: alphabet minus the inner set
|
||||
expr* inner = nullptr;
|
||||
if (seq.re.is_complement(re_expr, inner))
|
||||
return minterm_to_char_set(inner).complement(max_c);
|
||||
|
||||
// intersection: characters present in both sets
|
||||
expr* r1 = nullptr, *r2 = nullptr;
|
||||
if (seq.re.is_intersection(re_expr, r1, r2))
|
||||
return minterm_to_char_set(r1).intersect_with(minterm_to_char_set(r2));
|
||||
|
||||
// difference: r1 minus r2 = r1 ∩ complement(r2)
|
||||
if (seq.re.is_diff(re_expr, r1, r2))
|
||||
return minterm_to_char_set(r1).intersect_with(
|
||||
minterm_to_char_set(r2).complement(max_c));
|
||||
|
||||
// to_re(str.unit(c)): singleton character set
|
||||
expr* str_arg = nullptr;
|
||||
expr* ch_expr = nullptr;
|
||||
unsigned char_val = 0;
|
||||
if (seq.re.is_to_re(re_expr, str_arg) &&
|
||||
seq.str.is_unit(str_arg, ch_expr) &&
|
||||
seq.is_const_char(ch_expr, char_val)) {
|
||||
char_set cs;
|
||||
cs.add(char_val);
|
||||
return cs;
|
||||
}
|
||||
|
||||
// empty regex: no characters can appear
|
||||
if (seq.re.is_empty(re_expr))
|
||||
return char_set();
|
||||
|
||||
// Conservative fallback: return the full alphabet so that
|
||||
// no unsound constraints are added for unrecognised expressions.
|
||||
return char_set::full(max_c);
|
||||
}
|
||||
|
||||
} // namespace seq
|
||||
150
src/smt/seq/seq_parikh.h
Normal file
150
src/smt/seq/seq_parikh.h
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
seq_parikh.h
|
||||
|
||||
Abstract:
|
||||
|
||||
Parikh image filter for the ZIPT-based Nielsen string solver.
|
||||
|
||||
Implements Parikh-based arithmetic constraint generation for
|
||||
nielsen_node instances. For a regex membership constraint str ∈ r,
|
||||
the Parikh image of r constrains the multiset of characters in str.
|
||||
This module computes the "length stride" (period) of the length
|
||||
language of r and generates modular arithmetic constraints of the form
|
||||
|
||||
len(str) = min_len + stride · k (k ≥ 0, k fresh integer)
|
||||
|
||||
which tighten the arithmetic subproblem beyond the simple min/max
|
||||
length bounds already produced by nielsen_node::init_var_bounds_from_mems().
|
||||
|
||||
For example:
|
||||
• str ∈ (ab)* → min_len = 0, stride = 2 → len(str) = 2·k
|
||||
• str ∈ a(bc)* → min_len = 1, stride = 2 → len(str) = 1 + 2·k
|
||||
• str ∈ ab|abc → stride = 1 (no useful modular constraint)
|
||||
|
||||
The generated int_constraints are added to the node's integer constraint
|
||||
set and discharged by the integer subsolver (see seq_nielsen.h,
|
||||
simple_solver).
|
||||
|
||||
Implements the Parikh filter described in ZIPT
|
||||
(https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints)
|
||||
replacing ZIPT's PDD-based Parikh subsolver with Z3's linear arithmetic.
|
||||
|
||||
Author:
|
||||
|
||||
Clemens Eisenhofer 2026-03-10
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-10
|
||||
|
||||
--*/
|
||||
#pragma once
|
||||
|
||||
#include "ast/arith_decl_plugin.h"
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include "smt/seq/seq_nielsen.h"
|
||||
|
||||
namespace seq {
|
||||
|
||||
/**
|
||||
* Parikh image filter: generates modular length constraints from
|
||||
* regex membership constraints in a nielsen_node.
|
||||
*
|
||||
* Usage:
|
||||
* seq_parikh parikh(sg);
|
||||
* parikh.apply_to_node(node); // adds constraints to node
|
||||
*
|
||||
* Or per-membership:
|
||||
* vector<int_constraint> out;
|
||||
* parikh.generate_parikh_constraints(mem, out);
|
||||
*/
|
||||
class seq_parikh {
|
||||
ast_manager& m;
|
||||
seq_util seq;
|
||||
arith_util a;
|
||||
unsigned m_fresh_cnt; // counter for fresh variable names
|
||||
|
||||
// Compute the stride (period) of the length language of a regex.
|
||||
//
|
||||
// The stride k satisfies: all lengths in L(re) are congruent to
|
||||
// min_length(re) modulo k. A stride of 1 means every integer
|
||||
// length is possible (no useful modular constraint). A stride of
|
||||
// 0 is a sentinel meaning the language is empty or has a single
|
||||
// fixed length (already captured by bounds).
|
||||
//
|
||||
// Examples:
|
||||
// stride(to_re("ab")) = 0 (fixed length 2)
|
||||
// stride((ab)*) = 2 (lengths 0, 2, 4, ...)
|
||||
// stride((abc)*) = 3 (lengths 0, 3, 6, ...)
|
||||
// stride(a*b*) = 1 (all lengths possible)
|
||||
// stride((ab)*(cd)*) = 2 (lengths 0, 2, 4, ...)
|
||||
// stride((ab)*|(abc)*) = 1 (lengths 0, 2, 3, 4, ...)
|
||||
unsigned compute_length_stride(expr* re);
|
||||
|
||||
// Create a fresh integer variable (name "pk!N") for use as the
|
||||
// Parikh multiplier variable k in len(str) = min_len + stride·k.
|
||||
expr_ref mk_fresh_int_var();
|
||||
|
||||
public:
|
||||
explicit seq_parikh(euf::sgraph& sg);
|
||||
|
||||
// Generate Parikh modular length constraints for one membership.
|
||||
//
|
||||
// When stride > 1 and min_len < max_len (bounds don't pin length exactly,
|
||||
// and the language is non-empty):
|
||||
// adds: len(str) = min_len + stride · k (equality)
|
||||
// k ≥ 0 (non-negativity)
|
||||
// k ≤ (max_len - min_len) / stride (upper bound, when max_len bounded)
|
||||
// These tighten the integer constraint set for the subsolver.
|
||||
// Dependencies are copied from mem.m_dep.
|
||||
// Does nothing when min_len ≥ max_len (empty or fixed-length language).
|
||||
void generate_parikh_constraints(str_mem const& mem,
|
||||
vector<int_constraint>& out);
|
||||
|
||||
// Apply Parikh constraints to all memberships at a node.
|
||||
// Calls generate_parikh_constraints for each str_mem in the node
|
||||
// and appends the resulting int_constraints to node.int_constraints().
|
||||
void apply_to_node(nielsen_node& node);
|
||||
|
||||
// Quick Parikh feasibility check (no solver call).
|
||||
//
|
||||
// For each single-variable membership str ∈ re, checks whether the
|
||||
// modular constraint len(str) = min_len + stride · k (k ≥ 0)
|
||||
// has any solution given the current per-variable bounds stored in
|
||||
// node.var_lb(str) and node.var_ub(str).
|
||||
//
|
||||
// Returns true when a conflict is detected (no valid k exists for
|
||||
// some membership). The caller should then mark the node with
|
||||
// backtrack_reason::parikh_image.
|
||||
//
|
||||
// This is a lightweight pre-check that avoids calling the integer
|
||||
// subsolver. It is sound (never returns true for a satisfiable node)
|
||||
// but incomplete (may miss conflicts that require the full solver).
|
||||
bool check_parikh_conflict(nielsen_node& node);
|
||||
|
||||
// Compute the length stride of a regex expression.
|
||||
// Exposed for testing and external callers.
|
||||
unsigned get_length_stride(expr* re) { return compute_length_stride(re); }
|
||||
|
||||
// Convert a regex minterm expression to a char_set.
|
||||
//
|
||||
// A minterm is a Boolean combination of character-class predicates
|
||||
// (re.range, re.full_char, complement, intersection) that names a
|
||||
// single, indivisible character equivalence class. Minterms are
|
||||
// produced by sgraph::compute_minterms and used in
|
||||
// apply_regex_var_split to constrain fresh character variables.
|
||||
//
|
||||
// Supported expressions:
|
||||
// re.full_char → full set [0, max_char]
|
||||
// re.range(lo, hi) → char_set [lo, hi] (inclusive on both ends)
|
||||
// re.complement(r) → complement of minterm_to_char_set(r)
|
||||
// re.inter(r1, r2) → intersection of both sets
|
||||
// re.diff(r1, r2) → r1 minus r2 (= r1 ∩ complement(r2))
|
||||
// re.to_re(unit(c)) → singleton {c}
|
||||
// re.empty → empty set
|
||||
// anything else → full set (conservative, sound over-approximation)
|
||||
char_set minterm_to_char_set(expr* minterm_re);
|
||||
};
|
||||
|
||||
} // namespace seq
|
||||
|
|
@ -132,6 +132,7 @@ add_executable(test-z3
|
|||
sls_test.cpp
|
||||
sls_seq_plugin.cpp
|
||||
seq_nielsen.cpp
|
||||
seq_parikh.cpp
|
||||
nseq_basic.cpp
|
||||
nseq_regex.cpp
|
||||
nseq_zipt.cpp
|
||||
|
|
|
|||
|
|
@ -287,6 +287,7 @@ int main(int argc, char ** argv) {
|
|||
TST(scoped_vector);
|
||||
TST(sls_seq_plugin);
|
||||
TST(seq_nielsen);
|
||||
TST(seq_parikh);
|
||||
TST(nseq_basic);
|
||||
TST(nseq_regex);
|
||||
TST(nseq_zipt);
|
||||
|
|
|
|||
883
src/test/seq_parikh.cpp
Normal file
883
src/test/seq_parikh.cpp
Normal file
|
|
@ -0,0 +1,883 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
seq_parikh.cpp
|
||||
|
||||
Abstract:
|
||||
|
||||
Unit tests for seq_parikh (Parikh image filter for the ZIPT Nielsen solver).
|
||||
|
||||
Tests cover:
|
||||
- compute_length_stride / get_length_stride for all regex forms
|
||||
- generate_parikh_constraints: constraint shape, count, and dependency
|
||||
- apply_to_node: integration with nielsen_node
|
||||
- check_parikh_conflict: lightweight feasibility pre-check
|
||||
- minterm_to_char_set: regex-minterm to char_set conversion
|
||||
|
||||
Author:
|
||||
|
||||
Clemens Eisenhofer 2026-03-11
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-11
|
||||
|
||||
--*/
|
||||
|
||||
#include "util/util.h"
|
||||
#include "util/zstring.h"
|
||||
#include "ast/euf/euf_egraph.h"
|
||||
#include "ast/euf/euf_sgraph.h"
|
||||
#include "smt/seq/seq_nielsen.h"
|
||||
#include "smt/seq/seq_parikh.h"
|
||||
#include "ast/arith_decl_plugin.h"
|
||||
#include "ast/reg_decl_plugins.h"
|
||||
#include "ast/ast_pp.h"
|
||||
#include <iostream>
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Minimal solver stub (no-op)
|
||||
// ---------------------------------------------------------------------------
|
||||
class parikh_test_solver : public seq::simple_solver {
|
||||
public:
|
||||
void push() override {}
|
||||
void pop(unsigned) override {}
|
||||
void assert_expr(expr*) override {}
|
||||
lbool check() override { return l_true; }
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers to build common regex expressions
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// build to_re("AB") — a fixed two-character string regex
|
||||
static expr_ref mk_to_re_ab(ast_manager& m, seq_util& seq) {
|
||||
expr_ref ch_a(seq.str.mk_char('A'), m);
|
||||
expr_ref ch_b(seq.str.mk_char('B'), m);
|
||||
expr_ref unit_a(seq.str.mk_unit(ch_a), m);
|
||||
expr_ref unit_b(seq.str.mk_unit(ch_b), m);
|
||||
expr_ref ab(seq.str.mk_concat(unit_a, unit_b), m);
|
||||
return expr_ref(seq.re.mk_to_re(ab), m);
|
||||
}
|
||||
|
||||
// build (ab)* — star of the two-character sequence
|
||||
static expr_ref mk_ab_star(ast_manager& m, seq_util& seq) {
|
||||
return expr_ref(seq.re.mk_star(mk_to_re_ab(m, seq)), m);
|
||||
}
|
||||
|
||||
// build (abc)* — star of a three-character sequence
|
||||
static expr_ref mk_abc_star(ast_manager& m, seq_util& seq) {
|
||||
expr_ref ch_a(seq.str.mk_char('A'), m);
|
||||
expr_ref ch_b(seq.str.mk_char('B'), m);
|
||||
expr_ref ch_c(seq.str.mk_char('C'), m);
|
||||
expr_ref unit_a(seq.str.mk_unit(ch_a), m);
|
||||
expr_ref unit_b(seq.str.mk_unit(ch_b), m);
|
||||
expr_ref unit_c(seq.str.mk_unit(ch_c), m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
expr_ref abc(seq.str.mk_concat(unit_a, seq.str.mk_concat(unit_b, unit_c)), m);
|
||||
return expr_ref(seq.re.mk_star(seq.re.mk_to_re(abc)), m);
|
||||
}
|
||||
|
||||
// build to_re("A") — single-character regex
|
||||
static expr_ref mk_to_re_a(ast_manager& m, seq_util& seq) {
|
||||
expr_ref ch_a(seq.str.mk_char('A'), m);
|
||||
expr_ref unit_a(seq.str.mk_unit(ch_a), m);
|
||||
return expr_ref(seq.re.mk_to_re(unit_a), m);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Stride tests: compute_length_stride / get_length_stride
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// stride(to_re("AB")) == 0 (fixed length)
|
||||
static void test_stride_fixed_length() {
|
||||
std::cout << "test_stride_fixed_length\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::seq_parikh parikh(sg);
|
||||
seq_util seq(m);
|
||||
|
||||
expr_ref re = mk_to_re_ab(m, seq);
|
||||
SASSERT(parikh.get_length_stride(re) == 0);
|
||||
}
|
||||
|
||||
// stride((ab)*) == 2
|
||||
static void test_stride_star_fixed_body() {
|
||||
std::cout << "test_stride_star_fixed_body\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::seq_parikh parikh(sg);
|
||||
seq_util seq(m);
|
||||
|
||||
expr_ref re = mk_ab_star(m, seq);
|
||||
unsigned stride = parikh.get_length_stride(re);
|
||||
std::cout << " stride((ab)*) = " << stride << "\n";
|
||||
SASSERT(stride == 2);
|
||||
}
|
||||
|
||||
// stride((abc)*) == 3
|
||||
static void test_stride_star_three_char() {
|
||||
std::cout << "test_stride_star_three_char\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::seq_parikh parikh(sg);
|
||||
seq_util seq(m);
|
||||
|
||||
expr_ref re = mk_abc_star(m, seq);
|
||||
unsigned stride = parikh.get_length_stride(re);
|
||||
std::cout << " stride((abc)*) = " << stride << "\n";
|
||||
SASSERT(stride == 3);
|
||||
}
|
||||
|
||||
// stride((ab)+) == 2
|
||||
static void test_stride_plus() {
|
||||
std::cout << "test_stride_plus\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::seq_parikh parikh(sg);
|
||||
seq_util seq(m);
|
||||
|
||||
expr_ref re_body = mk_to_re_ab(m, seq);
|
||||
expr_ref re(seq.re.mk_plus(re_body), m);
|
||||
unsigned stride = parikh.get_length_stride(re);
|
||||
std::cout << " stride((ab)+) = " << stride << "\n";
|
||||
SASSERT(stride == 2);
|
||||
}
|
||||
|
||||
// stride(a* b*) == 1 — union of independent stars → every length possible
|
||||
static void test_stride_concat_stars() {
|
||||
std::cout << "test_stride_concat_stars\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::seq_parikh parikh(sg);
|
||||
seq_util seq(m);
|
||||
|
||||
expr_ref a_star(seq.re.mk_star(mk_to_re_a(m, seq)), m);
|
||||
expr_ref b_star(seq.re.mk_star(mk_to_re_a(m, seq)), m);
|
||||
expr_ref re(seq.re.mk_concat(a_star, b_star), m);
|
||||
unsigned stride = parikh.get_length_stride(re);
|
||||
std::cout << " stride(a* b*) = " << stride << "\n";
|
||||
// both stars have stride 1 (single-char body → gcd(1,0)=1) → gcd(1,1)=1
|
||||
SASSERT(stride == 1);
|
||||
}
|
||||
|
||||
// stride((ab)* | (abc)*) == gcd(2,3) = 1
|
||||
static void test_stride_union_no_common_period() {
|
||||
std::cout << "test_stride_union_no_common_period\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::seq_parikh parikh(sg);
|
||||
seq_util seq(m);
|
||||
|
||||
expr_ref ab_star = mk_ab_star(m, seq);
|
||||
expr_ref abc_star = mk_abc_star(m, seq);
|
||||
expr_ref re(seq.re.mk_union(ab_star, abc_star), m);
|
||||
unsigned stride = parikh.get_length_stride(re);
|
||||
std::cout << " stride((ab)*|(abc)*) = " << stride << "\n";
|
||||
// lengths: {0,2,4,...} union {0,3,6,...} → GCD(2,3)=1
|
||||
SASSERT(stride == 1);
|
||||
}
|
||||
|
||||
// stride((ab)*|(de)*) == gcd(2,2) = 2
|
||||
static void test_stride_union_same_period() {
|
||||
std::cout << "test_stride_union_same_period\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::seq_parikh parikh(sg);
|
||||
seq_util seq(m);
|
||||
|
||||
expr_ref ab_star = mk_ab_star(m, seq);
|
||||
// de_star: (de)* — same stride 2
|
||||
expr_ref ch_d(seq.str.mk_char('D'), m);
|
||||
expr_ref ch_e(seq.str.mk_char('E'), m);
|
||||
expr_ref unit_d(seq.str.mk_unit(ch_d), m);
|
||||
expr_ref unit_e(seq.str.mk_unit(ch_e), m);
|
||||
expr_ref de(seq.str.mk_concat(unit_d, unit_e), m);
|
||||
expr_ref de_star(seq.re.mk_star(seq.re.mk_to_re(de)), m);
|
||||
expr_ref re(seq.re.mk_union(ab_star, de_star), m);
|
||||
unsigned stride = parikh.get_length_stride(re);
|
||||
std::cout << " stride((ab)*|(de)*) = " << stride << "\n";
|
||||
SASSERT(stride == 2);
|
||||
}
|
||||
|
||||
// stride(loop((ab), 1, 3)) == 2 — loop with fixed-length body
|
||||
static void test_stride_loop() {
|
||||
std::cout << "test_stride_loop\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::seq_parikh parikh(sg);
|
||||
seq_util seq(m);
|
||||
|
||||
expr_ref ab = mk_to_re_ab(m, seq);
|
||||
expr_ref re(seq.re.mk_loop(ab, 1, 3), m);
|
||||
unsigned stride = parikh.get_length_stride(re);
|
||||
std::cout << " stride(loop(ab,1,3)) = " << stride << "\n";
|
||||
SASSERT(stride == 2);
|
||||
}
|
||||
|
||||
// stride(re.full_seq) == 1 (every length possible)
|
||||
static void test_stride_full_seq() {
|
||||
std::cout << "test_stride_full_seq\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::seq_parikh parikh(sg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref re(seq.re.mk_full_seq(str_sort), m);
|
||||
unsigned stride = parikh.get_length_stride(re);
|
||||
std::cout << " stride(.*) = " << stride << "\n";
|
||||
SASSERT(stride == 1);
|
||||
}
|
||||
|
||||
// stride(re.empty) == 0
|
||||
static void test_stride_empty_regex() {
|
||||
std::cout << "test_stride_empty_regex\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::seq_parikh parikh(sg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref re(seq.re.mk_empty(str_sort), m);
|
||||
unsigned stride = parikh.get_length_stride(re);
|
||||
std::cout << " stride(empty) = " << stride << "\n";
|
||||
SASSERT(stride == 0);
|
||||
}
|
||||
|
||||
// stride(re.epsilon) == 0
|
||||
static void test_stride_epsilon() {
|
||||
std::cout << "test_stride_epsilon\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::seq_parikh parikh(sg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
// epsilon is to_re("") — empty string
|
||||
sort_ref str_s(seq.str.mk_string_sort(), m);
|
||||
expr_ref empty_str(seq.str.mk_empty(str_s), m);
|
||||
expr_ref re(seq.re.mk_to_re(empty_str), m);
|
||||
unsigned stride = parikh.get_length_stride(re);
|
||||
std::cout << " stride(epsilon) = " << stride << "\n";
|
||||
SASSERT(stride == 0);
|
||||
}
|
||||
|
||||
// stride((ab)?) == 2 (gcd(2, 0) = 2 via opt handling; min_len(ab)=2)
|
||||
static void test_stride_opt() {
|
||||
std::cout << "test_stride_opt\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::seq_parikh parikh(sg);
|
||||
seq_util seq(m);
|
||||
|
||||
expr_ref ab = mk_to_re_ab(m, seq);
|
||||
expr_ref re(seq.re.mk_opt(ab), m);
|
||||
unsigned stride = parikh.get_length_stride(re);
|
||||
std::cout << " stride((ab)?) = " << stride << "\n";
|
||||
SASSERT(stride == 2);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// generate_parikh_constraints tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// (ab)* → len(x) = 0 + 2·k, k ≥ 0 (stride 2, min_len 0)
|
||||
static void test_generate_constraints_ab_star() {
|
||||
std::cout << "test_generate_constraints_ab_star\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
arith_util arith(m);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
expr_ref re = mk_ab_star(m, seq);
|
||||
euf::snode* regex = sg.mk(re);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
seq::str_mem mem(x, regex, nullptr, 0, dep);
|
||||
|
||||
vector<seq::int_constraint> out;
|
||||
parikh.generate_parikh_constraints(mem, out);
|
||||
|
||||
// expect at least: len(x)=0+2k and k>=0
|
||||
// (no upper bound because max_length is UINT_MAX for unbounded star)
|
||||
std::cout << " generated " << out.size() << " constraints\n";
|
||||
SASSERT(out.size() >= 2);
|
||||
|
||||
// check that one constraint is an equality (len(x) = 0 + 2·k)
|
||||
bool has_eq = false;
|
||||
for (auto const& ic : out)
|
||||
if (ic.m_kind == seq::int_constraint_kind::eq) has_eq = true;
|
||||
SASSERT(has_eq);
|
||||
|
||||
// check that one constraint is k >= 0
|
||||
bool has_ge = false;
|
||||
for (auto const& ic : out)
|
||||
if (ic.m_kind == seq::int_constraint_kind::ge) has_ge = true;
|
||||
SASSERT(has_ge);
|
||||
|
||||
// should NOT have an upper bound (star is unbounded)
|
||||
bool has_le = false;
|
||||
for (auto const& ic : out)
|
||||
if (ic.m_kind == seq::int_constraint_kind::le) has_le = true;
|
||||
SASSERT(!has_le);
|
||||
}
|
||||
|
||||
// loop((ab), 1, 3): bounded → k ≤ floor((6-2)/2) = 2
|
||||
static void test_generate_constraints_bounded_loop() {
|
||||
std::cout << "test_generate_constraints_bounded_loop\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
// loop("ab", 1, 3): min_len=2, max_len=6, stride=2
|
||||
expr_ref ab = mk_to_re_ab(m, seq);
|
||||
expr_ref re(seq.re.mk_loop(ab, 1, 3), m);
|
||||
euf::snode* regex = sg.mk(re);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
seq::str_mem mem(x, regex, nullptr, 0, dep);
|
||||
|
||||
vector<seq::int_constraint> out;
|
||||
parikh.generate_parikh_constraints(mem, out);
|
||||
|
||||
// expect: eq + ge + le = 3 constraints
|
||||
std::cout << " generated " << out.size() << " constraints\n";
|
||||
SASSERT(out.size() == 3);
|
||||
|
||||
bool has_eq = false, has_ge = false, has_le = false;
|
||||
for (auto const& ic : out) {
|
||||
if (ic.m_kind == seq::int_constraint_kind::eq) has_eq = true;
|
||||
if (ic.m_kind == seq::int_constraint_kind::ge) has_ge = true;
|
||||
if (ic.m_kind == seq::int_constraint_kind::le) has_le = true;
|
||||
}
|
||||
SASSERT(has_eq);
|
||||
SASSERT(has_ge);
|
||||
SASSERT(has_le);
|
||||
}
|
||||
|
||||
// stride == 1 → no constraints generated
|
||||
static void test_generate_constraints_stride_one() {
|
||||
std::cout << "test_generate_constraints_stride_one\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
seq::seq_parikh parikh(sg);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
// full_seq: stride=1 → no modular constraint
|
||||
expr_ref re(seq.re.mk_full_seq(str_sort), m);
|
||||
euf::snode* regex = sg.mk(re);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
seq::str_mem mem(x, regex, nullptr, 0, dep);
|
||||
|
||||
vector<seq::int_constraint> out;
|
||||
parikh.generate_parikh_constraints(mem, out);
|
||||
std::cout << " generated " << out.size() << " constraints (expect 0)\n";
|
||||
SASSERT(out.empty());
|
||||
}
|
||||
|
||||
// fixed-length regex (min == max) → no constraints generated
|
||||
static void test_generate_constraints_fixed_length() {
|
||||
std::cout << "test_generate_constraints_fixed_length\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
expr_ref re = mk_to_re_ab(m, seq); // fixed len 2
|
||||
euf::snode* regex = sg.mk(re);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
seq::str_mem mem(x, regex, nullptr, 0, dep);
|
||||
|
||||
vector<seq::int_constraint> out;
|
||||
parikh.generate_parikh_constraints(mem, out);
|
||||
std::cout << " generated " << out.size() << " constraints (expect 0)\n";
|
||||
SASSERT(out.empty());
|
||||
}
|
||||
|
||||
// dependency is propagated to all generated constraints
|
||||
static void test_generate_constraints_dep_propagated() {
|
||||
std::cout << "test_generate_constraints_dep_propagated\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
expr_ref re = mk_ab_star(m, seq);
|
||||
euf::snode* regex = sg.mk(re);
|
||||
seq::dep_tracker dep; dep.insert(7);
|
||||
seq::str_mem mem(x, regex, nullptr, 0, dep);
|
||||
|
||||
vector<seq::int_constraint> out;
|
||||
parikh.generate_parikh_constraints(mem, out);
|
||||
|
||||
// all generated constraints must carry bit 7 in their dependency
|
||||
for (auto const& ic : out) {
|
||||
SASSERT(!ic.m_dep.empty());
|
||||
seq::dep_tracker d7; d7.insert(7);
|
||||
SASSERT(d7.subset_of(ic.m_dep));
|
||||
}
|
||||
std::cout << " all constraints carry dep bit 7\n";
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// apply_to_node tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// applying to a node with one membership adds constraints to node
|
||||
static void test_apply_to_node_adds_constraints() {
|
||||
std::cout << "test_apply_to_node_adds_constraints\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
parikh_test_solver solver;
|
||||
seq::nielsen_graph ng(sg, solver);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
expr_ref re = mk_ab_star(m, seq); // stride 2 → generates constraints
|
||||
euf::snode* regex = sg.mk(re);
|
||||
ng.add_str_mem(x, regex);
|
||||
|
||||
// root node should have no int_constraints initially
|
||||
SASSERT(ng.root() != nullptr);
|
||||
unsigned before = ng.root()->int_constraints().size();
|
||||
|
||||
parikh.apply_to_node(*ng.root());
|
||||
|
||||
unsigned after = ng.root()->int_constraints().size();
|
||||
std::cout << " before=" << before << " after=" << after << "\n";
|
||||
SASSERT(after > before);
|
||||
}
|
||||
|
||||
// applying twice is idempotent (m_parikh_applied would prevent double-add
|
||||
// via nielsen_graph::apply_parikh_to_node, but seq_parikh::apply_to_node
|
||||
// itself does not guard — so calling apply_to_node directly adds again;
|
||||
// this test verifies the direct call does add, not the idempotency guard)
|
||||
static void test_apply_to_node_stride_one_no_constraints() {
|
||||
std::cout << "test_apply_to_node_stride_one_no_constraints\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
parikh_test_solver solver;
|
||||
seq::nielsen_graph ng(sg, solver);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
expr_ref re(seq.re.mk_full_seq(str_sort), m); // stride 1 → no constraints
|
||||
euf::snode* regex = sg.mk(re);
|
||||
ng.add_str_mem(x, regex);
|
||||
|
||||
unsigned before = ng.root()->int_constraints().size();
|
||||
parikh.apply_to_node(*ng.root());
|
||||
unsigned after = ng.root()->int_constraints().size();
|
||||
std::cout << " before=" << before << " after=" << after << " (expect no change)\n";
|
||||
SASSERT(after == before);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// check_parikh_conflict tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// no conflict when var_lb=0, var_ub=UINT_MAX (unconstrained)
|
||||
static void test_check_conflict_unconstrained_no_conflict() {
|
||||
std::cout << "test_check_conflict_unconstrained_no_conflict\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
parikh_test_solver solver;
|
||||
seq::nielsen_graph ng(sg, solver);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
expr_ref re = mk_ab_star(m, seq); // stride 2, min_len 0
|
||||
euf::snode* regex = sg.mk(re);
|
||||
ng.add_str_mem(x, regex);
|
||||
|
||||
// no bounds set → default lb=0, ub=UINT_MAX → no conflict
|
||||
bool conflict = parikh.check_parikh_conflict(*ng.root());
|
||||
std::cout << " conflict = " << conflict << " (expect 0)\n";
|
||||
SASSERT(!conflict);
|
||||
}
|
||||
|
||||
// conflict: lb=3, ub=5, stride=2, min_len=0
|
||||
// valid lengths: 0,2,4,6,... ∩ [3,5] = {4} → no conflict
|
||||
static void test_check_conflict_valid_k_exists() {
|
||||
std::cout << "test_check_conflict_valid_k_exists\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
parikh_test_solver solver;
|
||||
seq::nielsen_graph ng(sg, solver);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
expr_ref re = mk_ab_star(m, seq); // stride 2, min_len 0; lengths 0,2,4,...
|
||||
euf::snode* regex = sg.mk(re);
|
||||
ng.add_str_mem(x, regex);
|
||||
|
||||
// lb=3, ub=5: length 4 is achievable (k=2) → no conflict
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
ng.root()->add_lower_int_bound(x, 3, dep);
|
||||
ng.root()->add_upper_int_bound(x, 5, dep);
|
||||
|
||||
bool conflict = parikh.check_parikh_conflict(*ng.root());
|
||||
std::cout << " conflict = " << conflict << " (expect 0)\n";
|
||||
SASSERT(!conflict);
|
||||
}
|
||||
|
||||
// conflict: lb=3, ub=3, stride=2, min_len=0
|
||||
// valid lengths: {0,2,4,...} ∩ [3,3] = {} → conflict
|
||||
static void test_check_conflict_no_valid_k() {
|
||||
std::cout << "test_check_conflict_no_valid_k\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
parikh_test_solver solver;
|
||||
seq::nielsen_graph ng(sg, solver);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
expr_ref re = mk_ab_star(m, seq); // stride 2, min_len 0; lengths {0,2,4,...}
|
||||
euf::snode* regex = sg.mk(re);
|
||||
ng.add_str_mem(x, regex);
|
||||
|
||||
// lb=3, ub=3: only odd length 3 — never a multiple of 2 → conflict
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
ng.root()->add_lower_int_bound(x, 3, dep);
|
||||
ng.root()->add_upper_int_bound(x, 3, dep);
|
||||
|
||||
bool conflict = parikh.check_parikh_conflict(*ng.root());
|
||||
std::cout << " conflict = " << conflict << " (expect 1)\n";
|
||||
SASSERT(conflict);
|
||||
}
|
||||
|
||||
// conflict: lb=5, ub=5, stride=3, min_len=0
|
||||
// valid lengths of (abc)*: {0,3,6,...} ∩ {5} = {} → conflict
|
||||
static void test_check_conflict_abc_star() {
|
||||
std::cout << "test_check_conflict_abc_star\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
parikh_test_solver solver;
|
||||
seq::nielsen_graph ng(sg, solver);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
expr_ref re = mk_abc_star(m, seq); // stride 3, min_len 0; lengths {0,3,6,...}
|
||||
euf::snode* regex = sg.mk(re);
|
||||
ng.add_str_mem(x, regex);
|
||||
|
||||
// lb=5, ub=5 → no valid k (5 is not a multiple of 3) → conflict
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
ng.root()->add_lower_int_bound(x, 5, dep);
|
||||
ng.root()->add_upper_int_bound(x, 5, dep);
|
||||
|
||||
bool conflict = parikh.check_parikh_conflict(*ng.root());
|
||||
std::cout << " conflict = " << conflict << " (expect 1)\n";
|
||||
SASSERT(conflict);
|
||||
}
|
||||
|
||||
// no conflict for stride==1 regex even with narrow bounds
|
||||
static void test_check_conflict_stride_one_never_conflicts() {
|
||||
std::cout << "test_check_conflict_stride_one_never_conflicts\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
parikh_test_solver solver;
|
||||
seq::nielsen_graph ng(sg, solver);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
expr_ref re(seq.re.mk_full_seq(str_sort), m); // stride 1 → no constraint
|
||||
euf::snode* regex = sg.mk(re);
|
||||
ng.add_str_mem(x, regex);
|
||||
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
ng.root()->add_lower_int_bound(x, 7, dep);
|
||||
ng.root()->add_upper_int_bound(x, 7, dep);
|
||||
|
||||
bool conflict = parikh.check_parikh_conflict(*ng.root());
|
||||
std::cout << " conflict = " << conflict << " (expect 0: stride=1 skipped)\n";
|
||||
SASSERT(!conflict);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// minterm_to_char_set tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// re.full_char → full alphabet [0, max_char]
|
||||
static void test_minterm_full_char() {
|
||||
std::cout << "test_minterm_full_char\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
seq::seq_parikh parikh(sg);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref re(seq.re.mk_full_char(str_sort), m);
|
||||
char_set cs = parikh.minterm_to_char_set(re);
|
||||
std::cout << " full_char char_count = " << cs.char_count() << "\n";
|
||||
SASSERT(cs.is_full(seq.max_char()));
|
||||
}
|
||||
|
||||
// re.empty → empty char_set
|
||||
static void test_minterm_empty_regex() {
|
||||
std::cout << "test_minterm_empty_regex\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
seq::seq_parikh parikh(sg);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref re(seq.re.mk_empty(str_sort), m);
|
||||
char_set cs = parikh.minterm_to_char_set(re);
|
||||
std::cout << " empty regex → char_set empty: " << cs.is_empty() << "\n";
|
||||
SASSERT(cs.is_empty());
|
||||
}
|
||||
|
||||
// re.range('A','Z') → 26 characters
|
||||
static void test_minterm_range() {
|
||||
std::cout << "test_minterm_range\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
// Z3 re.range takes string arguments "A" and "Z"
|
||||
expr_ref lo_str(seq.str.mk_string(zstring("A")), m);
|
||||
expr_ref hi_str(seq.str.mk_string(zstring("Z")), m);
|
||||
expr_ref re(seq.re.mk_range(lo_str, hi_str), m);
|
||||
char_set cs = parikh.minterm_to_char_set(re);
|
||||
std::cout << " range(A,Z) char_count = " << cs.char_count() << "\n";
|
||||
SASSERT(cs.char_count() == 26);
|
||||
SASSERT(cs.contains('A'));
|
||||
SASSERT(cs.contains('Z'));
|
||||
SASSERT(!cs.contains('a'));
|
||||
}
|
||||
|
||||
// complement of re.range('A','Z') should not contain A-Z
|
||||
static void test_minterm_complement() {
|
||||
std::cout << "test_minterm_complement\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
seq::seq_parikh parikh(sg);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref lo_str(seq.str.mk_string(zstring("A")), m);
|
||||
expr_ref hi_str(seq.str.mk_string(zstring("Z")), m);
|
||||
expr_ref range(seq.re.mk_range(lo_str, hi_str), m);
|
||||
expr_ref re(seq.re.mk_complement(range), m);
|
||||
char_set cs = parikh.minterm_to_char_set(re);
|
||||
|
||||
// complement of [A-Z] should not contain any letter in A-Z
|
||||
for (unsigned c = 'A'; c <= 'Z'; ++c)
|
||||
SASSERT(!cs.contains(c));
|
||||
// but should contain e.g. '0'
|
||||
SASSERT(cs.contains('0'));
|
||||
std::cout << " complement ok: A-Z excluded, '0' included\n";
|
||||
}
|
||||
|
||||
// intersection of range('A','Z') and range('M','Z') == range('M','Z')
|
||||
static void test_minterm_intersection() {
|
||||
std::cout << "test_minterm_intersection\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
expr_ref lo_az(seq.str.mk_string(zstring("A")), m);
|
||||
expr_ref hi_az(seq.str.mk_string(zstring("Z")), m);
|
||||
expr_ref lo_mz(seq.str.mk_string(zstring("M")), m);
|
||||
|
||||
expr_ref range_az(seq.re.mk_range(lo_az, hi_az), m);
|
||||
expr_ref range_mz(seq.re.mk_range(lo_mz, hi_az), m);
|
||||
expr_ref re(seq.re.mk_inter(range_az, range_mz), m);
|
||||
char_set cs = parikh.minterm_to_char_set(re);
|
||||
|
||||
// intersection [A-Z] ∩ [M-Z] = [M-Z]: 14 characters
|
||||
std::cout << " intersection [A-Z]∩[M-Z] char_count = " << cs.char_count() << "\n";
|
||||
SASSERT(cs.char_count() == 14); // M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z
|
||||
SASSERT(!cs.contains('A'));
|
||||
SASSERT(cs.contains('M'));
|
||||
SASSERT(cs.contains('Z'));
|
||||
}
|
||||
|
||||
// diff(range('A','Z'), range('A','M')) == range('N','Z')
|
||||
static void test_minterm_diff() {
|
||||
std::cout << "test_minterm_diff\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
expr_ref lo_az(seq.str.mk_string(zstring("A")), m);
|
||||
expr_ref hi_az(seq.str.mk_string(zstring("Z")), m);
|
||||
expr_ref lo_am(seq.str.mk_string(zstring("A")), m);
|
||||
expr_ref hi_am(seq.str.mk_string(zstring("M")), m);
|
||||
|
||||
expr_ref range_az(seq.re.mk_range(lo_az, hi_az), m);
|
||||
expr_ref range_am(seq.re.mk_range(lo_am, hi_am), m);
|
||||
expr_ref re(seq.re.mk_diff(range_az, range_am), m);
|
||||
char_set cs = parikh.minterm_to_char_set(re);
|
||||
|
||||
// diff [A-Z] \ [A-M] = [N-Z]: 13 characters
|
||||
std::cout << " diff [A-Z]\\[A-M] char_count = " << cs.char_count() << "\n";
|
||||
SASSERT(cs.char_count() == 13); // N..Z
|
||||
SASSERT(!cs.contains('A'));
|
||||
SASSERT(!cs.contains('M'));
|
||||
SASSERT(cs.contains('N'));
|
||||
SASSERT(cs.contains('Z'));
|
||||
}
|
||||
|
||||
// to_re(unit('A')) → singleton {'A'}
|
||||
static void test_minterm_singleton() {
|
||||
std::cout << "test_minterm_singleton\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
seq::seq_parikh parikh(sg);
|
||||
|
||||
expr_ref ch_a(seq.str.mk_char('A'), m);
|
||||
expr_ref unit_a(seq.str.mk_unit(ch_a), m);
|
||||
expr_ref re(seq.re.mk_to_re(unit_a), m);
|
||||
char_set cs = parikh.minterm_to_char_set(re);
|
||||
|
||||
std::cout << " singleton char_count = " << cs.char_count() << "\n";
|
||||
SASSERT(cs.char_count() == 1);
|
||||
SASSERT(cs.contains('A'));
|
||||
SASSERT(!cs.contains('B'));
|
||||
}
|
||||
|
||||
// nullptr → full set (conservative fallback)
|
||||
static void test_minterm_nullptr_is_full() {
|
||||
std::cout << "test_minterm_nullptr_is_full\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::seq_parikh parikh(sg);
|
||||
seq_util seq(m);
|
||||
|
||||
char_set cs = parikh.minterm_to_char_set(nullptr);
|
||||
SASSERT(cs.is_full(seq.max_char()));
|
||||
std::cout << " nullptr → full set ok\n";
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Entry point
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
void tst_seq_parikh() {
|
||||
// stride tests
|
||||
test_stride_fixed_length();
|
||||
test_stride_star_fixed_body();
|
||||
test_stride_star_three_char();
|
||||
test_stride_plus();
|
||||
test_stride_concat_stars();
|
||||
test_stride_union_no_common_period();
|
||||
test_stride_union_same_period();
|
||||
test_stride_loop();
|
||||
test_stride_full_seq();
|
||||
test_stride_empty_regex();
|
||||
test_stride_epsilon();
|
||||
test_stride_opt();
|
||||
|
||||
// generate_parikh_constraints tests
|
||||
test_generate_constraints_ab_star();
|
||||
test_generate_constraints_bounded_loop();
|
||||
test_generate_constraints_stride_one();
|
||||
test_generate_constraints_fixed_length();
|
||||
test_generate_constraints_dep_propagated();
|
||||
|
||||
// apply_to_node tests
|
||||
test_apply_to_node_adds_constraints();
|
||||
test_apply_to_node_stride_one_no_constraints();
|
||||
|
||||
// check_parikh_conflict tests
|
||||
test_check_conflict_unconstrained_no_conflict();
|
||||
test_check_conflict_valid_k_exists();
|
||||
test_check_conflict_no_valid_k();
|
||||
test_check_conflict_abc_star();
|
||||
test_check_conflict_stride_one_never_conflicts();
|
||||
|
||||
// minterm_to_char_set tests
|
||||
test_minterm_full_char();
|
||||
test_minterm_empty_regex();
|
||||
test_minterm_range();
|
||||
test_minterm_complement();
|
||||
test_minterm_intersection();
|
||||
test_minterm_diff();
|
||||
test_minterm_singleton();
|
||||
test_minterm_nullptr_is_full();
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue