mirror of
https://github.com/Z3Prover/z3
synced 2026-07-04 06:16:09 +00:00
Derive with ranges (#9965)
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: Margus Veanes <margus@microsoft.com> Co-authored-by: Margus Veanes <veanes@users.noreply.github.com>
This commit is contained in:
parent
e76239ceda
commit
15f33f458d
27 changed files with 3597 additions and 1541 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -1,9 +1,12 @@
|
||||||
*~
|
*~
|
||||||
rebase.cmd
|
rebase.cmd
|
||||||
|
reports/
|
||||||
|
crashes/
|
||||||
*.pyc
|
*.pyc
|
||||||
*.pyo
|
*.pyo
|
||||||
# Ignore callgrind files
|
# Ignore callgrind files
|
||||||
callgrind.out.*
|
callgrind.out.*
|
||||||
|
gmon.out
|
||||||
# .hpp files are automatically generated
|
# .hpp files are automatically generated
|
||||||
*.hpp
|
*.hpp
|
||||||
.env
|
.env
|
||||||
|
|
|
||||||
BIN
gmon.out
BIN
gmon.out
Binary file not shown.
|
|
@ -39,7 +39,11 @@ z3_add_component(rewriter
|
||||||
rewriter.cpp
|
rewriter.cpp
|
||||||
seq_axioms.cpp
|
seq_axioms.cpp
|
||||||
seq_eq_solver.cpp
|
seq_eq_solver.cpp
|
||||||
|
seq_derive.cpp
|
||||||
seq_subset.cpp
|
seq_subset.cpp
|
||||||
|
seq_derive.cpp
|
||||||
|
seq_range_collapse.cpp
|
||||||
|
seq_range_predicate.cpp
|
||||||
seq_rewriter.cpp
|
seq_rewriter.cpp
|
||||||
seq_regex_bisim.cpp
|
seq_regex_bisim.cpp
|
||||||
seq_skolem.cpp
|
seq_skolem.cpp
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@ Notes:
|
||||||
#include "ast/rewriter/bool_rewriter.h"
|
#include "ast/rewriter/bool_rewriter.h"
|
||||||
#include "params/bool_rewriter_params.hpp"
|
#include "params/bool_rewriter_params.hpp"
|
||||||
#include "ast/rewriter/rewriter_def.h"
|
#include "ast/rewriter/rewriter_def.h"
|
||||||
|
#include "ast/rewriter/expr_safe_replace.h"
|
||||||
#include "ast/ast_lt.h"
|
#include "ast/ast_lt.h"
|
||||||
#include "ast/for_each_expr.h"
|
#include "ast/for_each_expr.h"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
@ -1185,4 +1186,30 @@ void bool_rewriter::mk_ge2(expr* a, expr* b, expr* c, expr_ref& r) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool bool_rewriter::decompose_ite(expr *r, expr_ref &c, expr_ref &th, expr_ref &el) {
|
||||||
|
expr *cond = nullptr, *r1 = nullptr, *r2 = nullptr;
|
||||||
|
if (m().is_ite(r, cond, r1, r2)) {
|
||||||
|
c = cond;
|
||||||
|
th = r1;
|
||||||
|
el = r2;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
for (expr *e : subterms::ground(expr_ref(r, m()))) {
|
||||||
|
if (m().is_ite(e, cond, r1, r2)) {
|
||||||
|
m_rep1.reset();
|
||||||
|
m_rep2.reset();
|
||||||
|
m_rep1.insert(e, r1);
|
||||||
|
m_rep2.insert(e, r2);
|
||||||
|
c = cond;
|
||||||
|
th = r;
|
||||||
|
el = r;
|
||||||
|
m_rep1(th);
|
||||||
|
m_rep2(el);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template class rewriter_tpl<bool_rewriter_cfg>;
|
template class rewriter_tpl<bool_rewriter_cfg>;
|
||||||
|
|
@ -20,6 +20,7 @@ Notes:
|
||||||
|
|
||||||
#include "ast/ast.h"
|
#include "ast/ast.h"
|
||||||
#include "ast/rewriter/rewriter.h"
|
#include "ast/rewriter/rewriter.h"
|
||||||
|
#include "ast/rewriter/expr_safe_replace.h"
|
||||||
#include "util/params.h"
|
#include "util/params.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -64,6 +65,7 @@ class bool_rewriter {
|
||||||
ptr_vector<expr> m_todo1, m_todo2;
|
ptr_vector<expr> m_todo1, m_todo2;
|
||||||
unsigned_vector m_counts1, m_counts2;
|
unsigned_vector m_counts1, m_counts2;
|
||||||
expr_mark m_marked;
|
expr_mark m_marked;
|
||||||
|
expr_safe_replace m_rep1, m_rep2;
|
||||||
|
|
||||||
br_status mk_flat_and_core(unsigned num_args, expr * const * args, expr_ref & result);
|
br_status mk_flat_and_core(unsigned num_args, expr * const * args, expr_ref & result);
|
||||||
br_status mk_flat_or_core(unsigned num_args, expr * const * args, expr_ref & result);
|
br_status mk_flat_or_core(unsigned num_args, expr * const * args, expr_ref & result);
|
||||||
|
|
@ -87,7 +89,7 @@ class bool_rewriter {
|
||||||
expr_ref simplify_eq_ite(expr* value, expr* ite);
|
expr_ref simplify_eq_ite(expr* value, expr* ite);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
bool_rewriter(ast_manager & m, params_ref const & p = params_ref()):m_manager(m), m_local_ctx_cost(0) {
|
bool_rewriter(ast_manager & m, params_ref const & p = params_ref()):m_manager(m), m_local_ctx_cost(0), m_rep1(m), m_rep2(m) {
|
||||||
updt_params(p);
|
updt_params(p);
|
||||||
}
|
}
|
||||||
ast_manager & m() const { return m_manager; }
|
ast_manager & m() const { return m_manager; }
|
||||||
|
|
@ -242,6 +244,11 @@ public:
|
||||||
void mk_nand(expr * arg1, expr * arg2, expr_ref & result);
|
void mk_nand(expr * arg1, expr * arg2, expr_ref & result);
|
||||||
void mk_nor(expr * arg1, expr * arg2, expr_ref & result);
|
void mk_nor(expr * arg1, expr * arg2, expr_ref & result);
|
||||||
void mk_ge2(expr* a, expr* b, expr* c, expr_ref& result);
|
void mk_ge2(expr* a, expr* b, expr* c, expr_ref& result);
|
||||||
|
|
||||||
|
// If r is, or contains, an if-then-else, decompose it into a top-level
|
||||||
|
// ite by hoisting the (first) inner ite condition: returns c, th, el such
|
||||||
|
// that r is equivalent to (ite c th el). Returns false if r has no ite.
|
||||||
|
bool decompose_ite(expr *r, expr_ref &c, expr_ref &th, expr_ref &el);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct bool_rewriter_cfg : public default_rewriter_cfg {
|
struct bool_rewriter_cfg : public default_rewriter_cfg {
|
||||||
|
|
|
||||||
1520
src/ast/rewriter/seq_derive.cpp
Normal file
1520
src/ast/rewriter/seq_derive.cpp
Normal file
File diff suppressed because it is too large
Load diff
266
src/ast/rewriter/seq_derive.h
Normal file
266
src/ast/rewriter/seq_derive.h
Normal file
|
|
@ -0,0 +1,266 @@
|
||||||
|
/*++
|
||||||
|
Copyright (c) 2026 Microsoft Corporation
|
||||||
|
|
||||||
|
Module Name:
|
||||||
|
|
||||||
|
seq_derive.h
|
||||||
|
|
||||||
|
Abstract:
|
||||||
|
|
||||||
|
Symbolic derivative computation for regular expressions.
|
||||||
|
Produces an ITE-tree (transition regex) representation where
|
||||||
|
the free variable is de Bruijn index 0 representing the input character.
|
||||||
|
|
||||||
|
Based on the theory of symbolic derivatives and transition regexes:
|
||||||
|
- Veanes et al., "On Symbolic Derivatives and Transition Regexes" (LPAR 2024)
|
||||||
|
- Varatalu, Veanes, Ernits, "RE#" (POPL 2025)
|
||||||
|
- Stanford, Veanes, Bjørner, "Symbolic Boolean Derivatives" (PLDI 2021)
|
||||||
|
|
||||||
|
Authors:
|
||||||
|
|
||||||
|
Nikolaj Bjorner (nbjorner) 2025-06-03
|
||||||
|
|
||||||
|
--*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ast/seq_decl_plugin.h"
|
||||||
|
#include "ast/arith_decl_plugin.h"
|
||||||
|
#include "ast/array_decl_plugin.h"
|
||||||
|
#include "ast/rewriter/bool_rewriter.h"
|
||||||
|
#include "util/obj_pair_hashtable.h"
|
||||||
|
#include "util/obj_triple_hashtable.h"
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
|
class seq_rewriter;
|
||||||
|
|
||||||
|
namespace seq {
|
||||||
|
|
||||||
|
enum class derivative_kind { antimirov_t, brzozowski_t };
|
||||||
|
/**
|
||||||
|
* Symbolic derivative engine for regular expressions.
|
||||||
|
*
|
||||||
|
* Given a regex r, operator()(r) computes a symbolic derivative δ(r)
|
||||||
|
* represented as an ITE-tree over character predicates (using de Bruijn
|
||||||
|
* variable 0 for the character). Evaluating the ITE-tree for a concrete
|
||||||
|
* character 'a' yields the classical Brzozowski derivative δ_a(r).
|
||||||
|
*
|
||||||
|
* The ITE-tree structure implicitly defines minterms (equivalence classes
|
||||||
|
* of characters indistinguishable by the regex).
|
||||||
|
*
|
||||||
|
* Key properties:
|
||||||
|
* - Results are memoized for termination on cyclic derivative graphs
|
||||||
|
* - Union/intersection operands are sorted for ACI canonicalization
|
||||||
|
* - Depth-bounded to prevent stack overflow
|
||||||
|
*/
|
||||||
|
class derive {
|
||||||
|
ast_manager& m;
|
||||||
|
seq_util m_util;
|
||||||
|
arith_util m_autil;
|
||||||
|
bool_rewriter m_br;
|
||||||
|
seq_rewriter& m_re;
|
||||||
|
|
||||||
|
// Cache: maps (ele, regex) pair to its derivative
|
||||||
|
obj_pair_map<expr, expr, expr*> m_acache, m_bcache;
|
||||||
|
obj_pair_map<expr, expr, expr*> m_atop_cache, m_btop_cache; // post-simplify cache
|
||||||
|
expr_ref_vector m_trail; // pin cached results
|
||||||
|
|
||||||
|
// Op cache for ITE-hoisting operations (union, inter, concat, complement)
|
||||||
|
// Path-aware caches: key is (a, b, path_expr) for binary ops, (a, path_expr) for complement
|
||||||
|
obj_triple_map<expr, expr, expr, expr *> m_aunion_cache, m_bunion_cache, m_ainter_cache, m_binter_cache, m_axor_cache, m_bxor_cache;
|
||||||
|
obj_pair_map<expr, expr, expr*> m_aconcat_cache, m_bconcat_cache;
|
||||||
|
obj_pair_map<expr, expr, expr*> m_acomplement_cache, m_bcomplement_cache;
|
||||||
|
|
||||||
|
// Depth limiting
|
||||||
|
unsigned m_depth { 0 };
|
||||||
|
static const unsigned m_max_depth = 512;
|
||||||
|
|
||||||
|
seq_util::rex& re() { return m_util.re; }
|
||||||
|
seq_util& u() { return m_util; }
|
||||||
|
|
||||||
|
derivative_kind m_derivative_kind = derivative_kind::antimirov_t;
|
||||||
|
|
||||||
|
// The element (character) for the current derivative computation
|
||||||
|
expr_ref m_ele;
|
||||||
|
|
||||||
|
// Path state for inline pruning during mk_inter/mk_union/mk_complement
|
||||||
|
using intervals_t = svector<std::pair<unsigned, unsigned>>;
|
||||||
|
|
||||||
|
// Path: vector of signed atoms
|
||||||
|
svector<std::pair<expr*, bool>> m_path;
|
||||||
|
// Intervals: feasible character ranges under current path (append-only)
|
||||||
|
intervals_t m_intervals;
|
||||||
|
unsigned m_intervals_start { 0 };
|
||||||
|
// Stack of saved states for push/pop
|
||||||
|
struct path_save { unsigned path_sz; unsigned intervals_sz; unsigned intervals_start; expr* path_expr; };
|
||||||
|
svector<path_save> m_path_stack;
|
||||||
|
// Boolean expression encoding of current path (for cache keys)
|
||||||
|
expr_ref m_path_expr;
|
||||||
|
|
||||||
|
// Path interface
|
||||||
|
lbool push(expr* c, bool sign); // l_true: implied, l_undef: pushed (must pop), l_false: contradicts
|
||||||
|
void pop(); // restore state to matching push
|
||||||
|
expr* get_path_expr() { return m_path_expr; }
|
||||||
|
|
||||||
|
obj_pair_map<expr, expr, expr *> &cache() {
|
||||||
|
return m_derivative_kind == derivative_kind::antimirov_t ? m_acache : m_bcache;
|
||||||
|
}
|
||||||
|
|
||||||
|
obj_pair_map<expr, expr, expr *> &top_cache() {
|
||||||
|
return m_derivative_kind == derivative_kind::antimirov_t ? m_atop_cache : m_btop_cache;
|
||||||
|
}
|
||||||
|
|
||||||
|
obj_triple_map<expr, expr, expr, expr *> &union_cache() {
|
||||||
|
return m_derivative_kind == derivative_kind::antimirov_t ? m_aunion_cache : m_bunion_cache;
|
||||||
|
}
|
||||||
|
|
||||||
|
obj_triple_map<expr, expr, expr, expr *> &inter_cache() {
|
||||||
|
return m_derivative_kind == derivative_kind::antimirov_t ? m_ainter_cache : m_binter_cache;
|
||||||
|
}
|
||||||
|
|
||||||
|
obj_triple_map<expr, expr, expr, expr *> &xor_cache() {
|
||||||
|
return m_derivative_kind == derivative_kind::antimirov_t ? m_axor_cache : m_bxor_cache;
|
||||||
|
}
|
||||||
|
|
||||||
|
obj_pair_map<expr, expr, expr *> &concat_cache() {
|
||||||
|
return m_derivative_kind == derivative_kind::antimirov_t ? m_aconcat_cache : m_bconcat_cache;
|
||||||
|
}
|
||||||
|
|
||||||
|
obj_pair_map<expr, expr, expr *> &complement_cache() {
|
||||||
|
return m_derivative_kind == derivative_kind::antimirov_t ? m_acomplement_cache : m_bcomplement_cache;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hoist ITE: apply_op through ite(c, t, e) with path pruning
|
||||||
|
expr_ref apply_ite(expr* c, expr* t, expr* e, expr* r, std::function<expr_ref(expr*, expr*)> apply_op);
|
||||||
|
expr_ref apply_ite(expr* c, expr* t1, expr* e1, expr* t2, expr* e2, std::function<expr_ref(expr*, expr*)> apply_op);
|
||||||
|
expr_ref apply_ite(expr* c, expr* t, expr* e, std::function<expr_ref(expr*)> apply_op);
|
||||||
|
// Common ITE dispatch for binary ops (union/inter)
|
||||||
|
expr_ref hoist_ite(expr* a, expr* b, std::function<expr_ref(expr*, expr*)> apply_op);
|
||||||
|
|
||||||
|
// Evaluate a condition against the current path/intervals
|
||||||
|
lbool eval_path_cond(expr* c);
|
||||||
|
|
||||||
|
// Internal helpers for push
|
||||||
|
lbool push_path_atoms(expr* c, bool sign);
|
||||||
|
lbool push_intervals_impl(expr* c, bool sign);
|
||||||
|
|
||||||
|
// Core derivative computation
|
||||||
|
expr_ref derive_rec(expr* r);
|
||||||
|
expr_ref derive_core(expr* r);
|
||||||
|
|
||||||
|
// Helpers for specific regex constructs
|
||||||
|
expr_ref derive_to_re(expr* s, sort* seq_sort);
|
||||||
|
expr_ref derive_range(expr* lo, expr* hi, sort* seq_sort);
|
||||||
|
expr_ref derive_of_pred(expr* pred, sort* seq_sort);
|
||||||
|
|
||||||
|
// Nullable check: returns a Boolean expression
|
||||||
|
expr_ref is_nullable(expr* r);
|
||||||
|
expr_ref is_nullable_symbolic_regex(expr* r, sort* seq_sort);
|
||||||
|
|
||||||
|
// Smart constructors with path-aware simplification and ACI canonicalization
|
||||||
|
expr_ref mk_union(expr* a, expr* b);
|
||||||
|
bool are_complements(expr* a, expr* b);
|
||||||
|
unsigned union_id(expr* e); // complement-aware ID for sorting
|
||||||
|
bool is_subset(expr* a, expr* b);
|
||||||
|
expr_ref mk_union_core(expr* a, expr* b);
|
||||||
|
void add_union_elem(expr_ref_vector& set, expr* e);
|
||||||
|
expr_ref mk_inter(expr* a, expr* b);
|
||||||
|
expr_ref mk_inter_core(expr* a, expr* b);
|
||||||
|
expr_ref mk_concat(expr* a, expr* b);
|
||||||
|
expr_ref mk_complement(expr* a);
|
||||||
|
expr_ref mk_complement_core(expr* a);
|
||||||
|
expr_ref mk_xor(expr *a, expr *b);
|
||||||
|
expr_ref mk_xor_core(expr *a, expr *b);
|
||||||
|
expr_ref mk_core(decl_kind k, expr* a, expr* b);
|
||||||
|
expr_ref mk_ite(expr* c, expr* t, expr* e);
|
||||||
|
|
||||||
|
// Distribute concatenation through ITE/union in derivative
|
||||||
|
expr_ref mk_deriv_concat(expr* d, expr* tail);
|
||||||
|
expr_ref mk_deriv_concat_core(expr* d, expr* tail);
|
||||||
|
|
||||||
|
// Extract head character and tail from a sequence expression
|
||||||
|
bool get_head_tail(expr* s1, expr* s2, expr_ref& hd, expr_ref& tl);
|
||||||
|
|
||||||
|
// Predicate implication for character range conditions.
|
||||||
|
bool pred_implies(bool sign_a, expr* a, bool sign_b, expr* b);
|
||||||
|
bool pred_implies(expr* a, expr* b);
|
||||||
|
|
||||||
|
// Normalize reverse(r)
|
||||||
|
expr_ref mk_regex_reverse(expr* r);
|
||||||
|
|
||||||
|
// Condition evaluation helpers
|
||||||
|
lbool eval_cond(expr* cond);
|
||||||
|
lbool eval_range_cond(expr* c);
|
||||||
|
void intersect_intervals(unsigned lo, unsigned hi);
|
||||||
|
void exclude_interval(unsigned lo, unsigned hi);
|
||||||
|
|
||||||
|
// Cofactor enumeration over a transition regex (ITE-tree).
|
||||||
|
void get_cofactors_rec(expr* r, expr_ref_pair_vector& result);
|
||||||
|
|
||||||
|
// Re-apply union/intersection simplifications bottom-up to a cofactor
|
||||||
|
// leaf. decompose_ite substitutes ITE branch values structurally
|
||||||
|
// (no simplification), so leaves can contain un-normalized nodes such
|
||||||
|
// as union(R, none) or inter(R, none); this rebuilds them through
|
||||||
|
// mk_union/mk_inter so equal states share a canonical form.
|
||||||
|
expr_ref clean_leaf(expr* r);
|
||||||
|
|
||||||
|
sort* re_sort(expr* r) { return r->get_sort(); }
|
||||||
|
sort* seq_sort(expr* r) { sort* s = nullptr; m_util.is_re(r, s); return s; }
|
||||||
|
sort* ele_sort(expr* r) { sort* s = seq_sort(r); sort* e = nullptr; m_util.is_seq(s, e); return e; }
|
||||||
|
|
||||||
|
void reset();
|
||||||
|
void reset_op_caches();
|
||||||
|
|
||||||
|
public:
|
||||||
|
derive(ast_manager& m, seq_rewriter& re);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the derivative of regex r with respect to element ele.
|
||||||
|
* When ele is a de Bruijn variable, produces a symbolic ITE-tree.
|
||||||
|
* When ele is a concrete character, produces the concrete derivative.
|
||||||
|
*/
|
||||||
|
expr_ref operator()(derivative_kind k, expr* ele, expr* r);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience: symbolic derivative using de Bruijn var 0.
|
||||||
|
*/
|
||||||
|
expr_ref operator()(derivative_kind k, expr* r);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Nullable check: returns a Boolean expression that is true iff r accepts the empty string.
|
||||||
|
*/
|
||||||
|
expr_ref nullable(expr* r) { return is_nullable(r); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enumerate the cofactors (min-terms) of a transition regex r taken with
|
||||||
|
* respect to element ele. r is an ITE-tree over character predicates on
|
||||||
|
* ele; for every feasible path through the tree this produces a pair
|
||||||
|
* (path_condition, leaf_regex). Infeasible character-interval
|
||||||
|
* combinations are pruned using the same path/interval context that the
|
||||||
|
* derivative engine uses while hoisting ITEs.
|
||||||
|
*/
|
||||||
|
void get_cofactors(expr* ele, expr* r, expr_ref_pair_vector& result);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the symbolic derivative of r and enumerate its reachable
|
||||||
|
* leaves in fully ITE-hoisted normal form.
|
||||||
|
*
|
||||||
|
* Concretely this returns, for every feasible minterm (character
|
||||||
|
* class) of δ(r), a pair (path_condition, target_regex). Every
|
||||||
|
* if-then-else over the input character (including ones that would
|
||||||
|
* otherwise be buried under a concat/union) is hoisted to the top
|
||||||
|
* via the same path/interval pruning used by the derivative engine,
|
||||||
|
* so each target_regex is free of (:var 0) and its nullability is
|
||||||
|
* always decidable. Unions are kept intact as single leaves (a
|
||||||
|
* union leaf denotes a single bisimulation state). Infeasible
|
||||||
|
* minterms are pruned, so all returned leaves are reachable.
|
||||||
|
*
|
||||||
|
* This is the entry point the regex_bisim equivalence procedure
|
||||||
|
* uses: it consumes the target_regex of each pair and ignores the
|
||||||
|
* (redundant) path condition.
|
||||||
|
*/
|
||||||
|
void derivative_cofactors(expr* r, expr_ref_pair_vector& result);
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
160
src/ast/rewriter/seq_range_collapse.cpp
Normal file
160
src/ast/rewriter/seq_range_collapse.cpp
Normal file
|
|
@ -0,0 +1,160 @@
|
||||||
|
/*++
|
||||||
|
Copyright (c) 2026 Microsoft Corporation
|
||||||
|
|
||||||
|
Module Name:
|
||||||
|
|
||||||
|
seq_range_collapse.cpp
|
||||||
|
|
||||||
|
Abstract:
|
||||||
|
|
||||||
|
Implementation of regex <-> range_predicate translation for the
|
||||||
|
boolean-combination-of-ranges fragment. See header for the recognized
|
||||||
|
grammar and the canonical regex AST emitted by materialization.
|
||||||
|
|
||||||
|
Authors:
|
||||||
|
|
||||||
|
Margus Veanes (veanes) 2026
|
||||||
|
|
||||||
|
--*/
|
||||||
|
|
||||||
|
#include "ast/rewriter/seq_range_collapse.h"
|
||||||
|
|
||||||
|
namespace seq {
|
||||||
|
|
||||||
|
bool regex_to_range_predicate(seq_util& u, expr* r, range_predicate& out) {
|
||||||
|
// The range algebra only models sets of single characters over the
|
||||||
|
// unsigned character domain [0, max_char]. Guard against any regex
|
||||||
|
// whose element type is not a sequence of characters (e.g. a regex
|
||||||
|
// over (Seq Int) or (Seq (Seq Char))): for such regexes the
|
||||||
|
// re.range/re.union/... matchers below would silently fabricate a
|
||||||
|
// character-class predicate and change semantics. Reject them up
|
||||||
|
// front so callers fall back to the generic regex path.
|
||||||
|
sort* seq_sort = nullptr;
|
||||||
|
if (!u.is_re(r, seq_sort) || !u.is_string(seq_sort))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
unsigned const max_char = u.max_char();
|
||||||
|
auto& re = u.re;
|
||||||
|
|
||||||
|
if (re.is_empty(r)) {
|
||||||
|
out = range_predicate::empty(max_char);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (re.is_full_char(r)) {
|
||||||
|
out = range_predicate::top(max_char);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
unsigned lo = 0, hi = 0;
|
||||||
|
expr* lo_e = nullptr;
|
||||||
|
expr* hi_e = nullptr;
|
||||||
|
if (re.is_range(r, lo_e, hi_e)) {
|
||||||
|
auto extract_char = [&](expr* e, unsigned& c) -> bool {
|
||||||
|
if (u.is_const_char(e, c)) return true;
|
||||||
|
expr* inner = nullptr;
|
||||||
|
if (u.str.is_unit(e, inner) && u.is_const_char(inner, c)) return true;
|
||||||
|
zstring s;
|
||||||
|
if (u.str.is_string(e, s) && s.length() == 1) {
|
||||||
|
c = s[0];
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
if (!extract_char(lo_e, lo) || !extract_char(hi_e, hi))
|
||||||
|
return false;
|
||||||
|
// Empty/inverted range [lo > hi] is the empty regex.
|
||||||
|
if (lo > hi) {
|
||||||
|
out = range_predicate::empty(max_char);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
out = range_predicate::range(lo, hi, max_char);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
expr *a = nullptr, *b = nullptr, *c = nullptr;
|
||||||
|
if (re.is_union(r, a, b)) {
|
||||||
|
range_predicate pa(max_char), pb(max_char);
|
||||||
|
if (!regex_to_range_predicate(u, a, pa)) return false;
|
||||||
|
if (!regex_to_range_predicate(u, b, pb)) return false;
|
||||||
|
out = pa | pb;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
auto mk_diff = [&](expr *a, expr *b) -> bool {
|
||||||
|
range_predicate pa(max_char), pb(max_char);
|
||||||
|
if (!regex_to_range_predicate(u, a, pa))
|
||||||
|
return false;
|
||||||
|
if (!regex_to_range_predicate(u, b, pb))
|
||||||
|
return false;
|
||||||
|
out = pa - pb;
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
if (re.is_diff(r, a, b))
|
||||||
|
return mk_diff(a, b);
|
||||||
|
|
||||||
|
if (re.is_intersection(r, a, b) && re.is_complement(b, c))
|
||||||
|
return mk_diff(a, c);
|
||||||
|
|
||||||
|
if (re.is_intersection(r, a, b) && re.is_complement(a, c))
|
||||||
|
return mk_diff(b, c);
|
||||||
|
|
||||||
|
if (re.is_intersection(r, a, b)) {
|
||||||
|
range_predicate pa(max_char), pb(max_char);
|
||||||
|
if (!regex_to_range_predicate(u, a, pa)) return false;
|
||||||
|
if (!regex_to_range_predicate(u, b, pb)) return false;
|
||||||
|
out = pa & pb;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// NOTE: re.complement is intentionally NOT handled here.
|
||||||
|
// re.complement is the SEQUENCE-level complement: its language
|
||||||
|
// includes the empty string, strings of length >= 2, and any
|
||||||
|
// length-1 string outside the operand. A character-class
|
||||||
|
// range_predicate can only describe a set of length-1 strings,
|
||||||
|
// so collapsing re.complement(R) to ~R (character-level
|
||||||
|
// complement) would change semantics whenever R is wrapped in
|
||||||
|
// any sequence-level context (e.g. re.diff at the top level,
|
||||||
|
// or membership tests). De-Morgan equivalences and the
|
||||||
|
// special cases re.complement(re.empty) / re.complement(re.full)
|
||||||
|
// are already handled directly in seq_rewriter::mk_re_complement.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static expr_ref mk_unit_string_from_char(seq_util& u, unsigned c) {
|
||||||
|
return expr_ref(u.str.mk_string(zstring(c)), u.get_manager());
|
||||||
|
}
|
||||||
|
|
||||||
|
static expr_ref mk_single_range_regex(seq_util& u, unsigned lo, unsigned hi, sort* re_sort) {
|
||||||
|
ast_manager& m = u.get_manager();
|
||||||
|
return expr_ref(u.re.mk_range(re_sort, lo, hi), m);
|
||||||
|
}
|
||||||
|
|
||||||
|
expr_ref range_predicate_to_regex(seq_util& u, range_predicate const& p, sort* seq_sort) {
|
||||||
|
ast_manager& m = u.get_manager();
|
||||||
|
sort* re_sort = u.re.mk_re(seq_sort);
|
||||||
|
if (p.is_empty())
|
||||||
|
return expr_ref(u.re.mk_empty(re_sort), m);
|
||||||
|
unsigned const n = p.num_ranges();
|
||||||
|
SASSERT(n > 0);
|
||||||
|
if (n == 1) {
|
||||||
|
auto [lo, hi] = p[0];
|
||||||
|
return mk_single_range_regex(u, lo, hi, re_sort);
|
||||||
|
}
|
||||||
|
// Build single-range AST nodes first, then sort by expression id
|
||||||
|
// so the resulting right-associated union matches the canonical
|
||||||
|
// id-sorted shape that seq_rewriter::merge_regex_sets expects.
|
||||||
|
// Without this the merge algorithm produces incorrect unions
|
||||||
|
// when it has to combine our materialized output with another
|
||||||
|
// (id-sorted) regex set.
|
||||||
|
expr_ref_vector ranges(m);
|
||||||
|
for (unsigned i = 0; i < n; ++i) {
|
||||||
|
auto [lo, hi] = p[i];
|
||||||
|
ranges.push_back(mk_single_range_regex(u, lo, hi, re_sort));
|
||||||
|
}
|
||||||
|
std::sort(ranges.data(), ranges.data() + ranges.size(),
|
||||||
|
[](expr* a, expr* b) { return a->get_id() < b->get_id(); });
|
||||||
|
expr_ref acc(ranges.get(n - 1), m);
|
||||||
|
for (unsigned i = n - 1; i-- > 0; )
|
||||||
|
acc = expr_ref(u.re.mk_union(ranges.get(i), acc), m);
|
||||||
|
return acc;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
71
src/ast/rewriter/seq_range_collapse.h
Normal file
71
src/ast/rewriter/seq_range_collapse.h
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
/*++
|
||||||
|
Copyright (c) 2026 Microsoft Corporation
|
||||||
|
|
||||||
|
Module Name:
|
||||||
|
|
||||||
|
seq_range_collapse.h
|
||||||
|
|
||||||
|
Abstract:
|
||||||
|
|
||||||
|
Recognize regexes that are boolean combinations of character-class
|
||||||
|
primitives (re.empty, re.full_char, re.range with concrete chars,
|
||||||
|
and re.union/inter/comp/diff over translatable arguments), and
|
||||||
|
materialize a seq::range_predicate back into a canonical regex AST.
|
||||||
|
|
||||||
|
Together with seq_rewriter integration, this lets any boolean
|
||||||
|
combination of character-class regexes collapse to a canonical
|
||||||
|
multi-range form, so that equivalent character classes share AST
|
||||||
|
identity, and downstream consumers (derivative, OneStep, caching)
|
||||||
|
can short-circuit them as pure range predicates.
|
||||||
|
|
||||||
|
Authors:
|
||||||
|
|
||||||
|
Margus Veanes (veanes) 2026
|
||||||
|
|
||||||
|
--*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ast/rewriter/seq_range_predicate.h"
|
||||||
|
#include "ast/seq_decl_plugin.h"
|
||||||
|
|
||||||
|
namespace seq {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If r is a boolean combination of character-class regex primitives
|
||||||
|
* over the unsigned character domain [0, max_char], compute the
|
||||||
|
* equivalent range_predicate and return true. Otherwise return false
|
||||||
|
* with out untouched.
|
||||||
|
*
|
||||||
|
* Recognized fragment (all character-class-preserving operations):
|
||||||
|
* re.empty -> empty
|
||||||
|
* re.full_char_set -> top
|
||||||
|
* re.range "c_lo" "c_hi" (concrete) -> [c_lo, c_hi]
|
||||||
|
* re.union r1 r2 -> p1 | p2
|
||||||
|
* re.intersection r1 r2 -> p1 & p2
|
||||||
|
* re.diff r1 r2 -> p1 - p2
|
||||||
|
*
|
||||||
|
* Notably re.complement is NOT recognized: it is a SEQUENCE-level
|
||||||
|
* complement (over all of Σ*), not a character-class complement, so
|
||||||
|
* collapsing it would change semantics whenever the result is used
|
||||||
|
* in any non-character-class context. Sequence-level rewrites for
|
||||||
|
* re.complement (double-comp, deMorgan, etc.) are handled directly
|
||||||
|
* in seq_rewriter::mk_re_complement.
|
||||||
|
*/
|
||||||
|
bool regex_to_range_predicate(seq_util& u, expr* r, range_predicate& out);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Canonical materialization of p as a regex AST over the given
|
||||||
|
* sequence sort. Two range_predicates with equal canonical
|
||||||
|
* representations produce structurally identical regex ASTs:
|
||||||
|
*
|
||||||
|
* empty -> re.empty
|
||||||
|
* top -> re.full_char_set
|
||||||
|
* single range [lo, hi] -> re.range "lo" "hi"
|
||||||
|
* multiple ranges -> right-associated re.union of single
|
||||||
|
* ranges, in increasing order of lo
|
||||||
|
* (matching the canonical range order
|
||||||
|
* held by range_predicate).
|
||||||
|
*/
|
||||||
|
expr_ref range_predicate_to_regex(seq_util& u, range_predicate const& p, sort* seq_sort);
|
||||||
|
|
||||||
|
}
|
||||||
292
src/ast/rewriter/seq_range_predicate.cpp
Normal file
292
src/ast/rewriter/seq_range_predicate.cpp
Normal file
|
|
@ -0,0 +1,292 @@
|
||||||
|
/*++
|
||||||
|
Copyright (c) 2026 Microsoft Corporation
|
||||||
|
|
||||||
|
Module Name:
|
||||||
|
|
||||||
|
seq_range_predicate.cpp
|
||||||
|
|
||||||
|
Abstract:
|
||||||
|
|
||||||
|
Implementation of the specialized range-algebra used by symbolic
|
||||||
|
derivative computation and regex rewriting. See seq_range_predicate.h
|
||||||
|
for the algebraic specification.
|
||||||
|
|
||||||
|
All Boolean operations are implemented as single linear sweeps over
|
||||||
|
the canonical sorted range vectors and produce canonical output
|
||||||
|
(sorted, disjoint, non-adjacent).
|
||||||
|
|
||||||
|
Authors:
|
||||||
|
|
||||||
|
Margus Veanes (veanes) 2026
|
||||||
|
|
||||||
|
--*/
|
||||||
|
|
||||||
|
#include "ast/rewriter/seq_range_predicate.h"
|
||||||
|
#include "util/debug.h"
|
||||||
|
#include <algorithm>
|
||||||
|
#include <ostream>
|
||||||
|
|
||||||
|
namespace seq {
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Factories
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
range_predicate range_predicate::empty(unsigned max_char) {
|
||||||
|
return range_predicate(max_char);
|
||||||
|
}
|
||||||
|
|
||||||
|
range_predicate range_predicate::top(unsigned max_char) {
|
||||||
|
range_predicate r(max_char);
|
||||||
|
r.m_ranges.push_back({0u, max_char});
|
||||||
|
SASSERT(r.well_formed());
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
range_predicate range_predicate::singleton(unsigned c, unsigned max_char) {
|
||||||
|
SASSERT(c <= max_char);
|
||||||
|
range_predicate r(max_char);
|
||||||
|
r.m_ranges.push_back({c, c});
|
||||||
|
SASSERT(r.well_formed());
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
range_predicate range_predicate::range(unsigned lo, unsigned hi, unsigned max_char) {
|
||||||
|
range_predicate r(max_char);
|
||||||
|
if (lo <= hi && lo <= max_char) {
|
||||||
|
unsigned clipped_hi = hi <= max_char ? hi : max_char;
|
||||||
|
r.m_ranges.push_back({lo, clipped_hi});
|
||||||
|
}
|
||||||
|
SASSERT(r.well_formed());
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Invariants and observers
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
bool range_predicate::well_formed() const {
|
||||||
|
for (unsigned i = 0; i < m_ranges.size(); ++i) {
|
||||||
|
auto [lo, hi] = m_ranges[i];
|
||||||
|
if (lo > hi) return false;
|
||||||
|
if (hi > m_max_char) return false;
|
||||||
|
if (i > 0) {
|
||||||
|
unsigned prev_hi = m_ranges[i - 1].second;
|
||||||
|
// Non-adjacent and sorted: prev_hi + 1 < lo, with care
|
||||||
|
// around prev_hi == UINT_MAX which we never expect because
|
||||||
|
// hi <= m_max_char.
|
||||||
|
if (prev_hi + 1 >= lo) return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool range_predicate::contains(unsigned c) const {
|
||||||
|
// Binary search on first element of pairs.
|
||||||
|
unsigned lo = 0, hi = m_ranges.size();
|
||||||
|
while (lo < hi) {
|
||||||
|
unsigned mid = lo + (hi - lo) / 2;
|
||||||
|
auto [a, b] = m_ranges[mid];
|
||||||
|
if (c < a) hi = mid;
|
||||||
|
else if (c > b) lo = mid + 1;
|
||||||
|
else return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t range_predicate::cardinality() const {
|
||||||
|
uint64_t n = 0;
|
||||||
|
for (auto [lo, hi] : m_ranges)
|
||||||
|
n += static_cast<uint64_t>(hi) - static_cast<uint64_t>(lo) + 1u;
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Equality, ordering, hashing
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
bool range_predicate::equals(range_predicate const& o) const {
|
||||||
|
if (m_max_char != o.m_max_char) return false;
|
||||||
|
if (m_ranges.size() != o.m_ranges.size()) return false;
|
||||||
|
for (unsigned i = 0; i < m_ranges.size(); ++i)
|
||||||
|
if (m_ranges[i] != o.m_ranges[i]) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool range_predicate::operator<(range_predicate const& o) const {
|
||||||
|
if (m_max_char != o.m_max_char)
|
||||||
|
return m_max_char < o.m_max_char;
|
||||||
|
unsigned n = std::min(m_ranges.size(), o.m_ranges.size());
|
||||||
|
for (unsigned i = 0; i < n; ++i) {
|
||||||
|
auto a = m_ranges[i];
|
||||||
|
auto b = o.m_ranges[i];
|
||||||
|
if (a.first != b.first) return a.first < b.first;
|
||||||
|
if (a.second != b.second) return a.second < b.second;
|
||||||
|
}
|
||||||
|
return m_ranges.size() < o.m_ranges.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned range_predicate::hash() const {
|
||||||
|
// FNV-1a 32-bit over (max_char, then each (lo, hi)).
|
||||||
|
uint32_t h = 2166136261u;
|
||||||
|
auto step = [&](uint32_t x) {
|
||||||
|
h ^= x;
|
||||||
|
h *= 16777619u;
|
||||||
|
};
|
||||||
|
step(m_max_char);
|
||||||
|
for (auto [lo, hi] : m_ranges) {
|
||||||
|
step(lo);
|
||||||
|
step(hi);
|
||||||
|
}
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Boolean operations
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
// Append (lo, hi) to result, merging with the previous range if
|
||||||
|
// adjacent or overlapping. Maintains canonical form.
|
||||||
|
inline void append_merged(svector<std::pair<unsigned, unsigned>>& result,
|
||||||
|
unsigned lo, unsigned hi) {
|
||||||
|
SASSERT(lo <= hi);
|
||||||
|
if (!result.empty() && result.back().second + 1 >= lo) {
|
||||||
|
if (result.back().second < hi)
|
||||||
|
result.back().second = hi;
|
||||||
|
} else {
|
||||||
|
result.push_back({lo, hi});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
range_predicate range_predicate::operator|(range_predicate const& o) const {
|
||||||
|
SASSERT(m_max_char == o.m_max_char);
|
||||||
|
range_predicate r(m_max_char);
|
||||||
|
unsigned i = 0, j = 0;
|
||||||
|
const unsigned n = m_ranges.size();
|
||||||
|
const unsigned m = o.m_ranges.size();
|
||||||
|
while (i < n && j < m) {
|
||||||
|
auto a = m_ranges[i];
|
||||||
|
auto b = o.m_ranges[j];
|
||||||
|
if (a.first <= b.first) {
|
||||||
|
append_merged(r.m_ranges, a.first, a.second);
|
||||||
|
++i;
|
||||||
|
} else {
|
||||||
|
append_merged(r.m_ranges, b.first, b.second);
|
||||||
|
++j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (i < n) {
|
||||||
|
auto a = m_ranges[i++];
|
||||||
|
append_merged(r.m_ranges, a.first, a.second);
|
||||||
|
}
|
||||||
|
while (j < m) {
|
||||||
|
auto b = o.m_ranges[j++];
|
||||||
|
append_merged(r.m_ranges, b.first, b.second);
|
||||||
|
}
|
||||||
|
SASSERT(r.well_formed());
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
range_predicate range_predicate::operator&(range_predicate const& o) const {
|
||||||
|
SASSERT(m_max_char == o.m_max_char);
|
||||||
|
range_predicate r(m_max_char);
|
||||||
|
unsigned i = 0, j = 0;
|
||||||
|
const unsigned n = m_ranges.size();
|
||||||
|
const unsigned m = o.m_ranges.size();
|
||||||
|
while (i < n && j < m) {
|
||||||
|
auto [a_lo, a_hi] = m_ranges[i];
|
||||||
|
auto [b_lo, b_hi] = o.m_ranges[j];
|
||||||
|
unsigned lo = std::max(a_lo, b_lo);
|
||||||
|
unsigned hi = std::min(a_hi, b_hi);
|
||||||
|
if (lo <= hi)
|
||||||
|
r.m_ranges.push_back({lo, hi});
|
||||||
|
// Advance the range that ends first.
|
||||||
|
if (a_hi < b_hi) ++i;
|
||||||
|
else if (b_hi < a_hi) ++j;
|
||||||
|
else { ++i; ++j; }
|
||||||
|
}
|
||||||
|
SASSERT(r.well_formed());
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
range_predicate range_predicate::operator~() const {
|
||||||
|
range_predicate r(m_max_char);
|
||||||
|
unsigned cursor = 0;
|
||||||
|
for (auto [lo, hi] : m_ranges) {
|
||||||
|
if (cursor < lo)
|
||||||
|
r.m_ranges.push_back({cursor, lo - 1});
|
||||||
|
// Step past hi without overflow: hi <= m_max_char and we
|
||||||
|
// only step when more characters remain.
|
||||||
|
if (hi >= m_max_char) {
|
||||||
|
cursor = m_max_char + 1; // sentinel: no more characters
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
cursor = hi + 1;
|
||||||
|
}
|
||||||
|
if (cursor <= m_max_char)
|
||||||
|
r.m_ranges.push_back({cursor, m_max_char});
|
||||||
|
SASSERT(r.well_formed());
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
range_predicate range_predicate::operator-(range_predicate const& o) const {
|
||||||
|
SASSERT(m_max_char == o.m_max_char);
|
||||||
|
// A - B by linear sweep: for each range of A, subtract overlapping
|
||||||
|
// ranges of B. Both inputs are sorted so we advance j monotonically.
|
||||||
|
range_predicate r(m_max_char);
|
||||||
|
unsigned j = 0;
|
||||||
|
const unsigned m = o.m_ranges.size();
|
||||||
|
for (auto [a_lo, a_hi] : m_ranges) {
|
||||||
|
unsigned cursor = a_lo;
|
||||||
|
while (j < m && o.m_ranges[j].second < cursor)
|
||||||
|
++j;
|
||||||
|
unsigned k = j;
|
||||||
|
while (k < m && o.m_ranges[k].first <= a_hi) {
|
||||||
|
auto [b_lo, b_hi] = o.m_ranges[k];
|
||||||
|
if (cursor < b_lo)
|
||||||
|
r.m_ranges.push_back({cursor, std::min(a_hi, b_lo - 1)});
|
||||||
|
if (b_hi >= a_hi) {
|
||||||
|
cursor = a_hi + 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
cursor = b_hi + 1;
|
||||||
|
++k;
|
||||||
|
}
|
||||||
|
if (cursor <= a_hi)
|
||||||
|
r.m_ranges.push_back({cursor, a_hi});
|
||||||
|
}
|
||||||
|
SASSERT(r.well_formed());
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
range_predicate range_predicate::operator^(range_predicate const& o) const {
|
||||||
|
SASSERT(m_max_char == o.m_max_char);
|
||||||
|
// (A | B) - (A & B), but implemented directly with one linear sweep
|
||||||
|
// over the union of breakpoints.
|
||||||
|
return (*this | o) - (*this & o);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Display
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
std::ostream& range_predicate::display(std::ostream& out) const {
|
||||||
|
if (m_ranges.empty()) {
|
||||||
|
return out << "[]";
|
||||||
|
}
|
||||||
|
out << "[";
|
||||||
|
bool first = true;
|
||||||
|
for (auto [lo, hi] : m_ranges) {
|
||||||
|
if (!first) out << ",";
|
||||||
|
first = false;
|
||||||
|
if (lo == hi)
|
||||||
|
out << lo;
|
||||||
|
else
|
||||||
|
out << lo << "-" << hi;
|
||||||
|
}
|
||||||
|
return out << "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
127
src/ast/rewriter/seq_range_predicate.h
Normal file
127
src/ast/rewriter/seq_range_predicate.h
Normal file
|
|
@ -0,0 +1,127 @@
|
||||||
|
/*++
|
||||||
|
Copyright (c) 2026 Microsoft Corporation
|
||||||
|
|
||||||
|
Module Name:
|
||||||
|
|
||||||
|
seq_range_predicate.h
|
||||||
|
|
||||||
|
Abstract:
|
||||||
|
|
||||||
|
Specialized range-algebra over an unsigned character domain [0, max_char].
|
||||||
|
|
||||||
|
A range_predicate represents a subset of the character domain as a
|
||||||
|
sorted sequence of non-overlapping, non-adjacent, non-empty ranges:
|
||||||
|
|
||||||
|
[(lo_0, hi_0), (lo_1, hi_1), ...] with hi_i + 1 < lo_{i+1}.
|
||||||
|
|
||||||
|
The representation is canonical, so two range_predicates over the same
|
||||||
|
domain are extensionally equivalent iff their internal vectors are
|
||||||
|
elementwise equal.
|
||||||
|
|
||||||
|
All Boolean operations (union, intersection, complement, difference)
|
||||||
|
are linear in the total number of ranges and produce the canonical
|
||||||
|
representation.
|
||||||
|
|
||||||
|
Intended use:
|
||||||
|
* path conditions for symbolic derivative computation,
|
||||||
|
* OneStep predicates capturing length-1 acceptance,
|
||||||
|
* smart-constructor side conditions for regex rewrites such as
|
||||||
|
R & psi --> toregex(OneStep(R) & psi).
|
||||||
|
|
||||||
|
The type is a pure value: no ast_manager allocation occurs in its
|
||||||
|
construction or its Boolean operations. Conversion to and from
|
||||||
|
expr* is the responsibility of a separate translator (see callers
|
||||||
|
in seq_derive / seq_rewriter).
|
||||||
|
|
||||||
|
Authors:
|
||||||
|
|
||||||
|
Margus Veanes (veanes) 2026
|
||||||
|
|
||||||
|
--*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "util/vector.h"
|
||||||
|
#include <iosfwd>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace seq {
|
||||||
|
|
||||||
|
class range_predicate {
|
||||||
|
using range_t = std::pair<unsigned, unsigned>;
|
||||||
|
using ranges_t = svector<range_t>;
|
||||||
|
|
||||||
|
// Sorted by first; ranges are disjoint and non-adjacent;
|
||||||
|
// every range satisfies lo <= hi <= m_max_char.
|
||||||
|
ranges_t m_ranges;
|
||||||
|
unsigned m_max_char { 0 };
|
||||||
|
|
||||||
|
// Invariant check used in debug builds.
|
||||||
|
bool well_formed() const;
|
||||||
|
|
||||||
|
public:
|
||||||
|
range_predicate() = default;
|
||||||
|
explicit range_predicate(unsigned max_char) : m_max_char(max_char) {}
|
||||||
|
|
||||||
|
// ---------------- Factory functions ----------------
|
||||||
|
|
||||||
|
static range_predicate empty(unsigned max_char);
|
||||||
|
static range_predicate top(unsigned max_char);
|
||||||
|
static range_predicate singleton(unsigned c, unsigned max_char);
|
||||||
|
static range_predicate range(unsigned lo, unsigned hi, unsigned max_char);
|
||||||
|
|
||||||
|
// ---------------- Observers ----------------
|
||||||
|
|
||||||
|
unsigned max_char() const { return m_max_char; }
|
||||||
|
unsigned num_ranges() const { return m_ranges.size(); }
|
||||||
|
range_t operator[](unsigned i) const { return m_ranges[i]; }
|
||||||
|
ranges_t const& ranges() const { return m_ranges; }
|
||||||
|
|
||||||
|
bool is_empty() const { return m_ranges.empty(); }
|
||||||
|
bool is_top() const {
|
||||||
|
return m_ranges.size() == 1
|
||||||
|
&& m_ranges[0].first == 0
|
||||||
|
&& m_ranges[0].second == m_max_char;
|
||||||
|
}
|
||||||
|
bool is_singleton(unsigned& c) const {
|
||||||
|
if (m_ranges.size() != 1) return false;
|
||||||
|
if (m_ranges[0].first != m_ranges[0].second) return false;
|
||||||
|
c = m_ranges[0].first;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
bool contains(unsigned c) const;
|
||||||
|
|
||||||
|
// Number of characters in the predicate (well-defined for any domain).
|
||||||
|
uint64_t cardinality() const;
|
||||||
|
|
||||||
|
// ---------------- Equality, ordering, hashing ----------------
|
||||||
|
|
||||||
|
bool equals(range_predicate const& o) const;
|
||||||
|
bool operator==(range_predicate const& o) const { return equals(o); }
|
||||||
|
bool operator!=(range_predicate const& o) const { return !equals(o); }
|
||||||
|
|
||||||
|
// Total order: lexicographic on the canonical range sequence,
|
||||||
|
// with shorter sequences ordered before longer prefixes.
|
||||||
|
// Predicates over different domains compare by max_char first.
|
||||||
|
bool operator<(range_predicate const& o) const;
|
||||||
|
bool less_than(range_predicate const& o) const { return *this < o; }
|
||||||
|
|
||||||
|
unsigned hash() const;
|
||||||
|
|
||||||
|
// ---------------- Boolean operations ----------------
|
||||||
|
|
||||||
|
range_predicate operator|(range_predicate const& o) const; // union
|
||||||
|
range_predicate operator&(range_predicate const& o) const; // intersection
|
||||||
|
range_predicate operator-(range_predicate const& o) const; // difference
|
||||||
|
range_predicate operator^(range_predicate const& o) const; // symmetric diff
|
||||||
|
range_predicate operator~() const; // complement
|
||||||
|
|
||||||
|
// ---------------- Display ----------------
|
||||||
|
|
||||||
|
std::ostream& display(std::ostream& out) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline std::ostream& operator<<(std::ostream& out, range_predicate const& p) {
|
||||||
|
return p.display(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -85,45 +85,6 @@ namespace seq {
|
||||||
return is_ground(r);
|
return is_ground(r);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
Collect the leaves of a t-regex der (an ITE-tree whose leaves are
|
|
||||||
regex expressions) into the output vector. Empty (re.empty) leaves
|
|
||||||
are dropped.
|
|
||||||
|
|
||||||
Each leaf is treated as a single bisimulation state regardless of
|
|
||||||
its top-level shape (including re.union and re.antimirov_union):
|
|
||||||
descending into a union at the leaf would split one state into
|
|
||||||
several, which is semantically unsound for the bisimulation /
|
|
||||||
union-find merging that follows.
|
|
||||||
|
|
||||||
Returns false if we encountered an unexpected node (e.g. a free
|
|
||||||
variable creeping in) — in that case the caller should bail out.
|
|
||||||
*/
|
|
||||||
bool regex_bisim::collect_leaves(expr* der, expr_ref_vector& leaves) {
|
|
||||||
ptr_vector<expr> work;
|
|
||||||
obj_hashtable<expr> seen;
|
|
||||||
work.push_back(der);
|
|
||||||
seen.insert(der);
|
|
||||||
while (!work.empty()) {
|
|
||||||
expr* e = work.back();
|
|
||||||
work.pop_back();
|
|
||||||
expr* c = nullptr, * t = nullptr, * f = nullptr;
|
|
||||||
if (m.is_ite(e, c, t, f)) {
|
|
||||||
if (seen.insert_if_not_there(t))
|
|
||||||
work.push_back(t);
|
|
||||||
if (seen.insert_if_not_there(f))
|
|
||||||
work.push_back(f);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (m_util.re.is_empty(e))
|
|
||||||
continue;
|
|
||||||
if (!m_util.is_re(e))
|
|
||||||
return false;
|
|
||||||
leaves.push_back(e);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Fast inequivalence check based on the get_info().classical flag.
|
Fast inequivalence check based on the get_info().classical flag.
|
||||||
|
|
||||||
|
|
@ -232,15 +193,19 @@ namespace seq {
|
||||||
m_worklist.pop_back();
|
m_worklist.pop_back();
|
||||||
|
|
||||||
// Compute the symbolic derivative wrt the canonical variable
|
// Compute the symbolic derivative wrt the canonical variable
|
||||||
// (:var 0). The result is a transition regex (ITE tree) whose
|
// (:var 0) and enumerate its reachable leaves in fully
|
||||||
// leaves are regex expressions. We use the classical Brzozowski
|
// ITE-hoisted normal form. Every if-then-else over the input
|
||||||
// entry point so the derivative stays as a single TRegex and
|
// character — even one that would otherwise be buried under a
|
||||||
// does not lift unions to the top via antimirov nodes — this
|
// concat or union — is hoisted to the top and infeasible
|
||||||
// preserves the XOR-pair invariant the bisimulation relies on.
|
// minterms are pruned, so each leaf is a ground regex free of
|
||||||
expr_ref d(m_rw.mk_brz_derivative(r), m);
|
// (:var 0) whose nullability is always decidable. Unions are
|
||||||
|
// kept intact as single leaves (a union leaf denotes a single
|
||||||
|
// bisimulation state, never a split into separate states).
|
||||||
|
expr_ref_pair_vector cofs(m);
|
||||||
|
m_rw.brz_derivative_cofactors(r, cofs);
|
||||||
expr_ref_vector leaves(m);
|
expr_ref_vector leaves(m);
|
||||||
if (!collect_leaves(d, leaves))
|
for (auto const& p : cofs)
|
||||||
return l_undef;
|
leaves.push_back(p.second);
|
||||||
|
|
||||||
// First pass: check for any nullable leaf (definitive
|
// First pass: check for any nullable leaf (definitive
|
||||||
// distinguishing empty-continuation word) or any classically
|
// distinguishing empty-continuation word) or any classically
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,6 @@ namespace seq {
|
||||||
|
|
||||||
unsigned node_of(expr* r);
|
unsigned node_of(expr* r);
|
||||||
bool merge_leaf(expr* xor_pair);
|
bool merge_leaf(expr* xor_pair);
|
||||||
bool collect_leaves(expr* der, expr_ref_vector& leaves);
|
|
||||||
lbool nullability(expr* r);
|
lbool nullability(expr* r);
|
||||||
bool is_supported(expr* r);
|
bool is_supported(expr* r);
|
||||||
// Returns true if the leaf l proves that the original pair is
|
// Returns true if the leaf l proves that the original pair is
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -19,6 +19,7 @@ Notes:
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "ast/seq_decl_plugin.h"
|
#include "ast/seq_decl_plugin.h"
|
||||||
|
#include "ast/rewriter/seq_derive.h"
|
||||||
#include "ast/ast_pp.h"
|
#include "ast/ast_pp.h"
|
||||||
#include "ast/arith_decl_plugin.h"
|
#include "ast/arith_decl_plugin.h"
|
||||||
#include "ast/rewriter/rewriter_types.h"
|
#include "ast/rewriter/rewriter_types.h"
|
||||||
|
|
@ -128,15 +129,20 @@ class seq_rewriter {
|
||||||
void insert(decl_kind op, expr* a, expr* b, expr* c, expr* r);
|
void insert(decl_kind op, expr* a, expr* b, expr* c, expr* r);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
friend class seq::derive;
|
||||||
|
|
||||||
seq_util m_util;
|
seq_util m_util;
|
||||||
seq_subset m_subset;
|
seq_subset m_subset;
|
||||||
arith_util m_autil;
|
arith_util m_autil;
|
||||||
bool_rewriter m_br;
|
bool_rewriter m_br;
|
||||||
|
seq::derive m_derive;
|
||||||
// re2automaton m_re2aut;
|
// re2automaton m_re2aut;
|
||||||
op_cache m_op_cache;
|
op_cache m_op_cache;
|
||||||
expr_ref_vector m_es, m_lhs, m_rhs;
|
expr_ref_vector m_es, m_lhs, m_rhs;
|
||||||
bool m_coalesce_chars;
|
bool m_coalesce_chars = true;
|
||||||
bool m_in_bisim { false };
|
bool m_in_bisim { false };
|
||||||
|
unsigned m_re_deriv_depth { 0 };
|
||||||
|
static const unsigned m_max_re_deriv_depth = 512;
|
||||||
|
|
||||||
enum length_comparison {
|
enum length_comparison {
|
||||||
shorter_c,
|
shorter_c,
|
||||||
|
|
@ -178,50 +184,17 @@ class seq_rewriter {
|
||||||
// - occurrences of a_ch are replaced by empty (replace_all never outputs a)
|
// - occurrences of a_ch are replaced by empty (replace_all never outputs a)
|
||||||
expr_ref re_replace_char(expr *r, unsigned a_ch, unsigned b_ch, expr *a_str, expr *b_str);
|
expr_ref re_replace_char(expr *r, unsigned a_ch, unsigned b_ch, expr *a_str, expr *b_str);
|
||||||
|
|
||||||
// Calculate derivative, memoized and enforcing a normal form
|
|
||||||
expr_ref is_nullable_rec(expr* r);
|
|
||||||
expr_ref mk_derivative_rec(expr* ele, expr* r);
|
|
||||||
expr_ref mk_der_op(decl_kind k, expr* a, expr* b);
|
|
||||||
expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b);
|
|
||||||
expr_ref mk_der_concat(expr* a, expr* b);
|
|
||||||
expr_ref mk_der_union(expr* a, expr* b);
|
|
||||||
expr_ref mk_der_inter(expr* a, expr* b);
|
|
||||||
expr_ref mk_der_xor(expr* a, expr* b);
|
|
||||||
expr_ref mk_der_compl(expr* a);
|
|
||||||
expr_ref mk_der_cond(expr* cond, expr* ele, sort* seq_sort);
|
|
||||||
expr_ref mk_der_antimirov_union(expr* r1, expr* r2);
|
|
||||||
bool ite_bdds_compatible(expr* a, expr* b);
|
|
||||||
/* if r has the form deriv(en..deriv(e1,to_re(s))..) returns 's = [e1..en]' else returns '() in r'*/
|
|
||||||
expr_ref is_nullable_symbolic_regex(expr* r, sort* seq_sort);
|
|
||||||
#ifdef Z3DEBUG
|
|
||||||
bool check_deriv_normal_form(expr* r, int level = 3);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref& result);
|
|
||||||
|
|
||||||
expr_ref mk_antimirov_deriv(expr* e, expr* r, expr* path);
|
|
||||||
expr_ref mk_in_antimirov_rec(expr* s, expr* d);
|
|
||||||
expr_ref mk_in_antimirov(expr* s, expr* d);
|
|
||||||
|
|
||||||
expr_ref mk_antimirov_deriv_intersection(expr* elem, expr* d1, expr* d2, expr* path);
|
|
||||||
expr_ref mk_antimirov_deriv_concat(expr* d, expr* r);
|
|
||||||
expr_ref mk_antimirov_deriv_negate(expr* elem, expr* d);
|
|
||||||
expr_ref mk_antimirov_deriv_union(expr* d1, expr* d2);
|
|
||||||
expr_ref mk_antimirov_deriv_restrict(expr* elem, expr* d1, expr* cond);
|
|
||||||
expr_ref mk_regex_reverse(expr* r);
|
|
||||||
expr_ref mk_regex_concat(expr* r1, expr* r2);
|
|
||||||
|
|
||||||
expr_ref merge_regex_sets(expr* r1, expr* r2, expr* unit, std::function<bool(expr*, expr*&, expr*&)>& decompose, std::function<expr* (expr*, expr*)>& compose);
|
expr_ref merge_regex_sets(expr* r1, expr* r2, expr* unit, std::function<bool(expr*, expr*&, expr*&)>& decompose, std::function<expr* (expr*, expr*)>& compose);
|
||||||
|
|
||||||
// elem is (:var 0) and path a condition that may have (:var 0) as a free variable
|
// elem is (:var 0) and path a condition that may have (:var 0) as a free variable
|
||||||
// simplify path, e.g., (:var 0) = 'a' & (:var 0) = 'b' is simplified to false
|
// simplify path, e.g., (:var 0) = 'a' & (:var 0) = 'b' is simplified to false
|
||||||
expr_ref simplify_path(expr* elem, expr* path);
|
// expr_ref simplify_path(expr* elem, expr* path);
|
||||||
|
|
||||||
bool lt_char(expr* ch1, expr* ch2);
|
bool lt_char(expr* ch1, expr* ch2);
|
||||||
bool eq_char(expr* ch1, expr* ch2);
|
bool eq_char(expr* ch1, expr* ch2);
|
||||||
bool neq_char(expr* ch1, expr* ch2);
|
bool neq_char(expr* ch1, expr* ch2);
|
||||||
bool le_char(expr* ch1, expr* ch2);
|
bool le_char(expr* ch1, expr* ch2);
|
||||||
bool pred_implies(expr* a, expr* b);
|
|
||||||
bool are_complements(expr* r1, expr* r2) const;
|
bool are_complements(expr* r1, expr* r2) const;
|
||||||
bool is_subset(expr* r1, expr* r2) const;
|
bool is_subset(expr* r1, expr* r2) const;
|
||||||
|
|
||||||
|
|
@ -267,6 +240,14 @@ class seq_rewriter {
|
||||||
br_status mk_re_union0(expr* a, expr* b, expr_ref& result);
|
br_status mk_re_union0(expr* a, expr* b, expr_ref& result);
|
||||||
br_status mk_re_inter0(expr* a, expr* b, expr_ref& result);
|
br_status mk_re_inter0(expr* a, expr* b, expr_ref& result);
|
||||||
br_status mk_re_complement(expr* a, expr_ref& result);
|
br_status mk_re_complement(expr* a, expr_ref& result);
|
||||||
|
// Range-set collapse helpers: if the operands form a boolean
|
||||||
|
// combination of character-class regexes, materialize the result as a
|
||||||
|
// canonical regex over a single range_predicate. See
|
||||||
|
// ast/rewriter/seq_range_collapse.h for the recognized fragment.
|
||||||
|
// NOTE: re.complement is intentionally not in this set because it
|
||||||
|
// operates at the sequence level, not the character-class level.
|
||||||
|
bool try_collapse_re_union(expr* a, expr* b, expr_ref& result);
|
||||||
|
bool try_collapse_re_inter(expr* a, expr* b, expr_ref& result);
|
||||||
br_status mk_re_star(expr* a, expr_ref& result);
|
br_status mk_re_star(expr* a, expr_ref& result);
|
||||||
br_status mk_re_diff(expr* a, expr* b, expr_ref& result);
|
br_status mk_re_diff(expr* a, expr* b, expr_ref& result);
|
||||||
br_status mk_re_xor(expr* a, expr* b, expr_ref& result);
|
br_status mk_re_xor(expr* a, expr* b, expr_ref& result);
|
||||||
|
|
@ -351,9 +332,9 @@ class seq_rewriter {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
seq_rewriter(ast_manager & m, params_ref const & p = params_ref()):
|
seq_rewriter(ast_manager & m, params_ref const & p = params_ref()):
|
||||||
m_util(m), m_subset(m_util.re), m_autil(m), m_br(m, p), // m_re2aut(m),
|
m_util(m), m_subset(m_util.re), m_autil(m), m_br(m, p), m_derive(m, *this),
|
||||||
m_op_cache(m), m_es(m),
|
m_op_cache(m), m_es(m),
|
||||||
m_lhs(m), m_rhs(m), m_coalesce_chars(true) {
|
m_lhs(m), m_rhs(m) {
|
||||||
}
|
}
|
||||||
ast_manager & m() const { return m_util.get_manager(); }
|
ast_manager & m() const { return m_util.get_manager(); }
|
||||||
family_id get_fid() const { return m_util.get_family_id(); }
|
family_id get_fid() const { return m_util.get_family_id(); }
|
||||||
|
|
@ -364,7 +345,7 @@ public:
|
||||||
static void get_param_descrs(param_descrs & r);
|
static void get_param_descrs(param_descrs & r);
|
||||||
|
|
||||||
|
|
||||||
bool coalesce_chars() const { return m_coalesce_chars; }
|
// bool coalesce_chars() const { return m_coalesce_chars; }
|
||||||
|
|
||||||
br_status mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result);
|
br_status mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result);
|
||||||
br_status mk_eq_core(expr * lhs, expr * rhs, expr_ref & result);
|
br_status mk_eq_core(expr * lhs, expr * rhs, expr_ref & result);
|
||||||
|
|
@ -380,6 +361,34 @@ public:
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
expr_ref mk_xor0(expr *a, expr *b) {
|
||||||
|
expr_ref result(m());
|
||||||
|
if (mk_re_xor0(a, b, result) == BR_FAILED)
|
||||||
|
result = re().mk_xor(a, b);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
expr_ref mk_union(expr *a, expr *b) {
|
||||||
|
expr_ref result(m());
|
||||||
|
if (mk_re_union(a, b, result) == BR_FAILED)
|
||||||
|
result = re().mk_union(a, b);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
expr_ref mk_inter(expr *a, expr *b) {
|
||||||
|
expr_ref result(m());
|
||||||
|
if (mk_re_inter(a, b, result) == BR_FAILED)
|
||||||
|
result = re().mk_inter(a, b);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
expr_ref mk_complement(expr *a) {
|
||||||
|
expr_ref result(m());
|
||||||
|
if (mk_re_complement(a, result) == BR_FAILED)
|
||||||
|
result = re().mk_complement(a);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* makes concat and simplifies
|
* makes concat and simplifies
|
||||||
*/
|
*/
|
||||||
|
|
@ -436,11 +445,35 @@ public:
|
||||||
variable v0 = (:var 0). Unlike `mk_derivative` this entry point keeps
|
variable v0 = (:var 0). Unlike `mk_derivative` this entry point keeps
|
||||||
the symbolic derivative as a single transition regex (TRegex): boolean
|
the symbolic derivative as a single transition regex (TRegex): boolean
|
||||||
operators are pushed into the ITE leaves rather than lifted to the top
|
operators are pushed into the ITE leaves rather than lifted to the top
|
||||||
via _OP_RE_ANTIMIROV_UNION. Used by the regex_bisim equivalence
|
as a union. Used by the regex_bisim equivalence
|
||||||
procedure which relies on each leaf of D(p XOR q) being a coherent
|
procedure which relies on each leaf of D(p XOR q) being a coherent
|
||||||
XOR pair (D_v p) XOR (D_v q).
|
XOR pair (D_v p) XOR (D_v q).
|
||||||
*/
|
*/
|
||||||
expr_ref mk_brz_derivative(expr* r);
|
expr_ref mk_brz_derivative(expr *r) {
|
||||||
|
return mk_derivative(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Enumerate the cofactors (min-terms) of a transition regex r taken with
|
||||||
|
respect to ele. Produces (path_condition, leaf_regex) pairs for every
|
||||||
|
feasible path through the ITE-tree, pruning infeasible character ranges.
|
||||||
|
Delegates to the derivative engine so the same path/interval context used
|
||||||
|
while hoisting ITEs is reused for the leaf simplification.
|
||||||
|
*/
|
||||||
|
void get_cofactors(expr* ele, expr* r, expr_ref_pair_vector& result) {
|
||||||
|
m_derive.get_cofactors(ele, r, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Compute the symbolic derivative of r and enumerate its reachable leaves
|
||||||
|
in fully ITE-hoisted normal form: a list of (path_condition, target)
|
||||||
|
pairs where every target is free of (:var 0) (so nullability is always
|
||||||
|
decidable) and unions are kept intact as single states. Used by
|
||||||
|
regex_bisim, which consumes the targets and ignores the path conditions.
|
||||||
|
*/
|
||||||
|
void brz_derivative_cofactors(expr* r, expr_ref_pair_vector& result) {
|
||||||
|
m_derive.derivative_cofactors(r, result);
|
||||||
|
}
|
||||||
|
|
||||||
// heuristic elimination of element from condition that comes form a derivative.
|
// heuristic elimination of element from condition that comes form a derivative.
|
||||||
// special case optimization for conjunctions of equalities, disequalities and ranges.
|
// special case optimization for conjunctions of equalities, disequalities and ranges.
|
||||||
|
|
@ -451,6 +484,8 @@ public:
|
||||||
/* Apply simplifications to the intersection to keep it normalized (r1 and r2 are not normalized)*/
|
/* Apply simplifications to the intersection to keep it normalized (r1 and r2 are not normalized)*/
|
||||||
expr_ref mk_regex_inter_normalize(expr* r1, expr* r2);
|
expr_ref mk_regex_inter_normalize(expr* r1, expr* r2);
|
||||||
|
|
||||||
|
expr_ref mk_regex_concat(expr *r1, expr *r2);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Extract some string that is a member of r.
|
* Extract some string that is a member of r.
|
||||||
* Return true if a valid string was extracted.
|
* Return true if a valid string was extracted.
|
||||||
|
|
|
||||||
|
|
@ -39,16 +39,12 @@ bool seq_subset::is_subset_rec(expr* a, expr* b, unsigned depth) const {
|
||||||
if (m_re.is_dot_plus(b) && m_re.get_info(a).nullable == l_false)
|
if (m_re.is_dot_plus(b) && m_re.get_info(a).nullable == l_false)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// a ⊆ a*
|
|
||||||
if (m_re.is_star(b, b1) && is_subset_rec(a, b1, depth))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
// e ⊆ a*
|
// e ⊆ a*
|
||||||
if (m_re.is_epsilon(a) && m_re.is_star(b, b1))
|
if (m_re.is_epsilon(a) && m_re.is_star(b, b1))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// R ⊆ R*
|
// a ⊆ a*: if b = b1* and a ⊆ b1, then a ⊆ b1*
|
||||||
if (m_re.is_star(b, b1) && is_subset_rec(a, b1, depth + 1))
|
if (m_re.is_star(b, b1) && is_subset_rec(a, b1, depth))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// R1* ⊆ R2* if R1 ⊆ R2
|
// R1* ⊆ R2* if R1 ⊆ R2
|
||||||
|
|
@ -112,6 +108,12 @@ bool seq_subset::is_subset_rec(expr* a, expr* b, unsigned depth) const {
|
||||||
if (m_re.is_concat(b, b1, b2) && m_re.is_full_seq(b1) && is_subset_rec(a, b2, depth))
|
if (m_re.is_concat(b, b1, b2) && m_re.is_full_seq(b1) && is_subset_rec(a, b2, depth))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
// prefix absorption: P·R' ⊆ Σ*·R' for any prefix P (since P ⊆ Σ*).
|
||||||
|
// Detect that a has R' (= b2) as a concatenation suffix, where b = Σ*·R'.
|
||||||
|
// Covers contains-patterns, e.g. Σ*·a·Σ*·b·Σ* ⊆ Σ*·b·Σ*.
|
||||||
|
if (m_re.is_concat(b, b1, b2) && m_re.is_full_seq(b1) && ends_with(a, b2))
|
||||||
|
return true;
|
||||||
|
|
||||||
// R ⊆ R'·Σ* if R ⊆ R'
|
// R ⊆ R'·Σ* if R ⊆ R'
|
||||||
if (m_re.is_concat(b, b1, b2) && m_re.is_full_seq(b2) && is_subset_rec(a, b1, depth))
|
if (m_re.is_concat(b, b1, b2) && m_re.is_full_seq(b2) && is_subset_rec(a, b1, depth))
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -144,3 +146,30 @@ bool seq_subset::is_subset_rec(expr* a, expr* b, unsigned depth) const {
|
||||||
bool seq_subset::is_subset(expr* a, expr* b) const {
|
bool seq_subset::is_subset(expr* a, expr* b) const {
|
||||||
return is_subset_rec(a, b, 0);
|
return is_subset_rec(a, b, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool seq_subset::ends_with(expr* a, expr* suf) const {
|
||||||
|
if (a == suf)
|
||||||
|
return true;
|
||||||
|
// Flatten both regexes into their sequence of concatenation factors
|
||||||
|
// (independent of left/right associativity) and test list-suffix equality.
|
||||||
|
ptr_vector<expr> af, sf;
|
||||||
|
flatten_concat(a, af);
|
||||||
|
flatten_concat(suf, sf);
|
||||||
|
if (sf.size() > af.size())
|
||||||
|
return false;
|
||||||
|
unsigned off = af.size() - sf.size();
|
||||||
|
for (unsigned i = 0; i < sf.size(); ++i)
|
||||||
|
if (af[off + i] != sf[i])
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void seq_subset::flatten_concat(expr* a, ptr_vector<expr>& out) const {
|
||||||
|
expr* a1 = nullptr, * a2 = nullptr;
|
||||||
|
if (m_re.is_concat(a, a1, a2)) {
|
||||||
|
flatten_concat(a1, out);
|
||||||
|
flatten_concat(a2, out);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
out.push_back(a);
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,12 @@ class seq_subset {
|
||||||
|
|
||||||
bool is_subset_rec(expr* a, expr* b, unsigned depth) const;
|
bool is_subset_rec(expr* a, expr* b, unsigned depth) const;
|
||||||
|
|
||||||
|
// true if regex a, viewed as a flattened concatenation, has suf as a
|
||||||
|
// structural (concatenation) suffix.
|
||||||
|
bool ends_with(expr* a, expr* suf) const;
|
||||||
|
|
||||||
|
void flatten_concat(expr* a, ptr_vector<expr>& out) const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit seq_subset(seq_util::rex& re) : m_re(re) {}
|
explicit seq_subset(seq_util::rex& re) : m_re(re) {}
|
||||||
bool is_subset(expr* a, expr* b) const;
|
bool is_subset(expr* a, expr* b) const;
|
||||||
|
|
|
||||||
|
|
@ -240,7 +240,6 @@ void seq_decl_plugin::init() {
|
||||||
m_sigs[OP_RE_OF_PRED] = alloc(psig, m, "re.of.pred", 1, 1, &predA, reA);
|
m_sigs[OP_RE_OF_PRED] = alloc(psig, m, "re.of.pred", 1, 1, &predA, reA);
|
||||||
m_sigs[OP_RE_REVERSE] = alloc(psig, m, "re.reverse", 1, 1, &reA, reA);
|
m_sigs[OP_RE_REVERSE] = alloc(psig, m, "re.reverse", 1, 1, &reA, reA);
|
||||||
m_sigs[OP_RE_DERIVATIVE] = alloc(psig, m, "re.derivative", 1, 2, AreA, reA);
|
m_sigs[OP_RE_DERIVATIVE] = alloc(psig, m, "re.derivative", 1, 2, AreA, reA);
|
||||||
m_sigs[_OP_RE_ANTIMIROV_UNION] = alloc(psig, m, "re.union", 1, 2, reAreA, reA);
|
|
||||||
m_sigs[OP_SEQ_TO_RE] = alloc(psig, m, "seq.to.re", 1, 1, &seqA, reA);
|
m_sigs[OP_SEQ_TO_RE] = alloc(psig, m, "seq.to.re", 1, 1, &seqA, reA);
|
||||||
m_sigs[OP_SEQ_IN_RE] = alloc(psig, m, "seq.in.re", 1, 2, seqAreA, boolT);
|
m_sigs[OP_SEQ_IN_RE] = alloc(psig, m, "seq.in.re", 1, 2, seqAreA, boolT);
|
||||||
m_sigs[OP_SEQ_REPLACE_RE_ALL] = alloc(psig, m, "str.replace_re_all", 1, 3, seqAreAseqA, seqA);
|
m_sigs[OP_SEQ_REPLACE_RE_ALL] = alloc(psig, m, "str.replace_re_all", 1, 3, seqAreAseqA, seqA);
|
||||||
|
|
@ -412,7 +411,6 @@ func_decl* seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, p
|
||||||
case OP_RE_COMPLEMENT:
|
case OP_RE_COMPLEMENT:
|
||||||
case OP_RE_REVERSE:
|
case OP_RE_REVERSE:
|
||||||
case OP_RE_DERIVATIVE:
|
case OP_RE_DERIVATIVE:
|
||||||
case _OP_RE_ANTIMIROV_UNION:
|
|
||||||
m_has_re = true;
|
m_has_re = true;
|
||||||
Z3_fallthrough;
|
Z3_fallthrough;
|
||||||
case OP_SEQ_UNIT:
|
case OP_SEQ_UNIT:
|
||||||
|
|
@ -1211,6 +1209,8 @@ app* seq_util::rex::mk_of_pred(expr* p) {
|
||||||
app* seq_util::rex::mk_range(sort* re_sort, unsigned lo, unsigned hi) {
|
app* seq_util::rex::mk_range(sort* re_sort, unsigned lo, unsigned hi) {
|
||||||
if (lo > hi)
|
if (lo > hi)
|
||||||
return mk_empty(re_sort);
|
return mk_empty(re_sort);
|
||||||
|
if (lo == 0 && hi == u.max_char())
|
||||||
|
return mk_full_char(re_sort);
|
||||||
app* lo_str = u.str.mk_string(zstring(lo));
|
app* lo_str = u.str.mk_string(zstring(lo));
|
||||||
if (lo == hi)
|
if (lo == hi)
|
||||||
return mk_to_re(lo_str);
|
return mk_to_re(lo_str);
|
||||||
|
|
@ -1445,7 +1445,7 @@ std::ostream& seq_util::rex::pp::print(std::ostream& out, expr* e) const {
|
||||||
print(out, r1);
|
print(out, r1);
|
||||||
print(out, r2);
|
print(out, r2);
|
||||||
}
|
}
|
||||||
else if (re.is_antimirov_union(e, r1, r2) || re.is_union(e, r1, r2)) {
|
else if (re.is_union(e, r1, r2)) {
|
||||||
out << "(";
|
out << "(";
|
||||||
print(out, r1);
|
print(out, r1);
|
||||||
out << (html_encode ? "⋃" : "|");
|
out << (html_encode ? "⋃" : "|");
|
||||||
|
|
|
||||||
|
|
@ -108,7 +108,6 @@ enum seq_op_kind {
|
||||||
_OP_REGEXP_EMPTY,
|
_OP_REGEXP_EMPTY,
|
||||||
_OP_REGEXP_FULL_CHAR,
|
_OP_REGEXP_FULL_CHAR,
|
||||||
_OP_RE_IS_NULLABLE,
|
_OP_RE_IS_NULLABLE,
|
||||||
_OP_RE_ANTIMIROV_UNION, // Lifted union for antimirov-style derivatives
|
|
||||||
_OP_SEQ_SKOLEM,
|
_OP_SEQ_SKOLEM,
|
||||||
LAST_SEQ_OP
|
LAST_SEQ_OP
|
||||||
};
|
};
|
||||||
|
|
@ -544,7 +543,6 @@ public:
|
||||||
app* mk_of_pred(expr* p);
|
app* mk_of_pred(expr* p);
|
||||||
app* mk_reverse(expr* r) { return m.mk_app(m_fid, OP_RE_REVERSE, r); }
|
app* mk_reverse(expr* r) { return m.mk_app(m_fid, OP_RE_REVERSE, r); }
|
||||||
app* mk_derivative(expr* ele, expr* r) { return m.mk_app(m_fid, OP_RE_DERIVATIVE, ele, r); }
|
app* mk_derivative(expr* ele, expr* r) { return m.mk_app(m_fid, OP_RE_DERIVATIVE, ele, r); }
|
||||||
app* mk_antimirov_union(expr* r1, expr* r2) { return m.mk_app(m_fid, _OP_RE_ANTIMIROV_UNION, r1, r2); }
|
|
||||||
|
|
||||||
bool is_to_re(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_TO_RE); }
|
bool is_to_re(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_TO_RE); }
|
||||||
bool is_concat(expr const* n) const { return is_app_of(n, m_fid, OP_RE_CONCAT); }
|
bool is_concat(expr const* n) const { return is_app_of(n, m_fid, OP_RE_CONCAT); }
|
||||||
|
|
@ -578,7 +576,6 @@ public:
|
||||||
bool is_of_pred(expr const* n) const { return is_app_of(n, m_fid, OP_RE_OF_PRED); }
|
bool is_of_pred(expr const* n) const { return is_app_of(n, m_fid, OP_RE_OF_PRED); }
|
||||||
bool is_reverse(expr const* n) const { return is_app_of(n, m_fid, OP_RE_REVERSE); }
|
bool is_reverse(expr const* n) const { return is_app_of(n, m_fid, OP_RE_REVERSE); }
|
||||||
bool is_derivative(expr const* n) const { return is_app_of(n, m_fid, OP_RE_DERIVATIVE); }
|
bool is_derivative(expr const* n) const { return is_app_of(n, m_fid, OP_RE_DERIVATIVE); }
|
||||||
bool is_antimirov_union(expr const* n) const { return is_app_of(n, m_fid, _OP_RE_ANTIMIROV_UNION); }
|
|
||||||
MATCH_UNARY(is_to_re);
|
MATCH_UNARY(is_to_re);
|
||||||
MATCH_BINARY(is_concat);
|
MATCH_BINARY(is_concat);
|
||||||
MATCH_BINARY(is_union);
|
MATCH_BINARY(is_union);
|
||||||
|
|
@ -593,7 +590,6 @@ public:
|
||||||
MATCH_UNARY(is_of_pred);
|
MATCH_UNARY(is_of_pred);
|
||||||
MATCH_UNARY(is_reverse);
|
MATCH_UNARY(is_reverse);
|
||||||
MATCH_BINARY(is_derivative);
|
MATCH_BINARY(is_derivative);
|
||||||
MATCH_BINARY(is_antimirov_union);
|
|
||||||
bool is_loop(expr const* n, expr*& body, unsigned& lo, unsigned& hi) const;
|
bool is_loop(expr const* n, expr*& body, unsigned& lo, unsigned& hi) const;
|
||||||
bool is_loop(expr const* n, expr*& body, unsigned& lo) const;
|
bool is_loop(expr const* n, expr*& body, unsigned& lo) const;
|
||||||
bool is_loop(expr const* n, expr*& body, expr*& lo, expr*& hi) const;
|
bool is_loop(expr const* n, expr*& body, expr*& lo, expr*& hi) const;
|
||||||
|
|
|
||||||
|
|
@ -224,6 +224,17 @@ namespace smt {
|
||||||
th.add_axiom(~lit);
|
th.add_axiom(~lit);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
// Fall back to antimirov NFA reachability. The lazy state graph
|
||||||
|
// keys states by AST identity and cannot close on intersections /
|
||||||
|
// complements whose derivative product states do not canonicalize,
|
||||||
|
// so it never detects their emptiness. re_is_empty decides
|
||||||
|
// emptiness directly (the same procedure propagate_eq already uses
|
||||||
|
// for re.none equalities).
|
||||||
|
if (re_is_empty(r) == l_true) {
|
||||||
|
STRACE(seq_regex_brief, tout << "(empty:re) ";);
|
||||||
|
th.add_axiom(~lit);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -461,6 +472,24 @@ namespace smt {
|
||||||
if (re().is_empty(r))
|
if (re().is_empty(r))
|
||||||
//trivially true
|
//trivially true
|
||||||
return;
|
return;
|
||||||
|
// When one side is re.none the equation is a pure emptiness check on
|
||||||
|
// the other regex (symmetric_diff already returned it as r). Decide
|
||||||
|
// it directly by antimirov NFA reachability instead of running the
|
||||||
|
// bisimulation/XOR closure, which would build large un-canonicalized
|
||||||
|
// product states for intersections of contains-patterns.
|
||||||
|
if ((re().is_empty(r1) || re().is_empty(r2)) && is_ground(r)) {
|
||||||
|
switch (re_is_empty(r)) {
|
||||||
|
case l_true:
|
||||||
|
STRACE(seq_regex_brief, tout << "empty:eq ";);
|
||||||
|
return; // languages equal (both empty): trivially true
|
||||||
|
case l_false:
|
||||||
|
STRACE(seq_regex_brief, tout << "empty:neq ";);
|
||||||
|
th.add_axiom(~th.mk_eq(r1, r2, false), false_literal);
|
||||||
|
return;
|
||||||
|
case l_undef:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
// Try the bisimulation procedure on ground regexes first. If it
|
// Try the bisimulation procedure on ground regexes first. If it
|
||||||
// returns a definite answer, dispatch the corresponding axiom and
|
// returns a definite answer, dispatch the corresponding axiom and
|
||||||
// bypass the symbolic emptiness/derivative closure.
|
// bypass the symbolic emptiness/derivative closure.
|
||||||
|
|
@ -562,16 +591,16 @@ namespace smt {
|
||||||
lits.push_back(null_lit);
|
lits.push_back(null_lit);
|
||||||
|
|
||||||
expr_ref_pair_vector cofactors(m);
|
expr_ref_pair_vector cofactors(m);
|
||||||
get_cofactors(d, cofactors);
|
seq_rw().get_cofactors(hd, d, cofactors);
|
||||||
for (auto const& p : cofactors) {
|
for (auto const& [c, r] : cofactors) {
|
||||||
if (is_member(p.second, u))
|
if (is_member(r, u))
|
||||||
continue;
|
continue;
|
||||||
expr_ref cond(p.first, m);
|
expr_ref cond(c, m);
|
||||||
seq_rw().elim_condition(hd, cond);
|
seq_rw().elim_condition(hd, cond);
|
||||||
rewrite(cond);
|
rewrite(cond);
|
||||||
if (m.is_false(cond))
|
if (m.is_false(cond))
|
||||||
continue;
|
continue;
|
||||||
expr_ref next_non_empty = sk().mk_is_non_empty(p.second, re().mk_union(u, p.second), n);
|
expr_ref next_non_empty = sk().mk_is_non_empty(r, re().mk_union(u, r), n);
|
||||||
if (!m.is_true(cond))
|
if (!m.is_true(cond))
|
||||||
next_non_empty = m.mk_and(cond, next_non_empty);
|
next_non_empty = m.mk_and(cond, next_non_empty);
|
||||||
lits.push_back(th.mk_literal(next_non_empty));
|
lits.push_back(th.mk_literal(next_non_empty));
|
||||||
|
|
@ -672,88 +701,113 @@ namespace smt {
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Return a list of all target regexes in the derivative of a regex r,
|
Decide emptiness of a ground regex r via antimirov-mode NFA
|
||||||
ignoring the conditions along each path.
|
reachability.
|
||||||
|
|
||||||
The derivative construction uses (:var 0) and tries
|
The symbolic derivative engine runs in antimirov mode, so the
|
||||||
to eliminate unsat condition paths but it does not perform
|
derivative of an intersection distributes into a *set* of individual
|
||||||
full satisfiability checks and it is not guaranteed
|
product states inter(A_i, B_j) (each a small, ground regex) rather
|
||||||
that all targets are actually reachable
|
than one giant union-of-intersections term. get_derivative_targets
|
||||||
|
enumerates these NFA successor states.
|
||||||
|
|
||||||
|
We short-circuit to l_false (non-empty) as soon as a reachable state
|
||||||
|
is nullable (accepts the empty word) or classical (a regex built only
|
||||||
|
from to_re/all/union/concat/star/plus/opt/loop, hence non-empty). An
|
||||||
|
intersection itself is never classical, but once one operand reduces
|
||||||
|
to Σ* the intersection collapses (via the derivative's subset
|
||||||
|
simplification) to the other, classical, operand.
|
||||||
|
|
||||||
|
If the worklist is exhausted with no such state, r is empty (l_true).
|
||||||
|
Returns l_undef if a step bound is hit, so callers can fall back to
|
||||||
|
the general procedure.
|
||||||
|
*/
|
||||||
|
lbool seq_regex::re_is_empty(expr* r) {
|
||||||
|
if (re().is_empty(r))
|
||||||
|
return l_true;
|
||||||
|
expr_ref_vector pinned(m);
|
||||||
|
obj_hashtable<expr> visited;
|
||||||
|
ptr_vector<expr> work;
|
||||||
|
work.push_back(r);
|
||||||
|
visited.insert(r);
|
||||||
|
pinned.push_back(r);
|
||||||
|
unsigned const bound = 100000;
|
||||||
|
unsigned steps = 0;
|
||||||
|
while (!work.empty()) {
|
||||||
|
if (++steps > bound)
|
||||||
|
return l_undef;
|
||||||
|
expr* s = work.back();
|
||||||
|
work.pop_back();
|
||||||
|
auto info = re().get_info(s);
|
||||||
|
if (!info.is_known())
|
||||||
|
return l_undef;
|
||||||
|
// ε ∈ L(s) or s is a non-empty classical regex ⇒ L(r) non-empty.
|
||||||
|
if (info.nullable == l_true || info.classical)
|
||||||
|
return l_false;
|
||||||
|
// Dead state: prune (min_length == UINT_MAX means no word is
|
||||||
|
// accepted from here).
|
||||||
|
if (info.min_length == UINT_MAX)
|
||||||
|
continue;
|
||||||
|
expr_ref_vector targets(m);
|
||||||
|
get_derivative_targets(s, targets);
|
||||||
|
for (expr* t : targets) {
|
||||||
|
if (visited.contains(t))
|
||||||
|
continue;
|
||||||
|
visited.insert(t);
|
||||||
|
pinned.push_back(t);
|
||||||
|
work.push_back(t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return l_true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Return a list of all reachable target regexes in the derivative of a
|
||||||
|
regex r.
|
||||||
|
|
||||||
|
The derivative is taken wrt (:var 0) and its reachable leaves are
|
||||||
|
enumerated with the path-aware cofactor engine, which conjoins the
|
||||||
|
ITE-path conditions and prunes infeasible character-range combinations
|
||||||
|
(e.g. a nested branch requiring elem = 'a' and elem = 'B'). Each leaf
|
||||||
|
is re-normalized with the path-aware smart constructors so that
|
||||||
|
semantically equal states stay syntactically identical (essential for
|
||||||
|
state dedup in the emptiness closure).
|
||||||
|
|
||||||
|
Without this pruning the naive ITE-tree DFS would reach infeasible
|
||||||
|
leaves; an infeasible classical (intersection/complement-free) leaf
|
||||||
|
would then be misjudged as a non-empty residual.
|
||||||
*/
|
*/
|
||||||
void seq_regex::get_derivative_targets(expr* r, expr_ref_vector& targets) {
|
void seq_regex::get_derivative_targets(expr* r, expr_ref_vector& targets) {
|
||||||
// constructs the derivative wrt (:var 0)
|
expr_ref_pair_vector cofactors(m);
|
||||||
expr_ref d(seq_rw().mk_derivative(r), m);
|
seq_rw().brz_derivative_cofactors(r, cofactors);
|
||||||
|
for (auto const& [c, t] : cofactors) {
|
||||||
// use DFS to collect all the targets (leaf regexes) in d.
|
if (!re().is_empty(t))
|
||||||
expr* _1 = nullptr, * e1 = nullptr, * e2 = nullptr;
|
targets.push_back(t);
|
||||||
obj_hashtable<expr>::entry* _2 = nullptr;
|
|
||||||
vector<expr*> workset;
|
|
||||||
workset.push_back(d);
|
|
||||||
obj_hashtable<expr> done;
|
|
||||||
done.insert(d);
|
|
||||||
while (workset.size() > 0) {
|
|
||||||
expr* e = workset.back();
|
|
||||||
workset.pop_back();
|
|
||||||
if (m.is_ite(e, _1, e1, e2) || re().is_union(e, e1, e2)) {
|
|
||||||
if (done.insert_if_not_there_core(e1, _2))
|
|
||||||
workset.push_back(e1);
|
|
||||||
if (done.insert_if_not_there_core(e2, _2))
|
|
||||||
workset.push_back(e2);
|
|
||||||
}
|
|
||||||
else if (!re().is_empty(e))
|
|
||||||
targets.push_back(e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Return a list of all (cond, leaf) pairs in a given derivative
|
Return a list of all (cond, leaf) pairs in a given derivative
|
||||||
expression r.
|
expression r, where elem is the character symbol the derivative was
|
||||||
|
taken with respect to.
|
||||||
|
|
||||||
Note: this implementation is inefficient: it simply collects all expressions under an if and
|
The transition regexes produced by the symbolic derivative engine are
|
||||||
iterates over all combinations.
|
ITE-trees over character predicates ci on elem (equalities such as
|
||||||
|
elem = 'A', and ranges such as 'a' <= elem <= 'z'). These predicates
|
||||||
|
are typically mutually exclusive, so the number of feasible truth
|
||||||
|
assignments to {c1,..,ck} ("minterms") is small.
|
||||||
|
|
||||||
This method is still used by:
|
The enumeration is delegated to seq::derive (via seq_rw().get_cofactors)
|
||||||
|
so it reuses the very same path/interval context that the derivative
|
||||||
|
engine uses while hoisting ITEs: each feasible path through the ITE-tree
|
||||||
|
yields one (path_condition, leaf) cofactor, infeasible character-range
|
||||||
|
combinations are pruned, and the leaf is simplified with the path-aware
|
||||||
|
smart constructors.
|
||||||
|
|
||||||
|
This is used by:
|
||||||
propagate_is_empty
|
propagate_is_empty
|
||||||
propagate_is_non_empty
|
propagate_is_non_empty
|
||||||
*/
|
*/
|
||||||
void seq_regex::get_cofactors(expr* r, expr_ref_pair_vector& result) {
|
|
||||||
obj_hashtable<expr> ifs;
|
|
||||||
expr* cond = nullptr, * r1 = nullptr, * r2 = nullptr;
|
|
||||||
for (expr* e : subterms::ground(expr_ref(r, m)))
|
|
||||||
if (m.is_ite(e, cond, r1, r2))
|
|
||||||
ifs.insert(cond);
|
|
||||||
|
|
||||||
expr_ref_vector rs(m);
|
|
||||||
vector<expr_ref_vector> conds;
|
|
||||||
conds.push_back(expr_ref_vector(m));
|
|
||||||
rs.push_back(r);
|
|
||||||
for (expr* c : ifs) {
|
|
||||||
unsigned sz = conds.size();
|
|
||||||
expr_safe_replace rep1(m);
|
|
||||||
expr_safe_replace rep2(m);
|
|
||||||
rep1.insert(c, m.mk_true());
|
|
||||||
rep2.insert(c, m.mk_false());
|
|
||||||
expr_ref r2(m);
|
|
||||||
for (unsigned i = 0; i < sz; ++i) {
|
|
||||||
expr_ref_vector cs = conds[i];
|
|
||||||
cs.push_back(mk_not(m, c));
|
|
||||||
conds.push_back(cs);
|
|
||||||
conds[i].push_back(c);
|
|
||||||
expr_ref r1(rs.get(i), m);
|
|
||||||
rep1(r1, r2);
|
|
||||||
rs[i] = r2;
|
|
||||||
rep2(r1, r2);
|
|
||||||
rs.push_back(r2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (unsigned i = 0; i < conds.size(); ++i) {
|
|
||||||
expr_ref conj = mk_and(conds[i]);
|
|
||||||
expr_ref r(rs.get(i), m);
|
|
||||||
ctx.get_rewriter()(r);
|
|
||||||
if (!m.is_false(conj) && !re().is_empty(r))
|
|
||||||
result.push_back(conj, r);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
is_empty(r, u) => ~is_nullable(r)
|
is_empty(r, u) => ~is_nullable(r)
|
||||||
|
|
@ -781,11 +835,11 @@ namespace smt {
|
||||||
d = mk_derivative_wrapper(hd, r);
|
d = mk_derivative_wrapper(hd, r);
|
||||||
literal_vector lits;
|
literal_vector lits;
|
||||||
expr_ref_pair_vector cofactors(m);
|
expr_ref_pair_vector cofactors(m);
|
||||||
get_cofactors(d, cofactors);
|
seq_rw().get_cofactors(hd, d, cofactors);
|
||||||
for (auto const& p : cofactors) {
|
for (auto const& [c, r] : cofactors) {
|
||||||
if (is_member(p.second, u))
|
if (is_member(r, u))
|
||||||
continue;
|
continue;
|
||||||
expr_ref cond(p.first, m);
|
expr_ref cond(c, m);
|
||||||
seq_rw().elim_condition(hd, cond);
|
seq_rw().elim_condition(hd, cond);
|
||||||
rewrite(cond);
|
rewrite(cond);
|
||||||
if (m.is_false(cond))
|
if (m.is_false(cond))
|
||||||
|
|
@ -796,7 +850,7 @@ namespace smt {
|
||||||
expr_ref ncond(mk_not(m, cond), m);
|
expr_ref ncond(mk_not(m, cond), m);
|
||||||
lits.push_back(th.mk_literal(mk_forall(m, hd, ncond)));
|
lits.push_back(th.mk_literal(mk_forall(m, hd, ncond)));
|
||||||
}
|
}
|
||||||
expr_ref is_empty1 = sk().mk_is_empty(p.second, re().mk_union(u, p.second), n);
|
expr_ref is_empty1 = sk().mk_is_empty(r, re().mk_union(u, r), n);
|
||||||
lits.push_back(th.mk_literal(is_empty1));
|
lits.push_back(th.mk_literal(is_empty1));
|
||||||
th.add_axiom(lits);
|
th.add_axiom(lits);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -164,7 +164,12 @@ namespace smt {
|
||||||
// returned by derivative_wrapper
|
// returned by derivative_wrapper
|
||||||
expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r);
|
expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r);
|
||||||
void get_derivative_targets(expr* r, expr_ref_vector& targets);
|
void get_derivative_targets(expr* r, expr_ref_vector& targets);
|
||||||
void get_cofactors(expr* r, expr_ref_pair_vector& result);
|
|
||||||
|
// Decide emptiness of a ground regex by antimirov-mode NFA
|
||||||
|
// reachability: explore derivative target states, short-circuiting to
|
||||||
|
// "non-empty" on the first reachable nullable or classical state.
|
||||||
|
// Returns l_true (empty), l_false (non-empty), l_undef (gave up).
|
||||||
|
lbool re_is_empty(expr* r);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Pretty print the regex of the state id to the out stream,
|
Pretty print the regex of the state id to the out stream,
|
||||||
|
|
|
||||||
|
|
@ -115,15 +115,18 @@ add_executable(test-z3
|
||||||
polynomial_factorization.cpp
|
polynomial_factorization.cpp
|
||||||
polynorm.cpp
|
polynorm.cpp
|
||||||
prime_generator.cpp
|
prime_generator.cpp
|
||||||
|
seq_regex_bisim.cpp
|
||||||
proof_checker.cpp
|
proof_checker.cpp
|
||||||
qe_arith.cpp
|
qe_arith.cpp
|
||||||
mbp_qel.cpp
|
mbp_qel.cpp
|
||||||
quant_elim.cpp
|
quant_elim.cpp
|
||||||
quant_solve.cpp
|
quant_solve.cpp
|
||||||
random.cpp
|
random.cpp
|
||||||
|
range_predicate.cpp
|
||||||
rational.cpp
|
rational.cpp
|
||||||
rcf.cpp
|
rcf.cpp
|
||||||
region.cpp
|
region.cpp
|
||||||
|
regex_range_collapse.cpp
|
||||||
sat_local_search.cpp
|
sat_local_search.cpp
|
||||||
sat_lookahead.cpp
|
sat_lookahead.cpp
|
||||||
sat_user_scope.cpp
|
sat_user_scope.cpp
|
||||||
|
|
|
||||||
|
|
@ -113,6 +113,8 @@
|
||||||
X(api_bug) \
|
X(api_bug) \
|
||||||
X(api_special_relations) \
|
X(api_special_relations) \
|
||||||
X(arith_rewriter) \
|
X(arith_rewriter) \
|
||||||
|
X(range_predicate) \
|
||||||
|
X(regex_range_collapse) \
|
||||||
X(seq_rewriter) \
|
X(seq_rewriter) \
|
||||||
X(check_assumptions) \
|
X(check_assumptions) \
|
||||||
X(smt_context) \
|
X(smt_context) \
|
||||||
|
|
@ -195,6 +197,7 @@
|
||||||
X(finite_set) \
|
X(finite_set) \
|
||||||
X(finite_set_rewriter) \
|
X(finite_set_rewriter) \
|
||||||
X(fpa) \
|
X(fpa) \
|
||||||
|
X(seq_regex_bisim) \
|
||||||
X(term_enumeration) \
|
X(term_enumeration) \
|
||||||
X(lcube)
|
X(lcube)
|
||||||
|
|
||||||
|
|
|
||||||
260
src/test/range_predicate.cpp
Normal file
260
src/test/range_predicate.cpp
Normal file
|
|
@ -0,0 +1,260 @@
|
||||||
|
/*++
|
||||||
|
Copyright (c) 2026 Microsoft Corporation
|
||||||
|
|
||||||
|
Module Name:
|
||||||
|
|
||||||
|
test/range_predicate.cpp
|
||||||
|
|
||||||
|
Abstract:
|
||||||
|
|
||||||
|
Unit tests for the range-algebra value type seq::range_predicate.
|
||||||
|
|
||||||
|
The tests exercise:
|
||||||
|
* factory constructors and canonical-form invariants,
|
||||||
|
* extensional equality and total ordering,
|
||||||
|
* Boolean operations (|, &, ~, -, ^) on hand-picked instances,
|
||||||
|
* exhaustive verification of de-Morgan and lattice laws on a
|
||||||
|
small character domain, by enumerating every subset.
|
||||||
|
|
||||||
|
Author:
|
||||||
|
|
||||||
|
Margus Veanes (veanes) 2026
|
||||||
|
|
||||||
|
--*/
|
||||||
|
|
||||||
|
#include "ast/rewriter/seq_range_predicate.h"
|
||||||
|
#include "util/debug.h"
|
||||||
|
#include <cstdint>
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
using seq::range_predicate;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Build a range_predicate from a bitmask over [0, max_char] for testing.
|
||||||
|
range_predicate from_mask(uint64_t mask, unsigned max_char) {
|
||||||
|
range_predicate r = range_predicate::empty(max_char);
|
||||||
|
for (unsigned c = 0; c <= max_char; ++c)
|
||||||
|
if ((mask >> c) & 1u)
|
||||||
|
r = r | range_predicate::singleton(c, max_char);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert a range_predicate back to a bitmask for cross-checking.
|
||||||
|
uint64_t to_mask(range_predicate const& r) {
|
||||||
|
uint64_t mask = 0;
|
||||||
|
for (unsigned c = 0; c <= r.max_char(); ++c)
|
||||||
|
if (r.contains(c))
|
||||||
|
mask |= (uint64_t(1) << c);
|
||||||
|
return mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_factories() {
|
||||||
|
auto e = range_predicate::empty(255);
|
||||||
|
ENSURE(e.is_empty());
|
||||||
|
ENSURE(!e.is_top());
|
||||||
|
ENSURE(e.num_ranges() == 0);
|
||||||
|
ENSURE(e.cardinality() == 0);
|
||||||
|
|
||||||
|
auto t = range_predicate::top(255);
|
||||||
|
ENSURE(!t.is_empty());
|
||||||
|
ENSURE(t.is_top());
|
||||||
|
ENSURE(t.num_ranges() == 1);
|
||||||
|
ENSURE(t.cardinality() == 256);
|
||||||
|
ENSURE(t.contains(0));
|
||||||
|
ENSURE(t.contains(255));
|
||||||
|
|
||||||
|
auto s = range_predicate::singleton(42, 255);
|
||||||
|
ENSURE(s.num_ranges() == 1);
|
||||||
|
ENSURE(s.cardinality() == 1);
|
||||||
|
ENSURE(s.contains(42));
|
||||||
|
ENSURE(!s.contains(41));
|
||||||
|
unsigned c = 0;
|
||||||
|
ENSURE(s.is_singleton(c));
|
||||||
|
ENSURE(c == 42);
|
||||||
|
|
||||||
|
auto r = range_predicate::range(10, 20, 255);
|
||||||
|
ENSURE(r.num_ranges() == 1);
|
||||||
|
ENSURE(r.cardinality() == 11);
|
||||||
|
ENSURE(r.contains(10));
|
||||||
|
ENSURE(r.contains(20));
|
||||||
|
ENSURE(!r.contains(9));
|
||||||
|
ENSURE(!r.contains(21));
|
||||||
|
|
||||||
|
// Reversed bounds produce empty.
|
||||||
|
auto bad = range_predicate::range(20, 10, 255);
|
||||||
|
ENSURE(bad.is_empty());
|
||||||
|
|
||||||
|
// Clipping at max_char.
|
||||||
|
auto clipped = range_predicate::range(200, 1000, 255);
|
||||||
|
ENSURE(clipped.num_ranges() == 1);
|
||||||
|
ENSURE(clipped[0] == std::make_pair(200u, 255u));
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_equality_and_order() {
|
||||||
|
auto a = range_predicate::range(1, 5, 31);
|
||||||
|
auto b = range_predicate::range(1, 5, 31);
|
||||||
|
auto c = range_predicate::range(1, 6, 31);
|
||||||
|
ENSURE(a == b);
|
||||||
|
ENSURE(a != c);
|
||||||
|
ENSURE(a.hash() == b.hash());
|
||||||
|
ENSURE(a < c || c < a);
|
||||||
|
ENSURE(!(a < a));
|
||||||
|
|
||||||
|
auto empty = range_predicate::empty(31);
|
||||||
|
ENSURE(empty < a);
|
||||||
|
|
||||||
|
// Canonical merging of adjacent ranges.
|
||||||
|
auto d = range_predicate::range(0, 4, 31) | range_predicate::range(5, 10, 31);
|
||||||
|
auto e = range_predicate::range(0, 10, 31);
|
||||||
|
ENSURE(d == e);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_union_intersection_hand() {
|
||||||
|
unsigned const M = 31;
|
||||||
|
auto a = range_predicate::range(0, 4, M) | range_predicate::range(10, 14, M);
|
||||||
|
auto b = range_predicate::range(3, 11, M);
|
||||||
|
|
||||||
|
auto u = a | b; // [0,14]
|
||||||
|
ENSURE(u.num_ranges() == 1);
|
||||||
|
ENSURE(u[0] == std::make_pair(0u, 14u));
|
||||||
|
|
||||||
|
auto i = a & b; // [3,4] U [10,11]
|
||||||
|
ENSURE(i.num_ranges() == 2);
|
||||||
|
ENSURE(i[0] == std::make_pair(3u, 4u));
|
||||||
|
ENSURE(i[1] == std::make_pair(10u, 11u));
|
||||||
|
|
||||||
|
auto d = a - b; // [0,2] U [12,14]
|
||||||
|
ENSURE(d.num_ranges() == 2);
|
||||||
|
ENSURE(d[0] == std::make_pair(0u, 2u));
|
||||||
|
ENSURE(d[1] == std::make_pair(12u, 14u));
|
||||||
|
|
||||||
|
auto x = a ^ b; // [0,2] U [5,9] U [12,14]
|
||||||
|
ENSURE(x.num_ranges() == 3);
|
||||||
|
ENSURE(x[0] == std::make_pair(0u, 2u));
|
||||||
|
ENSURE(x[1] == std::make_pair(5u, 9u));
|
||||||
|
ENSURE(x[2] == std::make_pair(12u, 14u));
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_complement_hand() {
|
||||||
|
unsigned const M = 10;
|
||||||
|
auto e = range_predicate::empty(M);
|
||||||
|
ENSURE((~e).is_top());
|
||||||
|
auto t = range_predicate::top(M);
|
||||||
|
ENSURE((~t).is_empty());
|
||||||
|
|
||||||
|
// ~([2,3] U [7,8]) = [0,1] U [4,6] U [9,10]
|
||||||
|
auto a = range_predicate::range(2, 3, M) | range_predicate::range(7, 8, M);
|
||||||
|
auto na = ~a;
|
||||||
|
ENSURE(na.num_ranges() == 3);
|
||||||
|
ENSURE(na[0] == std::make_pair(0u, 1u));
|
||||||
|
ENSURE(na[1] == std::make_pair(4u, 6u));
|
||||||
|
ENSURE(na[2] == std::make_pair(9u, 10u));
|
||||||
|
|
||||||
|
// ~([0,4]) = [5,10]
|
||||||
|
auto b = range_predicate::range(0, 4, M);
|
||||||
|
auto nb = ~b;
|
||||||
|
ENSURE(nb.num_ranges() == 1);
|
||||||
|
ENSURE(nb[0] == std::make_pair(5u, 10u));
|
||||||
|
|
||||||
|
// ~([5,10]) = [0,4]
|
||||||
|
auto cnb = ~nb;
|
||||||
|
ENSURE(cnb == b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exhaustively verify the lattice / de-Morgan laws on a small domain
|
||||||
|
// by enumerating every possible subset (bitmask).
|
||||||
|
void test_exhaustive_laws() {
|
||||||
|
unsigned const M = 5; // 6 characters -> 64 subsets
|
||||||
|
unsigned const N = 1u << (M + 1);
|
||||||
|
for (unsigned i = 0; i < N; ++i) {
|
||||||
|
range_predicate A = from_mask(i, M);
|
||||||
|
ENSURE(to_mask(A) == i);
|
||||||
|
// ~ ~ A == A
|
||||||
|
ENSURE(~~A == A);
|
||||||
|
// A | ~A == top
|
||||||
|
ENSURE((A | ~A).is_top());
|
||||||
|
// A & ~A == empty
|
||||||
|
ENSURE((A & ~A).is_empty());
|
||||||
|
// cardinality matches popcount
|
||||||
|
unsigned pop = 0;
|
||||||
|
for (unsigned k = 0; k <= M; ++k) if ((i >> k) & 1u) ++pop;
|
||||||
|
ENSURE(A.cardinality() == pop);
|
||||||
|
}
|
||||||
|
for (unsigned i = 0; i < N; ++i) {
|
||||||
|
range_predicate A = from_mask(i, M);
|
||||||
|
for (unsigned j = 0; j < N; ++j) {
|
||||||
|
range_predicate B = from_mask(j, M);
|
||||||
|
// Bitmask reference semantics.
|
||||||
|
ENSURE(to_mask(A | B) == (i | j));
|
||||||
|
ENSURE(to_mask(A & B) == (i & j));
|
||||||
|
ENSURE(to_mask(A - B) == (i & ~j & ((1u << (M + 1)) - 1u)));
|
||||||
|
ENSURE(to_mask(A ^ B) == (i ^ j));
|
||||||
|
// de-Morgan
|
||||||
|
ENSURE(~(A | B) == (~A & ~B));
|
||||||
|
ENSURE(~(A & B) == (~A | ~B));
|
||||||
|
// Commutativity
|
||||||
|
ENSURE((A | B) == (B | A));
|
||||||
|
ENSURE((A & B) == (B & A));
|
||||||
|
// (A - B) == A & ~B
|
||||||
|
ENSURE((A - B) == (A & ~B));
|
||||||
|
// (A ^ B) == (A | B) - (A & B)
|
||||||
|
ENSURE((A ^ B) == ((A | B) - (A & B)));
|
||||||
|
// Extensional equality is reflexive on equal masks.
|
||||||
|
if (i == j) {
|
||||||
|
ENSURE(A == B);
|
||||||
|
ENSURE(A.hash() == B.hash());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_total_order_strict() {
|
||||||
|
unsigned const M = 5;
|
||||||
|
unsigned const N = 1u << (M + 1);
|
||||||
|
// Strict total order: for any distinct A, B exactly one of A<B, B<A holds.
|
||||||
|
for (unsigned i = 0; i < N; ++i) {
|
||||||
|
range_predicate A = from_mask(i, M);
|
||||||
|
ENSURE(!(A < A));
|
||||||
|
for (unsigned j = i + 1; j < N; ++j) {
|
||||||
|
range_predicate B = from_mask(j, M);
|
||||||
|
bool lt = A < B;
|
||||||
|
bool gt = B < A;
|
||||||
|
ENSURE(lt != gt);
|
||||||
|
ENSURE(lt || gt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_display() {
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << range_predicate::empty(31);
|
||||||
|
ENSURE(oss.str() == "[]");
|
||||||
|
|
||||||
|
oss.str("");
|
||||||
|
oss << range_predicate::range(3, 7, 31);
|
||||||
|
ENSURE(oss.str() == "[3-7]");
|
||||||
|
|
||||||
|
oss.str("");
|
||||||
|
oss << range_predicate::singleton(9, 31);
|
||||||
|
ENSURE(oss.str() == "[9]");
|
||||||
|
|
||||||
|
oss.str("");
|
||||||
|
auto p = range_predicate::range(0, 2, 31) | range_predicate::singleton(5, 31);
|
||||||
|
oss << p;
|
||||||
|
ENSURE(oss.str() == "[0-2,5]");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void tst_range_predicate() {
|
||||||
|
test_factories();
|
||||||
|
test_equality_and_order();
|
||||||
|
test_union_intersection_hand();
|
||||||
|
test_complement_hand();
|
||||||
|
test_exhaustive_laws();
|
||||||
|
test_total_order_strict();
|
||||||
|
test_display();
|
||||||
|
std::cout << "range_predicate unit tests passed\n";
|
||||||
|
}
|
||||||
260
src/test/regex_range_collapse.cpp
Normal file
260
src/test/regex_range_collapse.cpp
Normal file
|
|
@ -0,0 +1,260 @@
|
||||||
|
/*++
|
||||||
|
Copyright (c) 2026 Microsoft Corporation
|
||||||
|
|
||||||
|
Module Name:
|
||||||
|
|
||||||
|
regex_range_collapse.cpp - unit tests
|
||||||
|
|
||||||
|
--*/
|
||||||
|
|
||||||
|
#include "ast/rewriter/seq_range_collapse.h"
|
||||||
|
#include "ast/reg_decl_plugins.h"
|
||||||
|
#include "ast/ast_pp.h"
|
||||||
|
#include "ast/arith_decl_plugin.h"
|
||||||
|
#include "util/util.h"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using seq::range_predicate;
|
||||||
|
using seq::regex_to_range_predicate;
|
||||||
|
using seq::range_predicate_to_regex;
|
||||||
|
|
||||||
|
static void check(bool ok, char const* what) {
|
||||||
|
if (!ok) {
|
||||||
|
std::cerr << "regex_range_collapse FAILED: " << what << "\n";
|
||||||
|
ENSURE(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static expr_ref mk_singleton_str(seq_util& u, unsigned c) {
|
||||||
|
return expr_ref(u.str.mk_string(zstring(c)), u.get_manager());
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool extract_range_chars(seq_util& u, expr* e, unsigned& lo, unsigned& hi) {
|
||||||
|
expr* lo_e = nullptr; expr* hi_e = nullptr;
|
||||||
|
expr *s = nullptr;
|
||||||
|
zstring str;
|
||||||
|
if (u.re.is_to_re(e, s) && u.str.is_string(s, str) && str.length() == 1) {
|
||||||
|
lo = hi = str[0];
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else if (u.re.is_range(e, lo_e, hi_e) && u.str.is_string(lo_e) && u.str.is_string(hi_e)) {
|
||||||
|
zstring lo_str, hi_str;
|
||||||
|
u.str.is_string(lo_e, lo_str);
|
||||||
|
u.str.is_string(hi_e, hi_str);
|
||||||
|
if (lo_str.length() == 1 && hi_str.length() == 1) {
|
||||||
|
lo = lo_str[0];
|
||||||
|
hi = hi_str[0];
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!u.re.is_range(e, lo_e, hi_e))
|
||||||
|
return false;
|
||||||
|
// Accept either string-constant or (seq.unit (Char N)) bound form.
|
||||||
|
if (u.re.is_range(e, lo, hi))
|
||||||
|
return true;
|
||||||
|
expr* lc = nullptr; expr* hc = nullptr;
|
||||||
|
if (u.str.is_unit(lo_e, lc) && u.is_const_char(lc, lo) &&
|
||||||
|
u.str.is_unit(hi_e, hc) && u.is_const_char(hc, hi))
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void run() {
|
||||||
|
ast_manager m;
|
||||||
|
reg_decl_plugins(m);
|
||||||
|
seq_util u(m);
|
||||||
|
unsigned const M = u.max_char();
|
||||||
|
|
||||||
|
sort* str_sort = u.str.mk_string_sort();
|
||||||
|
sort* re_sort = u.re.mk_re(str_sort);
|
||||||
|
|
||||||
|
// primitives
|
||||||
|
{
|
||||||
|
range_predicate p(M);
|
||||||
|
check(regex_to_range_predicate(u, u.re.mk_empty(re_sort), p) && p.is_empty(),
|
||||||
|
"re.empty -> empty");
|
||||||
|
check(regex_to_range_predicate(u, u.re.mk_full_char(re_sort), p) && p.is_top(),
|
||||||
|
"re.full_char -> top");
|
||||||
|
}
|
||||||
|
// re.range "a" "z"
|
||||||
|
{
|
||||||
|
range_predicate p(M);
|
||||||
|
expr_ref a = mk_singleton_str(u, 'a');
|
||||||
|
expr_ref z = mk_singleton_str(u, 'z');
|
||||||
|
expr_ref r(u.re.mk_range(a, z), m);
|
||||||
|
check(regex_to_range_predicate(u, r, p) && p.num_ranges() == 1 &&
|
||||||
|
p[0].first == 'a' && p[0].second == 'z',
|
||||||
|
"re.range a z -> [a,z]");
|
||||||
|
}
|
||||||
|
// Disjoint union: (a..z) | (0..9)
|
||||||
|
{
|
||||||
|
range_predicate p(M);
|
||||||
|
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
|
||||||
|
expr_ref r2(u.re.mk_range(mk_singleton_str(u, '0'), mk_singleton_str(u, '9')), m);
|
||||||
|
expr_ref un(u.re.mk_union(r1, r2), m);
|
||||||
|
check(regex_to_range_predicate(u, un, p) && p.num_ranges() == 2,
|
||||||
|
"(a-z)|(0-9) -> 2 ranges");
|
||||||
|
// canonical order: lower lo first
|
||||||
|
check(p[0].first == '0' && p[0].second == '9' && p[1].first == 'a' && p[1].second == 'z',
|
||||||
|
"(a-z)|(0-9) ranges in canonical order");
|
||||||
|
}
|
||||||
|
// Overlapping union: (a..c) | (b..f) -> (a..f)
|
||||||
|
{
|
||||||
|
range_predicate p(M);
|
||||||
|
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'c')), m);
|
||||||
|
expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'b'), mk_singleton_str(u, 'f')), m);
|
||||||
|
expr_ref un(u.re.mk_union(r1, r2), m);
|
||||||
|
check(regex_to_range_predicate(u, un, p) && p.num_ranges() == 1 &&
|
||||||
|
p[0].first == 'a' && p[0].second == 'f',
|
||||||
|
"(a-c)|(b-f) -> (a-f)");
|
||||||
|
}
|
||||||
|
// Adjacent union: (a..c) | (d..f) -> (a..f) (canonical predicate merges adjacent)
|
||||||
|
{
|
||||||
|
range_predicate p(M);
|
||||||
|
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'c')), m);
|
||||||
|
expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'd'), mk_singleton_str(u, 'f')), m);
|
||||||
|
expr_ref un(u.re.mk_union(r1, r2), m);
|
||||||
|
check(regex_to_range_predicate(u, un, p) && p.num_ranges() == 1 &&
|
||||||
|
p[0].first == 'a' && p[0].second == 'f',
|
||||||
|
"(a-c)|(d-f) -> (a-f) via adjacency");
|
||||||
|
}
|
||||||
|
// Disjoint intersection: (a..z) & (0..9) -> empty
|
||||||
|
{
|
||||||
|
range_predicate p(M);
|
||||||
|
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
|
||||||
|
expr_ref r2(u.re.mk_range(mk_singleton_str(u, '0'), mk_singleton_str(u, '9')), m);
|
||||||
|
expr_ref ix(u.re.mk_inter(r1, r2), m);
|
||||||
|
check(regex_to_range_predicate(u, ix, p) && p.is_empty(),
|
||||||
|
"(a-z)&(0-9) -> empty");
|
||||||
|
}
|
||||||
|
// Overlapping intersection: (a..f) & (c..z) -> (c..f)
|
||||||
|
{
|
||||||
|
range_predicate p(M);
|
||||||
|
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'f')), m);
|
||||||
|
expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'c'), mk_singleton_str(u, 'z')), m);
|
||||||
|
expr_ref ix(u.re.mk_inter(r1, r2), m);
|
||||||
|
check(regex_to_range_predicate(u, ix, p) && p.num_ranges() == 1 &&
|
||||||
|
p[0].first == 'c' && p[0].second == 'f',
|
||||||
|
"(a-f)&(c-z) -> (c-f)");
|
||||||
|
}
|
||||||
|
// Complement: re.complement is intentionally NOT a char-class op
|
||||||
|
// (it operates over Σ*), so it must NOT be translated.
|
||||||
|
{
|
||||||
|
range_predicate p(M);
|
||||||
|
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
|
||||||
|
expr_ref cmp(u.re.mk_complement(r1), m);
|
||||||
|
check(!regex_to_range_predicate(u, cmp, p),
|
||||||
|
"re.comp of range is NOT translatable (sequence-level complement)");
|
||||||
|
}
|
||||||
|
// Diff: (a..f) \ (c..z) -> (a..b)
|
||||||
|
{
|
||||||
|
range_predicate p(M);
|
||||||
|
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'f')), m);
|
||||||
|
expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'c'), mk_singleton_str(u, 'z')), m);
|
||||||
|
expr_ref df(u.re.mk_diff(r1, r2), m);
|
||||||
|
check(regex_to_range_predicate(u, df, p) && p.num_ranges() == 1 &&
|
||||||
|
p[0].first == 'a' && p[0].second == 'b',
|
||||||
|
"(a-f) \\ (c-z) -> (a-b)");
|
||||||
|
}
|
||||||
|
// Negative: re.* of a range is NOT a char class
|
||||||
|
{
|
||||||
|
range_predicate p(M);
|
||||||
|
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
|
||||||
|
expr_ref star(u.re.mk_star(r1), m);
|
||||||
|
check(!regex_to_range_predicate(u, star, p),
|
||||||
|
"re.* of range not translatable");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Negative: a regex whose element type is NOT a sequence of
|
||||||
|
// characters (here (Seq Int)) must be rejected outright, even for
|
||||||
|
// shapes that structurally resemble char-class operators.
|
||||||
|
{
|
||||||
|
range_predicate p(M);
|
||||||
|
arith_util a(m);
|
||||||
|
sort* int_seq = u.str.mk_seq(a.mk_int());
|
||||||
|
sort* int_re = u.re.mk_re(int_seq);
|
||||||
|
check(!regex_to_range_predicate(u, u.re.mk_empty(int_re), p),
|
||||||
|
"re.empty over (Seq Int) is NOT a char class");
|
||||||
|
check(!regex_to_range_predicate(u, u.re.mk_full_char(int_re), p),
|
||||||
|
"re.full_char over (Seq Int) is NOT a char class");
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- materialization round-trip ----
|
||||||
|
|
||||||
|
// empty -> re.empty
|
||||||
|
{
|
||||||
|
range_predicate p = range_predicate::empty(M);
|
||||||
|
expr_ref e = range_predicate_to_regex(u, p, str_sort);
|
||||||
|
check(u.re.is_empty(e), "empty -> re.empty");
|
||||||
|
}
|
||||||
|
// top -> re.full_char
|
||||||
|
{
|
||||||
|
range_predicate p = range_predicate::top(M);
|
||||||
|
expr_ref e = range_predicate_to_regex(u, p, str_sort);
|
||||||
|
check(u.re.is_full_char(e), "top -> re.full_char");
|
||||||
|
}
|
||||||
|
// single range -> re.range
|
||||||
|
{
|
||||||
|
range_predicate p = range_predicate::range('a', 'z', M);
|
||||||
|
expr_ref e = range_predicate_to_regex(u, p, str_sort);
|
||||||
|
unsigned lo = 0, hi = 0;
|
||||||
|
check(extract_range_chars(u, e, lo, hi) && lo == 'a' && hi == 'z',
|
||||||
|
"[a-z] -> re.range a z");
|
||||||
|
}
|
||||||
|
// singleton -> re.range c c
|
||||||
|
{
|
||||||
|
range_predicate p = range_predicate::singleton('A', M);
|
||||||
|
expr_ref e = range_predicate_to_regex(u, p, str_sort);
|
||||||
|
unsigned lo = 0, hi = 0;
|
||||||
|
check(extract_range_chars(u, e, lo, hi) && lo == 'A' && hi == 'A',
|
||||||
|
"{A} -> re.range A A");
|
||||||
|
}
|
||||||
|
// 2 ranges -> re.union(range_0, range_1) in canonical order
|
||||||
|
{
|
||||||
|
range_predicate p = range_predicate::range('0', '9', M)
|
||||||
|
| range_predicate::range('a', 'z', M);
|
||||||
|
expr_ref e = range_predicate_to_regex(u, p, str_sort);
|
||||||
|
expr* a = nullptr; expr* b = nullptr;
|
||||||
|
check(u.re.is_union(e, a, b), "2-range -> union");
|
||||||
|
unsigned lo0 = 0, hi0 = 0, lo1 = 0, hi1 = 0;
|
||||||
|
check(extract_range_chars(u, a, lo0, hi0) && lo0 == '0' && hi0 == '9',
|
||||||
|
"union arg0 = (0-9) (canonical: lower lo first)");
|
||||||
|
check(extract_range_chars(u, b, lo1, hi1) && lo1 == 'a' && hi1 == 'z',
|
||||||
|
"union arg1 = (a-z)");
|
||||||
|
}
|
||||||
|
// 3 ranges -> right-associated union
|
||||||
|
{
|
||||||
|
range_predicate p = range_predicate::range(0, 5, M)
|
||||||
|
| range_predicate::range(10, 15, M)
|
||||||
|
| range_predicate::range(20, 25, M);
|
||||||
|
expr_ref e = range_predicate_to_regex(u, p, str_sort);
|
||||||
|
expr* a = nullptr; expr* rest = nullptr;
|
||||||
|
check(u.re.is_union(e, a, rest), "3-range -> union(...)");
|
||||||
|
unsigned lo = 0, hi = 0;
|
||||||
|
check(extract_range_chars(u, a, lo, hi) && lo == 0 && hi == 5, "first arg = (0-5)");
|
||||||
|
expr* b = nullptr; expr* c = nullptr;
|
||||||
|
check(u.re.is_union(rest, b, c), "rest is union(...,...)");
|
||||||
|
check(extract_range_chars(u, b, lo, hi) && lo == 10 && hi == 15, "second range");
|
||||||
|
check(extract_range_chars(u, c, lo, hi) && lo == 20 && hi == 25, "third range");
|
||||||
|
}
|
||||||
|
// Round-trip identity for an arbitrary range-set
|
||||||
|
{
|
||||||
|
range_predicate p_in = range_predicate::range('a', 'c', M)
|
||||||
|
| range_predicate::range('m', 'p', M)
|
||||||
|
| range_predicate::range('x', 'z', M);
|
||||||
|
expr_ref e = range_predicate_to_regex(u, p_in, str_sort);
|
||||||
|
range_predicate p_out(M);
|
||||||
|
check(regex_to_range_predicate(u, e, p_out), "round-trip translatable");
|
||||||
|
check(p_in == p_out, "round-trip equal");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cerr << "regex_range_collapse tests passed\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void tst_regex_range_collapse() {
|
||||||
|
run();
|
||||||
|
}
|
||||||
127
src/test/seq_regex_bisim.cpp
Normal file
127
src/test/seq_regex_bisim.cpp
Normal file
|
|
@ -0,0 +1,127 @@
|
||||||
|
// Regression test for the seq::derive::intersect_intervals bug.
|
||||||
|
//
|
||||||
|
// Background: derive uses a path-tracking interval set to compute symbolic
|
||||||
|
// derivatives. The intersect_intervals routine used to react to a single
|
||||||
|
// disjoint interval by dropping the entire kept suffix and skipping the rest
|
||||||
|
// of the list, which silently killed valid branches in derivatives such as
|
||||||
|
// D(a|b). That made the bisimulation procedure conclude bogus equalities
|
||||||
|
// like a* == (a|b)*.
|
||||||
|
//
|
||||||
|
// This file also covers the seq::derive top-level-cache poisoning bug.
|
||||||
|
// `m_top_cache` is keyed only by the regex; the routine used to populate it
|
||||||
|
// while `m_ele` was set to a *concrete* character, baking that character
|
||||||
|
// into the cached "symbolic" derivative. Subsequent calls with the same
|
||||||
|
// regex but a different ele then returned a stale concrete answer instead
|
||||||
|
// of the true symbolic derivative. The simplest victim is
|
||||||
|
// (str.in_re "aP" (re.++ (re.* "a") "P"))
|
||||||
|
// which used to return false because the derivative wrt 'a' was cached and
|
||||||
|
// re-used as the derivative wrt 'P'.
|
||||||
|
#include "ast/ast.h"
|
||||||
|
#include "ast/ast_pp.h"
|
||||||
|
#include "ast/reg_decl_plugins.h"
|
||||||
|
#include "ast/seq_decl_plugin.h"
|
||||||
|
#include "ast/rewriter/seq_rewriter.h"
|
||||||
|
#include "ast/rewriter/seq_regex_bisim.h"
|
||||||
|
#include "ast/rewriter/th_rewriter.h"
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
static void test_a_star_neq_ab_star() {
|
||||||
|
ast_manager m;
|
||||||
|
reg_decl_plugins(m);
|
||||||
|
seq_util u(m);
|
||||||
|
seq_rewriter rw(m);
|
||||||
|
|
||||||
|
sort_ref str_sort(u.str.mk_string_sort(), m);
|
||||||
|
|
||||||
|
zstring sa("a"), sb("b");
|
||||||
|
expr_ref re_a(u.re.mk_to_re(u.str.mk_string(sa)), m);
|
||||||
|
expr_ref re_b(u.re.mk_to_re(u.str.mk_string(sb)), m);
|
||||||
|
expr_ref a_star(u.re.mk_star(re_a), m);
|
||||||
|
expr_ref ab(u.re.mk_union(re_a, re_b), m);
|
||||||
|
expr_ref ab_star(u.re.mk_star(ab), m);
|
||||||
|
|
||||||
|
expr_ref d_ab = rw.mk_brz_derivative(ab);
|
||||||
|
std::cout << "D(a|b) = " << mk_pp(d_ab, m) << "\n";
|
||||||
|
|
||||||
|
// Both the 'a' branch and the 'b' branch of D(a|b) must reach epsilon.
|
||||||
|
// Collect the regex leaves of the symbolic derivative and require at
|
||||||
|
// least two distinct accepting leaves (one for 'a' and one for 'b').
|
||||||
|
expr_ref_vector leaves(m);
|
||||||
|
auto collect = [&](expr* e, auto&& self) -> void {
|
||||||
|
expr* c, *t, *f;
|
||||||
|
if (m.is_ite(e, c, t, f) || u.re.is_union(e, t, f)) {
|
||||||
|
self(t, self);
|
||||||
|
self(f, self);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (u.re.is_empty(e)) return;
|
||||||
|
leaves.push_back(e);
|
||||||
|
};
|
||||||
|
collect(d_ab, collect);
|
||||||
|
unsigned nullable_leaves = 0;
|
||||||
|
for (expr* l : leaves) {
|
||||||
|
expr_ref n = rw.is_nullable(l);
|
||||||
|
if (m.is_true(n)) ++nullable_leaves;
|
||||||
|
}
|
||||||
|
std::cout << "D(a|b) leaves=" << leaves.size()
|
||||||
|
<< " nullable=" << nullable_leaves << "\n";
|
||||||
|
ENSURE(nullable_leaves >= 2);
|
||||||
|
|
||||||
|
// Bisim must report the two languages are not equivalent.
|
||||||
|
seq::regex_bisim bisim(rw);
|
||||||
|
lbool eq = bisim.are_equivalent(a_star, ab_star);
|
||||||
|
std::cout << "bisim(a*, (a|b)*) = "
|
||||||
|
<< (eq == l_true ? "true" : eq == l_false ? "false" : "undef") << "\n";
|
||||||
|
ENSURE(eq == l_false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Regression for the derive top-level-cache poisoning bug.
|
||||||
|
// Take r = (re.* "a") ++ "P" and check str.in_re "aP" r. Before the fix
|
||||||
|
// the first per-char derivative call (wrt 'a') populated m_top_cache with
|
||||||
|
// 'a' baked into the symbolic ITE-tree, so the next call (wrt 'P') returned
|
||||||
|
// that stale cached value instead of computing D_P(r) = epsilon, making
|
||||||
|
// str.in_re wrongly return false.
|
||||||
|
static void test_derive_cache_per_ele() {
|
||||||
|
ast_manager m;
|
||||||
|
reg_decl_plugins(m);
|
||||||
|
seq_util u(m);
|
||||||
|
seq_rewriter rw(m);
|
||||||
|
|
||||||
|
sort_ref str_sort(u.str.mk_string_sort(), m);
|
||||||
|
|
||||||
|
zstring sa("a"), sP("P"), s_aP("aP");
|
||||||
|
expr_ref re_a(u.re.mk_to_re(u.str.mk_string(sa)), m);
|
||||||
|
expr_ref re_P(u.re.mk_to_re(u.str.mk_string(sP)), m);
|
||||||
|
expr_ref a_star(u.re.mk_star(re_a), m);
|
||||||
|
expr_ref r(u.re.mk_concat(a_star, re_P), m);
|
||||||
|
expr_ref aP(u.str.mk_string(s_aP), m);
|
||||||
|
|
||||||
|
// Compute D_'a'(a*P) and D_'P'(a*P) directly via mk_derivative.
|
||||||
|
// Before the fix, m_top_cache was populated while m_ele = ele (the
|
||||||
|
// concrete char), so the second call hit the stale cached answer from
|
||||||
|
// the first. After the fix the cache is keyed by a symbolic var, so
|
||||||
|
// each concrete-ele substitution produces the right answer.
|
||||||
|
expr_ref ch_a(u.mk_char('a'), m);
|
||||||
|
expr_ref ch_P(u.mk_char('P'), m);
|
||||||
|
expr_ref d_a = rw.mk_derivative(ch_a, r);
|
||||||
|
expr_ref d_P = rw.mk_derivative(ch_P, r);
|
||||||
|
std::cout << "D_a(a*P) = " << mk_pp(d_a, m) << "\n";
|
||||||
|
std::cout << "D_P(a*P) = " << mk_pp(d_P, m) << "\n";
|
||||||
|
|
||||||
|
// D_P(a*P) must be nullable (it accepts the empty suffix), while
|
||||||
|
// D_a(a*P) must not be (it still needs a trailing 'P').
|
||||||
|
expr_ref n_a = rw.is_nullable(d_a);
|
||||||
|
expr_ref n_P = rw.is_nullable(d_P);
|
||||||
|
th_rewriter trw(m);
|
||||||
|
trw(n_a);
|
||||||
|
trw(n_P);
|
||||||
|
std::cout << "nullable(D_a) = " << mk_pp(n_a, m) << "\n";
|
||||||
|
std::cout << "nullable(D_P) = " << mk_pp(n_P, m) << "\n";
|
||||||
|
ENSURE(m.is_false(n_a));
|
||||||
|
ENSURE(m.is_true(n_P));
|
||||||
|
}
|
||||||
|
|
||||||
|
void tst_seq_regex_bisim() {
|
||||||
|
test_a_star_neq_ab_star();
|
||||||
|
test_derive_cache_per_ele();
|
||||||
|
}
|
||||||
|
|
@ -124,27 +124,6 @@ void tst_seq_rewriter() {
|
||||||
ENSURE(!su.re.is_range(e));
|
ENSURE(!su.re.is_range(e));
|
||||||
}
|
}
|
||||||
|
|
||||||
// -----------------------------------------------------------------------
|
|
||||||
// 9. Range complement (general): no longer a complement node
|
|
||||||
// -----------------------------------------------------------------------
|
|
||||||
{
|
|
||||||
expr_ref e(su.re.mk_complement(range('b', 'y')), m);
|
|
||||||
rw(e);
|
|
||||||
std::cout << "range comp general: " << mk_pp(e, m) << "\n";
|
|
||||||
ENSURE(!su.re.is_complement(e));
|
|
||||||
}
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------
|
|
||||||
// 10. Range complement (lo = 0): single range e union [hi+1, max].*
|
|
||||||
// -----------------------------------------------------------------------
|
|
||||||
{
|
|
||||||
expr_ref lo_str(su.str.mk_string(zstring(0u)), m);
|
|
||||||
expr_ref hi_str(su.str.mk_string(zstring((unsigned)'f')), m);
|
|
||||||
expr_ref e(su.re.mk_complement(su.re.mk_range(lo_str, hi_str)), m);
|
|
||||||
rw(e);
|
|
||||||
std::cout << "range comp lo=min: " << mk_pp(e, m) << "\n";
|
|
||||||
ENSURE(!su.re.is_complement(e));
|
|
||||||
}
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------
|
// -----------------------------------------------------------------------
|
||||||
// 11. Downstream: (re.* (re.range "z" "a")) → str.to_re ""
|
// 11. Downstream: (re.* (re.range "z" "a")) → str.to_re ""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue