new regex automata start; add complexity estimation

2025-11-16 19:05:46 +00:00 · 2017-12-04 18:05:00 -05:00 · 2017-12-04 18:05:00 -05:00 · fbe8d1577e
commit fbe8d1577e
parent b581ab70ed
7 changed files with 127 additions and 3 deletions
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@ -200,6 +200,9 @@ void re2automaton::set_solver(expr_solver* solver) {
    m_sa = alloc(symbolic_automata_t, sm, *m_ba.get());
 }
 eautomaton* re2automaton::mk_product(eautomaton* a1, eautomaton* a2) {
    return m_sa->mk_product(*a1, *a2);
 }
 eautomaton* re2automaton::operator()(expr* e) { 
    eautomaton* r = re2aut(e); 
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@ -53,7 +53,9 @@ public:
    bool is_range() const { return m_ty == t_range; }
    sort* get_sort() const { return m_sort; }
    expr* get_char() const { SASSERT(is_char()); return m_t; }
-
+    expr* get_pred() const { SASSERT(is_pred()); return m_t; }
    expr* get_lo() const { SASSERT(is_range()); return m_t; }
    expr* get_hi() const { SASSERT(is_range()); return m_s; }
 };
 class sym_expr_manager {
@ -87,6 +89,7 @@ public:
    ~re2automaton();
    eautomaton* operator()(expr* e);
    void set_solver(expr_solver* solver);
    eautomaton* mk_product(eautomaton *a1, eautomaton *a2);
 };
 /**
--- a/src/smt/params/smt_params_helper.pyg
+++ b/src/smt/params/smt_params_helper.pyg
@ -79,6 +79,7 @@ def_module_params(module_name='smt',
                          ('theory_aware_branching', BOOL, False, 'Allow the context to use extra information from theory solvers regarding literal branching prioritization.'),
                          ('str.finite_overlap_models', BOOL, False, 'attempt a finite model search for overlapping variables instead of completely giving up on the arrangement'),
                          ('str.overlap_priority', DOUBLE, -0.1, 'theory-aware priority for overlapping variable cases; use smt.theory_aware_branching=true'),
                          ('str.regex_automata', BOOL, True, 'use automata-based reasoning for regular expressions (Z3str3 only)'),
                          ('core.minimize', BOOL, False, 'minimize unsat core produced by SMT context'),
                          ('core.extend_patterns', BOOL, False, 'extend unsat core with literals that trigger (potential) quantifier instances'),
                          ('core.extend_patterns.max_distance', UINT, UINT_MAX, 'limits the distance of a pattern-extended unsat core'),
--- a/src/smt/params/theory_str_params.cpp
+++ b/src/smt/params/theory_str_params.cpp
@ -31,4 +31,5 @@ void theory_str_params::updt_params(params_ref const & _p) {
    m_UseBinarySearch = p.str_use_binary_search();
    m_BinarySearchInitialUpperBound = p.str_binary_search_start();
    m_OverlapTheoryAwarePriority = p.str_overlap_priority();
    m_RegexAutomata = p.str_regex_automata();
 }
--- a/src/smt/params/theory_str_params.h
+++ b/src/smt/params/theory_str_params.h
@ -80,6 +80,13 @@ struct theory_str_params {
    double m_OverlapTheoryAwarePriority;
    /*
     * If RegexAutomata is set to true,
     * Z3str3 will use automata-based methods to reason about
     * regular expression constraints.
     */
    bool m_RegexAutomata;
    theory_str_params(params_ref const & p = params_ref()):
        m_StrongArrangements(true),
        m_AggressiveLengthTesting(false),
@ -91,7 +98,8 @@ struct theory_str_params {
        m_FiniteOverlapModels(false),
        m_UseBinarySearch(false),
        m_BinarySearchInitialUpperBound(64),
-        m_OverlapTheoryAwarePriority(-0.1)
+        m_OverlapTheoryAwarePriority(-0.1),
        m_RegexAutomata(true)
    {
        updt_params(p);
    }
--- a/src/smt/theory_str.cpp
+++ b/src/smt/theory_str.cpp
@ -6446,6 +6446,111 @@ namespace smt {
        }
    }
    // saturating unsigned addition
    unsigned inline _qadd(unsigned a, unsigned b) {
        if (a == UINT_MAX || b == UINT_MAX) {
            return UINT_MAX;
        }
        unsigned result = a + b;
        if (result < a || result < b) {
            return UINT_MAX;
        }
        return result;
    }
    // saturating unsigned multiply
    unsigned inline _qmul(unsigned a, unsigned b) {
        if (a == UINT_MAX || b == UINT_MAX) {
            return UINT_MAX;
        }
        unsigned result = a * b;
        if (result < a || result < b) {
            return UINT_MAX;
        }
        return result;
    }
    unsigned theory_str::estimate_regex_complexity(expr * re) {
        ENSURE(u.is_re(re));
        expr * sub1;
        expr * sub2;
        if (u.re.is_to_re(re, sub1)) {
            SASSERT(u.str.is_string(sub1));
            zstring str;
            u.str.is_string(sub1, str);
            return str.length();
        } else if (u.re.is_complement(re, sub1)) {
            return estimate_regex_complexity_under_complement(sub1);
        } else if (u.re.is_concat(re, sub1, sub2)) {
            unsigned cx1 = estimate_regex_complexity(sub1);
            unsigned cx2 = estimate_regex_complexity(sub2);
            return _qadd(cx1, cx2);
        } else if (u.re.is_union(re, sub1, sub2)) {
            unsigned cx1 = estimate_regex_complexity(sub1);
            unsigned cx2 = estimate_regex_complexity(sub2);
            return _qadd(cx1, cx2);
        } else if (u.re.is_star(re, sub1) || u.re.is_plus(re, sub1)) {
            unsigned cx = estimate_regex_complexity(sub1);
            return _qmul(2, cx);
        } else if (u.re.is_range(re, sub1, sub2)) {
            SASSERT(u.str.is_string(sub1));
            SASSERT(u.str.is_string(sub2));
            zstring str1, str2;
            u.str.is_string(sub1, str1);
            u.str.is_string(sub2, str2);
            SASSERT(str1.length() == 1);
            SASSERT(str2.length() == 1);
            return 1 + str2[0] - str1[0];
        } else if (u.re.is_full(re)) {
            return 1;
        } else {
            TRACE("str", tout << "WARNING: unknown regex term " << mk_pp(re, get_manager()) << std::endl;);
            return 1;
        }
    }
    unsigned theory_str::estimate_regex_complexity_under_complement(expr * re) {
        ENSURE(u.is_re(re));
        expr * sub1;
        expr * sub2;
        if (u.re.is_to_re(re, sub1)) {
            SASSERT(u.str.is_string(sub1));
            zstring str;
            u.str.is_string(sub1, str);
            return str.length();
        } else if (u.re.is_complement(re, sub1)) {
            // Why don't we return the regular complexity here?
            // We could, but this might be called from under another complemented subexpression.
            // It's better to give a worst-case complexity.
            return estimate_regex_complexity_under_complement(sub1);
        } else if (u.re.is_concat(re, sub1, sub2)) {
            unsigned cx1 = estimate_regex_complexity_under_complement(sub1);
            unsigned cx2 = estimate_regex_complexity_under_complement(sub2);
            return _qadd(_qmul(2, cx1), cx2);
        } else if (u.re.is_union(re, sub1, sub2)) {
            unsigned cx1 = estimate_regex_complexity_under_complement(sub1);
            unsigned cx2 = estimate_regex_complexity_under_complement(sub2);
            return _qmul(cx1, cx2);
        } else if (u.re.is_star(re, sub1) || u.re.is_plus(re, sub1)) {
            unsigned cx = estimate_regex_complexity_under_complement(sub1);
            return _qmul(2, cx);
        } else if (u.re.is_range(re, sub1, sub2)) {
            SASSERT(u.str.is_string(sub1));
            SASSERT(u.str.is_string(sub2));
            zstring str1, str2;
            u.str.is_string(sub1, str1);
            u.str.is_string(sub2, str2);
            SASSERT(str1.length() == 1);
            SASSERT(str2.length() == 1);
            return 1 + str2[0] - str1[0];
        } else if (u.re.is_full(re)) {
            return 1;
        } else {
            TRACE("str", tout << "WARNING: unknown regex term " << mk_pp(re, get_manager()) << std::endl;);
            return 1;
        }
    }
    /*
     * strArgmt::solve_concat_eq_str()
     * Solve concatenations of the form:
--- a/src/smt/theory_str.h
+++ b/src/smt/theory_str.h
@ -457,11 +457,14 @@ protected:
    expr * mk_RegexIn(expr * str, expr * regexp);
    void instantiate_axiom_RegexIn(enode * e);
    app * mk_unroll(expr * n, expr * bound);
    void process_unroll_eq_const_str(expr * unrollFunc, expr * constStr);
    void unroll_str2reg_constStr(expr * unrollFunc, expr * eqConstStr);
    void process_concat_eq_unroll(expr * concat, expr * unroll);
    // regex automata
    unsigned estimate_regex_complexity(expr * re);
    unsigned estimate_regex_complexity_under_complement(expr * re);
    void set_up_axioms(expr * ex);
    void handle_equality(expr * lhs, expr * rhs);