add some notes to regex

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
2025-11-21 21:26:40 +00:00 · 2020-05-26 13:22:57 -07:00 · 2020-05-26 13:22:57 -07:00 · 5e79eb62fd
commit 5e79eb62fd
parent 9336e823ed
6 changed files with 78 additions and 55 deletions
--- a/src/math/lp/int_cube.cpp
+++ b/src/math/lp/int_cube.cpp
@ -10,8 +10,8 @@ Abstract:
    Cube finder
 Author:
    Nikolaj Bjorner (nbjorner)
    Lev Nachmanson (levnach)
    Nikolaj Bjorner (nbjorner)
 Revision History:
 --*/
@ -41,7 +41,7 @@ namespace lp {
        lp_status st = lra.find_feasible_solution();
        if (st != lp_status::FEASIBLE && st != lp_status::OPTIMAL) {
-            TRACE("cube", tout << "cannot find a feasiblie solution";);
+            TRACE("cube", tout << "cannot find a feasible solution";);
            lra.pop();
            lra.move_non_basic_columns_to_bounds();
            // it can happen that we found an integer solution here
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@ -34,7 +34,6 @@ namespace smt {
    arith_util& seq_regex::a() { return th.m_autil; }
    void seq_regex::rewrite(expr_ref& e) { th.m_rewrite(e); }
    bool seq_regex::propagate() {
        bool change = false;
        for (unsigned i = 0; !ctx.inconsistent() && i < m_to_propagate.size(); ++i) {
@ -46,6 +45,23 @@ namespace smt {
        return change;
    }
    /**
     * is_string_equality holds of str.in_re s R, if R is of the form .* ++ x ++ .* ++ y ++ .* ++ 
     * s = fresh1 ++ x ++ fresh2 ++ y ++ fresh3
     * 
     * example rewrite:
     * (str.in_re s .* ++ R) => s = x ++ y and (str.in_re y R)
     * 
     * is_string_equality is currently placed under propagate_accept.
     * this allows extracting string equalities after processing regexes that are not
     * simple unions of simple concatentations. Though, it may produce different equations for 
     * alternate values of the unfolding index.
     */
    bool seq_regex::is_string_equality(literal lit) {
        return false;
    }
    /**
     * Propagate the atom (str.in.re s r)
     * 
@ -77,7 +93,11 @@ namespace smt {
        if (coallesce_in_re(lit))
            return;
        //
        // TBD s in R => R != {}
        // non-emptiness enforcement could instead of here, 
        // be added to propagate_accept after some threshold is met.
        // 
        if (false) {
            expr_ref is_empty(m.mk_eq(r, re().mk_empty(m.get_sort(s))), m);
            rewrite(is_empty);
@ -135,7 +155,7 @@ namespace smt {
     * (accept s i r[if(c,r1,r2)]) & c => (accept s i r[r1])
     * (accept s i r[if(c,r1,r2)]) & ~c => (accept s i r[r2])
     * (accept s i r) & len(s) <= i => nullable(r)
-     * (accept s r) & len(s) > i => (accept s (+ i 1) D(nth(s,i), r))
+     * (accept s i r) & len(s) > i => (accept s (+ i 1) D(nth(s,i), r))
     */
    bool seq_regex::propagate(literal lit) {
@ -143,15 +163,15 @@ namespace smt {
        expr* s = nullptr, *i = nullptr, *r = nullptr;
        expr* e = ctx.bool_var2expr(lit.var());
        VERIFY(sk().is_accept(e, s, i, r));
        unsigned idx = 0;
-        rational n;
+        VERIFY(sk().is_accept(e, s, i, idx, r));
-        VERIFY(a().is_numeral(i, n)); 
+        expr_ref is_nullable(m), d(r, m);
        SASSERT(n.is_unsigned());
        idx = n.get_unsigned();
        TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";);
        if (is_string_equality(lit))
            return true;
        if (block_unfolding(lit, idx))
            return true;
@ -161,62 +181,46 @@ namespace smt {
        case l_undef:
            ctx.mark_as_relevant(len_s_le_i);
            return false;
-        case l_true: {
+        case l_true: 
-            expr_ref is_nullable = seq_rw().is_nullable(r);
+            is_nullable = seq_rw().is_nullable(r);
            rewrite(is_nullable);
            th.add_axiom(~lit, ~len_s_le_i, th.mk_literal(is_nullable));
            return true;
        }
        case l_false:
            break;
        }
        expr_ref head(m), d(r, m), i_next(m);        
        literal_vector conds;
        if (!unfold_cofactors(d, conds)) 
            return false;
-        // s in R & len(s) > i => tail(s,i) in D(nth(s, i), R)
+        // (accept s i R) & len(s) > i => (accept s (+ i 1) D(nth(s, i), R)) or conds
-        head = th.mk_nth(s, i);        
+        expr_ref head = th.mk_nth(s, i);        
        i_next = a().mk_int(idx + 1);
        d = seq_rw().derivative(head, d);
        if (!d) 
            throw default_exception("unable to expand derivative");
-        literal acc_next = th.mk_literal(sk().mk_accept(s, i_next, d));
+        literal acc_next = th.mk_literal(sk().mk_accept(s, a().mk_int(idx + 1), d));
-        if (conds.empty()) {
+        conds.push_back(~lit);
-            th.add_axiom(~lit, len_s_le_i, acc_next);
+        conds.push_back(len_s_le_i);
-        }
+        conds.push_back(acc_next);
-        else {
+        th.add_axiom(conds);
-            conds.push_back(~lit);
+        
-            conds.push_back(len_s_le_i);
+        TRACE("seq", tout << "unfold " << head << "\n" << mk_pp(r, m) << "\n";);
            conds.push_back(acc_next);
            for (literal c : conds) 
                ctx.mark_as_relevant(c);
            ctx.mk_th_axiom(th.get_id(), conds.size(), conds.c_ptr());
        }
        TRACE("seq", tout << "head " << head << "\n";
              tout << mk_pp(r, m) << "\n";);
        return true;
    }
    /**
     * Put a limit to the unfolding of s. 
     */
    bool seq_regex::block_unfolding(literal lit, unsigned i) {
-        expr* s = nullptr, *idx = nullptr, *r = nullptr;
+        return 
-        expr* e = ctx.bool_var2expr(lit.var());
+            i > th.m_max_unfolding_depth &&
        VERIFY(sk().is_accept(e, s, idx, r));
        if (i > th.m_max_unfolding_depth && 
            th.m_max_unfolding_lit != null_literal && 
            ctx.get_assignment(th.m_max_unfolding_lit) == l_true && 
-            !ctx.at_base_level()) {
+            !ctx.at_base_level() &&
-            th.propagate_lit(nullptr, 1, &lit, ~th.m_max_unfolding_lit);
+            (th.propagate_lit(nullptr, 1, &lit, ~th.m_max_unfolding_lit), 
-            return true;
+             true);
        }
        return false;
    }
    /**
@ -263,7 +267,6 @@ namespace smt {
        return true;
    }
    void seq_regex::propagate_eq(expr* r1, expr* r2) {
        // the dual version of unroll_non_empty, but
        // skolem functions have to be eliminated or turned into 
@ -317,7 +320,14 @@ namespace smt {
        VERIFY(u().is_re(r, seq_sort));
        VERIFY(u().is_seq(seq_sort, elem_sort));
        return expr_ref(m.mk_fresh_const("re.first", elem_sort), m);
-        // return sk().mk("re.first", r, elem_sort);
+        //   return sk().mk("re.first", r, elem_sort);  
        // - for this to be effective, requires internalizer to skip skolem function internalization, 
        //   because of the regex argument r and we don't handle extensionality of regex well.
        //   It is probably a good idea to skip internalization of all skolem expressions, 
        //   but requires some changes to theory_seq.
        // - it is more useful to eliminate quantifiers in he common case, so never have to
        //   work with fresh expressions in the fist hand. This is possible for characters and
        //   ranges (just equalities and inequalities with constant bounds).
    }
    expr_ref seq_regex::unroll_non_empty(expr* r, expr_mark& seen, unsigned depth) {
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@ -49,6 +49,8 @@ namespace smt {
        seq_skolem& sk();
        arith_util& a();
        bool is_string_equality(literal lit);
        void rewrite(expr_ref& e);
        bool coallesce_in_re(literal lit);
@ -75,6 +77,8 @@ namespace smt {
        void propagate_in_re(literal lit);
        // (accept s i r) means 
        // the suffix of s after the first i characters is a member of r
        void propagate_accept(literal lit);
        void propagate_eq(expr* r1, expr* r2);
--- a/src/smt/seq_skolem.h
+++ b/src/smt/seq_skolem.h
@ -101,10 +101,11 @@ namespace smt {
                 r = to_app(a)->get_arg(2), n = to_app(a)->get_arg(3), 
                 true); 
        }
-        bool is_accept(expr* a, expr*& s, expr*& i, expr*& r) const {
+        bool is_accept(expr* e, expr*& s, expr*& i, unsigned& idx, expr*& r) const {
-            return is_accept(a) && to_app(a)->get_num_args() == 3 &&
+            return is_accept(e) && to_app(e)->get_num_args() == 3 &&                
-                (s = to_app(a)->get_arg(0), i = to_app(a)->get_arg(1),
+                (s = to_app(e)->get_arg(0), i = to_app(e)->get_arg(1),
-                 r = to_app(a)->get_arg(2), true);
+                 r = to_app(e)->get_arg(2), true) && 
                a.is_unsigned(i, idx);
        }
        bool is_post(expr* e, expr*& s, expr*& start);
        bool is_pre(expr* e, expr*& s, expr*& i);
--- a/src/smt/theory_seq.cpp
+++ b/src/smt/theory_seq.cpp
@ -2917,13 +2917,20 @@ literal theory_seq::mk_eq_empty(expr* _e, bool phase) {
 void theory_seq::add_axiom(literal l1, literal l2, literal l3, literal l4, literal l5) {
    literal_vector lits;
-    if (l1 == true_literal || l2 == true_literal || l3 == true_literal || l4 == true_literal || l5 == true_literal) return;
+    if (l1 == true_literal || l2 == true_literal || l3 == true_literal || 
-    if (l1 != null_literal && l1 != false_literal) { ctx.mark_as_relevant(l1); lits.push_back(l1); }
+        l4 == true_literal || l5 == true_literal) return;
-    if (l2 != null_literal && l2 != false_literal) { ctx.mark_as_relevant(l2); lits.push_back(l2); }
+    if (l1 != null_literal && l1 != false_literal) lits.push_back(l1); 
-    if (l3 != null_literal && l3 != false_literal) { ctx.mark_as_relevant(l3); lits.push_back(l3); }
+    if (l2 != null_literal && l2 != false_literal) lits.push_back(l2); 
-    if (l4 != null_literal && l4 != false_literal) { ctx.mark_as_relevant(l4); lits.push_back(l4); }
+    if (l3 != null_literal && l3 != false_literal) lits.push_back(l3); 
-    if (l5 != null_literal && l5 != false_literal) { ctx.mark_as_relevant(l5); lits.push_back(l5); }
+    if (l4 != null_literal && l4 != false_literal) lits.push_back(l4); 
    if (l5 != null_literal && l5 != false_literal) lits.push_back(l5); 
    add_axiom(lits);
 }
 void theory_seq::add_axiom(literal_vector & lits) {
    TRACE("seq", ctx.display_literals_verbose(tout << "assert:", lits) << "\n";);
    for (literal lit : lits)
        ctx.mark_as_relevant(lit);
    IF_VERBOSE(10, verbose_stream() << "ax ";
               for (literal l : lits) ctx.display_literal_smt2(verbose_stream() << " ", l); 
--- a/src/smt/theory_seq.h
+++ b/src/smt/theory_seq.h
@ -617,6 +617,7 @@ namespace smt {
        void enque_axiom(expr* e);
        void deque_axiom(expr* e);
        void add_axiom(literal l1, literal l2 = null_literal, literal l3 = null_literal, literal l4 = null_literal, literal l5 = null_literal);        
        void add_axiom(literal_vector& lits);
        bool has_length(expr *e) const { return m_has_length.contains(e); }
        void add_length(expr* e, expr* l);