add some notes to regex

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
2025-07-18 10:30:44 +00:00 · 2020-05-26 13:22:57 -07:00 · 2020-05-26 13:22:57 -07:00 · 5e79eb62fd
commit 5e79eb62fd
parent 9336e823ed
6 changed files with 78 additions and 55 deletions
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@ -34,7 +34,6 @@ namespace smt {
    arith_util& seq_regex::a() { return th.m_autil; }
    void seq_regex::rewrite(expr_ref& e) { th.m_rewrite(e); }

-
    bool seq_regex::propagate() {
        bool change = false;
        for (unsigned i = 0; !ctx.inconsistent() && i < m_to_propagate.size(); ++i) {
@ -46,6 +45,23 @@ namespace smt {
        return change;
    }

+    /**
+     * is_string_equality holds of str.in_re s R, if R is of the form .* ++ x ++ .* ++ y ++ .* ++ 
+     * s = fresh1 ++ x ++ fresh2 ++ y ++ fresh3
+     * 
+     * example rewrite:
+     * (str.in_re s .* ++ R) => s = x ++ y and (str.in_re y R)
+     * 
+     * is_string_equality is currently placed under propagate_accept.
+     * this allows extracting string equalities after processing regexes that are not
+     * simple unions of simple concatentations. Though, it may produce different equations for 
+     * alternate values of the unfolding index.
+     */
+
+    bool seq_regex::is_string_equality(literal lit) {
+        return false;
+    }
+
    /**
     * Propagate the atom (str.in.re s r)
     * 
@ -76,8 +92,12 @@ namespace smt {

        if (coallesce_in_re(lit))
            return;
-
+        
+        //
        // TBD s in R => R != {}
+        // non-emptiness enforcement could instead of here, 
+        // be added to propagate_accept after some threshold is met.
+        // 
        if (false) {
            expr_ref is_empty(m.mk_eq(r, re().mk_empty(m.get_sort(s))), m);
            rewrite(is_empty);
@ -135,7 +155,7 @@ namespace smt {
     * (accept s i r[if(c,r1,r2)]) & c => (accept s i r[r1])
     * (accept s i r[if(c,r1,r2)]) & ~c => (accept s i r[r2])
     * (accept s i r) & len(s) <= i => nullable(r)
-     * (accept s r) & len(s) > i => (accept s (+ i 1) D(nth(s,i), r))
+     * (accept s i r) & len(s) > i => (accept s (+ i 1) D(nth(s,i), r))
     */
    
    bool seq_regex::propagate(literal lit) {
@ -143,15 +163,15 @@ namespace smt {

        expr* s = nullptr, *i = nullptr, *r = nullptr;
        expr* e = ctx.bool_var2expr(lit.var());
-        VERIFY(sk().is_accept(e, s, i, r));
        unsigned idx = 0;
-        rational n;
-        VERIFY(a().is_numeral(i, n)); 
-        SASSERT(n.is_unsigned());
-        idx = n.get_unsigned();
+        VERIFY(sk().is_accept(e, s, i, idx, r));
+        expr_ref is_nullable(m), d(r, m);

        TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";);

+        if (is_string_equality(lit))
+            return true;
+
        if (block_unfolding(lit, idx))
            return true;

@ -161,62 +181,46 @@ namespace smt {
        case l_undef:
            ctx.mark_as_relevant(len_s_le_i);
            return false;
-        case l_true: {
-            expr_ref is_nullable = seq_rw().is_nullable(r);
+        case l_true: 
+            is_nullable = seq_rw().is_nullable(r);
            rewrite(is_nullable);
            th.add_axiom(~lit, ~len_s_le_i, th.mk_literal(is_nullable));
            return true;
-        }
        case l_false:
            break;
        }

-        expr_ref head(m), d(r, m), i_next(m);        
        literal_vector conds;
-
        if (!unfold_cofactors(d, conds)) 
            return false;

-        // s in R & len(s) > i => tail(s,i) in D(nth(s, i), R)
-        head = th.mk_nth(s, i);        
-        i_next = a().mk_int(idx + 1);
+        // (accept s i R) & len(s) > i => (accept s (+ i 1) D(nth(s, i), R)) or conds
+        expr_ref head = th.mk_nth(s, i);        
        d = seq_rw().derivative(head, d);
        if (!d) 
            throw default_exception("unable to expand derivative");

-        literal acc_next = th.mk_literal(sk().mk_accept(s, i_next, d));
-        if (conds.empty()) {
-            th.add_axiom(~lit, len_s_le_i, acc_next);
-        }
-        else {
-            conds.push_back(~lit);
-            conds.push_back(len_s_le_i);
-            conds.push_back(acc_next);
-            for (literal c : conds) 
-                ctx.mark_as_relevant(c);
-            ctx.mk_th_axiom(th.get_id(), conds.size(), conds.c_ptr());
-        }
-        TRACE("seq", tout << "head " << head << "\n";
-              tout << mk_pp(r, m) << "\n";);
+        literal acc_next = th.mk_literal(sk().mk_accept(s, a().mk_int(idx + 1), d));
+        conds.push_back(~lit);
+        conds.push_back(len_s_le_i);
+        conds.push_back(acc_next);
+        th.add_axiom(conds);
+        
+        TRACE("seq", tout << "unfold " << head << "\n" << mk_pp(r, m) << "\n";);
        return true;
    }

    /**
     * Put a limit to the unfolding of s. 
     */
-
    bool seq_regex::block_unfolding(literal lit, unsigned i) {
-        expr* s = nullptr, *idx = nullptr, *r = nullptr;
-        expr* e = ctx.bool_var2expr(lit.var());
-        VERIFY(sk().is_accept(e, s, idx, r));
-        if (i > th.m_max_unfolding_depth && 
+        return 
+            i > th.m_max_unfolding_depth &&
            th.m_max_unfolding_lit != null_literal && 
            ctx.get_assignment(th.m_max_unfolding_lit) == l_true && 
-            !ctx.at_base_level()) {
-            th.propagate_lit(nullptr, 1, &lit, ~th.m_max_unfolding_lit);
-            return true;
-        }
-        return false;
+            !ctx.at_base_level() &&
+            (th.propagate_lit(nullptr, 1, &lit, ~th.m_max_unfolding_lit), 
+             true);
    }

    /**
@ -263,7 +267,6 @@ namespace smt {
        return true;
    }

-
    void seq_regex::propagate_eq(expr* r1, expr* r2) {
        // the dual version of unroll_non_empty, but
        // skolem functions have to be eliminated or turned into 
@ -317,7 +320,14 @@ namespace smt {
        VERIFY(u().is_re(r, seq_sort));
        VERIFY(u().is_seq(seq_sort, elem_sort));
        return expr_ref(m.mk_fresh_const("re.first", elem_sort), m);
-        // return sk().mk("re.first", r, elem_sort);
+        //   return sk().mk("re.first", r, elem_sort);  
+        // - for this to be effective, requires internalizer to skip skolem function internalization, 
+        //   because of the regex argument r and we don't handle extensionality of regex well.
+        //   It is probably a good idea to skip internalization of all skolem expressions, 
+        //   but requires some changes to theory_seq.
+        // - it is more useful to eliminate quantifiers in he common case, so never have to
+        //   work with fresh expressions in the fist hand. This is possible for characters and
+        //   ranges (just equalities and inequalities with constant bounds).
    }

    expr_ref seq_regex::unroll_non_empty(expr* r, expr_mark& seen, unsigned depth) {