add empty/full regular languages, escape sequence fixes, check cancellation inside simplifier

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
2025-09-30 05:09:02 +00:00 · 2016-01-13 20:13:17 +01:00 · 2016-01-13 20:13:17 +01:00 · e0215400e2
commit e0215400e2
parent 57e1d4dc1f
5 changed files with 91 additions and 29 deletions
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@ -126,14 +126,16 @@ eautomaton* re2automaton::re2aut(expr* e) {
        }
        return b.detach();        
    }
 #if 0
    else if (u.re.is_empty(e)) {
-        return alloc(eautomaton, m);
+        return alloc(eautomaton, sm);
    }
    else if (u.re.is_full(e)) {
        sym_expr* _true = sym_expr::mk_pred(expr_ref(m.mk_true(), m));
        return eautomaton::mk_loop(sm, _true);
    }
 #if 0
    else if (u.re.is_intersect(e, e1, e2)) {
-
+        // maybe later
    }
 #endif
@ -1127,7 +1129,6 @@ bool seq_rewriter::reduce_eq(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_
        if (set_empty(szr, _rs, true, lhs, rhs)) {
            lchange |= szr > 1; 
            change  |= szr > 1;
            TRACE("seq", tout << lchange << " " << szr << "\n";);
            if (szr == 1 && !lchange) {
                lhs.reset();
                rhs.reset();
@ -1322,23 +1323,20 @@ bool seq_rewriter::is_subsequence(unsigned szl, expr* const* l, unsigned szr, ex
    for (unsigned i = 0; i < szl; ++i) {
        bool found = false;
        unsigned j = 0;
        bool is_unit = m_util.str.is_unit(l[i]);
        for (; !found && j < szr; ++j) {
-            found = !rpos.contains(j) && l[i] == r[j];
+            found = !rpos.contains(j) && (l[i] == r[j] || (is_unit && m_util.str.is_unit(r[j])));
        }
        if (!found) {
 #if 0
            std::cout << mk_pp(l[i], m()) << " not found in ";
            for (unsigned j = 0; j < szr; ++j) {
                std::cout << mk_pp(r[j], m()) << " ";
            }
            std::cout << "\n";
 #endif
            return false;
        }
        SASSERT(0 < j && j <= szr);
        rpos.insert(j-1);
    }
    // if we reach here, then every element of l is contained in r in some position.
    // or each non-unit in l is matched by a non-unit in r, and otherwise, the non-units match up.
    bool change = false;
    ptr_vector<expr> rs;
    for (unsigned j = 0; j < szr; ++j) {
        if (rpos.contains(j)) {
@ -1348,6 +1346,12 @@ bool seq_rewriter::is_subsequence(unsigned szl, expr* const* l, unsigned szr, ex
            is_sat = false;
            return true;
        }
        else {
            change = true;
        }
    }
    if (!change) {
        return false;
    }
    SASSERT(szl == rs.size());
    if (szl > 0) {
--- a/src/ast/seq_decl_plugin.cpp
+++ b/src/ast/seq_decl_plugin.cpp
@ -36,18 +36,55 @@ static bool is_hex_digit(char ch, unsigned& d) {
 static bool is_escape_char(char const *& s, unsigned& result) {
    unsigned d1, d2;
-    if (*s == '\\' && *(s + 1) == 'x' && 
+    if (*s != '\\' || *(s + 1) == 0) {
        return false;
    }
    if (*(s + 1) == 'x' && 
        is_hex_digit(*(s + 2), d1) && is_hex_digit(*(s + 3), d2)) {
        result = d1*16 + d2;
        s += 4;
        return true;
    }
-    if (*s == '\\' && *(s + 1) == '\\') {
+    switch (*(s + 1)) {
-        result = '\\';
+    case 'a':
        result = '\a';
        s += 2;
        return true;
    case 'b':
        result = '\b';
        s += 2;
        return true;
 #if 0
    case 'e':
        result = '\e';
        s += 2;
        return true;
 #endif
    case 'f':
        result = '\f';
        s += 2;
        return true;
    case 'n':
        result = '\n';
        s += 2;
        return true;
    case 'r':
        result = '\r';
        s += 2;
        return true;
    case 't':
        result = '\t';
        s += 2;
        return true;
    case 'v':
        result = '\v';
        s += 2;
        return true;
    default:
        result = *(s + 1);
        s += 2;
        return true;
    }
    return false;
 }
 zstring::zstring(encoding enc): m_encoding(enc) {}
@ -411,9 +448,9 @@ void seq_decl_plugin::init() {
    m_sigs[OP_RE_UNION]      = alloc(psig, m, "re.union",     1, 2, reAreA, reA);
    m_sigs[OP_RE_INTERSECT]  = alloc(psig, m, "re.inter",     1, 2, reAreA, reA);
    m_sigs[OP_RE_LOOP]           = alloc(psig, m, "re.loop",    1, 1, &reA, reA);
-    m_sigs[OP_RE_EMPTY_SET]      = alloc(psig, m, "re-empty-set", 1, 0, 0, reA);
+    m_sigs[OP_RE_EMPTY_SET]      = alloc(psig, m, "re.empty", 1, 0, 0, reA);
-    m_sigs[OP_RE_FULL_SET]       = alloc(psig, m, "re-full-set", 1, 0, 0, reA);
+    m_sigs[OP_RE_FULL_SET]       = alloc(psig, m, "re.all", 1, 0, 0, reA);
-    m_sigs[OP_RE_OF_PRED]        = alloc(psig, m, "re-of-pred", 1, 1, &predA, reA);
+    m_sigs[OP_RE_OF_PRED]        = alloc(psig, m, "re.of.pred", 1, 1, &predA, reA);
    m_sigs[OP_SEQ_TO_RE]         = alloc(psig, m, "seq.to.re",  1, 1, &seqA, reA);
    m_sigs[OP_SEQ_IN_RE]         = alloc(psig, m, "seq.in.re", 1, 2, seqAreA, boolT);
    m_sigs[OP_STRING_CONST]      = 0;
@ -429,6 +466,8 @@ void seq_decl_plugin::init() {
    m_sigs[_OP_STRING_SUFFIX]    = alloc(psig, m, "str.suffixof", 0, 2, str2T, boolT);
    m_sigs[_OP_STRING_IN_REGEXP]  = alloc(psig, m, "str.in.re", 0, 2, strTreT, boolT);
    m_sigs[_OP_STRING_TO_REGEXP]  = alloc(psig, m, "str.to.re", 0, 1, &strT, reT);
    m_sigs[_OP_REGEXP_EMPTY]      = alloc(psig, m, "re.nostr", 0, 0, 0, reT);
    m_sigs[_OP_REGEXP_FULL]       = alloc(psig, m, "re.allchar", 0, 0, 0, reT);
    m_sigs[_OP_STRING_SUBSTR]     = alloc(psig, m, "str.substr", 0, 3, strTint2T, strT);
 }
@ -527,6 +566,13 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters,
        match(*m_sigs[k], arity, domain, range, rng);
        return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
    case _OP_REGEXP_FULL:
        match(*m_sigs[k], arity, domain, range, rng);
        return m.mk_func_decl(symbol("re.allchar"), arity, domain, rng, func_decl_info(m_family_id, OP_RE_FULL_SET));
    case _OP_REGEXP_EMPTY:
        match(*m_sigs[k], arity, domain, range, rng);
        return m.mk_func_decl(symbol("re.nostr"), arity, domain, rng, func_decl_info(m_family_id, OP_RE_EMPTY_SET));
    case OP_RE_LOOP:
        match(*m_sigs[k], arity, domain, range, rng);
        if (num_parameters != 2 || !parameters[0].is_int() || !parameters[1].is_int()) {
--- a/src/ast/seq_decl_plugin.h
+++ b/src/ast/seq_decl_plugin.h
@ -75,6 +75,8 @@ enum seq_op_kind {
    _OP_STRING_CHARAT, 
    _OP_STRING_SUBSTR,      
    _OP_STRING_STRIDOF,
    _OP_REGEXP_EMPTY,
    _OP_REGEXP_FULL,
    _OP_SEQ_SKOLEM,
    LAST_SEQ_OP
 };
--- a/src/ast/simplifier/simplifier.cpp
+++ b/src/ast/simplifier/simplifier.cpp
@ -127,11 +127,11 @@ bool simplifier::get_subst(expr * n, expr_ref & r, proof_ref & p) {
    return false;
 }
-void simplifier::reduce_core(expr * n) {
+void simplifier::reduce_core(expr * n1) {
-    if (!is_cached(n)) {
+    if (!is_cached(n1)) {
        // We do not assume m_todo is empty... So, we store the current size of the todo-stack.
        unsigned sz = m_todo.size();
-        m_todo.push_back(n);
+        m_todo.push_back(n1);
        while (m_todo.size() != sz) {
            expr * n = m_todo.back();
            if (is_cached(n))
@ -142,6 +142,10 @@ void simplifier::reduce_core(expr * n) {
                m_todo.pop_back();
                reduce1(n);
            }
            if (m.canceled()) {
                cache_result(n1, n1, 0);
                break;
            }
        }
    }
 }
--- a/src/smt/theory_seq.cpp
+++ b/src/smt/theory_seq.cpp
@ -444,7 +444,9 @@ bool theory_seq::check_length_coherence(expr* e) {
            propagate_is_conc(e, conc);
            assume_equality(tail, emp);
        }
        if (!get_context().at_base_level()) {
            m_trail_stack.push(push_replay(alloc(replay_length_coherence, m, e)));
        }
        return true;
    }
    return false;
@ -652,7 +654,7 @@ bool theory_seq::simplify_eq(expr_ref_vector& ls, expr_ref_vector& rs, dependenc
    expr_ref_vector lhs(m), rhs(m);
    bool changed = false;
    if (!m_seq_rewrite.reduce_eq(ls, rs, lhs, rhs, changed)) {
-        // equality is inconsistent.x2
+        // equality is inconsistent.
        TRACE("seq", tout << ls << " != " << rs << "\n";);
        set_conflict(deps);
        return true;
@ -1534,7 +1536,7 @@ void theory_seq::propagate() {
    }
    while (!m_replay.empty() && !ctx.inconsistent()) {
        (*m_replay[m_replay.size()-1])(*this);
-        TRACE("seq", tout << "replay: " << ctx.get_scope_level() << "\n";);
+        TRACE("seq", tout << "replay at level: " << ctx.get_scope_level() << "\n";);
        m_replay.pop_back();
    }
    if (m_new_solution) {
@ -1716,6 +1718,7 @@ void theory_seq::add_elim_string_axiom(expr* n) {
    - len(x) >= 0                   otherwise
 */
 void theory_seq::add_length_axiom(expr* n) {
    context& ctx = get_context();
    expr* x;
    VERIFY(m_util.str.is_length(n, x));
    if (m_util.str.is_concat(x) ||
@ -1726,13 +1729,16 @@ void theory_seq::add_length_axiom(expr* n) {
        m_rewrite(len);
        SASSERT(n != len);
        add_axiom(mk_eq(len, n, false));
-
+        if (!ctx.at_base_level()) {
            m_trail_stack.push(push_replay(alloc(replay_axiom, m, n)));
        }
    }
    else {
        add_axiom(mk_literal(m_autil.mk_ge(n, m_autil.mk_int(0))));        
        if (!ctx.at_base_level()) {
            m_trail_stack.push(push_replay(alloc(replay_axiom, m, n)));
        }
    }
 }