optional unicode mode

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
2025-10-01 13:39:28 +00:00 · 2020-05-17 19:06:34 -07:00 · 2020-05-17 19:06:34 -07:00 · 1def58bc9f
commit 1def58bc9f
parent 30f17b1509
7 changed files with 155 additions and 26 deletions
--- a/src/smt/seq_unicode.cpp
+++ b/src/smt/seq_unicode.cpp
@ -38,8 +38,6 @@ namespace smt {
    // Records the constraint carried by `lit` as an edge v1 -> v2 of weight 0
    // in the graph `dl`, after making sure both variables are initialized in it.
    // NOTE(review): by its name this presumably encodes "v1 <= v2" over
    // character codes as a difference constraint -- confirm against dl's
    // edge convention.
    void seq_unicode::assign_le(theory_var v1, theory_var v2, literal lit) {
        dl.init_var(v1);
        dl.init_var(v2);
-        // dl.set_assignment(v1, 'a' + ctx().random(24));
-        // dl.set_assignment(v2, 'a' + ctx().random(24));
        // The trail entry is registered before the push_back below;
        // presumably it pops the recorded edge again on backtracking -- confirm.
        ctx().push_trail(push_back_vector<context, svector<theory_var>>(m_asserted_edges));
        // Remember the value returned by add_edge for the newly asserted edge.
        m_asserted_edges.push_back(dl.add_edge(v1, v2, s_integer(0), lit));
    }
@ -79,11 +77,12 @@ namespace smt {
        adapt_eq(v1, v2);
    }

-    final_check_status seq_unicode::final_check() {
+    bool seq_unicode::final_check() {
        // ensure all variables are above 0 and less than zstring::max_char()
        bool added_constraint = false;
-        // TBD: shift assignments on variables that are not lower-bounded
-        // TBD: set zero to a zero value.
+        // TBD: shift assignments on variables that are not lower-bounded, so that they are "nice" (have values 'a', 'b', ...)
+        // TBD: set "zero" to a zero value.
+        // TBD: ensure that unicode constants have the right values
        arith_util a(m);
        arith_value avalue(m);
        avalue.init(&ctx());
@ -111,10 +110,15 @@ namespace smt {
                continue;
            }
            // ensure str.to_code(unit(v)) = val
-            expr_ref ch1(seq.str.mk_unit(seq.str.mk_char(val)), m);
-            // or
-            // expr_ref ch2(seq.str.mk_string(zstring(val)), m);
-            expr_ref code(seq.str.mk_to_code(ch1), m);
+            expr_ref ch(m);
+            if (false) {
+                /// m_rewrite.coalesce_chars();
+                ch = seq.str.mk_string(zstring(val));
+            }
+            else {
+                ch = seq.str.mk_unit(seq.str.mk_char(val));
+            }
+            expr_ref code(seq.str.mk_to_code(ch), m);
            rational val2;
            if (avalue.get_value(code, val2) && val2 == rational(val))
                continue;
@ -122,13 +126,13 @@ namespace smt {
            added_constraint = true;
        }
        if (added_constraint)
-            return FC_CONTINUE;
+            return false;
        
        // ensure equalities over shared symbols
        if (th.assume_eqs(m_var_value_table))
-            return FC_CONTINUE;
+            return false;
        
-        return FC_DONE;
+        return true;
    }
    
    void seq_unicode::propagate() {
--- a/src/smt/seq_unicode.h
+++ b/src/smt/seq_unicode.h
@ -102,7 +102,7 @@ namespace smt {
        void new_diseq_eh(theory_var v1, theory_var v2);

        // ensure coherence for character codes and equalities of shared symbols.
-        final_check_status final_check();
+        bool final_check();

        unsigned get_value(theory_var v);

--- a/src/smt/theory_seq.cpp
+++ b/src/smt/theory_seq.cpp
@ -298,12 +298,14 @@ theory_seq::theory_seq(context& ctx):
    m_autil(m),
    m_sk(m, m_rewrite),
    m_ax(*this, m_rewrite),
+    m_unicode(*this),
    m_arith_value(m),
    m_trail_stack(*this),
    m_ls(m), m_rs(m),
    m_lhs(m), m_rhs(m),
    m_new_eqs(m),
    m_has_seq(m_util.has_seq()),
+    m_unhandled_expr(nullptr),
    m_res(m),
    m_max_unfolding_depth(1),
    m_max_unfolding_lit(null_literal),
@ -346,6 +348,7 @@ final_check_status theory_seq::final_check_eh() {
    if (!m_has_seq) {
        return FC_DONE;
    }
+    
    m_new_propagation = false;
    TRACE("seq", display(tout << "level: " << ctx.get_scope_level() << "\n"););
    TRACE("seq_verbose", ctx.display(tout););
@ -374,6 +377,9 @@ final_check_status theory_seq::final_check_eh() {
        TRACEFIN("zero_length");
        return FC_CONTINUE;
    }
+    if (!m_unicode.final_check()) {
+        return FC_CONTINUE;
+    }
    if (get_fparams().m_split_w_len && len_based_split()) {
        ++m_stats.m_branch_variable;
        TRACEFIN("split_based_on_length");
@ -424,6 +430,9 @@ final_check_status theory_seq::final_check_eh() {
        TRACEFIN("branch_ne");
        return FC_CONTINUE;
    }
+    if (m_unhandled_expr) {
+        return FC_GIVEUP;
+    }
    if (is_solved()) {
        //scoped_enable_trace _se;
        TRACEFIN("is_solved");
@ -2309,7 +2318,7 @@ theory_var theory_seq::mk_var(enode* n) {
 }

 // Reports whether there is pending propagation work: axioms not yet consumed
 // (m_axioms_head is behind m_axioms.size()), a non-empty m_replay, the
 // m_new_solution flag, or -- in the added version of the return statement --
 // pending work reported by m_unicode.can_propagate().
 bool theory_seq::can_propagate() {
-    return m_axioms_head < m_axioms.size() || !m_replay.empty() || m_new_solution;
+    return m_axioms_head < m_axioms.size() || !m_replay.empty() || m_new_solution || m_unicode.can_propagate();
 }

 bool theory_seq::canonize(expr* e, dependency*& eqs, expr_ref& result) {
@ -2501,6 +2510,7 @@ void theory_seq::add_dependency(dependency*& dep, enode* a, enode* b) {


 void theory_seq::propagate() {
+    m_unicode.propagate();
    while (m_axioms_head < m_axioms.size() && !ctx.inconsistent()) {
        expr_ref e(m);
        e = m_axioms[m_axioms_head].get();
@ -2578,6 +2588,15 @@ void theory_seq::deque_axiom(expr* n) {
    else if (m_util.str.is_unit(n)) {
        m_ax.add_unit_axiom(n);
    }
+    else if (m_util.str.is_is_digit(n)) {
+        m_ax.add_is_digit_axiom(n);        
+    }
+    else if (m_util.str.is_from_code(n)) {
+        m_ax.add_str_from_code_axiom(n);        
+    }
+    else if (m_util.str.is_to_code(n)) {
+        m_ax.add_str_to_code_axiom(n);        
+    }
 }

 expr_ref theory_seq::add_elim_string_axiom(expr* n) {
@ -3053,7 +3072,16 @@ void theory_seq::assign_eh(bool_var v, bool is_true) {
    else if (m_util.str.is_nth_i(e) || m_util.str.is_nth_u(e)) {
        // no-op
    }
+    else if (m_util.is_char_le(e, e1, e2)) {
+        theory_var v1 = get_th_var(ctx.get_enode(e1));
+        theory_var v2 = get_th_var(ctx.get_enode(e2));
+        if (is_true) 
+            m_unicode.assign_le(v1, v2, lit);
+        else
+            m_unicode.assign_lt(v2, v1, lit);
+    }
    else if (m_util.is_skolem(e)) {
+        
        // no-op
    }
    else {
@ -3065,6 +3093,10 @@ void theory_seq::assign_eh(bool_var v, bool is_true) {
 // Handles a new equality between theory variables v1 and v2.
 // When the owner expression of v1's enode satisfies m_util.is_char, the
 // equality is forwarded to m_unicode.new_eq_eh and nothing else is done here.
 // Otherwise a dependency leaf built from assumption(n1, n2) is created and
 // passed to the (deps, n1, n2) overload of new_eq_eh.
 void theory_seq::new_eq_eh(theory_var v1, theory_var v2) {
    enode* n1 = get_enode(v1);
    enode* n2 = get_enode(v2);
+    if (m_util.is_char(n1->get_owner())) {
+        m_unicode.new_eq_eh(v1, v2);
+        return;
+    }
    // Only n1 is inspected by the check above; NOTE(review): presumably n2 has
    // the same sort because the two are being equated -- confirm.
    dependency* deps = m_dm.mk_leaf(assumption(n1, n2));
    new_eq_eh(deps, n1, n2);
 }
@ -3156,6 +3188,10 @@ void theory_seq::new_diseq_eh(theory_var v1, theory_var v2) {
            throw default_exception("convert regular expressions into automata");            
        }
    }
+    if (m_util.is_char(n1->get_owner())) {
+        m_unicode.new_diseq_eh(v1, v2);
+        return;
+    }
    m_exclude.update(e1, e2);
    expr_ref eq(m.mk_eq(e1, e2), m);
    TRACE("seq", tout << "new disequality " << ctx.get_scope_level() << ": " << mk_bounded_pp(eq, m, 2) << "\n";);
@ -3219,6 +3255,9 @@ void theory_seq::relevant_eh(app* n) {
        m_util.str.is_itos(n) || 
        m_util.str.is_stoi(n) ||
        m_util.str.is_lt(n) ||
+        m_util.str.is_is_digit(n) ||
+        m_util.str.is_from_code(n) ||
+        m_util.str.is_to_code(n) ||
        m_util.str.is_unit(n) ||
        m_util.str.is_le(n)) {
        enque_axiom(n);
@ -3237,6 +3276,15 @@ void theory_seq::relevant_eh(app* n) {
    if (m_util.str.is_length(n, arg) && !has_length(arg) && ctx.e_internalized(arg)) {
        add_length_to_eqc(arg);
    }
+
+    if (m_util.str.is_replace_all(n) ||
+        m_util.str.is_replace_re(n) ||
+        m_util.str.is_replace_re_all(n)) {
+        if (!m_unhandled_expr) {
+            ctx.push_trail(value_trail<context, expr*>(m_unhandled_expr));
+            m_unhandled_expr = n;
+        }
+    }
 }


--- a/src/smt/theory_seq.h
+++ b/src/smt/theory_seq.h
@ -33,6 +33,7 @@ Revision History:
 #include "smt/theory_seq_empty.h"
 #include "smt/seq_skolem.h"
 #include "smt/seq_axioms.h"
+#include "smt/seq_unicode.h"
 #include "smt/seq_offset_eq.h"

 namespace smt {
@ -402,6 +403,7 @@ namespace smt {
        arith_util       m_autil;
        seq_skolem       m_sk;
        seq_axioms       m_ax;
+        seq_unicode      m_unicode;
        arith_value      m_arith_value;
        th_trail_stack   m_trail_stack;
        stats            m_stats;
@ -409,6 +411,7 @@ namespace smt {
        expr_ref_vector  m_ls, m_rs, m_lhs, m_rhs;
        expr_ref_pair_vector m_new_eqs;
        bool             m_has_seq;
+        expr*            m_unhandled_expr;

        // maintain automata with regular expressions.
        scoped_ptr_vector<eautomaton>  m_automata;