From 70b48225718b9052c3719fe59ee3cbf6df27ac76 Mon Sep 17 00:00:00 2001
From: Nikolaj Bjorner <nbjorner@microsoft.com>
Date: Sun, 14 Feb 2021 17:40:41 -0800
Subject: [PATCH] patch seq theory using purification to avoid unsoundness
 caused by interaction with canonization and rewriting

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
---
 src/ast/rewriter/seq_rewriter.cpp |  8 +++++++
 src/ast/rewriter/seq_rewriter.h   |  4 +++-
 src/smt/seq_axioms.cpp            | 32 +-------------------------
 src/smt/seq_axioms.h              |  2 --
 src/smt/theory_seq.cpp            | 38 +++++++++++++++++++++++++------
 src/smt/theory_seq.h              |  3 ++-
 6 files changed, 45 insertions(+), 42 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 8a3222c7c..362ee43e2 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -4514,6 +4514,14 @@ bool seq_rewriter::is_string(unsigned n, expr* const* es, zstring& s) const {
     return true;
 }
 
+expr_ref seq_rewriter::mk_length(expr* s) {
+    expr_ref result(m());
+    if (BR_FAILED == mk_seq_length(s, result))
+        result = str().mk_length(s);
+    return result;
+}
+
+
 /**
  * itos(n) = <numeric string> -> n = numeric
  */
diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h
index d55751400..d503ba022 100644
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@@ -280,7 +280,7 @@ class seq_rewriter {
     bool reduce_subsequence(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& eqs);
     bool reduce_by_length(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& eqs);
     bool reduce_itos(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& eqs);
-    bool reduce_eq_empty(expr* l, expr* r, expr_ref& result);
+    bool reduce_eq_empty(expr* l, expr* r, expr_ref& result);    
     bool min_length(expr_ref_vector const& es, unsigned& len);
     expr* concat_non_empty(expr_ref_vector& es);
 
@@ -344,6 +344,8 @@ public:
 
     bool reduce_contains(expr* a, expr* b, expr_ref_vector& disj);
 
+    expr_ref mk_length(expr* s);
+
     void add_seqs(expr_ref_vector const& ls, expr_ref_vector const& rs, expr_ref_pair_vector& new_eqs);
 
     // Expose derivative and nullability check
diff --git a/src/smt/seq_axioms.cpp b/src/smt/seq_axioms.cpp
index 534e3ddf0..49264ce35 100644
--- a/src/smt/seq_axioms.cpp
+++ b/src/smt/seq_axioms.cpp
@@ -106,10 +106,6 @@ void seq_axioms::add_extract_axiom(expr* e) {
         add_extract_prefix_axiom(e, s, l);
         return;
     }
-    if (is_extract_suffix(s, i, l)) {
-        add_extract_suffix_axiom(e, s, i);
-        return;
-    }
     expr_ref x = m_sk.mk_pre(s, i);
     expr_ref ls = mk_len(s);
     expr_ref lx = mk_len(x);
@@ -195,24 +191,18 @@ bool seq_axioms::is_extract_prefix0(expr* s, expr* i, expr* l) {
     return a.is_numeral(i, i1) && i1.is_zero();    
 }
 
-bool seq_axioms::is_extract_suffix(expr* s, expr* i, expr* l) {
-    expr_ref len(a.mk_add(l, i), m);
-    m_rewrite(len);
-    return seq.str.is_length(len, l) && l == s;
-}
 
 /*
   s = ey
   l <= 0 => e = empty
   0 <= l <= len(s) => len(e) = l
-  len(s) < l => len(e) = len(s)
+  len(s) < l => e = s
  */
 void seq_axioms::add_extract_prefix_axiom(expr* e, expr* s, expr* l) {
     TRACE("seq", tout << "prefix " << mk_bounded_pp(e, m, 2) << " " << mk_bounded_pp(s, m, 2) << " " << mk_bounded_pp(l, m, 2) << "\n";);
     expr_ref le = mk_len(e);
     expr_ref ls = mk_len(s);
     expr_ref ls_minus_l(mk_sub(ls, l), m);
-    expr_ref zero(a.mk_int(0), m);
     expr_ref y = m_sk.mk_post(s, l);
     expr_ref ey = mk_concat(e, y);
     literal l_le_s = mk_le(mk_sub(l, ls), 0);
@@ -222,26 +212,6 @@ void seq_axioms::add_extract_prefix_axiom(expr* e, expr* s, expr* l) {
     add_axiom(l_le_s, mk_eq(e, s));
 }
 
-/*
-  0 <= i <= len(s) => s = xe & i = len(x)    
-  i < 0 => e = empty
-  i > len(s) => e = empty
- */
-void seq_axioms::add_extract_suffix_axiom(expr* e, expr* s, expr* i) {
-    TRACE("seq", tout << "suffix " << mk_bounded_pp(e, m, 2) << " " << mk_bounded_pp(s, m, 2) << "\n";);
-    expr_ref x = m_sk.mk_pre(s, i);
-    expr_ref lx = mk_len(x);
-    expr_ref ls = mk_len(s);
-    expr_ref zero(a.mk_int(0), m);
-    expr_ref xe = mk_concat(x, e);
-    literal le_is_0 = mk_eq_empty(e);
-    literal i_ge_0 = mk_ge(i, 0);
-    literal i_le_s = mk_le(mk_sub(i, ls), 0);
-    add_axiom(~i_ge_0, ~i_le_s, mk_seq_eq(s, xe));
-    add_axiom(~i_ge_0, ~i_le_s, mk_eq(i, lx));
-    add_axiom(i_ge_0, le_is_0);
-    add_axiom(i_le_s, le_is_0);
-}        
 
 /*
   encode that s is not contained in of xs1
diff --git a/src/smt/seq_axioms.h b/src/smt/seq_axioms.h
index 4f27f8ab5..2299695ea 100644
--- a/src/smt/seq_axioms.h
+++ b/src/smt/seq_axioms.h
@@ -57,9 +57,7 @@ namespace smt {
         bool is_drop_last(expr* s, expr* i, expr* l);
         bool is_tail(expr* s, expr* i, expr* l);
         bool is_extract_prefix0(expr* s, expr* i, expr* l);
-        bool is_extract_suffix(expr* s, expr* i, expr* l);
         void add_extract_prefix_axiom(expr* e, expr* s, expr* l);
-        void add_extract_suffix_axiom(expr* e, expr* s, expr* i);
         void tightest_prefix(expr* s, expr* x);
         void ensure_digit_axiom();
     public:
diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp
index d925c0b57..343411fd0 100644
--- a/src/smt/theory_seq.cpp
+++ b/src/smt/theory_seq.cpp
@@ -2493,14 +2493,14 @@ void theory_seq::propagate() {
         m_regex.propagate();
     while (m_axioms_head < m_axioms.size() && !ctx.inconsistent()) {
         expr_ref e(m);
-        e = m_axioms[m_axioms_head].get();
+        e = m_axioms.get(m_axioms_head);
         deque_axiom(e);
         ++m_axioms_head;
     }
     while (!m_replay.empty() && !ctx.inconsistent()) {
-        apply* app = m_replay[m_replay.size() - 1];
         TRACE("seq", tout << "replay at level: " << ctx.get_scope_level() << "\n";);
-        (*app)(*this);
+        apply& app = *m_replay[m_replay.size() - 1];
+        app(*this);
         m_replay.pop_back();
     }
     if (m_new_solution) {
@@ -2540,7 +2540,7 @@ void theory_seq::deque_axiom(expr* n) {
         m_ax.add_replace_axiom(n);
     }
     else if (m_util.str.is_extract(n)) {
-        m_ax.add_extract_axiom(n);
+        m_ax.add_extract_axiom(purify(n));
     }
     else if (m_util.str.is_at(n)) {
         m_ax.add_at_axiom(n);
@@ -2579,6 +2579,32 @@ void theory_seq::deque_axiom(expr* n) {
     }
 }
 
+expr_ref theory_seq::purify(expr* e) {
+    app* a = to_app(e);
+    expr_ref_vector args(m);
+    bool has_fresh = false;
+    for (expr* arg : *a) {
+        expr_ref tmp(m);
+        m_rewrite(arg, tmp);
+        if (arg != tmp) {
+            has_fresh = true;
+            tmp = m.mk_fresh_const("purify", arg->get_sort());
+            enode* n1 = ctx.get_enode(arg);
+            enode* n2 = ensure_enode(tmp);
+            justification* js = ctx.mk_justification(
+                ext_theory_eq_propagation_justification(
+                    get_id(), ctx.get_region(), 0, nullptr, 0, nullptr, n1, n2));
+            ctx.assign_eq(n1, n2, eq_justification(js));
+        }
+        args.push_back(tmp);
+    }
+
+    if (has_fresh)
+        return expr_ref(m.mk_app(a->get_decl(), args), m);
+    
+    return expr_ref(a, m);
+}
+
 expr_ref theory_seq::add_elim_string_axiom(expr* n) {
     zstring s;
     TRACE("seq", tout << mk_pp(n, m) << "\n";);
@@ -2609,9 +2635,7 @@ expr_ref theory_seq::mk_add(expr* a, expr* b) {
 }
 
 expr_ref theory_seq::mk_len(expr* s) {
-    expr_ref result(m_util.str.mk_length(s), m); 
-    m_rewrite(result);
-    return result;
+    return m_seq_rewrite.mk_length(s);
 }
 
 
diff --git a/src/smt/theory_seq.h b/src/smt/theory_seq.h
index 011cebe1a..1a14279ce 100644
--- a/src/smt/theory_seq.h
+++ b/src/smt/theory_seq.h
@@ -20,6 +20,7 @@ Revision History:
 
 #include "ast/seq_decl_plugin.h"
 #include "ast/rewriter/th_rewriter.h"
+#include "ast/rewriter/seq_skolem.h"
 #include "ast/ast_trail.h"
 #include "util/scoped_vector.h"
 #include "util/scoped_ptr_vector.h"
@@ -29,7 +30,6 @@ Revision History:
 #include "smt/smt_theory.h"
 #include "smt/smt_arith_value.h"
 #include "smt/theory_seq_empty.h"
-#include "ast/rewriter/seq_skolem.h"
 #include "smt/seq_axioms.h"
 #include "smt/seq_regex.h"
 #include "smt/seq_offset_eq.h"
@@ -496,6 +496,7 @@ namespace smt {
         bool reduce_ne(unsigned idx);
         bool branch_nqs();
         lbool branch_nq(ne const& n);
+        expr_ref purify(expr* e);
 
         struct cell {
             cell*       m_parent;