From e63dc7efc2f14e04522c430b3da52afcc4a4d0be Mon Sep 17 00:00:00 2001
From: Nikolaj Bjorner <nbjorner@microsoft.com>
Date: Wed, 17 Feb 2021 17:31:46 -0800
Subject: [PATCH] more rewrite rules

---
 src/ast/rewriter/seq_axioms.cpp   |  49 ++++++++++--
 src/ast/rewriter/seq_axioms.h     |   2 +
 src/ast/rewriter/seq_rewriter.cpp | 126 +++++++++++++++++++++++-------
 src/ast/rewriter/seq_rewriter.h   |   1 +
 src/util/rational.h               |  11 ++-
 5 files changed, 150 insertions(+), 39 deletions(-)

diff --git a/src/ast/rewriter/seq_axioms.cpp b/src/ast/rewriter/seq_axioms.cpp
index 5798df110..e7a4da35c 100644
--- a/src/ast/rewriter/seq_axioms.cpp
+++ b/src/ast/rewriter/seq_axioms.cpp
@@ -127,31 +127,34 @@ namespace seq {
         auto s = purify(_s);
         auto i = purify(_i);
         auto l = purify(_l);
-        if (is_tail(s, i, l)) {
+        if (is_tail(s, _i, _l)) {
             tail_axiom(e, s);
             return;
         }
-        if (is_drop_last(s, i, l)) {
+        if (is_drop_last(s, _i, _l)) {
             drop_last_axiom(e, s);
             return;
         }
-        if (is_extract_prefix0(s, i, l)) {
+        if (is_extract_prefix0(s, _i, _l)) {
             extract_prefix_axiom(e, s, l);
             return;
         }
+        if (is_extract_suffix(s, _i, _l)) {
+            return;
+        }
         expr_ref x = m_sk.mk_pre(s, i);
-        expr_ref ls = mk_len(s);
+        expr_ref ls = mk_len(_s);
         expr_ref lx = mk_len(x);
         expr_ref le = mk_len(e);
-        expr_ref ls_minus_i_l(mk_sub(mk_sub(ls, i), l), m);
+        expr_ref ls_minus_i_l(mk_sub(mk_sub(ls, _i), _l), m);
         expr_ref y = m_sk.mk_post(s, a.mk_add(i, l));
         expr_ref xe = mk_concat(x, e);
         expr_ref xey = mk_concat(x, e, y);
         expr_ref zero(a.mk_int(0), m);
 
-        expr_ref i_ge_0    = mk_ge(i, 0);
-        expr_ref i_le_ls   = mk_le(mk_sub(i, ls), 0);
-        expr_ref ls_le_i   = mk_le(mk_sub(ls, i), 0);
+        expr_ref i_ge_0    = mk_ge(_i, 0);
+        expr_ref i_le_ls   = mk_le(mk_sub(_i, ls), 0);
+        expr_ref ls_le_i   = mk_le(mk_sub(ls, _i), 0);
         expr_ref ls_ge_li  = mk_ge(ls_minus_i_l, 0);
         expr_ref l_ge_0    = mk_ge(l, 0);
         expr_ref l_le_0    = mk_le(l, 0);
@@ -224,6 +227,13 @@ namespace seq {
         return a.is_numeral(i, i1) && i1.is_zero();    
     }
 
+    bool axioms::is_extract_suffix(expr* s, expr* i, expr* l) {
+        expr_ref len(a.mk_add(l, i), m);
+        m_rewrite(len);
+        return seq.str.is_length(len, l) && l == s;
+    }
+
+
     /*
       s = ey
       l <= 0 => e = empty
@@ -231,6 +241,7 @@ namespace seq {
       len(s) < l => e = s
     */
     void axioms::extract_prefix_axiom(expr* e, expr* s, expr* l) {
+        
         TRACE("seq", tout << "prefix " << mk_bounded_pp(e, m, 2) << " " << mk_bounded_pp(s, m, 2) << " " << mk_bounded_pp(l, m, 2) << "\n";);
         expr_ref le = mk_len(e);
         expr_ref ls = mk_len(s);
@@ -244,6 +255,28 @@ namespace seq {
         add_clause(l_le_s, mk_eq(e, s));
     }
 
+    /*
+      s = xe
+      0 <= i <= len(s) => i = len(x)    
+      i < 0 => e = empty
+      i > len(s) => e = empty
+    */
+    void axioms::extract_suffix_axiom(expr* e, expr* s, expr* i) {
+        TRACE("seq", tout << "suffix " << mk_bounded_pp(e, m, 2) << " " << mk_bounded_pp(s, m, 2) << "\n";);
+        expr_ref x = m_sk.mk_pre(s, i);
+        expr_ref lx = mk_len(x);
+        expr_ref ls = mk_len(s);
+        expr_ref xe = mk_concat(x, e);
+        expr_ref emp = mk_eq_empty(e);
+        expr_ref i_ge_0 = mk_ge(i, 0);
+        expr_ref i_le_s = mk_le(mk_sub(i, ls), 0);
+        add_clause(mk_eq(s, xe));
+        add_clause(~i_ge_0, ~i_le_s, mk_eq(i, lx));
+        add_clause(i_ge_0, emp);
+        add_clause(i_le_s, emp);
+    }        
+    
+
     /*
       encode that s is not contained in of xs1
       where s1 is all of s, except the last element.
diff --git a/src/ast/rewriter/seq_axioms.h b/src/ast/rewriter/seq_axioms.h
index be15a8166..16b166c62 100644
--- a/src/ast/rewriter/seq_axioms.h
+++ b/src/ast/rewriter/seq_axioms.h
@@ -62,10 +62,12 @@ namespace seq {
         bool is_drop_last(expr* s, expr* i, expr* l);
         bool is_tail(expr* s, expr* i, expr* l);
         bool is_extract_prefix0(expr* s, expr* i, expr* l);
+        bool is_extract_suffix(expr* s, expr* i, expr* l);
 
         void tail_axiom(expr* e, expr* s);
         void drop_last_axiom(expr* e, expr* s);
         void extract_prefix_axiom(expr* e, expr* s, expr* l);
+        void extract_suffix_axiom(expr* e, expr* s, expr* l);
         void tightest_prefix(expr* s, expr* x);
 
     public:
diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 362ee43e2..f2b22f41b 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -966,7 +966,6 @@ br_status seq_rewriter::mk_seq_extract(expr* a, expr* b, expr* c, expr_ref& resu
     bool constantBase = str().is_string(a, s);
     bool constantPos = m_autil.is_numeral(b, pos);
     bool constantLen = m_autil.is_numeral(c, len);
-    bool lengthPos   = str().is_length(b) || m_autil.is_add(b);
     sort* a_sort = a->get_sort();
 
     sign sg;
@@ -988,6 +987,12 @@ br_status seq_rewriter::mk_seq_extract(expr* a, expr* b, expr* c, expr_ref& resu
         return BR_DONE;
     }
 
+    unsigned len_a;
+    if (constantPos && max_length(a, len_a) && rational(len_a) <= pos) {
+        result = str().mk_empty(a_sort);
+        return BR_DONE;
+    }
+
     constantPos &= pos.is_unsigned();
     constantLen &= len.is_unsigned();
 
@@ -1013,20 +1018,21 @@ br_status seq_rewriter::mk_seq_extract(expr* a, expr* b, expr* c, expr_ref& resu
         return BR_DONE;
     }
 
+    auto reassemble = [&](rational const& p, expr_ref_vector const& xs) {
+        expr_ref r(m_autil.mk_int(p), m());
+        for (expr* e : xs)
+            r = m_autil.mk_add(r, str().mk_length(e));
+        return r;
+    };
+
     // extract(a + b + c, len(a + b), s) -> extract(c, 0, s)
     // extract(a + b + c, len(a) + len(b), s) -> extract(c, 0, s)
-    if (lengthPos) {
-        
-        m_lhs.reset();
-        expr_ref_vector lens(m());
-        str().get_concat(a, m_lhs);
-        TRACE("seq", tout << m_lhs << " " << pos << " " << lens << "\n";);
-        if (!get_lengths(b, lens, pos) || pos.is_neg()) {
-            return BR_FAILED;
-        }
+
+    expr_ref_vector lens(m());       
+    if (get_lengths(b, lens, pos) && pos >= 0) {
         unsigned i = 0;
-        for (; i < m_lhs.size(); ++i) {
-            expr* lhs = m_lhs.get(i);
+        for (; i < as.size(); ++i) {
+            expr* lhs = as.get(i);
             if (lens.contains(lhs)) {
                 lens.erase(lhs);
             }
@@ -1037,32 +1043,54 @@ br_status seq_rewriter::mk_seq_extract(expr* a, expr* b, expr* c, expr_ref& resu
                 break;
             }
         }
-        if (i == 0) return BR_FAILED;
-        expr_ref t1(m()), t2(m());
-        t1 = str().mk_concat(m_lhs.size() - i, m_lhs.c_ptr() + i, a->get_sort());        
-        t2 = m_autil.mk_int(pos);
-        for (expr* rhs : lens) {
-            t2 = m_autil.mk_add(t2, str().mk_length(rhs));
+        if (i != 0) {
+            expr_ref t1(m());
+            t1 = str().mk_concat(as.size() - i, as.c_ptr() + i, a->get_sort());        
+            expr_ref t2 = reassemble(pos, lens);
+            result = str().mk_substr(t1, t2, c);
+            TRACE("seq", tout << result << "\n";);
+            return BR_REWRITE2;
         }
-        result = str().mk_substr(t1, t2, c);
-        TRACE("seq", tout << result << "\n";);
-        return BR_REWRITE2;
     }
 
+
     if (!constantPos) {
         return BR_FAILED;
     }
     unsigned _pos = pos.get_unsigned();
 
-    // (extract s 0 (len s)) = s 
-    expr* a2 = nullptr;
-    if (_pos == 0 && str().is_length(c, a2)) {
-        m_lhs.reset();
-        str().get_concat(a, m_lhs);
-        if (!m_lhs.empty() && m_lhs.get(0) == a2) {
-            result = a2;
+    // extract(a + b + c, 0, len(a) + len(b)) -> c
+    if (pos.is_zero()) {
+        lens.reset();
+        if (!get_lengths(c, lens, pos) || pos.is_neg()) 
+            return BR_FAILED;
+        unsigned i = 0;
+        for (; i < as.size(); ++i) {
+            expr* lhs = as.get(i);
+            if (lens.contains(lhs)) {
+                lens.erase(lhs);
+            }
+            else if (str().is_unit(lhs) && pos.is_pos()) {
+                pos -= rational(1);
+            }
+            else {
+                break;
+            }
+        }
+        if (i == as.size()) {
+            result = a;
             return BR_DONE;
         }
+        else if (i != 0) {
+            expr_ref t1(m()), t2(m());
+            t1 = str().mk_concat(as.size() - i, as.c_ptr() + i, a->get_sort());  
+            t2 = reassemble(pos, lens);
+            result = str().mk_substr(t1, b, t2);
+            as[i] = result;
+            result = str().mk_concat(i + 1, as.c_ptr(), a->get_sort());
+            TRACE("seq", tout << result << "\n";);
+            return BR_REWRITE2;
+        }
     }
 
     expr* a1 = nullptr, *b1 = nullptr, *c1 = nullptr;
@@ -1123,16 +1151,23 @@ br_status seq_rewriter::mk_seq_extract(expr* a, expr* b, expr* c, expr_ref& resu
 }
 
 bool seq_rewriter::get_lengths(expr* e, expr_ref_vector& lens, rational& pos) {
-    expr* arg = nullptr;
+    expr* arg = nullptr, *e1 = nullptr, *e2 = nullptr;
     rational pos1;
     if (m_autil.is_add(e)) {
         for (expr* arg1 : *to_app(e)) {
-            if (!get_lengths(arg1, lens, pos)) return false;
+            if (!get_lengths(arg1, lens, pos)) 
+                return false;
         }
     }
     else if (str().is_length(e, arg)) {
         lens.push_back(arg);
     }
+    else if (m_autil.is_mul(e, e1, e2) && m_autil.is_numeral(e1, pos1) && str().is_length(e2, arg) && pos1 <= 10) {
+        while (pos1 > 0) {
+            lens.push_back(arg);
+            pos1 -= rational(1);
+        }
+    }
     else if (m_autil.is_numeral(e, pos1)) {
         pos += pos1;
     }
@@ -4496,6 +4531,37 @@ bool seq_rewriter::min_length(expr_ref_vector const& es, unsigned& len) {
     return bounded;
 }
 
+bool seq_rewriter::max_length(expr* e, unsigned& len) {
+    if (str().is_unit(e)) {
+        len = 1;
+        return true;
+    }
+    if (str().is_at(e)) {
+        len = 1;
+        return true;
+    }
+    zstring s;
+    if (str().is_string(e, s)) {
+        len = s.length();
+        return true;
+    }
+    if (str().is_empty(e)) {
+        len = 0;
+        return true;
+    }
+    if (str().is_concat(e)) {
+        unsigned l = 0;
+        len = 0;
+        for (expr* arg : *to_app(e)) {
+            if (!max_length(arg, l))
+                return false;
+            len += l;
+        }
+        return true;
+    }
+    return false;
+}
+
 bool seq_rewriter::is_string(unsigned n, expr* const* es, zstring& s) const {
     zstring s1;
     expr* e;
diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h
index d503ba022..314a83d4f 100644
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@@ -282,6 +282,7 @@ class seq_rewriter {
     bool reduce_itos(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& eqs);
     bool reduce_eq_empty(expr* l, expr* r, expr_ref& result);    
     bool min_length(expr_ref_vector const& es, unsigned& len);
+    bool max_length(expr* e, unsigned& len);
     expr* concat_non_empty(expr_ref_vector& es);
 
     bool is_string(unsigned n, expr* const* es, zstring& s) const;
diff --git a/src/util/rational.h b/src/util/rational.h
index e0a76d9ee..7bf800294 100644
--- a/src/util/rational.h
+++ b/src/util/rational.h
@@ -500,7 +500,16 @@ inline bool operator>(int a, rational const & b) {
 }
 
 
-inline bool operator!=(rational const & a, int b) {
+inline bool operator>=(rational const& a, int b) {
+    return a >= rational(b);
+}
+
+inline bool operator>=(int a, rational const& b) {
+    return rational(a) >= b;
+}
+
+
+inline bool operator!=(rational const& a, int b) {
     return !(a == rational(b));
 }