diff --git a/scripts/mk_util.py b/scripts/mk_util.py index 6ec3c0543..1fbbe8bc1 100644 --- a/scripts/mk_util.py +++ b/scripts/mk_util.py @@ -65,6 +65,7 @@ IS_WINDOWS=False IS_LINUX=False IS_OSX=False IS_FREEBSD=False +IS_OPENBSD=False VERBOSE=True DEBUG_MODE=False SHOW_CPPS = True @@ -126,6 +127,9 @@ def is_linux(): def is_freebsd(): return IS_FREEBSD +def is_openbsd(): + return IS_OPENBSD + def is_osx(): return IS_OSX @@ -582,6 +586,8 @@ elif os.name == 'posix': IS_LINUX=True elif os.uname()[0] == 'FreeBSD': IS_FREEBSD=True + elif os.uname()[0] == 'OpenBSD': + IS_OPENBSD=True def display_help(exit_code): print("mk_make.py: Z3 Makefile generator\n") @@ -1656,6 +1662,8 @@ class JavaDLLComponent(Component): t = t.replace('PLATFORM', 'linux') elif IS_FREEBSD: t = t.replace('PLATFORM', 'freebsd') + elif IS_OPENBSD: + t = t.replace('PLATFORM', 'openbsd') else: t = t.replace('PLATFORM', 'win32') out.write(t) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index f50d3334c..f810b48cd 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -21,6 +21,7 @@ Notes: #include"arith_decl_plugin.h" #include"ast_pp.h" #include"ast_util.h" +#include"uint_set.h" br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result) { @@ -41,14 +42,15 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con case OP_RE_LOOP: case OP_RE_EMPTY_SET: case OP_RE_FULL_SET: - case OP_RE_EMPTY_SEQ: case OP_RE_OF_PRED: + case _OP_SEQ_SKOLEM: return BR_FAILED; - - // string specific operators. 
- case OP_STRING_CONST: - return BR_FAILED; + case OP_SEQ_CONCAT: + if (num_args == 1) { + result = args[0]; + return BR_DONE; + } SASSERT(num_args == 2); return mk_seq_concat(args[0], args[1], result); case OP_SEQ_LENGTH: @@ -63,26 +65,31 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con case OP_SEQ_AT: SASSERT(num_args == 2); return mk_str_at(args[0], args[1], result); - case OP_STRING_STRIDOF: - SASSERT(num_args == 3); - return mk_str_stridof(args[0], args[1], args[2], result); - case OP_STRING_STRREPL: - SASSERT(num_args == 3); - return mk_str_strrepl(args[0], args[1], args[2], result); case OP_SEQ_PREFIX: SASSERT(num_args == 2); return mk_seq_prefix(args[0], args[1], result); case OP_SEQ_SUFFIX: SASSERT(num_args == 2); return mk_seq_suffix(args[0], args[1], result); + case OP_SEQ_TO_RE: + return BR_FAILED; + case OP_SEQ_IN_RE: + return BR_FAILED; + + case OP_STRING_CONST: + return BR_FAILED; + case OP_STRING_STRIDOF: + SASSERT(num_args == 3); + return mk_str_stridof(args[0], args[1], args[2], result); + case OP_STRING_STRREPL: + SASSERT(num_args == 3); + return mk_str_strrepl(args[0], args[1], args[2], result); case OP_STRING_ITOS: SASSERT(num_args == 1); return mk_str_itos(args[0], result); case OP_STRING_STOI: SASSERT(num_args == 1); return mk_str_stoi(args[0], result); - case OP_SEQ_TO_RE: - case OP_SEQ_IN_RE: case OP_REGEXP_LOOP: return BR_FAILED; case _OP_STRING_CONCAT: @@ -141,7 +148,7 @@ br_status seq_rewriter::mk_str_length(expr* a, expr_ref& result) { m_es.reset(); m_util.str.get_concat(a, m_es); size_t len = 0; - size_t j = 0; + unsigned j = 0; for (unsigned i = 0; i < m_es.size(); ++i) { if (m_util.str.is_string(m_es[i], b)) { len += b.length(); @@ -155,7 +162,7 @@ br_status seq_rewriter::mk_str_length(expr* a, expr_ref& result) { result = m_autil.mk_numeral(rational(len, rational::ui64()), true); return BR_DONE; } - if (j != m_es.size()) { + if (j != m_es.size() || j != 1) { expr_ref_vector es(m()); for (unsigned i 
= 0; i < j; ++i) { es.push_back(m_util.str.mk_length(m_es[i])); @@ -509,3 +516,217 @@ br_status seq_rewriter::mk_re_plus(expr* a, expr_ref& result) { br_status seq_rewriter::mk_re_opt(expr* a, expr_ref& result) { return BR_FAILED; } + +br_status seq_rewriter::mk_eq_core(expr * l, expr * r, expr_ref & result) { + expr_ref_vector lhs(m()), rhs(m()), res(m()); + if (!reduce_eq(l, r, lhs, rhs)) { + result = m().mk_false(); + return BR_DONE; + } + if (lhs.size() == 1 && lhs[0].get() == l && rhs[0].get() == r) { + return BR_FAILED; + } + for (unsigned i = 0; i < lhs.size(); ++i) { + res.push_back(m().mk_eq(lhs[i].get(), rhs[i].get())); + } + result = mk_and(res); + return BR_REWRITE3; +} + +bool seq_rewriter::reduce_eq(expr* l, expr* r, expr_ref_vector& lhs, expr_ref_vector& rhs) { + expr* a, *b; + bool change = false; + expr_ref_vector trail(m()); + m_lhs.reset(); + m_rhs.reset(); + m_util.str.get_concat(l, m_lhs); + m_util.str.get_concat(r, m_rhs); + + // solve from back + while (!m_lhs.empty() && !m_rhs.empty()) { + if (m_lhs.back() == m_rhs.back()) { + m_lhs.pop_back(); + m_rhs.pop_back(); + } + else if(m_util.str.is_unit(m_lhs.back(), a) && + m_util.str.is_unit(m_rhs.back(), b)) { + lhs.push_back(a); + rhs.push_back(b); + m_lhs.pop_back(); + m_rhs.pop_back(); + } + else if (!m_rhs.empty() && m_util.str.is_empty(m_rhs.back())) { + m_rhs.pop_back(); + } + else if (!m_lhs.empty() && m_util.str.is_empty(m_lhs.back())) { + m_lhs.pop_back(); + } + else { + break; + } + change = true; + } + + // solve from front + unsigned head1 = 0, head2 = 0; + while (head1 < m_lhs.size() && head2 < m_rhs.size()) { + if (m_lhs[head1] == m_rhs[head2]) { + ++head1; + ++head2; + } + else if(m_util.str.is_unit(m_lhs[head1], a) && + m_util.str.is_unit(m_rhs[head2], b)) { + lhs.push_back(a); + rhs.push_back(b); + ++head1; + ++head2; + } + else if (head1 < m_lhs.size() && m_util.str.is_empty(m_lhs[head1])) { + ++head1; + } + else if (head2 < m_rhs.size() && m_util.str.is_empty(m_rhs[head2])) { 
+ ++head2; + } + else { + break; + } + change = true; + } + // reduce strings + std::string s1, s2; + while (head1 < m_lhs.size() && + head2 < m_rhs.size() && + m_util.str.is_string(m_lhs[head1], s1) && + m_util.str.is_string(m_rhs[head2], s2)) { + size_t l = std::min(s1.length(), s2.length()); + for (size_t i = 0; i < l; ++i) { + if (s1[i] != s2[i]) { + return false; + } + } + if (l == s1.length()) { + ++head1; + } + else { + m_lhs[head1] = m_util.str.mk_string(std::string(s1.c_str()+l,s1.length()-l)); + trail.push_back(m_lhs[head1]); + } + if (l == s2.length()) { + ++head2; + } + else { + m_rhs[head2] = m_util.str.mk_string(std::string(s2.c_str()+l,s2.length()-l)); + trail.push_back(m_rhs[head2]); + } + change = true; + } + while (head1 < m_lhs.size() && + head2 < m_rhs.size() && + m_util.str.is_string(m_lhs.back(), s1) && + m_util.str.is_string(m_rhs.back(), s2)) { + size_t l = std::min(s1.length(), s2.length()); + for (size_t i = 0; i < l; ++i) { + if (s1[s1.length()-i-1] != s2[s2.length()-i-1]) { + return false; + } + } + m_lhs.pop_back(); + m_rhs.pop_back(); + if (l < s1.length()) { + m_lhs.push_back(m_util.str.mk_string(std::string(s1.c_str(),s1.length()-l))); + trail.push_back(m_lhs.back()); + } + if (l < s2.length()) { + m_rhs.push_back(m_util.str.mk_string(std::string(s2.c_str(),s2.length()-l))); + trail.push_back(m_rhs.back()); + } + change = true; + } + + bool is_sat; + if (!change) { + if (is_subsequence(m_lhs.size(), m_lhs.c_ptr(), m_rhs.size(), m_rhs.c_ptr(), lhs, rhs, is_sat)) { + return is_sat; + } + lhs.push_back(l); + rhs.push_back(r); + } + else if (head1 == m_lhs.size() && head2 == m_rhs.size()) { + // skip + } + else if (head1 == m_lhs.size()) { + return set_empty(m_rhs.size() - head2, m_rhs.c_ptr() + head2, lhs, rhs); + } + else if (head2 == m_rhs.size()) { + return set_empty(m_lhs.size() - head1, m_lhs.c_ptr() + head1, lhs, rhs); + } + else { // could solve if either side is fixed size. 
+ SASSERT(head1 < m_lhs.size() && head2 < m_rhs.size()); + if (is_subsequence(m_lhs.size() - head1, m_lhs.c_ptr() + head1, + m_rhs.size() - head2, m_rhs.c_ptr() + head2, lhs, rhs, is_sat)) { + return is_sat; + } + + lhs.push_back(m_util.str.mk_concat(m_lhs.size() - head1, m_lhs.c_ptr() + head1)); + rhs.push_back(m_util.str.mk_concat(m_rhs.size() - head2, m_rhs.c_ptr() + head2)); + } + return true; +} + +bool seq_rewriter::set_empty(unsigned sz, expr* const* es, expr_ref_vector& lhs, expr_ref_vector& rhs) { + std::string s; + for (unsigned i = 0; i < sz; ++i) { + if (m_util.str.is_unit(es[i])) { + return false; + } + if (m_util.str.is_empty(es[i])) { + continue; + } + if (m_util.str.is_string(es[i], s)) { + SASSERT(s.length() > 0); + return false; + } + lhs.push_back(m_util.str.mk_empty(m().get_sort(es[i]))); + rhs.push_back(es[i]); + } + return true; +} + +bool seq_rewriter::is_subsequence(unsigned szl, expr* const* l, unsigned szr, expr* const* r, + expr_ref_vector& lhs, expr_ref_vector& rhs, bool& is_sat) { + is_sat = true; + if (szl == szr) return false; + if (szr < szl) { + std::swap(szl, szr); + std::swap(l, r); + } + + uint_set rpos; + for (unsigned i = 0; i < szl; ++i) { + bool found = false; + unsigned j = 0; + for (; !found && j < szr; ++j) { + found = !rpos.contains(j) && l[i] == r[j]; + } + if (!found) { + return false; + } + SASSERT(0 < j && j <= szr); + rpos.insert(j-1); + } + // if we reach here, then every element of l is contained in r in some position. 
+ ptr_vector rs; + for (unsigned j = 0; j < szr; ++j) { + if (rpos.contains(j)) { + rs.push_back(r[j]); + } + else if (!set_empty(1, r + j, lhs, rhs)) { + is_sat = false; + return true; + } + } + SASSERT(szl == rs.size()); + lhs.push_back(m_util.str.mk_concat(szl, l)); + rhs.push_back(m_util.str.mk_concat(szl, rs.c_ptr())); + return true; +} diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 4674a7535..d4652f614 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -32,7 +32,7 @@ Notes: class seq_rewriter { seq_util m_util; arith_util m_autil; - ptr_vector m_es; + ptr_vector m_es, m_lhs, m_rhs; br_status mk_seq_concat(expr* a, expr* b, expr_ref& result); br_status mk_str_length(expr* a, expr_ref& result); @@ -53,6 +53,9 @@ class seq_rewriter { br_status mk_re_plus(expr* a, expr_ref& result); br_status mk_re_opt(expr* a, expr_ref& result); + bool set_empty(unsigned sz, expr* const* es, expr_ref_vector& lhs, expr_ref_vector& rhs); + bool is_subsequence(unsigned n, expr* const* l, unsigned m, expr* const* r, + expr_ref_vector& lhs, expr_ref_vector& rhs, bool& is_sat); public: seq_rewriter(ast_manager & m, params_ref const & p = params_ref()): m_util(m), m_autil(m) { @@ -64,6 +67,9 @@ public: static void get_param_descrs(param_descrs & r) {} br_status mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result); + br_status mk_eq_core(expr * lhs, expr * rhs, expr_ref & result); + + bool reduce_eq(expr* l, expr* r, expr_ref_vector& lhs, expr_ref_vector& rhs); }; diff --git a/src/ast/rewriter/th_rewriter.cpp b/src/ast/rewriter/th_rewriter.cpp index a8aea47b6..aa1b35b89 100644 --- a/src/ast/rewriter/th_rewriter.cpp +++ b/src/ast/rewriter/th_rewriter.cpp @@ -177,6 +177,8 @@ struct th_rewriter_cfg : public default_rewriter_cfg { st = m_f_rw.mk_eq_core(args[0], args[1], result); else if (s_fid == m_ar_rw.get_fid()) st = m_ar_rw.mk_eq_core(args[0], args[1], result); + else if (s_fid == 
m_seq_rw.get_fid()) + st = m_seq_rw.mk_eq_core(args[0], args[1], result); if (st != BR_FAILED) return st; diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index e22979163..2946bb1bc 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -47,7 +47,7 @@ bool seq_decl_plugin::match(ptr_vector& binding, sort* s, sort* sP) { if (is_sort_param(sP, i)) { if (binding.size() <= i) binding.resize(i+1); if (binding[i] && (binding[i] != s)) return false; - TRACE("seq", tout << "setting binding @ " << i << " to " << mk_pp(s, m) << "\n";); + TRACE("seq_verbose", tout << "setting binding @ " << i << " to " << mk_pp(s, m) << "\n";); binding[i] = s; return true; } @@ -77,7 +77,7 @@ bool seq_decl_plugin::match(ptr_vector& binding, sort* s, sort* sP) { void seq_decl_plugin::match_left_assoc(psig& sig, unsigned dsz, sort *const* dom, sort* range, sort_ref& range_out) { ptr_vector binding; ast_manager& m = *m_manager; - TRACE("seq", + TRACE("seq_verbose", tout << sig.m_name << ": "; for (unsigned i = 0; i < dsz; ++i) tout << mk_pp(dom[i], m) << " "; if (range) tout << " range: " << mk_pp(range, m); @@ -102,7 +102,7 @@ void seq_decl_plugin::match_left_assoc(psig& sig, unsigned dsz, sort *const* dom m.raise_exception(strm.str().c_str()); } range_out = apply_binding(binding, sig.m_range); - TRACE("seq", tout << mk_pp(range_out, m) << "\n";); + TRACE("seq_verbose", tout << mk_pp(range_out, m) << "\n";); } void seq_decl_plugin::match(psig& sig, unsigned dsz, sort *const* dom, sort* range, sort_ref& range_out) { @@ -186,28 +186,27 @@ void seq_decl_plugin::init() { sort* seqAintT[2] = { seqA, intT }; m_sigs.resize(LAST_SEQ_OP); // TBD: have (par ..) construct and load parameterized signature from premable. 
- m_sigs[OP_SEQ_UNIT] = alloc(psig, m, "seq.unit", 1, 1, &A, seqA); - m_sigs[OP_SEQ_EMPTY] = alloc(psig, m, "seq.empty", 1, 0, 0, seqA); - m_sigs[OP_SEQ_CONCAT] = alloc(psig, m, "seq.++", 1, 2, seqAseqA, seqA); + m_sigs[OP_SEQ_UNIT] = alloc(psig, m, "seq.unit", 1, 1, &A, seqA); + m_sigs[OP_SEQ_EMPTY] = alloc(psig, m, "seq.empty", 1, 0, 0, seqA); + m_sigs[OP_SEQ_CONCAT] = alloc(psig, m, "seq.++", 1, 2, seqAseqA, seqA); m_sigs[OP_SEQ_PREFIX] = alloc(psig, m, "seq.prefixof", 1, 2, seqAseqA, boolT); m_sigs[OP_SEQ_SUFFIX] = alloc(psig, m, "seq.suffixof", 1, 2, seqAseqA, boolT); m_sigs[OP_SEQ_CONTAINS] = alloc(psig, m, "seq.contains", 1, 2, seqAseqA, boolT); - m_sigs[OP_SEQ_EXTRACT] = alloc(psig, m, "seq.extract", 1, 3, seqAint2T, seqA); - m_sigs[OP_SEQ_AT] = alloc(psig, m, "seq.at", 1, 2, seqAintT, seqA); - m_sigs[OP_SEQ_LENGTH] = alloc(psig, m, "seq-length", 1, 1, &seqA, intT); - m_sigs[OP_RE_PLUS] = alloc(psig, m, "re.+", 1, 1, &reA, reA); - m_sigs[OP_RE_STAR] = alloc(psig, m, "re.*", 1, 1, &reA, reA); - m_sigs[OP_RE_OPTION] = alloc(psig, m, "re.opt", 1, 1, &reA, reA); - m_sigs[OP_RE_RANGE] = alloc(psig, m, "re.range", 1, 2, seqAseqA, reA); - m_sigs[OP_RE_CONCAT] = alloc(psig, m, "re.++", 1, 2, reAreA, reA); - m_sigs[OP_RE_UNION] = alloc(psig, m, "re.union", 1, 2, reAreA, reA); - m_sigs[OP_RE_INTERSECT] = alloc(psig, m, "re.inter", 1, 2, reAreA, reA); + m_sigs[OP_SEQ_EXTRACT] = alloc(psig, m, "seq.extract", 1, 3, seqAint2T, seqA); + m_sigs[OP_SEQ_AT] = alloc(psig, m, "seq.at", 1, 2, seqAintT, seqA); + m_sigs[OP_SEQ_LENGTH] = alloc(psig, m, "seq.len", 1, 1, &seqA, intT); + m_sigs[OP_RE_PLUS] = alloc(psig, m, "re.+", 1, 1, &reA, reA); + m_sigs[OP_RE_STAR] = alloc(psig, m, "re.*", 1, 1, &reA, reA); + m_sigs[OP_RE_OPTION] = alloc(psig, m, "re.opt", 1, 1, &reA, reA); + m_sigs[OP_RE_RANGE] = alloc(psig, m, "re.range", 1, 2, seqAseqA, reA); + m_sigs[OP_RE_CONCAT] = alloc(psig, m, "re.++", 1, 2, reAreA, reA); + m_sigs[OP_RE_UNION] = alloc(psig, m, "re.union", 1, 2, reAreA, 
reA); + m_sigs[OP_RE_INTERSECT] = alloc(psig, m, "re.inter", 1, 2, reAreA, reA); m_sigs[OP_RE_LOOP] = alloc(psig, m, "re-loop", 1, 1, &reA, reA); - m_sigs[OP_RE_EMPTY_SEQ] = alloc(psig, m, "re-empty-seq", 1, 0, 0, reA); m_sigs[OP_RE_EMPTY_SET] = alloc(psig, m, "re-empty-set", 1, 0, 0, reA); m_sigs[OP_RE_FULL_SET] = alloc(psig, m, "re-full-set", 1, 0, 0, reA); - m_sigs[OP_SEQ_TO_RE] = alloc(psig, m, "seq.to.re", 1, 1, &seqA, reA); m_sigs[OP_RE_OF_PRED] = alloc(psig, m, "re-of-pred", 1, 1, &predA, reA); + m_sigs[OP_SEQ_TO_RE] = alloc(psig, m, "seq.to.re", 1, 1, &seqA, reA); m_sigs[OP_SEQ_IN_RE] = alloc(psig, m, "seq.in.re", 1, 2, seqAreA, boolT); m_sigs[OP_STRING_CONST] = 0; m_sigs[OP_STRING_STRIDOF] = alloc(psig, m, "str.indexof", 0, 3, str2TintT, intT); @@ -303,7 +302,6 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, case OP_RE_OPTION: case OP_RE_RANGE: case OP_RE_UNION: - case OP_RE_EMPTY_SEQ: case OP_RE_EMPTY_SET: case OP_RE_OF_PRED: @@ -323,18 +321,27 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, func_decl_info(m_family_id, OP_STRING_CONST, num_parameters, parameters)); case OP_SEQ_CONCAT: { + if (arity == 0) { + m.raise_exception("invalid concatenation. At least one argument expected"); + } match_left_assoc(*m_sigs[k], arity, domain, range, rng); func_decl_info info(m_family_id, k); info.set_left_associative(); return m.mk_func_decl(m_sigs[(rng == m_string)?_OP_STRING_CONCAT:k]->m_name, rng, rng, rng, info); } case OP_RE_CONCAT: { + if (arity == 0) { + m.raise_exception("invalid concatenation. At least one argument expected"); + } match_left_assoc(*m_sigs[k], arity, domain, range, rng); func_decl_info info(m_family_id, k); info.set_left_associative(); return m.mk_func_decl(m_sigs[k]->m_name, rng, rng, rng, info); } case _OP_STRING_CONCAT: { + if (arity == 0) { + m.raise_exception("invalid concatenation. 
At least one argument expected"); + } match_left_assoc(*m_sigs[k], arity, domain, range, rng); func_decl_info info(m_family_id, OP_SEQ_CONCAT); info.set_left_associative(); @@ -388,6 +395,8 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, match(*m_sigs[k], arity, domain, range, rng); return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k)); + case _OP_SEQ_SKOLEM: + return m.mk_func_decl(symbol("seq.skolem"), arity, domain, rng, func_decl_info(m_family_id, k)); default: UNREACHABLE(); return 0; @@ -421,10 +430,13 @@ bool seq_decl_plugin::is_value(app* e) const { return is_app_of(e, m_family_id, OP_STRING_CONST); } -app* seq_util::str::mk_string(symbol const& s) { - return u.seq.mk_string(s); +app* seq_util::mk_skolem(symbol const& name, unsigned n, expr* const* args, sort* range) { + parameter param(name); + func_decl* f = m.mk_func_decl(get_family_id(), _OP_SEQ_SKOLEM, 1, ¶m, n, args, range); + return m.mk_app(f, n, args); } + void seq_util::str::get_concat(expr* e, ptr_vector& es) const { expr* e1, *e2; while (is_concat(e, e1, e2)) { diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 8cef2e391..0ab889c38 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -54,7 +54,6 @@ enum seq_op_kind { OP_RE_LOOP, OP_RE_EMPTY_SET, OP_RE_FULL_SET, - OP_RE_EMPTY_SEQ, OP_RE_OF_PRED, @@ -75,6 +74,7 @@ enum seq_op_kind { _OP_STRING_TO_REGEXP, _OP_STRING_CHARAT, _OP_STRING_SUBSTR, + _OP_SEQ_SKOLEM, LAST_SEQ_OP }; @@ -156,17 +156,22 @@ public: bool is_string(sort* s) const { return is_seq(s) && seq.is_char(s->get_parameter(0).get_ast()); } bool is_seq(sort* s) const { return is_sort_of(s, m_fid, SEQ_SORT); } bool is_re(sort* s) const { return is_sort_of(s, m_fid, RE_SORT); } + bool is_seq(expr* e) const { return is_seq(m.get_sort(e)); } + bool is_re(expr* e) const { return is_re(m.get_sort(e)); } + + app* mk_skolem(symbol const& name, unsigned n, expr* const* args, sort* 
range); + bool is_skolem(expr const* e) const { return is_app_of(e, m_fid, _OP_SEQ_SKOLEM); } class str { seq_util& u; ast_manager& m; family_id m_fid; public: - str(seq_util& u):u(u), m(u.m), m_fid(u.m_fid) {} + str(seq_util& u): u(u), m(u.m), m_fid(u.m_fid) {} sort* mk_seq(sort* s) { parameter param(s); return m.mk_sort(m_fid, SEQ_SORT, 1, ¶m); } app* mk_empty(sort* s) { return m.mk_const(m.mk_func_decl(m_fid, OP_SEQ_EMPTY, 0, 0, 0, (expr*const*)0, s)); } - app* mk_string(symbol const& s); + app* mk_string(symbol const& s) { return u.seq.mk_string(s); } app* mk_string(char const* s) { return mk_string(symbol(s)); } app* mk_string(std::string const& s) { return mk_string(symbol(s.c_str())); } app* mk_concat(expr* a, expr* b) { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONCAT, 2, es); } @@ -190,18 +195,19 @@ public: bool is_empty(expr const* n) const { symbol s; return is_app_of(n, m_fid, OP_SEQ_EMPTY) || (is_string(n, s) && !s.is_numerical() && *s.bare_str() == 0); } - bool is_concat(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_CONCAT); } - bool is_length(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_LENGTH); } + bool is_concat(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_CONCAT); } + bool is_length(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_LENGTH); } bool is_extract(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_EXTRACT); } - bool is_contains(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_CONTAINS); } - bool is_at(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_AT); } - bool is_stridof(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_STRIDOF); } - bool is_repl(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_STRREPL); } - bool is_prefix(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_PREFIX); } - bool is_suffix(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_SUFFIX); } - bool is_itos(expr const* n) const { return is_app_of(n, m_fid, 
OP_STRING_ITOS); } - bool is_stoi(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_STOI); } - bool is_in_re(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_IN_RE); } + bool is_contains(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_CONTAINS); } + bool is_at(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_AT); } + bool is_stridof(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_STRIDOF); } + bool is_repl(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_STRREPL); } + bool is_prefix(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_PREFIX); } + bool is_suffix(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_SUFFIX); } + bool is_itos(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_ITOS); } + bool is_stoi(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_STOI); } + bool is_in_re(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_IN_RE); } + bool is_unit(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_UNIT); } MATCH_BINARY(is_concat); @@ -216,17 +222,17 @@ public: MATCH_UNARY(is_itos); MATCH_UNARY(is_stoi); MATCH_BINARY(is_in_re); + MATCH_UNARY(is_unit); void get_concat(expr* e, ptr_vector& es) const; expr* get_leftmost_concat(expr* e) const { expr* e1, *e2; while (is_concat(e, e1, e2)) e = e1; return e; } }; class re { - seq_util& u; ast_manager& m; family_id m_fid; public: - re(seq_util& u):u(u), m(u.m), m_fid(u.m_fid) {} + re(seq_util& u): m(u.m), m_fid(u.m_fid) {} bool is_to_re(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_TO_RE); } bool is_concat(expr const* n) const { return is_app_of(n, m_fid, OP_RE_CONCAT); } diff --git a/src/smt/smt_setup.cpp b/src/smt/smt_setup.cpp index edb4f1e55..8a40f9d7a 100644 --- a/src/smt/smt_setup.cpp +++ b/src/smt/smt_setup.cpp @@ -30,6 +30,7 @@ Revision History: #include"theory_dummy.h" #include"theory_dl.h" #include"theory_seq_empty.h" +#include"theory_seq.h" #include"theory_pb.h" #include"theory_fpa.h" @@ 
-200,7 +201,7 @@ namespace smt { void setup::setup_QF_BVRE() { setup_QF_BV(); setup_QF_LIA(); - m_context.register_plugin(alloc(smt::theory_seq_empty, m_manager)); + setup_seq(); } void setup::setup_QF_UF(static_features const & st) { @@ -814,7 +815,7 @@ namespace smt { } void setup::setup_seq() { - m_context.register_plugin(alloc(theory_seq_empty, m_manager)); + m_context.register_plugin(alloc(theory_seq, m_manager)); } void setup::setup_card() { diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index b3a143377..d8bc522f8 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -21,66 +21,338 @@ Revision History: #include "smt_context.h" #include "smt_model_generator.h" #include "theory_seq.h" +#include "seq_rewriter.h" using namespace smt; +void theory_seq::solution_map::update(expr* e, expr* r, enode_pair_dependency* d) { + std::pair value; + if (m_map.find(e, value)) { + m_updates.push_back(DEL); + m_lhs.push_back(e); + m_rhs.push_back(value.first); + m_deps.push_back(value.second); + } + value.first = r; + value.second = d; + m_map.insert(e, value); + m_updates.push_back(INS); + m_lhs.push_back(e); + m_rhs.push_back(value.first); + m_deps.push_back(value.second); +} + +expr* theory_seq::solution_map::find(expr* e, enode_pair_dependency*& d) { + std::pair value; + d = 0; + unsigned num_finds = 0; + expr* result = e; + while (m_map.find(result, value)) { + d = m_dm.mk_join(d, value.second); + result = value.first; + ++num_finds; + } + if (num_finds > 1) { // path compression for original key only. 
+ update(e, result, d); + } + return result; +} + +void theory_seq::solution_map::pop_scope(unsigned num_scopes) { + if (num_scopes == 0) return; + unsigned start = m_limit[m_limit.size() - num_scopes]; + for (unsigned i = m_updates.size(); i > start; ) { + --i; + if (m_updates[i] == INS) { + m_map.remove(m_lhs[i].get()); + } + else { + m_map.insert(m_lhs[i].get(), std::make_pair(m_rhs[i].get(), m_deps[i])); + } + } + m_updates.resize(start); + m_lhs.resize(start); + m_rhs.resize(start); + m_deps.resize(start); + m_limit.resize(m_limit.size() - num_scopes); +} + +void theory_seq::solution_map::display(std::ostream& out) const { + map_t::iterator it = m_map.begin(), end = m_map.end(); + for (; it != end; ++it) { + out << mk_pp(it->m_key, m) << " |-> " << mk_pp(it->m_value.first, m) << "\n"; + } +} + + theory_seq::theory_seq(ast_manager& m): theory(m.mk_family_id("seq")), - m_axioms_head(0), - m_axioms(m), + m(m), + m_dam(m_dep_array_value_manager, m_alloc), + m_rep(m, m_dm), m_ineqs(m), - m_used(false), + m_axioms(m), + m_axioms_head(0), + m_incomplete(false), m_rewrite(m), m_util(m), m_autil(m), - m_trail_stack(*this), - m_find(*this) {} + m_trail_stack(*this) { + m_lhs.push_back(expr_array()); + m_rhs.push_back(expr_array()); + m_deps.push_back(enode_pair_dependency_array()); + m_prefix_sym = "prefix"; + m_suffix_sym = "suffix"; + m_left_sym = "left"; + m_right_sym = "right"; + m_contains_left_sym = "contains_left"; + m_contains_right_sym = "contains_right"; +} + +theory_seq::~theory_seq() { + unsigned num_scopes = m_lhs.size()-1; + if (num_scopes > 0) pop_scope_eh(num_scopes); + m.del(m_lhs.back()); + m.del(m_rhs.back()); + m_dam.del(m_deps.back()); +} + final_check_status theory_seq::final_check_eh() { context & ctx = get_context(); - ast_manager& m = get_manager(); - final_check_status st = check_ineqs(); - if (st == FC_CONTINUE) { + TRACE("seq", display(tout);); + if (!check_ineqs()) { return FC_CONTINUE; } - return m_used?FC_GIVEUP:FC_DONE; + if 
(simplify_and_solve_eqs()) { + return FC_CONTINUE; + } + if (ctx.inconsistent()) { + return FC_CONTINUE; + } + if (m.size(m_lhs.back()) > 0 || m_incomplete) { + return FC_GIVEUP; + } + return FC_DONE; } -final_check_status theory_seq::check_ineqs() { +bool theory_seq::check_ineqs() { context & ctx = get_context(); - ast_manager& m = get_manager(); - enode_pair_vector eqs; for (unsigned i = 0; i < m_ineqs.size(); ++i) { - expr_ref a(m_ineqs[i].get(), m); + expr* a = m_ineqs[i].get(); + enode_pair_dependency* eqs = 0; expr_ref b = canonize(a, eqs); if (m.is_true(b)) { + TRACE("seq", tout << "Evaluates to false: " << mk_pp(a,m) << "\n";); ctx.internalize(a, false); - literal lit(ctx.get_literal(a)); - ctx.mark_as_relevant(lit); - ctx.assign( - lit, - ctx.mk_justification( - ext_theory_propagation_justification( - get_id(), ctx.get_region(), 0, 0, eqs.size(), eqs.c_ptr(), lit))); - return FC_CONTINUE; + propagate_lit(eqs, ctx.get_literal(a)); + return false; } } - return FC_DONE; + return true; } -final_check_status theory_seq::simplify_eqs() { - bool simplified = false; - for (unsigned i = 0; i < get_num_vars(); ++i) { - theory_var v = m_find.find(i); - if (v != i) continue; - - } - if (simplified) { - return FC_CONTINUE; - } - return FC_DONE; +void theory_seq::propagate_lit(enode_pair_dependency* dep, literal lit) { + context& ctx = get_context(); + ctx.mark_as_relevant(lit); + vector _eqs; + m_dm.linearize(dep, _eqs); + TRACE("seq", + ctx.display_detailed_literal(tout, lit); + tout << " <- "; + for (unsigned i = 0; i < _eqs.size(); ++i) { + tout << mk_pp(_eqs[i].first->get_owner(), m) << " = " + << mk_pp(_eqs[i].second->get_owner(), m) << "\n"; + } + ); + justification* js = + ctx.mk_justification( + ext_theory_propagation_justification( + get_id(), ctx.get_region(), 0, 0, _eqs.size(), _eqs.c_ptr(), lit)); + + ctx.assign(lit, js); } +void theory_seq::set_conflict(enode_pair_dependency* dep) { + context& ctx = get_context(); + vector _eqs; + m_dm.linearize(dep, 
_eqs); + TRACE("seq", + for (unsigned i = 0; i < _eqs.size(); ++i) { + tout << mk_pp(_eqs[i].first->get_owner(), m) << " = " + << mk_pp(_eqs[i].second->get_owner(), m) << "\n"; + } + ); + ctx.set_conflict( + ctx.mk_justification( + ext_theory_conflict_justification( + get_id(), ctx.get_region(), 0, 0, _eqs.size(), _eqs.c_ptr(), 0, 0))); +} + +void theory_seq::propagate_eq(enode_pair_dependency* dep, enode* n1, enode* n2) { + context& ctx = get_context(); + vector _eqs; + m_dm.linearize(dep, _eqs); + TRACE("seq", + tout << mk_pp(n1->get_owner(), m) << " " << mk_pp(n2->get_owner(), m) << " <- "; + for (unsigned i = 0; i < _eqs.size(); ++i) { + tout << mk_pp(_eqs[i].first->get_owner(), m) << " = " + << mk_pp(_eqs[i].second->get_owner(), m) << "\n"; + } + ); + + justification* js = ctx.mk_justification( + ext_theory_eq_propagation_justification( + get_id(), ctx.get_region(), 0, 0, _eqs.size(), _eqs.c_ptr(), n1, n2)); + ctx.assign_eq(n1, n2, eq_justification(js)); +} + + + +bool theory_seq::simplify_eq(expr* l, expr* r, enode_pair_dependency* deps) { + context& ctx = get_context(); + seq_rewriter rw(m); + expr_ref_vector lhs(m), rhs(m); + expr_ref lh = canonize(l, deps); + expr_ref rh = canonize(r, deps); + if (!rw.reduce_eq(lh, rh, lhs, rhs)) { + // equality is inconsistent. 
+ TRACE("seq", tout << lh << " != " << rh << "\n";); + set_conflict(deps); + return true; + } + if (lhs.size() == 1 && l == lhs[0].get() && + rhs.size() == 1 && r == rhs[0].get()) { + return false; + } + SASSERT(lhs.size() == rhs.size()); + for (unsigned i = 0; i < lhs.size(); ++i) { + m.push_back(m_lhs.back(), lhs[i].get()); + m.push_back(m_rhs.back(), rhs[i].get()); + m_dam.push_back(m_deps.back(), deps); + } + TRACE("seq", + tout << mk_pp(l, m) << " = " << mk_pp(r, m) << " => "; + for (unsigned i = 0; i < lhs.size(); ++i) { + tout << mk_pp(lhs[i].get(), m) << " = " << mk_pp(rhs[i].get(), m) << "; "; + } + tout << "\n"; + ); + return true; +} + +bool theory_seq::solve_unit_eq(expr* l, expr* r, enode_pair_dependency* deps) { + expr_ref lh = canonize(l, deps); + expr_ref rh = canonize(r, deps); + if (lh == rh) { + return true; + } + if (is_var(lh) && !occurs(lh, rh)) { + add_solution(lh, rh, deps); + return true; + } + if (is_var(rh) && !occurs(rh, lh)) { + add_solution(rh, lh, deps); + return true; + } + // Use instead reference counts for dependencies to GC? + + // TBD: Solutions to units are not necessarily variables, but + // they may induce new equations. + + return false; +} + +bool theory_seq::occurs(expr* a, expr* b) { + // true if a occurs under an interpreted function or under left/right selector. 
+ SASSERT(is_var(a)); + expr* e1, *e2; + while (is_left_select(a, e1) || is_right_select(a, e1)) { + a = e1; + } + if (m_util.str.is_concat(b, e1, e2)) { + return occurs(a, e1) || occurs(a, e2); + } + while (is_left_select(b, e1) || is_right_select(b, e1)) { + b = e1; + } + if (a == b) { + return true; + } + return false; +} + +bool theory_seq::is_var(expr* a) { + return is_uninterp(a) || m_util.is_skolem(a); +} + +bool theory_seq::is_left_select(expr* a, expr*& b) { + return m_util.is_skolem(a) && + to_app(a)->get_decl()->get_parameter(0).get_symbol() == m_left_sym && (b = to_app(a)->get_arg(0), true); +} + +bool theory_seq::is_right_select(expr* a, expr*& b) { + return m_util.is_skolem(a) && + to_app(a)->get_decl()->get_parameter(0).get_symbol() == m_right_sym && (b = to_app(a)->get_arg(0), true); +} + + +void theory_seq::add_solution(expr* l, expr* r, enode_pair_dependency* deps) { + context& ctx = get_context(); + m_rep.update(l, r, deps); + // TBD: skip new equalities for non-internalized terms. + if (ctx.e_internalized(l) && ctx.e_internalized(r)) { + enode* n1 = ctx.get_enode(l); + enode* n2 = ctx.get_enode(r); + propagate_eq(deps, n1, n2); + } +} + +bool theory_seq::simplify_eqs() { + return pre_process_eqs(true); +} + +bool theory_seq::solve_basic_eqs() { + return pre_process_eqs(false); +} + +bool theory_seq::pre_process_eqs(bool simplify_or_solve) { + context& ctx = get_context(); + bool change = false; + expr_array& lhs = m_lhs.back(); + expr_array& rhs = m_rhs.back(); + enode_pair_dependency_array& deps = m_deps.back(); + for (unsigned i = 0; !ctx.inconsistent() && i < m.size(lhs); ++i) { + if (simplify_or_solve? 
+ simplify_eq(m.get(lhs, i), m.get(rhs, i), m_dam.get(deps, i)): + solve_unit_eq(m.get(lhs, i), m.get(rhs, i), m_dam.get(deps, i))) { + if (i + 1 != m.size(lhs)) { /* swap-remove: overwrite slot i with the last equation before popping */ + m.set(lhs, i, m.get(lhs, m.size(lhs)-1)); + m.set(rhs, i, m.get(rhs, m.size(rhs)-1)); + m_dam.set(deps, i, m_dam.get(deps, m_dam.size(deps)-1)); + --i; /* revisit slot i on the next iteration; unsigned wrap at i == 0 is undone by the loop's ++i */ + ++m_stats.m_num_reductions; /* NOTE(review): only counted when the removed equation was not the last one - confirm this is intended */ + } + m.pop_back(lhs); + m.pop_back(rhs); + m_dam.pop_back(deps); + change = true; + } + } + return change; +} + +bool theory_seq::simplify_and_solve_eqs() { /* simplifies once, then alternates solve and simplify passes until solving removes nothing or the context is inconsistent; returns whether anything changed */ + context & ctx = get_context(); + bool change = simplify_eqs(); + while (!ctx.inconsistent() && solve_basic_eqs()) { + simplify_eqs(); + change = true; + } + return change; +} + + final_check_status theory_seq::add_axioms() { for (unsigned i = 0; i < get_num_vars(); ++i) { @@ -94,9 +366,7 @@ bool theory_seq::internalize_atom(app* a, bool) { } bool theory_seq::internalize_term(app* term) { - m_used = true; context & ctx = get_context(); - ast_manager& m = get_manager(); unsigned num_args = term->get_num_args(); for (unsigned i = 0; i < num_args; i++) { ctx.internalize(term->get_arg(i), false); @@ -104,87 +374,159 @@ bool theory_seq::internalize_term(app* term) { if (ctx.e_internalized(term)) { return true; } - enode * e = ctx.mk_enode(term, false, m.is_bool(term), true); if (m.is_bool(term)) { bool_var bv = ctx.mk_bool_var(term); ctx.set_var_theory(bv, get_id()); ctx.set_enode_flag(bv, true); } else { + enode * e = ctx.mk_enode(term, false, m.is_bool(term), true); /* an enode is now created only for non-Boolean terms */ theory_var v = mk_var(e); ctx.attach_th_var(e, this, v); } + if (!m_util.str.is_concat(term) && /* any term outside the operators listed here is reported through set_incomplete */ + !m_util.str.is_string(term) && + !m_util.str.is_empty(term) && + !m_util.str.is_unit(term) && + !m_util.str.is_suffix(term) && + !m_util.str.is_prefix(term) && + !m_util.str.is_contains(term) && + !m_util.is_skolem(term)) { + set_incomplete(term); + } + // assert basic axioms - if (!m_used) { m_trail_stack.push(value_trail(m_used)); m_used = true; } return true; } +void theory_seq::apply_sort_cnstr(enode* n, 
sort* s) { + if (!is_attached_to_var(n)) { /* create a theory variable for n only if it does not have one yet */ + mk_var(n); + } +} + +void theory_seq::display(std::ostream & out) const { /* prints the equations of the innermost scope together with the enode pairs each one depends on, then the recorded disequalities, then the solution map */ + expr_array const& lhs = m_lhs.back(); + expr_array const& rhs = m_rhs.back(); + enode_pair_dependency_array const& deps = m_deps.back(); + out << "Equations:\n"; + for (unsigned i = 0; i < m.size(lhs); ++i) { + out << mk_pp(m.get(lhs, i), m) << " = " << mk_pp(m.get(rhs, i), m) << " <-\n"; + enode_pair_dependency* dep = m_dam.get(deps, i); + if (dep) { + vector _eqs; + const_cast(m_dm).linearize(dep, _eqs); /* linearize is reached through a const_cast because display() is const */ + for (unsigned i = 0; i < _eqs.size(); ++i) { /* NOTE(review): this index shadows the outer loop's i */ + out << " " << mk_pp(_eqs[i].first->get_owner(), m) << " = " << mk_pp(_eqs[i].second->get_owner(), m) << "\n"; + } + } + } + out << "Negative constraints:\n"; + for (unsigned i = 0; i < m_ineqs.size(); ++i) { + out << mk_pp(m_ineqs[i], m) << "\n"; + } + out << "Solved equations:\n"; + m_rep.display(out); +} + +void theory_seq::collect_statistics(::statistics & st) const { /* exports the split and reduction counters kept in m_stats */ + st.update("seq num splits", m_stats.m_num_splits); + st.update("seq num reductions", m_stats.m_num_reductions); +} + +void theory_seq::init_model(model_generator & mg) { /* creates the sequence value factory, keeps a pointer to it in m_factory and registers it with the model generator */ + m_factory = alloc(seq_factory, get_manager(), + get_family_id(), mg.get_model()); + mg.register_factory(m_factory); + // TBD: this is still unsound model generation. + // disequalities are not guaranteed. we need to + // prime the factory with a prefix that cannot be + // constructed using any existing combinations of the + // strings (or units) that are used. 
+ for (unsigned i = 0; i < get_num_vars(); ++i) { + expr* e = get_enode(i)->get_owner(); + if (m_util.is_seq(e)) { + enode_pair_dependency* deps = 0; + e = m_rep.find(e, deps); + if (is_var(e)) { /* the representative is still an unsolved variable: bind it to a fresh value from the factory */ + expr* val = m_factory->get_fresh_value(m.get_sort(e)); + m_rep.update(e, val, 0); + } + } + else if (m_util.is_re(e)) { + // TBD + } + } +} + +model_value_proc * theory_seq::mk_value(enode * n, model_generator & mg) { /* builds the model value of n from the canonized form of its owner term; the term is added to the factory trail before being wrapped in an expr_wrapper_proc */ + enode_pair_dependency* deps = 0; + expr_ref e(n->get_owner(), m); + e = canonize(e, deps); /* canonize takes a plain expr* and returns the canonical term, so the result has to be stored back; the bare call left e unchanged */ + SASSERT(is_app(e)); + m_factory->add_trail(e); + return alloc(expr_wrapper_proc, to_app(e)); +} + + + +void theory_seq::set_incomplete(app* term) { /* flags the theory as incomplete because term is not supported; the previous flag value is pushed on the trail stack before it is set */ + TRACE("seq", tout << "No support for: " << mk_pp(term, m) << "\n";); + if (!m_incomplete) { + m_trail_stack.push(value_trail(m_incomplete)); + m_incomplete = true; + } +} + theory_var theory_seq::mk_var(enode* n) { - theory_var r = theory::mk_var(n); - VERIFY(r == m_find.mk_var()); - return r; + return theory::mk_var(n); } bool theory_seq::can_propagate() { return m_axioms_head < m_axioms.size(); } -expr_ref theory_seq::canonize(expr* e, enode_pair_vector& eqs) { - eqs.reset(); +expr_ref theory_seq::canonize(expr* e, enode_pair_dependency*& eqs) { /* eqs is in/out: it is handed to expand() and no longer reset here */ expr_ref result = expand(e, eqs); m_rewrite(result); return result; } -expr_ref theory_seq::expand(expr* e, enode_pair_vector& eqs) { - context& ctx = get_context(); - ast_manager& m = get_manager(); +expr_ref theory_seq::expand(expr* e, enode_pair_dependency*& eqs) { + enode_pair_dependency* deps = 0; + e = m_rep.find(e, deps); /* continue with what the solution map returns for e; deps receives the dependencies of that lookup */ expr* e1, *e2; - SASSERT(ctx.e_internalized(e)); - enode* n = ctx.get_enode(e); - enode* start = n; - do { - e = n->get_owner(); - if (m_util.str.is_concat(e, e1, e2)) { - if (start != n) eqs.push_back(enode_pair(start, n)); - return expr_ref(m_util.str.mk_concat(expand(e1, eqs), expand(e2, eqs)), m); - } - if (m_util.str.is_empty(e) || m_util.str.is_string(e)) { - if (start != n) eqs.push_back(enode_pair(start, n)); - return expr_ref(e, m); - } - if 
(m.is_eq(e, e1, e2)) { - if (start != n) eqs.push_back(enode_pair(start, n)); - return expr_ref(m.mk_eq(expand(e1, eqs), expand(e2, eqs)), m); - } - if (m_util.str.is_prefix(e, e1, e2)) { - if (start != n) eqs.push_back(enode_pair(start, n)); - return expr_ref(m_util.str.mk_prefix(expand(e1, eqs), expand(e2, eqs)), m); - } - if (m_util.str.is_suffix(e, e1, e2)) { - if (start != n) eqs.push_back(enode_pair(start, n)); - return expr_ref(m_util.str.mk_suffix(expand(e1, eqs), expand(e2, eqs)), m); - } - if (m_util.str.is_contains(e, e1, e2)) { - if (start != n) eqs.push_back(enode_pair(start, n)); - return expr_ref(m_util.str.mk_contains(expand(e1, eqs), expand(e2, eqs)), m); - } -#if 0 - if (m_util.str.is_unit(e)) { - // TBD: canonize the element. - if (start != n) eqs.push_back(enode_pair(start, n)); - return expr_ref(e, m); - } -#endif - n = n->get_next(); + eqs = m_dm.mk_join(eqs, deps); /* fold the dependencies returned by the solution-map lookup into the caller's set */ + if (m_util.str.is_concat(e, e1, e2)) { /* concat, eq, prefix, suffix and contains are rebuilt from their recursively expanded arguments */ + return expr_ref(m_util.str.mk_concat(expand(e1, eqs), expand(e2, eqs)), m); + } + if (m_util.str.is_empty(e) || m_util.str.is_string(e)) { /* empty sequences and string constants are returned unchanged */ + return expr_ref(e, m); } - while (n != start); - return expr_ref(n->get_root()->get_owner(), m); + if (m.is_eq(e, e1, e2)) { + return expr_ref(m.mk_eq(expand(e1, eqs), expand(e2, eqs)), m); + } + if (m_util.str.is_prefix(e, e1, e2)) { + return expr_ref(m_util.str.mk_prefix(expand(e1, eqs), expand(e2, eqs)), m); + } + if (m_util.str.is_suffix(e, e1, e2)) { + return expr_ref(m_util.str.mk_suffix(expand(e1, eqs), expand(e2, eqs)), m); + } + if (m_util.str.is_contains(e, e1, e2)) { + return expr_ref(m_util.str.mk_contains(expand(e1, eqs), expand(e2, eqs)), m); + } + return expr_ref(e, m); /* any other term is returned as found */ } +void theory_seq::add_dependency(enode_pair_dependency*& dep, enode* a, enode* b) { /* joins the pair (a, b) into dep; does nothing when a and b are the same enode */ + if (a != b) { + dep = m_dm.mk_join(dep, m_dm.mk_leaf(std::make_pair(a, b))); + } +} + + void theory_seq::propagate() { context & ctx = get_context(); - ast_manager& m = get_manager(); while (m_axioms_head < m_axioms.size() && 
!ctx.inconsistent()) { expr_ref e(m); e = m_axioms[m_axioms_head].get(); @@ -200,59 +542,58 @@ void theory_seq::create_axiom(expr_ref& e) { void theory_seq::assert_axiom(expr_ref& e) { context & ctx = get_context(); - ast_manager& m = get_manager(); if (m.is_true(e)) return; TRACE("seq", tout << "asserting " << e << "\n";); ctx.internalize(e, false); literal lit(ctx.get_literal(e)); ctx.mark_as_relevant(lit); ctx.mk_th_axiom(get_id(), 1, &lit); - } -expr_ref theory_seq::mk_skolem(char const* name, expr* e1, expr* e2) { - ast_manager& m = get_manager(); - expr_ref result(m); - sort* s = m.get_sort(e1); - SASSERT(s == m.get_sort(e2)); - sort* ss[2] = { s, s }; - result = m.mk_app(m.mk_func_decl(symbol("#prefix_eq"), 2, ss, s), e1, e2); - return result; +expr_ref theory_seq::mk_skolem(symbol const& name, expr* e1, expr* e2) { /* builds a two-argument skolem term named by name; the result sort is taken from e1 */ + expr* es[2] = { e1, e2 }; + return expr_ref(m_util.mk_skolem(name, 2, es, m.get_sort(e1)), m); } void theory_seq::propagate_eq(bool_var v, expr* e1, expr* e2) { context& ctx = get_context(); + TRACE("seq", + tout << mk_pp(ctx.bool_var2enode(v)->get_owner(), m) << " => " + << mk_pp(e1, m) << " = " << mk_pp(e2, m) << "\n";); + ctx.internalize(e1, false); /* e1 is internalized here so that get_enode(e1) below is valid */ + SASSERT(ctx.e_internalized(e2)); /* e2 is only asserted to be internalized already */ enode* n1 = ctx.get_enode(e1); enode* n2 = ctx.get_enode(e2); literal lit(v); - ctx.assign_eq(n1, n2, eq_justification( - alloc(ext_theory_eq_propagation_justification, - get_id(), ctx.get_region(), 1, &lit, 0, 0, n1, n2))); + justification* js = /* the justification is now created through ctx.mk_justification instead of a direct alloc; the single literal lit is its only antecedent */ + ctx.mk_justification( + ext_theory_eq_propagation_justification( + get_id(), ctx.get_region(), 1, &lit, 0, 0, n1, n2)); + + ctx.assign_eq(n1, n2, eq_justification(js)); } void theory_seq::assign_eq(bool_var v, bool is_true) { context & ctx = get_context(); - ast_manager& m = get_manager(); - enode* n = ctx.bool_var2enode(v); app* e = n->get_owner(); if (is_true) { expr* e1, *e2; expr_ref f(m); if (m_util.str.is_prefix(e, e1, e2)) { - f = mk_skolem("#prefix_eq", e1, e2); + f = mk_skolem(m_prefix_sym, e1, e2); /* the skolem name now comes from a member symbol instead of a string literal */ f = 
m_util.str.mk_concat(e1, f); propagate_eq(v, f, e2); } else if (m_util.str.is_suffix(e, e1, e2)) { - f = mk_skolem("#suffix_eq", e1, e2); + f = mk_skolem(m_suffix_sym, e1, e2); /* the skolem name now comes from a member symbol instead of a string literal */ f = m_util.str.mk_concat(f, e1); propagate_eq(v, f, e2); } else if (m_util.str.is_contains(e, e1, e2)) { - expr_ref f1 = mk_skolem("#contains_eq1", e1, e2); - expr_ref f2 = mk_skolem("#contains_eq2", e1, e2); + expr_ref f1 = mk_skolem(m_contains_left_sym, e1, e2); /* f1 and f2 are the skolem terms placed to the left and right of e1 in the concatenation below */ + expr_ref f2 = mk_skolem(m_contains_right_sym, e1, e2); f = m_util.str.mk_concat(m_util.str.mk_concat(f1, e1), f2); propagate_eq(v, f, e2); } @@ -270,34 +611,69 @@ void theory_seq::assign_eq(bool_var v, bool is_true) { } void theory_seq::new_eq_eh(theory_var v1, theory_var v2) { - m_find.merge(v1, v2); + enode* n1 = get_enode(v1); /* instead of merging in a union-find, the equality is queued as a pending equation */ + enode* n2 = get_enode(v2); + if (n1 != n2) { + m.push_back(m_lhs.back(), n1->get_owner()); + m.push_back(m_rhs.back(), n2->get_owner()); + m_dam.push_back(m_deps.back(), m_dm.mk_leaf(enode_pair(n1, n2))); /* the equation's only dependency is the pair (n1, n2) itself */ + } } void theory_seq::new_diseq_eh(theory_var v1, theory_var v2) { - ast_manager& m = get_manager(); expr* e1 = get_enode(v1)->get_owner(); expr* e2 = get_enode(v2)->get_owner(); m_trail_stack.push(push_back_vector(m_ineqs)); - m_ineqs.push_back(m.mk_eq(e1, e2)); + m_ineqs.push_back(mk_eq_atom(e1, e2)); /* the disequality is stored as the equality atom e1 = e2 in m_ineqs */ } void theory_seq::push_scope_eh() { theory::push_scope_eh(); + m_rep.push_scope(); + m_dm.push_scope(); m_trail_stack.push_scope(); m_trail_stack.push(value_trail(m_axioms_head)); + expr_array lhs, rhs; /* the new scope gets its own copies of the current equation and dependency arrays */ + enode_pair_dependency_array deps; + m.copy(m_lhs.back(), lhs); + m.copy(m_rhs.back(), rhs); + m_dam.copy(m_deps.back(), deps); + m_lhs.push_back(lhs); + m_rhs.push_back(rhs); + m_deps.push_back(deps); } void theory_seq::pop_scope_eh(unsigned num_scopes) { m_trail_stack.pop_scope(num_scopes); - theory::pop_scope_eh(num_scopes); + theory::pop_scope_eh(num_scopes); + m_dm.pop_scope(num_scopes); + m_rep.pop_scope(num_scopes); + while (num_scopes > 0) { /* delete and drop the arrays of every popped scope, innermost first */ + --num_scopes; + m.del(m_lhs.back()); + 
m.del(m_rhs.back()); + m_dam.del(m_deps.back()); + m_lhs.pop_back(); + m_rhs.pop_back(); + m_deps.pop_back(); + } } void theory_seq::restart_eh() { + SASSERT(m_lhs.size() == 1); /* expects only one scope level to be present */ + m.del(m_lhs.back()); + m.del(m_rhs.back()); + m_dam.del(m_deps.back()); + m_lhs.reset(); + m_rhs.reset(); + m_deps.reset(); + m_lhs.push_back(expr_array()); /* start over with empty equation and dependency arrays */ + m_rhs.push_back(expr_array()); + m_deps.push_back(enode_pair_dependency_array()); } void theory_seq::relevant_eh(app* n) { - ast_manager& m = get_manager(); if (m_util.str.is_length(n)) { expr_ref e(m); e = m_autil.mk_le(m_autil.mk_numeral(rational(0), true), n); diff --git a/src/smt/theory_seq.h b/src/smt/theory_seq.h index 3232a8469..3cdacaeda 100644 --- a/src/smt/theory_seq.h +++ b/src/smt/theory_seq.h @@ -28,30 +28,82 @@ Revision History: namespace smt { class theory_seq : public theory { + struct config { /* shared configuration constants for the persistent-array manager typedefs below */ + static const bool preserve_roots = true; + static const unsigned max_trail_sz = 16; + static const unsigned factor = 2; + typedef small_object_allocator allocator; + }; + typedef scoped_dependency_manager enode_pair_dependency_manager; + typedef enode_pair_dependency_manager::dependency enode_pair_dependency; + struct enode_pair_dependency_array_config : public config { /* array configuration whose elements are raw enode_pair_dependency pointers without reference counting */ + typedef enode_pair_dependency* value; + typedef dummy_value_manager value_manager; + static const bool ref_count = false; + }; + typedef parray_manager enode_pair_dependency_array_manager; + typedef enode_pair_dependency_array_manager::ref enode_pair_dependency_array; + typedef union_find th_union_find; typedef trail_stack th_trail_stack; + + class solution_map { /* map from expressions to their solved form plus the dependencies of each binding; only the interface is declared here */ + enum map_update { INS, DEL }; + typedef obj_map > map_t; + ast_manager& m; + enode_pair_dependency_manager& m_dm; + map_t m_map; + expr_ref_vector m_lhs, m_rhs; + ptr_vector m_deps; + svector m_updates; + unsigned_vector m_limit; + public: + solution_map(ast_manager& m, enode_pair_dependency_manager& dm): m(m), m_dm(dm), m_lhs(m), m_rhs(m) {} + void update(expr* e, expr* r, enode_pair_dependency* 
d); + expr* find(expr* e, enode_pair_dependency*& d); + void push_scope() { m_limit.push_back(m_updates.size()); } /* remembers how many updates existed when the scope was opened */ + void pop_scope(unsigned num_scopes); + void display(std::ostream& out) const; + }; + struct stats { stats() { reset(); } void reset() { memset(this, 0, sizeof(stats)); } unsigned m_num_splits; + unsigned m_num_reductions; /* incremented in pre_process_eqs and reported as "seq num reductions" */ }; - expr_ref_vector m_axioms; - expr_ref_vector m_ineqs; + ast_manager& m; /* cached manager reference; replaces the per-function get_manager() locals */ + small_object_allocator m_alloc; + enode_pair_dependency_array_config::value_manager m_dep_array_value_manager; + enode_pair_dependency_manager m_dm; + enode_pair_dependency_array_manager m_dam; + solution_map m_rep; // unification representative. + vector m_lhs, m_rhs; // persistent sets of equalities. + vector m_deps; // persistent sets of dependencies. + + seq_factory* m_factory; // value factory + expr_ref_vector m_ineqs; // inequalities to check + expr_ref_vector m_axioms; unsigned m_axioms_head; - bool m_used; + bool m_incomplete; /* set by set_incomplete(), which internalize_term calls for terms it does not recognise */ th_rewriter m_rewrite; seq_util m_util; arith_util m_autil; th_trail_stack m_trail_stack; - th_union_find m_find; stats m_stats; + symbol m_prefix_sym; /* the four symbols below name the skolems created in assign_eq */ + symbol m_suffix_sym; + symbol m_contains_left_sym; + symbol m_contains_right_sym; + symbol m_left_sym; /* compared against skolem parameters in is_left_select */ + symbol m_right_sym; /* compared against skolem parameters in is_right_select */ virtual final_check_status final_check_eh(); virtual bool internalize_atom(app*, bool); virtual bool internalize_term(app*); virtual void new_eq_eh(theory_var, theory_var); virtual void new_diseq_eh(theory_var, theory_var); - virtual void assign_eq(bool_var v, bool is_true); + virtual void assign_eq(bool_var v, bool is_true); virtual bool can_propagate(); virtual void propagate(); virtual void push_scope_eh(); @@ -61,28 +113,44 @@ namespace smt { virtual theory* mk_fresh(context* new_ctx) { return alloc(theory_seq, new_ctx->get_manager()); } virtual char const * get_name() const { return "seq"; } virtual theory_var mk_var(enode* n); + virtual void apply_sort_cnstr(enode* n, sort* s); + virtual void display(std::ostream & out) const; + virtual void 
collect_statistics(::statistics & st) const; + virtual model_value_proc * mk_value(enode * n, model_generator & mg); + virtual void init_model(model_generator & mg); + + bool check_ineqs(); + bool pre_process_eqs(bool simplify_or_solve); /* true runs simplify_eq on each pending equation, false runs solve_unit_eq */ + bool simplify_eqs(); + bool simplify_eq(expr* l, expr* r, enode_pair_dependency* dep); + bool solve_unit_eq(expr* l, expr* r, enode_pair_dependency* dep); + bool solve_basic_eqs(); + bool simplify_and_solve_eqs(); + void propagate_lit(enode_pair_dependency* dep, literal lit); + void propagate_eq(enode_pair_dependency* dep, enode* n1, enode* n2); + void propagate_eq(bool_var v, expr* e1, expr* e2); + void set_conflict(enode_pair_dependency* dep); - final_check_status check_ineqs(); - final_check_status simplify_eqs(); + bool occurs(expr* a, expr* b); + bool is_var(expr* b); + void add_solution(expr* l, expr* r, enode_pair_dependency* dep); + bool is_left_select(expr* a, expr*& b); /* on success b receives the first argument of the selector skolem a */ + bool is_right_select(expr* a, expr*& b); + final_check_status add_axioms(); void assert_axiom(expr_ref& e); void create_axiom(expr_ref& e); - expr_ref canonize(expr* e, enode_pair_vector& eqs); - expr_ref expand(expr* e, enode_pair_vector& eqs); + expr_ref canonize(expr* e, enode_pair_dependency*& eqs); /* eqs is in/out: expand() joins further dependencies into it */ + expr_ref expand(expr* e, enode_pair_dependency*& eqs); + void add_dependency(enode_pair_dependency*& dep, enode* a, enode* b); - void propagate_eq(bool_var v, expr* e1, expr* e2); - expr_ref mk_skolem(char const* name, expr* e1, expr* e2); + expr_ref mk_skolem(symbol const& s, expr* e1, expr* e2); + + void set_incomplete(app* term); public: theory_seq(ast_manager& m); - virtual void init_model(model_generator & mg) { - mg.register_factory(alloc(seq_factory, get_manager(), get_family_id(), mg.get_model())); - } - - th_trail_stack & get_trail_stack() { return m_trail_stack; } - virtual void merge_eh(theory_var v1, theory_var v2, theory_var, theory_var); - static void after_merge_eh(theory_var r1, theory_var r2, theory_var v1, theory_var v2) {} - void 
unmerge_eh(theory_var v1, theory_var v2); + virtual ~theory_seq(); }; }; diff --git a/src/smt/theory_seq_empty.h b/src/smt/theory_seq_empty.h index e8e619bf8..b1bab6c05 100644 --- a/src/smt/theory_seq_empty.h +++ b/src/smt/theory_seq_empty.h @@ -25,22 +25,44 @@ Revision History: namespace smt { class seq_factory : public value_factory { typedef hashtable symbol_set; + ast_manager& m; proto_model& m_model; seq_util u; symbol_set m_strings; unsigned m_next; + std::string m_unique_prefix; /* prefix used for generated fresh string names; initialised to "#B" */ + obj_map m_unique_sequences; + expr_ref_vector m_trail; /* expressions handed over through add_trail / set_prefix(expr*); presumably kept referenced for the factory's lifetime - confirm */ public: + seq_factory(ast_manager & m, family_id fid, proto_model & md): value_factory(m, fid), + m(m), m_model(md), u(m), - m_next(0) + m_next(0), + m_unique_prefix("#B"), + m_trail(m) { m_strings.insert(symbol("")); m_strings.insert(symbol("a")); m_strings.insert(symbol("b")); } + void add_trail(expr* e) { /* appends e to m_trail */ + m_trail.push_back(e); + } + + void set_prefix(char const* p) { /* replaces the prefix used when generating fresh string names */ + m_unique_prefix = p; + } + + // generic method for setting unique sequences + void set_prefix(expr* uniq) { + m_trail.push_back(uniq); + m_unique_sequences.insert(m.get_sort(uniq), uniq); /* the entry is keyed by the sort of uniq */ + } + virtual expr* get_some_value(sort* s) { if (u.is_string(s)) return u.str.mk_string(symbol("")); @@ -60,7 +82,7 @@ namespace smt { if (u.is_string(s)) { while (true) { std::ostringstream strm; - strm << "S" << m_next++; + strm << m_unique_prefix << m_next++; /* candidate name is the configurable prefix followed by a running counter */ symbol sym(strm.str().c_str()); if (m_strings.contains(sym)) continue; m_strings.insert(sym);