diff --git a/src/smt/params/smt_params_helper.pyg b/src/smt/params/smt_params_helper.pyg index 95d359dee..73550274f 100644 --- a/src/smt/params/smt_params_helper.pyg +++ b/src/smt/params/smt_params_helper.pyg @@ -97,6 +97,7 @@ def_module_params(module_name='smt', ('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'), ('seq.validate', BOOL, False, 'enable self-validation of theory axioms created by seq theory'), ('seq.use_derivatives', BOOL, False, 'dev flag (not for users) enable derivative based unfolding of regex'), + ('seq.use_unicode', BOOL, False, 'dev flag (not for users) enable unicode semantics'), ('str.strong_arrangements', BOOL, True, 'assert equivalences instead of implications when generating string arrangement axioms'), ('str.aggressive_length_testing', BOOL, False, 'prioritize testing concrete length values over generating more options'), ('str.aggressive_value_testing', BOOL, False, 'prioritize testing concrete string constant values over generating more options'), diff --git a/src/smt/params/theory_seq_params.cpp b/src/smt/params/theory_seq_params.cpp index 0563cb615..069a43c47 100644 --- a/src/smt/params/theory_seq_params.cpp +++ b/src/smt/params/theory_seq_params.cpp @@ -22,4 +22,5 @@ void theory_seq_params::updt_params(params_ref const & _p) { m_split_w_len = p.seq_split_w_len(); m_seq_validate = p.seq_validate(); m_seq_use_derivatives = p.seq_use_derivatives(); + m_seq_use_unicode = p.seq_use_unicode(); } diff --git a/src/smt/params/theory_seq_params.h b/src/smt/params/theory_seq_params.h index c925fc4e1..278efea7e 100644 --- a/src/smt/params/theory_seq_params.h +++ b/src/smt/params/theory_seq_params.h @@ -26,12 +26,14 @@ struct theory_seq_params { bool m_split_w_len; bool m_seq_validate; bool m_seq_use_derivatives; + bool m_seq_use_unicode; theory_seq_params(params_ref const & p = params_ref()): m_split_w_len(true), m_seq_validate(false), - m_seq_use_derivatives(false) + m_seq_use_derivatives(false), + m_seq_use_unicode(false) { updt_params(p); } diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index a2f875269..3f65184c4 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -48,12 +48,29 @@ namespace smt { } return change; } + + /** + * Propagate the atom (str.in.re s r) + * + * Propagation implements the following inference rules + * + * (not (str.in.re s r)) => (str.in.re s (complement r)) + * (str.in.re s r) => r != {} + * + * (str.in.re s r[if(c,r1,r2)]) & c => (str.in.re s r[r1]) + * (str.in.re s r[if(c,r1,r2)]) & ~c => (str.in.re s r[r2]) + * (str.in.re s r) & s = "" => nullable(r) + * s != "" => s = unit(head) ++ tail + * (str.in.re s r) & s != "" => (str.in.re tail D(head,r)) + */ bool seq_regex::propagate(literal lit) { expr* s = nullptr, *r = nullptr; expr* e = ctx.bool_var2expr(lit.var()); VERIFY(str().is_in_re(e, s, r)); + TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";); + // only positive assignments of membership propagation are relevant. if (lit.sign() && sk().is_tail(s)) return true; @@ -72,8 +89,16 @@ namespace smt { return true; // TBD - // for !sk().is_tail(s): - // s in R => R != {} + // for !sk().is_tail(s): s in R => R != {} + if (false && !sk().is_tail(s)) { + expr_ref is_empty(m.mk_eq(r, re().mk_empty(m.get_sort(s))), m); + rewrite(is_empty); + literal is_emptyl = mk_literal(is_empty); + if (ctx.get_assignment(is_emptyl) != l_false) { + th.propagate_lit(nullptr, 1, &lit, ~is_emptyl); + return true; + } + } if (block_unfolding(lit, s)) return true; @@ -131,9 +156,22 @@ namespace smt { throw default_exception("unable to expand derivative"); th.add_axiom(is_empty, th.mk_eq(s, th.mk_concat(head, tail), false)); th.add_axiom(~lit, is_empty, th.mk_literal(re().mk_in_re(tail, d))); + + TRACE("seq", tout << "head " << head << "\n"; + tout << mk_pp(r, m) << "\n";); return true; } + /** + * Put a limit to the unfolding of s. + * TBD: + * Given an Regex R, we can compute the minimal length string accepted by R + * A strong analysis could compute the minimal length of s to be accepted by R + * For example if s = x + "abc" and R = .* ++ "def" . .**, then minimal length + * of s is 6. + * Limiting the depth of unfolding s below such lengths is not useful. + */ + bool seq_regex::block_unfolding(literal lit, expr* s) { expr* t = nullptr; unsigned i = 0; @@ -149,6 +187,10 @@ namespace smt { return false; } + /** + * Combine a conjunction of membership relations for the same string + * within the same Regex. + */ bool seq_regex::coallesce_in_re(literal lit) { // initially disable this return false; @@ -190,15 +232,23 @@ namespace smt { } - void seq_regex::propagate_is_empty(literal lit) { + void seq_regex::propagate_eq(expr* r1, expr* r2) { // the dual version of unroll_non_empty, but // skolem functions have to be eliminated or turned into // universal quantifiers. - throw default_exception("emptiness checking of regex is TBD"); + throw default_exception("emptiness checking for regex is TBD"); } - void seq_regex::propagate_is_nonempty(expr* r) { - sort* seq_sort = nullptr; + void seq_regex::propagate_ne(expr* r1, expr* r2) { + expr_ref r(m); + if (re().is_empty(r1)) + std::swap(r1, r2); + if (re().is_empty(r2)) + r = r1; + else + r = re().mk_union(re().mk_diff(r1, r2), re().mk_diff(r2, r1)); + rewrite(r); + sort* seq_sort = nullptr; VERIFY(u().is_re(r, seq_sort)); literal lit = ~th.mk_eq(r, re().mk_empty(seq_sort), false); expr_mark seen; diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 97b8e56b2..c3b68f61a 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -64,13 +64,18 @@ namespace smt { seq_regex(theory_seq& th); + void push_scope() { m_to_propagate.push_scope(); } + + void pop_scope(unsigned num_scopes) { m_to_propagate.pop_scope(num_scopes); } + bool propagate(); void propagate_in_re(literal lit); - void propagate_is_empty(literal lit); + void propagate_eq(expr* r1, expr* r2); + + void propagate_ne(expr* r1, expr* r2); - void propagate_is_nonempty(expr* r); }; }; diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index a27197f41..8dd843fc7 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -382,7 +382,7 @@ final_check_status theory_seq::final_check_eh() { TRACEFIN("zero_length"); return FC_CONTINUE; } - if (false && !m_unicode.final_check()) { + if (ctx.get_fparams().m_seq_use_unicode && !m_unicode.final_check()) { return FC_CONTINUE; } if (get_fparams().m_split_w_len && len_based_split()) { @@ -1535,6 +1535,13 @@ bool theory_seq::internalize_term(app* term) { return true; } + if (ctx.get_fparams().m_seq_use_derivatives && m_util.str.is_in_re(term)) { + bool_var bv = ctx.mk_bool_var(term); + ctx.set_var_theory(bv, get_id()); + ctx.mark_as_relevant(bv); + return true; + } + for (auto arg : *term) { mk_var(ensure_enode(arg)); } @@ -2515,7 +2522,8 @@ void theory_seq::add_dependency(dependency*& dep, enode* a, enode* b) { void theory_seq::propagate() { - // m_unicode.propagate(); + if (ctx.get_fparams().m_seq_use_unicode) + m_unicode.propagate(); while (m_axioms_head < m_axioms.size() && !ctx.inconsistent()) { expr_ref e(m); e = m_axioms[m_axioms_head].get(); @@ -3083,7 +3091,7 @@ void theory_seq::assign_eh(bool_var v, bool is_true) { else if (m_util.str.is_nth_i(e) || m_util.str.is_nth_u(e)) { // no-op } - else if (false && m_util.is_char_le(e, e1, e2)) { + else if (ctx.get_fparams().m_seq_use_unicode && m_util.is_char_le(e, e1, e2)) { theory_var v1 = get_th_var(ctx.get_enode(e1)); theory_var v2 = get_th_var(ctx.get_enode(e2)); if (is_true) @@ -3104,10 +3112,15 @@ void theory_seq::assign_eh(bool_var v, bool is_true) { void theory_seq::new_eq_eh(theory_var v1, theory_var v2) { enode* n1 = get_enode(v1); enode* n2 = get_enode(v2); - if (false && m_util.is_char(n1->get_owner())) { + if (ctx.get_fparams().m_seq_use_unicode && m_util.is_char(n1->get_owner())) { m_unicode.new_eq_eh(v1, v2); return; } + if (ctx.get_fparams().m_seq_use_derivatives && m_util.is_re(n1->get_owner())) { + m_regex.propagate_eq(n1->get_owner(), n2->get_owner()); + return; + } + dependency* deps = m_dm.mk_leaf(assumption(n1, n2)); new_eq_eh(deps, n1, n2); } @@ -3184,6 +3197,12 @@ void theory_seq::new_diseq_eh(theory_var v1, theory_var v2) { expr_ref e2(n2->get_owner(), m); SASSERT(n1->get_root() != n2->get_root()); if (m_util.is_re(n1->get_owner())) { + + if (ctx.get_fparams().m_seq_use_derivatives) { + m_regex.propagate_ne(e1, e2); + return; + } + enode_pair_vector eqs; literal_vector lits; switch (regex_are_equal(e1, e2)) { @@ -3199,7 +3218,7 @@ void theory_seq::new_diseq_eh(theory_var v1, theory_var v2) { throw default_exception("convert regular expressions into automata"); } } - if (false && m_util.is_char(n1->get_owner())) { + if (ctx.get_fparams().m_seq_use_unicode && m_util.is_char(n1->get_owner())) { m_unicode.new_diseq_eh(v1, v2); return; } @@ -3233,6 +3252,7 @@ void theory_seq::push_scope_eh() { m_nqs.push_scope(); m_ncs.push_scope(); m_lts.push_scope(); + m_regex.push_scope(); } void theory_seq::pop_scope_eh(unsigned num_scopes) { @@ -3245,6 +3265,7 @@ void theory_seq::pop_scope_eh(unsigned num_scopes) { m_nqs.pop_scope(num_scopes); m_ncs.pop_scope(num_scopes); m_lts.pop_scope(num_scopes); + m_regex.pop_scope(num_scopes); m_rewrite.reset(); if (ctx.get_base_level() > ctx.get_scope_level() - num_scopes) { m_replay.reset();