From 61934d81061344e9c15b03e9ab0ab0d8025b64a2 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Wed, 7 Feb 2018 20:08:15 -0800 Subject: [PATCH] align semantics of re.allchar with string proposal. #1475 Signed-off-by: Nikolaj Bjorner --- src/api/api_ast.cpp | 5 ++-- src/api/api_seq.cpp | 2 +- src/ast/rewriter/seq_rewriter.cpp | 41 ++++++++++++++++++++++--------- src/ast/seq_decl_plugin.cpp | 25 +++++++++++++------ src/ast/seq_decl_plugin.h | 11 ++++++--- src/smt/theory_seq.cpp | 3 ++- src/smt/theory_str.cpp | 11 ++++++--- 7 files changed, 67 insertions(+), 31 deletions(-) diff --git a/src/api/api_ast.cpp b/src/api/api_ast.cpp index ae23ca100..6c39ad07d 100644 --- a/src/api/api_ast.cpp +++ b/src/api/api_ast.cpp @@ -1115,7 +1115,7 @@ extern "C" { case _OP_STRING_SUBSTR: return Z3_OP_SEQ_EXTRACT; case _OP_STRING_STRIDOF: return Z3_OP_SEQ_INDEX; case _OP_REGEXP_EMPTY: return Z3_OP_RE_EMPTY_SET; - case _OP_REGEXP_FULL: return Z3_OP_RE_FULL_SET; + case _OP_REGEXP_FULL_CHAR: return Z3_OP_RE_FULL_SET; case OP_STRING_STOI: return Z3_OP_STR_TO_INT; case OP_STRING_ITOS: return Z3_OP_INT_TO_STR; @@ -1127,7 +1127,8 @@ extern "C" { case OP_RE_UNION: return Z3_OP_RE_UNION; case OP_RE_INTERSECT: return Z3_OP_RE_INTERSECT; case OP_RE_LOOP: return Z3_OP_RE_LOOP; - case OP_RE_FULL_SET: return Z3_OP_RE_FULL_SET; + // case OP_RE_FULL_SEQ_SET: return Z3_OP_RE_FULL_SET; + case OP_RE_FULL_CHAR_SET: return Z3_OP_RE_FULL_SET; case OP_RE_EMPTY_SET: return Z3_OP_RE_EMPTY_SET; default: return Z3_OP_INTERNAL; diff --git a/src/api/api_seq.cpp b/src/api/api_seq.cpp index 44003a5fb..428d86d0d 100644 --- a/src/api/api_seq.cpp +++ b/src/api/api_seq.cpp @@ -165,7 +165,7 @@ extern "C" { MK_BINARY(Z3_mk_re_range, mk_c(c)->get_seq_fid(), OP_RE_RANGE, SKIP); MK_SORTED(Z3_mk_re_empty, mk_c(c)->sutil().re.mk_empty); - MK_SORTED(Z3_mk_re_full, mk_c(c)->sutil().re.mk_full); + MK_SORTED(Z3_mk_re_full, mk_c(c)->sutil().re.mk_full_seq); diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index fa95278b4..ec3e72cc4 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -286,7 +286,7 @@ eautomaton* re2automaton::re2aut(expr* e) { else if (u.re.is_empty(e)) { return alloc(eautomaton, sm); } - else if (u.re.is_full(e)) { + else if (u.re.is_full_seq(e)) { expr_ref tt(m.mk_true(), m); sort *seq_s = 0, *char_s = 0; VERIFY (u.is_re(m.get_sort(e), seq_s)); @@ -294,6 +294,15 @@ eautomaton* re2automaton::re2aut(expr* e) { sym_expr* _true = sym_expr::mk_pred(tt, char_s); return eautomaton::mk_loop(sm, _true); } + else if (u.re.is_full_char(e)) { + expr_ref tt(m.mk_true(), m); + sort *seq_s = 0, *char_s = 0; + VERIFY (u.is_re(m.get_sort(e), seq_s)); + VERIFY (u.is_seq(seq_s, char_s)); + sym_expr* _true = sym_expr::mk_pred(tt, char_s); + a = alloc(eautomaton, sm, _true); + return a.detach(); + } else if (u.re.is_intersection(e, e1, e2) && m_sa && (a = re2aut(e1)) && (b = re2aut(e2))) { return m_sa->mk_product(*a, *b); } @@ -370,7 +379,9 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con return mk_re_loop(num_args, args, result); case OP_RE_EMPTY_SET: return BR_FAILED; - case OP_RE_FULL_SET: + case OP_RE_FULL_SEQ_SET: + return BR_FAILED; + case OP_RE_FULL_CHAR_SET: return BR_FAILED; case OP_RE_OF_PRED: return BR_FAILED; @@ -1217,7 +1228,7 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { result = m().mk_false(); return BR_DONE; } - if (m_util.re.is_full(b)) { + if (m_util.re.is_full_seq(b)) { result = m().mk_true(); return BR_DONE; } @@ -1312,7 +1323,7 @@ br_status seq_rewriter::mk_str_to_regexp(expr* a, expr_ref& result) { return BR_FAILED; } br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) { - if (m_util.re.is_full(a) && m_util.re.is_full(b)) { + if (m_util.re.is_full_seq(a) && m_util.re.is_full_seq(b)) { result = a; return BR_DONE; } @@ -1352,11 +1363,11 @@ br_status seq_rewriter::mk_re_union(expr* a, expr* b, expr_ref& result) { result = a; return BR_DONE; } - if (m_util.re.is_full(a)) { + if (m_util.re.is_full_seq(a)) { result = a; return BR_DONE; } - if (m_util.re.is_full(b)) { + if (m_util.re.is_full_seq(b)) { result = b; return BR_DONE; } @@ -1382,10 +1393,10 @@ br_status seq_rewriter::mk_re_complement(expr* a, expr_ref& result) { return BR_REWRITE2; } if (m_util.re.is_empty(a)) { - result = m_util.re.mk_full(m().get_sort(a)); + result = m_util.re.mk_full_seq(m().get_sort(a)); return BR_DONE; } - if (m_util.re.is_full(a)) { + if (m_util.re.is_full_seq(a)) { result = m_util.re.mk_empty(m().get_sort(a)); return BR_DONE; } @@ -1412,11 +1423,11 @@ br_status seq_rewriter::mk_re_inter(expr* a, expr* b, expr_ref& result) { result = b; return BR_DONE; } - if (m_util.re.is_full(a)) { + if (m_util.re.is_full_seq(a)) { result = b; return BR_DONE; } - if (m_util.re.is_full(b)) { + if (m_util.re.is_full_seq(b)) { result = a; return BR_DONE; } @@ -1459,10 +1470,16 @@ br_status seq_rewriter::mk_re_loop(unsigned num_args, expr* const* args, expr_re */ br_status seq_rewriter::mk_re_star(expr* a, expr_ref& result) { expr* b, *c, *b1, *c1; - if (m_util.re.is_star(a) || m_util.re.is_full(a)) { + if (m_util.re.is_star(a) || m_util.re.is_full_seq(a)) { result = a; return BR_DONE; } + if (m_util.re.is_full_char(a)) { + sort* seq_sort = 0; + VERIFY(m_util.is_re(a, seq_sort)); + result = m_util.re.mk_full_seq(seq_sort); + return BR_DONE; + } if (m_util.re.is_empty(a)) { sort* seq_sort = 0; VERIFY(m_util.is_re(a, seq_sort)); @@ -1519,7 +1536,7 @@ br_status seq_rewriter::mk_re_plus(expr* a, expr_ref& result) { result = a; return BR_DONE; } - if (m_util.re.is_full(a)) { + if (m_util.re.is_full_seq(a)) { result = a; return BR_DONE; } diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 7cc0ccb12..daf39b79e 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -544,7 +544,8 @@ void seq_decl_plugin::init() { m_sigs[OP_RE_LOOP] = alloc(psig, m, "re.loop", 1, 1, &reA, reA); m_sigs[OP_RE_COMPLEMENT] = alloc(psig, m, "re.complement", 1, 1, &reA, reA); m_sigs[OP_RE_EMPTY_SET] = alloc(psig, m, "re.empty", 1, 0, 0, reA); - m_sigs[OP_RE_FULL_SET] = alloc(psig, m, "re.all", 1, 0, 0, reA); + m_sigs[OP_RE_FULL_SEQ_SET] = alloc(psig, m, "re.all", 1, 0, 0, reA); + m_sigs[OP_RE_FULL_CHAR_SET] = alloc(psig, m, "re.all1", 1, 0, 0, reA); m_sigs[OP_RE_OF_PRED] = alloc(psig, m, "re.of.pred", 1, 1, &predA, reA); m_sigs[OP_SEQ_TO_RE] = alloc(psig, m, "seq.to.re", 1, 1, &seqA, reA); m_sigs[OP_SEQ_IN_RE] = alloc(psig, m, "seq.in.re", 1, 2, seqAreA, boolT); @@ -562,7 +563,7 @@ void seq_decl_plugin::init() { m_sigs[_OP_STRING_IN_REGEXP] = alloc(psig, m, "str.in.re", 0, 2, strTreT, boolT); m_sigs[_OP_STRING_TO_REGEXP] = alloc(psig, m, "str.to.re", 0, 1, &strT, reT); m_sigs[_OP_REGEXP_EMPTY] = alloc(psig, m, "re.nostr", 0, 0, 0, reT); - m_sigs[_OP_REGEXP_FULL] = alloc(psig, m, "re.allchar", 0, 0, 0, reT); + m_sigs[_OP_REGEXP_FULL_CHAR] = alloc(psig, m, "re.allchar", 0, 0, 0, reT); m_sigs[_OP_STRING_SUBSTR] = alloc(psig, m, "str.substr", 0, 3, strTint2T, strT); m_sigs[_OP_RE_UNROLL] = alloc(psig, m, "_re.unroll", 0, 2, reTintT, strT); } @@ -669,20 +670,24 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, match(*m_sigs[k], arity, domain, range, rng); return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k)); - case _OP_REGEXP_FULL: + case _OP_REGEXP_FULL_CHAR: if (!range) { range = m_re; } match(*m_sigs[k], arity, domain, range, rng); - return m.mk_func_decl(symbol("re.allchar"), arity, domain, rng, func_decl_info(m_family_id, OP_RE_FULL_SET)); - case OP_RE_FULL_SET: + return m.mk_func_decl(symbol("re.allchar"), arity, domain, rng, func_decl_info(m_family_id, OP_RE_FULL_CHAR_SET)); + + case OP_RE_FULL_CHAR_SET: if (!range) range = m_re; if (range == m_re) { match(*m_sigs[k], arity, domain, range, rng); return m.mk_func_decl(symbol("re.allchar"), arity, domain, rng, func_decl_info(m_family_id, k)); } return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, func_decl_info(m_family_id, k)); - + + case OP_RE_FULL_SEQ_SET: + if (!range) range = m_re; + return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, func_decl_info(m_family_id, k)); case _OP_REGEXP_EMPTY: if (!range) { @@ -972,8 +977,12 @@ app* seq_util::re::mk_loop(expr* r, unsigned lo, unsigned hi) { return m.mk_app(m_fid, OP_RE_LOOP, 2, params, 1, &r); } -app* seq_util::re::mk_full(sort* s) { - return m.mk_app(m_fid, OP_RE_FULL_SET, 0, 0, 0, 0, s); +app* seq_util::re::mk_full_char(sort* s) { + return m.mk_app(m_fid, OP_RE_FULL_CHAR_SET, 0, 0, 0, 0, s); +} + +app* seq_util::re::mk_full_seq(sort* s) { + return m.mk_app(m_fid, OP_RE_FULL_SEQ_SET, 0, 0, 0, 0, s); } app* seq_util::re::mk_empty(sort* s) { diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 17fa50423..2e3b3fd24 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -56,7 +56,8 @@ enum seq_op_kind { OP_RE_LOOP, OP_RE_COMPLEMENT, OP_RE_EMPTY_SET, - OP_RE_FULL_SET, + OP_RE_FULL_SEQ_SET, + OP_RE_FULL_CHAR_SET, OP_RE_OF_PRED, @@ -77,7 +78,7 @@ enum seq_op_kind { _OP_STRING_SUBSTR, _OP_STRING_STRIDOF, _OP_REGEXP_EMPTY, - _OP_REGEXP_FULL, + _OP_REGEXP_FULL_CHAR, _OP_SEQ_SKOLEM, _OP_RE_UNROLL, LAST_SEQ_OP @@ -327,7 +328,8 @@ public: app* mk_opt(expr* r) { return m.mk_app(m_fid, OP_RE_OPTION, r); } app* mk_loop(expr* r, unsigned lo); app* mk_loop(expr* r, unsigned lo, unsigned hi); - app* mk_full(sort* s); + app* mk_full_char(sort* s); + app* mk_full_seq(sort* s); app* mk_empty(sort* s); bool is_to_re(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_TO_RE); } @@ -341,7 +343,8 @@ public: bool is_range(expr const* n) const { return is_app_of(n, m_fid, OP_RE_RANGE); } bool is_loop(expr const* n) const { return is_app_of(n, m_fid, OP_RE_LOOP); } bool is_empty(expr const* n) const { return is_app_of(n, m_fid, OP_RE_EMPTY_SET); } - bool is_full(expr const* n) const { return is_app_of(n, m_fid, OP_RE_FULL_SET); } + bool is_full_char(expr const* n) const { return is_app_of(n, m_fid, OP_RE_FULL_CHAR_SET); } + bool is_full_seq(expr const* n) const { return is_app_of(n, m_fid, OP_RE_FULL_SEQ_SET); } MATCH_UNARY(is_to_re); MATCH_BINARY(is_concat); MATCH_BINARY(is_union); diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index f91685d43..4c1a63a2f 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -2271,7 +2271,8 @@ bool theory_seq::internalize_re(expr* e) { m_util.re.is_concat(e, e1, e2)) { return internalize_re(e1) && internalize_re(e2); } - if (m_util.re.is_full(e) || + if (m_util.re.is_full_seq(e) || + m_util.re.is_full_char(e) || m_util.re.is_empty(e)) { return true; } diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index 022a2ad73..56a54f964 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -1689,8 +1689,10 @@ namespace smt { u.str.is_string(range1, range1val); u.str.is_string(range2, range2val); return zstring("[") + range1val + zstring("-") + range2val + zstring("]"); - } else if (u.re.is_full(a_regex)) { + } else if (u.re.is_full_seq(a_regex)) { return zstring("(.*)"); + } else if (u.re.is_full_char(a_regex)) { + return zstring("str.allchar"); } else { TRACE("str", tout << "BUG: unrecognized regex term " << mk_pp(regex, get_manager()) << std::endl;); UNREACHABLE(); return zstring(""); @@ -1806,9 +1808,12 @@ namespace smt { expr_ref finalAxiom(m.mk_iff(ex, rhs), m); SASSERT(finalAxiom); assert_axiom(finalAxiom); - } else if (u.re.is_full(regex)) { + } else if (u.re.is_full_seq(regex)) { // trivially true for any string! assert_axiom(ex); + } else if (u.re.is_full_char(regex)) { + TRACE("str", tout << "ERROR: unknown regex expression " << mk_pp(regex, m) << "!" << std::endl;); + NOT_IMPLEMENTED_YET(); } else { TRACE("str", tout << "ERROR: unknown regex expression " << mk_pp(regex, m) << "!" << std::endl;); NOT_IMPLEMENTED_YET(); @@ -6309,7 +6314,7 @@ namespace smt { } TRACE("str", tout << "range NFA: start = " << start << ", end = " << end << std::endl;); - } else if (u.re.is_full(e)) { + } else if (u.re.is_full_seq(e)) { // effectively the same as .* where . can be any single character // start --e--> tmp // tmp --e--> end