From 27db97c2697ae219e0db69b6b45b7ab2738bd13f Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Thu, 18 Feb 2021 16:51:34 -0600 Subject: [PATCH] Z3str3: add str.to_code and str.from_code (#5015) --- src/smt/theory_str.cpp | 86 ++++++++++++++++++++++++++-- src/smt/theory_str.h | 2 + src/smt/theory_str_mc.cpp | 114 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 197 insertions(+), 5 deletions(-) diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index f80c09f4b..498ee9e77 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -831,6 +831,10 @@ namespace smt { instantiate_axiom_Replace(e); } else if (u.str.is_in_re(a)) { instantiate_axiom_RegexIn(e); + } else if (u.str.is_from_code(a)) { + instantiate_axiom_str_from_code(e); + } else if (u.str.is_to_code(a)) { + instantiate_axiom_str_to_code(e); } else { TRACE("str", tout << "BUG: unhandled library-aware term " << mk_pp(e->get_owner(), get_manager()) << std::endl;); NOT_IMPLEMENTED_YET(); @@ -1824,6 +1828,71 @@ namespace smt { } } + void theory_str::instantiate_axiom_str_from_code(enode * e) { + ast_manager & m = get_manager(); + + app * ex = e->get_owner(); + if (axiomatized_terms.contains(ex)) { + TRACE("str", tout << "already set up str.from_code axiom for " << mk_pp(ex, m) << std::endl;); + return; + } + axiomatized_terms.insert(ex); + TRACE("str", tout << "instantiate str.from_code axiom for " << mk_pp(ex, m) << std::endl;); + + expr * arg; + u.str.is_from_code(ex, arg); + // (str.from_code N) == "" if N is not in the range [0, max_char]. + { + expr_ref premise(m.mk_or(m_autil.mk_le(arg, mk_int(-1)), m_autil.mk_ge(arg, mk_int(u.max_char() + 1))), m); + expr_ref conclusion(ctx.mk_eq_atom(ex, mk_string("")), m); + expr_ref axiom(rewrite_implication(premise, conclusion), m); + assert_axiom_rw(axiom); + } + // len (str.from_code N) == 1 if N is in the range [0, max_char]. + { + expr_ref premise(m.mk_and(m_autil.mk_ge(arg, mk_int(0)), m_autil.mk_le(arg, mk_int(u.max_char() + 1))), m); + expr_ref conclusion(ctx.mk_eq_atom(mk_strlen(ex), mk_int(1)), m); + expr_ref axiom(rewrite_implication(premise, conclusion), m); + assert_axiom_rw(axiom); + } + // If N is in the range [0, max_char], then to_code(from_code(e)) == e. + { + expr_ref premise(m.mk_and(m_autil.mk_ge(arg, mk_int(0)), m_autil.mk_le(arg, mk_int(u.max_char() + 1))), m); + expr_ref conclusion(ctx.mk_eq_atom(u.str.mk_to_code(ex), arg), m); + expr_ref axiom(rewrite_implication(premise, conclusion), m); + assert_axiom_rw(axiom); + } + } + + void theory_str::instantiate_axiom_str_to_code(enode * e) { + ast_manager & m = get_manager(); + + app * ex = e->get_owner(); + if (axiomatized_terms.contains(ex)) { + TRACE("str", tout << "already set up str.to_code axiom for " << mk_pp(ex, m) << std::endl;); + return; + } + axiomatized_terms.insert(ex); + TRACE("str", tout << "instantiate str.to_code axiom for " << mk_pp(ex, m) << std::endl;); + + expr * arg; + u.str.is_to_code(ex, arg); + // (str.to_code S) == -1 if len(S) != 1. + { + expr_ref premise(m.mk_not(ctx.mk_eq_atom(mk_strlen(arg), mk_int(1))), m); + expr_ref conclusion(ctx.mk_eq_atom(ex, mk_int(-1)), m); + expr_ref axiom(rewrite_implication(premise, conclusion), m); + assert_axiom_rw(axiom); + } + // (str.to_code S) is in [0, max_char] if len(S) == 1. + { + expr_ref premise(ctx.mk_eq_atom(mk_strlen(arg), mk_int(1)), m); + expr_ref conclusion(m.mk_and(m_autil.mk_ge(ex, mk_int(0)), m_autil.mk_le(ex, mk_int(u.max_char()))), m); + expr_ref axiom(rewrite_implication(premise, conclusion), m); + assert_axiom_rw(axiom); + } + } + expr * theory_str::mk_RegexIn(expr * str, expr * regexp) { app * regexIn = u.re.mk_in_re(str, regexp); // immediately force internalization so that axiom setup does not fail @@ -6774,7 +6843,7 @@ namespace smt { if (ex_sort != str_sort) return false; // string constants cannot be variables if (u.str.is_string(e)) return false; - if (u.str.is_concat(e) || u.str.is_at(e) || u.str.is_extract(e) || u.str.is_replace(e) || u.str.is_itos(e)) + if (u.str.is_concat(e) || u.str.is_at(e) || u.str.is_extract(e) || u.str.is_replace(e) || u.str.is_itos(e) || u.str.is_from_code(e)) return false; if (m.is_ite(e)) return false; @@ -6798,8 +6867,7 @@ namespace smt { sort * int_sort = m.mk_sort(m_arith_fid, INT_SORT); // reject unhandled expressions - if (u.str.is_replace_all(ex) || u.str.is_replace_re(ex) || u.str.is_replace_re_all(ex) || u.str.is_from_code(ex) - || u.str.is_to_code(ex) || u.str.is_is_digit(ex)) { + if (u.str.is_replace_all(ex) || u.str.is_replace_re(ex) || u.str.is_replace_re_all(ex) || u.str.is_is_digit(ex)) { TRACE("str", tout << "ERROR: Z3str3 has encountered an unsupported operator. Aborting." << std::endl;); m.raise_exception("Z3str3 encountered an unsupported operator."); } @@ -6830,6 +6898,11 @@ namespace smt { string_int_conversion_terms.push_back(ap); m_library_aware_axiom_todo.push_back(n); m_library_aware_trail_stack.push(push_back_trail(m_library_aware_axiom_todo)); + } else if (u.str.is_from_code(ap)) { + TRACE("str", tout << "found string-codepoint conversion term: " << mk_pp(ex, get_manager()) << std::endl;); + string_int_conversion_terms.push_back(ap); + m_library_aware_axiom_todo.push_back(n); + m_library_aware_trail_stack.push(push_back_trail(m_library_aware_axiom_todo)); } else if (is_var(ex)) { // if ex is a variable, add it to our list of variables TRACE("str", tout << "tracking variable " << mk_ismt2_pp(ap, get_manager()) << std::endl;); @@ -6881,6 +6954,11 @@ namespace smt { string_int_conversion_terms.push_back(ap); m_library_aware_axiom_todo.push_back(n); m_library_aware_trail_stack.push(push_back_trail(m_library_aware_axiom_todo)); + } else if (u.str.is_to_code(ex)) { + TRACE("str", tout << "found string-codepoint conversion term: " << mk_pp(ex, get_manager()) << std::endl;); + string_int_conversion_terms.push_back(ap); + m_library_aware_axiom_todo.push_back(n); + m_library_aware_trail_stack.push(push_back_trail(m_library_aware_axiom_todo)); } } } else { @@ -8617,8 +8695,6 @@ namespace smt { if (axiomAdd) { addedStrIntAxioms = true; } - } else { - UNREACHABLE(); } } if (addedStrIntAxioms) { diff --git a/src/smt/theory_str.h b/src/smt/theory_str.h index f659402fb..fabb774bf 100644 --- a/src/smt/theory_str.h +++ b/src/smt/theory_str.h @@ -606,6 +606,8 @@ protected: void instantiate_axiom_Replace(enode * e); void instantiate_axiom_str_to_int(enode * e); void instantiate_axiom_int_to_str(enode * e); + void instantiate_axiom_str_to_code(enode * e); + void instantiate_axiom_str_from_code(enode * e); void add_persisted_axiom(expr * a); diff --git a/src/smt/theory_str_mc.cpp b/src/smt/theory_str_mc.cpp index 05060b5fe..d84494ac3 100644 --- a/src/smt/theory_str_mc.cpp +++ b/src/smt/theory_str_mc.cpp @@ -1152,6 +1152,30 @@ namespace smt { TRACE("str_fl", tout << "integer theory has no assignment for " << mk_pp(e, get_manager()) << std::endl;); // consistency needs to be checked after the string is assigned } + } else if (u.str.is_to_code(e, _arg)) { + expr_ref arg(_arg, m); + rational ival; + if (v.get_value(e, ival)) { + TRACE("str_fl", tout << "integer theory assigns " << ival << " to " << mk_pp(e, m) << std::endl;); + if (ival >= rational::zero() && ival <= rational(u.max_char())) { + zstring ival_str(ival.get_unsigned()); + expr_ref arg_char_expr(mk_string(ival_str), m); + expr_ref stoi_cex(m); + // Add (e == ival) as a precondition + precondition.push_back(m.mk_eq(e, mk_int(ival))); + if (!fixed_length_reduce_eq(subsolver, arg, arg_char_expr, stoi_cex)) { + // Counterexample: (str.to_code arg) == ival AND arg == arg_char_expr cannot both be true. + stoi_cex = expr_ref(m.mk_not(m.mk_and(m.mk_eq(e, mk_int(ival)), m.mk_eq(arg, arg_char_expr))), m); + assert_axiom(stoi_cex); + add_persisted_axiom(stoi_cex); + return l_undef; + } + fixed_length_reduced_boolean_formulas.push_back(m.mk_eq(e, mk_int(ival))); + } + } else { + TRACE("str_fl", tout << "integer theory has no assignment for " << mk_pp(e, m) << std::endl;); + // consistency needs to be checked after the string is assigned + } } else if (u.str.is_itos(e, _arg)) { expr_ref arg(_arg, m); rational slen; @@ -1194,6 +1218,31 @@ namespace smt { TRACE("str_fl", tout << "integer theory has no assignment for " << mk_pp(arg, get_manager()) << std::endl;); // consistency needs to be checked after the string is assigned } + } else if (u.str.is_from_code(e, _arg)) { + expr_ref arg(_arg, m); + rational ival; + if (v.get_value(arg, ival)) { + TRACE("str_fl", tout << "integer theory assigns " << ival << " to " << mk_pp(arg, m) << std::endl;); + if (ival >= rational::zero() && ival <= rational(u.max_char())) { + zstring ival_str(ival.get_unsigned()); + expr_ref arg_char_expr(mk_string(ival_str), m); + expr_ref itos_cex(m); + // Add (arg == ival) as a precondition + precondition.push_back(m.mk_eq(arg, mk_int(ival))); + expr_ref _e(e, m); + if (!fixed_length_reduce_eq(subsolver, _e, arg_char_expr, itos_cex)) { + // Counterexample: (str.from_code arg) == arg_char AND arg == ival cannot both be true. + itos_cex = expr_ref(m.mk_not(m.mk_and(m.mk_eq(arg, mk_int(ival)), m.mk_eq(e, arg_char_expr))), m); + assert_axiom(itos_cex); + add_persisted_axiom(itos_cex); + return l_undef; + } + fixed_length_reduced_boolean_formulas.push_back(m.mk_eq(e, mk_int(ival))); + } + } else { + TRACE("str_fl", tout << "integer theory has no assignment for " << mk_pp(arg, m) << std::endl;); + // consistency needs to be checked after the string is assigned + } } } } @@ -1312,6 +1361,36 @@ namespace smt { } } } + } else if (u.str.is_to_code(e, _arg)) { + expr_ref arg(_arg, m); + rational ival; + if (v.get_value(e, ival)) { + expr_ref arg_subst(arg, m); + (*replacer)(arg, arg_subst); + rw(arg_subst); + TRACE("str_fl", tout << "ival = " << ival << ", string arg evaluates to " << mk_pp(arg_subst, m) << std::endl;); + symbol arg_str; + if (u.str.is_string(arg_subst, arg_str)) { + zstring arg_zstr(arg_str.bare_str()); + if (ival >= rational::zero() && ival <= rational(u.max_char())) { + // check that arg_subst has length 1 and that the codepoints are the same + if (arg_zstr.length() != 1 || rational(arg_zstr[0]) != ival) { + // contradiction + expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_string(arg_zstr)), ctx.mk_eq_atom(e, mk_int(ival)))), m); + assert_axiom(cex); + return l_undef; + } + } else { + // arg_subst must not be a singleton char + if (arg_zstr.length() == 1) { + // contradiction + expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_string(arg_zstr)), ctx.mk_eq_atom(e, mk_int(ival)))), m); + assert_axiom(cex); + return l_undef; + } + } + } + } } else if (u.str.is_itos(e, _arg)) { expr_ref arg(_arg, m); rational ival; @@ -1353,6 +1432,41 @@ namespace smt { } } } + } else if (u.str.is_from_code(e, _arg)) { + expr_ref arg(_arg, m); + rational ival; + if (v.get_value(arg, ival)) { + expr_ref e_subst(e, m); + (*replacer)(e, e_subst); + rw(e_subst); + TRACE("str_fl", tout << "ival = " << ival << ", string arg evaluates to " << mk_pp(e_subst, m) << std::endl;); + symbol e_str; + if (u.str.is_string(e_subst, e_str)) { + zstring e_zstr(e_str.bare_str()); + // if arg is out of range, e must be empty + // if arg is in range, e must be valid + if (ival <= rational::zero() || ival >= rational(u.max_char())) { + if (!e_zstr.empty()) { + // contradiction + expr_ref cex(ctx.mk_eq_atom( + m.mk_or(m_autil.mk_le(arg, mk_int(0)), m_autil.mk_ge(arg, mk_int(u.max_char() + 1))), + ctx.mk_eq_atom(e, mk_string("")) + ), m); + assert_axiom(cex); + return l_undef; + } + } else { + if (e_zstr.length() != 1 || e_zstr[0] != ival.get_unsigned()) { + // contradiction + expr_ref premise(ctx.mk_eq_atom(arg, mk_int(ival)), m); + expr_ref conclusion(ctx.mk_eq_atom(e, mk_string(zstring(ival.get_unsigned()))), m); + expr_ref cex(rewrite_implication(premise, conclusion), m); + assert_axiom(cex); + return l_undef; + } + } + } + } } } }