From da68c3213c9aafa660b1b96c2b6e11aa1f031f0d Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Fri, 29 Jan 2021 08:14:38 -0600 Subject: [PATCH] Unicode for Z3str3 (#4981) * z3str3: remove hard-coded char set * z3str3: remove hard-coded char set * z3str3: use char abstraction * z3str3: scope management for unicode chars * add QF_CHAR for z3str3 * z3str3: remove hard-coded char set * z3str3: use char abstraction * z3str3: scope management for unicode chars * add QF_CHAR for z3str3 * z3str3: add 'char' string solver case * z3str3: fix mk_char using the wrong ast manager * z3str3: fix refcounted character vectors --- src/smt/smt_setup.cpp | 5 ++- src/smt/theory_str.cpp | 5 +++ src/smt/theory_str.h | 7 ++-- src/smt/theory_str_mc.cpp | 68 +++++++++++++++++++++------------------ 4 files changed, 49 insertions(+), 36 deletions(-) diff --git a/src/smt/smt_setup.cpp b/src/smt/smt_setup.cpp index 8118e8146..06f00fa08 100644 --- a/src/smt/smt_setup.cpp +++ b/src/smt/smt_setup.cpp @@ -735,6 +735,10 @@ namespace smt { else if (m_params.m_string_solver == "none") { // don't register any solver. } + else if (m_params.m_string_solver == "char") { + setup_QF_BV(); + setup_char(); + } else { throw default_exception("invalid parameter for smt.string_solver, valid options are 'z3str3', 'seq', 'auto'"); } @@ -930,7 +934,6 @@ namespace smt { void setup::setup_str() { setup_arith(); m_context.register_plugin(alloc(theory_str, m_context, m_manager, m_params)); - setup_char(); } void setup::setup_seq() { diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index 19a14d7c6..1d46b49c7 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -99,6 +99,8 @@ namespace smt { dealloc(aut); } regex_automata.clear(); + for (auto& kv: var_to_char_subterm_map) dealloc(kv.m_value); + for (auto& kv: uninterpreted_to_char_subterm_map) dealloc(kv.m_value); } void theory_str::init() { @@ -162,7 +164,10 @@ namespace smt { fixed_length_subterm_trail.reset(); fixed_length_assumptions.reset(); fixed_length_used_len_terms.reset(); + + for (auto& kv: var_to_char_subterm_map) dealloc(kv.m_value); var_to_char_subterm_map.reset(); + for (auto& kv: uninterpreted_to_char_subterm_map) dealloc(kv.m_value); uninterpreted_to_char_subterm_map.reset(); fixed_length_lesson.reset(); candidate_model.reset(); diff --git a/src/smt/theory_str.h b/src/smt/theory_str.h index fa6e28621..24dde6b5d 100644 --- a/src/smt/theory_str.h +++ b/src/smt/theory_str.h @@ -509,7 +509,6 @@ protected: obj_map > string_chars; // S --> [S_0, S_1, ...] for character terms S_i - obj_pair_map concat_astNode_map; // all (str.to-int) and (int.to-str) terms @@ -537,8 +536,8 @@ protected: expr_ref_vector fixed_length_subterm_trail; // trail for subterms generated *in the subsolver* expr_ref_vector fixed_length_assumptions; // cache of boolean terms to assert *into the subsolver*, unsat core is a subset of these obj_map fixed_length_used_len_terms; // constraints used in generating fixed length model - obj_map > var_to_char_subterm_map; // maps a var to a list of character terms *in the subsolver* - obj_map > uninterpreted_to_char_subterm_map; // maps an "uninterpreted" string term to a list of character terms *in the subsolver* + obj_map var_to_char_subterm_map; // maps a var to a list of character terms *in the subsolver* + obj_map uninterpreted_to_char_subterm_map; // maps an "uninterpreted" string term to a list of character terms *in the subsolver* obj_map> fixed_length_lesson; //keep track of information for the lesson unsigned preprocessing_iteration_count; // number of attempts we've made to solve by preprocessing length information obj_map candidate_model; @@ -748,7 +747,7 @@ protected: lbool fixed_length_model_construction(expr_ref_vector formulas, expr_ref_vector &precondition, expr_ref_vector& free_variables, obj_map &model, expr_ref_vector &cex); - bool fixed_length_reduce_string_term(smt::kernel & subsolver, expr * term, ptr_vector & term_chars, expr_ref & cex); + bool fixed_length_reduce_string_term(smt::kernel & subsolver, expr * term, expr_ref_vector & term_chars, expr_ref & cex); bool fixed_length_get_len_value(expr * e, rational & val); bool fixed_length_reduce_eq(smt::kernel & subsolver, expr_ref lhs, expr_ref rhs, expr_ref & cex); bool fixed_length_reduce_diseq(smt::kernel & subsolver, expr_ref lhs, expr_ref rhs, expr_ref & cex); diff --git a/src/smt/theory_str_mc.cpp b/src/smt/theory_str_mc.cpp index 9dda3973a..f8c396a75 100644 --- a/src/smt/theory_str_mc.cpp +++ b/src/smt/theory_str_mc.cpp @@ -85,7 +85,7 @@ namespace smt { expr_ref haystack(full, m); expr_ref needle(suff, m); - ptr_vector full_chars, suff_chars; + expr_ref_vector full_chars(m), suff_chars(m); if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex) || !fixed_length_reduce_string_term(subsolver, needle, suff_chars, cex)) { @@ -147,7 +147,7 @@ namespace smt { expr_ref haystack(full, m); expr_ref needle(suff, m); - ptr_vector full_chars, suff_chars; + expr_ref_vector full_chars(m), suff_chars(m); if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex) || !fixed_length_reduce_string_term(subsolver, needle, suff_chars, cex)) { return false; @@ -201,7 +201,7 @@ namespace smt { expr_ref haystack(full, m); expr_ref needle(pref, m); - ptr_vector full_chars, pref_chars; + expr_ref_vector full_chars(m), pref_chars(m); if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex) || !fixed_length_reduce_string_term(subsolver, needle, pref_chars, cex)) { return false; @@ -262,7 +262,7 @@ namespace smt { expr_ref haystack(full, m); expr_ref needle(pref, m); - ptr_vector full_chars, pref_chars; + expr_ref_vector full_chars(m), pref_chars(m); if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex) || !fixed_length_reduce_string_term(subsolver, needle, pref_chars, cex)) { return false; @@ -316,7 +316,7 @@ namespace smt { expr_ref haystack(full, m); expr_ref needle(small, m); - ptr_vector haystack_chars, needle_chars; + expr_ref_vector haystack_chars(m), needle_chars(m); if (!fixed_length_reduce_string_term(subsolver, haystack, haystack_chars, cex) || !fixed_length_reduce_string_term(subsolver, needle, needle_chars, cex)) { return false; @@ -382,7 +382,7 @@ namespace smt { expr_ref haystack(full, m); expr_ref needle(small, m); - ptr_vector haystack_chars, needle_chars; + expr_ref_vector haystack_chars(m), needle_chars(m); if (!fixed_length_reduce_string_term(subsolver, haystack, haystack_chars, cex) || !fixed_length_reduce_string_term(subsolver, needle, needle_chars, cex)) { return false; @@ -459,7 +459,7 @@ namespace smt { eautomaton * aut = m_mk_aut(re); aut->compress(); - ptr_vector str_chars; + expr_ref_vector str_chars(m); if (!fixed_length_reduce_string_term(subsolver, str, str_chars, cex)) { return false; } @@ -614,7 +614,7 @@ namespace smt { * this conflict clause exists in the main solver. */ bool theory_str::fixed_length_reduce_string_term(smt::kernel & subsolver, expr * term, - ptr_vector & eqc_chars, expr_ref & cex) { + expr_ref_vector & eqc_chars, expr_ref & cex) { ast_manager & m = get_manager(); ast_manager & sub_m = subsolver.m(); @@ -628,10 +628,12 @@ namespace smt { for (unsigned i = 0; i < strConst.length(); ++i) { expr_ref chTerm(u.mk_char(strConst[i]), m); eqc_chars.push_back(chTerm); + fixed_length_subterm_trail.push_back(chTerm); } } else if (to_app(term)->get_num_args() == 0 && !u.str.is_string(term)) { // this is a variable; get its length and create/reuse character terms - if (!var_to_char_subterm_map.contains(term)) { + expr_ref_vector * chars = nullptr; + if (!var_to_char_subterm_map.find(term, chars)) { rational varLen_value; bool var_hasLen = fixed_length_get_len_value(term, varLen_value); if (!var_hasLen || varLen_value.is_neg()) { @@ -640,21 +642,23 @@ namespace smt { return false; } TRACE("str_fl", tout << "creating character terms for variable " << mk_pp(term, m) << ", length = " << varLen_value << std::endl;); - ptr_vector new_chars; + chars = alloc(expr_ref_vector, m); for (rational i = rational::zero(); i < varLen_value; ++i) { // TODO we can probably name these better for the sake of debugging expr_ref ch(mk_fresh_const("char", u.mk_char_sort()), m); - new_chars.push_back(ch); + chars->push_back(ch); fixed_length_subterm_trail.push_back(ch); } - var_to_char_subterm_map.insert(term, new_chars); + var_to_char_subterm_map.insert(term, chars); fixed_length_used_len_terms.insert(term, varLen_value); } - var_to_char_subterm_map.find(term, eqc_chars); + for (auto c : *chars) { + eqc_chars.push_back(c); + } } else if (u.str.is_concat(term, arg0, arg1)) { expr_ref first(arg0, sub_m); expr_ref second(arg1, sub_m); - ptr_vector chars0, chars1; + expr_ref_vector chars0(m), chars1(m); if (!fixed_length_reduce_string_term(subsolver, first, chars0, cex) || !fixed_length_reduce_string_term(subsolver, second, chars1, cex)) { return false; @@ -666,7 +670,7 @@ namespace smt { expr_ref first(arg0, sub_m); expr_ref second(arg1, sub_m); expr_ref third(arg2, sub_m); - ptr_vector base_chars; + expr_ref_vector base_chars(m); if (!fixed_length_reduce_string_term(subsolver, first, base_chars, cex)) { return false; } @@ -712,7 +716,7 @@ namespace smt { // (str.at Base Pos) expr_ref base(arg0, sub_m); expr_ref pos(arg1, sub_m); - ptr_vector base_chars; + expr_ref_vector base_chars(m); if (!fixed_length_reduce_string_term(subsolver, base, base_chars, cex)) { return false; } @@ -777,7 +781,8 @@ namespace smt { } } else { TRACE("str_fl", tout << "string term " << mk_pp(term, m) << " handled as uninterpreted function" << std::endl;); - if (!uninterpreted_to_char_subterm_map.contains(term)) { + expr_ref_vector *chars = nullptr; + if (!uninterpreted_to_char_subterm_map.find(term, chars)) { rational ufLen_value; bool uf_hasLen = fixed_length_get_len_value(term, ufLen_value); if (!uf_hasLen || ufLen_value.is_neg()) { @@ -786,16 +791,18 @@ namespace smt { return false; } TRACE("str_fl", tout << "creating character terms for uninterpreted function " << mk_pp(term, m) << ", length = " << ufLen_value << std::endl;); - ptr_vector new_chars; + chars = alloc(expr_ref_vector, m); for (rational i = rational::zero(); i < ufLen_value; ++i) { expr_ref ch(mk_fresh_const("char", u.mk_char_sort()), m); - new_chars.push_back(ch); + chars->push_back(ch); fixed_length_subterm_trail.push_back(ch); } - uninterpreted_to_char_subterm_map.insert(term, new_chars); + uninterpreted_to_char_subterm_map.insert(term, chars); fixed_length_used_len_terms.insert(term, ufLen_value); } - uninterpreted_to_char_subterm_map.find(term, eqc_chars); + for (auto c : *chars) { + eqc_chars.push_back(c); + } } return true; } @@ -805,7 +812,7 @@ namespace smt { ast_manager & sub_m = subsolver.m(); - ptr_vector lhs_chars, rhs_chars; + expr_ref_vector lhs_chars(m), rhs_chars(m); if (!fixed_length_reduce_string_term(subsolver, lhs, lhs_chars, cex) || !fixed_length_reduce_string_term(subsolver, rhs, rhs_chars, cex)) { @@ -851,7 +858,7 @@ namespace smt { return false; } - ptr_vector lhs_chars, rhs_chars; + expr_ref_vector lhs_chars(m), rhs_chars(m); if (!fixed_length_reduce_string_term(subsolver, lhs, lhs_chars, cex) || !fixed_length_reduce_string_term(subsolver, rhs, rhs_chars, cex)) { return false; @@ -884,7 +891,6 @@ namespace smt { ast_manager & m = get_manager(); - TRACE("str", ast_manager & m = get_manager(); tout << "dumping all formulas:" << std::endl; @@ -897,7 +903,10 @@ namespace smt { fixed_length_subterm_trail.reset(); fixed_length_used_len_terms.reset(); fixed_length_assumptions.reset(); + + for (auto& kv: var_to_char_subterm_map) dealloc(kv.m_value); var_to_char_subterm_map.reset(); + for (auto& kv: uninterpreted_to_char_subterm_map) dealloc(kv.m_value); uninterpreted_to_char_subterm_map.reset(); fixed_length_lesson.reset(); @@ -911,7 +920,6 @@ namespace smt { subsolver_params.m_string_solver = symbol("char"); smt::kernel subsolver(m, subsolver_params); subsolver.set_logic(symbol("QF_S")); - sort * str_sort = u.str.mk_string_sort(); sort * bool_sort = m.mk_bool_sort(); @@ -925,7 +933,7 @@ namespace smt { add_persisted_axiom(var_len_assertion); return l_undef; } - ptr_vector var_chars; + expr_ref_vector var_chars(m); expr_ref str_counterexample(m); if (!fixed_length_reduce_string_term(subsolver, var, var_chars, str_counterexample)) { TRACE("str_fl", tout << "free variable " << mk_pp(var, m) << " caused a conflict; asserting and continuing" << std::endl;); @@ -1197,7 +1205,7 @@ namespace smt { } TRACE("str_fl", - tout << "formulas asserted subsolver:" << std::endl; + tout << "formulas asserted to subsolver:" << std::endl; for (auto e : fixed_length_assumptions) { tout << mk_pp(e, subsolver.m()) << std::endl; } @@ -1230,8 +1238,7 @@ namespace smt { for (auto entry : var_to_char_subterm_map) { svector assignment; expr * var = entry.m_key; - ptr_vector char_subterms(entry.m_value); - for (expr * chExpr : char_subterms) { + for (expr * chExpr : *(entry.m_value)) { expr_ref chAssignment(subModel->get_const_interp(to_app(chExpr)->get_decl()), m); unsigned n = 0; if (chAssignment != nullptr && u.is_const_char(chAssignment, n)) { @@ -1252,8 +1259,7 @@ namespace smt { for (auto entry : uninterpreted_to_char_subterm_map) { svector assignment; expr * var = entry.m_key; - ptr_vector char_subterms(entry.m_value); - for (expr * chExpr : char_subterms) { + for (expr * chExpr : *(entry.m_value)) { expr_ref chAssignment(subModel->get_const_interp(to_app(chExpr)->get_decl()), m); unsigned n = 0; if (chAssignment != nullptr && u.is_const_char(chAssignment, n)) {