mirror of
				https://github.com/Z3Prover/z3
				synced 2025-10-31 11:42:28 +00:00 
			
		
		
		
	Unicode for Z3str3 (#4981)
* z3str3: remove hard-coded char set * z3str3: remove hard-coded char set * z3str3: use char abstraction * z3str3: scope management for unicode chars * add QF_CHAR for z3str3 * z3str3: remove hard-coded char set * z3str3: use char abstraction * z3str3: scope management for unicode chars * add QF_CHAR for z3str3 * z3str3: add 'char' string solver case * z3str3: fix mk_char using the wrong ast manager * z3str3: fix refcounted character vectors
This commit is contained in:
		
							parent
							
								
									cfcd7f18a9
								
							
						
					
					
						commit
						da68c3213c
					
				
					 4 changed files with 49 additions and 36 deletions
				
			
		|  | @ -735,6 +735,10 @@ namespace smt { | |||
|         else if (m_params.m_string_solver == "none") { | ||||
|             // don't register any solver.
 | ||||
|         } | ||||
|         else if (m_params.m_string_solver == "char") { | ||||
|             setup_QF_BV(); | ||||
|             setup_char(); | ||||
|         } | ||||
|         else { | ||||
|             throw default_exception("invalid parameter for smt.string_solver, valid options are 'z3str3', 'seq', 'auto'"); | ||||
|         } | ||||
|  | @ -930,7 +934,6 @@ namespace smt { | |||
|     void setup::setup_str() { | ||||
|         setup_arith(); | ||||
|         m_context.register_plugin(alloc(theory_str, m_context, m_manager, m_params)); | ||||
|         setup_char(); | ||||
|     } | ||||
| 
 | ||||
|     void setup::setup_seq() { | ||||
|  |  | |||
|  | @ -99,6 +99,8 @@ namespace smt { | |||
|             dealloc(aut); | ||||
|         } | ||||
|         regex_automata.clear(); | ||||
|         for (auto& kv: var_to_char_subterm_map) dealloc(kv.m_value); | ||||
|         for (auto& kv: uninterpreted_to_char_subterm_map) dealloc(kv.m_value); | ||||
|     } | ||||
| 
 | ||||
|     void theory_str::init() { | ||||
|  | @ -162,7 +164,10 @@ namespace smt { | |||
|         fixed_length_subterm_trail.reset(); | ||||
|         fixed_length_assumptions.reset(); | ||||
|         fixed_length_used_len_terms.reset(); | ||||
| 
 | ||||
|         for (auto& kv: var_to_char_subterm_map) dealloc(kv.m_value); | ||||
|         var_to_char_subterm_map.reset(); | ||||
|         for (auto& kv: uninterpreted_to_char_subterm_map) dealloc(kv.m_value); | ||||
|         uninterpreted_to_char_subterm_map.reset(); | ||||
|         fixed_length_lesson.reset(); | ||||
|         candidate_model.reset(); | ||||
|  |  | |||
|  | @ -509,7 +509,6 @@ protected: | |||
| 
 | ||||
|     obj_map<expr, ptr_vector<expr> > string_chars; // S --> [S_0, S_1, ...] for character terms S_i
 | ||||
| 
 | ||||
| 
 | ||||
|     obj_pair_map<expr, expr, expr*> concat_astNode_map; | ||||
| 
 | ||||
|     // all (str.to-int) and (int.to-str) terms
 | ||||
|  | @ -537,8 +536,8 @@ protected: | |||
|     expr_ref_vector fixed_length_subterm_trail; // trail for subterms generated *in the subsolver*
 | ||||
|     expr_ref_vector fixed_length_assumptions; // cache of boolean terms to assert *into the subsolver*, unsat core is a subset of these
 | ||||
|     obj_map<expr, rational> fixed_length_used_len_terms; // constraints used in generating fixed length model
 | ||||
|     obj_map<expr, ptr_vector<expr> > var_to_char_subterm_map; // maps a var to a list of character terms *in the subsolver*
 | ||||
|     obj_map<expr, ptr_vector<expr> > uninterpreted_to_char_subterm_map; // maps an "uninterpreted" string term to a list of character terms *in the subsolver*
 | ||||
|     obj_map<expr, expr_ref_vector* > var_to_char_subterm_map; // maps a var to a list of character terms *in the subsolver*
 | ||||
|     obj_map<expr, expr_ref_vector* > uninterpreted_to_char_subterm_map; // maps an "uninterpreted" string term to a list of character terms *in the subsolver*
 | ||||
|     obj_map<expr, std::tuple<rational, expr*, expr*>> fixed_length_lesson; //keep track of information for the lesson
 | ||||
|     unsigned preprocessing_iteration_count; // number of attempts we've made to solve by preprocessing length information
 | ||||
|     obj_map<expr, zstring> candidate_model; | ||||
|  | @ -748,7 +747,7 @@ protected: | |||
|     lbool fixed_length_model_construction(expr_ref_vector formulas, expr_ref_vector &precondition, | ||||
|             expr_ref_vector& free_variables, | ||||
|             obj_map<expr, zstring> &model, expr_ref_vector &cex); | ||||
|     bool fixed_length_reduce_string_term(smt::kernel & subsolver, expr * term, ptr_vector<expr> & term_chars, expr_ref & cex); | ||||
|     bool fixed_length_reduce_string_term(smt::kernel & subsolver, expr * term, expr_ref_vector & term_chars, expr_ref & cex); | ||||
|     bool fixed_length_get_len_value(expr * e, rational & val); | ||||
|     bool fixed_length_reduce_eq(smt::kernel & subsolver, expr_ref lhs, expr_ref rhs, expr_ref & cex); | ||||
|     bool fixed_length_reduce_diseq(smt::kernel & subsolver, expr_ref lhs, expr_ref rhs, expr_ref & cex); | ||||
|  |  | |||
|  | @ -85,7 +85,7 @@ namespace smt { | |||
|         expr_ref haystack(full, m); | ||||
|         expr_ref needle(suff, m); | ||||
| 
 | ||||
|         ptr_vector<expr> full_chars, suff_chars; | ||||
|         expr_ref_vector full_chars(m), suff_chars(m); | ||||
| 
 | ||||
|         if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex) | ||||
|                 || !fixed_length_reduce_string_term(subsolver, needle, suff_chars, cex)) { | ||||
|  | @ -147,7 +147,7 @@ namespace smt { | |||
|         expr_ref haystack(full, m); | ||||
|         expr_ref needle(suff, m); | ||||
| 
 | ||||
|         ptr_vector<expr> full_chars, suff_chars; | ||||
|         expr_ref_vector full_chars(m), suff_chars(m); | ||||
|         if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex) | ||||
|                 || !fixed_length_reduce_string_term(subsolver, needle, suff_chars, cex)) { | ||||
|             return false; | ||||
|  | @ -201,7 +201,7 @@ namespace smt { | |||
|         expr_ref haystack(full, m); | ||||
|         expr_ref needle(pref, m); | ||||
| 
 | ||||
|         ptr_vector<expr> full_chars, pref_chars; | ||||
|         expr_ref_vector full_chars(m), pref_chars(m); | ||||
|         if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex) | ||||
|                 || !fixed_length_reduce_string_term(subsolver, needle, pref_chars, cex)) { | ||||
|             return false; | ||||
|  | @ -262,7 +262,7 @@ namespace smt { | |||
|         expr_ref haystack(full, m); | ||||
|         expr_ref needle(pref, m); | ||||
| 
 | ||||
|         ptr_vector<expr> full_chars, pref_chars; | ||||
|         expr_ref_vector full_chars(m), pref_chars(m); | ||||
|         if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex) | ||||
|                 || !fixed_length_reduce_string_term(subsolver, needle, pref_chars, cex)) { | ||||
|             return false; | ||||
|  | @ -316,7 +316,7 @@ namespace smt { | |||
|         expr_ref haystack(full, m); | ||||
|         expr_ref needle(small, m); | ||||
| 
 | ||||
|         ptr_vector<expr> haystack_chars, needle_chars; | ||||
|         expr_ref_vector haystack_chars(m), needle_chars(m); | ||||
|         if (!fixed_length_reduce_string_term(subsolver, haystack, haystack_chars, cex) | ||||
|                 || !fixed_length_reduce_string_term(subsolver, needle, needle_chars, cex)) { | ||||
|             return false; | ||||
|  | @ -382,7 +382,7 @@ namespace smt { | |||
|         expr_ref haystack(full, m); | ||||
|         expr_ref needle(small, m); | ||||
| 
 | ||||
|         ptr_vector<expr> haystack_chars, needle_chars; | ||||
|         expr_ref_vector haystack_chars(m), needle_chars(m); | ||||
|         if (!fixed_length_reduce_string_term(subsolver, haystack, haystack_chars, cex) | ||||
|                 || !fixed_length_reduce_string_term(subsolver, needle, needle_chars, cex)) { | ||||
|             return false; | ||||
|  | @ -459,7 +459,7 @@ namespace smt { | |||
|         eautomaton * aut = m_mk_aut(re); | ||||
|         aut->compress(); | ||||
| 
 | ||||
|         ptr_vector<expr> str_chars; | ||||
|         expr_ref_vector str_chars(m); | ||||
|         if (!fixed_length_reduce_string_term(subsolver, str, str_chars, cex)) { | ||||
|             return false; | ||||
|         } | ||||
|  | @ -614,7 +614,7 @@ namespace smt { | |||
|      * this conflict clause exists in the main solver. | ||||
|      */ | ||||
|     bool theory_str::fixed_length_reduce_string_term(smt::kernel & subsolver, expr * term, | ||||
|             ptr_vector<expr> & eqc_chars, expr_ref & cex) { | ||||
|             expr_ref_vector & eqc_chars, expr_ref & cex) { | ||||
|         ast_manager & m = get_manager(); | ||||
| 
 | ||||
|         ast_manager & sub_m = subsolver.m(); | ||||
|  | @ -628,10 +628,12 @@ namespace smt { | |||
|             for (unsigned i = 0; i < strConst.length(); ++i) { | ||||
|                 expr_ref chTerm(u.mk_char(strConst[i]), m); | ||||
|                 eqc_chars.push_back(chTerm); | ||||
|                 fixed_length_subterm_trail.push_back(chTerm); | ||||
|             } | ||||
|         } else if (to_app(term)->get_num_args() == 0 && !u.str.is_string(term)) { | ||||
|             // this is a variable; get its length and create/reuse character terms
 | ||||
|             if (!var_to_char_subterm_map.contains(term)) { | ||||
|             expr_ref_vector * chars = nullptr; | ||||
|             if (!var_to_char_subterm_map.find(term, chars)) { | ||||
|                 rational varLen_value; | ||||
|                 bool var_hasLen = fixed_length_get_len_value(term, varLen_value); | ||||
|                 if (!var_hasLen || varLen_value.is_neg()) { | ||||
|  | @ -640,21 +642,23 @@ namespace smt { | |||
|                     return false; | ||||
|                 } | ||||
|                 TRACE("str_fl", tout << "creating character terms for variable " << mk_pp(term, m) << ", length = " << varLen_value << std::endl;); | ||||
|                 ptr_vector<expr> new_chars; | ||||
|                 chars = alloc(expr_ref_vector, m); | ||||
|                 for (rational i = rational::zero(); i < varLen_value; ++i) { | ||||
|                     // TODO we can probably name these better for the sake of debugging
 | ||||
|                     expr_ref ch(mk_fresh_const("char", u.mk_char_sort()), m); | ||||
|                     new_chars.push_back(ch); | ||||
|                     chars->push_back(ch); | ||||
|                     fixed_length_subterm_trail.push_back(ch); | ||||
|                 } | ||||
|                 var_to_char_subterm_map.insert(term, new_chars); | ||||
|                 var_to_char_subterm_map.insert(term, chars); | ||||
|                 fixed_length_used_len_terms.insert(term, varLen_value); | ||||
|             } | ||||
|             var_to_char_subterm_map.find(term, eqc_chars); | ||||
|             for (auto c : *chars) { | ||||
|                 eqc_chars.push_back(c); | ||||
|             } | ||||
|         } else if (u.str.is_concat(term, arg0, arg1)) { | ||||
|             expr_ref first(arg0, sub_m); | ||||
|             expr_ref second(arg1, sub_m); | ||||
|             ptr_vector<expr> chars0, chars1; | ||||
|             expr_ref_vector chars0(m), chars1(m); | ||||
|             if (!fixed_length_reduce_string_term(subsolver, first, chars0, cex) | ||||
|                     || !fixed_length_reduce_string_term(subsolver, second, chars1, cex)) { | ||||
|                 return false; | ||||
|  | @ -666,7 +670,7 @@ namespace smt { | |||
|             expr_ref first(arg0, sub_m); | ||||
|             expr_ref second(arg1, sub_m); | ||||
|             expr_ref third(arg2, sub_m); | ||||
|             ptr_vector<expr> base_chars; | ||||
|             expr_ref_vector base_chars(m); | ||||
|             if (!fixed_length_reduce_string_term(subsolver, first, base_chars, cex)) { | ||||
|                 return false; | ||||
|             } | ||||
|  | @ -712,7 +716,7 @@ namespace smt { | |||
|             // (str.at Base Pos)
 | ||||
|             expr_ref base(arg0, sub_m); | ||||
|             expr_ref pos(arg1, sub_m); | ||||
|             ptr_vector<expr> base_chars; | ||||
|             expr_ref_vector base_chars(m); | ||||
|             if (!fixed_length_reduce_string_term(subsolver, base, base_chars, cex)) { | ||||
|                 return false; | ||||
|             } | ||||
|  | @ -777,7 +781,8 @@ namespace smt { | |||
|             } | ||||
|         } else { | ||||
|             TRACE("str_fl", tout << "string term " << mk_pp(term, m) << " handled as uninterpreted function" << std::endl;); | ||||
|             if (!uninterpreted_to_char_subterm_map.contains(term)) { | ||||
|             expr_ref_vector *chars = nullptr; | ||||
|             if (!uninterpreted_to_char_subterm_map.find(term, chars)) { | ||||
|                 rational ufLen_value; | ||||
|                 bool uf_hasLen = fixed_length_get_len_value(term, ufLen_value); | ||||
|                 if (!uf_hasLen || ufLen_value.is_neg()) { | ||||
|  | @ -786,16 +791,18 @@ namespace smt { | |||
|                     return false; | ||||
|                 } | ||||
|                 TRACE("str_fl", tout << "creating character terms for uninterpreted function " << mk_pp(term, m) << ", length = " << ufLen_value << std::endl;); | ||||
|                 ptr_vector<expr> new_chars; | ||||
|                 chars = alloc(expr_ref_vector, m); | ||||
|                 for (rational i = rational::zero(); i < ufLen_value; ++i) { | ||||
|                     expr_ref ch(mk_fresh_const("char", u.mk_char_sort()), m); | ||||
|                     new_chars.push_back(ch); | ||||
|                     chars->push_back(ch); | ||||
|                     fixed_length_subterm_trail.push_back(ch); | ||||
|                 } | ||||
|                 uninterpreted_to_char_subterm_map.insert(term, new_chars); | ||||
|                 uninterpreted_to_char_subterm_map.insert(term, chars); | ||||
|                 fixed_length_used_len_terms.insert(term, ufLen_value); | ||||
|             } | ||||
|             uninterpreted_to_char_subterm_map.find(term, eqc_chars); | ||||
|             for (auto c : *chars) { | ||||
|                 eqc_chars.push_back(c); | ||||
|             } | ||||
|         } | ||||
|         return true; | ||||
|     } | ||||
|  | @ -805,7 +812,7 @@ namespace smt { | |||
| 
 | ||||
|         ast_manager & sub_m = subsolver.m(); | ||||
| 
 | ||||
|         ptr_vector<expr> lhs_chars, rhs_chars; | ||||
|         expr_ref_vector lhs_chars(m), rhs_chars(m); | ||||
| 
 | ||||
|         if (!fixed_length_reduce_string_term(subsolver, lhs, lhs_chars, cex) | ||||
|                 || !fixed_length_reduce_string_term(subsolver, rhs, rhs_chars, cex)) { | ||||
|  | @ -851,7 +858,7 @@ namespace smt { | |||
|             return false; | ||||
|         } | ||||
| 
 | ||||
|         ptr_vector<expr> lhs_chars, rhs_chars; | ||||
|         expr_ref_vector lhs_chars(m), rhs_chars(m); | ||||
|         if (!fixed_length_reduce_string_term(subsolver, lhs, lhs_chars, cex) | ||||
|                 || !fixed_length_reduce_string_term(subsolver, rhs, rhs_chars, cex)) { | ||||
|             return false; | ||||
|  | @ -884,7 +891,6 @@ namespace smt { | |||
| 
 | ||||
|         ast_manager & m = get_manager(); | ||||
| 
 | ||||
| 
 | ||||
|         TRACE("str", | ||||
|             ast_manager & m = get_manager(); | ||||
|             tout << "dumping all formulas:" << std::endl; | ||||
|  | @ -897,7 +903,10 @@ namespace smt { | |||
|         fixed_length_subterm_trail.reset(); | ||||
|         fixed_length_used_len_terms.reset(); | ||||
|         fixed_length_assumptions.reset(); | ||||
| 
 | ||||
|         for (auto& kv: var_to_char_subterm_map) dealloc(kv.m_value); | ||||
|         var_to_char_subterm_map.reset(); | ||||
|         for (auto& kv: uninterpreted_to_char_subterm_map) dealloc(kv.m_value); | ||||
|         uninterpreted_to_char_subterm_map.reset(); | ||||
|         fixed_length_lesson.reset(); | ||||
| 
 | ||||
|  | @ -911,7 +920,6 @@ namespace smt { | |||
|         subsolver_params.m_string_solver = symbol("char"); | ||||
|         smt::kernel subsolver(m, subsolver_params); | ||||
|         subsolver.set_logic(symbol("QF_S")); | ||||
| 
 | ||||
|         sort * str_sort = u.str.mk_string_sort(); | ||||
|         sort * bool_sort = m.mk_bool_sort(); | ||||
| 
 | ||||
|  | @ -925,7 +933,7 @@ namespace smt { | |||
|                 add_persisted_axiom(var_len_assertion); | ||||
|                 return l_undef; | ||||
|             } | ||||
|             ptr_vector<expr> var_chars; | ||||
|             expr_ref_vector var_chars(m); | ||||
|             expr_ref str_counterexample(m); | ||||
|             if (!fixed_length_reduce_string_term(subsolver, var, var_chars, str_counterexample)) { | ||||
|                 TRACE("str_fl", tout << "free variable " << mk_pp(var, m) << " caused a conflict; asserting and continuing" << std::endl;); | ||||
|  | @ -1197,7 +1205,7 @@ namespace smt { | |||
|         } | ||||
| 
 | ||||
|         TRACE("str_fl", | ||||
|             tout << "formulas asserted  subsolver:" << std::endl; | ||||
|             tout << "formulas asserted to subsolver:" << std::endl; | ||||
|             for (auto e : fixed_length_assumptions) { | ||||
|                 tout << mk_pp(e, subsolver.m()) << std::endl; | ||||
|             } | ||||
|  | @ -1230,8 +1238,7 @@ namespace smt { | |||
|             for (auto entry : var_to_char_subterm_map) { | ||||
|                 svector<unsigned> assignment; | ||||
|                 expr * var = entry.m_key; | ||||
|                 ptr_vector<expr> char_subterms(entry.m_value); | ||||
|                 for (expr * chExpr : char_subterms) { | ||||
|                 for (expr * chExpr : *(entry.m_value)) { | ||||
|                     expr_ref chAssignment(subModel->get_const_interp(to_app(chExpr)->get_decl()), m); | ||||
|                     unsigned n = 0; | ||||
|                     if (chAssignment != nullptr && u.is_const_char(chAssignment, n)) { | ||||
|  | @ -1252,8 +1259,7 @@ namespace smt { | |||
|             for (auto entry : uninterpreted_to_char_subterm_map) { | ||||
|                 svector<unsigned> assignment; | ||||
|                 expr * var = entry.m_key; | ||||
|                 ptr_vector<expr> char_subterms(entry.m_value); | ||||
|                 for (expr * chExpr : char_subterms) { | ||||
|                 for (expr * chExpr : *(entry.m_value)) { | ||||
|                     expr_ref chAssignment(subModel->get_const_interp(to_app(chExpr)->get_decl()), m); | ||||
|                     unsigned n = 0; | ||||
|                     if (chAssignment != nullptr && u.is_const_char(chAssignment, n)) { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue