diff --git a/src/params/CMakeLists.txt b/src/params/CMakeLists.txt index a00917834..76d561af9 100644 --- a/src/params/CMakeLists.txt +++ b/src/params/CMakeLists.txt @@ -10,8 +10,7 @@ z3_add_component(params theory_array_params.cpp theory_bv_params.cpp theory_pb_params.cpp - theory_seq_params.cpp - theory_str_params.cpp + theory_seq_params.cpp COMPONENT_DEPENDENCIES util ast diff --git a/src/params/smt_params.cpp b/src/params/smt_params.cpp index 1471dcd98..a80483d0f 100644 --- a/src/params/smt_params.cpp +++ b/src/params/smt_params.cpp @@ -80,7 +80,6 @@ void smt_params::updt_params(params_ref const & p) { theory_pb_params::updt_params(p); // theory_array_params::updt_params(p); theory_datatype_params::updt_params(p); - theory_str_params::updt_params(p); updt_local_params(p); } @@ -100,7 +99,6 @@ void smt_params::display(std::ostream & out) const { theory_bv_params::display(out); theory_pb_params::display(out); theory_datatype_params::display(out); - theory_str_params::display(out); DISPLAY_PARAM(m_display_proof); DISPLAY_PARAM(m_display_dot_proof); diff --git a/src/params/smt_params.h b/src/params/smt_params.h index cc4082715..68ab50ffe 100644 --- a/src/params/smt_params.h +++ b/src/params/smt_params.h @@ -24,7 +24,6 @@ Revision History: #include "params/theory_arith_params.h" #include "params/theory_array_params.h" #include "params/theory_bv_params.h" -#include "params/theory_str_params.h" #include "params/theory_seq_params.h" #include "params/theory_pb_params.h" #include "params/theory_datatype_params.h" @@ -79,7 +78,6 @@ struct smt_params : public preprocessor_params, public theory_arith_params, public theory_array_params, public theory_bv_params, - public theory_str_params, public theory_seq_params, public theory_pb_params, public theory_datatype_params { diff --git a/src/params/theory_str_params.cpp b/src/params/theory_str_params.cpp deleted file mode 100644 index b256af715..000000000 --- a/src/params/theory_str_params.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/*++ -Module Name: - - theory_str_params.cpp - -Abstract: - - Parameters for string theory plugin - -Author: - - Murphy Berzish (mtrberzi) 2016-12-13 - -Revision History: - ---*/ - -#include "params/theory_str_params.h" -#include "params/smt_params_helper.hpp" - -void theory_str_params::updt_params(params_ref const & _p) { - smt_params_helper p(_p); - m_StrongArrangements = p.str_strong_arrangements(); - m_AggressiveLengthTesting = p.str_aggressive_length_testing(); - m_AggressiveValueTesting = p.str_aggressive_value_testing(); - m_AggressiveUnrollTesting = p.str_aggressive_unroll_testing(); - m_UseFastLengthTesterCache = p.str_fast_length_tester_cache(); - m_UseFastValueTesterCache = p.str_fast_value_tester_cache(); - m_StringConstantCache = p.str_string_constant_cache(); - m_OverlapTheoryAwarePriority = p.str_overlap_priority(); - m_RegexAutomata_DifficultyThreshold = p.str_regex_automata_difficulty_threshold(); - m_RegexAutomata_IntersectionDifficultyThreshold = p.str_regex_automata_intersection_difficulty_threshold(); - m_RegexAutomata_FailedAutomatonThreshold = p.str_regex_automata_failed_automaton_threshold(); - m_RegexAutomata_FailedIntersectionThreshold = p.str_regex_automata_failed_intersection_threshold(); - m_RegexAutomata_LengthAttemptThreshold = p.str_regex_automata_length_attempt_threshold(); - m_FixedLengthRefinement = p.str_fixed_length_refinement(); - m_FixedLengthNaiveCounterexamples = p.str_fixed_length_naive_cex(); -} - -#define DISPLAY_PARAM(X) out << #X"=" << X << '\n'; - -void theory_str_params::display(std::ostream & out) const { - DISPLAY_PARAM(m_StrongArrangements); - DISPLAY_PARAM(m_AggressiveLengthTesting); - DISPLAY_PARAM(m_AggressiveValueTesting); - DISPLAY_PARAM(m_AggressiveUnrollTesting); - DISPLAY_PARAM(m_UseFastLengthTesterCache); - DISPLAY_PARAM(m_UseFastValueTesterCache); - DISPLAY_PARAM(m_StringConstantCache); - DISPLAY_PARAM(m_OverlapTheoryAwarePriority); - DISPLAY_PARAM(m_RegexAutomata_DifficultyThreshold); - DISPLAY_PARAM(m_RegexAutomata_IntersectionDifficultyThreshold); - DISPLAY_PARAM(m_RegexAutomata_FailedAutomatonThreshold); - DISPLAY_PARAM(m_RegexAutomata_FailedIntersectionThreshold); - DISPLAY_PARAM(m_RegexAutomata_LengthAttemptThreshold); - DISPLAY_PARAM(m_FixedLengthNaiveCounterexamples); -} diff --git a/src/params/theory_str_params.h b/src/params/theory_str_params.h deleted file mode 100644 index 0dd5e51f6..000000000 --- a/src/params/theory_str_params.h +++ /dev/null @@ -1,122 +0,0 @@ -/*++ -Module Name: - - theory_str_params.h - -Abstract: - - Parameters for string theory plugin - -Author: - - Murphy Berzish (mtrberzi) 2016-12-13 - -Revision History: - ---*/ - -#pragma once - -#include "util/params.h" - -struct theory_str_params { - /* - * If AssertStrongerArrangements is set to true, - * the implications that would normally be asserted during arrangement generation - * will instead be asserted as equivalences. - * This is a stronger version of the standard axiom. - * The Z3str2 axioms can be simulated by setting this to false. - */ - bool m_StrongArrangements = true; - - /* - * If AggressiveLengthTesting is true, we manipulate the phase of length tester equalities - * to prioritize trying concrete length options over choosing the "more" option. - */ - bool m_AggressiveLengthTesting = false; - - /* - * Similarly, if AggressiveValueTesting is true, we manipulate the phase of value tester equalities - * to prioritize trying concrete value options over choosing the "more" option. - */ - bool m_AggressiveValueTesting = false; - - /* - * If AggressiveUnrollTesting is true, we manipulate the phase of regex unroll tester equalities - * to prioritize trying concrete unroll counts over choosing the "more" option. - */ - bool m_AggressiveUnrollTesting = true; - - /* - * If UseFastLengthTesterCache is set to true, - * length tester terms will not be generated from scratch each time they are needed, - * but will be saved in a map and looked up. - */ - bool m_UseFastLengthTesterCache = false; - - /* - * If UseFastValueTesterCache is set to true, - * value tester terms will not be generated from scratch each time they are needed, - * but will be saved in a map and looked up. - */ - bool m_UseFastValueTesterCache = true; - - /* - * If StringConstantCache is set to true, - * all string constants in theory_str generated from anywhere will be cached and saved. - */ - bool m_StringConstantCache = true; - - double m_OverlapTheoryAwarePriority = -0.1; - - /* - * RegexAutomata_DifficultyThreshold is the lowest difficulty above which Z3str3 - * will not eagerly construct an automaton for a regular expression term. - */ - unsigned m_RegexAutomata_DifficultyThreshold = 1000; - - /* - * RegexAutomata_IntersectionDifficultyThreshold is the lowest difficulty above which Z3str3 - * will not eagerly intersect automata to check unsatisfiability. - */ - unsigned m_RegexAutomata_IntersectionDifficultyThreshold = 1000; - - /* - * RegexAutomata_FailedAutomatonThreshold is the number of failed attempts to build an automaton - * after which a full automaton (i.e. with no length information) will be built regardless of difficulty. - */ - unsigned m_RegexAutomata_FailedAutomatonThreshold = 10; - - /* - * RegexAutomaton_FailedIntersectionThreshold is the number of failed attempts to perform automaton - * intersection after which intersection will always be performed regardless of difficulty. - */ - unsigned m_RegexAutomata_FailedIntersectionThreshold = 10; - - /* - * RegexAutomaton_LengthAttemptThreshold is the number of attempts to satisfy length/path constraints - * before which we begin checking unsatisfiability of a regex term. - */ - unsigned m_RegexAutomata_LengthAttemptThreshold = 10; - /* - * If FixedLengthRefinement is true and the fixed-length equation solver is enabled, - * Z3str3 will use abstraction refinement to handle formulas that would result in disjunctions or expensive - * reductions to fixed-length formulas. - */ - bool m_FixedLengthRefinement = false; - - /* - * If FixedLengthNaiveCounterexamples is true and the fixed-length equation solver is enabled, - * Z3str3 will only construct simple counterexamples to block unsatisfiable length assignments - * instead of attempting to learn more complex lessons. - */ - bool m_FixedLengthNaiveCounterexamples = true; - - theory_str_params(params_ref const & p = params_ref()) { - updt_params(p); - } - - void updt_params(params_ref const & p); - void display(std::ostream & out) const; -}; - diff --git a/src/smt/CMakeLists.txt b/src/smt/CMakeLists.txt index 42469c365..3de6eb5f5 100644 --- a/src/smt/CMakeLists.txt +++ b/src/smt/CMakeLists.txt @@ -69,9 +69,6 @@ z3_add_component(smt theory_seq.cpp theory_sls.cpp theory_special_relations.cpp - theory_str.cpp - theory_str_mc.cpp - theory_str_regex.cpp theory_user_propagator.cpp theory_utvpi.cpp theory_wmaxsat.cpp diff --git a/src/smt/smt_setup.cpp b/src/smt/smt_setup.cpp index 7a053ded0..262a76b12 100644 --- a/src/smt/smt_setup.cpp +++ b/src/smt/smt_setup.cpp @@ -39,7 +39,6 @@ Revision History: #include "smt/theory_sls.h" #include "smt/theory_pb.h" #include "smt/theory_fpa.h" -#include "smt/theory_str.h" #include "smt/theory_polymorphism.h" namespace smt { @@ -562,7 +561,7 @@ namespace smt { void setup::setup_QF_S() { if (m_params.m_string_solver == "z3str3") { - setup_str(); + throw default_exception("z3str3 string solver has been removed. Use 'seq' instead."); } else if (m_params.m_string_solver == "seq") { setup_unknown(); @@ -582,7 +581,7 @@ namespace smt { // don't register any solver. } else { - throw default_exception("invalid parameter for smt.string_solver, valid options are 'z3str3', 'seq', 'auto'"); + throw default_exception("invalid parameter for smt.string_solver, valid options are 'seq', 'char', 'empty', 'none', 'auto'"); } } @@ -749,7 +748,7 @@ namespace smt { void setup::setup_seq_str(static_features const & st) { // check params for what to do here when it's ambiguous if (m_params.m_string_solver == "z3str3") { - setup_str(); + throw default_exception("z3str3 string solver has been removed. Use 'seq' instead."); } else if (m_params.m_string_solver == "seq") { setup_seq(); @@ -765,11 +764,11 @@ namespace smt { setup_seq(); } else { - setup_str(); + setup_seq(); // default to seq instead of z3str3 } } else { - throw default_exception("invalid parameter for smt.string_solver, valid options are 'z3str3', 'seq', 'auto'"); + throw default_exception("invalid parameter for smt.string_solver, valid options are 'seq', 'empty', 'none', 'auto'"); } } @@ -787,11 +786,6 @@ namespace smt { m_context.register_plugin(alloc(theory_fpa, m_context)); } - void setup::setup_str() { - setup_arith(); - m_context.register_plugin(alloc(theory_str, m_context, m_manager, m_params)); - } - void setup::setup_seq() { m_context.register_plugin(alloc(smt::theory_seq, m_context)); setup_char(); diff --git a/src/smt/smt_setup.h b/src/smt/smt_setup.h index a42c465f5..3d2bf47f3 100644 --- a/src/smt/smt_setup.h +++ b/src/smt/smt_setup.h @@ -108,7 +108,6 @@ namespace smt { void setup_mi_arith(); void setup_lra_arith(); void setup_fpa(); - void setup_str(); void setup_relevancy(static_features& st); public: diff --git a/src/smt/smt_theory.h b/src/smt/smt_theory.h index 25104affd..20c7380eb 100644 --- a/src/smt/smt_theory.h +++ b/src/smt/smt_theory.h @@ -283,7 +283,6 @@ namespace smt { /** \brief This method is called by smt_context before the search starts to get any extra assumptions the theory wants to use. - (See theory_str for an example) */ virtual void add_theory_assumptions(expr_ref_vector & assumptions) { } diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp deleted file mode 100644 index 984cea972..000000000 --- a/src/smt/theory_str.cpp +++ /dev/null @@ -1,8985 +0,0 @@ -/*++ - Module Name: - - theory_str.cpp - - Abstract: - - String Theory Plugin - - Author: - - Murphy Berzish and Yunhui Zheng - - Revision History: - - --*/ -#include "ast/ast_smt2_pp.h" -#include "smt/smt_context.h" -#include "smt/theory_str.h" -#include "smt/smt_model_generator.h" -#include "ast/ast_pp.h" -#include "ast/ast_ll_pp.h" -#include -#include -#include "smt/theory_seq_empty.h" -#include "smt/theory_arith.h" -#include "ast/ast_util.h" -#include "ast/rewriter/seq_rewriter.h" -#include "ast/rewriter/expr_replacer.h" -#include "ast/rewriter/var_subst.h" -#include "smt_kernel.h" -#include "model/model_smt2_pp.h" - -namespace smt { - - - class seq_expr_solver : public expr_solver { - kernel m_kernel; - public: - seq_expr_solver(ast_manager& m, smt_params& fp): - m_kernel(m, fp) {} - lbool check_sat(expr* e) override { - m_kernel.push(); - m_kernel.assert_expr(e); - lbool r = m_kernel.check(); - m_kernel.pop(1); - return r; - } - }; - - theory_str::theory_str(context& ctx, ast_manager & m, theory_str_params const & params): - theory(ctx, m.mk_family_id("seq")), - m_params(params), - /* Options */ - opt_EagerStringConstantLengthAssertions(true), - opt_VerifyFinalCheckProgress(false), - opt_LCMUnrollStep(2), - opt_NoQuickReturn_IntegerTheory(false), - opt_DisableIntegerTheoryIntegration(false), - opt_DeferEQCConsistencyCheck(false), - opt_CheckVariableScope(true), - opt_ConcatOverlapAvoid(true), - /* Internal setup */ - search_started(false), - m_autil(m), - u(m), - sLevel(0), - finalCheckProgressIndicator(false), - m_trail(m), - m_factory(nullptr), - m_mk_aut(m), - m_unused_id(0), - m_delayed_axiom_setup_terms(m), - m_delayed_assertions_todo(m), - m_persisted_axioms(m), - m_persisted_axiom_todo(m), - tmpStringVarCount(0), - tmpXorVarCount(0), - avoidLoopCut(true), - loopDetected(false), - m_theoryStrOverlapAssumption_term(m.mk_true(), m), - contains_map(m), - string_int_conversion_terms(m), - totalCacheAccessCount(0), - cacheHitCount(0), - cacheMissCount(0), - m_fresh_id(0), - m_trail_stack(), - m_library_aware_trail_stack(), - m_find(*this), - fixed_length_subterm_trail(m), - fixed_length_assumptions(m) - { - } - - theory_str::~theory_str() { - m_trail_stack.reset(); - for (eautomaton * aut : regex_automata) { - dealloc(aut); - } - regex_automata.clear(); - for (auto& kv: var_to_char_subterm_map) dealloc(kv.m_value); - for (auto& kv: uninterpreted_to_char_subterm_map) dealloc(kv.m_value); - } - - void theory_str::init() { - m_mk_aut.set_solver(alloc(seq_expr_solver, get_manager(), ctx.get_fparams())); - } - - void theory_str::reset_internal_data_structures() { - //m_trail.reset(); - m_delayed_axiom_setup_terms.reset(); - m_basicstr_axiom_todo.reset(); - m_concat_axiom_todo.reset(); - m_string_constant_length_todo.reset(); - m_concat_eval_todo.reset(); - m_delayed_assertions_todo.reset(); - m_library_aware_axiom_todo.reset(); - m_persisted_axioms.reset(); - m_persisted_axiom_todo.reset(); - axiomatized_terms.reset(); - existing_toplevel_exprs.reset(); - - varForBreakConcat.clear(); - loopDetected = false; - cut_var_map.reset(); - m_cut_allocs.reset(); - - //variable_set.reset(); - //internal_variable_set.reset(); - //internal_variable_scope_levels.clear(); - - contains_map.reset(); - contain_pair_bool_map.reset(); - contain_pair_idx_map.reset(); - - m_automata.reset(); - regex_automata.reset(); - regex_terms.reset(); - regex_terms_by_string.reset(); - regex_automaton_assumptions.reset(); - regex_terms_with_path_constraints.reset(); - regex_terms_with_length_constraints.reset(); - regex_term_to_length_constraint.reset(); - regex_term_to_extra_length_vars.reset(); - regex_last_lower_bound.reset(); - regex_last_upper_bound.reset(); - regex_length_attempt_count.reset(); - regex_fail_count.reset(); - regex_intersection_fail_count.reset(); - - string_chars.reset(); - - concat_astNode_map.reset(); - string_int_conversion_terms.reset(); - string_int_axioms.reset(); - stringConstantCache.reset(); - - length_ast_map.reset(); - //m_trail_stack.reset(); - // m_find.reset(); - - fixed_length_subterm_trail.reset(); - fixed_length_assumptions.reset(); - fixed_length_used_len_terms.reset(); - - for (auto& kv: var_to_char_subterm_map) dealloc(kv.m_value); - var_to_char_subterm_map.reset(); - for (auto& kv: uninterpreted_to_char_subterm_map) dealloc(kv.m_value); - uninterpreted_to_char_subterm_map.reset(); - fixed_length_lesson.reset(); - candidate_model.reset(); - } - - expr * theory_str::mk_string(zstring const& str) { - if (m_params.m_StringConstantCache) { - ++totalCacheAccessCount; - expr * val; - if (stringConstantCache.find(str, val)) { - return val; - } else { - val = u.str.mk_string(str); - m_trail.push_back(val); - stringConstantCache.insert(str, val); - return val; - } - } else { - return u.str.mk_string(str); - } - } - - expr * theory_str::mk_string(const char * str) { - return u.str.mk_string(str); - } - - void theory_str::collect_statistics(::statistics & st) const { - st.update("str refine equation", m_stats.m_refine_eq); - st.update("str refine negated equation", m_stats.m_refine_neq); - st.update("str refine function", m_stats.m_refine_f); - st.update("str refine negated function", m_stats.m_refine_nf); - } - - void theory_str::assert_axiom(expr * _e) { - if (_e == nullptr) - return; - if (opt_VerifyFinalCheckProgress) { - finalCheckProgressIndicator = true; - } - ast_manager& m = get_manager(); - SASSERT(!m.is_true(_e)); - - if (m.is_true(_e)) return; - TRACE(str, tout << "asserting " << mk_ismt2_pp(_e, m) << std::endl;); - expr_ref e(_e, m); - if (!ctx.b_internalized(e)) { - ctx.internalize(e, false); - } - literal lit(ctx.get_literal(e)); - ctx.mark_as_relevant(lit); - if (m.has_trace_stream()) log_axiom_instantiation(e); - ctx.mk_th_axiom(get_id(), 1, &lit); - if (m.has_trace_stream()) m.trace_stream() << "[end-of-instance]\n"; - - // crash/error avoidance: add all axioms to the trail - m_trail.push_back(e); - - //TRACE(str, tout << "done asserting " << mk_ismt2_pp(e, get_manager()) << std::endl;); - } - - void theory_str::assert_axiom_rw(expr * e) { - if (e == nullptr) - return; - ast_manager & m = get_manager(); - expr_ref _e(e, m); - ctx.get_rewriter()(_e); - if (m.is_true(_e)) return; - assert_axiom(_e); - } - - expr * theory_str::rewrite_implication(expr * premise, expr * conclusion) { - ast_manager & m = get_manager(); - return m.mk_or(mk_not(m, premise), conclusion); - } - - void theory_str::assert_implication(expr * premise, expr * conclusion) { - ast_manager & m = get_manager(); - TRACE(str, tout << "asserting implication " << mk_ismt2_pp(premise, m) << " -> " << mk_ismt2_pp(conclusion, m) << std::endl;); - expr_ref axiom(m.mk_or(mk_not(m, premise), conclusion), m); - assert_axiom(axiom); - } - - bool theory_str::internalize_atom(app * atom, bool gate_ctx) { - return internalize_term(atom); - } - - bool theory_str::internalize_term(app * term) { - ast_manager & m = get_manager(); - SASSERT(term->get_family_id() == get_family_id()); - - TRACE(str, tout << "internalizing term: " << mk_ismt2_pp(term, get_manager()) << std::endl;); - - // emulation of user_smt_theory::internalize_term() - - unsigned num_args = term->get_num_args(); - for (unsigned i = 0; i < num_args; ++i) { - ctx.internalize(term->get_arg(i), false); - } - if (ctx.e_internalized(term)) { - enode * e = ctx.get_enode(term); - mk_var(e); - return true; - } - // m_parents.push_back(term); - enode * e = ctx.mk_enode(term, false, m.is_bool(term), true); - if (m.is_bool(term)) { - bool_var bv = ctx.mk_bool_var(term); - ctx.set_var_theory(bv, get_id()); - ctx.set_enode_flag(bv, true); - } - // make sure every argument is attached to a theory variable - for (unsigned i = 0; i < num_args; ++i) { - enode * arg = e->get_arg(i); - theory_var v_arg = mk_var(arg); - TRACE(str, tout << "arg has theory var #" << v_arg << std::endl;); (void)v_arg; - } - - theory_var v = mk_var(e); - TRACE(str, tout << "term has theory var #" << v << std::endl;); (void)v; - - if (opt_EagerStringConstantLengthAssertions && u.str.is_string(term)) { - TRACE(str, tout << "eagerly asserting length of string term " << mk_pp(term, m) << std::endl;); - m_basicstr_axiom_todo.insert(e); - } - return true; - } - - enode* theory_str::ensure_enode(expr* e) { - if (!ctx.e_internalized(e)) { - ctx.internalize(e, false); - } - enode* n = ctx.get_enode(e); - ctx.mark_as_relevant(n); - return n; - } - - void theory_str::refresh_theory_var(expr * e) { - enode * en = ensure_enode(e); - theory_var v = mk_var(en); (void)v; - TRACE(str, tout << "refresh " << mk_pp(e, get_manager()) << ": v#" << v << std::endl;); - if (e->get_sort() == u.str.mk_string_sort()) { - m_basicstr_axiom_todo.push_back(en); - } - } - - theory_var theory_str::mk_var(enode* n) { - TRACE(str, tout << "mk_var for " << mk_pp(n->get_expr(), get_manager()) << std::endl;); - if (!(n->get_expr()->get_sort() == u.str.mk_string_sort())) { - return null_theory_var; - } - if (is_attached_to_var(n)) { - TRACE(str, tout << "already attached to theory var" << std::endl;); - return n->get_th_var(get_id()); - } else { - theory_var v = theory::mk_var(n); - m_find.mk_var(); - TRACE(str, tout << "new theory var v#" << v << " find " << m_find.find(v) << std::endl;); - ctx.attach_th_var(n, this, v); - ctx.mark_as_relevant(n); - return v; - } - } - - static void cut_vars_map_copy(obj_map & dest, obj_map & src) { - for (auto const& kv : src) { - dest.insert(kv.m_key, 1); - } - } - - bool theory_str::has_self_cut(expr * n1, expr * n2) { - if (!cut_var_map.contains(n1)) { - return false; - } - if (!cut_var_map.contains(n2)) { - return false; - } - if (cut_var_map[n1].empty() || cut_var_map[n2].empty()) { - return false; - } - - for (auto const& kv : cut_var_map[n1].top()->vars) { - if (cut_var_map[n2].top()->vars.contains(kv.m_key)) { - return true; - } - } - return false; - } - - void theory_str::add_cut_info_one_node(expr * baseNode, int slevel, expr * node) { - // crash avoidance? - m_trail.push_back(baseNode); - m_trail.push_back(node); - if (!cut_var_map.contains(baseNode)) { - T_cut * varInfo = alloc(T_cut); - m_cut_allocs.push_back(varInfo); - varInfo->level = slevel; - varInfo->vars.insert(node, 1); - cut_var_map.insert(baseNode, std::stack()); - cut_var_map[baseNode].push(varInfo); - TRACE(str, tout << "add var info for baseNode=" << mk_pp(baseNode, get_manager()) << ", node=" << mk_pp(node, get_manager()) << " [" << slevel << "]" << std::endl;); - } else { - if (cut_var_map[baseNode].empty()) { - T_cut * varInfo = alloc(T_cut); - m_cut_allocs.push_back(varInfo); - varInfo->level = slevel; - varInfo->vars.insert(node, 1); - cut_var_map[baseNode].push(varInfo); - TRACE(str, tout << "add var info for baseNode=" << mk_pp(baseNode, get_manager()) << ", node=" << mk_pp(node, get_manager()) << " [" << slevel << "]" << std::endl;); - } else { - if (cut_var_map[baseNode].top()->level < slevel) { - T_cut * varInfo = alloc(T_cut); - m_cut_allocs.push_back(varInfo); - varInfo->level = slevel; - cut_vars_map_copy(varInfo->vars, cut_var_map[baseNode].top()->vars); - varInfo->vars.insert(node, 1); - cut_var_map[baseNode].push(varInfo); - TRACE(str, tout << "add var info for baseNode=" << mk_pp(baseNode, get_manager()) << ", node=" << mk_pp(node, get_manager()) << " [" << slevel << "]" << std::endl;); - } else if (cut_var_map[baseNode].top()->level == slevel) { - cut_var_map[baseNode].top()->vars.insert(node, 1); - TRACE(str, tout << "add var info for baseNode=" << mk_pp(baseNode, get_manager()) << ", node=" << mk_pp(node, get_manager()) << " [" << slevel << "]" << std::endl;); - } else { - get_manager().raise_exception("entered illegal state during add_cut_info_one_node()"); - } - } - } - } - - void theory_str::add_cut_info_merge(expr * destNode, int slevel, expr * srcNode) { - // crash avoidance? - m_trail.push_back(destNode); - m_trail.push_back(srcNode); - if (!cut_var_map.contains(srcNode)) { - get_manager().raise_exception("illegal state in add_cut_info_merge(): cut_var_map doesn't contain srcNode"); - } - - if (cut_var_map[srcNode].empty()) { - get_manager().raise_exception("illegal state in add_cut_info_merge(): cut_var_map[srcNode] is empty"); - } - - if (!cut_var_map.contains(destNode)) { - T_cut * varInfo = alloc(T_cut); - m_cut_allocs.push_back(varInfo); - varInfo->level = slevel; - cut_vars_map_copy(varInfo->vars, cut_var_map[srcNode].top()->vars); - cut_var_map.insert(destNode, std::stack()); - cut_var_map[destNode].push(varInfo); - TRACE(str, tout << "merge var info for destNode=" << mk_pp(destNode, get_manager()) << ", srcNode=" << mk_pp(srcNode, get_manager()) << " [" << slevel << "]" << std::endl;); - } else { - if (cut_var_map[destNode].empty() || cut_var_map[destNode].top()->level < slevel) { - T_cut * varInfo = alloc(T_cut); - m_cut_allocs.push_back(varInfo); - varInfo->level = slevel; - cut_vars_map_copy(varInfo->vars, cut_var_map[destNode].top()->vars); - cut_vars_map_copy(varInfo->vars, cut_var_map[srcNode].top()->vars); - cut_var_map[destNode].push(varInfo); - TRACE(str, tout << "merge var info for destNode=" << mk_pp(destNode, get_manager()) << ", srcNode=" << mk_pp(srcNode, get_manager()) << " [" << slevel << "]" << std::endl;); - } else if (cut_var_map[destNode].top()->level == slevel) { - cut_vars_map_copy(cut_var_map[destNode].top()->vars, cut_var_map[srcNode].top()->vars); - TRACE(str, tout << "merge var info for destNode=" << mk_pp(destNode, get_manager()) << ", srcNode=" << mk_pp(srcNode, get_manager()) << " [" << slevel << "]" << std::endl;); - } else { - get_manager().raise_exception("illegal state in add_cut_info_merge(): inconsistent slevels"); - } - } - } - - void theory_str::check_and_init_cut_var(expr * node) { - if (cut_var_map.contains(node)) { - return; - } else if (!u.str.is_string(node)) { - add_cut_info_one_node(node, -1, node); - } - } - - literal theory_str::mk_literal(expr* _e) { - ast_manager & m = get_manager(); - expr_ref e(_e, m); - ensure_enode(e); - return ctx.get_literal(e); - } - - app * theory_str::mk_int(int n) { - return m_autil.mk_numeral(rational(n), true); - } - - app * theory_str::mk_int(rational & q) { - return m_autil.mk_numeral(q, true); - } - - void theory_str::track_variable_scope(expr * var) { - if (internal_variable_scope_levels.find(sLevel) == internal_variable_scope_levels.end()) { - internal_variable_scope_levels[sLevel] = obj_hashtable(); - } - internal_variable_scope_levels[sLevel].insert(var); - } - - app * theory_str::mk_internal_xor_var() { - return mk_int_var("$$_xor"); - } - - app * theory_str::mk_fresh_const(char const* name, sort* s) { - string_buffer<64> buffer; - buffer << name; - buffer << "!tmp"; - buffer << m_fresh_id; - m_fresh_id++; - return u.mk_skolem(symbol(buffer.c_str()), 0, nullptr, s); - } - - - app * theory_str::mk_int_var(std::string name) { - ast_manager & m = get_manager(); - - TRACE(str, tout << "creating integer variable " << name << " at scope level " << sLevel << std::endl;); - - sort * int_sort = m.mk_sort(m_autil.get_family_id(), INT_SORT); - app * a = mk_fresh_const(name.c_str(), int_sort); - - ctx.internalize(a, false); - SASSERT(ctx.get_enode(a) != nullptr); - SASSERT(ctx.e_internalized(a)); - ctx.mark_as_relevant(a); - // I'm assuming that this combination will do the correct thing in the integer theory. - - //mk_var(ctx.get_enode(a)); - m_trail.push_back(a); - //variable_set.insert(a); - //internal_variable_set.insert(a); - //track_variable_scope(a); - - return a; - } - - app * theory_str::mk_str_var(std::string name) { - - TRACE(str, tout << "creating string variable " << name << " at scope level " << sLevel << std::endl;); - - sort * string_sort = u.str.mk_string_sort(); - app * a = mk_fresh_const(name.c_str(), string_sort); - m_trail.push_back(a); - - TRACE(str, tout << "a->get_family_id() = " << a->get_family_id() << std::endl - << "this->get_family_id() = " << this->get_family_id() << std::endl;); - - // I have a hunch that this may not get internalized for free... - ctx.internalize(a, false); - SASSERT(ctx.get_enode(a) != nullptr); - SASSERT(ctx.e_internalized(a)); - // this might help?? - mk_var(ctx.get_enode(a)); - m_basicstr_axiom_todo.push_back(ctx.get_enode(a)); - TRACE(str, tout << "add " << mk_pp(a, get_manager()) << " to m_basicstr_axiom_todo" << std::endl;); - - variable_set.insert(a); - internal_variable_set.insert(a); - track_variable_scope(a); - - return a; - } - - void theory_str::add_nonempty_constraint(expr * s) { - ast_manager & m = get_manager(); - - expr_ref ax1(mk_not(m, ctx.mk_eq_atom(s, mk_string(""))), m); - assert_axiom(ax1); - - { - // build LHS - expr_ref len_str(mk_strlen(s), m); - SASSERT(len_str); - // build RHS - expr_ref zero(m_autil.mk_numeral(rational(0), true), m); - SASSERT(zero); - // build LHS > RHS and assert - // we have to build !(LHS <= RHS) instead - expr_ref lhs_gt_rhs(mk_not(m, m_autil.mk_le(len_str, zero)), m); - SASSERT(lhs_gt_rhs); - assert_axiom(lhs_gt_rhs); - } - } - - app_ref theory_str::mk_nonempty_str_var() { - ast_manager & m = get_manager(); - - std::stringstream ss; - ss << tmpStringVarCount; - tmpStringVarCount++; - std::string name = "$$_str" + ss.str(); - - TRACE(str, tout << "creating nonempty string variable " << name << " at scope level " << sLevel << std::endl;); - - sort * string_sort = u.str.mk_string_sort(); - app_ref a(mk_fresh_const(name.c_str(), string_sort), m); - - ctx.internalize(a, false); - SASSERT(ctx.get_enode(a) != nullptr); - // this might help?? - mk_var(ctx.get_enode(a)); - - // assert a variation of the basic string axioms that ensures this string is nonempty - { - // build LHS - expr_ref len_str(mk_strlen(a), m); - SASSERT(len_str); - // build RHS - expr_ref zero(m_autil.mk_numeral(rational(0), true), m); - SASSERT(zero); - // build LHS > RHS and assert - // we have to build !(LHS <= RHS) instead - expr_ref lhs_gt_rhs(mk_not(m, m_autil.mk_le(len_str, zero)), m); - SASSERT(lhs_gt_rhs); - assert_axiom(lhs_gt_rhs); - } - - // add 'a' to variable sets, so we can keep track of it - m_trail.push_back(a); - variable_set.insert(a); - internal_variable_set.insert(a); - track_variable_scope(a); - - return a; - } - - app * theory_str::mk_contains(expr * haystack, expr * needle) { - app * contains = u.str.mk_contains(haystack, needle); // TODO double-check semantics/argument order - m_trail.push_back(contains); - // immediately force internalization so that axiom setup does not fail - ctx.internalize(contains, false); - set_up_axioms(contains); - return contains; - } - - // note, this invokes "special-case" handling for the start index being 0 - app * theory_str::mk_indexof(expr * haystack, expr * needle) { - app * indexof = u.str.mk_index(haystack, needle, mk_int(0)); - m_trail.push_back(indexof); - // immediately force internalization so that axiom setup does not fail - ctx.internalize(indexof, false); - set_up_axioms(indexof); - return indexof; - } - - app * theory_str::mk_strlen(expr * e) { - /*if (m_strutil.is_string(e)) {*/ if (false) { - zstring strval; - u.str.is_string(e, strval); - unsigned int len = strval.length(); - return m_autil.mk_numeral(rational(len), true); - } else { - if (false) { - // use cache - app * lenTerm = nullptr; - if (!length_ast_map.find(e, lenTerm)) { - lenTerm = u.str.mk_length(e); - length_ast_map.insert(e, lenTerm); - m_trail.push_back(lenTerm); - } - return lenTerm; - } else { - // always regen - return u.str.mk_length(e); - } - } - } - - /* - * Returns the simplified concatenation of two expressions, - * where either both expressions are constant strings - * or one expression is the empty string. - * If this precondition does not hold, the function returns nullptr. - * (note: this function was strTheory::Concat()) - */ - expr * theory_str::mk_concat_const_str(expr * n1, expr * n2) { - bool n1HasEqcValue = false; - bool n2HasEqcValue = false; - expr * v1 = get_eqc_value(n1, n1HasEqcValue); - expr * v2 = get_eqc_value(n2, n2HasEqcValue); - if (u.str.is_string(v1)) { - n1HasEqcValue = true; - } - if (u.str.is_string(v2)) { - n2HasEqcValue = true; - } - if (n1HasEqcValue && n2HasEqcValue) { - zstring n1_str; - u.str.is_string(v1, n1_str); - zstring n2_str; - u.str.is_string(v2, n2_str); - zstring result = n1_str + n2_str; - return mk_string(result); - } else if (n1HasEqcValue && !n2HasEqcValue) { - zstring n1_str; - u.str.is_string(v1, n1_str); - if (n1_str.empty()) { - return n2; - } - } else if (!n1HasEqcValue && n2HasEqcValue) { - zstring n2_str; - u.str.is_string(v2, n2_str); - if (n2_str.empty()) { - return n1; - } - } - return nullptr; - } - - expr * theory_str::mk_concat(expr * n1, expr * n2) { - ast_manager & m = get_manager(); - ENSURE(n1 != nullptr); - ENSURE(n2 != nullptr); - bool n1HasEqcValue = false; - bool n2HasEqcValue = false; - n1 = get_eqc_value(n1, n1HasEqcValue); - n2 = get_eqc_value(n2, n2HasEqcValue); - if (n1HasEqcValue && n2HasEqcValue) { - return mk_concat_const_str(n1, n2); - } else if (n1HasEqcValue && !n2HasEqcValue) { - bool n2_isConcatFunc = u.str.is_concat(to_app(n2)); - zstring n1_str; - u.str.is_string(n1, n1_str); - if (n1_str.empty()) { - return n2; - } - if (n2_isConcatFunc) { - expr * n2_arg0 = to_app(n2)->get_arg(0); - expr * n2_arg1 = to_app(n2)->get_arg(1); - if (u.str.is_string(n2_arg0)) { - n1 = mk_concat_const_str(n1, n2_arg0); // n1 will be a constant - n2 = n2_arg1; - } - } - } else if (!n1HasEqcValue && n2HasEqcValue) { - zstring n2_str; - u.str.is_string(n2, n2_str); - if (n2_str.empty()) { - return n1; - } - - if (u.str.is_concat(to_app(n1))) { - expr * n1_arg0 = to_app(n1)->get_arg(0); - expr * n1_arg1 = to_app(n1)->get_arg(1); - if (u.str.is_string(n1_arg1)) { - n1 = n1_arg0; - n2 = mk_concat_const_str(n1_arg1, n2); // n2 will be a constant - } - } - } else { - if (u.str.is_concat(to_app(n1)) && u.str.is_concat(to_app(n2))) { - expr * n1_arg0 = to_app(n1)->get_arg(0); - expr * n1_arg1 = to_app(n1)->get_arg(1); - expr * n2_arg0 = to_app(n2)->get_arg(0); - expr * n2_arg1 = to_app(n2)->get_arg(1); - if (u.str.is_string(n1_arg1) && u.str.is_string(n2_arg0)) { - expr * tmpN1 = n1_arg0; - expr * tmpN2 = mk_concat_const_str(n1_arg1, n2_arg0); - n1 = mk_concat(tmpN1, tmpN2); - n2 = n2_arg1; - } - } - } - - //------------------------------------------------------ - // * expr * ast1 = mk_2_arg_app(ctx, td->Concat, n1, n2); - // * expr * ast2 = mk_2_arg_app(ctx, td->Concat, n1, n2); - // Z3 treats (ast1) and (ast2) as two different nodes. - //------------------------------------------------------- - - expr * concatAst = nullptr; - - if (!concat_astNode_map.find(n1, n2, concatAst)) { - concatAst = u.str.mk_concat(n1, n2); - m_trail.push_back(concatAst); - concat_astNode_map.insert(n1, n2, concatAst); - - expr_ref concat_length(mk_strlen(concatAst), m); - - ptr_vector childrenVector; - get_nodes_in_concat(concatAst, childrenVector); - expr_ref_vector items(m); - for (auto el : childrenVector) { - items.push_back(mk_strlen(el)); - } - expr_ref lenAssert(ctx.mk_eq_atom(concat_length, m_autil.mk_add(items.size(), items.data())), m); - assert_axiom(lenAssert); - } - return concatAst; - } - - bool theory_str::can_propagate() { - return !m_basicstr_axiom_todo.empty() - || !m_concat_axiom_todo.empty() || !m_concat_eval_todo.empty() - || !m_library_aware_axiom_todo.empty() - || !m_delayed_axiom_setup_terms.empty() - || !m_persisted_axiom_todo.empty() - || (search_started && !m_delayed_assertions_todo.empty()) - ; - } - - void theory_str::propagate() { - candidate_model.reset(); - while (can_propagate()) { - TRACE(str, tout << "propagating..." << std::endl;); - while(true) { - // this can potentially recursively activate itself - unsigned start_count = m_basicstr_axiom_todo.size(); - ptr_vector axioms_tmp(m_basicstr_axiom_todo); - for (auto const& el : axioms_tmp) { - instantiate_basic_string_axioms(el); - } - unsigned end_count = m_basicstr_axiom_todo.size(); - if (end_count > start_count) { - TRACE(str, tout << "new basic string axiom terms added -- checking again" << std::endl;); - continue; - } else { - break; - } - } - m_basicstr_axiom_todo.reset(); - TRACE(str, tout << "reset m_basicstr_axiom_todo" << std::endl;); - - for (auto const& el : m_concat_axiom_todo) { - instantiate_concat_axiom(el); - } - m_concat_axiom_todo.reset(); - - for (auto const& el : m_concat_eval_todo) { - try_eval_concat(el); - } - m_concat_eval_todo.reset(); - - while(true) { - // Special handling: terms can recursively set up other terms - // (e.g. indexof can instantiate other indexof terms). - // - Copy the list so it can potentially be modified during setup. - // - Don't clear this list if new ones are added in the process; - // instead, set up all the new terms before proceeding. - // TODO see if any other propagate() worklists need this kind of handling - // TODO we really only need to check the new ones on each pass - unsigned start_count = m_library_aware_axiom_todo.size(); - ptr_vector axioms_tmp(m_library_aware_axiom_todo); - for (auto const& e : axioms_tmp) { - app * a = e->get_expr(); - if (u.str.is_stoi(a)) { - instantiate_axiom_str_to_int(e); - } else if (u.str.is_itos(a)) { - instantiate_axiom_int_to_str(e); - } else if (u.str.is_at(a)) { - instantiate_axiom_CharAt(e); - } else if (u.str.is_prefix(a)) { - instantiate_axiom_prefixof(e); - } else if (u.str.is_suffix(a)) { - instantiate_axiom_suffixof(e); - } else if (u.str.is_contains(a)) { - instantiate_axiom_Contains(e); - } else if (u.str.is_index(a)) { - instantiate_axiom_Indexof(e); - } else if (u.str.is_extract(a)) { - instantiate_axiom_Substr(e); - } else if (u.str.is_replace(a)) { - instantiate_axiom_Replace(e); - } else if (u.str.is_in_re(a)) { - instantiate_axiom_RegexIn(e); - } else if (u.str.is_is_digit(a)) { - instantiate_axiom_is_digit(e); - } else if (u.str.is_from_code(a)) { - instantiate_axiom_str_from_code(e); - } else if (u.str.is_to_code(a)) { - instantiate_axiom_str_to_code(e); - } else { - TRACE(str, tout << "BUG: unhandled library-aware term " << mk_pp(e->get_expr(), get_manager()) << std::endl;); - NOT_IMPLEMENTED_YET(); - } - } - unsigned end_count = m_library_aware_axiom_todo.size(); - if (end_count > start_count) { - TRACE(str, tout << "new library-aware terms added during axiom setup -- checking again" << std::endl;); - continue; - } else { - break; - } - } - //m_library_aware_axiom_todo.reset(); - unsigned nScopes = m_library_aware_trail_stack.get_num_scopes(); - m_library_aware_trail_stack.reset(); - for (unsigned i = 0; i < nScopes; ++i) { - m_library_aware_trail_stack.push_scope(); - } - - for (auto el : m_delayed_axiom_setup_terms) { - // I think this is okay - ctx.internalize(el, false); - set_up_axioms(el); - } - m_delayed_axiom_setup_terms.reset(); - - for (expr * a : m_persisted_axiom_todo) { - assert_axiom(a); - } - m_persisted_axiom_todo.reset(); - - if (search_started) { - for (auto const& el : m_delayed_assertions_todo) { - assert_axiom(el); - } - m_delayed_assertions_todo.reset(); - } - } - } - - /* - * Attempt to evaluate a concat over constant strings, - * and if this is possible, assert equality between the - * flattened string and the original term. - */ - - void theory_str::try_eval_concat(enode * cat) { - app * a_cat = cat->get_expr(); - SASSERT(u.str.is_concat(a_cat)); - - ast_manager & m = get_manager(); - - TRACE(str, tout << "attempting to flatten " << mk_pp(a_cat, m) << std::endl;); - - std::stack worklist; - zstring flattenedString(""); - bool constOK = true; - - { - app * arg0 = to_app(a_cat->get_arg(0)); - app * arg1 = to_app(a_cat->get_arg(1)); - - worklist.push(arg1); - worklist.push(arg0); - } - - while (constOK && !worklist.empty()) { - app * evalArg = worklist.top(); worklist.pop(); - zstring nextStr; - if (u.str.is_string(evalArg, nextStr)) { - flattenedString = flattenedString + nextStr; - } else if (u.str.is_concat(evalArg)) { - app * arg0 = to_app(evalArg->get_arg(0)); - app * arg1 = to_app(evalArg->get_arg(1)); - - worklist.push(arg1); - worklist.push(arg0); - } else { - TRACE(str, tout << "non-constant term in concat -- giving up." << std::endl;); - constOK = false; - break; - } - } - if (constOK) { - TRACE(str, tout << "flattened to \"" << flattenedString.encode() << '"' << std::endl;); - expr_ref constStr(mk_string(flattenedString), m); - expr_ref axiom(ctx.mk_eq_atom(a_cat, constStr), m); - assert_axiom(axiom); - } - } - - /* - * Instantiate an axiom of the following form: - * Length(Concat(x, y)) = Length(x) + Length(y) - */ - void theory_str::instantiate_concat_axiom(enode * cat) { - ast_manager & m = get_manager(); - app * a_cat = cat->get_expr(); - TRACE(str, tout << "instantiating concat axiom for " << mk_ismt2_pp(a_cat, m) << std::endl;); - if (!u.str.is_concat(a_cat)) { - return; - } - - // build LHS - expr_ref len_xy(m); - len_xy = mk_strlen(a_cat); - SASSERT(len_xy); - - // build RHS: start by extracting x and y from Concat(x, y) - SASSERT(a_cat->get_num_args() == 2); - app * a_x = to_app(a_cat->get_arg(0)); - app * a_y = to_app(a_cat->get_arg(1)); - - expr_ref len_x(m); - len_x = mk_strlen(a_x); - SASSERT(len_x); - - expr_ref len_y(m); - len_y = mk_strlen(a_y); - SASSERT(len_y); - - // now build len_x + len_y - expr_ref len_x_plus_len_y(m); - len_x_plus_len_y = m_autil.mk_add(len_x, len_y); - SASSERT(len_x_plus_len_y); - - // finally assert equality between the two subexpressions - app * eq = m.mk_eq(len_xy, len_x_plus_len_y); - SASSERT(eq); - assert_axiom(eq); - } - - /* - * Add axioms that are true for any string variable: - * 1. Length(x) >= 0 - * 2. Length(x) == 0 <=> x == "" - * If the term is a string constant, we can assert something stronger: - * Length(x) == strlen(x) - */ - void theory_str::instantiate_basic_string_axioms(enode * str) { - ast_manager & m = get_manager(); - - TRACE(str, tout << "set up basic string axioms on " << mk_pp(str->get_expr(), m) << std::endl;); - - { - sort * a_sort = str->get_expr()->get_sort(); - sort * str_sort = u.str.mk_string_sort(); - if (a_sort != str_sort) { - TRACE(str, tout << "WARNING: not setting up string axioms on non-string term " << mk_pp(str->get_expr(), m) << std::endl;); - return; - } - } - - // TESTING: attempt to avoid a crash here when a variable goes out of scope - if (str->get_iscope_lvl() > ctx.get_scope_level()) { - TRACE(str, tout << "WARNING: skipping axiom setup on out-of-scope string term" << std::endl;); - return; - } - - // generate a stronger axiom for constant strings - app * a_str = str->get_expr(); - - if (u.str.is_string(a_str)) { - expr_ref len_str(m); - len_str = mk_strlen(a_str); - SASSERT(len_str); - - zstring strconst; - u.str.is_string(str->get_expr(), strconst); - TRACE(str, tout << "instantiating constant string axioms for \"" << strconst.encode() << '"' << std::endl;); - unsigned int l = strconst.length(); - expr_ref len(m_autil.mk_numeral(rational(l), true), m); - - literal lit(mk_eq(len_str, len, false)); - ctx.mark_as_relevant(lit); - if (m.has_trace_stream()) log_axiom_instantiation(ctx.bool_var2expr(lit.var())); - ctx.mk_th_axiom(get_id(), 1, &lit); - if (m.has_trace_stream()) m.trace_stream() << "[end-of-instance]\n"; - } else { - // build axiom 1: Length(a_str) >= 0 - { - // build LHS - expr_ref len_str(m); - len_str = mk_strlen(a_str); - SASSERT(len_str); - // build RHS - expr_ref zero(m); - zero = m_autil.mk_numeral(rational(0), true); - SASSERT(zero); - // build LHS >= RHS and assert - app * lhs_ge_rhs = m_autil.mk_ge(len_str, zero); - SASSERT(lhs_ge_rhs); - TRACE(str, tout << "string axiom 1: " << mk_ismt2_pp(lhs_ge_rhs, m) << std::endl;); - assert_axiom(lhs_ge_rhs); - } - - // build axiom 2: Length(a_str) == 0 <=> a_str == "" - { - // build LHS of iff - expr_ref len_str(m); - len_str = mk_strlen(a_str); - SASSERT(len_str); - expr_ref zero(m); - zero = m_autil.mk_numeral(rational(0), true); - SASSERT(zero); - expr_ref lhs(m); - lhs = ctx.mk_eq_atom(len_str, zero); - SASSERT(lhs); - // build RHS of iff - expr_ref empty_str(m); - empty_str = mk_string(""); - SASSERT(empty_str); - expr_ref rhs(m); - rhs = ctx.mk_eq_atom(a_str, empty_str); - SASSERT(rhs); - // build LHS <=> RHS and assert - TRACE(str, tout << "string axiom 2: " << mk_ismt2_pp(lhs, m) << " <=> " << mk_ismt2_pp(rhs, m) << std::endl;); - literal l(mk_eq(lhs, rhs, true)); - ctx.mark_as_relevant(l); - if (m.has_trace_stream()) log_axiom_instantiation(ctx.bool_var2expr(l.var())); - ctx.mk_th_axiom(get_id(), 1, &l); - if (m.has_trace_stream()) m.trace_stream() << "[end-of-instance]\n"; - } - - } - } - - /* - * Add an axiom of the form: - * (lhs == rhs) -> ( Length(lhs) == Length(rhs) ) - */ - void theory_str::instantiate_str_eq_length_axiom(enode * lhs, enode * rhs) { - ast_manager & m = get_manager(); - - app * a_lhs = lhs->get_expr(); - app * a_rhs = rhs->get_expr(); - - // build premise: (lhs == rhs) - expr_ref premise(ctx.mk_eq_atom(a_lhs, a_rhs), m); - - // build conclusion: ( Length(lhs) == Length(rhs) ) - expr_ref len_lhs(mk_strlen(a_lhs), m); - SASSERT(len_lhs); - expr_ref len_rhs(mk_strlen(a_rhs), m); - SASSERT(len_rhs); - expr_ref conclusion(ctx.mk_eq_atom(len_lhs, len_rhs), m); - - TRACE(str, tout << "string-eq length-eq axiom: " - << mk_ismt2_pp(premise, m) << " -> " << mk_ismt2_pp(conclusion, m) << std::endl;); - assert_implication(premise, conclusion); - } - - void theory_str::instantiate_axiom_CharAt(enode * e) { - ast_manager & m = get_manager(); - expr* arg0 = nullptr, *arg1 = nullptr; - app * expr = e->get_expr(); - if (axiomatized_terms.contains(expr)) { - TRACE(str, tout << "already set up CharAt axiom for " << mk_pp(expr, m) << std::endl;); - return; - } - axiomatized_terms.insert(expr); - VERIFY(u.str.is_at(expr, arg0, arg1)); - - TRACE(str, tout << "instantiate CharAt axiom for " << mk_pp(expr, m) << std::endl;); - - // change subvaribale names to solve some invalide model problems - expr_ref ts0(mk_str_var("ch_ts0"), m); - expr_ref ts1(mk_str_var("ch_ts1"), m); - expr_ref ts2(mk_str_var("ch_ts2"), m); - - expr_ref cond(m.mk_and( - m_autil.mk_ge(arg1, mk_int(0)), - m_autil.mk_lt(arg1, mk_strlen(arg0))), m); - - expr_ref_vector and_item(m); - and_item.push_back(ctx.mk_eq_atom(arg0, mk_concat(ts0, mk_concat(ts1, ts2)))); - and_item.push_back(ctx.mk_eq_atom(arg1, mk_strlen(ts0))); - and_item.push_back(ctx.mk_eq_atom(mk_strlen(ts1), mk_int(1))); - - expr_ref thenBranch(::mk_and(and_item)); - expr_ref elseBranch(ctx.mk_eq_atom(ts1, mk_string("")), m); - expr_ref axiom(m.mk_ite(cond, thenBranch, elseBranch), m); - expr_ref reductionVar(ctx.mk_eq_atom(expr, ts1), m); - expr_ref finalAxiom(m.mk_and(axiom, reductionVar), m); - ctx.get_rewriter()(finalAxiom); - assert_axiom(finalAxiom); - } - - void theory_str::instantiate_axiom_prefixof(enode * e) { - ast_manager & m = get_manager(); - - app * expr = e->get_expr(); - if (axiomatized_terms.contains(expr)) { - TRACE(str, tout << "already set up prefixof axiom for " << mk_pp(expr, m) << std::endl;); - return; - } - axiomatized_terms.insert(expr); - - TRACE(str, tout << "instantiate prefixof axiom for " << mk_pp(expr, m) << std::endl;); - - // change subvaribale names to solve some invalide model problems - expr_ref ts0(mk_str_var("p_ts0"), m); - expr_ref ts1(mk_str_var("p_ts1"), m); - - expr_ref_vector innerItems(m); - innerItems.push_back(ctx.mk_eq_atom(expr->get_arg(1), mk_concat(ts0, ts1))); - innerItems.push_back(ctx.mk_eq_atom(mk_strlen(ts0), mk_strlen(expr->get_arg(0)))); - innerItems.push_back(m.mk_ite(ctx.mk_eq_atom(ts0, expr->get_arg(0)), expr, mk_not(m, expr))); - expr_ref then1(m.mk_and(innerItems.size(), innerItems.data()), m); - SASSERT(then1); - - // the top-level condition is Length(arg0) >= Length(arg1) - expr_ref topLevelCond( - m_autil.mk_ge( - m_autil.mk_add( - mk_strlen(expr->get_arg(1)), m_autil.mk_mul(mk_int(-1), mk_strlen(expr->get_arg(0)))), - mk_int(0)) - , m); - SASSERT(topLevelCond); - - expr_ref finalAxiom(m.mk_ite(topLevelCond, then1, mk_not(m, expr)), m); - SASSERT(finalAxiom); - assert_axiom(finalAxiom); - } - - void theory_str::instantiate_axiom_suffixof(enode * e) { - ast_manager & m = get_manager(); - - app * expr = e->get_expr(); - if (axiomatized_terms.contains(expr)) { - TRACE(str, tout << "already set up suffixof axiom for " << mk_pp(expr, m) << std::endl;); - return; - } - axiomatized_terms.insert(expr); - - TRACE(str, tout << "instantiate suffixof axiom for " << mk_pp(expr, m) << std::endl;); - - // change subvaribale names to solve some invalide model problems - expr_ref ts0(mk_str_var("s_ts0"), m); - expr_ref ts1(mk_str_var("s_ts1"), m); - - expr_ref_vector innerItems(m); - innerItems.push_back(ctx.mk_eq_atom(expr->get_arg(1), mk_concat(ts0, ts1))); - innerItems.push_back(ctx.mk_eq_atom(mk_strlen(ts1), mk_strlen(expr->get_arg(0)))); - innerItems.push_back(m.mk_ite(ctx.mk_eq_atom(ts1, expr->get_arg(0)), expr, mk_not(m, expr))); - expr_ref then1(m.mk_and(innerItems.size(), innerItems.data()), m); - SASSERT(then1); - - // the top-level condition is Length(arg0) >= Length(arg1) - expr_ref topLevelCond( - m_autil.mk_ge( - m_autil.mk_add( - mk_strlen(expr->get_arg(1)), m_autil.mk_mul(mk_int(-1), mk_strlen(expr->get_arg(0)))), - mk_int(0)) - , m); - SASSERT(topLevelCond); - - expr_ref finalAxiom(m.mk_ite(topLevelCond, then1, mk_not(m, expr)), m); - SASSERT(finalAxiom); - assert_axiom(finalAxiom); - } - - void theory_str::instantiate_axiom_Contains(enode * e) { - ast_manager & m = get_manager(); - - app * ex = e->get_expr(); - if (axiomatized_terms.contains(ex)) { - TRACE(str, tout << "already set up Contains axiom for " << mk_pp(ex, m) << std::endl;); - return; - } - axiomatized_terms.insert(ex); - - // quick path, because this is necessary due to rewriter behaviour - // at minimum it should fix z3str/concat-006.smt2 - zstring haystackStr, needleStr; - if (u.str.is_string(ex->get_arg(0), haystackStr) && u.str.is_string(ex->get_arg(1), needleStr)) { - TRACE(str, tout << "eval constant Contains term " << mk_pp(ex, m) << std::endl;); - if (haystackStr.contains(needleStr)) { - assert_axiom(ex); - } else { - assert_axiom(mk_not(m, ex)); - } - return; - } - - { // register Contains() - expr * str = ex->get_arg(0); - expr * substr = ex->get_arg(1); - contains_map.push_back(ex); - std::pair key = std::pair(str, substr); - contain_pair_bool_map.insert(str, substr, ex); - if (!contain_pair_idx_map.contains(str)) { - contain_pair_idx_map.insert(str, std::set>()); - } - if (!contain_pair_idx_map.contains(substr)) { - contain_pair_idx_map.insert(substr, std::set>()); - } - contain_pair_idx_map[str].insert(key); - contain_pair_idx_map[substr].insert(key); - } - - TRACE(str, tout << "instantiate Contains axiom for " << mk_pp(ex, m) << std::endl;); - - // change subvaribale names to solve some invalide model problems - expr_ref ts0(mk_str_var("c_ts0"), m); - expr_ref ts1(mk_str_var("c_ts1"), m); - - expr_ref breakdownAssert(ctx.mk_eq_atom(ex, ctx.mk_eq_atom(ex->get_arg(0), mk_concat(ts0, mk_concat(ex->get_arg(1), ts1)))), m); - SASSERT(breakdownAssert); - assert_axiom(breakdownAssert); - } - - void theory_str::instantiate_axiom_Indexof(enode * e) { - th_rewriter & rw = ctx.get_rewriter(); - ast_manager & m = get_manager(); - - app * ex = e->get_expr(); - if (axiomatized_terms.contains(ex)) { - TRACE(str, tout << "already set up str.indexof axiom for " << mk_pp(ex, m) << std::endl;); - return; - } - SASSERT(ex->get_num_args() == 3); - - { - // Attempt to rewrite to an integer constant. If this succeeds, - // assert equality with that constant. - // The rewriter normally takes care of this for terms that are in scope - // at the beginning of the search. - // We perform the check here to catch terms that are added during the search. - expr_ref rwex(ex, m); - rw(rwex); - if (m_autil.is_numeral(rwex)) { - TRACE(str, tout << "constant expression " << mk_pp(ex, m) << " simplifies to " << mk_pp(rwex, m) << std::endl;); - assert_axiom(ctx.mk_eq_atom(ex, rwex)); - axiomatized_terms.insert(ex); - return; - } - } - - expr * exHaystack = nullptr; - expr * exNeedle = nullptr; - expr * exIndex = nullptr; - u.str.is_index(ex, exHaystack, exNeedle, exIndex); - - // if the third argument is exactly the integer 0, we can use this "simple" indexof; - // otherwise, we call the "extended" version - rational startingInteger; - if (!m_autil.is_numeral(exIndex, startingInteger) || !startingInteger.is_zero()) { - // "extended" indexof term with prefix - instantiate_axiom_Indexof_extended(e); - return; - } - axiomatized_terms.insert(ex); - - TRACE(str, tout << "instantiate str.indexof axiom for " << mk_pp(ex, m) << std::endl;); - - // change subvaribale names to solve some invalide model problems - expr_ref x1(mk_str_var("i_x1"), m); - expr_ref x2(mk_str_var("i_x2"), m); - - expr_ref condAst1(mk_contains(exHaystack, exNeedle), m); - expr_ref condAst2(m.mk_not(ctx.mk_eq_atom(exNeedle, mk_string(""))), m); - expr_ref condAst(m.mk_and(condAst1, condAst2), m); - SASSERT(condAst); - - // ----------------------- - // true branch - expr_ref_vector thenItems(m); - // args[0] = x1 . args[1] . x2 - thenItems.push_back(ctx.mk_eq_atom(exHaystack, mk_concat(x1, mk_concat(exNeedle, x2)))); - // indexAst = |x1| - thenItems.push_back(ctx.mk_eq_atom(ex, mk_strlen(x1))); - // args[0] = x3 . x4 - // /\ |x3| = |x1| + |args[1]| - 1 - // /\ ! contains(x3, args[1]) - // change subvaribale names to solve some invalide model problems - expr_ref x3(mk_str_var("i_x3"), m); - expr_ref x4(mk_str_var("i_x4"), m); - expr_ref tmpLen(m_autil.mk_add(ex, mk_strlen(ex->get_arg(1)), mk_int(-1)), m); - SASSERT(tmpLen); - thenItems.push_back(ctx.mk_eq_atom(exHaystack, mk_concat(x3, x4))); - thenItems.push_back(ctx.mk_eq_atom(mk_strlen(x3), tmpLen)); - thenItems.push_back(mk_not(m, mk_contains(x3, exNeedle))); - expr_ref thenBranch(mk_and(thenItems), m); - SASSERT(thenBranch); - - // ----------------------- - // false branch - expr_ref elseBranch(m.mk_ite( - ctx.mk_eq_atom(exNeedle, mk_string("")), - ctx.mk_eq_atom(ex, mk_int(0)), - ctx.mk_eq_atom(ex, mk_int(-1)) - ), m); - SASSERT(elseBranch); - - expr_ref breakdownAssert(m.mk_ite(condAst, thenBranch, elseBranch), m); - assert_axiom_rw(breakdownAssert); - - { - // heuristic: integrate with str.contains information - // (but don't introduce it if it isn't already in the instance) - expr_ref haystack(ex->get_arg(0), m), needle(ex->get_arg(1), m), startIdx(ex->get_arg(2), m); - expr_ref zeroAst(mk_int(0), m); - // (H contains N) <==> (H indexof N, 0) >= 0 - expr_ref premise(u.str.mk_contains(haystack, needle), m); - ctx.internalize(premise, false); - expr_ref conclusion(m_autil.mk_ge(ex, zeroAst), m); - expr_ref containsAxiom(ctx.mk_eq_atom(premise, conclusion), m); - SASSERT(containsAxiom); - - // we can't assert this during init_search as it breaks an invariant if the instance becomes inconsistent - //m_delayed_axiom_setup_terms.push_back(containsAxiom); - } - } - - void theory_str::instantiate_axiom_Indexof_extended(enode * _e) { - th_rewriter & rw = ctx.get_rewriter(); - ast_manager & m = get_manager(); - - app * e = _e->get_expr(); - if (axiomatized_terms.contains(e)) { - TRACE(str, tout << "already set up extended str.indexof axiom for " << mk_pp(e, m) << std::endl;); - return; - } - SASSERT(e->get_num_args() == 3); - axiomatized_terms.insert(e); - - TRACE(str, tout << "instantiate extended str.indexof axiom for " << mk_pp(e, m) << std::endl;); - - // str.indexof(H, N, i): - // i < 0 --> -1 - // i == 0 --> str.indexof(H, N, 0) - // i >= len(H) --> -1 - // 0 < i < len(H) --> - // H = hd ++ tl - // len(hd) = i - // i + str.indexof(tl, N, 0) - - expr * H = nullptr; // "haystack" - expr * N = nullptr; // "needle" - expr * i = nullptr; // start index - u.str.is_index(e, H, N, i); - - expr_ref minus_one(m_autil.mk_numeral(rational::minus_one(), true), m); - expr_ref zero(m_autil.mk_numeral(rational::zero(), true), m); - expr_ref empty_string(mk_string(""), m); - - // case split - - // case 1: i < 0 - { - expr_ref premise(m_autil.mk_le(i, minus_one), m); - expr_ref conclusion(ctx.mk_eq_atom(e, minus_one), m); - expr_ref ax(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(ax); - } - - // case 1.1: N == "" and i out of range - { - expr_ref premiseNEmpty(ctx.mk_eq_atom(N, empty_string), m); - // range check - expr_ref premiseRangeLower(m_autil.mk_ge(i, zero), m); - expr_ref premiseRangeUpper(m_autil.mk_le(i, mk_strlen(H)), m); - expr_ref premiseRange(m.mk_and(premiseRangeLower, premiseRangeUpper), m); - expr_ref premise(m.mk_and(premiseNEmpty, m.mk_not(premiseRange)), m); - expr_ref conclusion(ctx.mk_eq_atom(e, minus_one), m); - expr_ref finalAxiom(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(finalAxiom); - } - - // case 1.2: N == "" and i within range - { - expr_ref premiseNEmpty(ctx.mk_eq_atom(N, empty_string), m); - // range check - expr_ref premiseRangeLower(m_autil.mk_ge(i, zero), m); - expr_ref premiseRangeUpper(m_autil.mk_le(i, mk_strlen(H)), m); - expr_ref premiseRange(m.mk_and(premiseRangeLower, premiseRangeUpper), m); - expr_ref premise(m.mk_and(premiseNEmpty, premiseRange), m); - expr_ref conclusion(ctx.mk_eq_atom(e, i), m); - expr_ref finalAxiom(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(finalAxiom); - } - - // case 2: i = 0 - { - expr_ref premise1(ctx.mk_eq_atom(i, zero), m); - expr_ref premise2(m.mk_not(ctx.mk_eq_atom(N, empty_string)), m); - expr_ref premise(m.mk_and(premise1, premise2), m); - // reduction to simpler case - expr_ref conclusion(ctx.mk_eq_atom(e, mk_indexof(H, N)), m); - expr_ref ax(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(ax); - } - // case 3: i >= len(H) - { - expr_ref premise1(m_autil.mk_ge(m_autil.mk_add(i, m_autil.mk_mul(minus_one, mk_strlen(H))), zero), m); - expr_ref premise2(m.mk_not(ctx.mk_eq_atom(N, empty_string)), m); - expr_ref premise(m.mk_and(premise1, premise2), m); - expr_ref conclusion(ctx.mk_eq_atom(e, minus_one), m); - expr_ref ax(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(ax); - } - // case 3.5: H doesn't contain N - { - expr_ref premise(m.mk_not(u.str.mk_contains(H, N)), m); - expr_ref conclusion(ctx.mk_eq_atom(e, minus_one), m); - expr_ref ax(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(ax); - } - // case 4: 0 < i < len(H), N non-empty, and H contains N - { - expr_ref premise1(m_autil.mk_gt(i, zero), m); - //expr_ref premise2(m_autil.mk_lt(i, mk_strlen(H)), m); - expr_ref premise2(m.mk_not(m_autil.mk_ge(m_autil.mk_add(i, m_autil.mk_mul(minus_one, mk_strlen(H))), zero)), m); - expr_ref premise3(u.str.mk_contains(H, N), m); - expr_ref premise4(m.mk_not(ctx.mk_eq_atom(N, mk_string(""))), m); - - expr_ref_vector premises(m); - premises.push_back(premise1); - premises.push_back(premise2); - premises.push_back(premise3); - premises.push_back(premise4); - expr_ref premise(mk_and(premises), m); - - expr_ref hd(mk_str_var("hd"), m); - expr_ref tl(mk_str_var("tl"), m); - - expr_ref_vector conclusion_terms(m); - conclusion_terms.push_back(ctx.mk_eq_atom(H, mk_concat(hd, tl))); - conclusion_terms.push_back(ctx.mk_eq_atom(mk_strlen(hd), i)); - conclusion_terms.push_back(u.str.mk_contains(tl, N)); - conclusion_terms.push_back(ctx.mk_eq_atom(e, m_autil.mk_add(i, mk_indexof(tl, N)))); - - expr_ref conclusion(mk_and(conclusion_terms), m); - expr_ref ax(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(ax); - } - - { - // heuristic: integrate with str.contains information - // (but don't introduce it if it isn't already in the instance) - // (0 <= i < len(H)) ==> (H contains N) <==> (H indexof N, i) >= 0 - expr_ref precondition1(m_autil.mk_gt(i, minus_one), m); - //expr_ref precondition2(m_autil.mk_lt(i, mk_strlen(H)), m); - expr_ref precondition2(m.mk_not(m_autil.mk_ge(m_autil.mk_add(i, m_autil.mk_mul(minus_one, mk_strlen(H))), zero)), m); - expr_ref precondition3(m.mk_not(ctx.mk_eq_atom(N, mk_string(""))), m); - expr_ref precondition(m.mk_and(precondition1, precondition2, precondition3), m); - rw(precondition); - - expr_ref premise(u.str.mk_contains(H, N), m); - ctx.internalize(premise, false); - expr_ref conclusion(m_autil.mk_ge(e, zero), m); - expr_ref containsAxiom(ctx.mk_eq_atom(premise, conclusion), m); - expr_ref finalAxiom(rewrite_implication(precondition, containsAxiom), m); - SASSERT(finalAxiom); - // we can't assert this during init_search as it breaks an invariant if the instance becomes inconsistent - m_delayed_assertions_todo.push_back(finalAxiom); - } - } - - void theory_str::instantiate_axiom_LastIndexof(enode * e) { - ast_manager & m = get_manager(); - - app * expr = e->get_expr(); - if (axiomatized_terms.contains(expr)) { - TRACE(str, tout << "already set up LastIndexof axiom for " << mk_pp(expr, m) << std::endl;); - return; - } - axiomatized_terms.insert(expr); - - TRACE(str, tout << "instantiate LastIndexof axiom for " << mk_pp(expr, m) << std::endl;); - - // change subvaribale names to solve some invalide model problems - expr_ref x1(mk_str_var("li_x1"), m); - expr_ref x2(mk_str_var("li_x2"), m); - expr_ref indexAst(mk_int_var("index"), m); - expr_ref_vector items(m); - - // args[0] = x1 . args[1] . x2 - expr_ref eq1(ctx.mk_eq_atom(expr->get_arg(0), mk_concat(x1, mk_concat(expr->get_arg(1), x2))), m); - expr_ref arg0HasArg1(mk_contains(expr->get_arg(0), expr->get_arg(1)), m); // arg0HasArg1 = Contains(args[0], args[1]) - items.push_back(ctx.mk_eq_atom(arg0HasArg1, eq1)); - - - expr_ref condAst(arg0HasArg1, m); - //---------------------------- - // true branch - expr_ref_vector thenItems(m); - thenItems.push_back(m_autil.mk_ge(indexAst, mk_int(0))); - // args[0] = x1 . args[1] . x2 - // x1 doesn't contain args[1] - thenItems.push_back(mk_not(m, mk_contains(x2, expr->get_arg(1)))); - thenItems.push_back(ctx.mk_eq_atom(indexAst, mk_strlen(x1))); - - bool canSkip = false; - zstring arg1Str; - if (u.str.is_string(expr->get_arg(1), arg1Str)) { - if (arg1Str.length() == 1) { - canSkip = true; - } - } - - if (!canSkip) { - // args[0] = x3 . x4 /\ |x3| = |x1| + 1 /\ ! contains(x4, args[1]) - // change subvaribale names to solve some invalide model problems - expr_ref x3(mk_str_var("li_x3"), m); - expr_ref x4(mk_str_var("li_x4"), m); - expr_ref tmpLen(m_autil.mk_add(indexAst, mk_int(1)), m); - thenItems.push_back(ctx.mk_eq_atom(expr->get_arg(0), mk_concat(x3, x4))); - thenItems.push_back(ctx.mk_eq_atom(mk_strlen(x3), tmpLen)); - thenItems.push_back(mk_not(m, mk_contains(x4, expr->get_arg(1)))); - } - //---------------------------- - // else branch - expr_ref_vector elseItems(m); - elseItems.push_back(ctx.mk_eq_atom(indexAst, mk_int(-1))); - - items.push_back(m.mk_ite(condAst, m.mk_and(thenItems.size(), thenItems.data()), m.mk_and(elseItems.size(), elseItems.data()))); - - expr_ref breakdownAssert(m.mk_and(items.size(), items.data()), m); - SASSERT(breakdownAssert); - - expr_ref reduceToIndex(ctx.mk_eq_atom(expr, indexAst), m); - SASSERT(reduceToIndex); - - expr_ref finalAxiom(m.mk_and(breakdownAssert, reduceToIndex), m); - SASSERT(finalAxiom); - assert_axiom_rw(finalAxiom); - } - - void theory_str::instantiate_axiom_Substr(enode * _e) { - ast_manager & m = get_manager(); - expr* s = nullptr; - expr* i = nullptr; - expr* l = nullptr; - - app * e = _e->get_expr(); - if (axiomatized_terms.contains(e)) { - TRACE(str, tout << "already set up Substr axiom for " << mk_pp(e, m) << std::endl;); - return; - } - axiomatized_terms.insert(e); - - TRACE(str, tout << "instantiate Substr axiom for " << mk_pp(e, m) << std::endl;); - - VERIFY(u.str.is_extract(e, s, i, l)); - - // e = substr(s, i, l) - expr_ref x(mk_str_var("substrPre"), m); - expr_ref ls(mk_strlen(s), m); - expr_ref lx(mk_strlen(x), m); - expr_ref le(mk_strlen(e), m); - expr_ref ls_minus_i_l(m_autil.mk_sub(m_autil.mk_sub(ls, i), l), m); - expr_ref y(mk_str_var("substrPost"), m); - expr_ref xe(mk_concat(x, e), m); - expr_ref xey(mk_concat(xe, y), m); - expr_ref zero(mk_int(0), m); - - expr_ref i_ge_0(m_autil.mk_ge(i, zero), m); - expr_ref i_le_ls(m_autil.mk_le(m_autil.mk_sub(i, ls), zero), m); - expr_ref ls_le_i(m_autil.mk_le(m_autil.mk_sub(ls, i), zero), m); - expr_ref ls_ge_li(m_autil.mk_ge(ls_minus_i_l, zero), m); - expr_ref l_ge_0(m_autil.mk_ge(l, zero), m); - expr_ref l_le_0(m_autil.mk_le(l, zero), m); - expr_ref ls_le_0(m_autil.mk_le(ls, zero), m); - expr_ref le_is_0(ctx.mk_eq_atom(le, zero), m); - - // 0 <= i & i <= |s| & 0 <= l => xey = s - { - expr_ref clause(m.mk_or(~i_ge_0, ~i_le_ls, ~l_ge_0, ctx.mk_eq_atom(xey, s)), m); - assert_axiom_rw(clause); - } - // 0 <= i & i <= |s| => |x| = i - { - expr_ref clause(m.mk_or(~i_ge_0, ~i_le_ls, ctx.mk_eq_atom(lx, i)), m); - assert_axiom_rw(clause); - } - // 0 <= i & i <= |s| & l >= 0 & |s| >= l + i => |e| = l - { - expr_ref_vector terms(m); - terms.push_back(~i_ge_0); - terms.push_back(~i_le_ls); - terms.push_back(~l_ge_0); - terms.push_back(~ls_ge_li); - terms.push_back(ctx.mk_eq_atom(le, l)); - expr_ref clause(mk_or(terms), m); - assert_axiom_rw(clause); - } - // 0 <= i & i <= |s| & |s| < l + i => |e| = |s| - i - { - expr_ref_vector terms(m); - terms.push_back(~i_ge_0); - terms.push_back(~i_le_ls); - terms.push_back(~l_ge_0); - terms.push_back(ls_ge_li); - terms.push_back(ctx.mk_eq_atom(le, m_autil.mk_sub(ls, i))); - expr_ref clause(mk_or(terms), m); - assert_axiom_rw(clause); - } - // i < 0 => |e| = 0 - { - expr_ref clause(m.mk_or(i_ge_0, le_is_0), m); - assert_axiom_rw(clause); - } - // |s| <= i => |e| = 0 - { - expr_ref clause(m.mk_or(~ls_le_i, le_is_0), m); - assert_axiom_rw(clause); - } - // |s| <= 0 => |e| = 0 - { - expr_ref clause(m.mk_or(~ls_le_0, le_is_0), m); - assert_axiom_rw(clause); - } - // l <= 0 => |e| = 0 - { - expr_ref clause(m.mk_or(~l_le_0, le_is_0), m); - assert_axiom_rw(clause); - } - // |e| = 0 & i >= 0 & |s| > i & |s| > 0 => l <= 0 - { - expr_ref_vector terms(m); - terms.push_back(~le_is_0); - terms.push_back(~i_ge_0); - terms.push_back(ls_le_i); - terms.push_back(ls_le_0); - terms.push_back(l_le_0); - expr_ref clause(mk_or(terms), m); - assert_axiom_rw(clause); - } - - // Auxiliary axioms - - // |e| <= |s| - { - expr_ref axiom(m_autil.mk_le(le, ls), m); - assert_axiom_rw(axiom); - } - - // l >= 0 => |e| <= len - { - expr_ref premise(m_autil.mk_ge(l, zero), m); - expr_ref conclusion(m_autil.mk_le(le, l), m); - expr_ref axiom(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(axiom); - } - } - - // (str.replace s t t') is the string obtained by replacing the first occurrence - // of t in s, if any, by t'. Note that if t is empty, the result is to prepend - // t' to s; also, if t does not occur in s then the result is s. - void theory_str::instantiate_axiom_Replace(enode * e) { - ast_manager & m = get_manager(); - - app * ex = e->get_expr(); - if (axiomatized_terms.contains(ex)) { - TRACE(str, tout << "already set up Replace axiom for " << mk_pp(ex, m) << std::endl;); - return; - } - axiomatized_terms.insert(ex); - - TRACE(str, tout << "instantiate Replace axiom for " << mk_pp(ex, m) << std::endl;); - - // change subvaribale names to solve some invalide model problems - expr_ref x1(mk_str_var("rp_x1"), m); - expr_ref x2(mk_str_var("rp_x2"), m); - expr_ref i1(mk_int_var("i1"), m); - expr_ref result(mk_str_var("rp_result"), m); - - expr * replaceS = nullptr; - expr * replaceT = nullptr; - expr * replaceTPrime = nullptr; - VERIFY(u.str.is_replace(ex, replaceS, replaceT, replaceTPrime)); - - // t empty => result = (str.++ t' s) - expr_ref emptySrcAst(ctx.mk_eq_atom(replaceT, mk_string("")), m); - expr_ref prependTPrimeToS(ctx.mk_eq_atom(result, mk_concat(replaceTPrime, replaceS)), m); - - // condAst = Contains(args[0], args[1]) - expr_ref condAst(mk_contains(ex->get_arg(0), ex->get_arg(1)), m); - // ----------------------- - // true branch - expr_ref_vector thenItems(m); - // args[0] = x1 . args[1] . x2 - thenItems.push_back(ctx.mk_eq_atom(ex->get_arg(0), mk_concat(x1, mk_concat(ex->get_arg(1), x2)))); - // i1 = |x1| - thenItems.push_back(ctx.mk_eq_atom(i1, mk_strlen(x1))); - // args[0] = x3 . x4 /\ |x3| = |x1| + |args[1]| - 1 /\ ! contains(x3, args[1]) - // change subvaribale names to solve some invalide model problems - expr_ref x3(mk_str_var("rp_x3"), m); - expr_ref x4(mk_str_var("rp_x4"), m); - expr_ref tmpLen(m_autil.mk_add(i1, mk_strlen(ex->get_arg(1)), mk_int(-1)), m); - thenItems.push_back(ctx.mk_eq_atom(ex->get_arg(0), mk_concat(x3, x4))); - thenItems.push_back(ctx.mk_eq_atom(mk_strlen(x3), tmpLen)); - thenItems.push_back(mk_not(m, mk_contains(x3, ex->get_arg(1)))); - thenItems.push_back(ctx.mk_eq_atom(result, mk_concat(x1, mk_concat(ex->get_arg(2), x2)))); - // ----------------------- - // false branch - expr_ref elseBranch(ctx.mk_eq_atom(result, ex->get_arg(0)), m); - - expr_ref breakdownAssert(m.mk_ite(emptySrcAst, prependTPrimeToS, - m.mk_ite(condAst, mk_and(thenItems), elseBranch)), m); - expr_ref breakdownAssert_rw(breakdownAssert, m); - assert_axiom_rw(breakdownAssert_rw); - - expr_ref reduceToResult(ctx.mk_eq_atom(ex, result), m); - expr_ref reduceToResult_rw(reduceToResult, m); - assert_axiom_rw(reduceToResult_rw); - } - - void theory_str::instantiate_axiom_str_to_int(enode * e) { - ast_manager & m = get_manager(); - - app * ex = e->get_expr(); - if (axiomatized_terms.contains(ex)) { - TRACE(str, tout << "already set up str.to-int axiom for " << mk_pp(ex, m) << std::endl;); - return; - } - axiomatized_terms.insert(ex); - - TRACE(str, tout << "instantiate str.to-int axiom for " << mk_pp(ex, m) << std::endl;); - - // let expr = (str.to-int S) - // axiom 1: expr >= -1 - // axiom 2: expr = 0 <==> S in "0+" - // axiom 3: expr >= 1 ==> S in "0*[1-9][0-9]*" - - // expr * S = ex->get_arg(0); - { - expr_ref axiom1(m_autil.mk_ge(ex, m_autil.mk_numeral(rational::minus_one(), true)), m); - SASSERT(axiom1); - assert_axiom_rw(axiom1); - } -# if 0 - { - expr_ref lhs(ctx.mk_eq_atom(ex, m_autil.mk_numeral(rational::zero(), true)), m); - expr_ref re_zeroes(u.re.mk_plus(u.re.mk_to_re(mk_string("0"))), m); - expr_ref rhs(mk_RegexIn(S, re_zeroes), m); - expr_ref axiom2(ctx.mk_eq_atom(lhs, rhs), m); - SASSERT(axiom2); - assert_axiom_rw(axiom2); - } - - { - expr_ref premise(m_autil.mk_ge(ex, m_autil.mk_numeral(rational::one(), true)), m); - //expr_ref re_positiveInteger(u.re.mk_concat( - // u.re.mk_range(mk_string("1"), mk_string("9")), - // u.re.mk_star(u.re.mk_range(mk_string("0"), mk_string("9")))), m); - expr_ref re_subterm(u.re.mk_concat(u.re.mk_range(mk_string("1"), mk_string("9")), - u.re.mk_star(u.re.mk_range(mk_string("0"), mk_string("9")))), m); - expr_ref re_integer(u.re.mk_concat(u.re.mk_star(mk_string("0")), re_subterm), m); - expr_ref conclusion(mk_RegexIn(S, re_integer), m); - SASSERT(premise); - SASSERT(conclusion); - //assert_implication(premise, conclusion); - assert_axiom_rw(rewrite_implication(premise, conclusion)); - } -#endif - } - - void theory_str::instantiate_axiom_int_to_str(enode * e) { - ast_manager & m = get_manager(); - - app * ex = e->get_expr(); - if (axiomatized_terms.contains(ex)) { - TRACE(str, tout << "already set up str.from-int axiom for " << mk_pp(ex, m) << std::endl;); - return; - } - axiomatized_terms.insert(ex); - - TRACE(str, tout << "instantiate str.from-int axiom for " << mk_pp(ex, m) << std::endl;); - - // axiom 1: N < 0 <==> (str.from-int N) = "" - expr * N = ex->get_arg(0); - { - expr_ref axiom1_lhs(mk_not(m, m_autil.mk_ge(N, m_autil.mk_numeral(rational::zero(), true))), m); - expr_ref axiom1_rhs(ctx.mk_eq_atom(ex, mk_string("")), m); - expr_ref axiom1(ctx.mk_eq_atom(axiom1_lhs, axiom1_rhs), m); - SASSERT(axiom1); - assert_axiom(axiom1); - } - - // axiom 2: The only (str.from-int N) that starts with a "0" is "0". - { - expr_ref zero(mk_string("0"), m); - // let (the result starts with a "0") be p - expr_ref starts_with_zero(u.str.mk_prefix(zero, ex), m); - // let (the result is "0") be q - expr_ref is_zero(ctx.mk_eq_atom(ex, zero), m); - // encoding: the result does NOT start with a "0" (~p) xor the result is "0" (q) - // ~p xor q == (~p or q) and (p or ~q) - assert_axiom(m.mk_and(m.mk_or(m.mk_not(starts_with_zero), is_zero), m.mk_or(starts_with_zero, m.mk_not(is_zero)))); - } - } - - void theory_str::instantiate_axiom_is_digit(enode * e) { - ast_manager & m = get_manager(); - - app * ex = e->get_expr(); - if (axiomatized_terms.contains(ex)) { - TRACE(str, tout << "already set up str.is_digit axiom for " << mk_pp(ex, m) << std::endl;); - return; - } - axiomatized_terms.insert(ex); - - TRACE(str, tout << "instantiate str.is_digit axiom for " << mk_pp(ex, m) << std::endl;); - expr * string_term = nullptr; - u.str.is_is_digit(ex, string_term); - SASSERT(string_term); - - expr_ref_vector rhs_terms(m); - - for (unsigned c = 0x30; c <= 0x39; ++c) { - zstring ch(c); - expr_ref rhs_term(ctx.mk_eq_atom(string_term, mk_string(ch)), m); - rhs_terms.push_back(rhs_term); - } - - expr_ref rhs(mk_or(rhs_terms), m); - expr_ref axiom(ctx.mk_eq_atom(ex, rhs), m); - assert_axiom_rw(axiom); - } - - void theory_str::instantiate_axiom_str_from_code(enode * e) { - ast_manager & m = get_manager(); - - app * ex = e->get_expr(); - if (axiomatized_terms.contains(ex)) { - TRACE(str, tout << "already set up str.from_code axiom for " << mk_pp(ex, m) << std::endl;); - return; - } - axiomatized_terms.insert(ex); - TRACE(str, tout << "instantiate str.from_code axiom for " << mk_pp(ex, m) << std::endl;); - - expr * arg = nullptr; - VERIFY(u.str.is_from_code(ex, arg)); - // (str.from_code N) == "" if N is not in the range [0, max_char]. - { - expr_ref premise(m.mk_or(m_autil.mk_le(arg, mk_int(-1)), m_autil.mk_ge(arg, mk_int(u.max_char() + 1))), m); - expr_ref conclusion(ctx.mk_eq_atom(ex, mk_string("")), m); - expr_ref axiom(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(axiom); - } - // len (str.from_code N) == 1 if N is in the range [0, max_char]. - { - expr_ref premise(m.mk_and(m_autil.mk_ge(arg, mk_int(0)), m_autil.mk_le(arg, mk_int(u.max_char() + 1))), m); - expr_ref conclusion(ctx.mk_eq_atom(mk_strlen(ex), mk_int(1)), m); - expr_ref axiom(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(axiom); - } - // If N is in the range [0, max_char], then to_code(from_code(e)) == e. - { - expr_ref premise(m.mk_and(m_autil.mk_ge(arg, mk_int(0)), m_autil.mk_le(arg, mk_int(u.max_char() + 1))), m); - expr_ref conclusion(ctx.mk_eq_atom(u.str.mk_to_code(ex), arg), m); - expr_ref axiom(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(axiom); - } - } - - void theory_str::instantiate_axiom_str_to_code(enode * e) { - ast_manager & m = get_manager(); - - app * ex = e->get_expr(); - if (axiomatized_terms.contains(ex)) { - TRACE(str, tout << "already set up str.to_code axiom for " << mk_pp(ex, m) << std::endl;); - return; - } - axiomatized_terms.insert(ex); - TRACE(str, tout << "instantiate str.to_code axiom for " << mk_pp(ex, m) << std::endl;); - - expr * arg = nullptr; - VERIFY(u.str.is_to_code(ex, arg)); - // (str.to_code S) == -1 if len(S) != 1. - { - expr_ref premise(m.mk_not(ctx.mk_eq_atom(mk_strlen(arg), mk_int(1))), m); - expr_ref conclusion(ctx.mk_eq_atom(ex, mk_int(-1)), m); - expr_ref axiom(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(axiom); - } - // (str.to_code S) is in [0, max_char] if len(S) == 1. - { - expr_ref premise(ctx.mk_eq_atom(mk_strlen(arg), mk_int(1)), m); - expr_ref conclusion(m.mk_and(m_autil.mk_ge(ex, mk_int(0)), m_autil.mk_le(ex, mk_int(u.max_char()))), m); - expr_ref axiom(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(axiom); - } - } - - expr * theory_str::mk_RegexIn(expr * str, expr * regexp) { - app * regexIn = u.re.mk_in_re(str, regexp); - // immediately force internalization so that axiom setup does not fail - ctx.internalize(regexIn, false); - set_up_axioms(regexIn); - return regexIn; - } - - void theory_str::instantiate_axiom_RegexIn(enode * e) { - ast_manager & m = get_manager(); - - app * ex = e->get_expr(); - if (axiomatized_terms.contains(ex)) { - TRACE(str, tout << "already set up RegexIn axiom for " << mk_pp(ex, m) << std::endl;); - return; - } - axiomatized_terms.insert(ex); - - TRACE(str, tout << "instantiate RegexIn axiom for " << mk_pp(ex, m) << std::endl;); - - expr_ref str(ex->get_arg(0), m); - - regex_terms.insert(ex); - if (!regex_terms_by_string.contains(str)) { - regex_terms_by_string.insert(str, ptr_vector()); - } - regex_terms_by_string[str].push_back(ex); - } - - void theory_str::attach_new_th_var(enode * n) { - theory_var v = mk_var(n); - ctx.attach_th_var(n, this, v); - TRACE(str, tout << "new theory var: " << mk_ismt2_pp(n->get_expr(), get_manager()) << " := v#" << v << std::endl;); - } - - void theory_str::reset_eh() { - TRACE(str, tout << "resetting" << std::endl;); - m_trail_stack.reset(); - m_library_aware_trail_stack.reset(); - - candidate_model.reset(); - m_basicstr_axiom_todo.reset(); - m_concat_axiom_todo.reset(); - pop_scope_eh(ctx.get_scope_level()); - } - - /* - * Check equality among equivalence class members of LHS and RHS - * to discover an incorrect LHS == RHS. - * For example, if we have y2 == "str3" - * and the equivalence classes are - * { y2, (Concat ce m2) } - * { "str3", (Concat abc x2) } - * then y2 can't be equal to "str3". - * Then add an assertion: (y2 == (Concat ce m2)) AND ("str3" == (Concat abc x2)) -> (y2 != "str3") - */ - bool theory_str::new_eq_check(expr * lhs, expr * rhs) { - ast_manager & m = get_manager(); - - // skip this check if we defer consistency checking, as we can do it for every EQC in final check - if (!opt_DeferEQCConsistencyCheck) { - check_concat_len_in_eqc(lhs); - check_concat_len_in_eqc(rhs); - } - - // Now we iterate over all pairs of terms across both EQCs - // and check whether we can show that any pair of distinct terms - // cannot possibly be equal. - // If that's the case, we assert an axiom to that effect and stop. - - expr * eqc_nn1 = lhs; - do { - expr * eqc_nn2 = rhs; - do { - TRACE(str, tout << "checking whether " << mk_pp(eqc_nn1, m) << " and " << mk_pp(eqc_nn2, m) << " can be equal" << std::endl;); - // inconsistency check: value - if (!can_two_nodes_eq(eqc_nn1, eqc_nn2)) { - TRACE(str, tout << "inconsistency detected: " << mk_pp(eqc_nn1, m) << " cannot be equal to " << mk_pp(eqc_nn2, m) << std::endl;); - expr_ref to_assert(mk_not(m, m.mk_eq(eqc_nn1, eqc_nn2)), m); - assert_axiom(to_assert); - // this shouldn't use the integer theory at all, so we don't allow the option of quick-return - return false; - } - if (!check_length_consistency(eqc_nn1, eqc_nn2)) { - TRACE(str, tout << "inconsistency detected: " << mk_pp(eqc_nn1, m) << " and " << mk_pp(eqc_nn2, m) << " have inconsistent lengths" << std::endl;); - if (opt_NoQuickReturn_IntegerTheory){ - TRACE(str, tout << "continuing in new_eq_check() due to opt_NoQuickReturn_IntegerTheory" << std::endl;); - } else { - return false; - } - } - eqc_nn2 = get_eqc_next(eqc_nn2); - } while (eqc_nn2 != rhs); - eqc_nn1 = get_eqc_next(eqc_nn1); - } while (eqc_nn1 != lhs); - - if (!contains_map.empty()) { - check_contain_in_new_eq(lhs, rhs); - } - - // okay, all checks here passed - return true; - } - - // support for user_smt_theory-style EQC handling - - app * theory_str::get_ast(theory_var v) { - return get_enode(v)->get_expr(); - } - - theory_var theory_str::get_var(expr * n) const { - if (!is_app(n)) { - return null_theory_var; - } - if (ctx.e_internalized(to_app(n))) { - enode * e = ctx.get_enode(to_app(n)); - return e->get_th_var(get_id()); - } - return null_theory_var; - } - - // simulate Z3_theory_get_eqc_next() - expr * theory_str::get_eqc_next(expr * n) { - theory_var v = get_var(n); - if (v != null_theory_var) { - theory_var r = m_find.next(v); - return get_ast(r); - } - return n; - } - - void theory_str::group_terms_by_eqc(expr * n, std::set & concats, std::set & vars, std::set & consts) { - expr * eqcNode = n; - do { - app * ast = to_app(eqcNode); - if (u.str.is_concat(ast)) { - expr * simConcat = simplify_concat(ast); - if (simConcat != ast) { - if (u.str.is_concat(to_app(simConcat))) { - concats.insert(simConcat); - } else { - if (u.str.is_string(simConcat)) { - consts.insert(simConcat); - } else { - vars.insert(simConcat); - } - } - } else { - concats.insert(simConcat); - } - } else if (u.str.is_string(ast)) { - consts.insert(ast); - } else { - vars.insert(ast); - } - eqcNode = get_eqc_next(eqcNode); - } while (eqcNode != n); - } - - void theory_str::get_nodes_in_concat(expr * node, ptr_vector & nodeList) { - app * a_node = to_app(node); - if (!u.str.is_concat(a_node)) { - nodeList.push_back(node); - return; - } else { - SASSERT(a_node->get_num_args() == 2); - expr * leftArg = a_node->get_arg(0); - expr * rightArg = a_node->get_arg(1); - get_nodes_in_concat(leftArg, nodeList); - get_nodes_in_concat(rightArg, nodeList); - } - } - - // previously Concat() in strTheory.cpp - // Evaluates the concatenation (n1 . n2) with respect to - // the current equivalence classes of n1 and n2. - // Returns a constant string expression representing this concatenation - // if one can be determined, or nullptr if this is not possible. - expr * theory_str::eval_concat(expr * n1, expr * n2) { - bool n1HasEqcValue = false; - bool n2HasEqcValue = false; - expr * v1 = get_eqc_value(n1, n1HasEqcValue); - expr * v2 = get_eqc_value(n2, n2HasEqcValue); - if (n1HasEqcValue && n2HasEqcValue) { - zstring n1_str, n2_str; - u.str.is_string(v1, n1_str); - u.str.is_string(v2, n2_str); - zstring result = n1_str + n2_str; - return mk_string(result); - } else if (n1HasEqcValue && !n2HasEqcValue) { - zstring v1_str; - u.str.is_string(v1, v1_str); - if (v1_str.empty()) { - return n2; - } - } else if (n2HasEqcValue && !n1HasEqcValue) { - zstring v2_str; - u.str.is_string(v2, v2_str); - if (v2_str.empty()) { - return n1; - } - } - // give up - return nullptr; - } - - // trace code helper - inline std::string rational_to_string_if_exists(const rational & x, bool x_exists) { - if (x_exists) { - return x.to_string(); - } else { - return "?"; - } - } - - /* - * The inputs: - * ~ nn: non const node - * ~ eq_str: the equivalent constant string of nn - * Iterate the parent of all eqc nodes of nn, looking for: - * ~ concat node - * to see whether some concat nodes can be simplified. - */ - void theory_str::simplify_parent(expr * nn, expr * eq_str) { - ast_manager & m = get_manager(); - - TRACE(str, tout << "simplifying parents of " << mk_ismt2_pp(nn, m) - << " with respect to " << mk_ismt2_pp(eq_str, m) << std::endl;); - - ctx.internalize(nn, false); - - zstring eq_strValue; - u.str.is_string(eq_str, eq_strValue); - expr * n_eqNode = nn; - do { - enode * n_eq_enode = ctx.get_enode(n_eqNode); - TRACE(str, tout << "considering all parents of " << mk_ismt2_pp(n_eqNode, m) << std::endl - << "associated n_eq_enode has " << n_eq_enode->get_num_parents() << " parents" << std::endl;); - - // the goal of this next bit is to avoid dereferencing a bogus e_parent in the following loop. - // what I imagine is causing this bug is that, for example, we examine some parent, we add an axiom that involves it, - // and the parent_it iterator becomes invalidated, because we indirectly modified the container that we're iterating over. - - enode_vector current_parents; - for (auto &parent: n_eq_enode->get_parents()) { - current_parents.insert(parent); - } - - for (auto &e_parent : current_parents) { - SASSERT(e_parent != nullptr); - - app * a_parent = e_parent->get_expr(); - TRACE(str, tout << "considering parent " << mk_ismt2_pp(a_parent, m) << std::endl;); - - if (u.str.is_concat(a_parent)) { - expr * arg0 = a_parent->get_arg(0); - expr * arg1 = a_parent->get_arg(1); - - rational parentLen; - bool parentLen_exists = get_len_value(a_parent, parentLen); - - if (arg0 == n_eq_enode->get_expr()) { - rational arg0Len, arg1Len; - bool arg0Len_exists = get_len_value(eq_str, arg0Len); - bool arg1Len_exists = get_len_value(arg1, arg1Len); - - TRACE(str, - tout << "simplify_parent #1:" << std::endl - << "* parent = " << mk_ismt2_pp(a_parent, m) << std::endl - << "* |parent| = " << rational_to_string_if_exists(parentLen, parentLen_exists) << std::endl - << "* |arg0| = " << rational_to_string_if_exists(arg0Len, arg0Len_exists) << std::endl - << "* |arg1| = " << rational_to_string_if_exists(arg1Len, arg1Len_exists) << std::endl; - ); (void)arg0Len_exists; - - if (parentLen_exists && !arg1Len_exists) { - TRACE(str, tout << "make up len for arg1" << std::endl;); - expr_ref implyL11(m.mk_and(ctx.mk_eq_atom(mk_strlen(a_parent), mk_int(parentLen)), - ctx.mk_eq_atom(mk_strlen(arg0), mk_int(arg0Len))), m); - rational makeUpLenArg1 = parentLen - arg0Len; - if (makeUpLenArg1.is_nonneg()) { - expr_ref implyR11(ctx.mk_eq_atom(mk_strlen(arg1), mk_int(makeUpLenArg1)), m); - assert_implication(implyL11, implyR11); - } else { - expr_ref neg(mk_not(m, implyL11), m); - assert_axiom(neg); - } - } - - // (Concat n_eqNode arg1) /\ arg1 has eq const - - expr * concatResult = eval_concat(eq_str, arg1); - if (concatResult != nullptr) { - bool arg1HasEqcValue = false; - expr * arg1Value = get_eqc_value(arg1, arg1HasEqcValue); - expr_ref implyL(m); - if (arg1 != arg1Value) { - expr_ref eq_ast1(m); - eq_ast1 = ctx.mk_eq_atom(n_eqNode, eq_str); - SASSERT(eq_ast1); - - expr_ref eq_ast2(m); - eq_ast2 = ctx.mk_eq_atom(arg1, arg1Value); - SASSERT(eq_ast2); - implyL = m.mk_and(eq_ast1, eq_ast2); - } else { - implyL = ctx.mk_eq_atom(n_eqNode, eq_str); - } - - - if (!in_same_eqc(a_parent, concatResult)) { - expr_ref implyR(m); - implyR = ctx.mk_eq_atom(a_parent, concatResult); - SASSERT(implyR); - - assert_implication(implyL, implyR); - } - } else if (u.str.is_concat(to_app(n_eqNode))) { - expr_ref simpleConcat(m); - simpleConcat = mk_concat(eq_str, arg1); - if (!in_same_eqc(a_parent, simpleConcat)) { - expr_ref implyL(m); - implyL = ctx.mk_eq_atom(n_eqNode, eq_str); - SASSERT(implyL); - - expr_ref implyR(m); - implyR = ctx.mk_eq_atom(a_parent, simpleConcat); - SASSERT(implyR); - assert_implication(implyL, implyR); - } - } - } // if (arg0 == n_eq_enode->get_expr()) - - if (arg1 == n_eq_enode->get_expr()) { - rational arg0Len, arg1Len; - bool arg0Len_exists = get_len_value(arg0, arg0Len); - bool arg1Len_exists = get_len_value(eq_str, arg1Len); - - TRACE(str, - tout << "simplify_parent #2:" << std::endl - << "* parent = " << mk_ismt2_pp(a_parent, m) << std::endl - << "* |parent| = " << rational_to_string_if_exists(parentLen, parentLen_exists) << std::endl - << "* |arg0| = " << rational_to_string_if_exists(arg0Len, arg0Len_exists) << std::endl - << "* |arg1| = " << rational_to_string_if_exists(arg1Len, arg1Len_exists) << std::endl; - ); (void)arg1Len_exists; - - if (parentLen_exists && !arg0Len_exists) { - TRACE(str, tout << "make up len for arg0" << std::endl;); - expr_ref implyL11(m.mk_and(ctx.mk_eq_atom(mk_strlen(a_parent), mk_int(parentLen)), - ctx.mk_eq_atom(mk_strlen(arg1), mk_int(arg1Len))), m); - rational makeUpLenArg0 = parentLen - arg1Len; - if (makeUpLenArg0.is_nonneg()) { - expr_ref implyR11(ctx.mk_eq_atom(mk_strlen(arg0), mk_int(makeUpLenArg0)), m); - assert_implication(implyL11, implyR11); - } else { - expr_ref neg(mk_not(m, implyL11), m); - assert_axiom(neg); - } - } - - // (Concat arg0 n_eqNode) /\ arg0 has eq const - - expr * concatResult = eval_concat(arg0, eq_str); - if (concatResult != nullptr) { - bool arg0HasEqcValue = false; - expr * arg0Value = get_eqc_value(arg0, arg0HasEqcValue); - expr_ref implyL(m); - if (arg0 != arg0Value) { - expr_ref eq_ast1(m); - eq_ast1 = ctx.mk_eq_atom(n_eqNode, eq_str); - SASSERT(eq_ast1); - expr_ref eq_ast2(m); - eq_ast2 = ctx.mk_eq_atom(arg0, arg0Value); - SASSERT(eq_ast2); - - implyL = m.mk_and(eq_ast1, eq_ast2); - } else { - implyL = ctx.mk_eq_atom(n_eqNode, eq_str); - } - - if (!in_same_eqc(a_parent, concatResult)) { - expr_ref implyR(m); - implyR = ctx.mk_eq_atom(a_parent, concatResult); - SASSERT(implyR); - - assert_implication(implyL, implyR); - } - } else if (u.str.is_concat(to_app(n_eqNode))) { - expr_ref simpleConcat(m); - simpleConcat = mk_concat(arg0, eq_str); - if (!in_same_eqc(a_parent, simpleConcat)) { - expr_ref implyL(m); - implyL = ctx.mk_eq_atom(n_eqNode, eq_str); - SASSERT(implyL); - - expr_ref implyR(m); - implyR = ctx.mk_eq_atom(a_parent, simpleConcat); - SASSERT(implyR); - assert_implication(implyL, implyR); - } - } - } // if (arg1 == n_eq_enode->get_owner - - - //--------------------------------------------------------- - // Case (2-1) begin: (Concat n_eqNode (Concat str var)) - if (arg0 == n_eqNode && u.str.is_concat(to_app(arg1))) { - app * a_arg1 = to_app(arg1); - TRACE(str, tout << "simplify_parent #3" << std::endl;); - expr * r_concat_arg0 = a_arg1->get_arg(0); - if (u.str.is_string(r_concat_arg0)) { - expr * combined_str = eval_concat(eq_str, r_concat_arg0); - SASSERT(combined_str); - expr * r_concat_arg1 = a_arg1->get_arg(1); - expr_ref implyL(m); - implyL = ctx.mk_eq_atom(n_eqNode, eq_str); - expr * simplifiedAst = mk_concat(combined_str, r_concat_arg1); - if (!in_same_eqc(a_parent, simplifiedAst)) { - expr_ref implyR(m); - implyR = ctx.mk_eq_atom(a_parent, simplifiedAst); - assert_implication(implyL, implyR); - } - } - } - // Case (2-1) end: (Concat n_eqNode (Concat str var)) - //--------------------------------------------------------- - - - //--------------------------------------------------------- - // Case (2-2) begin: (Concat (Concat var str) n_eqNode) - if (u.str.is_concat(to_app(arg0)) && arg1 == n_eqNode) { - app * a_arg0 = to_app(arg0); - TRACE(str, tout << "simplify_parent #4" << std::endl;); - expr * l_concat_arg1 = a_arg0->get_arg(1); - if (u.str.is_string(l_concat_arg1)) { - expr * combined_str = eval_concat(l_concat_arg1, eq_str); - SASSERT(combined_str); - expr * l_concat_arg0 = a_arg0->get_arg(0); - expr_ref implyL(m); - implyL = ctx.mk_eq_atom(n_eqNode, eq_str); - expr * simplifiedAst = mk_concat(l_concat_arg0, combined_str); - if (!in_same_eqc(a_parent, simplifiedAst)) { - expr_ref implyR(m); - implyR = ctx.mk_eq_atom(a_parent, simplifiedAst); - assert_implication(implyL, implyR); - } - } - } - // Case (2-2) end: (Concat (Concat var str) n_eqNode) - //--------------------------------------------------------- - - // Have to look up one more layer: if the parent of the concat is another concat - //------------------------------------------------- - // Case (3-1) begin: (Concat (Concat var n_eqNode) str ) - if (arg1 == n_eqNode) { - expr_ref_vector concat_parents(m); - for (auto& e_concat_parent : e_parent->get_parents()) { - concat_parents.push_back(e_concat_parent->get_expr()); - } - for (auto& _concat_parent : concat_parents) { - app* concat_parent = to_app(_concat_parent); - if (u.str.is_concat(concat_parent)) { - expr * concat_parent_arg0 = concat_parent->get_arg(0); - expr * concat_parent_arg1 = concat_parent->get_arg(1); - if (concat_parent_arg0 == a_parent && u.str.is_string(concat_parent_arg1)) { - TRACE(str, tout << "simplify_parent #5" << std::endl;); - expr * combinedStr = eval_concat(eq_str, concat_parent_arg1); - SASSERT(combinedStr); - expr_ref implyL(m); - implyL = ctx.mk_eq_atom(n_eqNode, eq_str); - expr * simplifiedAst = mk_concat(arg0, combinedStr); - if (!in_same_eqc(concat_parent, simplifiedAst)) { - expr_ref implyR(m); - implyR = ctx.mk_eq_atom(concat_parent, simplifiedAst); - assert_implication(implyL, implyR); - } - } - } - } - } - // Case (3-1) end: (Concat (Concat var n_eqNode) str ) - // Case (3-2) begin: (Concat str (Concat n_eqNode var) ) - if (arg0 == n_eqNode) { - expr_ref_vector concat_parents(m); - for (auto& e_concat_parent : e_parent->get_parents()) { - concat_parents.push_back(e_concat_parent->get_expr()); - } - for (auto& _concat_parent : concat_parents) { - app* concat_parent = to_app(_concat_parent); - if (u.str.is_concat(concat_parent)) { - expr * concat_parent_arg0 = concat_parent->get_arg(0); - expr * concat_parent_arg1 = concat_parent->get_arg(1); - if (concat_parent_arg1 == a_parent && u.str.is_string(concat_parent_arg0)) { - TRACE(str, tout << "simplify_parent #6" << std::endl;); - expr * combinedStr = eval_concat(concat_parent_arg0, eq_str); - SASSERT(combinedStr); - expr_ref implyL(m); - implyL = ctx.mk_eq_atom(n_eqNode, eq_str); - expr * simplifiedAst = mk_concat(combinedStr, arg1); - if (!in_same_eqc(concat_parent, simplifiedAst)) { - expr_ref implyR(m); - implyR = ctx.mk_eq_atom(concat_parent, simplifiedAst); - assert_implication(implyL, implyR); - } - } - } - } - } - // Case (3-2) end: (Concat str (Concat n_eqNode var) ) - } // if is_concat(a_parent) - } // for parent_it : n_eq_enode->begin_parents() - - - // check next EQC member - n_eqNode = get_eqc_next(n_eqNode); - } while (n_eqNode != nn); - } - - expr * theory_str::simplify_concat(expr * node) { - ast_manager & m = get_manager(); - std::map resolvedMap; - ptr_vector argVec; - get_nodes_in_concat(node, argVec); - - for (unsigned i = 0; i < argVec.size(); ++i) { - bool vArgHasEqcValue = false; - expr * vArg = get_eqc_value(argVec[i], vArgHasEqcValue); - if (vArg != argVec[i]) { - resolvedMap[argVec[i]] = vArg; - } - } - - if (resolvedMap.empty()) { - // no simplification possible - return node; - } else { - expr * resultAst = mk_string(""); - for (unsigned i = 0; i < argVec.size(); ++i) { - bool vArgHasEqcValue = false; - expr * vArg = get_eqc_value(argVec[i], vArgHasEqcValue); - resultAst = mk_concat(resultAst, vArg); - } - TRACE(str, tout << mk_ismt2_pp(node, m) << " is simplified to " << mk_ismt2_pp(resultAst, m) << std::endl;); - - if (in_same_eqc(node, resultAst)) { - TRACE(str, tout << "SKIP: both concats are already in the same equivalence class" << std::endl;); - } else { - expr_ref_vector items(m); - for (auto itor : resolvedMap) { - items.push_back(ctx.mk_eq_atom(itor.first, itor.second)); - } - expr_ref premise(mk_and(items), m); - expr_ref conclusion(ctx.mk_eq_atom(node, resultAst), m); - assert_implication(premise, conclusion); - } - return resultAst; - } - - } - - // Modified signature of Z3str2's inferLenConcat(). - // Returns true iff nLen can be inferred by this method - // (i.e. the equivalent of a len_exists flag in get_len_value()). - - bool theory_str::infer_len_concat(expr * n, rational & nLen) { - ast_manager & m = get_manager(); - expr * arg0 = to_app(n)->get_arg(0); - expr * arg1 = to_app(n)->get_arg(1); - - rational arg0_len, arg1_len; - bool arg0_len_exists = get_len_value(arg0, arg0_len); - bool arg1_len_exists = get_len_value(arg1, arg1_len); - rational tmp_len; - bool nLen_exists = get_len_value(n, tmp_len); - - if (arg0_len_exists && arg1_len_exists && !nLen_exists) { - expr_ref_vector l_items(m); - // if (mk_strlen(arg0) != mk_int(arg0_len)) { - { - l_items.push_back(ctx.mk_eq_atom(mk_strlen(arg0), mk_int(arg0_len))); - } - - // if (mk_strlen(arg1) != mk_int(arg1_len)) { - { - l_items.push_back(ctx.mk_eq_atom(mk_strlen(arg1), mk_int(arg1_len))); - } - - expr_ref axl(m.mk_and(l_items.size(), l_items.data()), m); - rational nnLen = arg0_len + arg1_len; - expr_ref axr(ctx.mk_eq_atom(mk_strlen(n), mk_int(nnLen)), m); - TRACE(str, tout << "inferred (Length " << mk_pp(n, m) << ") = " << nnLen << std::endl;); - assert_implication(axl, axr); - nLen = nnLen; - return true; - } else { - return false; - } - } - - void theory_str::infer_len_concat_arg(expr * n, rational len) { - if (len.is_neg()) { - return; - } - - ast_manager & m = get_manager(); - - expr * arg0 = to_app(n)->get_arg(0); - expr * arg1 = to_app(n)->get_arg(1); - rational arg0_len, arg1_len; - bool arg0_len_exists = get_len_value(arg0, arg0_len); - bool arg1_len_exists = get_len_value(arg1, arg1_len); - - expr_ref_vector l_items(m); - expr_ref axr(m); - axr.reset(); - - // if (mk_length(t, n) != mk_int(ctx, len)) { - { - l_items.push_back(ctx.mk_eq_atom(mk_strlen(n), mk_int(len))); - } - - if (!arg0_len_exists && arg1_len_exists) { - //if (mk_length(t, arg1) != mk_int(ctx, arg1_len)) { - { - l_items.push_back(ctx.mk_eq_atom(mk_strlen(arg1), mk_int(arg1_len))); - } - rational arg0Len = len - arg1_len; - if (arg0Len.is_nonneg()) { - axr = ctx.mk_eq_atom(mk_strlen(arg0), mk_int(arg0Len)); - } else { - // could negate - } - } else if (arg0_len_exists && !arg1_len_exists) { - //if (mk_length(t, arg0) != mk_int(ctx, arg0_len)) { - { - l_items.push_back(ctx.mk_eq_atom(mk_strlen(arg0), mk_int(arg0_len))); - } - rational arg1Len = len - arg0_len; - if (arg1Len.is_nonneg()) { - axr = ctx.mk_eq_atom(mk_strlen(arg1), mk_int(arg1Len)); - } else { - // could negate - } - } else { - - } - - if (axr) { - expr_ref axl(m.mk_and(l_items.size(), l_items.data()), m); - assert_implication(axl, axr); - } - } - - void theory_str::infer_len_concat_equality(expr * nn1, expr * nn2) { - rational nnLen; - bool nnLen_exists = get_len_value(nn1, nnLen); - if (!nnLen_exists) { - nnLen_exists = get_len_value(nn2, nnLen); - } - - // case 1: - // Known: a1_arg0 and a1_arg1 - // Unknown: nn1 - - if (u.str.is_concat(to_app(nn1))) { - rational nn1ConcatLen; - bool nn1ConcatLen_exists = infer_len_concat(nn1, nn1ConcatLen); - if (nnLen_exists && nn1ConcatLen_exists) { - nnLen = nn1ConcatLen; - } - } - - // case 2: - // Known: a1_arg0 and a1_arg1 - // Unknown: nn1 - - if (u.str.is_concat(to_app(nn2))) { - rational nn2ConcatLen; - bool nn2ConcatLen_exists = infer_len_concat(nn2, nn2ConcatLen); - if (nnLen_exists && nn2ConcatLen_exists) { - nnLen = nn2ConcatLen; - } - } - - if (nnLen_exists) { - if (u.str.is_concat(to_app(nn1))) { - infer_len_concat_arg(nn1, nnLen); - } - if (u.str.is_concat(to_app(nn2))) { - infer_len_concat_arg(nn2, nnLen); - } - } - - /* - if (isConcatFunc(t, nn2)) { - int nn2ConcatLen = inferLenConcat(t, nn2); - if (nnLen == -1 && nn2ConcatLen != -1) - nnLen = nn2ConcatLen; - } - - if (nnLen != -1) { - if (isConcatFunc(t, nn1)) { - inferLenConcatArg(t, nn1, nnLen); - } - if (isConcatFunc(t, nn2)) { - inferLenConcatArg(t, nn2, nnLen); - } - } - */ - } - - void theory_str::add_theory_aware_branching_info(expr * term, double priority, lbool phase) { - ctx.internalize(term, false); - bool_var v = ctx.get_bool_var(term); - ctx.add_theory_aware_branching_info(v, priority, phase); - } - - void theory_str::generate_mutual_exclusion(expr_ref_vector & terms) { - // pull each literal out of the arrangement disjunction - literal_vector ls; - for (expr * e : terms) { - literal l = ctx.get_literal(e); - ls.push_back(l); - } - ctx.mk_th_case_split(ls.size(), ls.data()); - } - - void theory_str::print_cut_var(expr * node, std::ofstream & xout) { - ast_manager & m = get_manager(); - xout << "Cut info of " << mk_pp(node, m) << std::endl; - if (cut_var_map.contains(node)) { - if (!cut_var_map[node].empty()) { - xout << "[" << cut_var_map[node].top()->level << "] "; - for (auto const& kv : cut_var_map[node].top()->vars) { - xout << mk_pp(kv.m_key, m) << ", "; - } - xout << std::endl; - } - } - } - - /* - * Handle two equivalent Concats. - */ - void theory_str::simplify_concat_equality(expr * nn1, expr * nn2) { - ast_manager & m = get_manager(); - - app * a_nn1 = to_app(nn1); - SASSERT(a_nn1->get_num_args() == 2); - app * a_nn2 = to_app(nn2); - SASSERT(a_nn2->get_num_args() == 2); - - expr * a1_arg0 = a_nn1->get_arg(0); - expr * a1_arg1 = a_nn1->get_arg(1); - expr * a2_arg0 = a_nn2->get_arg(0); - expr * a2_arg1 = a_nn2->get_arg(1); - - rational a1_arg0_len, a1_arg1_len, a2_arg0_len, a2_arg1_len; - - bool a1_arg0_len_exists = get_len_value(a1_arg0, a1_arg0_len); - bool a1_arg1_len_exists = get_len_value(a1_arg1, a1_arg1_len); - bool a2_arg0_len_exists = get_len_value(a2_arg0, a2_arg0_len); - bool a2_arg1_len_exists = get_len_value(a2_arg1, a2_arg1_len); - - TRACE(str, tout << "nn1 = " << mk_ismt2_pp(nn1, m) << std::endl - << "nn2 = " << mk_ismt2_pp(nn2, m) << std::endl;); - - TRACE(str, tout - << "len(" << mk_pp(a1_arg0, m) << ") = " << (a1_arg0_len_exists ? a1_arg0_len.to_string() : "?") << std::endl - << "len(" << mk_pp(a1_arg1, m) << ") = " << (a1_arg1_len_exists ? a1_arg1_len.to_string() : "?") << std::endl - << "len(" << mk_pp(a2_arg0, m) << ") = " << (a2_arg0_len_exists ? a2_arg0_len.to_string() : "?") << std::endl - << "len(" << mk_pp(a2_arg1, m) << ") = " << (a2_arg1_len_exists ? a2_arg1_len.to_string() : "?") << std::endl - << std::endl;); - - infer_len_concat_equality(nn1, nn2); - - if (a1_arg0 == a2_arg0) { - if (!in_same_eqc(a1_arg1, a2_arg1)) { - expr_ref premise(ctx.mk_eq_atom(nn1, nn2), m); - expr_ref eq1(ctx.mk_eq_atom(a1_arg1, a2_arg1), m); - expr_ref eq2(ctx.mk_eq_atom(mk_strlen(a1_arg1), mk_strlen(a2_arg1)), m); - expr_ref conclusion(m.mk_and(eq1, eq2), m); - assert_implication(premise, conclusion); - } - TRACE(str, tout << "SKIP: a1_arg0 == a2_arg0" << std::endl;); - return; - } - - if (a1_arg1 == a2_arg1) { - if (!in_same_eqc(a1_arg0, a2_arg0)) { - expr_ref premise(ctx.mk_eq_atom(nn1, nn2), m); - expr_ref eq1(ctx.mk_eq_atom(a1_arg0, a2_arg0), m); - expr_ref eq2(ctx.mk_eq_atom(mk_strlen(a1_arg0), mk_strlen(a2_arg0)), m); - expr_ref conclusion(m.mk_and(eq1, eq2), m); - assert_implication(premise, conclusion); - } - TRACE(str, tout << "SKIP: a1_arg1 == a2_arg1" << std::endl;); - return; - } - - // quick path - - if (in_same_eqc(a1_arg0, a2_arg0)) { - if (in_same_eqc(a1_arg1, a2_arg1)) { - TRACE(str, tout << "SKIP: a1_arg0 =~ a2_arg0 and a1_arg1 =~ a2_arg1" << std::endl;); - return; - } else { - TRACE(str, tout << "quick path 1-1: a1_arg0 =~ a2_arg0" << std::endl;); - expr_ref premise(m.mk_and(ctx.mk_eq_atom(nn1, nn2), ctx.mk_eq_atom(a1_arg0, a2_arg0)), m); - expr_ref conclusion(m.mk_and(ctx.mk_eq_atom(a1_arg1, a2_arg1), ctx.mk_eq_atom(mk_strlen(a1_arg1), mk_strlen(a2_arg1))), m); - assert_implication(premise, conclusion); - return; - } - } else { - if (in_same_eqc(a1_arg1, a2_arg1)) { - TRACE(str, tout << "quick path 1-2: a1_arg1 =~ a2_arg1" << std::endl;); - expr_ref premise(m.mk_and(ctx.mk_eq_atom(nn1, nn2), ctx.mk_eq_atom(a1_arg1, a2_arg1)), m); - expr_ref conclusion(m.mk_and(ctx.mk_eq_atom(a1_arg0, a2_arg0), ctx.mk_eq_atom(mk_strlen(a1_arg0), mk_strlen(a2_arg0))), m); - assert_implication(premise, conclusion); - return; - } - } - - // quick path 2-1 - if (a1_arg0_len_exists && a2_arg0_len_exists && a1_arg0_len == a2_arg0_len) { - if (!in_same_eqc(a1_arg0, a2_arg0)) { - TRACE(str, tout << "quick path 2-1: len(nn1.arg0) == len(nn2.arg0)" << std::endl;); - expr_ref ax_l1(ctx.mk_eq_atom(nn1, nn2), m); - expr_ref ax_l2(ctx.mk_eq_atom(mk_strlen(a1_arg0), mk_strlen(a2_arg0)), m); - expr_ref ax_r1(ctx.mk_eq_atom(a1_arg0, a2_arg0), m); - expr_ref ax_r2(ctx.mk_eq_atom(a1_arg1, a2_arg1), m); - - expr_ref premise(m.mk_and(ax_l1, ax_l2), m); - expr_ref conclusion(m.mk_and(ax_r1, ax_r2), m); - - assert_implication(premise, conclusion); - - if (opt_NoQuickReturn_IntegerTheory) { - TRACE(str, tout << "bypassing quick return from the end of this case" << std::endl;); - } else { - return; - } - } - } - - if (a1_arg1_len_exists && a2_arg1_len_exists && a1_arg1_len == a2_arg1_len) { - if (!in_same_eqc(a1_arg1, a2_arg1)) { - TRACE(str, tout << "quick path 2-2: len(nn1.arg1) == len(nn2.arg1)" << std::endl;); - expr_ref ax_l1(ctx.mk_eq_atom(nn1, nn2), m); - expr_ref ax_l2(ctx.mk_eq_atom(mk_strlen(a1_arg1), mk_strlen(a2_arg1)), m); - expr_ref ax_r1(ctx.mk_eq_atom(a1_arg0, a2_arg0), m); - expr_ref ax_r2(ctx.mk_eq_atom(a1_arg1, a2_arg1), m); - - expr_ref premise(m.mk_and(ax_l1, ax_l2), m); - expr_ref conclusion(m.mk_and(ax_r1, ax_r2), m); - - assert_implication(premise, conclusion); - if (opt_NoQuickReturn_IntegerTheory) { - TRACE(str, tout << "bypassing quick return from the end of this case" << std::endl;); - } else { - return; - } - } - } - - expr_ref new_nn1(simplify_concat(nn1), m); - expr_ref new_nn2(simplify_concat(nn2), m); - app * a_new_nn1 = to_app(new_nn1); - app * a_new_nn2 = to_app(new_nn2); - - TRACE(str, tout << "new_nn1 = " << mk_ismt2_pp(new_nn1, m) << std::endl - << "new_nn2 = " << mk_ismt2_pp(new_nn2, m) << std::endl;); - - if (new_nn1 == new_nn2) { - TRACE(str, tout << "equal concats, return" << std::endl;); - return; - } - - if (!can_two_nodes_eq(new_nn1, new_nn2)) { - expr_ref detected(mk_not(m, ctx.mk_eq_atom(new_nn1, new_nn2)), m); - TRACE(str, tout << "inconsistency detected: " << mk_ismt2_pp(detected, m) << std::endl;); - assert_axiom(detected); - return; - } - - // check whether new_nn1 and new_nn2 are still concats - - bool n1IsConcat = u.str.is_concat(a_new_nn1); - bool n2IsConcat = u.str.is_concat(a_new_nn2); - if (!n1IsConcat && n2IsConcat) { - TRACE(str, tout << "nn1_new is not a concat" << std::endl;); - if (u.str.is_string(a_new_nn1)) { - simplify_parent(new_nn2, new_nn1); - } - return; - } else if (n1IsConcat && !n2IsConcat) { - TRACE(str, tout << "nn2_new is not a concat" << std::endl;); - if (u.str.is_string(a_new_nn2)) { - simplify_parent(new_nn1, new_nn2); - } - return; - } else if (!n1IsConcat && !n2IsConcat) { - // normally this should never happen, because group_terms_by_eqc() should have pre-simplified - // as much as possible. however, we make a defensive check here just in case - TRACE(str, tout << "WARNING: nn1_new and nn2_new both simplify to non-concat terms" << std::endl;); - return; - } - - expr * v1_arg0 = a_new_nn1->get_arg(0); - expr * v1_arg1 = a_new_nn1->get_arg(1); - expr * v2_arg0 = a_new_nn2->get_arg(0); - expr * v2_arg1 = a_new_nn2->get_arg(1); - - if (!in_same_eqc(new_nn1, new_nn2) && (nn1 != new_nn1 || nn2 != new_nn2)) { - int ii4 = 0; - expr* item[3]; - if (nn1 != new_nn1) { - item[ii4++] = ctx.mk_eq_atom(nn1, new_nn1); - } - if (nn2 != new_nn2) { - item[ii4++] = ctx.mk_eq_atom(nn2, new_nn2); - } - item[ii4++] = ctx.mk_eq_atom(nn1, nn2); - expr_ref premise(m.mk_and(ii4, item), m); - expr_ref conclusion(ctx.mk_eq_atom(new_nn1, new_nn2), m); - assert_implication(premise, conclusion); - } - - // start to split both concats - check_and_init_cut_var(v1_arg0); - check_and_init_cut_var(v1_arg1); - check_and_init_cut_var(v2_arg0); - check_and_init_cut_var(v2_arg1); - - //************************************************************* - // case 1: concat(x, y) = concat(m, n) - //************************************************************* - if (is_concat_eq_type1(new_nn1, new_nn2)) { - process_concat_eq_type1(new_nn1, new_nn2); - return; - } - - //************************************************************* - // case 2: concat(x, y) = concat(m, "str") - //************************************************************* - if (is_concat_eq_type2(new_nn1, new_nn2)) { - process_concat_eq_type2(new_nn1, new_nn2); - return; - } - - //************************************************************* - // case 3: concat(x, y) = concat("str", n) - //************************************************************* - if (is_concat_eq_type3(new_nn1, new_nn2)) { - process_concat_eq_type3(new_nn1, new_nn2); - return; - } - - //************************************************************* - // case 4: concat("str1", y) = concat("str2", n) - //************************************************************* - if (is_concat_eq_type4(new_nn1, new_nn2)) { - process_concat_eq_type4(new_nn1, new_nn2); - return; - } - - //************************************************************* - // case 5: concat(x, "str1") = concat(m, "str2") - //************************************************************* - if (is_concat_eq_type5(new_nn1, new_nn2)) { - process_concat_eq_type5(new_nn1, new_nn2); - return; - } - //************************************************************* - // case 6: concat("str1", y) = concat(m, "str2") - //************************************************************* - if (is_concat_eq_type6(new_nn1, new_nn2)) { - process_concat_eq_type6(new_nn1, new_nn2); - return; - } - - } - - /* - * Returns true if attempting to process a concat equality between lhs and rhs - * will result in overlapping variables (false otherwise). - */ - bool theory_str::will_result_in_overlap(expr * lhs, expr * rhs) { - ast_manager & m = get_manager(); - - expr_ref new_nn1(simplify_concat(lhs), m); - expr_ref new_nn2(simplify_concat(rhs), m); - app * a_new_nn1 = to_app(new_nn1); - app * a_new_nn2 = to_app(new_nn2); - - bool n1IsConcat = u.str.is_concat(a_new_nn1); - bool n2IsConcat = u.str.is_concat(a_new_nn2); - if (!n1IsConcat && !n2IsConcat) { - // we simplified both sides to non-concat expressions... - return false; - } - - expr * v1_arg0 = a_new_nn1->get_arg(0); - expr * v1_arg1 = a_new_nn1->get_arg(1); - expr * v2_arg0 = a_new_nn2->get_arg(0); - expr * v2_arg1 = a_new_nn2->get_arg(1); - - TRACE(str, tout << "checking whether " << mk_pp(new_nn1, m) << " and " << mk_pp(new_nn1, m) << " might overlap." << std::endl;); - - check_and_init_cut_var(v1_arg0); - check_and_init_cut_var(v1_arg1); - check_and_init_cut_var(v2_arg0); - check_and_init_cut_var(v2_arg1); - - //************************************************************* - // case 1: concat(x, y) = concat(m, n) - //************************************************************* - if (is_concat_eq_type1(new_nn1, new_nn2)) { - TRACE(str, tout << "Type 1 check." << std::endl;); - expr * x = to_app(new_nn1)->get_arg(0); - expr * y = to_app(new_nn1)->get_arg(1); - expr * m = to_app(new_nn2)->get_arg(0); - expr * n = to_app(new_nn2)->get_arg(1); - - if (has_self_cut(m, y)) { - TRACE(str, tout << "Possible overlap found" << std::endl; print_cut_var(m, tout); print_cut_var(y, tout);); - return true; - } else if (has_self_cut(x, n)) { - TRACE(str, tout << "Possible overlap found" << std::endl; print_cut_var(x, tout); print_cut_var(n, tout);); - return true; - } else { - return false; - } - } - - //************************************************************* - // case 2: concat(x, y) = concat(m, "str") - //************************************************************* - if (is_concat_eq_type2(new_nn1, new_nn2)) { - - expr * y = nullptr; - expr * m = nullptr; - expr * v1_arg0 = to_app(new_nn1)->get_arg(0); - expr * v1_arg1 = to_app(new_nn1)->get_arg(1); - expr * v2_arg0 = to_app(new_nn2)->get_arg(0); - expr * v2_arg1 = to_app(new_nn2)->get_arg(1); - - if (u.str.is_string(v1_arg1) && !u.str.is_string(v2_arg1)) { - m = v1_arg0; - y = v2_arg1; - } else { - m = v2_arg0; - y = v1_arg1; - } - - if (has_self_cut(m, y)) { - TRACE(str, tout << "Possible overlap found" << std::endl; print_cut_var(m, tout); print_cut_var(y, tout);); - return true; - } else { - return false; - } - } - - //************************************************************* - // case 3: concat(x, y) = concat("str", n) - //************************************************************* - if (is_concat_eq_type3(new_nn1, new_nn2)) { - expr * v1_arg0 = to_app(new_nn1)->get_arg(0); - expr * v1_arg1 = to_app(new_nn1)->get_arg(1); - expr * v2_arg0 = to_app(new_nn2)->get_arg(0); - expr * v2_arg1 = to_app(new_nn2)->get_arg(1); - - expr * x = nullptr; - expr * n = nullptr; - - if (u.str.is_string(v1_arg0) && !u.str.is_string(v2_arg0)) { - n = v1_arg1; - x = v2_arg0; - } else { - n = v2_arg1; - x = v1_arg0; - } - if (has_self_cut(x, n)) { - TRACE(str, tout << "Possible overlap found" << std::endl; print_cut_var(x, tout); print_cut_var(n, tout);); - return true; - } else { - return false; - } - } - - //************************************************************* - // case 4: concat("str1", y) = concat("str2", n) - //************************************************************* - if (is_concat_eq_type4(new_nn1, new_nn2)) { - // This case can never result in an overlap. - return false; - } - - //************************************************************* - // case 5: concat(x, "str1") = concat(m, "str2") - //************************************************************* - if (is_concat_eq_type5(new_nn1, new_nn2)) { - // This case can never result in an overlap. - return false; - } - //************************************************************* - // case 6: concat("str1", y) = concat(m, "str2") - //************************************************************* - if (is_concat_eq_type6(new_nn1, new_nn2)) { - expr * v1_arg0 = to_app(new_nn1)->get_arg(0); - expr * v1_arg1 = to_app(new_nn1)->get_arg(1); - expr * v2_arg0 = to_app(new_nn2)->get_arg(0); - expr * v2_arg1 = to_app(new_nn2)->get_arg(1); - - expr * y = nullptr; - expr * m = nullptr; - - if (u.str.is_string(v1_arg0)) { - y = v1_arg1; - m = v2_arg0; - } else { - y = v2_arg1; - m = v1_arg0; - } - if (has_self_cut(m, y)) { - TRACE(str, tout << "Possible overlap found" << std::endl; print_cut_var(m, tout); print_cut_var(y, tout);); - return true; - } else { - return false; - } - } - - TRACE(str, tout << "warning: unrecognized concat case" << std::endl;); - return false; - } - - /************************************************************* - * Type 1: concat(x, y) = concat(m, n) - * x, y, m and n all variables - *************************************************************/ - bool theory_str::is_concat_eq_type1(expr * concatAst1, expr * concatAst2) { - expr * x = to_app(concatAst1)->get_arg(0); - expr * y = to_app(concatAst1)->get_arg(1); - expr * m = to_app(concatAst2)->get_arg(0); - expr * n = to_app(concatAst2)->get_arg(1); - - if (!u.str.is_string(x) && !u.str.is_string(y) && !u.str.is_string(m) && !u.str.is_string(n)) { - return true; - } else { - return false; - } - } - - void theory_str::process_concat_eq_type1(expr * concatAst1, expr * concatAst2) { - ast_manager & mgr = get_manager(); - - bool overlapAssumptionUsed = false; - - TRACE(str, tout << "process_concat_eq TYPE 1" << std::endl - << "concatAst1 = " << mk_ismt2_pp(concatAst1, mgr) << std::endl - << "concatAst2 = " << mk_ismt2_pp(concatAst2, mgr) << std::endl; - ); - - if (!u.str.is_concat(to_app(concatAst1))) { - TRACE(str, tout << "concatAst1 is not a concat function" << std::endl;); - return; - } - if (!u.str.is_concat(to_app(concatAst2))) { - TRACE(str, tout << "concatAst2 is not a concat function" << std::endl;); - return; - } - expr * x = to_app(concatAst1)->get_arg(0); - expr * y = to_app(concatAst1)->get_arg(1); - expr * m = to_app(concatAst2)->get_arg(0); - expr * n = to_app(concatAst2)->get_arg(1); - - rational x_len, y_len, m_len, n_len; - bool x_len_exists = get_len_value(x, x_len); - bool y_len_exists = get_len_value(y, y_len); - bool m_len_exists = get_len_value(m, m_len); - bool n_len_exists = get_len_value(n, n_len); - - int splitType = -1; - if (x_len_exists && m_len_exists) { - TRACE(str, tout << "length values found: x/m" << std::endl;); - if (x_len < m_len) { - splitType = 0; - } else if (x_len == m_len) { - splitType = 1; - } else { - splitType = 2; - } - } - - if (splitType == -1 && y_len_exists && n_len_exists) { - TRACE(str, tout << "length values found: y/n" << std::endl;); - if (y_len > n_len) { - splitType = 0; - } else if (y_len == n_len) { - splitType = 1; - } else { - splitType = 2; - } - } - - TRACE(str, tout - << "len(x) = " << (x_len_exists ? x_len.to_string() : "?") << std::endl - << "len(y) = " << (y_len_exists ? y_len.to_string() : "?") << std::endl - << "len(m) = " << (m_len_exists ? m_len.to_string() : "?") << std::endl - << "len(n) = " << (n_len_exists ? n_len.to_string() : "?") << std::endl - << "split type " << splitType << std::endl; - ); - - expr_ref t1(mgr), t2(mgr); - expr * xorFlag = nullptr; - - std::pair key1(concatAst1, concatAst2); - std::pair key2(concatAst2, concatAst1); - - // check the entries in this map to make sure they're still in scope - // before we use them. - - std::map, std::map >::iterator entry1 = varForBreakConcat.find(key1); - std::map, std::map >::iterator entry2 = varForBreakConcat.find(key2); - - bool entry1InScope; - if (entry1 == varForBreakConcat.end()) { - entry1InScope = false; - } else { - if (internal_variable_set.find((entry1->second)[0]) == internal_variable_set.end() - || internal_variable_set.find((entry1->second)[1]) == internal_variable_set.end() - /*|| internal_variable_set.find((entry1->second)[2]) == internal_variable_set.end() */) { - entry1InScope = false; - } else { - entry1InScope = true; - } - } - - bool entry2InScope; - if (entry2 == varForBreakConcat.end()) { - entry2InScope = false; - } else { - if (internal_variable_set.find((entry2->second)[0]) == internal_variable_set.end() - || internal_variable_set.find((entry2->second)[1]) == internal_variable_set.end() - /* || internal_variable_set.find((entry2->second)[2]) == internal_variable_set.end() */) { - entry2InScope = false; - } else { - entry2InScope = true; - } - } - - TRACE(str, tout << "entry 1 " << (entry1InScope ? "in scope" : "not in scope") << std::endl - << "entry 2 " << (entry2InScope ? "in scope" : "not in scope") << std::endl;); - - if (!entry1InScope && !entry2InScope) { - t1 = mk_nonempty_str_var(); - t2 = mk_nonempty_str_var(); - xorFlag = mk_internal_xor_var(); - check_and_init_cut_var(t1); - check_and_init_cut_var(t2); - varForBreakConcat[key1][0] = t1; - varForBreakConcat[key1][1] = t2; - varForBreakConcat[key1][2] = xorFlag; - } else { - // match found - if (entry1InScope) { - t1 = varForBreakConcat[key1][0]; - t2 = varForBreakConcat[key1][1]; - xorFlag = varForBreakConcat[key1][2]; - } else { - t1 = varForBreakConcat[key2][0]; - t2 = varForBreakConcat[key2][1]; - xorFlag = varForBreakConcat[key2][2]; - } - refresh_theory_var(t1); - add_nonempty_constraint(t1); - refresh_theory_var(t2); - add_nonempty_constraint(t2); - } - - // For split types 0 through 2, we can get away with providing - // fewer split options since more length information is available. - if (splitType == 0) { - //-------------------------------------- - // Type 0: M cuts Y. - // len(x) < len(m) || len(y) > len(n) - //-------------------------------------- - expr_ref_vector ax_l_items(mgr); - expr_ref_vector ax_r_items(mgr); - - ax_l_items.push_back(ctx.mk_eq_atom(concatAst1, concatAst2)); - - expr_ref x_t1(mk_concat(x, t1), mgr); - expr_ref t1_n(mk_concat(t1, n), mgr); - - ax_r_items.push_back(ctx.mk_eq_atom(m, x_t1)); - ax_r_items.push_back(ctx.mk_eq_atom(y, t1_n)); - - if (m_len_exists && x_len_exists) { - ax_l_items.push_back(ctx.mk_eq_atom(mk_strlen(x), mk_int(x_len))); - ax_l_items.push_back(ctx.mk_eq_atom(mk_strlen(m), mk_int(m_len))); - rational m_sub_x = m_len - x_len; - ax_r_items.push_back(ctx.mk_eq_atom(mk_strlen(t1), mk_int(m_sub_x))); - } else { - ax_l_items.push_back(ctx.mk_eq_atom(mk_strlen(y), mk_int(y_len))); - ax_l_items.push_back(ctx.mk_eq_atom(mk_strlen(n), mk_int(n_len))); - rational y_sub_n = y_len - n_len; - ax_r_items.push_back(ctx.mk_eq_atom(mk_strlen(t1), mk_int(y_sub_n))); - } - - expr_ref ax_l(mk_and(ax_l_items), mgr); - expr_ref ax_r(mk_and(ax_r_items), mgr); - - if (!has_self_cut(m, y)) { - // Cut Info - add_cut_info_merge(t1, sLevel, m); - add_cut_info_merge(t1, sLevel, y); - - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom(ax_l, ax_r), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ax_l, ax_r); - } - } else { - loopDetected = true; - TRACE(str, tout << "AVOID LOOP: SKIPPED" << std::endl;); - TRACE(str, {print_cut_var(m, tout); print_cut_var(y, tout);}); - - if (!overlapAssumptionUsed) { - overlapAssumptionUsed = true; - // add context dependent formula overlap predicate and relate it to the global overlap predicate - sort * s = get_manager().mk_bool_sort(); - expr_ref new_OverlapAssumption_term = expr_ref(mk_fresh_const(newOverlapStr, s), get_manager()); - assert_implication(ax_l, new_OverlapAssumption_term); - assert_implication(new_OverlapAssumption_term, m_theoryStrOverlapAssumption_term); - } - } - } else if (splitType == 1) { - // Type 1: - // len(x) = len(m) || len(y) = len(n) - expr_ref ax_l1(ctx.mk_eq_atom(concatAst1, concatAst2), mgr); - expr_ref ax_l2(mgr.mk_or(ctx.mk_eq_atom(mk_strlen(x), mk_strlen(m)), ctx.mk_eq_atom(mk_strlen(y), mk_strlen(n))), mgr); - expr_ref ax_l(mgr.mk_and(ax_l1, ax_l2), mgr); - expr_ref ax_r(mgr.mk_and(ctx.mk_eq_atom(x,m), ctx.mk_eq_atom(y,n)), mgr); - assert_implication(ax_l, ax_r); - } else if (splitType == 2) { - // Type 2: X cuts N. - // len(x) > len(m) || len(y) < len(n) - expr_ref m_t2(mk_concat(m, t2), mgr); - expr_ref t2_y(mk_concat(t2, y), mgr); - - expr_ref_vector ax_l_items(mgr); - ax_l_items.push_back(ctx.mk_eq_atom(concatAst1, concatAst2)); - - expr_ref_vector ax_r_items(mgr); - ax_r_items.push_back(ctx.mk_eq_atom(x, m_t2)); - ax_r_items.push_back(ctx.mk_eq_atom(t2_y, n)); - - if (m_len_exists && x_len_exists) { - ax_l_items.push_back(ctx.mk_eq_atom(mk_strlen(x), mk_int(x_len))); - ax_l_items.push_back(ctx.mk_eq_atom(mk_strlen(m), mk_int(m_len))); - rational x_sub_m = x_len - m_len; - ax_r_items.push_back(ctx.mk_eq_atom(mk_strlen(t2), mk_int(x_sub_m))); - } else { - ax_l_items.push_back(ctx.mk_eq_atom(mk_strlen(y), mk_int(y_len))); - ax_l_items.push_back(ctx.mk_eq_atom(mk_strlen(n), mk_int(n_len))); - rational n_sub_y = n_len - y_len; - ax_r_items.push_back(ctx.mk_eq_atom(mk_strlen(t2), mk_int(n_sub_y))); - } - - expr_ref ax_l(mk_and(ax_l_items), mgr); - expr_ref ax_r(mk_and(ax_r_items), mgr); - - if (!has_self_cut(x, n)) { - // Cut Info - add_cut_info_merge(t2, sLevel, x); - add_cut_info_merge(t2, sLevel, n); - - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom(ax_l, ax_r), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ax_l, ax_r); - } - } else { - loopDetected = true; - - TRACE(str, tout << "AVOID LOOP: SKIPPED" << std::endl;); - TRACE(str, {print_cut_var(m, tout); print_cut_var(y, tout);}); - - if (!overlapAssumptionUsed) { - overlapAssumptionUsed = true; - // add context dependent formula overlap predicate and relate it to the global overlap predicate - sort * s = get_manager().mk_bool_sort(); - expr_ref new_OverlapAssumption_term = expr_ref(mk_fresh_const(newOverlapStr, s), get_manager()); - assert_implication(ax_l, new_OverlapAssumption_term); - assert_implication(new_OverlapAssumption_term, m_theoryStrOverlapAssumption_term); - } - - } - } else if (splitType == -1) { - // Here we don't really have a choice. We have no length information at all... - - // This vector will eventually contain one term for each possible arrangement we explore. - expr_ref_vector arrangement_disjunction(mgr); - - // break option 1: m cuts y - // len(x) < len(m) || len(y) > len(n) - if (!avoidLoopCut || !has_self_cut(m, y)) { - expr_ref_vector and_item(mgr); - // break down option 1-1 - expr_ref x_t1(mk_concat(x, t1), mgr); - expr_ref t1_n(mk_concat(t1, n), mgr); - - and_item.push_back(ctx.mk_eq_atom(m, x_t1)); - and_item.push_back(ctx.mk_eq_atom(y, t1_n)); - - expr_ref x_plus_t1(m_autil.mk_add(mk_strlen(x), mk_strlen(t1)), mgr); - and_item.push_back(ctx.mk_eq_atom(mk_strlen(m), x_plus_t1)); - // These were crashing the solver because the integer theory - // expects a constant on the right-hand side. - // The things we want to assert here are len(m) > len(x) and len(y) > len(n). - // We rewrite A > B as A-B > 0 and then as not(A-B <= 0), - // and then, *because we aren't allowed to use subtraction*, - // as not(A + -1*B <= 0) - and_item.push_back( - mgr.mk_not(m_autil.mk_le( - m_autil.mk_add(mk_strlen(m), m_autil.mk_mul(mk_int(-1), mk_strlen(x))), - mk_int(0))) ); - and_item.push_back( - mgr.mk_not(m_autil.mk_le( - m_autil.mk_add(mk_strlen(y),m_autil.mk_mul(mk_int(-1), mk_strlen(n))), - mk_int(0))) ); - - expr_ref option1(mk_and(and_item), mgr); - arrangement_disjunction.push_back(option1); - add_theory_aware_branching_info(option1, 0.1, l_true); - - add_cut_info_merge(t1, ctx.get_scope_level(), m); - add_cut_info_merge(t1, ctx.get_scope_level(), y); - } else { - loopDetected = true; - - TRACE(str, tout << "AVOID LOOP: SKIPPED" << std::endl;); - TRACE(str, {print_cut_var(m, tout); print_cut_var(y, tout);}); - - if (!overlapAssumptionUsed) { - overlapAssumptionUsed = true; - // add context dependent formula overlap predicate and relate it to the global overlap predicate - sort * s = get_manager().mk_bool_sort(); - expr_ref new_OverlapAssumption_term = expr_ref(mk_fresh_const(newOverlapStr, s), get_manager()); - arrangement_disjunction.push_back(new_OverlapAssumption_term); - assert_implication(new_OverlapAssumption_term, m_theoryStrOverlapAssumption_term); - } - - } - - // break option 2: - // x = m . t2 - // n = t2 . y - if (!avoidLoopCut || !has_self_cut(x, n)) { - expr_ref_vector and_item(mgr); - // break down option 1-2 - expr_ref m_t2(mk_concat(m, t2), mgr); - expr_ref t2_y(mk_concat(t2, y), mgr); - - and_item.push_back(ctx.mk_eq_atom(x, m_t2)); - and_item.push_back(ctx.mk_eq_atom(n, t2_y)); - - - expr_ref m_plus_t2(m_autil.mk_add(mk_strlen(m), mk_strlen(t2)), mgr); - - and_item.push_back(ctx.mk_eq_atom(mk_strlen(x), m_plus_t2)); - // want len(x) > len(m) and len(n) > len(y) - and_item.push_back( - mgr.mk_not(m_autil.mk_le( - m_autil.mk_add(mk_strlen(x), m_autil.mk_mul(mk_int(-1), mk_strlen(m))), - mk_int(0))) ); - and_item.push_back( - mgr.mk_not(m_autil.mk_le( - m_autil.mk_add(mk_strlen(n), m_autil.mk_mul(mk_int(-1), mk_strlen(y))), - mk_int(0))) ); - - expr_ref option2(mk_and(and_item), mgr); - arrangement_disjunction.push_back(option2); - add_theory_aware_branching_info(option2, 0.1, l_true); - - add_cut_info_merge(t2, ctx.get_scope_level(), x); - add_cut_info_merge(t2, ctx.get_scope_level(), n); - } else { - loopDetected = true; - - TRACE(str, tout << "AVOID LOOP: SKIPPED" << std::endl;); - TRACE(str, {print_cut_var(x, tout); print_cut_var(n, tout);}); - - if (!overlapAssumptionUsed) { - overlapAssumptionUsed = true; - // add context dependent formula overlap predicate and relate it to the global overlap predicate - sort * s = get_manager().mk_bool_sort(); - expr_ref new_OverlapAssumption_term = expr_ref(mk_fresh_const(newOverlapStr, s), get_manager()); - arrangement_disjunction.push_back(new_OverlapAssumption_term); - assert_implication(new_OverlapAssumption_term, m_theoryStrOverlapAssumption_term); - } - - } - - // option 3: - // x = m, y = n - if (can_two_nodes_eq(x, m) && can_two_nodes_eq(y, n)) { - expr_ref_vector and_item(mgr); - - and_item.push_back(ctx.mk_eq_atom(x, m)); - and_item.push_back(ctx.mk_eq_atom(y, n)); - and_item.push_back(ctx.mk_eq_atom(mk_strlen(x), mk_strlen(m))); - and_item.push_back(ctx.mk_eq_atom(mk_strlen(y), mk_strlen(n))); - - expr_ref option3(mk_and(and_item), mgr); - arrangement_disjunction.push_back(option3); - // prioritize this case, it is easier - add_theory_aware_branching_info(option3, 0.5, l_true); - } - - if (!arrangement_disjunction.empty()) { - expr_ref premise(ctx.mk_eq_atom(concatAst1, concatAst2), mgr); - expr_ref conclusion(mk_or(arrangement_disjunction), mgr); - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom(premise, conclusion), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(premise, conclusion); - } - // assert mutual exclusion between each branch of the arrangement - generate_mutual_exclusion(arrangement_disjunction); - } else { - TRACE(str, tout << "STOP: no split option found for two EQ concats." << std::endl;); - } - } // (splitType == -1) - } - - /************************************************************* - * Type 2: concat(x, y) = concat(m, "str") - *************************************************************/ - bool theory_str::is_concat_eq_type2(expr * concatAst1, expr * concatAst2) { - expr * v1_arg0 = to_app(concatAst1)->get_arg(0); - expr * v1_arg1 = to_app(concatAst1)->get_arg(1); - expr * v2_arg0 = to_app(concatAst2)->get_arg(0); - expr * v2_arg1 = to_app(concatAst2)->get_arg(1); - - if ((!u.str.is_string(v1_arg0)) && u.str.is_string(v1_arg1) - && (!u.str.is_string(v2_arg0)) && (!u.str.is_string(v2_arg1))) { - return true; - } else if ((!u.str.is_string(v2_arg0)) && u.str.is_string(v2_arg1) - && (!u.str.is_string(v1_arg0)) && (!u.str.is_string(v1_arg1))) { - return true; - } else { - return false; - } - } - - void theory_str::process_concat_eq_type2(expr * concatAst1, expr * concatAst2) { - ast_manager & mgr = get_manager(); - - bool overlapAssumptionUsed = false; - - TRACE(str, tout << "process_concat_eq TYPE 2" << std::endl - << "concatAst1 = " << mk_ismt2_pp(concatAst1, mgr) << std::endl - << "concatAst2 = " << mk_ismt2_pp(concatAst2, mgr) << std::endl; - ); - - if (!u.str.is_concat(to_app(concatAst1))) { - TRACE(str, tout << "concatAst1 is not a concat function" << std::endl;); - return; - } - if (!u.str.is_concat(to_app(concatAst2))) { - TRACE(str, tout << "concatAst2 is not a concat function" << std::endl;); - return; - } - - expr * x = nullptr; - expr * y = nullptr; - expr * strAst = nullptr; - expr * m = nullptr; - - expr * v1_arg0 = to_app(concatAst1)->get_arg(0); - expr * v1_arg1 = to_app(concatAst1)->get_arg(1); - expr * v2_arg0 = to_app(concatAst2)->get_arg(0); - expr * v2_arg1 = to_app(concatAst2)->get_arg(1); - - if (u.str.is_string(v1_arg1) && !u.str.is_string(v2_arg1)) { - m = v1_arg0; - strAst = v1_arg1; - x = v2_arg0; - y = v2_arg1; - } else { - m = v2_arg0; - strAst = v2_arg1; - x = v1_arg0; - y = v1_arg1; - } - - zstring strValue; - u.str.is_string(strAst, strValue); - - rational x_len, y_len, m_len, str_len; - bool x_len_exists = get_len_value(x, x_len); - bool y_len_exists = get_len_value(y, y_len); - bool m_len_exists = get_len_value(m, m_len); - bool str_len_exists = true; - str_len = rational(strValue.length()); - - // setup - - expr * xorFlag = nullptr; - expr_ref temp1(mgr); - std::pair key1(concatAst1, concatAst2); - std::pair key2(concatAst2, concatAst1); - - // check the entries in this map to make sure they're still in scope - // before we use them. - - std::map, std::map >::iterator entry1 = varForBreakConcat.find(key1); - std::map, std::map >::iterator entry2 = varForBreakConcat.find(key2); - - // prevent checking scope for the XOR term, as it's always in the same scope as the split var - - bool entry1InScope; - if (entry1 == varForBreakConcat.end()) { - entry1InScope = false; - } else { - if (internal_variable_set.find((entry1->second)[0]) == internal_variable_set.end() - /*|| internal_variable_set.find((entry1->second)[1]) == internal_variable_set.end()*/ - ) { - entry1InScope = false; - } else { - entry1InScope = true; - } - } - - bool entry2InScope; - if (entry2 == varForBreakConcat.end()) { - entry2InScope = false; - } else { - if (internal_variable_set.find((entry2->second)[0]) == internal_variable_set.end() - /*|| internal_variable_set.find((entry2->second)[1]) == internal_variable_set.end()*/ - ) { - entry2InScope = false; - } else { - entry2InScope = true; - } - } - - TRACE(str, tout << "entry 1 " << (entry1InScope ? "in scope" : "not in scope") << std::endl - << "entry 2 " << (entry2InScope ? "in scope" : "not in scope") << std::endl;); - - - if (!entry1InScope && !entry2InScope) { - temp1 = mk_nonempty_str_var(); - xorFlag = mk_internal_xor_var(); - varForBreakConcat[key1][0] = temp1; - varForBreakConcat[key1][1] = xorFlag; - } else { - if (entry1InScope) { - temp1 = varForBreakConcat[key1][0]; - xorFlag = varForBreakConcat[key1][1]; - } else if (entry2InScope) { - temp1 = varForBreakConcat[key2][0]; - xorFlag = varForBreakConcat[key2][1]; - } - refresh_theory_var(temp1); - add_nonempty_constraint(temp1); - } - - int splitType = -1; - if (x_len_exists && m_len_exists) { - if (x_len < m_len) - splitType = 0; - else if (x_len == m_len) - splitType = 1; - else - splitType = 2; - } - if (splitType == -1 && y_len_exists && str_len_exists) { - if (y_len > str_len) - splitType = 0; - else if (y_len == str_len) - splitType = 1; - else - splitType = 2; - } - - TRACE(str, tout << "Split type " << splitType << std::endl;); - - // Provide fewer split options when length information is available. - - if (splitType == 0) { - // M cuts Y - // | x | y | - // | m | str | - expr_ref temp1_strAst(mk_concat(temp1, strAst), mgr); - if (can_two_nodes_eq(y, temp1_strAst)) { - expr_ref_vector l_items(mgr); - l_items.push_back(ctx.mk_eq_atom(concatAst1, concatAst2)); - - expr_ref_vector r_items(mgr); - expr_ref x_temp1(mk_concat(x, temp1), mgr); - r_items.push_back(ctx.mk_eq_atom(m, x_temp1)); - r_items.push_back(ctx.mk_eq_atom(y, temp1_strAst)); - - if (x_len_exists && m_len_exists) { - l_items.push_back(ctx.mk_eq_atom(mk_strlen(x), mk_int(x_len))); - l_items.push_back(ctx.mk_eq_atom(mk_strlen(m), mk_int(m_len))); - rational m_sub_x = (m_len - x_len); - r_items.push_back(ctx.mk_eq_atom(mk_strlen(temp1), mk_int(m_sub_x))); - } else { - l_items.push_back(ctx.mk_eq_atom(mk_strlen(y), mk_int(y_len))); - l_items.push_back(ctx.mk_eq_atom(mk_strlen(strAst), mk_int(str_len))); - rational y_sub_str = (y_len - str_len); - r_items.push_back(ctx.mk_eq_atom(mk_strlen(temp1), mk_int(y_sub_str))); - } - - expr_ref ax_l(mk_and(l_items), mgr); - expr_ref ax_r(mk_and(r_items), mgr); - - if (!avoidLoopCut || !(has_self_cut(m, y))) { - // break down option 2-1 - add_cut_info_merge(temp1, sLevel, y); - add_cut_info_merge(temp1, sLevel, m); - - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom(ax_l, ax_r), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ax_l, ax_r); - } - } else { - loopDetected = true; - - TRACE(str, tout << "AVOID LOOP: SKIP" << std::endl;); - TRACE(str, {print_cut_var(m, tout); print_cut_var(y, tout);}); - - if (!overlapAssumptionUsed) { - overlapAssumptionUsed = true; - // add context dependent formula overlap predicate and relate it to the global overlap predicate - sort * s = get_manager().mk_bool_sort(); - expr_ref new_OverlapAssumption_term = expr_ref(mk_fresh_const(newOverlapStr, s), get_manager()); - assert_implication(ax_l, new_OverlapAssumption_term); - assert_implication(new_OverlapAssumption_term, m_theoryStrOverlapAssumption_term); - } - - } - } - } else if (splitType == 1) { - // | x | y | - // | m | str | - expr_ref ax_l1(ctx.mk_eq_atom(concatAst1, concatAst2), mgr); - expr_ref ax_l2(mgr.mk_or( - ctx.mk_eq_atom(mk_strlen(x), mk_strlen(m)), - ctx.mk_eq_atom(mk_strlen(y), mk_strlen(strAst))), mgr); - expr_ref ax_l(mgr.mk_and(ax_l1, ax_l2), mgr); - expr_ref ax_r(mgr.mk_and(ctx.mk_eq_atom(x, m), ctx.mk_eq_atom(y, strAst)), mgr); - assert_implication(ax_l, ax_r); - } else if (splitType == 2) { - // m cut y, - // | x | y | - // | m | str | - rational lenDelta; - expr_ref_vector l_items(mgr); - l_items.push_back(ctx.mk_eq_atom(concatAst1, concatAst2)); - if (x_len_exists && m_len_exists) { - l_items.push_back(ctx.mk_eq_atom(mk_strlen(x), mk_int(x_len))); - l_items.push_back(ctx.mk_eq_atom(mk_strlen(m), mk_int(m_len))); - lenDelta = x_len - m_len; - } else { - l_items.push_back(ctx.mk_eq_atom(mk_strlen(y), mk_int(y_len))); - lenDelta = str_len - y_len; - } - TRACE(str, - tout - << "xLen? " << (x_len_exists ? "yes" : "no") << std::endl - << "mLen? " << (m_len_exists ? "yes" : "no") << std::endl - << "yLen? " << (y_len_exists ? "yes" : "no") << std::endl - << "xLen = " << x_len.to_string() << std::endl - << "yLen = " << y_len.to_string() << std::endl - << "mLen = " << m_len.to_string() << std::endl - << "strLen = " << str_len.to_string() << std::endl - << "lenDelta = " << lenDelta.to_string() << std::endl - << "strValue = \"" << strValue << "\" (len=" << strValue.length() << ")" << "\n" - ; - ); - - zstring part1Str = strValue.extract(0, lenDelta.get_unsigned()); - zstring part2Str = strValue.extract(lenDelta.get_unsigned(), strValue.length() - lenDelta.get_unsigned()); - - expr_ref prefixStr(mk_string(part1Str), mgr); - expr_ref x_concat(mk_concat(m, prefixStr), mgr); - expr_ref cropStr(mk_string(part2Str), mgr); - - if (can_two_nodes_eq(x, x_concat) && can_two_nodes_eq(y, cropStr)) { - expr_ref_vector r_items(mgr); - r_items.push_back(ctx.mk_eq_atom(x, x_concat)); - r_items.push_back(ctx.mk_eq_atom(y, cropStr)); - expr_ref ax_l(mk_and(l_items), mgr); - expr_ref ax_r(mk_and(r_items), mgr); - - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom(ax_l, ax_r), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ax_l, ax_r); - } - } else { - // negate! It's impossible to split str with these lengths - TRACE(str, tout << "CONFLICT: Impossible to split str with these lengths." << std::endl;); - expr_ref ax_l(mk_and(l_items), mgr); - assert_axiom(mgr.mk_not(ax_l)); - } - } else { - // Split type -1: no idea about the length... - expr_ref_vector arrangement_disjunction(mgr); - - expr_ref temp1_strAst(mk_concat(temp1, strAst), mgr); - - // m cuts y - if (can_two_nodes_eq(y, temp1_strAst)) { - if (!avoidLoopCut || !has_self_cut(m, y)) { - // break down option 2-1 - expr_ref_vector and_item(mgr); - - expr_ref x_temp1(mk_concat(x, temp1), mgr); - and_item.push_back(ctx.mk_eq_atom(m, x_temp1)); - and_item.push_back(ctx.mk_eq_atom(y, temp1_strAst)); - - and_item.push_back(ctx.mk_eq_atom(mk_strlen(m), - m_autil.mk_add(mk_strlen(x), mk_strlen(temp1)))); - - expr_ref option1(mk_and(and_item), mgr); - arrangement_disjunction.push_back(option1); - add_theory_aware_branching_info(option1, 0.1, l_true); - add_cut_info_merge(temp1, ctx.get_scope_level(), y); - add_cut_info_merge(temp1, ctx.get_scope_level(), m); - } else { - loopDetected = true; - TRACE(str, tout << "AVOID LOOP: SKIPPED" << std::endl;); - TRACE(str, {print_cut_var(m, tout); print_cut_var(y, tout);}); - - if (!overlapAssumptionUsed) { - overlapAssumptionUsed = true; - // add context dependent formula overlap predicate and relate it to the global overlap predicate - sort * s = get_manager().mk_bool_sort(); - expr_ref new_OverlapAssumption_term = expr_ref(mk_fresh_const(newOverlapStr, s), get_manager()); - arrangement_disjunction.push_back(new_OverlapAssumption_term); - assert_implication(new_OverlapAssumption_term, m_theoryStrOverlapAssumption_term); - } - } - } - - for (unsigned int i = 0; i <= strValue.length(); ++i) { - zstring part1Str = strValue.extract(0, i); - zstring part2Str = strValue.extract(i, strValue.length() - i); - expr_ref prefixStr(mk_string(part1Str), mgr); - expr_ref x_concat(mk_concat(m, prefixStr), mgr); - expr_ref cropStr(mk_string(part2Str), mgr); - if (can_two_nodes_eq(x, x_concat) && can_two_nodes_eq(y, cropStr)) { - // break down option 2-2 - expr_ref_vector and_item(mgr); - and_item.push_back(ctx.mk_eq_atom(x, x_concat)); - and_item.push_back(ctx.mk_eq_atom(y, cropStr)); - and_item.push_back(ctx.mk_eq_atom(mk_strlen(y), mk_int(part2Str.length()))); - expr_ref option2(mk_and(and_item), mgr); - arrangement_disjunction.push_back(option2); - double priority; - // prioritize the option where y is equal to the original string - if (i == 0) { - priority = 0.5; - } else { - priority = 0.1; - } - add_theory_aware_branching_info(option2, priority, l_true); - } - } - - if (!arrangement_disjunction.empty()) { - expr_ref implyR(mk_or(arrangement_disjunction), mgr); - - if (m_params.m_StrongArrangements) { - expr_ref implyLHS(ctx.mk_eq_atom(concatAst1, concatAst2), mgr); - expr_ref ax_strong(ctx.mk_eq_atom(implyLHS, implyR), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ctx.mk_eq_atom(concatAst1, concatAst2), implyR); - } - generate_mutual_exclusion(arrangement_disjunction); - } else { - TRACE(str, tout << "STOP: Should not split two EQ concats." << std::endl;); - } - } // (splitType == -1) - } - - /************************************************************* - * Type 3: concat(x, y) = concat("str", n) - *************************************************************/ - bool theory_str::is_concat_eq_type3(expr * concatAst1, expr * concatAst2) { - expr * v1_arg0 = to_app(concatAst1)->get_arg(0); - expr * v1_arg1 = to_app(concatAst1)->get_arg(1); - expr * v2_arg0 = to_app(concatAst2)->get_arg(0); - expr * v2_arg1 = to_app(concatAst2)->get_arg(1); - - if (u.str.is_string(v1_arg0) && (!u.str.is_string(v1_arg1)) - && (!u.str.is_string(v2_arg0)) && (!u.str.is_string(v2_arg1))) { - return true; - } else if (u.str.is_string(v2_arg0) && (!u.str.is_string(v2_arg1)) - && (!u.str.is_string(v1_arg0)) && (!u.str.is_string(v1_arg1))) { - return true; - } else { - return false; - } - } - - void theory_str::process_concat_eq_type3(expr * concatAst1, expr * concatAst2) { - ast_manager & mgr = get_manager(); - - bool overlapAssumptionUsed = false; - - TRACE(str, tout << "process_concat_eq TYPE 3" << std::endl - << "concatAst1 = " << mk_ismt2_pp(concatAst1, mgr) << std::endl - << "concatAst2 = " << mk_ismt2_pp(concatAst2, mgr) << std::endl; - ); - - if (!u.str.is_concat(to_app(concatAst1))) { - TRACE(str, tout << "concatAst1 is not a concat function" << std::endl;); - return; - } - if (!u.str.is_concat(to_app(concatAst2))) { - TRACE(str, tout << "concatAst2 is not a concat function" << std::endl;); - return; - } - - expr * v1_arg0 = to_app(concatAst1)->get_arg(0); - expr * v1_arg1 = to_app(concatAst1)->get_arg(1); - expr * v2_arg0 = to_app(concatAst2)->get_arg(0); - expr * v2_arg1 = to_app(concatAst2)->get_arg(1); - - expr * x = nullptr; - expr * y = nullptr; - expr * strAst = nullptr; - expr * n = nullptr; - - if (u.str.is_string(v1_arg0) && !u.str.is_string(v2_arg0)) { - strAst = v1_arg0; - n = v1_arg1; - x = v2_arg0; - y = v2_arg1; - } else { - strAst = v2_arg0; - n = v2_arg1; - x = v1_arg0; - y = v1_arg1; - } - - zstring strValue; - u.str.is_string(strAst, strValue); - - rational x_len, y_len, str_len, n_len; - bool x_len_exists = get_len_value(x, x_len); - bool y_len_exists = get_len_value(y, y_len); - str_len = rational((unsigned)(strValue.length())); - bool n_len_exists = get_len_value(n, n_len); - - expr_ref xorFlag(mgr); - expr_ref temp1(mgr); - std::pair key1(concatAst1, concatAst2); - std::pair key2(concatAst2, concatAst1); - - // check the entries in this map to make sure they're still in scope - // before we use them. - - std::map, std::map >::iterator entry1 = varForBreakConcat.find(key1); - std::map, std::map >::iterator entry2 = varForBreakConcat.find(key2); - - bool entry1InScope; - if (entry1 == varForBreakConcat.end()) { - entry1InScope = false; - } else { - if (internal_variable_set.find((entry1->second)[0]) == internal_variable_set.end() - /* || internal_variable_set.find((entry1->second)[1]) == internal_variable_set.end() */) { - entry1InScope = false; - } else { - entry1InScope = true; - } - } - - bool entry2InScope; - if (entry2 == varForBreakConcat.end()) { - entry2InScope = false; - } else { - if (internal_variable_set.find((entry2->second)[0]) == internal_variable_set.end() - /* || internal_variable_set.find((entry2->second)[1]) == internal_variable_set.end() */) { - entry2InScope = false; - } else { - entry2InScope = true; - } - } - - TRACE(str, tout << "entry 1 " << (entry1InScope ? "in scope" : "not in scope") << std::endl - << "entry 2 " << (entry2InScope ? "in scope" : "not in scope") << std::endl;); - - - if (!entry1InScope && !entry2InScope) { - temp1 = mk_nonempty_str_var(); - xorFlag = mk_internal_xor_var(); - - varForBreakConcat[key1][0] = temp1; - varForBreakConcat[key1][1] = xorFlag; - } else { - if (entry1InScope) { - temp1 = varForBreakConcat[key1][0]; - xorFlag = varForBreakConcat[key1][1]; - } else if (varForBreakConcat.find(key2) != varForBreakConcat.end()) { - temp1 = varForBreakConcat[key2][0]; - xorFlag = varForBreakConcat[key2][1]; - } - refresh_theory_var(temp1); - add_nonempty_constraint(temp1); - } - - - - int splitType = -1; - if (x_len_exists) { - if (x_len < str_len) - splitType = 0; - else if (x_len == str_len) - splitType = 1; - else - splitType = 2; - } - if (splitType == -1 && y_len_exists && n_len_exists) { - if (y_len > n_len) - splitType = 0; - else if (y_len == n_len) - splitType = 1; - else - splitType = 2; - } - - TRACE(str, tout << "Split type " << splitType << std::endl;); - - // Provide fewer split options when length information is available. - if (splitType == 0) { - // | x | y | - // | str | n | - expr_ref_vector litems(mgr); - litems.push_back(ctx.mk_eq_atom(concatAst1, concatAst2)); - rational prefixLen; - if (!x_len_exists) { - prefixLen = str_len - (y_len - n_len); - litems.push_back(ctx.mk_eq_atom(mk_strlen(y), mk_int(y_len))); - litems.push_back(ctx.mk_eq_atom(mk_strlen(n), mk_int(n_len))); - } else { - prefixLen = x_len; - litems.push_back(ctx.mk_eq_atom(mk_strlen(x), mk_int(x_len))); - } - zstring prefixStr = strValue.extract(0, prefixLen.get_unsigned()); - rational str_sub_prefix = str_len - prefixLen; - zstring suffixStr = strValue.extract(prefixLen.get_unsigned(), str_sub_prefix.get_unsigned()); - expr_ref prefixAst(mk_string(prefixStr), mgr); - expr_ref suffixAst(mk_string(suffixStr), mgr); - expr_ref ax_l(mgr.mk_and(litems.size(), litems.data()), mgr); - - expr_ref suf_n_concat(mk_concat(suffixAst, n), mgr); - if (can_two_nodes_eq(x, prefixAst) && can_two_nodes_eq(y, suf_n_concat)) { - expr_ref_vector r_items(mgr); - r_items.push_back(ctx.mk_eq_atom(x, prefixAst)); - r_items.push_back(ctx.mk_eq_atom(y, suf_n_concat)); - - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom(ax_l, mk_and(r_items)), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ax_l, mk_and(r_items)); - } - } else { - // negate! It's impossible to split str with these lengths - TRACE(str, tout << "CONFLICT: Impossible to split str with these lengths." << std::endl;); - assert_axiom(mgr.mk_not(ax_l)); - } - } - else if (splitType == 1) { - expr_ref ax_l1(ctx.mk_eq_atom(concatAst1, concatAst2), mgr); - expr_ref ax_l2(mgr.mk_or( - ctx.mk_eq_atom(mk_strlen(x), mk_strlen(strAst)), - ctx.mk_eq_atom(mk_strlen(y), mk_strlen(n))), mgr); - expr_ref ax_l(mgr.mk_and(ax_l1, ax_l2), mgr); - expr_ref ax_r(mgr.mk_and(ctx.mk_eq_atom(x, strAst), ctx.mk_eq_atom(y, n)), mgr); - - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom(ax_l, ax_r), mgr); - assert_axiom(ax_strong); - } else { - assert_implication(ax_l, ax_r); - } - } - else if (splitType == 2) { - // | x | y | - // | str | n | - expr_ref_vector litems(mgr); - litems.push_back(ctx.mk_eq_atom(concatAst1, concatAst2)); - rational tmpLen; - if (!x_len_exists) { - tmpLen = n_len - y_len; - litems.push_back(ctx.mk_eq_atom(mk_strlen(y), mk_int(y_len))); - litems.push_back(ctx.mk_eq_atom(mk_strlen(n), mk_int(n_len))); - } else { - tmpLen = x_len - str_len; - litems.push_back(ctx.mk_eq_atom(mk_strlen(x), mk_int(x_len))); - } - expr_ref ax_l(mgr.mk_and(litems.size(), litems.data()), mgr); - - expr_ref str_temp1(mk_concat(strAst, temp1), mgr); - expr_ref temp1_y(mk_concat(temp1, y), mgr); - - if (can_two_nodes_eq(x, str_temp1)) { - if (!avoidLoopCut || !(has_self_cut(x, n))) { - expr_ref_vector r_items(mgr); - r_items.push_back(ctx.mk_eq_atom(x, str_temp1)); - r_items.push_back(ctx.mk_eq_atom(n, temp1_y)); - r_items.push_back(ctx.mk_eq_atom(mk_strlen(temp1), mk_int(tmpLen))); - expr_ref ax_r(mk_and(r_items), mgr); - - //Cut Info - add_cut_info_merge(temp1, sLevel, x); - add_cut_info_merge(temp1, sLevel, n); - - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom(ax_l, ax_r), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ax_l, ax_r); - } - } else { - loopDetected = true; - TRACE(str, tout << "AVOID LOOP: SKIPPED" << std::endl;); - TRACE(str, {print_cut_var(x, tout); print_cut_var(n, tout);}); - - if (!overlapAssumptionUsed) { - overlapAssumptionUsed = true; - // add context dependent formula overlap predicate and relate it to the global overlap predicate - sort * s = get_manager().mk_bool_sort(); - expr_ref new_OverlapAssumption_term = expr_ref(mk_fresh_const(newOverlapStr, s), get_manager()); - assert_implication(ax_l, new_OverlapAssumption_term); - assert_implication(new_OverlapAssumption_term, m_theoryStrOverlapAssumption_term); - } - } - } - // else { - // // negate! It's impossible to split str with these lengths - // __debugPrint(logFile, "[Conflict] Negate! It's impossible to split str with these lengths @ %d.\n", __LINE__); - // addAxiom(t, Z3_mk_not(ctx, ax_l), __LINE__); - // } - } - else { - // Split type -1. We know nothing about the length... - - expr_ref_vector arrangement_disjunction(mgr); - - int pos = 1; - (void)pos; - for (unsigned int i = 0; i <= strValue.length(); i++) { - zstring part1Str = strValue.extract(0, i); - zstring part2Str = strValue.extract(i, strValue.length() - i); - expr_ref cropStr(mk_string(part1Str), mgr); - expr_ref suffixStr(mk_string(part2Str), mgr); - expr_ref y_concat(mk_concat(suffixStr, n), mgr); - - if (can_two_nodes_eq(x, cropStr) && can_two_nodes_eq(y, y_concat)) { - expr_ref_vector and_item(mgr); - // break down option 3-1 - expr_ref x_eq_str(ctx.mk_eq_atom(x, cropStr), mgr); - - and_item.push_back(x_eq_str); ++pos; - and_item.push_back(ctx.mk_eq_atom(y, y_concat)); - and_item.push_back(ctx.mk_eq_atom(mk_strlen(x), mk_strlen(cropStr))); ++pos; - - // and_item[pos++] = Z3_mk_eq(ctx, or_item[option], Z3_mk_eq(ctx, mk_length(t, y), mk_length(t, y_concat))); - // adding length constraint for _ = constStr seems slowing things down. - - expr_ref option1(mk_and(and_item), mgr); - ctx.get_rewriter()(option1); - arrangement_disjunction.push_back(option1); - double priority; - if (i == strValue.length()) { - priority = 0.5; - } else { - priority = 0.1; - } - add_theory_aware_branching_info(option1, priority, l_true); - } - } - - expr_ref strAst_temp1(mk_concat(strAst, temp1), mgr); - - - //-------------------------------------------------------- - // x cut n - //-------------------------------------------------------- - if (can_two_nodes_eq(x, strAst_temp1)) { - if (!avoidLoopCut || !(has_self_cut(x, n))) { - // break down option 3-2 - expr_ref_vector and_item(mgr); - - expr_ref temp1_y(mk_concat(temp1, y), mgr); - and_item.push_back(ctx.mk_eq_atom(x, strAst_temp1)); ++pos; - and_item.push_back(ctx.mk_eq_atom(n, temp1_y)); ++pos; - - and_item.push_back(ctx.mk_eq_atom(mk_strlen(x), - m_autil.mk_add(mk_strlen(strAst), mk_strlen(temp1)) ) ); ++pos; - - expr_ref option2(mk_and(and_item), mgr); - arrangement_disjunction.push_back(option2); - add_theory_aware_branching_info(option2, 0.1, l_true); - - add_cut_info_merge(temp1, sLevel, x); - add_cut_info_merge(temp1, sLevel, n); - } else { - loopDetected = true; - TRACE(str, tout << "AVOID LOOP: SKIPPED." << std::endl;); - TRACE(str, {print_cut_var(x, tout); print_cut_var(n, tout);}); - - if (!overlapAssumptionUsed) { - overlapAssumptionUsed = true; - // add context dependent formula overlap predicate and relate it to the global overlap predicate - sort * s = get_manager().mk_bool_sort(); - expr_ref new_OverlapAssumption_term = expr_ref(mk_fresh_const(newOverlapStr, s), get_manager()); - arrangement_disjunction.push_back(new_OverlapAssumption_term); - assert_implication(new_OverlapAssumption_term, m_theoryStrOverlapAssumption_term); - } - } - } - - - if (!arrangement_disjunction.empty()) { - expr_ref implyR(mk_or(arrangement_disjunction), mgr); - - if (m_params.m_StrongArrangements) { - expr_ref ax_lhs(ctx.mk_eq_atom(concatAst1, concatAst2), mgr); - expr_ref ax_strong(ctx.mk_eq_atom(ax_lhs, implyR), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ctx.mk_eq_atom(concatAst1, concatAst2), implyR); - } - generate_mutual_exclusion(arrangement_disjunction); - } else { - TRACE(str, tout << "STOP: should not split two eq. concats" << std::endl;); - } - } - - } - - /************************************************************* - * Type 4: concat("str1", y) = concat("str2", n) - *************************************************************/ - bool theory_str::is_concat_eq_type4(expr * concatAst1, expr * concatAst2) { - expr * v1_arg0 = to_app(concatAst1)->get_arg(0); - expr * v1_arg1 = to_app(concatAst1)->get_arg(1); - expr * v2_arg0 = to_app(concatAst2)->get_arg(0); - expr * v2_arg1 = to_app(concatAst2)->get_arg(1); - - if (u.str.is_string(v1_arg0) && (!u.str.is_string(v1_arg1)) - && u.str.is_string(v2_arg0) && (!u.str.is_string(v2_arg1))) { - return true; - } else { - return false; - } - } - - void theory_str::process_concat_eq_type4(expr * concatAst1, expr * concatAst2) { - ast_manager & mgr = get_manager(); - TRACE(str, tout << "process_concat_eq TYPE 4" << std::endl - << "concatAst1 = " << mk_ismt2_pp(concatAst1, mgr) << std::endl - << "concatAst2 = " << mk_ismt2_pp(concatAst2, mgr) << std::endl; - ); - - if (!u.str.is_concat(to_app(concatAst1))) { - TRACE(str, tout << "concatAst1 is not a concat function" << std::endl;); - return; - } - if (!u.str.is_concat(to_app(concatAst2))) { - TRACE(str, tout << "concatAst2 is not a concat function" << std::endl;); - return; - } - - expr * v1_arg0 = to_app(concatAst1)->get_arg(0); - expr * v1_arg1 = to_app(concatAst1)->get_arg(1); - expr * v2_arg0 = to_app(concatAst2)->get_arg(0); - expr * v2_arg1 = to_app(concatAst2)->get_arg(1); - - expr * str1Ast = v1_arg0; - expr * y = v1_arg1; - expr * str2Ast = v2_arg0; - expr * n = v2_arg1; - - zstring str1Value, str2Value; - u.str.is_string(str1Ast, str1Value); - u.str.is_string(str2Ast, str2Value); - - unsigned int str1Len = str1Value.length(); - unsigned int str2Len = str2Value.length(); - - int commonLen = (str1Len > str2Len) ? str2Len : str1Len; - if (str1Value.extract(0, commonLen) != str2Value.extract(0, commonLen)) { - TRACE(str, tout << "Conflict: " << mk_ismt2_pp(concatAst1, mgr) - << " has no common prefix with " << mk_ismt2_pp(concatAst2, mgr) << std::endl;); - expr_ref toNegate(mgr.mk_not(ctx.mk_eq_atom(concatAst1, concatAst2)), mgr); - assert_axiom(toNegate); - return; - } else { - if (str1Len > str2Len) { - zstring deltaStr = str1Value.extract(str2Len, str1Len - str2Len); - expr_ref tmpAst(mk_concat(mk_string(deltaStr), y), mgr); - if (!in_same_eqc(tmpAst, n)) { - // break down option 4-1 - expr_ref implyR(ctx.mk_eq_atom(n, tmpAst), mgr); - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom( ctx.mk_eq_atom(concatAst1, concatAst2), implyR ), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ctx.mk_eq_atom(concatAst1, concatAst2), implyR); - } - } - } else if (str1Len == str2Len) { - if (!in_same_eqc(n, y)) { - //break down option 4-2 - expr_ref implyR(ctx.mk_eq_atom(n, y), mgr); - - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom( ctx.mk_eq_atom(concatAst1, concatAst2), implyR ), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ctx.mk_eq_atom(concatAst1, concatAst2), implyR); - } - } - } else { - zstring deltaStr = str2Value.extract(str1Len, str2Len - str1Len); - expr_ref tmpAst(mk_concat(mk_string(deltaStr), n), mgr); - if (!in_same_eqc(y, tmpAst)) { - //break down option 4-3 - expr_ref implyR(ctx.mk_eq_atom(y, tmpAst), mgr); - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom( ctx.mk_eq_atom(concatAst1, concatAst2), implyR ), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ctx.mk_eq_atom(concatAst1, concatAst2), implyR); - } - } - } - } - } - - /************************************************************* - * case 5: concat(x, "str1") = concat(m, "str2") - *************************************************************/ - bool theory_str::is_concat_eq_type5(expr * concatAst1, expr * concatAst2) { - expr * v1_arg0 = to_app(concatAst1)->get_arg(0); - expr * v1_arg1 = to_app(concatAst1)->get_arg(1); - expr * v2_arg0 = to_app(concatAst2)->get_arg(0); - expr * v2_arg1 = to_app(concatAst2)->get_arg(1); - - if ((!u.str.is_string(v1_arg0)) && u.str.is_string(v1_arg1) - && (!u.str.is_string(v2_arg0)) && u.str.is_string(v2_arg1)) { - return true; - } else { - return false; - } - } - - void theory_str::process_concat_eq_type5(expr * concatAst1, expr * concatAst2) { - ast_manager & mgr = get_manager(); - TRACE(str, tout << "process_concat_eq TYPE 5" << std::endl - << "concatAst1 = " << mk_ismt2_pp(concatAst1, mgr) << std::endl - << "concatAst2 = " << mk_ismt2_pp(concatAst2, mgr) << std::endl; - ); - - if (!u.str.is_concat(to_app(concatAst1))) { - TRACE(str, tout << "concatAst1 is not a concat function" << std::endl;); - return; - } - if (!u.str.is_concat(to_app(concatAst2))) { - TRACE(str, tout << "concatAst2 is not a concat function" << std::endl;); - return; - } - - expr * v1_arg0 = to_app(concatAst1)->get_arg(0); - expr * v1_arg1 = to_app(concatAst1)->get_arg(1); - expr * v2_arg0 = to_app(concatAst2)->get_arg(0); - expr * v2_arg1 = to_app(concatAst2)->get_arg(1); - - expr * x = v1_arg0; - expr * str1Ast = v1_arg1; - expr * m = v2_arg0; - expr * str2Ast = v2_arg1; - - zstring str1Value, str2Value; - u.str.is_string(str1Ast, str1Value); - u.str.is_string(str2Ast, str2Value); - - unsigned int str1Len = str1Value.length(); - unsigned int str2Len = str2Value.length(); - - int cLen = (str1Len > str2Len) ? str2Len : str1Len; - if (str1Value.extract(str1Len - cLen, cLen) != str2Value.extract(str2Len - cLen, cLen)) { - TRACE(str, tout << "Conflict: " << mk_ismt2_pp(concatAst1, mgr) - << " has no common suffix with " << mk_ismt2_pp(concatAst2, mgr) << std::endl;); - expr_ref toNegate(mgr.mk_not(ctx.mk_eq_atom(concatAst1, concatAst2)), mgr); - assert_axiom(toNegate); - return; - } else { - if (str1Len > str2Len) { - zstring deltaStr = str1Value.extract(0, str1Len - str2Len); - expr_ref x_deltaStr(mk_concat(x, mk_string(deltaStr)), mgr); - if (!in_same_eqc(m, x_deltaStr)) { - expr_ref implyR(ctx.mk_eq_atom(m, x_deltaStr), mgr); - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom( ctx.mk_eq_atom(concatAst1, concatAst2), implyR ), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ctx.mk_eq_atom(concatAst1, concatAst2), implyR); - } - } - } else if (str1Len == str2Len) { - // test - if (!in_same_eqc(x, m)) { - expr_ref implyR(ctx.mk_eq_atom(x, m), mgr); - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom( ctx.mk_eq_atom(concatAst1, concatAst2), implyR ), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ctx.mk_eq_atom(concatAst1, concatAst2), implyR); - } - } - } else { - zstring deltaStr = str2Value.extract(0, str2Len - str1Len); - expr_ref m_deltaStr(mk_concat(m, mk_string(deltaStr)), mgr); - if (!in_same_eqc(x, m_deltaStr)) { - expr_ref implyR(ctx.mk_eq_atom(x, m_deltaStr), mgr); - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom( ctx.mk_eq_atom(concatAst1, concatAst2), implyR ), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ctx.mk_eq_atom(concatAst1, concatAst2), implyR); - } - } - } - } - } - - /************************************************************* - * case 6: concat("str1", y) = concat(m, "str2") - *************************************************************/ - bool theory_str::is_concat_eq_type6(expr * concatAst1, expr * concatAst2) { - expr * v1_arg0 = to_app(concatAst1)->get_arg(0); - expr * v1_arg1 = to_app(concatAst1)->get_arg(1); - expr * v2_arg0 = to_app(concatAst2)->get_arg(0); - expr * v2_arg1 = to_app(concatAst2)->get_arg(1); - - if (u.str.is_string(v1_arg0) && (!u.str.is_string(v1_arg1)) - && (!u.str.is_string(v2_arg0)) && u.str.is_string(v2_arg1)) { - return true; - } else if (u.str.is_string(v2_arg0) && (!u.str.is_string(v2_arg1)) - && (!u.str.is_string(v1_arg0)) && u.str.is_string(v1_arg1)) { - return true; - } else { - return false; - } - } - - void theory_str::process_concat_eq_type6(expr * concatAst1, expr * concatAst2) { - ast_manager & mgr = get_manager(); - TRACE(str, tout << "process_concat_eq TYPE 6" << std::endl - << "concatAst1 = " << mk_ismt2_pp(concatAst1, mgr) << std::endl - << "concatAst2 = " << mk_ismt2_pp(concatAst2, mgr) << std::endl; - ); - - if (!u.str.is_concat(to_app(concatAst1))) { - TRACE(str, tout << "concatAst1 is not a concat function" << std::endl;); - return; - } - if (!u.str.is_concat(to_app(concatAst2))) { - TRACE(str, tout << "concatAst2 is not a concat function" << std::endl;); - return; - } - - expr * v1_arg0 = to_app(concatAst1)->get_arg(0); - expr * v1_arg1 = to_app(concatAst1)->get_arg(1); - expr * v2_arg0 = to_app(concatAst2)->get_arg(0); - expr * v2_arg1 = to_app(concatAst2)->get_arg(1); - - - expr * str1Ast = nullptr; - expr * y = nullptr; - expr * m = nullptr; - expr * str2Ast = nullptr; - - if (u.str.is_string(v1_arg0)) { - str1Ast = v1_arg0; - y = v1_arg1; - m = v2_arg0; - str2Ast = v2_arg1; - } else { - str1Ast = v2_arg0; - y = v2_arg1; - m = v1_arg0; - str2Ast = v1_arg1; - } - - zstring str1Value, str2Value; - u.str.is_string(str1Ast, str1Value); - u.str.is_string(str2Ast, str2Value); - - unsigned int str1Len = str1Value.length(); - unsigned int str2Len = str2Value.length(); - - //---------------------------------------- - //(a) |---str1---|----y----| - // |--m--|-----str2-----| - // - //(b) |---str1---|----y----| - // |-----m----|--str2---| - // - //(c) |---str1---|----y----| - // |------m------|-str2-| - //---------------------------------------- - - std::list overlapLen; - overlapLen.push_back(0); - - for (unsigned int i = 1; i <= str1Len && i <= str2Len; i++) { - if (str1Value.extract(str1Len - i, i) == str2Value.extract(0, i)) - overlapLen.push_back(i); - } - - //---------------------------------------------------------------- - expr_ref commonVar(mgr); - expr * xorFlag = nullptr; - std::pair key1(concatAst1, concatAst2); - std::pair key2(concatAst2, concatAst1); - - // check the entries in this map to make sure they're still in scope - // before we use them. - - std::map, std::map >::iterator entry1 = varForBreakConcat.find(key1); - std::map, std::map >::iterator entry2 = varForBreakConcat.find(key2); - - bool entry1InScope; - if (entry1 == varForBreakConcat.end()) { - entry1InScope = false; - } else { - if (internal_variable_set.find((entry1->second)[0]) == internal_variable_set.end() - /* || internal_variable_set.find((entry1->second)[1]) == internal_variable_set.end() */) { - entry1InScope = false; - } else { - entry1InScope = true; - } - } - - bool entry2InScope; - if (entry2 == varForBreakConcat.end()) { - entry2InScope = false; - } else { - if (internal_variable_set.find((entry2->second)[0]) == internal_variable_set.end() - /* || internal_variable_set.find((entry2->second)[1]) == internal_variable_set.end() */) { - entry2InScope = false; - } else { - entry2InScope = true; - } - } - - TRACE(str, tout << "entry 1 " << (entry1InScope ? "in scope" : "not in scope") << std::endl - << "entry 2 " << (entry2InScope ? "in scope" : "not in scope") << std::endl;); - - if (!entry1InScope && !entry2InScope) { - commonVar = mk_nonempty_str_var(); - xorFlag = mk_internal_xor_var(); - varForBreakConcat[key1][0] = commonVar; - varForBreakConcat[key1][1] = xorFlag; - } else { - if (entry1InScope) { - commonVar = (entry1->second)[0]; - xorFlag = (entry1->second)[1]; - } else { - commonVar = (entry2->second)[0]; - xorFlag = (entry2->second)[1]; - } - refresh_theory_var(commonVar); - add_nonempty_constraint(commonVar); - } - - bool overlapAssumptionUsed = false; - - expr_ref_vector arrangement_disjunction(mgr); - int pos = 1; - (void)pos; - - if (!avoidLoopCut || !has_self_cut(m, y)) { - expr_ref_vector and_item(mgr); - - expr_ref str1_commonVar(mk_concat(str1Ast, commonVar), mgr); - and_item.push_back(ctx.mk_eq_atom(m, str1_commonVar)); - pos += 1; - - expr_ref commonVar_str2(mk_concat(commonVar, str2Ast), mgr); - and_item.push_back(ctx.mk_eq_atom(y, commonVar_str2)); - pos += 1; - - and_item.push_back(ctx.mk_eq_atom(mk_strlen(m), - m_autil.mk_add(mk_strlen(str1Ast), mk_strlen(commonVar)) )); - pos += 1; - (void)pos; - - // addItems[0] = mk_length(t, commonVar); - // addItems[1] = mk_length(t, str2Ast); - // and_item[pos++] = Z3_mk_eq(ctx, or_item[option], Z3_mk_eq(ctx, mk_length(t, y), Z3_mk_add(ctx, 2, addItems))); - - expr_ref option1(mk_and(and_item), mgr); - arrangement_disjunction.push_back(option1); - add_theory_aware_branching_info(option1, 0.1, l_true); - } else { - loopDetected = true; - - TRACE(str, tout << "AVOID LOOP: SKIPPED." << std::endl;); - TRACE(str, print_cut_var(m, tout); print_cut_var(y, tout);); - - // only add the overlap assumption one time - if (!overlapAssumptionUsed) { - // add context dependent formula overlap predicate and relate it to the global overlap predicate - sort * s = get_manager().mk_bool_sort(); - expr_ref new_OverlapAssumption_term = expr_ref(mk_fresh_const(newOverlapStr, s), get_manager()); - arrangement_disjunction.push_back(new_OverlapAssumption_term); - assert_implication(new_OverlapAssumption_term, m_theoryStrOverlapAssumption_term); - overlapAssumptionUsed = true; - } - - } - - for (unsigned int overLen : overlapLen) { - zstring prefix = str1Value.extract(0, str1Len - overLen); - zstring suffix = str2Value.extract(overLen, str2Len - overLen); - - expr_ref_vector and_item(mgr); - - expr_ref prefixAst(mk_string(prefix), mgr); - expr_ref x_eq_prefix(ctx.mk_eq_atom(m, prefixAst), mgr); - and_item.push_back(x_eq_prefix); - pos += 1; - - and_item.push_back( - ctx.mk_eq_atom(mk_strlen(m), mk_strlen(prefixAst))); - pos += 1; - - // adding length constraint for _ = constStr seems slowing things down. - - expr_ref suffixAst(mk_string(suffix), mgr); - expr_ref y_eq_suffix(ctx.mk_eq_atom(y, suffixAst), mgr); - and_item.push_back(y_eq_suffix); - pos += 1; - - and_item.push_back(ctx.mk_eq_atom(mk_strlen(y), mk_strlen(suffixAst))); - pos += 1; - - expr_ref option2(mk_and(and_item), mgr); - arrangement_disjunction.push_back(option2); - double priority; - // prefer the option "str1" = x - if (prefix == str1Value) { - priority = 0.5; - } else { - priority = 0.1; - } - add_theory_aware_branching_info(option2, priority, l_true); - } - - // case 6: concat("str1", y) = concat(m, "str2") - - expr_ref implyR(mk_or(arrangement_disjunction), mgr); - - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom( ctx.mk_eq_atom(concatAst1, concatAst2), implyR ), mgr); - assert_axiom_rw(ax_strong); - } else { - assert_implication(ctx.mk_eq_atom(concatAst1, concatAst2), implyR); - } - generate_mutual_exclusion(arrangement_disjunction); - } - - bool theory_str::get_string_constant_eqc(expr * e, zstring & stringVal) { - bool exists; - expr * strExpr = get_eqc_value(e, exists); - if (!exists) { - return false;} - u.str.is_string(strExpr, stringVal); - return true; - } - - /* - * Look through the equivalence class of n to find a string constant. - * Return that constant if it is found, and set hasEqcValue to true. - * Otherwise, return n, and set hasEqcValue to false. - */ - - expr * theory_str::get_eqc_value(expr * n, bool & hasEqcValue) { - return z3str2_get_eqc_value(n, hasEqcValue); - } - - - // Simulate the behaviour of get_eqc_value() from Z3str2. - // We only check m_find for a string constant. - - expr * theory_str::z3str2_get_eqc_value(expr * n , bool & hasEqcValue) { - theory_var curr = get_var(n); - if (curr != null_theory_var) { - curr = m_find.find(curr); - theory_var first = curr; - do { - expr* a = get_ast(curr); - if (u.str.is_string(a)) { - hasEqcValue = true; - return a; - } - curr = m_find.next(curr); - } - while (curr != first && curr != null_theory_var); - } - hasEqcValue = false; - return n; - } - - bool theory_str::get_arith_value(expr* e, rational& val) const { - ast_manager & m = get_manager(); - (void)m; - if (!ctx.e_internalized(e)) { - return false; - } - // check root of the eqc for an integer constant - // if an integer constant exists in the eqc, it should be the root - enode * en_e = ctx.get_enode(e); - enode * root_e = en_e->get_root(); - if (m_autil.is_numeral(root_e->get_expr(), val) && val.is_int()) { - TRACE(str, tout << mk_pp(e, get_manager()) << " ~= " << mk_pp(root_e->get_expr(), get_manager()) << std::endl;); - return true; - } else { - TRACE(str, tout << "root of eqc of " << mk_pp(e, get_manager()) << " is not a numeral" << std::endl;); - return false; - } - - } - - bool theory_str::lower_bound(expr* _e, rational& lo) { - if (opt_DisableIntegerTheoryIntegration) { - TRACE(str, tout << "WARNING: integer theory integration disabled" << std::endl;); - return false; - } - - arith_value v(get_manager()); - v.init(&ctx); - bool strict; - return v.get_lo_equiv(_e, lo, strict); - } - - bool theory_str::upper_bound(expr* _e, rational& hi) { - if (opt_DisableIntegerTheoryIntegration) { - TRACE(str, tout << "WARNING: integer theory integration disabled" << std::endl;); - return false; - } - - arith_value v(get_manager()); - v.init(&ctx); - bool strict; - return v.get_up_equiv(_e, hi, strict); - } - - bool theory_str::get_len_value(expr* e, rational& val) { - if (opt_DisableIntegerTheoryIntegration) { - TRACE(str, tout << "WARNING: integer theory integration disabled" << std::endl;); - return false; - } - - ast_manager & m = get_manager(); - - TRACE(str, tout << "checking len value of " << mk_ismt2_pp(e, m) << std::endl;); - - rational val1; - expr_ref len(m), len_val(m); - expr* e1, *e2; - ptr_vector todo; - todo.push_back(e); - val.reset(); - while (!todo.empty()) { - expr* c = todo.back(); - todo.pop_back(); - if (u.str.is_concat(to_app(c))) { - e1 = to_app(c)->get_arg(0); - e2 = to_app(c)->get_arg(1); - todo.push_back(e1); - todo.push_back(e2); - } - else if (u.str.is_string(to_app(c))) { - zstring tmp; - u.str.is_string(to_app(c), tmp); - unsigned int sl = tmp.length(); - val += rational(sl); - } - else { - len = mk_strlen(c); - - // debugging - TRACE(str, { - tout << mk_pp(len, m) << ":" << std::endl - << (ctx.is_relevant(len.get()) ? "relevant" : "not relevant") << std::endl - << (ctx.e_internalized(len) ? "internalized" : "not internalized") << std::endl - ; - if (ctx.e_internalized(len)) { - enode * e_len = ctx.get_enode(len); - tout << "has " << e_len->get_num_th_vars() << " theory vars" << std::endl; - - // eqc debugging - { - tout << "dump equivalence class of " << mk_pp(len, get_manager()) << std::endl; - enode * nNode = ctx.get_enode(len); - enode * eqcNode = nNode; - do { - app * ast = eqcNode->get_expr(); - tout << mk_pp(ast, get_manager()) << std::endl; - eqcNode = eqcNode->get_next(); - } while (eqcNode != nNode); - } - } - }); - - if (ctx.e_internalized(len) && get_arith_value(len, val1)) { - val += val1; - TRACE(str, tout << "integer theory: subexpression " << mk_ismt2_pp(len, m) << " has length " << val1 << std::endl;); - } - else { - TRACE(str, tout << "integer theory: subexpression " << mk_ismt2_pp(len, m) << " has no length assignment; bailing out" << std::endl;); - return false; - } - } - } - - TRACE(str, tout << "length of " << mk_ismt2_pp(e, m) << " is " << val << std::endl;); - return val.is_int() && val.is_nonneg(); - } - - /* - * Decide whether n1 and n2 are already in the same equivalence class. - * This only checks whether the core considers them to be equal; - * they may not actually be equal. - */ - bool theory_str::in_same_eqc(expr * n1, expr * n2) { - if (n1 == n2) return true; - - // similar to get_eqc_value(), make absolutely sure - // that we've set this up properly for the context - - if (!ctx.e_internalized(n1)) { - TRACE(str, tout << "WARNING: expression " << mk_ismt2_pp(n1, get_manager()) << " was not internalized" << std::endl;); - ctx.internalize(n1, false); - } - if (!ctx.e_internalized(n2)) { - TRACE(str, tout << "WARNING: expression " << mk_ismt2_pp(n2, get_manager()) << " was not internalized" << std::endl;); - ctx.internalize(n2, false); - } - - expr * curr = get_eqc_next(n1); - while (curr != n1) { - if (curr == n2) - return true; - curr = get_eqc_next(curr); - } - return false; - } - - expr * theory_str::collect_eq_nodes(expr * n, expr_ref_vector & eqcSet) { - expr * constStrNode = nullptr; - - expr * ex = n; - do { - if (u.str.is_string(to_app(ex))) { - constStrNode = ex; - } - eqcSet.push_back(ex); - - ex = get_eqc_next(ex); - } while (ex != n); - return constStrNode; - } - - /* - * Collect constant strings (from left to right) in an AST node. - */ - void theory_str::get_const_str_asts_in_node(expr * node, expr_ref_vector & astList) { - if (u.str.is_string(node)) { - astList.push_back(node); - //} else if (getNodeType(t, node) == my_Z3_Func) { - } else if (is_app(node)) { - app * func_app = to_app(node); - // the following check is only valid when the operator is string concatenate - if (u.str.is_concat(func_app)) { - unsigned int argCount = func_app->get_num_args(); - for (unsigned int i = 0; i < argCount; i++) { - expr * argAst = func_app->get_arg(i); - get_const_str_asts_in_node(argAst, astList); - } - } - } - } - - void theory_str::check_contain_by_eqc_val(expr * varNode, expr * constNode) { - ast_manager & m = get_manager(); - - TRACE(str, tout << "varNode = " << mk_pp(varNode, m) << ", constNode = " << mk_pp(constNode, m) << std::endl;); - - expr_ref_vector litems(m); - - if (contain_pair_idx_map.contains(varNode)) { - for (auto entry : contain_pair_idx_map[varNode]) { - expr * strAst = entry.first; - expr * substrAst = entry.second; - - expr * boolVar = nullptr; - if (!contain_pair_bool_map.find(strAst, substrAst, boolVar)) { - TRACE(str, tout << "warning: no entry for boolVar in contain_pair_bool_map" << std::endl;); - } - - // we only want to inspect the Contains terms where either of strAst or substrAst - // are equal to varNode. - - TRACE(t_str_detail, tout << "considering Contains with strAst = " << mk_pp(strAst, m) << ", substrAst = " << mk_pp(substrAst, m) << "..." << std::endl;); - - if (varNode != strAst && varNode != substrAst) { - TRACE(str, tout << "varNode not equal to strAst or substrAst, skip" << std::endl;); - continue; - } - TRACE(str, tout << "varNode matched one of strAst or substrAst. Continuing" << std::endl;); - - // varEqcNode is str - if (strAst == varNode) { - expr_ref implyR(m); - litems.reset(); - - if (strAst != constNode) { - litems.push_back(ctx.mk_eq_atom(strAst, constNode)); - } - zstring strConst; - u.str.is_string(constNode, strConst); - bool subStrHasEqcValue = false; - expr * substrValue = get_eqc_value(substrAst, subStrHasEqcValue); - if (substrValue != substrAst) { - litems.push_back(ctx.mk_eq_atom(substrAst, substrValue)); - } - - if (subStrHasEqcValue) { - // subStr has an eqc constant value - zstring subStrConst; - u.str.is_string(substrValue, subStrConst); - - TRACE(t_str_detail, tout << "strConst = " << strConst << ", subStrConst = " << subStrConst << "\n";); - - if (strConst.contains(subStrConst)) { - //implyR = ctx.mk_eq(ctx, boolVar, Z3_mk_true(ctx)); - implyR = boolVar; - } else { - //implyR = Z3_mk_eq(ctx, boolVar, Z3_mk_false(ctx)); - implyR = mk_not(m, boolVar); - } - } else { - // ------------------------------------------------------------------------------------------------ - // subStr doesn't have an eqc constant value - // however, subStr equals to some concat(arg_1, arg_2, ..., arg_n) - // if arg_j is a constant and is not a part of the strConst, it's sure that the contains is false - // ** This check is needed here because the "strConst" and "strAst" may not be in a same eqc yet - // ------------------------------------------------------------------------------------------------ - // collect eqc concat - std::set eqcConcats; - get_concats_in_eqc(substrAst, eqcConcats); - for (expr * aConcat : eqcConcats) { - expr_ref_vector constList(m); - bool counterEgFound = false; - get_const_str_asts_in_node(aConcat, constList); - for (auto const& cst : constList) { - zstring pieceStr; - u.str.is_string(cst, pieceStr); - if (!strConst.contains(pieceStr)) { - counterEgFound = true; - if (aConcat != substrAst) { - litems.push_back(ctx.mk_eq_atom(substrAst, aConcat)); - } - implyR = mk_not(m, boolVar); - break; - } - } - if (counterEgFound) { - TRACE(str, tout << "Inconsistency found!" << std::endl;); - break; - } - } - } - // add assertion - if (implyR) { - expr_ref implyLHS(mk_and(litems), m); - assert_implication(implyLHS, implyR); - } - } - // varEqcNode is subStr - else if (substrAst == varNode) { - expr_ref implyR(m); - litems.reset(); - - if (substrAst != constNode) { - litems.push_back(ctx.mk_eq_atom(substrAst, constNode)); - } - bool strHasEqcValue = false; - expr * strValue = get_eqc_value(strAst, strHasEqcValue); - if (strValue != strAst) { - litems.push_back(ctx.mk_eq_atom(strAst, strValue)); - } - - if (strHasEqcValue) { - zstring strConst, subStrConst; - u.str.is_string(strValue, strConst); - u.str.is_string(constNode, subStrConst); - if (strConst.contains(subStrConst)) { - //implyR = Z3_mk_eq(ctx, boolVar, Z3_mk_true(ctx)); - implyR = boolVar; - } else { - // implyR = Z3_mk_eq(ctx, boolVar, Z3_mk_false(ctx)); - implyR = mk_not(m, boolVar); - } - } - - // add assertion - if (implyR) { - expr_ref implyLHS(mk_and(litems), m); - assert_implication(implyLHS, implyR); - } - } - } // for (itor1 : contains_map) - } // if varNode in contain_pair_idx_map - } - - void theory_str::check_contain_by_substr(expr * varNode, expr_ref_vector & willEqClass) { - ast_manager & m = get_manager(); - expr_ref_vector litems(m); - - if (contain_pair_idx_map.contains(varNode)) { - for (auto entry : contain_pair_idx_map[varNode]) { - expr * strAst = entry.first; - expr * substrAst = entry.second; - - expr * boolVar = nullptr; - if (!contain_pair_bool_map.find(strAst, substrAst, boolVar)) { - TRACE(str, tout << "warning: no entry for boolVar in contain_pair_bool_map" << std::endl;); - } - - // we only want to inspect the Contains terms where either of strAst or substrAst - // are equal to varNode. - - TRACE(t_str_detail, tout << "considering Contains with strAst = " << mk_pp(strAst, m) << ", substrAst = " << mk_pp(substrAst, m) << "..." << std::endl;); - - if (varNode != strAst && varNode != substrAst) { - TRACE(str, tout << "varNode not equal to strAst or substrAst, skip" << std::endl;); - continue; - } - TRACE(str, tout << "varNode matched one of strAst or substrAst. Continuing" << std::endl;); - - if (substrAst == varNode) { - bool strAstHasVal = false; - expr * strValue = get_eqc_value(strAst, strAstHasVal); - if (strAstHasVal) { - TRACE(str, tout << mk_pp(strAst, m) << " has constant eqc value " << mk_pp(strValue, m) << std::endl;); - if (strValue != strAst) { - litems.push_back(ctx.mk_eq_atom(strAst, strValue)); - } - zstring strConst; - u.str.is_string(strValue, strConst); - // iterate eqc (also eqc-to-be) of substr - for (auto itAst : willEqClass) { - bool counterEgFound = false; - if (u.str.is_concat(to_app(itAst))) { - expr_ref_vector constList(m); - // get constant strings in concat - app * aConcat = to_app(itAst); - get_const_str_asts_in_node(aConcat, constList); - for (auto cst : constList) { - zstring pieceStr; - u.str.is_string(cst, pieceStr); - if (!strConst.contains(pieceStr)) { - TRACE(str, tout << "Inconsistency found!" << std::endl;); - counterEgFound = true; - if (aConcat != substrAst) { - litems.push_back(ctx.mk_eq_atom(substrAst, aConcat)); - } - expr_ref implyLHS(mk_and(litems), m); - expr_ref implyR(mk_not(m, boolVar), m); - assert_implication(implyLHS, implyR); - break; - } - } - } - if (counterEgFound) { - break; - } - } - } - } - } - } // varNode in contain_pair_idx_map - } - - bool theory_str::in_contain_idx_map(expr * n) { - return contain_pair_idx_map.contains(n); - } - - void theory_str::check_contain_by_eq_nodes(expr * n1, expr * n2) { - ast_manager & m = get_manager(); - - if (in_contain_idx_map(n1) && in_contain_idx_map(n2)) { - for (auto const& key1 : contain_pair_idx_map[n1]) { - // keysItor1 is on set {<.., n1>, ..., , ...} - //std::pair key1 = *keysItor1; - if (key1.first == n1 && key1.second == n2) { - expr_ref implyL(m); - expr_ref implyR(contain_pair_bool_map[key1], m); - if (n1 != n2) { - implyL = ctx.mk_eq_atom(n1, n2); - assert_implication(implyL, implyR); - } else { - assert_axiom(implyR); - } - } - - for (auto const& key2 : contain_pair_idx_map[n2]) { - // keysItor2 is on set {<.., n2>, ..., , ...} - //std::pair key2 = *keysItor2; - // skip if the pair is eq - if (key1 == key2) { - continue; - } - - // *************************** - // Case 1: Contains(m, ...) /\ Contains(n, ) /\ m = n - // *************************** - if (key1.first == n1 && key2.first == n2) { - expr * subAst1 = key1.second; - expr * subAst2 = key2.second; - bool subAst1HasValue = false; - bool subAst2HasValue = false; - expr * subValue1 = get_eqc_value(subAst1, subAst1HasValue); - expr * subValue2 = get_eqc_value(subAst2, subAst2HasValue); - - TRACE(str, - tout << "(Contains " << mk_pp(n1, m) << " " << mk_pp(subAst1, m) << ")" << std::endl; - tout << "(Contains " << mk_pp(n2, m) << " " << mk_pp(subAst2, m) << ")" << std::endl; - if (subAst1 != subValue1) { - tout << mk_pp(subAst1, m) << " = " << mk_pp(subValue1, m) << std::endl; - } - if (subAst2 != subValue2) { - tout << mk_pp(subAst2, m) << " = " << mk_pp(subValue2, m) << std::endl; - } - ); - - if (subAst1HasValue && subAst2HasValue) { - expr_ref_vector litems1(m); - if (n1 != n2) { - litems1.push_back(ctx.mk_eq_atom(n1, n2)); - } - if (subValue1 != subAst1) { - litems1.push_back(ctx.mk_eq_atom(subAst1, subValue1)); - } - if (subValue2 != subAst2) { - litems1.push_back(ctx.mk_eq_atom(subAst2, subValue2)); - } - - zstring subConst1, subConst2; - u.str.is_string(subValue1, subConst1); - u.str.is_string(subValue2, subConst2); - expr_ref implyR(m); - if (subConst1 == subConst2) { - // key1.first = key2.first /\ key1.second = key2.second - // ==> (containPairBoolMap[key1] = containPairBoolMap[key2]) - implyR = ctx.mk_eq_atom(contain_pair_bool_map[key1], contain_pair_bool_map[key2]); - } else if (subConst1.contains(subConst2)) { - // key1.first = key2.first /\ Contains(key1.second, key2.second) - // ==> (containPairBoolMap[key1] --> containPairBoolMap[key2]) - implyR = rewrite_implication(contain_pair_bool_map[key1], contain_pair_bool_map[key2]); - } else if (subConst2.contains(subConst1)) { - // key1.first = key2.first /\ Contains(key2.second, key1.second) - // ==> (containPairBoolMap[key2] --> containPairBoolMap[key1]) - implyR = rewrite_implication(contain_pair_bool_map[key2], contain_pair_bool_map[key1]); - } - - if (implyR) { - if (litems1.empty()) { - assert_axiom(implyR); - } else { - assert_implication(mk_and(litems1), implyR); - } - } - } else { - expr_ref_vector subAst1Eqc(m); - expr_ref_vector subAst2Eqc(m); - collect_eq_nodes(subAst1, subAst1Eqc); - collect_eq_nodes(subAst2, subAst2Eqc); - - if (subAst1Eqc.contains(subAst2)) { - // ----------------------------------------------------------- - // * key1.first = key2.first /\ key1.second = key2.second - // --> containPairBoolMap[key1] = containPairBoolMap[key2] - // ----------------------------------------------------------- - expr_ref_vector litems2(m); - if (n1 != n2) { - litems2.push_back(ctx.mk_eq_atom(n1, n2)); - } - if (subAst1 != subAst2) { - litems2.push_back(ctx.mk_eq_atom(subAst1, subAst2)); - } - expr_ref implyR(ctx.mk_eq_atom(contain_pair_bool_map[key1], contain_pair_bool_map[key2]), m); - if (litems2.empty()) { - assert_axiom(implyR); - } else { - assert_implication(mk_and(litems2), implyR); - } - } else { - // ----------------------------------------------------------- - // * key1.first = key2.first - // check eqc(key1.second) and eqc(key2.second) - // ----------------------------------------------------------- - for (auto eqSubVar1 : subAst1Eqc) { - for (auto eqSubVar2 : subAst2Eqc) { - // ------------ - // key1.first = key2.first /\ containPairBoolMap[] - // ==> (containPairBoolMap[key1] --> containPairBoolMap[key2]) - // ------------ - { - expr_ref_vector litems3(m); - if (n1 != n2) { - litems3.push_back(ctx.mk_eq_atom(n1, n2)); - } - - if (eqSubVar1 != subAst1) { - litems3.push_back(ctx.mk_eq_atom(subAst1, eqSubVar1)); - } - - if (eqSubVar2 != subAst2) { - litems3.push_back(ctx.mk_eq_atom(subAst2, eqSubVar2)); - } - std::pair tryKey1 = std::make_pair(eqSubVar1, eqSubVar2); - if (contain_pair_bool_map.contains(tryKey1)) { - TRACE(str, tout << "(Contains " << mk_pp(eqSubVar1, m) << " " << mk_pp(eqSubVar2, m) << ")" << std::endl;); - litems3.push_back(contain_pair_bool_map[tryKey1]); - expr_ref implR(rewrite_implication(contain_pair_bool_map[key1], contain_pair_bool_map[key2]), m); - assert_implication(mk_and(litems3), implR); - } - } - // ------------ - // key1.first = key2.first /\ containPairBoolMap[] - // ==> (containPairBoolMap[key2] --> containPairBoolMap[key1]) - // ------------ - { - expr_ref_vector litems4(m); - if (n1 != n2) { - litems4.push_back(ctx.mk_eq_atom(n1, n2)); - } - - if (eqSubVar1 != subAst1) { - litems4.push_back(ctx.mk_eq_atom(subAst1, eqSubVar1)); - } - - if (eqSubVar2 != subAst2) { - litems4.push_back(ctx.mk_eq_atom(subAst2, eqSubVar2)); - } - std::pair tryKey2 = std::make_pair(eqSubVar2, eqSubVar1); - if (contain_pair_bool_map.contains(tryKey2)) { - TRACE(str, tout << "(Contains " << mk_pp(eqSubVar2, m) << " " << mk_pp(eqSubVar1, m) << ")" << std::endl;); - litems4.push_back(contain_pair_bool_map[tryKey2]); - expr_ref implR(rewrite_implication(contain_pair_bool_map[key2], contain_pair_bool_map[key1]), m); - assert_implication(mk_and(litems4), implR); - } - } - } - } - } - } - } - // *************************** - // Case 2: Contains(..., m) /\ Contains(... , n) /\ m = n - // *************************** - else if (key1.second == n1 && key2.second == n2) { - expr * str1 = key1.first; - expr * str2 = key2.first; - bool str1HasValue = false; - bool str2HasValue = false; - expr * strVal1 = get_eqc_value(str1, str1HasValue); - expr * strVal2 = get_eqc_value(str2, str2HasValue); - - TRACE(str, - tout << "(Contains " << mk_pp(str1, m) << " " << mk_pp(n1, m) << ")" << std::endl; - tout << "(Contains " << mk_pp(str2, m) << " " << mk_pp(n2, m) << ")" << std::endl; - if (str1 != strVal1) { - tout << mk_pp(str1, m) << " = " << mk_pp(strVal1, m) << std::endl; - } - if (str2 != strVal2) { - tout << mk_pp(str2, m) << " = " << mk_pp(strVal2, m) << std::endl; - } - ); - - if (str1HasValue && str2HasValue) { - expr_ref_vector litems1(m); - if (n1 != n2) { - litems1.push_back(ctx.mk_eq_atom(n1, n2)); - } - if (strVal1 != str1) { - litems1.push_back(ctx.mk_eq_atom(str1, strVal1)); - } - if (strVal2 != str2) { - litems1.push_back(ctx.mk_eq_atom(str2, strVal2)); - } - - zstring const1, const2; - u.str.is_string(strVal1, const1); - u.str.is_string(strVal2, const2); - expr_ref implyR(m); - - if (const1 == const2) { - // key1.second = key2.second /\ key1.first = key2.first - // ==> (containPairBoolMap[key1] = containPairBoolMap[key2]) - implyR = ctx.mk_eq_atom(contain_pair_bool_map[key1], contain_pair_bool_map[key2]); - } else if (const1.contains(const2)) { - // key1.second = key2.second /\ Contains(key1.first, key2.first) - // ==> (containPairBoolMap[key2] --> containPairBoolMap[key1]) - implyR = rewrite_implication(contain_pair_bool_map[key2], contain_pair_bool_map[key1]); - } else if (const2.contains(const1)) { - // key1.first = key2.first /\ Contains(key2.first, key1.first) - // ==> (containPairBoolMap[key1] --> containPairBoolMap[key2]) - implyR = rewrite_implication(contain_pair_bool_map[key1], contain_pair_bool_map[key2]); - } - - if (implyR) { - if (litems1.empty()) { - assert_axiom(implyR); - } else { - assert_implication(mk_and(litems1), implyR); - } - } - } - - else { - expr_ref_vector str1Eqc(m); - expr_ref_vector str2Eqc(m); - collect_eq_nodes(str1, str1Eqc); - collect_eq_nodes(str2, str2Eqc); - - if (str1Eqc.contains(str2)) { - // ----------------------------------------------------------- - // * key1.first = key2.first /\ key1.second = key2.second - // --> containPairBoolMap[key1] = containPairBoolMap[key2] - // ----------------------------------------------------------- - expr_ref_vector litems2(m); - if (n1 != n2) { - litems2.push_back(ctx.mk_eq_atom(n1, n2)); - } - if (str1 != str2) { - litems2.push_back(ctx.mk_eq_atom(str1, str2)); - } - expr_ref implyR(ctx.mk_eq_atom(contain_pair_bool_map[key1], contain_pair_bool_map[key2]), m); - if (litems2.empty()) { - assert_axiom(implyR); - } else { - assert_implication(mk_and(litems2), implyR); - } - } else { - // ----------------------------------------------------------- - // * key1.second = key2.second - // check eqc(key1.first) and eqc(key2.first) - // ----------------------------------------------------------- - for (auto const& eqStrVar1 : str1Eqc) { - for (auto const& eqStrVar2 : str2Eqc) { - { - expr_ref_vector litems3(m); - if (n1 != n2) { - litems3.push_back(ctx.mk_eq_atom(n1, n2)); - } - - if (eqStrVar1 != str1) { - litems3.push_back(ctx.mk_eq_atom(str1, eqStrVar1)); - } - - if (eqStrVar2 != str2) { - litems3.push_back(ctx.mk_eq_atom(str2, eqStrVar2)); - } - std::pair tryKey1 = std::make_pair(eqStrVar1, eqStrVar2); - if (contain_pair_bool_map.contains(tryKey1)) { - TRACE(str, tout << "(Contains " << mk_pp(eqStrVar1, m) << " " << mk_pp(eqStrVar2, m) << ")" << std::endl;); - litems3.push_back(contain_pair_bool_map[tryKey1]); - - // ------------ - // key1.second = key2.second /\ containPairBoolMap[] - // ==> (containPairBoolMap[key2] --> containPairBoolMap[key1]) - // ------------ - expr_ref implR(rewrite_implication(contain_pair_bool_map[key2], contain_pair_bool_map[key1]), m); - assert_implication(mk_and(litems3), implR); - } - } - - { - expr_ref_vector litems4(m); - if (n1 != n2) { - litems4.push_back(ctx.mk_eq_atom(n1, n2)); - } - if (eqStrVar1 != str1) { - litems4.push_back(ctx.mk_eq_atom(str1, eqStrVar1)); - } - if (eqStrVar2 != str2) { - litems4.push_back(ctx.mk_eq_atom(str2, eqStrVar2)); - } - std::pair tryKey2 = std::make_pair(eqStrVar2, eqStrVar1); - - if (contain_pair_bool_map.contains(tryKey2)) { - TRACE(str, tout << "(Contains " << mk_pp(eqStrVar2, m) << " " << mk_pp(eqStrVar1, m) << ")" << std::endl;); - litems4.push_back(contain_pair_bool_map[tryKey2]); - // ------------ - // key1.first = key2.first /\ containPairBoolMap[] - // ==> (containPairBoolMap[key1] --> containPairBoolMap[key2]) - // ------------ - expr_ref implR(rewrite_implication(contain_pair_bool_map[key1], contain_pair_bool_map[key2]), m); - assert_implication(mk_and(litems4), implR); - } - } - } - } - } - } - - } - } - - if (n1 == n2) { - break; - } - } - } // (in_contain_idx_map(n1) && in_contain_idx_map(n2)) - } - - void theory_str::check_contain_in_new_eq(expr * n1, expr * n2) { - if (contains_map.empty()) { - return; - } - - ast_manager & m = get_manager(); - TRACE(str, tout << "consistency check for contains wrt. " << mk_pp(n1, m) << " and " << mk_pp(n2, m) << std::endl;); - - expr_ref_vector willEqClass(m); - expr * constStrAst_1 = collect_eq_nodes(n1, willEqClass); - expr * constStrAst_2 = collect_eq_nodes(n2, willEqClass); - expr * constStrAst = (constStrAst_1 != nullptr) ? constStrAst_1 : constStrAst_2; - - TRACE(str, tout << "eqc of n1 is {"; - for (expr * el : willEqClass) { - tout << " " << mk_pp(el, m); - } - tout << std::endl; - if (constStrAst == nullptr) { - tout << "constStrAst = NULL" << std::endl; - } else { - tout << "constStrAst = " << mk_pp(constStrAst, m) << std::endl; - } - ); - - // step 1: we may have constant values for Contains checks now - if (constStrAst != nullptr) { - for (auto a : willEqClass) { - if (a == constStrAst) { - continue; - } - check_contain_by_eqc_val(a, constStrAst); - } - } else { - // no concrete value to be put in eqc, solely based on context - // Check here is used to detected the facts as follows: - // * known: contains(Z, Y) /\ Z = "abcdefg" /\ Y = M - // * new fact: M = concat(..., "jio", ...) - // Note that in this branch, either M or concat(..., "jio", ...) has a constant value - // So, only need to check - // * "EQC(M) U EQC(concat(..., "jio", ...))" as substr and - // * If strAst registered has an eqc constant in the context - // ------------------------------------------------------------- - for (auto a : willEqClass) { - check_contain_by_substr(a, willEqClass); - } - } - - // ------------------------------------------ - // step 2: check for b1 = contains(x, m), b2 = contains(y, n) - // (1) x = y /\ m = n ==> b1 = b2 - // (2) x = y /\ Contains(const(m), const(n)) ==> (b1 -> b2) - // (3) x = y /\ Contains(const(n), const(m)) ==> (b2 -> b1) - // (4) x = y /\ containPairBoolMap[] ==> (b1 -> b2) - // (5) x = y /\ containPairBoolMap[] ==> (b2 -> b1) - // (6) Contains(const(x), const(y)) /\ m = n ==> (b2 -> b1) - // (7) Contains(const(y), const(x)) /\ m = n ==> (b1 -> b2) - // (8) containPairBoolMap[] /\ m = n ==> (b2 -> b1) - // (9) containPairBoolMap[] /\ m = n ==> (b1 -> b2) - // ------------------------------------------ - - for (auto varAst1 : willEqClass) { - for (auto varAst2 : willEqClass) { - check_contain_by_eq_nodes(varAst1, varAst2); - } - } - } - - expr * theory_str::dealias_node(expr * node, std::map & varAliasMap, std::map & concatAliasMap) { - if (variable_set.find(node) != variable_set.end()) { - return get_alias_index_ast(varAliasMap, node); - } else if (u.str.is_concat(to_app(node))) { - return get_alias_index_ast(concatAliasMap, node); - } - return node; - } - - void theory_str::get_grounded_concats(unsigned depth, - expr* node, std::map & varAliasMap, - std::map & concatAliasMap, std::map & varConstMap, - std::map & concatConstMap, std::map > & varEqConcatMap, - std::map, std::set > > & groundedMap) { - // ************************************************** - // first deAlias the node if it is a var or concat - // ************************************************** - node = dealias_node(node, varAliasMap, concatAliasMap); - - if (groundedMap.find(node) != groundedMap.end()) { - return; - } - IF_VERBOSE(100, verbose_stream() << "concats " << depth << "\n"; - if (depth > 100) verbose_stream() << mk_pp(node, get_manager()) << "\n"; - ); - - // haven't computed grounded concats for "node" (de-aliased) - // --------------------------------------------------------- - - - // const strings: node is de-aliased - if (u.str.is_string(node)) { - std::vector concatNodes; - concatNodes.push_back(node); - groundedMap[node][concatNodes].clear(); // no condition - } - // Concat functions - else if (u.str.is_concat(to_app(node))) { - // if "node" equals to a constant string, thenjust push the constant into the concat vector - // Again "node" has been de-aliased at the very beginning - if (concatConstMap.find(node) != concatConstMap.end()) { - std::vector concatNodes; - concatNodes.push_back(concatConstMap[node]); - groundedMap[node][concatNodes].clear(); - groundedMap[node][concatNodes].insert(ctx.mk_eq_atom(node, concatConstMap[node])); - } - // node doesn't have eq constant value. Process its children. - else { - // merge arg0 and arg1 - expr * arg0 = to_app(node)->get_arg(0); - expr * arg1 = to_app(node)->get_arg(1); - expr * arg0DeAlias = dealias_node(arg0, varAliasMap, concatAliasMap); - expr * arg1DeAlias = dealias_node(arg1, varAliasMap, concatAliasMap); - get_grounded_concats(depth + 1, arg0DeAlias, varAliasMap, concatAliasMap, varConstMap, concatConstMap, varEqConcatMap, groundedMap); - get_grounded_concats(depth + 1, arg1DeAlias, varAliasMap, concatAliasMap, varConstMap, concatConstMap, varEqConcatMap, groundedMap); - - std::map, std::set >::iterator arg1_grdItor; - for (auto const &arg0_grdItor : groundedMap[arg0DeAlias]) { - for (auto const &arg1_grdItor : groundedMap[arg1DeAlias]) { - std::vector ndVec; - ndVec.insert(ndVec.end(), arg0_grdItor.first.begin(), arg0_grdItor.first.end()); - size_t arg0VecSize = arg0_grdItor.first.size(); - size_t arg1VecSize = arg1_grdItor.first.size(); - if (arg0VecSize > 0 && arg1VecSize > 0 && u.str.is_string(arg0_grdItor.first[arg0VecSize - 1]) && u.str.is_string(arg1_grdItor.first[0])) { - ndVec.pop_back(); - ndVec.push_back(mk_concat(arg0_grdItor.first[arg0VecSize - 1], arg1_grdItor.first[0])); - for (size_t i = 1; i < arg1VecSize; i++) { - ndVec.push_back(arg1_grdItor.first[i]); - } - } else { - ndVec.insert(ndVec.end(), arg1_grdItor.first.begin(), arg1_grdItor.first.end()); - } - // only insert if we don't know "node = concat(ndVec)" since one set of condition leads to this is enough - if (groundedMap[node].find(ndVec) == groundedMap[node].end()) { - groundedMap[node][ndVec]; - if (arg0 != arg0DeAlias) { - groundedMap[node][ndVec].insert(ctx.mk_eq_atom(arg0, arg0DeAlias)); - } - groundedMap[node][ndVec].insert(arg0_grdItor.second.begin(), arg0_grdItor.second.end()); - - if (arg1 != arg1DeAlias) { - groundedMap[node][ndVec].insert(ctx.mk_eq_atom(arg1, arg1DeAlias)); - } - groundedMap[node][ndVec].insert(arg1_grdItor.second.begin(), arg1_grdItor.second.end()); - } - } - } - } - } - // string variables - else if (variable_set.find(node) != variable_set.end()) { - // deAliasedVar = Constant - if (varConstMap.find(node) != varConstMap.end()) { - std::vector concatNodes; - concatNodes.push_back(varConstMap[node]); - groundedMap[node][concatNodes].clear(); - groundedMap[node][concatNodes].insert(ctx.mk_eq_atom(node, varConstMap[node])); - } - // deAliasedVar = someConcat - else if (varEqConcatMap.find(node) != varEqConcatMap.end()) { - expr * eqConcat = varEqConcatMap[node].begin()->first; - expr * deAliasedEqConcat = dealias_node(eqConcat, varAliasMap, concatAliasMap); - get_grounded_concats(depth + 1, deAliasedEqConcat, varAliasMap, concatAliasMap, varConstMap, concatConstMap, varEqConcatMap, groundedMap); - - for (auto const &grdItor : groundedMap[deAliasedEqConcat]) { - std::vector ndVec; - ndVec.insert(ndVec.end(), grdItor.first.begin(), grdItor.first.end()); - // only insert if we don't know "node = concat(ndVec)" since one set of condition leads to this is enough - if (groundedMap[node].find(ndVec) == groundedMap[node].end()) { - // condition: node = deAliasedEqConcat - groundedMap[node][ndVec].insert(ctx.mk_eq_atom(node, deAliasedEqConcat)); - // appending conditions for "deAliasedEqConcat = CONCAT(ndVec)" - groundedMap[node][ndVec].insert(grdItor.second.begin(), grdItor.second.end()); - } - } - } - // node (has been de-aliased) != constant && node (has been de-aliased) != any concat - // just push in the deAliasedVar - else { - std::vector concatNodes; - concatNodes.push_back(node); - groundedMap[node][concatNodes]; - } - } - } - - void theory_str::print_grounded_concat(expr * node, std::map, std::set > > & groundedMap) { - TRACE(str, tout << mk_pp(node, get_manager()) << std::endl;); - if (groundedMap.find(node) != groundedMap.end()) { - for (auto const &itor : groundedMap[node]) { - (void) itor; - TRACE(str, - tout << "\t[grounded] "; - for (auto const &vIt : itor.first) { - tout << mk_pp(vIt, get_manager()) << ", "; - } - tout << std::endl; - tout << "\t[condition] "; - for (auto const &sIt : itor.second) { - tout << mk_pp(sIt, get_manager()) << ", "; - } - tout << std::endl; - ); - } - } else { - TRACE(str, tout << "not found" << std::endl;); - } - } - - bool theory_str::is_partial_in_grounded_concat(const std::vector & strVec, const std::vector & subStrVec) { - size_t strCnt = strVec.size(); - size_t subStrCnt = subStrVec.size(); - - if (strCnt == 0 || subStrCnt == 0) { - return false; - } - - // The assumption is that all consecutive constant strings are merged into one node - if (strCnt < subStrCnt) { - return false; - } - - if (subStrCnt == 1) { - zstring subStrVal; - if (u.str.is_string(subStrVec[0], subStrVal)) { - for (size_t i = 0; i < strCnt; i++) { - zstring strVal; - if (u.str.is_string(strVec[i], strVal)) { - if (strVal.contains(subStrVal)) { - return true; - } - } - } - } else { - for (size_t i = 0; i < strCnt; i++) { - if (strVec[i] == subStrVec[0]) { - return true; - } - } - } - return false; - } else { - for (size_t i = 0; i <= (strCnt - subStrCnt); i++) { - // The first node in subStrVect should be - // * constant: a suffix of a note in strVec[i] - // * variable: - bool firstNodesOK = true; - zstring subStrHeadVal; - if (u.str.is_string(subStrVec[0], subStrHeadVal)) { - zstring strHeadVal; - if (u.str.is_string(strVec[i], strHeadVal)) { - if (strHeadVal.length() >= subStrHeadVal.length()) { - zstring suffix = strHeadVal.extract(strHeadVal.length() - subStrHeadVal.length(), subStrHeadVal.length()); - if (suffix != subStrHeadVal) { - firstNodesOK = false; - } - } else { - firstNodesOK = false; - } - } else { - if (subStrVec[0] != strVec[i]) { - firstNodesOK = false; - } - } - } - if (!firstNodesOK) { - continue; - } - - // middle nodes - bool midNodesOK = true; - for (size_t j = 1; j < subStrCnt - 1; j++) { - if (subStrVec[j] != strVec[i + j]) { - midNodesOK = false; - break; - } - } - if (!midNodesOK) { - continue; - } - - // tail nodes - size_t tailIdx = i + subStrCnt - 1; - zstring subStrTailVal; - if (u.str.is_string(subStrVec[subStrCnt - 1], subStrTailVal)) { - zstring strTailVal; - if (u.str.is_string(strVec[tailIdx], strTailVal)) { - if (strTailVal.length() >= subStrTailVal.length()) { - zstring prefix = strTailVal.extract(0, subStrTailVal.length()); - if (prefix == subStrTailVal) { - return true; - } else { - continue; - } - } else { - continue; - } - } - } else { - if (subStrVec[subStrCnt - 1] == strVec[tailIdx]) { - return true; - } else { - continue; - } - } - } - return false; - } - } - - void theory_str::check_subsequence(expr* str, expr* strDeAlias, expr* subStr, expr* subStrDeAlias, expr* boolVar, - std::map, std::set > > & groundedMap) { - - ast_manager & m = get_manager(); - for (auto const &itorStr : groundedMap[strDeAlias]) { - for (auto const &itorSubStr : groundedMap[subStrDeAlias]) { - bool contain = is_partial_in_grounded_concat(itorStr.first, itorSubStr.first); - if (contain) { - expr_ref_vector litems(m); - if (str != strDeAlias) { - litems.push_back(ctx.mk_eq_atom(str, strDeAlias)); - } - if (subStr != subStrDeAlias) { - litems.push_back(ctx.mk_eq_atom(subStr, subStrDeAlias)); - } - - for (auto const &i1: itorStr.second) { - litems.push_back(i1); - } - for (auto const &i1 : itorSubStr.second) { - litems.push_back(i1); - } - - expr_ref implyR(boolVar, m); - - if (litems.empty()) { - assert_axiom(implyR); - } else { - expr_ref implyL(mk_and(litems), m); - assert_implication(implyL, implyR); - } - - } - } - } - } - - void theory_str::compute_contains(std::map & varAliasMap, - std::map & concatAliasMap, std::map & varConstMap, - std::map & concatConstMap, std::map > & varEqConcatMap) { - std::map, std::set > > groundedMap; - for (auto const& kv : contain_pair_bool_map) { - expr* containBoolVar = kv.get_value(); - expr* str = kv.get_key1(); - expr* subStr = kv.get_key2(); - - expr* strDeAlias = dealias_node(str, varAliasMap, concatAliasMap); - expr* subStrDeAlias = dealias_node(subStr, varAliasMap, concatAliasMap); - - get_grounded_concats(0, strDeAlias, varAliasMap, concatAliasMap, varConstMap, concatConstMap, varEqConcatMap, groundedMap); - get_grounded_concats(0, subStrDeAlias, varAliasMap, concatAliasMap, varConstMap, concatConstMap, varEqConcatMap, groundedMap); - - // debugging - print_grounded_concat(strDeAlias, groundedMap); - print_grounded_concat(subStrDeAlias, groundedMap); - - check_subsequence(str, strDeAlias, subStr, subStrDeAlias, containBoolVar, groundedMap); - } - } - - bool theory_str::can_concat_eq_str(expr * concat, zstring& str) { - unsigned int strLen = str.length(); - if (u.str.is_concat(to_app(concat))) { - ptr_vector args; - get_nodes_in_concat(concat, args); - expr * ml_node = args[0]; - expr * mr_node = args[args.size() - 1]; - - zstring ml_str; - if (u.str.is_string(ml_node, ml_str)) { - unsigned int ml_len = ml_str.length(); - if (ml_len > strLen) { - return false; - } - unsigned int cLen = ml_len; - if (ml_str != str.extract(0, cLen)) { - return false; - } - } - - zstring mr_str; - if (u.str.is_string(mr_node, mr_str)) { - unsigned int mr_len = mr_str.length(); - if (mr_len > strLen) { - return false; - } - unsigned int cLen = mr_len; - if (mr_str != str.extract(strLen - cLen, cLen)) { - return false; - } - } - - unsigned int sumLen = 0; - for (unsigned int i = 0 ; i < args.size() ; i++) { - expr * oneArg = args[i]; - zstring arg_str; - if (u.str.is_string(oneArg, arg_str)) { - if (!str.contains(arg_str)) { - return false; - } - sumLen += arg_str.length(); - } - } - - if (sumLen > strLen) { - return false; - } - } - return true; - } - - bool theory_str::can_concat_eq_concat(expr * concat1, expr * concat2) { - if (u.str.is_concat(to_app(concat1)) && u.str.is_concat(to_app(concat2))) { - { - // Suppose concat1 = (Concat X Y) and concat2 = (Concat M N). - expr * concat1_mostL = getMostLeftNodeInConcat(concat1); - expr * concat2_mostL = getMostLeftNodeInConcat(concat2); - // if both X and M are constant strings, check whether they have the same prefix - zstring concat1_mostL_str, concat2_mostL_str; - if (u.str.is_string(concat1_mostL, concat1_mostL_str) && u.str.is_string(concat2_mostL, concat2_mostL_str)) { - unsigned int cLen = std::min(concat1_mostL_str.length(), concat2_mostL_str.length()); - if (concat1_mostL_str.extract(0, cLen) != concat2_mostL_str.extract(0, cLen)) { - return false; - } - } - } - - { - // Similarly, if both Y and N are constant strings, check whether they have the same suffix - expr * concat1_mostR = getMostRightNodeInConcat(concat1); - expr * concat2_mostR = getMostRightNodeInConcat(concat2); - zstring concat1_mostR_str, concat2_mostR_str; - if (u.str.is_string(concat1_mostR, concat1_mostR_str) && u.str.is_string(concat2_mostR, concat2_mostR_str)) { - unsigned int cLen = std::min(concat1_mostR_str.length(), concat2_mostR_str.length()); - if (concat1_mostR_str.extract(concat1_mostR_str.length() - cLen, cLen) != - concat2_mostR_str.extract(concat2_mostR_str.length() - cLen, cLen)) { - return false; - } - } - } - } - return true; - } - - /* - * Check whether n1 and n2 could be equal. - * Returns true if n1 could equal n2 (maybe), - * and false if n1 is definitely not equal to n2 (no). - */ - bool theory_str::can_two_nodes_eq(expr * n1, expr * n2) { - app * n1_curr = to_app(n1); - app * n2_curr = to_app(n2); - - // case 0: n1_curr is const string, n2_curr is const string - zstring n1_curr_str, n2_curr_str; - if (u.str.is_string(n1_curr, n1_curr_str) && u.str.is_string(n2_curr, n2_curr_str)) { - TRACE(str, tout << "checking string constants: n1=" << n1_curr_str << ", n2=" << n2_curr_str << std::endl;); - if (n1_curr_str == n2_curr_str) { - // TODO(mtrberzi) potential correction: if n1_curr != n2_curr, - // assert that these two terms are in fact equal, because they ought to be - return true; - } else { - return false; - } - } - // case 1: n1_curr is concat, n2_curr is const string - else if (u.str.is_concat(n1_curr) && u.str.is_string(n2_curr)) { - zstring n2_curr_str; - u.str.is_string(n2_curr, n2_curr_str); - if (!can_concat_eq_str(n1_curr, n2_curr_str)) { - return false; - } - } - // case 2: n2_curr is concat, n1_curr is const string - else if (u.str.is_concat(n2_curr) && u.str.is_string(n1_curr)) { - zstring n1_curr_str; - u.str.is_string(n1_curr, n1_curr_str); - if (!can_concat_eq_str(n2_curr, n1_curr_str)) { - return false; - } - } - // case 3: both are concats - else if (u.str.is_concat(n1_curr) && u.str.is_concat(n2_curr)) { - if (!can_concat_eq_concat(n1_curr, n2_curr)) { - return false; - } - } - - return true; - } - - // was checkLength2ConstStr() in Z3str2 - // returns true if everything is OK, or false if inconsistency detected - // - note that these are different from the semantics in Z3str2 - bool theory_str::check_length_const_string(expr * n1, expr * constStr) { - ast_manager & mgr = get_manager(); - - zstring tmp; - u.str.is_string(constStr, tmp); - rational strLen(tmp.length()); - - if (u.str.is_concat(to_app(n1))) { - ptr_vector args; - expr_ref_vector items(mgr); - - get_nodes_in_concat(n1, args); - - rational sumLen(0); - for (unsigned int i = 0; i < args.size(); ++i) { - rational argLen; - bool argLen_exists = get_len_value(args[i], argLen); - if (argLen_exists) { - if (!u.str.is_string(args[i])) { - items.push_back(ctx.mk_eq_atom(mk_strlen(args[i]), mk_int(argLen))); - } - TRACE(str, tout << "concat arg: " << mk_pp(args[i], mgr) << " has len = " << argLen.to_string() << std::endl;); - sumLen += argLen; - if (sumLen > strLen) { - items.push_back(ctx.mk_eq_atom(n1, constStr)); - expr_ref toAssert(mgr.mk_not(mk_and(items)), mgr); - TRACE(str, tout << "inconsistent length: concat (len = " << sumLen << ") <==> string constant (len = " << strLen << ")" << std::endl;); - assert_axiom(toAssert); - return false; - } - } - } - } else { // !is_concat(n1) - rational oLen; - bool oLen_exists = get_len_value(n1, oLen); - if (oLen_exists && oLen != strLen) { - TRACE(str, tout << "inconsistent length: var (len = " << oLen << ") <==> string constant (len = " << strLen << ")" << std::endl;); - expr_ref l(ctx.mk_eq_atom(n1, constStr), mgr); - expr_ref r(ctx.mk_eq_atom(mk_strlen(n1), mk_strlen(constStr)), mgr); - assert_implication(l, r); - return false; - } - } - rational unused; - if (get_len_value(n1, unused) == false) { - expr_ref l(ctx.mk_eq_atom(n1, constStr), mgr); - expr_ref r(ctx.mk_eq_atom(mk_strlen(n1), mk_strlen(constStr)), mgr); - assert_implication(l, r); - } - return true; - } - - bool theory_str::check_length_concat_concat(expr * n1, expr * n2) { - ast_manager & mgr = get_manager(); - - ptr_vector concat1Args; - ptr_vector concat2Args; - get_nodes_in_concat(n1, concat1Args); - get_nodes_in_concat(n2, concat2Args); - - bool concat1LenFixed = true; - bool concat2LenFixed = true; - - expr_ref_vector items(mgr); - - rational sum1(0), sum2(0); - - for (unsigned int i = 0; i < concat1Args.size(); ++i) { - expr * oneArg = concat1Args[i]; - rational argLen; - bool argLen_exists = get_len_value(oneArg, argLen); - if (argLen_exists) { - sum1 += argLen; - if (!u.str.is_string(oneArg)) { - items.push_back(ctx.mk_eq_atom(mk_strlen(oneArg), mk_int(argLen))); - } - } else { - concat1LenFixed = false; - } - } - - for (unsigned int i = 0; i < concat2Args.size(); ++i) { - expr * oneArg = concat2Args[i]; - rational argLen; - bool argLen_exists = get_len_value(oneArg, argLen); - if (argLen_exists) { - sum2 += argLen; - if (!u.str.is_string(oneArg)) { - items.push_back(ctx.mk_eq_atom(mk_strlen(oneArg), mk_int(argLen))); - } - } else { - concat2LenFixed = false; - } - } - - items.push_back(ctx.mk_eq_atom(n1, n2)); - - bool conflict = false; - - if (concat1LenFixed && concat2LenFixed) { - if (sum1 != sum2) { - conflict = true; - } - } else if (!concat1LenFixed && concat2LenFixed) { - if (sum1 > sum2) { - conflict = true; - } - } else if (concat1LenFixed && !concat2LenFixed) { - if (sum1 < sum2) { - conflict = true; - } - } - - if (conflict) { - TRACE(str, tout << "inconsistent length detected in concat <==> concat" << std::endl;); - expr_ref toAssert(mgr.mk_not(mk_and(items)), mgr); - assert_axiom(toAssert); - return false; - } - return true; - } - - bool theory_str::check_length_concat_var(expr * concat, expr * var) { - ast_manager & mgr = get_manager(); - - rational varLen; - bool varLen_exists = get_len_value(var, varLen); - if (!varLen_exists) { - return true; - } else { - rational sumLen(0); - ptr_vector args; - expr_ref_vector items(mgr); - get_nodes_in_concat(concat, args); - for (unsigned int i = 0; i < args.size(); ++i) { - expr * oneArg = args[i]; - rational argLen; - bool argLen_exists = get_len_value(oneArg, argLen); - if (argLen_exists) { - if (!u.str.is_string(oneArg) && !argLen.is_zero()) { - items.push_back(ctx.mk_eq_atom(mk_strlen(oneArg), mk_int(argLen))); - } - sumLen += argLen; - if (sumLen > varLen) { - TRACE(str, tout << "inconsistent length detected in concat <==> var" << std::endl;); - items.push_back(ctx.mk_eq_atom(mk_strlen(var), mk_int(varLen))); - items.push_back(ctx.mk_eq_atom(concat, var)); - expr_ref toAssert(mgr.mk_not(mk_and(items)), mgr); - assert_axiom(toAssert); - return false; - } - } - } - return true; - } - } - - bool theory_str::check_length_var_var(expr * var1, expr * var2) { - ast_manager & mgr = get_manager(); - - rational var1Len, var2Len; - bool var1Len_exists = get_len_value(var1, var1Len); - bool var2Len_exists = get_len_value(var2, var2Len); - - if (var1Len_exists && var2Len_exists && var1Len != var2Len) { - TRACE(str, tout << "inconsistent length detected in var <==> var" << std::endl;); - expr_ref_vector items(mgr); - items.push_back(ctx.mk_eq_atom(mk_strlen(var1), mk_int(var1Len))); - items.push_back(ctx.mk_eq_atom(mk_strlen(var2), mk_int(var2Len))); - items.push_back(ctx.mk_eq_atom(var1, var2)); - expr_ref toAssert(mgr.mk_not(mk_and(items)), mgr); - assert_axiom(toAssert); - return false; - } - return true; - } - - // returns true if everything is OK, or false if inconsistency detected - // - note that these are different from the semantics in Z3str2 - bool theory_str::check_length_eq_var_concat(expr * n1, expr * n2) { - // n1 and n2 are not const string: either variable or concat - bool n1Concat = u.str.is_concat(to_app(n1)); - bool n2Concat = u.str.is_concat(to_app(n2)); - if (n1Concat && n2Concat) { - return check_length_concat_concat(n1, n2); - } - // n1 is concat, n2 is variable - else if (n1Concat && (!n2Concat)) { - return check_length_concat_var(n1, n2); - } - // n1 is variable, n2 is concat - else if ((!n1Concat) && n2Concat) { - return check_length_concat_var(n2, n1); - } - // n1 and n2 are both variables - else { - return check_length_var_var(n1, n2); - } - return true; - } - - // returns false if an inconsistency is detected, or true if no inconsistencies were found - // - note that these are different from the semantics of checkLengConsistency() in Z3str2 - bool theory_str::check_length_consistency(expr * n1, expr * n2) { - if (u.str.is_string(n1) && u.str.is_string(n2)) { - // consistency has already been checked in can_two_nodes_eq(). - return true; - } else if (u.str.is_string(n1) && (!u.str.is_string(n2))) { - return check_length_const_string(n2, n1); - } else if (u.str.is_string(n2) && (!u.str.is_string(n1))) { - return check_length_const_string(n1, n2); - } else { - // n1 and n2 are vars or concats - return check_length_eq_var_concat(n1, n2); - } - return true; - } - - // Modified signature: returns true if nothing was learned, or false if at least one axiom was asserted. - // (This is used for deferred consistency checking) - bool theory_str::check_concat_len_in_eqc(expr * concat) { - bool no_assertions = true; - - expr * eqc_n = concat; - do { - if (u.str.is_concat(to_app(eqc_n))) { - rational unused; - bool status = infer_len_concat(eqc_n, unused); - if (status) { - no_assertions = false; - } - } - eqc_n = get_eqc_next(eqc_n); - } while (eqc_n != concat); - - return no_assertions; - } - - /* - * strArgmt::solve_concat_eq_str() - * Solve concatenations of the form: - * const == Concat(const, X) - * const == Concat(X, const) - */ - void theory_str::solve_concat_eq_str(expr * concat, expr * str) { - ast_manager & m = get_manager(); - - TRACE(str, tout << mk_ismt2_pp(concat, m) << " == " << mk_ismt2_pp(str, m) << std::endl;); - - zstring const_str; - if (u.str.is_concat(to_app(concat)) && u.str.is_string(to_app(str), const_str)) { - app * a_concat = to_app(concat); - SASSERT(a_concat->get_num_args() == 2); - expr * a1 = a_concat->get_arg(0); - expr * a2 = a_concat->get_arg(1); - - if (const_str.empty()) { - TRACE(str, tout << "quick path: concat == \"\"" << std::endl;); - // assert the following axiom: - // ( (Concat a1 a2) == "" ) -> ( (a1 == "") AND (a2 == "") ) - - - expr_ref premise(ctx.mk_eq_atom(concat, str), m); - expr_ref c1(ctx.mk_eq_atom(a1, str), m); - expr_ref c2(ctx.mk_eq_atom(a2, str), m); - expr_ref conclusion(m.mk_and(c1, c2), m); - assert_implication(premise, conclusion); - - return; - } - bool arg1_has_eqc_value = false; - bool arg2_has_eqc_value = false; - expr * arg1 = get_eqc_value(a1, arg1_has_eqc_value); - expr * arg2 = get_eqc_value(a2, arg2_has_eqc_value); - expr_ref newConcat(m); - if (arg1 != a1 || arg2 != a2) { - TRACE(str, tout << "resolved concat argument(s) to eqc string constants" << std::endl;); - expr_ref_vector item1(m); - if (a1 != arg1) { - item1.push_back(ctx.mk_eq_atom(a1, arg1)); - } - if (a2 != arg2) { - item1.push_back(ctx.mk_eq_atom(a2, arg2)); - } - expr_ref implyL1(mk_and(item1), m); - newConcat = mk_concat(arg1, arg2); - if (newConcat != str) { - expr_ref implyR1(ctx.mk_eq_atom(concat, newConcat), m); - assert_implication(implyL1, implyR1); - } - } else { - newConcat = concat; - } - if (newConcat == str) { - return; - } - if (!u.str.is_concat(to_app(newConcat))) { - return; - } - if (arg1_has_eqc_value && arg2_has_eqc_value) { - // Case 1: Concat(const, const) == const - TRACE(str, tout << "Case 1: Concat(const, const) == const" << std::endl;); - zstring arg1_str, arg2_str; - u.str.is_string(arg1, arg1_str); - u.str.is_string(arg2, arg2_str); - - zstring result_str = arg1_str + arg2_str; - if (result_str != const_str) { - // Inconsistency - TRACE(str, tout << "inconsistency detected: \"" - << arg1_str << "\" + \"" << arg2_str << - "\" != \"" << const_str << "\"" << "\n";); - expr_ref equality(ctx.mk_eq_atom(concat, str), m); - expr_ref diseq(mk_not(m, equality), m); - assert_axiom(diseq); - return; - } - } else if (!arg1_has_eqc_value && arg2_has_eqc_value) { - // Case 2: Concat(var, const) == const - TRACE(str, tout << "Case 2: Concat(var, const) == const" << std::endl;); - zstring arg2_str; - u.str.is_string(arg2, arg2_str); - unsigned int resultStrLen = const_str.length(); - unsigned int arg2StrLen = arg2_str.length(); - if (resultStrLen < arg2StrLen) { - // Inconsistency - TRACE(str, tout << "inconsistency detected: \"" - << arg2_str << - "\" is longer than \"" << const_str << "\"," - << " so cannot be concatenated with anything to form it" << "\n";); - expr_ref equality(ctx.mk_eq_atom(newConcat, str), m); - expr_ref diseq(mk_not(m, equality), m); - assert_axiom(diseq); - return; - } else { - int varStrLen = resultStrLen - arg2StrLen; - zstring firstPart = const_str.extract(0, varStrLen); - zstring secondPart = const_str.extract(varStrLen, arg2StrLen); - if (arg2_str != secondPart) { - // Inconsistency - TRACE(str, tout << "inconsistency detected: " - << "suffix of concatenation result expected \"" << secondPart << "\", " - << "actually \"" << arg2_str << "\"" - << "\n";); - expr_ref equality(ctx.mk_eq_atom(newConcat, str), m); - expr_ref diseq(mk_not(m, equality), m); - assert_axiom(diseq); - return; - } else { - expr_ref tmpStrConst(mk_string(firstPart), m); - expr_ref premise(ctx.mk_eq_atom(newConcat, str), m); - expr_ref conclusion(ctx.mk_eq_atom(arg1, tmpStrConst), m); - assert_implication(premise, conclusion); - return; - } - } - } else if (arg1_has_eqc_value && !arg2_has_eqc_value) { - // Case 3: Concat(const, var) == const - TRACE(str, tout << "Case 3: Concat(const, var) == const" << std::endl;); - zstring arg1_str; - u.str.is_string(arg1, arg1_str); - unsigned int resultStrLen = const_str.length(); - unsigned int arg1StrLen = arg1_str.length(); - if (resultStrLen < arg1StrLen) { - // Inconsistency - TRACE(str, tout << "inconsistency detected: \"" - << arg1_str << - "\" is longer than \"" << const_str << "\"," - << " so cannot be concatenated with anything to form it" << "\n";); - expr_ref equality(ctx.mk_eq_atom(newConcat, str), m); - expr_ref diseq(m.mk_not(equality), m); - assert_axiom(diseq); - return; - } else { - int varStrLen = resultStrLen - arg1StrLen; - zstring firstPart = const_str.extract(0, arg1StrLen); - zstring secondPart = const_str.extract(arg1StrLen, varStrLen); - if (arg1_str != firstPart) { - // Inconsistency - TRACE(str, tout << "inconsistency detected: " - << "prefix of concatenation result expected \"" << secondPart << "\", " - << "actually \"" << arg1_str << "\"" - << "\n";); - expr_ref equality(ctx.mk_eq_atom(newConcat, str), m); - expr_ref diseq(m.mk_not(equality), m); - assert_axiom(diseq); - return; - } else { - expr_ref tmpStrConst(mk_string(secondPart), m); - expr_ref premise(ctx.mk_eq_atom(newConcat, str), m); - expr_ref conclusion(ctx.mk_eq_atom(arg2, tmpStrConst), m); - assert_implication(premise, conclusion); - return; - } - } - } else { - // Case 4: Concat(var, var) == const - TRACE(str, tout << "Case 4: Concat(var, var) == const" << std::endl;); - if (eval_concat(arg1, arg2) == nullptr) { - rational arg1Len, arg2Len; - bool arg1Len_exists = get_len_value(arg1, arg1Len); - bool arg2Len_exists = get_len_value(arg2, arg2Len); - rational concatStrLen((unsigned)const_str.length()); - if (arg1Len_exists || arg2Len_exists) { - expr_ref ax_l1(ctx.mk_eq_atom(concat, str), m); - expr_ref ax_l2(m); - zstring prefixStr, suffixStr; - if (arg1Len_exists) { - if (arg1Len.is_neg()) { - TRACE(str, tout << "length conflict: arg1Len = " << arg1Len << ", concatStrLen = " << concatStrLen << std::endl;); - expr_ref toAssert(m_autil.mk_ge(mk_strlen(arg1), mk_int(0)), m); - assert_axiom(toAssert); - return; - } else if (arg1Len > concatStrLen) { - TRACE(str, tout << "length conflict: arg1Len = " << arg1Len << ", concatStrLen = " << concatStrLen << std::endl;); - expr_ref ax_r1(m_autil.mk_le(mk_strlen(arg1), mk_int(concatStrLen)), m); - assert_implication(ax_l1, ax_r1); - return; - } - - prefixStr = const_str.extract(0, arg1Len.get_unsigned()); - rational concat_minus_arg1 = concatStrLen - arg1Len; - suffixStr = const_str.extract(arg1Len.get_unsigned(), concat_minus_arg1.get_unsigned()); - ax_l2 = ctx.mk_eq_atom(mk_strlen(arg1), mk_int(arg1Len)); - } else { - // arg2's length is available - if (arg2Len.is_neg()) { - TRACE(str, tout << "length conflict: arg2Len = " << arg2Len << ", concatStrLen = " << concatStrLen << std::endl;); - expr_ref toAssert(m_autil.mk_ge(mk_strlen(arg2), mk_int(0)), m); - assert_axiom(toAssert); - return; - } else if (arg2Len > concatStrLen) { - TRACE(str, tout << "length conflict: arg2Len = " << arg2Len << ", concatStrLen = " << concatStrLen << std::endl;); - expr_ref ax_r1(m_autil.mk_le(mk_strlen(arg2), mk_int(concatStrLen)), m); - assert_implication(ax_l1, ax_r1); - return; - } - - rational concat_minus_arg2 = concatStrLen - arg2Len; - prefixStr = const_str.extract(0, concat_minus_arg2.get_unsigned()); - suffixStr = const_str.extract(concat_minus_arg2.get_unsigned(), arg2Len.get_unsigned()); - ax_l2 = ctx.mk_eq_atom(mk_strlen(arg2), mk_int(arg2Len)); - } - // consistency check - if (u.str.is_concat(to_app(arg1)) && !can_concat_eq_str(arg1, prefixStr)) { - expr_ref ax_r(m.mk_not(ax_l2), m); - assert_implication(ax_l1, ax_r); - return; - } - if (u.str.is_concat(to_app(arg2)) && !can_concat_eq_str(arg2, suffixStr)) { - expr_ref ax_r(m.mk_not(ax_l2), m); - assert_implication(ax_l1, ax_r); - return; - } - expr_ref_vector r_items(m); - r_items.push_back(ctx.mk_eq_atom(arg1, mk_string(prefixStr))); - r_items.push_back(ctx.mk_eq_atom(arg2, mk_string(suffixStr))); - if (!arg1Len_exists) { - r_items.push_back(ctx.mk_eq_atom(mk_strlen(arg1), mk_int(prefixStr.length()))); - } - if (!arg2Len_exists) { - r_items.push_back(ctx.mk_eq_atom(mk_strlen(arg2), mk_int(suffixStr.length()))); - } - expr_ref lhs(m.mk_and(ax_l1, ax_l2), m); - expr_ref rhs(mk_and(r_items), m); - assert_implication(lhs, rhs); - } else { /* ! (arg1Len != 1 || arg2Len != 1) */ - expr_ref xorFlag(m); - std::pair key1(arg1, arg2); - std::pair key2(arg2, arg1); - - // check the entries in this map to make sure they're still in scope - // before we use them. - - std::map, std::map >::iterator entry1 = varForBreakConcat.find(key1); - std::map, std::map >::iterator entry2 = varForBreakConcat.find(key2); - - bool entry1InScope; - if (entry1 == varForBreakConcat.end()) { - TRACE(str, tout << "key1 no entry" << std::endl;); - entry1InScope = false; - } else { - // OVERRIDE. - entry1InScope = true; - TRACE(str, tout << "key1 entry" << std::endl;); - /* - if (internal_variable_set.find((entry1->second)[0]) == internal_variable_set.end()) { - TRACE(str, tout << "key1 entry not in scope" << std::endl;); - entry1InScope = false; - } else { - TRACE(str, tout << "key1 entry in scope" << std::endl;); - entry1InScope = true; - } - */ - } - - bool entry2InScope; - if (entry2 == varForBreakConcat.end()) { - TRACE(str, tout << "key2 no entry" << std::endl;); - entry2InScope = false; - } else { - // OVERRIDE. - entry2InScope = true; - TRACE(str, tout << "key2 entry" << std::endl;); - /* - if (internal_variable_set.find((entry2->second)[0]) == internal_variable_set.end()) { - TRACE(str, tout << "key2 entry not in scope" << std::endl;); - entry2InScope = false; - } else { - TRACE(str, tout << "key2 entry in scope" << std::endl;); - entry2InScope = true; - } - */ - } - - TRACE(str, tout << "entry 1 " << (entry1InScope ? "in scope" : "not in scope") << std::endl - << "entry 2 " << (entry2InScope ? "in scope" : "not in scope") << std::endl;); - - if (!entry1InScope && !entry2InScope) { - xorFlag = mk_internal_xor_var(); - varForBreakConcat[key1][0] = xorFlag; - } else if (entry1InScope) { - xorFlag = varForBreakConcat[key1][0]; - } else { // entry2InScope - xorFlag = varForBreakConcat[key2][0]; - } - - int concatStrLen = const_str.length(); - int and_count = 1; - - expr_ref_vector arrangement_disjunction(m); - - for (int i = 0; i < concatStrLen + 1; ++i) { - expr_ref_vector and_items(m); - zstring prefixStr = const_str.extract(0, i); - zstring suffixStr = const_str.extract(i, concatStrLen - i); - // skip invalid options - if (u.str.is_concat(to_app(arg1)) && !can_concat_eq_str(arg1, prefixStr)) { - continue; - } - if (u.str.is_concat(to_app(arg2)) && !can_concat_eq_str(arg2, suffixStr)) { - continue; - } - - expr_ref prefixAst(mk_string(prefixStr), m); - expr_ref arg1_eq (ctx.mk_eq_atom(arg1, prefixAst), m); - and_items.push_back(arg1_eq); - and_count += 1; - - expr_ref suffixAst(mk_string(suffixStr), m); - expr_ref arg2_eq (ctx.mk_eq_atom(arg2, suffixAst), m); - and_items.push_back(arg2_eq); - and_count += 1; - (void) and_count; - - arrangement_disjunction.push_back(mk_and(and_items)); - } - - expr_ref implyL(ctx.mk_eq_atom(concat, str), m); - expr_ref implyR1(m); - if (arrangement_disjunction.empty()) { - // negate - expr_ref concat_eq_str(ctx.mk_eq_atom(concat, str), m); - expr_ref negate_ast(m.mk_not(concat_eq_str), m); - assert_axiom(negate_ast); - } else { - implyR1 = mk_or(arrangement_disjunction); - if (m_params.m_StrongArrangements) { - expr_ref ax_strong(ctx.mk_eq_atom(implyL, implyR1), m); - assert_axiom(ax_strong); - } else { - assert_implication(implyL, implyR1); - } - generate_mutual_exclusion(arrangement_disjunction); - } - } /* (arg1Len != 1 || arg2Len != 1) */ - } /* if (Concat(arg1, arg2) == nullptr) */ - } - } - } - - void theory_str::handle_equality(expr * lhs, expr * rhs) { - // both terms must be of sort String - sort * lhs_sort = lhs->get_sort(); - sort * rhs_sort = rhs->get_sort(); - sort * str_sort = u.str.mk_string_sort(); - - // Pick up new terms added during the search (e.g. recursive function expansion). - if (!existing_toplevel_exprs.contains(lhs)) { - existing_toplevel_exprs.insert(lhs); - set_up_axioms(lhs); - propagate(); - } - if (!existing_toplevel_exprs.contains(rhs)) { - existing_toplevel_exprs.insert(rhs); - set_up_axioms(rhs); - propagate(); - } - - if (lhs_sort != str_sort || rhs_sort != str_sort) { - TRACE(str, tout << "skip equality: not String sort" << std::endl;); - return; - } - - if (u.str.is_concat(to_app(lhs)) && u.str.is_concat(to_app(rhs))) { - bool nn1HasEqcValue = false; - bool nn2HasEqcValue = false; - expr * nn1_value = get_eqc_value(lhs, nn1HasEqcValue); - expr * nn2_value = get_eqc_value(rhs, nn2HasEqcValue); - if (nn1HasEqcValue && !nn2HasEqcValue) { - simplify_parent(rhs, nn1_value); - } - if (!nn1HasEqcValue && nn2HasEqcValue) { - simplify_parent(lhs, nn2_value); - } - - expr * nn1_arg0 = to_app(lhs)->get_arg(0); - expr * nn1_arg1 = to_app(lhs)->get_arg(1); - expr * nn2_arg0 = to_app(rhs)->get_arg(0); - expr * nn2_arg1 = to_app(rhs)->get_arg(1); - if (nn1_arg0 == nn2_arg0 && in_same_eqc(nn1_arg1, nn2_arg1)) { - TRACE(str, tout << "skip: lhs arg0 == rhs arg0" << std::endl;); - return; - } - - if (nn1_arg1 == nn2_arg1 && in_same_eqc(nn1_arg0, nn2_arg0)) { - TRACE(str, tout << "skip: lhs arg1 == rhs arg1" << std::endl;); - return; - } - } - - if (opt_DeferEQCConsistencyCheck) { - TRACE(str, tout << "opt_DeferEQCConsistencyCheck is set; deferring new_eq_check call" << std::endl;); - } else { - // newEqCheck() -- check consistency wrt. existing equivalence classes - if (!new_eq_check(lhs, rhs)) { - return; - } - } - - // BEGIN new_eq_handler() in strTheory - - check_eqc_empty_string(lhs, rhs); - instantiate_str_eq_length_axiom(ctx.get_enode(lhs), ctx.get_enode(rhs)); - - // group terms by equivalence class (groupNodeInEqc()) - - std::set eqc_concat_lhs; - std::set eqc_var_lhs; - std::set eqc_const_lhs; - group_terms_by_eqc(lhs, eqc_concat_lhs, eqc_var_lhs, eqc_const_lhs); - - std::set eqc_concat_rhs; - std::set eqc_var_rhs; - std::set eqc_const_rhs; - group_terms_by_eqc(rhs, eqc_concat_rhs, eqc_var_rhs, eqc_const_rhs); - - TRACE(str, - tout << "lhs eqc:" << std::endl; - tout << "Concats:" << std::endl; - for (auto const &ex : eqc_concat_lhs) { - tout << mk_ismt2_pp(ex, get_manager()) << std::endl; - } - tout << "Variables:" << std::endl; - for (auto const &ex : eqc_var_lhs) { - tout << mk_ismt2_pp(ex, get_manager()) << std::endl; - } - tout << "Constants:" << std::endl; - for (auto const &ex : eqc_const_lhs) { - tout << mk_ismt2_pp(ex, get_manager()) << std::endl; - } - - tout << "rhs eqc:" << std::endl; - tout << "Concats:" << std::endl; - for (auto const &ex : eqc_concat_rhs) { - tout << mk_ismt2_pp(ex, get_manager()) << std::endl; - } - tout << "Variables:" << std::endl; - for (auto const &ex : eqc_var_rhs) { - tout << mk_ismt2_pp(ex, get_manager()) << std::endl; - } - tout << "Constants:" << std::endl; - for (auto const &ex : eqc_const_rhs) { - tout << mk_ismt2_pp(ex, get_manager()) << std::endl; - } - ); - - // step 1: Concat == Concat - check_eqc_concat_concat(eqc_concat_lhs, eqc_concat_rhs); - - // step 2: Concat == Constant - - if (!eqc_const_lhs.empty()) { - expr * conStr = *(eqc_const_lhs.begin()); - for (auto const &itor2 : eqc_concat_rhs) { - solve_concat_eq_str(itor2, conStr); - } - } else if (!eqc_const_rhs.empty()) { - expr* conStr = *(eqc_const_rhs.begin()); - for (auto const &itor1 : eqc_concat_lhs) { - solve_concat_eq_str(itor1, conStr); - } - } - - // simplify parents wrt. the equivalence class of both sides - bool nn1HasEqcValue = false; - bool nn2HasEqcValue = false; - // we want the Z3str2 eqc check here... - expr * nn1_value = z3str2_get_eqc_value(lhs, nn1HasEqcValue); - expr * nn2_value = z3str2_get_eqc_value(rhs, nn2HasEqcValue); - if (nn1HasEqcValue && !nn2HasEqcValue) { - simplify_parent(rhs, nn1_value); - } - - if (!nn1HasEqcValue && nn2HasEqcValue) { - simplify_parent(lhs, nn2_value); - } - } - - // Check that a string's length can be 0 iff it is the empty string. - void theory_str::check_eqc_empty_string(expr * lhs, expr * rhs) { - ast_manager & m = get_manager(); - - rational nn1Len, nn2Len; - bool nn1Len_exists = get_len_value(lhs, nn1Len); - bool nn2Len_exists = get_len_value(rhs, nn2Len); - expr_ref emptyStr(mk_string(""), m); - - if (nn1Len_exists && nn1Len.is_zero()) { - if (!in_same_eqc(lhs, emptyStr) && rhs != emptyStr) { - expr_ref eql(ctx.mk_eq_atom(mk_strlen(lhs), mk_int(0)), m); - expr_ref eqr(ctx.mk_eq_atom(lhs, emptyStr), m); - expr_ref toAssert(ctx.mk_eq_atom(eql, eqr), m); - assert_axiom(toAssert); - } - } - - if (nn2Len_exists && nn2Len.is_zero()) { - if (!in_same_eqc(rhs, emptyStr) && lhs != emptyStr) { - expr_ref eql(ctx.mk_eq_atom(mk_strlen(rhs), mk_int(0)), m); - expr_ref eqr(ctx.mk_eq_atom(rhs, emptyStr), m); - expr_ref toAssert(ctx.mk_eq_atom(eql, eqr), m); - assert_axiom(toAssert); - } - } - } - - void theory_str::check_eqc_concat_concat(std::set & eqc_concat_lhs, std::set & eqc_concat_rhs) { - ast_manager & m = get_manager(); - (void)m; - - int hasCommon = 0; - if (!eqc_concat_lhs.empty() && !eqc_concat_rhs.empty()) { - for (auto const &itor1 : eqc_concat_lhs) { - if (eqc_concat_rhs.find(itor1) != eqc_concat_rhs.end()) { - hasCommon = 1; - break; - } - } - for (auto const &itor2 : eqc_concat_rhs) { - if (eqc_concat_lhs.find(itor2) != eqc_concat_lhs.end()) { - hasCommon = 1; - break; - } - } - if (hasCommon == 0) { - if (opt_ConcatOverlapAvoid) { - bool found = false; - // check each pair and take the first ones that won't immediately overlap - for (auto const &concat_lhs : eqc_concat_lhs) { - if (found) { - break; - } - for (auto const &concat_rhs : eqc_concat_rhs) { - if (will_result_in_overlap(concat_lhs, concat_rhs)) { - TRACE(str, tout << "Concats " << mk_pp(concat_lhs, m) << " and " - << mk_pp(concat_rhs, m) << " will result in overlap; skipping." << std::endl;); - } else { - TRACE(str, tout << "Concats " << mk_pp(concat_lhs, m) << " and " - << mk_pp(concat_rhs, m) << " won't overlap. Simplifying here." << std::endl;); - simplify_concat_equality(concat_lhs, concat_rhs); - found = true; - break; - } - } - } - if (!found) { - TRACE(str, tout << "All pairs of concats expected to overlap, falling back." << std::endl;); - simplify_concat_equality(*(eqc_concat_lhs.begin()), *(eqc_concat_rhs.begin())); - } - } else { - // default behaviour - simplify_concat_equality(*(eqc_concat_lhs.begin()), *(eqc_concat_rhs.begin())); - } - } - } - } - - bool theory_str::is_var(expr * e) const { - ast_manager & m = get_manager(); - sort * ex_sort = e->get_sort(); - sort * str_sort = u.str.mk_string_sort(); - // non-string-sort terms cannot be string variables - if (ex_sort != str_sort) return false; - // string constants cannot be variables - if (u.str.is_string(e)) return false; - if (u.str.is_concat(e) || u.str.is_at(e) || u.str.is_extract(e) || u.str.is_replace(e) || u.str.is_itos(e) || u.str.is_from_code(e)) - return false; - if (m.is_ite(e)) - return false; - return true; - } - - void theory_str::set_up_axioms(expr * ex) { - ast_manager & m = get_manager(); - - // workaround for #3756: - // the map existing_toplevel_exprs is never cleared on backtracking. - // to ensure the expressions are valid we persist validity of the - // expression throughout the lifetime of theory_str - m_trail.push_back(ex); - - sort * ex_sort = ex->get_sort(); - sort * str_sort = u.str.mk_string_sort(); - sort * bool_sort = m.mk_bool_sort(); - - family_id m_arith_fid = m.mk_family_id("arith"); - sort * int_sort = m.mk_sort(m_arith_fid, INT_SORT); - - // reject unhandled expressions - if (u.str.is_replace_all(ex) || u.str.is_replace_re(ex) || u.str.is_replace_re_all(ex)) { - TRACE(str, tout << "ERROR: Z3str3 has encountered an unsupported operator. Aborting." << std::endl;); - m.raise_exception("Z3str3 encountered an unsupported operator."); - } - - if (ex_sort == str_sort) { - TRACE(str, tout << "setting up axioms for " << mk_ismt2_pp(ex, get_manager()) << - ": expr is of sort String" << std::endl;); - // set up basic string axioms - enode * n = ctx.get_enode(ex); - SASSERT(n); - m_basicstr_axiom_todo.push_back(n); - TRACE(str, tout << "add " << mk_pp(ex, m) << " to m_basicstr_axiom_todo" << std::endl;); - - - if (is_app(ex)) { - app * ap = to_app(ex); - if (u.str.is_concat(ap)) { - // if ex is a concat, set up concat axioms later - m_concat_axiom_todo.push_back(n); - // we also want to check whether we can eval this concat, - // in case the rewriter did not totally finish with this term - m_concat_eval_todo.push_back(n); - } else if (u.str.is_at(ap) || u.str.is_extract(ap) || u.str.is_replace(ap)) { - m_library_aware_axiom_todo.push_back(n); - m_library_aware_trail_stack.push(push_back_trail(m_library_aware_axiom_todo)); - } else if (u.str.is_itos(ap)) { - TRACE(str, tout << "found string-integer conversion term: " << mk_pp(ex, get_manager()) << std::endl;); - string_int_conversion_terms.push_back(ap); - m_library_aware_axiom_todo.push_back(n); - m_library_aware_trail_stack.push(push_back_trail(m_library_aware_axiom_todo)); - } else if (u.str.is_from_code(ap)) { - TRACE(str, tout << "found string-codepoint conversion term: " << mk_pp(ex, get_manager()) << std::endl;); - string_int_conversion_terms.push_back(ap); - m_library_aware_axiom_todo.push_back(n); - m_library_aware_trail_stack.push(push_back_trail(m_library_aware_axiom_todo)); - } else if (is_var(ex)) { - // if ex is a variable, add it to our list of variables - TRACE(str, tout << "tracking variable " << mk_ismt2_pp(ap, get_manager()) << std::endl;); - variable_set.insert(ex); - ctx.mark_as_relevant(ex); - // this might help?? - theory_var v = mk_var(n); - TRACE(str, tout << "variable " << mk_ismt2_pp(ap, get_manager()) << " is #" << v << std::endl;); - (void)v; - } - } - } else if (ex_sort == bool_sort && !is_quantifier(ex)) { - TRACE(str, tout << "setting up axioms for " << mk_ismt2_pp(ex, get_manager()) << - ": expr is of sort Bool" << std::endl;); - // set up axioms for boolean terms - - ensure_enode(ex); - if (ctx.e_internalized(ex)) { - enode * n = ctx.get_enode(ex); - SASSERT(n); - - if (is_app(ex)) { - app * ap = to_app(ex); - if (u.str.is_prefix(ap) || u.str.is_suffix(ap) || u.str.is_contains(ap) || u.str.is_in_re(ap) || u.str.is_is_digit(ap)) { - m_library_aware_axiom_todo.push_back(n); - m_library_aware_trail_stack.push(push_back_trail(m_library_aware_axiom_todo)); - } - } - } else { - TRACE(str, tout << "WARNING: Bool term " << mk_ismt2_pp(ex, get_manager()) << " not internalized. Delaying axiom setup to prevent a crash." << std::endl;); - ENSURE(!search_started); // infinite loop prevention - m_delayed_axiom_setup_terms.push_back(ex); - return; - } - } else if (ex_sort == int_sort) { - TRACE(str, tout << "setting up axioms for " << mk_ismt2_pp(ex, get_manager()) << - ": expr is of sort Int" << std::endl;); - // set up axioms for integer terms - enode * n = ensure_enode(ex); - SASSERT(n); - - if (is_app(ex)) { - app * ap = to_app(ex); - if (u.str.is_index(ap)) { - m_library_aware_axiom_todo.push_back(n); - m_library_aware_trail_stack.push(push_back_trail(m_library_aware_axiom_todo)); - } else if (u.str.is_stoi(ap)) { - TRACE(str, tout << "found string-integer conversion term: " << mk_pp(ex, get_manager()) << std::endl;); - string_int_conversion_terms.push_back(ap); - m_library_aware_axiom_todo.push_back(n); - m_library_aware_trail_stack.push(push_back_trail(m_library_aware_axiom_todo)); - } else if (u.str.is_to_code(ex)) { - TRACE(str, tout << "found string-codepoint conversion term: " << mk_pp(ex, get_manager()) << std::endl;); - string_int_conversion_terms.push_back(ap); - m_library_aware_axiom_todo.push_back(n); - m_library_aware_trail_stack.push(push_back_trail(m_library_aware_axiom_todo)); - } - } - } else { - if (u.str.is_non_string_sequence(ex)) { - TRACE(str, tout << "ERROR: Z3str3 does not support non-string sequence terms. Aborting." << std::endl;); - m.raise_exception("Z3str3 does not support non-string sequence terms."); - } - TRACE(str, tout << "setting up axioms for " << mk_ismt2_pp(ex, get_manager()) << - ": expr is of wrong sort, ignoring" << std::endl;); - } - - // if expr is an application, recursively inspect all arguments - if (is_app(ex)) { - app * term = to_app(ex); - unsigned num_args = term->get_num_args(); - for (unsigned i = 0; i < num_args; i++) { - set_up_axioms(term->get_arg(i)); - } - } - } - - void theory_str::add_theory_assumptions(expr_ref_vector & assumptions) { - TRACE(str, tout << "add overlap assumption for theory_str" << std::endl;); - const char* strOverlap = "!!TheoryStrOverlapAssumption!!"; - sort * s = get_manager().mk_bool_sort(); - m_theoryStrOverlapAssumption_term = expr_ref(mk_fresh_const(strOverlap, s), get_manager()); - assumptions.push_back(get_manager().mk_not(m_theoryStrOverlapAssumption_term)); - } - - lbool theory_str::validate_unsat_core(expr_ref_vector & unsat_core) { - app * target_term = to_app(get_manager().mk_not(m_theoryStrOverlapAssumption_term)); - ctx.internalize(target_term, false); - enode* e1 = ctx.get_enode(target_term); - for (unsigned i = 0; i < unsat_core.size(); ++i) { - app * core_term = to_app(unsat_core.get(i)); - // not sure if this is the correct way to compare terms in this context - if (!ctx.e_internalized(core_term)) continue; - enode *e2 = ctx.get_enode(core_term); - if (e1 == e2) { - TRACE(str, tout << "overlap detected in unsat core, changing UNSAT to UNKNOWN" << std::endl;); - return l_undef; - } - } - - return l_false; - } - - void theory_str::init_search_eh() { - - reset_internal_data_structures(); - - TRACE(str, - tout << "dumping all asserted formulas:" << std::endl; - unsigned nFormulas = ctx.get_num_asserted_formulas(); - for (unsigned i = 0; i < nFormulas; ++i) { - expr * ex = ctx.get_asserted_formula(i); - tout << mk_pp(ex, get_manager()) << (ctx.is_relevant(ex) ? " (rel)" : " (NOT REL)") << std::endl; - } - ); - - TRACE(str, - expr_ref_vector formulas(get_manager()); - ctx.get_assignments(formulas); - tout << "dumping all formulas:" << std::endl; - for (auto const &ex : formulas) { - tout << mk_pp(ex, get_manager()) << (ctx.is_relevant(ex) ? "" : " (NOT REL)") << std::endl; - } - ); - /* - * Recursive descent through all asserted formulas to set up axioms. - * Note that this is just the input structure and not necessarily things - * that we know to be true or false. We're just doing this to see - * which terms are explicitly mentioned. - */ - unsigned nFormulas = ctx.get_num_asserted_formulas(); - for (unsigned i = 0; i < nFormulas; ++i) { - expr * ex = ctx.get_asserted_formula(i); - set_up_axioms(ex); - } - - TRACE(str, tout << "search started" << std::endl;); - search_started = true; - } - - void theory_str::new_eq_eh(theory_var x, theory_var y) { - //TRACE(str, tout << "new eq: v#" << x << " = v#" << y << std::endl;); - TRACE(str, tout << "new eq: " << mk_ismt2_pp(get_enode(x)->get_expr(), get_manager()) << " = " << - mk_ismt2_pp(get_enode(y)->get_expr(), get_manager()) << std::endl;); - candidate_model.reset(); - - /* - if (m_find.find(x) == m_find.find(y)) { - return; - } - */ - handle_equality(get_enode(x)->get_expr(), get_enode(y)->get_expr()); - - // replicate Z3str2 behaviour: merge eqc **AFTER** handle_equality - m_find.merge(x, y); - } - - void theory_str::new_diseq_eh(theory_var x, theory_var y) { - //TRACE(str, tout << "new diseq: v#" << x << " != v#" << y << std::endl;); - TRACE(str, tout << "new diseq: " << mk_ismt2_pp(get_enode(x)->get_expr(), get_manager()) << " != " << - mk_ismt2_pp(get_enode(y)->get_expr(), get_manager()) << std::endl;); - candidate_model.reset(); - } - - void theory_str::relevant_eh(app * n) { - TRACE(str, tout << "relevant: " << mk_ismt2_pp(n, get_manager()) << std::endl;); - } - - void theory_str::assign_eh(bool_var v, bool is_true) { - candidate_model.reset(); - expr * e = ctx.bool_var2expr(v); - TRACE(str, tout << "assert: v" << v << " " << mk_pp(e, get_manager()) << " is_true: " << is_true << std::endl;); - DEBUG_CODE( - for (auto * f : existing_toplevel_exprs) { - SASSERT(f->get_ref_count() > 0); - }); - if (!existing_toplevel_exprs.contains(e)) { - existing_toplevel_exprs.insert(e); - set_up_axioms(e); - propagate(); - } - - // heuristics - - if (u.str.is_prefix(e)) { - check_consistency_prefix(e, is_true); - } else if (u.str.is_suffix(e)) { - check_consistency_suffix(e, is_true); - } else if (u.str.is_contains(e)) { - check_consistency_contains(e, is_true); - } - } - - // terms like int.to.str cannot start with / end with / contain non-digit characters - // in the future this could be expanded to regex checks as well - void theory_str::check_consistency_prefix(expr * e, bool is_true) { - context & ctx = get_context(); - ast_manager & m = get_manager(); - expr * needle = nullptr; - expr * haystack = nullptr; - - VERIFY(u.str.is_prefix(e, needle, haystack)); - TRACE(str, tout << "check consistency of prefix predicate: " << mk_pp(needle, m) << " prefixof " << mk_pp(haystack, m) << std::endl;); - - zstring needleStringConstant; - if (get_string_constant_eqc(needle, needleStringConstant)) { - if (u.str.is_itos(haystack) && is_true) { - // needle cannot contain non-digit characters - for (unsigned i = 0; i < needleStringConstant.length(); ++i) { - if (! ('0' <= needleStringConstant[i] && needleStringConstant[i] <= '9')) { - TRACE(str, tout << "conflict: needle = \"" << needleStringConstant << "\" contains non-digit character, but is a prefix of int-to-string term" << std::endl;); - expr_ref premise(ctx.mk_eq_atom(needle, mk_string(needleStringConstant)), m); - expr_ref conclusion(m.mk_not(e), m); - expr_ref conflict(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(conflict); - return; - } - } - } - } - } - - void theory_str::check_consistency_suffix(expr * e, bool is_true) { - context & ctx = get_context(); - ast_manager & m = get_manager(); - expr * needle = nullptr; - expr * haystack = nullptr; - - VERIFY(u.str.is_suffix(e, needle, haystack)); - TRACE(str, tout << "check consistency of suffix predicate: " << mk_pp(needle, m) << " suffixof " << mk_pp(haystack, m) << std::endl;); - - zstring needleStringConstant; - if (get_string_constant_eqc(needle, needleStringConstant)) { - if (u.str.is_itos(haystack) && is_true) { - // needle cannot contain non-digit characters - for (unsigned i = 0; i < needleStringConstant.length(); ++i) { - if (! ('0' <= needleStringConstant[i] && needleStringConstant[i] <= '9')) { - TRACE(str, tout << "conflict: needle = \"" << needleStringConstant << "\" contains non-digit character, but is a suffix of int-to-string term" << std::endl;); - expr_ref premise(ctx.mk_eq_atom(needle, mk_string(needleStringConstant)), m); - expr_ref conclusion(m.mk_not(e), m); - expr_ref conflict(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(conflict); - return; - } - } - } - } - } - - void theory_str::check_consistency_contains(expr * e, bool is_true) { - context & ctx = get_context(); - ast_manager & m = get_manager(); - expr * needle = nullptr; - expr * haystack = nullptr; - - VERIFY(u.str.is_contains(e, haystack, needle)); // first string contains second one - TRACE(str, tout << "check consistency of contains predicate: " << mk_pp(haystack, m) << " contains " << mk_pp(needle, m) << std::endl;); - - zstring needleStringConstant; - if (get_string_constant_eqc(needle, needleStringConstant)) { - if (u.str.is_itos(haystack) && is_true) { - // needle cannot contain non-digit characters - for (unsigned i = 0; i < needleStringConstant.length(); ++i) { - if (! ('0' <= needleStringConstant[i] && needleStringConstant[i] <= '9')) { - TRACE(str, tout << "conflict: needle = \"" << needleStringConstant << "\" contains non-digit character, but int-to-string term contains it" << std::endl;); - expr_ref premise(ctx.mk_eq_atom(needle, mk_string(needleStringConstant)), m); - expr_ref conclusion(m.mk_not(e), m); - expr_ref conflict(rewrite_implication(premise, conclusion), m); - assert_axiom_rw(conflict); - return; - } - } - } - } - } - - void theory_str::push_scope_eh() { - theory::push_scope_eh(); - m_trail_stack.push_scope(); - m_library_aware_trail_stack.push_scope(); - - sLevel += 1; - TRACE(str, tout << "push to " << sLevel << std::endl;); - TRACE_CODE(if (is_trace_enabled(TraceTag::t_str_dump_assign_on_scope_change)) { dump_assignments(); }); - candidate_model.reset(); - } - - void theory_str::recursive_check_variable_scope(expr * ex) { - - if (is_app(ex)) { - app * a = to_app(ex); - if (a->get_num_args() == 0) { - // we only care about string variables - sort * s = ex->get_sort(); - sort * string_sort = u.str.mk_string_sort(); - if (s != string_sort) { - return; - } - // base case: string constant / var - if (u.str.is_string(a)) { - return; - } else { - // assume var - if (variable_set.find(ex) == variable_set.end() - && internal_variable_set.find(ex) == internal_variable_set.end()) { - TRACE(str, tout << "WARNING: possible reference to out-of-scope variable " << mk_pp(ex, m) << std::endl;); - } - } - } else { - for (unsigned i = 0; i < a->get_num_args(); ++i) { - recursive_check_variable_scope(a->get_arg(i)); - } - } - } - } - - void theory_str::check_variable_scope() { - if (!opt_CheckVariableScope) { - return; - } - - if (!is_trace_enabled(TraceTag::t_str_detail)) { - return; - } - - TRACE(str, tout << "checking scopes of variables in the current assignment" << std::endl;); - - ast_manager & m = get_manager(); - - expr_ref_vector assignments(m); - ctx.get_assignments(assignments); - for (auto const &ex : assignments) { - recursive_check_variable_scope(ex); - } - } - - void theory_str::add_persisted_axiom(expr * a) { - m_persisted_axioms.push_back(a); - } - - void theory_str::pop_scope_eh(unsigned num_scopes) { - sLevel -= num_scopes; - TRACE(str, tout << "pop " << num_scopes << " to " << sLevel << std::endl;); - candidate_model.reset(); - - m_basicstr_axiom_todo.reset(); - m_concat_axiom_todo.reset(); - m_concat_eval_todo.reset(); - m_delayed_axiom_setup_terms.reset(); - m_delayed_assertions_todo.reset(); - - TRACE_CODE(if (is_trace_enabled(TraceTag::t_str_dump_assign_on_scope_change)) { dump_assignments(); }); - - // list of expr* to remove from cut_var_map - ptr_vector cutvarmap_removes; - - for (auto const &varItor : cut_var_map) { - std::stack & val = cut_var_map[varItor.m_key]; - while ((!val.empty()) && (val.top()->level != 0) && (val.top()->level >= sLevel)) { - // TRACE(str, tout << "remove cut info for " << mk_pp(e, get_manager()) << std::endl; print_cut_var(e, tout);); - // T_cut * aCut = val.top(); - val.pop(); - // dealloc(aCut); - } - if (val.empty()) { - cutvarmap_removes.insert(varItor.m_key); - } - } - - for (expr* ex : cutvarmap_removes) - cut_var_map.remove(ex); - - ptr_vector new_m_basicstr; - for (enode* e : m_basicstr_axiom_todo) { - TRACE(str, tout << "consider deleting " << mk_pp(e->get_expr(), get_manager()) - << ", enode scope level is " << e->get_iscope_lvl() - << std::endl;); - if (e->get_iscope_lvl() <= (unsigned)sLevel) { - new_m_basicstr.push_back(e); - } - } - m_basicstr_axiom_todo.reset(); - m_basicstr_axiom_todo = new_m_basicstr; - - if (ctx.is_searching()) { - for (expr * e : m_persisted_axioms) { - TRACE(str, tout << "persist axiom: " << mk_pp(e, get_manager()) << std::endl;); - m_persisted_axiom_todo.push_back(e); - } - } - - m_trail_stack.pop_scope(num_scopes); - // m_library_aware_trail_stack owns m_library_aware_todo vector. - // the vector cannot be reset outside. - m_library_aware_trail_stack.pop_scope(num_scopes); - theory::pop_scope_eh(num_scopes); - - //check_variable_scope(); - } - - void theory_str::dump_assignments() { - TRACE_CODE( - ast_manager & m = get_manager(); - tout << "dumping all assignments:" << std::endl; - expr_ref_vector assignments(m); - ctx.get_assignments(assignments); - for (auto const &ex : assignments) { - tout << mk_ismt2_pp(ex, m) << (ctx.is_relevant(ex) ? "" : " (NOT REL)") << std::endl; - } - ); - } - - // returns true if needle appears as a subterm anywhere under haystack, - // or if needle appears in the same EQC as a subterm anywhere under haystack - bool theory_str::term_appears_as_subterm(expr * needle, expr * haystack) { - if (in_same_eqc(needle, haystack)) { - return true; - } - - if (is_app(haystack)) { - app * a_haystack = to_app(haystack); - for (unsigned i = 0; i < a_haystack->get_num_args(); ++i) { - expr * subterm = a_haystack->get_arg(i); - if (term_appears_as_subterm(needle, subterm)) { - return true; - } - } - } - - // not found - return false; - } - - void theory_str::classify_ast_by_type(expr * node, std::map & varMap, - std::map & concatMap, std::map & unrollMap) { - - // check whether the node is a string variable; - // testing set membership here bypasses several expensive checks. - // note that internal variables don't count if they're only length tester / value tester vars. - if (variable_set.find(node) != variable_set.end()) { - if (varMap[node] != 1) { - TRACE(str, tout << "new variable: " << mk_pp(node, get_manager()) << std::endl;); - } - varMap[node] = 1; - } - // check whether the node is a function that we want to inspect - else if (is_app(node)) { - app * aNode = to_app(node); - if (u.str.is_length(aNode)) { - // Length - return; - } else if (u.str.is_concat(aNode)) { - expr * arg0 = aNode->get_arg(0); - expr * arg1 = aNode->get_arg(1); - bool arg0HasEq = false; - bool arg1HasEq = false; - expr * arg0Val = get_eqc_value(arg0, arg0HasEq); - expr * arg1Val = get_eqc_value(arg1, arg1HasEq); - - int canskip = 0; - zstring tmp; - u.str.is_string(arg0Val, tmp); - if (arg0HasEq && tmp.empty()) { - canskip = 1; - } - u.str.is_string(arg1Val, tmp); - if (canskip == 0 && arg1HasEq && tmp.empty()) { - canskip = 1; - } - if (canskip == 0 && concatMap.find(node) == concatMap.end()) { - concatMap[node] = 1; - } - } - // recursively visit all arguments - for (unsigned i = 0; i < aNode->get_num_args(); ++i) { - expr * arg = aNode->get_arg(i); - classify_ast_by_type(arg, varMap, concatMap, unrollMap); - } - } - } - - void theory_str::classify_ast_by_type_in_positive_context(std::map & varMap, - std::map & concatMap, std::map & unrollMap) { - - ast_manager & m = get_manager(); - expr_ref_vector assignments(m); - ctx.get_assignments(assignments); - - for (auto const &argAst : assignments) { - // the original code jumped through some hoops to check whether the AST node - // is a function, then checked whether that function is "interesting". - // however, the only thing that's considered "interesting" is an equality predicate. - // so we bypass a huge amount of work by doing the following... - - if (m.is_eq(argAst)) { - TRACE(str, tout - << "eq ast " << mk_pp(argAst, m) << " is between args of sort " - << to_app(argAst)->get_arg(0)->get_sort()->get_name() - << std::endl;); - classify_ast_by_type(argAst, varMap, concatMap, unrollMap); - } - } - } - - inline expr * theory_str::get_alias_index_ast(std::map & aliasIndexMap, expr * node) { - if (aliasIndexMap.find(node) != aliasIndexMap.end()) - return aliasIndexMap[node]; - else - return node; - } - - inline expr * theory_str::getMostLeftNodeInConcat(expr * node) { - app * aNode = to_app(node); - if (!u.str.is_concat(aNode)) { - return node; - } else { - expr * concatArgL = aNode->get_arg(0); - return getMostLeftNodeInConcat(concatArgL); - } - } - - inline expr * theory_str::getMostRightNodeInConcat(expr * node) { - app * aNode = to_app(node); - if (!u.str.is_concat(aNode)) { - return node; - } else { - expr * concatArgR = aNode->get_arg(1); - return getMostRightNodeInConcat(concatArgR); - } - } - - void theory_str::trace_ctx_dep(std::ofstream & tout, - std::map & aliasIndexMap, - std::map & var_eq_constStr_map, - std::map > & var_eq_concat_map, - std::map > & var_eq_unroll_map, - std::map & concat_eq_constStr_map, - std::map > & concat_eq_concat_map) { -#ifdef _TRACE - ast_manager & mgr = get_manager(); - { - tout << "(0) alias: variables" << std::endl; - std::map > aliasSumMap; - for (auto const &itor0 : aliasIndexMap) { - aliasSumMap[itor0.second][itor0.first] = 1; - } - for (auto const &keyItor : aliasSumMap) { - tout << " * "; - tout << mk_pp(keyItor.first, mgr); - tout << " : "; - for (auto const &innerItor : keyItor.second) { - tout << mk_pp(innerItor.first, mgr); - tout << ", "; - } - tout << std::endl; - } - tout << std::endl; - } - - { - tout << "(1) var = constStr:" << std::endl; - for (auto const &itor1 : var_eq_constStr_map) { - tout << " * "; - tout << mk_pp(itor1.first, mgr); - tout << " = "; - tout << mk_pp(itor1.second, mgr); - if (!in_same_eqc(itor1.first, itor1.second)) { - tout << " (not true in ctx)"; - } - tout << std::endl; - } - tout << std::endl; - } - - { - tout << "(2) var = concat:" << std::endl; - for (auto const &itor2 : var_eq_concat_map) { - tout << " * "; - tout << mk_pp(itor2.first, mgr); - tout << " = { "; - for (auto const &i_itor : itor2.second) { - tout << mk_pp(i_itor.first, mgr); - tout << ", "; - } - tout << std::endl; - } - tout << std::endl; - } - - { - tout << "(3) var = unrollFunc:" << std::endl; - for (auto const &itor2 : var_eq_unroll_map) { - tout << " * " << mk_pp(itor2.first, mgr) << " = { "; - for (auto const &i_itor : itor2.second) { - tout << mk_pp(i_itor.first, mgr) << ", "; - } - tout << " }" << std::endl; - } - tout << std::endl; - } - - { - tout << "(4) concat = constStr:" << std::endl; - for (auto const &itor3 : concat_eq_constStr_map) { - tout << " * "; - tout << mk_pp(itor3.first, mgr); - tout << " = "; - tout << mk_pp(itor3.second, mgr); - tout << std::endl; - - } - tout << std::endl; - } - - { - tout << "(5) eq concats:" << std::endl; - for (auto const &itor4 : concat_eq_concat_map) { - if (itor4.second.size() > 1) { - tout << " * "; - for (auto const &i_itor : itor4.second) { - tout << mk_pp(i_itor.first, mgr); - tout << " , "; - } - tout << std::endl; - } - } - tout << std::endl; - } - -#else - return; -#endif // _TRACE - } - - - /* - * Dependence analysis from current context assignment - * - "freeVarMap" contains a set of variables that doesn't constrained by Concats. - * But it's possible that it's bounded by unrolls - * For the case of - * (1) var1 = unroll(r1, t1) - * var1 is in the freeVarMap - * > should unroll r1 for var1 - * (2) var1 = unroll(r1, t1) /\ var1 = Concat(var2, var3) - * var2, var3 are all in freeVar - * > should split the unroll function so that var2 and var3 are bounded by new unrolls - */ - int theory_str::ctx_dep_analysis(std::map & strVarMap, std::map & freeVarMap, - std::map > & var_eq_concat_map) { - std::map concatMap; - std::map unrollMap; - std::map aliasIndexMap; - std::map var_eq_constStr_map; - std::map concat_eq_constStr_map; - std::map > var_eq_unroll_map; - std::map > concat_eq_concat_map; - std::map > depMap; - - ast_manager & m = get_manager(); - - // note that the old API concatenated these assignments into - // a massive conjunction; we may have the opportunity to avoid that here - expr_ref_vector assignments(m); - ctx.get_assignments(assignments); - - // Step 1: get variables / concat AST appearing in the context - // the thing we iterate over should just be variable_set - internal_variable_set - // so we avoid computing the set difference (but this might be slower) - for (expr* var : variable_set) { - if (internal_variable_set.find(var) == internal_variable_set.end()) { - TRACE(str, tout << "new variable: " << mk_pp(var, m) << std::endl;); - strVarMap[var] = 1; - } - } - classify_ast_by_type_in_positive_context(strVarMap, concatMap, unrollMap); - - // Step 2: collect alias relation - // e.g. suppose we have the equivalence class {x, y, z}; - // then we set aliasIndexMap[y] = x - // and aliasIndexMap[z] = x - - std::map::iterator varItor = strVarMap.begin(); - for (; varItor != strVarMap.end(); ++varItor) { - if (aliasIndexMap.find(varItor->first) != aliasIndexMap.end()) { - continue; - } - expr * aRoot = nullptr; - expr * curr = varItor->first; - do { - if (variable_set.find(curr) != variable_set.end()) { - if (aRoot == nullptr) { - aRoot = curr; - } else { - aliasIndexMap[curr] = aRoot; - } - } - curr = get_eqc_next(curr); - } while (curr != varItor->first); - } - - // Step 3: Collect interested cases - - varItor = strVarMap.begin(); - for (; varItor != strVarMap.end(); ++varItor) { - expr * deAliasNode = get_alias_index_ast(aliasIndexMap, varItor->first); - // Case 1: variable = string constant - // e.g. z = "str1" ::= var_eq_constStr_map[z] = "str1" - - if (var_eq_constStr_map.find(deAliasNode) == var_eq_constStr_map.end()) { - bool nodeHasEqcValue = false; - expr * nodeValue = get_eqc_value(deAliasNode, nodeHasEqcValue); - if (nodeHasEqcValue) { - var_eq_constStr_map[deAliasNode] = nodeValue; - } - } - - // Case 2: var_eq_concat - // e.g. z = concat("str1", b) ::= var_eq_concat[z][concat(c, "str2")] = 1 - // var_eq_unroll - // e.g. z = unroll(...) ::= var_eq_unroll[z][unroll(...)] = 1 - - if (var_eq_concat_map.find(deAliasNode) == var_eq_concat_map.end()) { - expr * curr = get_eqc_next(deAliasNode); - while (curr != deAliasNode) { - app * aCurr = to_app(curr); - // collect concat - if (u.str.is_concat(aCurr)) { - expr * arg0 = aCurr->get_arg(0); - expr * arg1 = aCurr->get_arg(1); - bool arg0HasEqcValue = false; - bool arg1HasEqcValue = false; - expr * arg0_value = get_eqc_value(arg0, arg0HasEqcValue); - expr * arg1_value = get_eqc_value(arg1, arg1HasEqcValue); - - bool is_arg0_emptyStr = false; - if (arg0HasEqcValue) { - zstring strval; - u.str.is_string(arg0_value, strval); - if (strval.empty()) { - is_arg0_emptyStr = true; - } - } - - bool is_arg1_emptyStr = false; - if (arg1HasEqcValue) { - zstring strval; - u.str.is_string(arg1_value, strval); - if (strval.empty()) { - is_arg1_emptyStr = true; - } - } - - if (!is_arg0_emptyStr && !is_arg1_emptyStr) { - var_eq_concat_map[deAliasNode][curr] = 1; - } - } - - curr = get_eqc_next(curr); - } - } - - } // for(varItor in strVarMap) - - // -------------------------------------------------- - // * collect aliasing relation among eq concats - // e.g EQC={concat1, concat2, concat3} - // concats_eq_Index_map[concat2] = concat1 - // concats_eq_Index_map[concat3] = concat1 - // -------------------------------------------------- - - std::map concats_eq_index_map; - for(auto const &concatItor : concatMap) { - if (concats_eq_index_map.find(concatItor.first) != concats_eq_index_map.end()) { - continue; - } - expr * aRoot = nullptr; - expr * curr = concatItor.first; - do { - if (u.str.is_concat(to_app(curr))) { - if (aRoot == nullptr) { - aRoot = curr; - } else { - concats_eq_index_map[curr] = aRoot; - } - } - curr = get_eqc_next(curr); - } while (curr != concatItor.first); - } - - for(auto const &concatItor : concatMap) { - expr * deAliasConcat = nullptr; - if (concats_eq_index_map.find(concatItor.first) != concats_eq_index_map.end()) { - deAliasConcat = concats_eq_index_map[concatItor.first]; - } else { - deAliasConcat = concatItor.first; - } - - // (3) concat_eq_conststr, e.g. concat(a,b) = "str1" - if (concat_eq_constStr_map.find(deAliasConcat) == concat_eq_constStr_map.end()) { - bool nodeHasEqcValue = false; - expr * nodeValue = get_eqc_value(deAliasConcat, nodeHasEqcValue); - if (nodeHasEqcValue) { - concat_eq_constStr_map[deAliasConcat] = nodeValue; - } - } - - // (4) concat_eq_concat, e.g. - // concat(a,b) = concat("str1", c) AND z = concat(a,b) AND z = concat(e,f) - if (concat_eq_concat_map.find(deAliasConcat) == concat_eq_concat_map.end()) { - expr * curr = deAliasConcat; - do { - if (u.str.is_concat(to_app(curr))) { - // curr cannot be reduced - if (concatMap.find(curr) != concatMap.end()) { - concat_eq_concat_map[deAliasConcat][curr] = 1; - } - } - curr = get_eqc_next(curr); - } while (curr != deAliasConcat); - } - } - - // print some debugging info - TRACE(str, trace_ctx_dep(tout, aliasIndexMap, var_eq_constStr_map, - var_eq_concat_map, var_eq_unroll_map, - concat_eq_constStr_map, concat_eq_concat_map);); - - /* - if (!contain_pair_bool_map.empty()) { - compute_contains(aliasIndexMap, concats_eq_index_map, var_eq_constStr_map, concat_eq_constStr_map, var_eq_concat_map); - } - */ - - // step 4: dependence analysis - - // (1) var = string constant - for (auto const &itor : var_eq_constStr_map) { - expr * var = get_alias_index_ast(aliasIndexMap, itor.first); - expr * strAst = itor.second; - depMap[var][strAst] = 1; - } - - // (2) var = concat - for (auto const &itor : var_eq_concat_map) { - expr * var = get_alias_index_ast(aliasIndexMap, itor.first); - for (auto const &itor1 : itor.second) { - expr * concat = itor1.first; - std::map inVarMap; - std::map inConcatMap; - std::map inUnrollMap; - classify_ast_by_type(concat, inVarMap, inConcatMap, inUnrollMap); - for (auto const &itor2 : inVarMap) { - expr * varInConcat = get_alias_index_ast(aliasIndexMap, itor2.first); - if (!(depMap[var].find(varInConcat) != depMap[var].end() && depMap[var][varInConcat] == 1)) { - depMap[var][varInConcat] = 2; - } - } - } - } - - for (auto const &itor : var_eq_unroll_map) { - expr * var = get_alias_index_ast(aliasIndexMap, itor.first); - for (auto const &itor1 : itor.second) { - expr * unrollFunc = itor1.first; - std::map inVarMap; - std::map inConcatMap; - std::map inUnrollMap; - classify_ast_by_type(unrollFunc, inVarMap, inConcatMap, inUnrollMap); - for (auto const &itor2 : inVarMap) { - expr * varInFunc = get_alias_index_ast(aliasIndexMap, itor2.first); - - TRACE(str, tout << "var in unroll = " << - mk_ismt2_pp(itor2.first, m) << std::endl - << "dealiased var = " << mk_ismt2_pp(varInFunc, m) << std::endl;); - - // it's possible that we have both (Unroll $$_regVar_0 $$_unr_0) /\ (Unroll abcd $$_unr_0), - // while $$_regVar_0 = "abcd" - // have to exclude such cases - bool varHasValue = false; - get_eqc_value(varInFunc, varHasValue); - if (varHasValue) - continue; - - if (depMap[var].find(varInFunc) == depMap[var].end()) { - depMap[var][varInFunc] = 6; - } - } - } - } - - // (3) concat = string constant - for (auto const &itor : concat_eq_constStr_map) { - expr * concatAst = itor.first; - expr * constStr = itor.second; - std::map inVarMap; - std::map inConcatMap; - std::map inUnrollMap; - classify_ast_by_type(concatAst, inVarMap, inConcatMap, inUnrollMap); - for (auto const &itor2 : inVarMap) { - expr * varInConcat = get_alias_index_ast(aliasIndexMap, itor2.first); - if (!(depMap[varInConcat].find(constStr) != depMap[varInConcat].end() && depMap[varInConcat][constStr] == 1)) - depMap[varInConcat][constStr] = 3; - } - } - - // (4) equivalent concats - // - possibility 1 : concat("str", v1) = concat(concat(v2, v3), v4) = concat(v5, v6) - // ==> v2, v5 are constrained by "str" - // - possibility 2 : concat(v1, "str") = concat(v2, v3) = concat(v4, v5) - // ==> v2, v4 are constrained by "str" - //-------------------------------------------------------------- - - std::map mostLeftNodes; - std::map mostRightNodes; - - std::map mLIdxMap; - std::map > mLMap; - std::map mRIdxMap; - std::map > mRMap; - std::set nSet; - - for (auto const &itor : concat_eq_concat_map) { - mostLeftNodes.clear(); - mostRightNodes.clear(); - - expr * mLConst = nullptr; - expr * mRConst = nullptr; - - for (auto const &itor1 : itor.second) { - expr * concatNode = itor1.first; - expr * mLNode = getMostLeftNodeInConcat(concatNode); - zstring strval; - if (u.str.is_string(to_app(mLNode), strval)) { - if (mLConst == nullptr && strval.empty()) { - mLConst = mLNode; - } - } else { - mostLeftNodes[mLNode] = concatNode; - } - - expr * mRNode = getMostRightNodeInConcat(concatNode); - if (u.str.is_string(to_app(mRNode), strval)) { - if (mRConst == nullptr && strval.empty()) { - mRConst = mRNode; - } - } else { - mostRightNodes[mRNode] = concatNode; - } - } - - if (mLConst != nullptr) { - // ------------------------------------------------------------------------------------- - // The left most variable in a concat is constrained by a constant string in eqc concat - // ------------------------------------------------------------------------------------- - // e.g. Concat(x, ...) = Concat("abc", ...) - // ------------------------------------------------------------------------------------- - for (auto const &itor1 : mostLeftNodes) { - expr * deVar = get_alias_index_ast(aliasIndexMap, itor1.first); - if (depMap[deVar].find(mLConst) == depMap[deVar].end() || depMap[deVar][mLConst] != 1) { - depMap[deVar][mLConst] = 4; - } - } - } - - { - // ------------------------------------------------------------------------------------- - // The left most variables in eqc concats are constrained by each other - // ------------------------------------------------------------------------------------- - // e.g. concat(x, ...) = concat(u, ...) = ... - // x and u are constrained by each other - // ------------------------------------------------------------------------------------- - nSet.clear(); - for (auto const &itl : mostLeftNodes) { - bool lfHasEqcValue = false; - get_eqc_value(itl.first, lfHasEqcValue); - if (lfHasEqcValue) - continue; - expr * deVar = get_alias_index_ast(aliasIndexMap, itl.first); - nSet.insert(deVar); - } - - if (nSet.size() > 1) { - int lId = -1; - for (auto const &itor2 : nSet) { - if (mLIdxMap.find(itor2) != mLIdxMap.end()) { - lId = mLIdxMap[itor2]; - break; - } - } - if (lId == -1) - lId = static_cast(mLMap.size()); - for (auto const &itor2 : nSet) { - bool itorHasEqcValue = false; - get_eqc_value(itor2, itorHasEqcValue); - if (itorHasEqcValue) - continue; - mLIdxMap[itor2] = lId; - mLMap[lId].insert(itor2); - } - } - } - - if (mRConst != nullptr) { - for (auto const &itor1 : mostRightNodes) { - expr * deVar = get_alias_index_ast(aliasIndexMap, itor1.first); - if (depMap[deVar].find(mRConst) == depMap[deVar].end() || depMap[deVar][mRConst] != 1) { - depMap[deVar][mRConst] = 5; - } - } - } - - { - nSet.clear(); - for (auto const &itr : mostRightNodes) { - expr * deVar = get_alias_index_ast(aliasIndexMap, itr.first); - nSet.insert(deVar); - } - if (nSet.size() > 1) { - int rId = -1; - for (auto const &itor2 : nSet) { - if (mRIdxMap.find(itor2) != mRIdxMap.end()) { - rId = mRIdxMap[itor2]; - break; - } - } - if (rId == -1) - rId = static_cast(mRMap.size()); - for (auto const &itor2 : nSet) { - bool rHasEqcValue = false; - get_eqc_value(itor2, rHasEqcValue); - if (rHasEqcValue) - continue; - mRIdxMap[itor2] = rId; - mRMap[rId].insert(itor2); - } - } - } - } - - // print the dependence map - TRACE(str, - tout << "Dependence Map" << std::endl; - for(auto const &itor : depMap) { - tout << mk_pp(itor.first, m); - rational nnLen; - bool nnLen_exists = get_len_value(itor.first, nnLen); - tout << " [len = " << (nnLen_exists ? nnLen.to_string() : "?") << "] \t-->\t"; - for (auto const &itor1 : itor.second) { - tout << mk_pp(itor1.first, m) << "(" << itor1.second << "), "; - } - tout << std::endl; - } - ); - - // step, errr, 5: compute free variables based on the dependence map - - // the case dependence map is empty, every var in VarMap is free - //--------------------------------------------------------------- - // remove L/R most var in eq concat since they are constrained with each other - std::map > lrConstrainedMap; - for (auto const &itor : mLMap) { - for (std::set::iterator it1 = itor.second.begin(); it1 != itor.second.end(); it1++) { - std::set::iterator it2 = it1; - it2++; - for (; it2 != itor.second.end(); it2++) { - expr * n1 = *it1; - expr * n2 = *it2; - lrConstrainedMap[n1][n2] = 1; - lrConstrainedMap[n2][n1] = 1; - } - } - } - for (auto const &itor : mRMap) { - for (std::set::iterator it1 = itor.second.begin(); it1 != itor.second.end(); it1++) { - std::set::iterator it2 = it1; - it2++; - for (; it2 != itor.second.end(); it2++) { - expr * n1 = *it1; - expr * n2 = *it2; - lrConstrainedMap[n1][n2] = 1; - lrConstrainedMap[n2][n1] = 1; - } - } - } - - if (depMap.empty()) { - for (auto const &itor : strVarMap) { - expr * var = get_alias_index_ast(aliasIndexMap, itor.first); - if (lrConstrainedMap.find(var) == lrConstrainedMap.end()) { - freeVarMap[var] = 1; - } else { - int lrConstrained = 0; - for (auto const &lrit : freeVarMap) { - if (lrConstrainedMap[var].find(lrit.first) != lrConstrainedMap[var].end()) { - lrConstrained = 1; - break; - } - } - if (lrConstrained == 0) { - freeVarMap[var] = 1; - } - } - } - } else { - // if the keys in aliasIndexMap are not contained in keys in depMap, they are free - // e.g., x= y /\ x = z /\ t = "abc" - // aliasIndexMap[y]= x, aliasIndexMap[z] = x - // depMap t ~ "abc"(1) - // x should be free - for (auto const &itor2 : strVarMap) { - if (aliasIndexMap.find(itor2.first) != aliasIndexMap.end()) { - expr * var = aliasIndexMap[itor2.first]; - if (depMap.find(var) == depMap.end()) { - if (lrConstrainedMap.find(var) == lrConstrainedMap.end()) { - freeVarMap[var] = 1; - } else { - int lrConstrained = 0; - for (auto const &lrit : freeVarMap) { - if (lrConstrainedMap[var].find(lrit.first) != lrConstrainedMap[var].end()) { - lrConstrained = 1; - break; - } - } - if (lrConstrained == 0) { - freeVarMap[var] = 1; - } - } - } - } else if (aliasIndexMap.find(itor2.first) == aliasIndexMap.end()) { - // if a variable is not in aliasIndexMap and not in depMap, it's free - if (depMap.find(itor2.first) == depMap.end()) { - expr * var = itor2.first; - if (lrConstrainedMap.find(var) == lrConstrainedMap.end()) { - freeVarMap[var] = 1; - } else { - int lrConstrained = 0; - for (auto const &lrit : freeVarMap) { - if (lrConstrainedMap[var].find(lrit.first) != lrConstrainedMap[var].end()) { - lrConstrained = 1; - break; - } - } - if (lrConstrained == 0) { - freeVarMap[var] = 1; - } - } - } - } - } - - for (auto const &itor : depMap) { - for (auto const &itor1 : itor.second) { - if (variable_set.find(itor1.first) != variable_set.end()) { // expr type = var - expr * var = get_alias_index_ast(aliasIndexMap, itor1.first); - // if a var is dep on itself and all dependence are type 2, it's a free variable - // e.g {y --> x(2), y(2), m --> m(2), n(2)} y,m are free - { - if (depMap.find(var) == depMap.end()) { - if (freeVarMap.find(var) == freeVarMap.end()) { - if (lrConstrainedMap.find(var) == lrConstrainedMap.end()) { - freeVarMap[var] = 1; - } else { - int lrConstrained = 0; - for (auto const &lrit : freeVarMap) { - if (lrConstrainedMap[var].find(lrit.first) != lrConstrainedMap[var].end()) { - lrConstrained = 1; - break; - } - } - if (lrConstrained == 0) { - freeVarMap[var] = 1; - } - } - - } else { - freeVarMap[var] = freeVarMap[var] + 1; - } - } - } - } - } - } - } - - return 0; - } - - // Attempts to convert a string to a non-negative integer. - // Returns true if this can be done in a valid way, placing the converted value in the argument. - // Otherwise, returns false, if str is empty or contains non-digit characters. - bool theory_str::string_integer_conversion_valid(zstring str, rational& converted) const { - // bool valid = true; - converted = rational::zero(); - rational ten(10); - if (str.length() == 0) { - return false; - } else { - for (unsigned i = 0; i < str.length(); ++i) { - if (!('0' <= str[i] && str[i] <= '9')) { - return false; - } else { - // accumulate - char digit = (int)str[i]; - std::string sDigit(1, digit); - int val = atoi(sDigit.c_str()); - converted = (ten * converted) + rational(val); - } - } - return true; - } - } - - // Check agreement between integer and string theories for the term a = (str.to-int S). - // Returns true if axioms were added, and false otherwise. - bool theory_str::finalcheck_str2int(app * a) { - SASSERT(u.str.is_stoi(a)); - bool axiomAdd = false; - ast_manager & m = get_manager(); - - expr * S = a->get_arg(0); - - // check integer theory - rational Ival; - bool Ival_exists = get_arith_value(a, Ival); - if (Ival_exists) { - TRACE(str, tout << "integer theory assigns " << mk_pp(a, m) << " = " << Ival.to_string() << std::endl;); - // if that value is not -1, and we know the length of S, we can assert (str.to.int S) = Ival --> S = "0...(len(S)-len(Ival))...0" ++ "Ival" - if (!Ival.is_minus_one()) { - rational Slen; - if (get_len_value(S, Slen)) { - zstring Ival_str(Ival.to_string()); - if (rational(Ival_str.length()) <= Slen) { - zstring padding; - for (rational i = rational::zero(); i < Slen - rational(Ival_str.length()); ++i) { - padding = padding + zstring("0"); - } - expr_ref premise(ctx.mk_eq_atom(a, m_autil.mk_numeral(Ival, true)), m); - expr_ref conclusion(ctx.mk_eq_atom(S, mk_string(padding + Ival_str)), m); - expr_ref axiom(rewrite_implication(premise, conclusion), m); - if (!string_int_axioms.contains(axiom)) { - string_int_axioms.insert(axiom); - assert_axiom(axiom); - m_trail_stack.push(insert_obj_trail(string_int_axioms, axiom)); - axiomAdd = true; - } - } else { - // assigned length is too short for the string value - expr_ref premise(ctx.mk_eq_atom(a, mk_int(Ival)), m); - expr_ref conclusion(m_autil.mk_ge(mk_strlen(S), mk_int(Slen)), m); - assert_axiom_rw(rewrite_implication(premise, conclusion)); - axiomAdd = true; - } - } - } - } else { - TRACE(str, tout << "integer theory has no assignment for " << mk_pp(a, m) << std::endl;); - expr_ref is_zero(ctx.mk_eq_atom(a, m_autil.mk_int(0)), m); - /* literal is_zero_l = */ mk_literal(is_zero); - axiomAdd = true; - TRACE(str, ctx.display(tout);); - } - - bool S_hasEqcValue; - expr * S_str = get_eqc_value(S, S_hasEqcValue); - if (S_hasEqcValue) { - zstring str; - u.str.is_string(S_str, str); - rational convertedRepresentation(0); - // TODO this duplicates code a bit, we can simplify the branch on "conclusion" only - if (string_integer_conversion_valid(str, convertedRepresentation)) { - expr_ref premise(ctx.mk_eq_atom(S, mk_string(str)), m); - expr_ref conclusion(ctx.mk_eq_atom(a, m_autil.mk_numeral(convertedRepresentation, true)), m); - expr_ref axiom(rewrite_implication(premise, conclusion), m); - if (!string_int_axioms.contains(axiom)) { - string_int_axioms.insert(axiom); - assert_axiom(axiom); - m_trail_stack.push(insert_obj_trail(string_int_axioms, axiom)); - axiomAdd = true; - } - } else { - expr_ref premise(ctx.mk_eq_atom(S, mk_string(str)), m); - expr_ref conclusion(ctx.mk_eq_atom(a, m_autil.mk_numeral(rational::minus_one(), true)), m); - expr_ref axiom(rewrite_implication(premise, conclusion), m); - if (!string_int_axioms.contains(axiom)) { - string_int_axioms.insert(axiom); - assert_axiom(axiom); - m_trail_stack.push(insert_obj_trail(string_int_axioms, axiom)); - axiomAdd = true; - } - } - } - - return axiomAdd; - } - - bool theory_str::finalcheck_int2str(app * a) { - SASSERT(u.str.is_itos(a)); - bool axiomAdd = false; - ast_manager & m = get_manager(); - - expr * N = a->get_arg(0); - - // check string theory - bool Sval_expr_exists; - expr * Sval_expr = get_eqc_value(a, Sval_expr_exists); - if (Sval_expr_exists) { - zstring Sval; - u.str.is_string(Sval_expr, Sval); - TRACE(str, tout << "string theory assigns " << mk_pp(a, m) << " = \"" << Sval << "\"\n";); - // empty string --> integer value < 0 - if (Sval.empty()) { - // ignore this. we should already assert the axiom for what happens when the string is "" - } else { - // check for leading zeroes. if the first character is '0', the entire string must be "0" - char firstChar = (int)Sval[0]; - if (firstChar == '0' && !(Sval == zstring("0"))) { - TRACE(str, tout << "str.from-int argument " << Sval << " contains leading zeroes" << std::endl;); - expr_ref axiom(m.mk_not(ctx.mk_eq_atom(a, mk_string(Sval))), m); - assert_axiom(axiom); - return true; - } - // nonempty string --> convert to correct integer value, or disallow it - rational convertedRepresentation(0); - if (string_integer_conversion_valid(Sval, convertedRepresentation)) { - expr_ref premise(ctx.mk_eq_atom(a, mk_string(Sval)), m); - expr_ref conclusion(ctx.mk_eq_atom(N, m_autil.mk_numeral(convertedRepresentation, true)), m); - expr_ref axiom(rewrite_implication(premise, conclusion), m); - if (!string_int_axioms.contains(axiom)) { - string_int_axioms.insert(axiom); - assert_axiom(axiom); - m_trail_stack.push(insert_obj_trail(string_int_axioms, axiom)); - axiomAdd = true; - } - } else { - expr_ref axiom(m.mk_not(ctx.mk_eq_atom(a, mk_string(Sval))), m); - // always assert this axiom because this is a conflict clause - assert_axiom(axiom); - axiomAdd = true; - } - } - } else { - TRACE(str, tout << "string theory has no assignment for " << mk_pp(a, m) << std::endl;); - // see if the integer theory has assigned N yet - arith_value v(m); - v.init(&ctx); - rational Nval; - if (v.get_value(N, Nval)) { - expr_ref premise(ctx.mk_eq_atom(N, mk_int(Nval)), m); - expr_ref conclusion(m); - if (Nval.is_neg()) { - // negative argument -> "" - conclusion = expr_ref(ctx.mk_eq_atom(a, mk_string("")), m); - } else { - // non-negative argument -> convert to string of digits - zstring Nval_str(Nval.to_string()); - conclusion = expr_ref(ctx.mk_eq_atom(a, mk_string(Nval_str)), m); - } - expr_ref axiom(rewrite_implication(premise, conclusion), m); - assert_axiom(axiom); - axiomAdd = true; - } else { - TRACE(str, tout << "integer theory has no assignment for " << mk_pp(N, m) << std::endl;); - expr_ref is_zero(ctx.mk_eq_atom(N, m_autil.mk_int(0)), m); - /* literal is_zero_l = */ mk_literal(is_zero); - axiomAdd = true; - TRACE(str, ctx.display(tout);); - } - } - return axiomAdd; - } - - void theory_str::collect_var_concat(expr * node, std::set & varSet, std::set & concatSet) { - if (variable_set.find(node) != variable_set.end()) { - varSet.insert(node); - } - else if (is_app(node)) { - app * aNode = to_app(node); - if (u.str.is_length(aNode)) { - // Length - return; - } - if (u.str.is_concat(aNode)) { - if (concatSet.find(node) == concatSet.end()) { - concatSet.insert(node); - } - } - // recursively visit all arguments - for (unsigned i = 0; i < aNode->get_num_args(); ++i) { - expr * arg = aNode->get_arg(i); - collect_var_concat(arg, varSet, concatSet); - } - } - } - - bool theory_str::propagate_length_within_eqc(expr * var) { - bool res = false; - ast_manager & m = get_manager(); - - TRACE(str, tout << "propagate_length_within_eqc: " << mk_ismt2_pp(var, m) << std::endl ;); - - rational varLen; - if (! get_len_value(var, varLen)) { - bool hasLen = false; - expr * nodeWithLen= var; - do { - if (get_len_value(nodeWithLen, varLen)) { - hasLen = true; - break; - } - nodeWithLen = get_eqc_next(nodeWithLen); - } while (nodeWithLen != var); - - if (hasLen) { - // var = nodeWithLen --> |var| = |nodeWithLen| - expr_ref_vector l_items(m); - expr_ref varEqNode(ctx.mk_eq_atom(var, nodeWithLen), m); - l_items.push_back(varEqNode); - - expr_ref nodeWithLenExpr (mk_strlen(nodeWithLen), m); - expr_ref varLenExpr (mk_int(varLen), m); - expr_ref lenEqNum(ctx.mk_eq_atom(nodeWithLenExpr, varLenExpr), m); - l_items.push_back(lenEqNum); - - expr_ref axl(m.mk_and(l_items.size(), l_items.data()), m); - expr_ref varLen(mk_strlen(var), m); - expr_ref axr(ctx.mk_eq_atom(varLen, mk_int(varLen)), m); - assert_implication(axl, axr); - TRACE(str, tout << mk_ismt2_pp(axl, m) << std::endl << " ---> " << std::endl << mk_ismt2_pp(axr, m);); - res = true; - } - } - return res; - } - - bool theory_str::propagate_length(std::set & varSet, std::set & concatSet, std::map & exprLenMap) { - ast_manager & m = get_manager(); - expr_ref_vector assignments(m); - ctx.get_assignments(assignments); - bool axiomAdded = false; - // collect all concats in context - for (auto const &it : assignments) { - if (! ctx.is_relevant(it)) { - continue; - } - if (m.is_eq(it)) { - collect_var_concat(it, varSet, concatSet); - } - } - // iterate each concat - // if a concat doesn't have length info, check if the length of all leaf nodes can be resolved - for (auto const &concat : concatSet) { - rational lenValue; - expr_ref concatlenExpr (mk_strlen(concat), m) ; - bool allLeafResolved = true; - if (! get_arith_value(concatlenExpr, lenValue)) { - // the length of concat is unresolved yet - if (get_len_value(concat, lenValue)) { - // but all leaf nodes have length information - TRACE(str, tout << "* length pop-up: " << mk_ismt2_pp(concat, m) << "| = " << lenValue << std::endl;); - std::set leafNodes; - get_unique_non_concat_nodes(concat, leafNodes); - expr_ref_vector l_items(m); - for (auto const &leafIt : leafNodes) { - rational leafLenValue; - if (get_len_value(leafIt, leafLenValue)) { - expr_ref leafItLenExpr (mk_strlen(leafIt), m); - expr_ref leafLenValueExpr (mk_int(leafLenValue), m); - expr_ref lcExpr (ctx.mk_eq_atom(leafItLenExpr, leafLenValueExpr), m); - l_items.push_back(lcExpr); - } else { - allLeafResolved = false; - break; - } - } - if (allLeafResolved) { - expr_ref axl(m.mk_and(l_items.size(), l_items.data()), m); - expr_ref lenValueExpr (mk_int(lenValue), m); - expr_ref axr(ctx.mk_eq_atom(concatlenExpr, lenValueExpr), m); - assert_implication(axl, axr); - TRACE(str, tout << mk_ismt2_pp(axl, m) << std::endl << " ---> " << std::endl << mk_ismt2_pp(axr, m)<< std::endl;); - axiomAdded = true; - } - } - } - } - // if no concat length is propagated, check the length of variables. - if (! axiomAdded) { - for (auto const &var : varSet) { - rational lenValue; - expr_ref varlen (mk_strlen(var), m) ; - if (! get_arith_value(varlen, lenValue)) { - if (propagate_length_within_eqc(var)) { - axiomAdded = true; - } - } - } - - } - return axiomAdded; - } - - void theory_str::get_unique_non_concat_nodes(expr * node, std::set & argSet) { - app * a_node = to_app(node); - if (!u.str.is_concat(a_node)) { - argSet.insert(node); - return; - } else { - SASSERT(a_node->get_num_args() == 2); - expr * leftArg = a_node->get_arg(0); - expr * rightArg = a_node->get_arg(1); - get_unique_non_concat_nodes(leftArg, argSet); - get_unique_non_concat_nodes(rightArg, argSet); - } - } - - final_check_status theory_str::final_check_eh() { - ast_manager & m = get_manager(); - - //expr_ref_vector assignments(m); - //ctx.get_assignments(assignments); - - if (opt_VerifyFinalCheckProgress) { - finalCheckProgressIndicator = false; - } - - TRACE(str, tout << "final check" << std::endl;); - TRACE_CODE(if (is_trace_enabled(TraceTag::t_str_dump_assign)) { dump_assignments(); }); - check_variable_scope(); - - if (opt_DeferEQCConsistencyCheck) { - TRACE(str, tout << "performing deferred EQC consistency check" << std::endl;); - std::set eqc_roots; - for (auto const &e : ctx.enodes()) { - enode * root = e->get_root(); - eqc_roots.insert(root); - } - - bool found_inconsistency = false; - - for (auto const &e : eqc_roots) { - app * a = e->get_expr(); - if (!(a->get_sort() == u.str.mk_string_sort())) { - TRACE(str, tout << "EQC root " << mk_pp(a, m) << " not a string term; skipping" << std::endl;); - } else { - TRACE(str, tout << "EQC root " << mk_pp(a, m) << " is a string term. Checking this EQC" << std::endl;); - // first call check_concat_len_in_eqc() on each member of the eqc - enode * e_it = e; - enode * e_root = e_it; - do { - bool status = check_concat_len_in_eqc(e_it->get_expr()); - if (!status) { - TRACE(str, tout << "concat-len check asserted an axiom on " << mk_pp(e_it->get_expr(), m) << std::endl;); - found_inconsistency = true; - } - e_it = e_it->get_next(); - } while (e_it != e_root); - - // now grab any two distinct elements from the EQC and call new_eq_check() on them - enode * e1 = e; - enode * e2 = e1->get_next(); - if (e1 != e2) { - TRACE(str, tout << "deferred new_eq_check() over EQC of " << mk_pp(e1->get_expr(), m) << " and " << mk_pp(e2->get_expr(), m) << std::endl;); - bool result = new_eq_check(e1->get_expr(), e2->get_expr()); - if (!result) { - TRACE(str, tout << "new_eq_check found inconsistencies" << std::endl;); - found_inconsistency = true; - } - } - } - } - - if (found_inconsistency) { - TRACE(str, tout << "Found inconsistency in final check! Returning to search." << std::endl;); - return FC_CONTINUE; - } else { - TRACE(str, tout << "Deferred consistency check passed. Continuing in final check." << std::endl;); - } - } - - // run dependence analysis to find free string variables - std::map varAppearInAssign; - std::map freeVar_map; - std::map > var_eq_concat_map; - int conflictInDep = ctx_dep_analysis(varAppearInAssign, freeVar_map, var_eq_concat_map); - if (conflictInDep == -1) { - m_stats.m_solved_by = 2; - return FC_DONE; - } - - // enhancement: improved backpropagation of string constants into var=concat terms - bool backpropagation_occurred = false; - for (auto const &veqc_map_it : var_eq_concat_map) { - expr * var = veqc_map_it.first; - for (auto const &concat_map_it : veqc_map_it.second) { - app * concat = to_app(concat_map_it.first); - expr * concat_lhs = concat->get_arg(0); - expr * concat_rhs = concat->get_arg(1); - // If the concat LHS and RHS both have a string constant in their EQC, - // but the var does not, then we assert an axiom of the form - // (lhs = "lhs" AND rhs = "rhs") --> (Concat lhs rhs) = "lhsrhs" - bool concat_lhs_haseqc, concat_rhs_haseqc, var_haseqc; - expr * concat_lhs_str = get_eqc_value(concat_lhs, concat_lhs_haseqc); - expr * concat_rhs_str = get_eqc_value(concat_rhs, concat_rhs_haseqc); - get_eqc_value(var, var_haseqc); - if (concat_lhs_haseqc && concat_rhs_haseqc && !var_haseqc) { - TRACE(str, tout << "backpropagate into " << mk_pp(var, m) << " = " << mk_pp(concat, m) << std::endl - << "LHS ~= " << mk_pp(concat_lhs_str, m) << " RHS ~= " << mk_pp(concat_rhs_str, m) << std::endl;); - - zstring lhsString, rhsString; - u.str.is_string(concat_lhs_str, lhsString); - u.str.is_string(concat_rhs_str, rhsString); - zstring concatString = lhsString + rhsString; - - // special handling: don't assert that string constants are equal to themselves - expr_ref_vector lhs_terms(m); - if (!u.str.is_string(concat_lhs)) { - lhs_terms.push_back(ctx.mk_eq_atom(concat_lhs, concat_lhs_str)); - } - - if (!u.str.is_string(concat_rhs)) { - lhs_terms.push_back(ctx.mk_eq_atom(concat_rhs, concat_rhs_str)); - - } - - if (lhs_terms.empty()) { - // no assumptions on LHS - expr_ref rhs(ctx.mk_eq_atom(concat, mk_string(concatString)), m); - assert_axiom(rhs); - } else { - expr_ref lhs(mk_and(lhs_terms), m); - expr_ref rhs(ctx.mk_eq_atom(concat, mk_string(concatString)), m); - assert_implication(lhs, rhs); - } - backpropagation_occurred = true; - } - } - } - - if (backpropagation_occurred) { - TRACE(str, tout << "Resuming search due to axioms added by backpropagation." << std::endl;); - return FC_CONTINUE; - } - - // enhancement: improved backpropagation of length information - { - std::set varSet; - std::set concatSet; - std::map exprLenMap; - - bool length_propagation_occurred = propagate_length(varSet, concatSet, exprLenMap); - if (length_propagation_occurred) { - TRACE(str, tout << "Resuming search due to axioms added by length propagation." << std::endl;); - return FC_CONTINUE; - } - } - - if (!solve_regex_automata()) { - TRACE(str, tout << "regex engine requested to give up!" << std::endl;); - return FC_GIVEUP; - } - - bool needToAssignFreeVars = false; - expr_ref_vector free_variables(m); - std::set unused_internal_variables; - { // Z3str2 free variables check - for (auto const &itor : varAppearInAssign) { - if (internal_variable_set.find(itor.first) != internal_variable_set.end()) { - // this can be ignored, I think - TRACE(str, tout << "free internal variable " << mk_pp(itor.first, m) << " ignored" << std::endl;); - continue; - } - bool hasEqcValue = false; - get_eqc_value(itor.first, hasEqcValue); - if (!hasEqcValue) { - TRACE(str, tout << "found free variable " << mk_pp(itor.first, m) << std::endl;); - needToAssignFreeVars = true; - free_variables.push_back(itor.first); - // break; - } else { - // debug - // TRACE(str, tout << "variable " << mk_pp(itor->first, m) << " = " << mk_pp(eqcString, m) << std::endl;); - } - } - } - - bool existNegativeContains = false; - expr_ref_vector assignments(m); - ctx.get_assignments(assignments); - for (expr * a : assignments) { - expr * subterm; - if (m.is_not(a, subterm) && u.str.is_contains(subterm)) existNegativeContains = true; - } - - if (!needToAssignFreeVars) { - - // check string-int terms - bool addedStrIntAxioms = false; - for (unsigned i = 0; i < string_int_conversion_terms.size(); ++i) { - app * ex = to_app(string_int_conversion_terms[i].get()); - if (u.str.is_stoi(ex)) { - bool axiomAdd = finalcheck_str2int(ex); - if (axiomAdd) { - addedStrIntAxioms = true; - } - } else if (u.str.is_itos(ex)) { - bool axiomAdd = finalcheck_int2str(ex); - if (axiomAdd) { - addedStrIntAxioms = true; - } - } - } - if (addedStrIntAxioms) { - TRACE(str, tout << "Resuming search due to addition of string-integer conversion axioms." << std::endl;); - return FC_CONTINUE; - } - - // We must be be 100% certain that if there are any regex constraints, - // the string assignment for each variable is consistent with the automaton. - bool regexOK = true; - if (!regex_terms.empty()) { - for (auto& str_in_re : regex_terms) { - expr * str = nullptr; - expr * re = nullptr; - VERIFY(u.str.is_in_re(str_in_re, str, re)); - lbool current_assignment = ctx.get_assignment(str_in_re); - if (current_assignment == l_undef) { - continue; - } - zstring strValue; - if (get_string_constant_eqc(str, strValue)) { - // try substituting the current assignment and solving the regex - expr_ref valueInRe(u.re.mk_in_re(mk_string(strValue), re), m); - ctx.get_rewriter()(valueInRe); - if (m.is_true(valueInRe)) { - if (current_assignment == l_false) { - TRACE(str, tout << "regex conflict: " << mk_pp(str, m) << " = \"" << strValue << "\" but must not be in the language " << mk_pp(re, m) << std::endl;); - expr_ref conflictClause(m.mk_or(m.mk_not(ctx.mk_eq_atom(str, mk_string(strValue))), str_in_re), m); - assert_axiom(conflictClause); - add_persisted_axiom(conflictClause); - return FC_CONTINUE; - } - } else if (m.is_false(valueInRe)) { - if (current_assignment == l_true) { - TRACE(str, tout << "regex conflict: " << mk_pp(str, m) << " = \"" << strValue << "\" but must be in the language " << mk_pp(re, m) << std::endl;); - expr_ref conflictClause(m.mk_or(m.mk_not(ctx.mk_eq_atom(str, mk_string(strValue))), m.mk_not(str_in_re)), m); - assert_axiom(conflictClause); - add_persisted_axiom(conflictClause); - return FC_CONTINUE; - } - } else { - // try to keep going, but don't assume the current assignment is right or wrong - regexOK = false; - break; - } - } else { - regexOK = false; - break; - } - } // foreach (str.in.re in regex_terms) - } - // we're not done if some variable in a regex membership predicate was unassigned - if (regexOK) { - if (unused_internal_variables.empty()) { - if (!existNegativeContains) { - TRACE(str, tout << "All variables are assigned. Done!" << std::endl;); - m_stats.m_solved_by = 2; - return FC_DONE; - } - } else { - TRACE(str, tout << "Assigning decoy values to free internal variables." << std::endl;); - for (auto const &var : unused_internal_variables) { - expr_ref assignment(m.mk_eq(var, mk_string("**unused**")), m); - assert_axiom(assignment); - } - return FC_CONTINUE; - } - } - } - - CTRACE(str, needToAssignFreeVars, - tout << "Need to assign values to the following free variables:" << std::endl; - for (expr* v : free_variables) { - tout << mk_ismt2_pp(v, m) << std::endl; - } - tout << "freeVar_map has the following entries:" << std::endl; - for (auto const& kv : freeVar_map) { - expr * var = kv.first; - tout << mk_ismt2_pp(var, m) << std::endl; - } - ); - - // Assign free variables - - { - TRACE(str, tout << "free var map (#" << freeVar_map.size() << "):" << std::endl; - for (auto const &freeVarItor1 : freeVar_map) { - expr * freeVar = freeVarItor1.first; - rational lenValue; - bool lenValue_exists = get_len_value(freeVar, lenValue); - tout << mk_pp(freeVar, m) << " [depCnt = " << freeVarItor1.second << ", length = " - << (lenValue_exists ? lenValue.to_string() : "?") - << "]" << std::endl; - } - ); - } - - { - // TODO if we're using fixed-length testing, do we care about finding free variables any more? - // that work might be useless - TRACE(str, tout << "using fixed-length model construction" << std::endl;); - - arith_value v(get_manager()); - v.init(&ctx); - final_check_status arith_fc_status = v.final_check(); - if (arith_fc_status != FC_DONE) { - TRACE(str, tout << "arithmetic solver not done yet, continuing search" << std::endl;); - return FC_CONTINUE; - } - TRACE(str, tout << "arithmetic solver done in final check" << std::endl;); - - expr_ref_vector precondition(m); - expr_ref_vector cex(m); - lbool model_status = fixed_length_model_construction(assignments, precondition, free_variables, candidate_model, cex); - - if (model_status == l_true) { - m_stats.m_solved_by = 2; - return FC_DONE; - } else if (model_status == l_false) { - // whatever came back in CEX is the conflict clause. - // negate its conjunction and assert that - expr_ref conflict(m.mk_not(mk_and(cex)), m); - assert_axiom(conflict); - add_persisted_axiom(conflict); - return FC_CONTINUE; - } else { // model_status == l_undef - TRACE(str, tout << "fixed-length model construction found missing side conditions; continuing search" << std::endl;); - return FC_CONTINUE; - } - } - - if (opt_VerifyFinalCheckProgress && !finalCheckProgressIndicator) { - TRACE(str, tout << "BUG: no progress in final check, giving up!!" << std::endl;); - m.raise_exception("no progress in theory_str final check"); - } - - return FC_CONTINUE; // since by this point we've added axioms - } - - void theory_str::get_concats_in_eqc(expr * n, std::set & concats) { - - expr * eqcNode = n; - do { - if (u.str.is_concat(to_app(eqcNode))) { - concats.insert(eqcNode); - } - eqcNode = get_eqc_next(eqcNode); - } while (eqcNode != n); - } - - void theory_str::get_var_in_eqc(expr * n, std::set & varSet) { - expr * eqcNode = n; - do { - if (variable_set.find(eqcNode) != variable_set.end()) { - varSet.insert(eqcNode); - } - eqcNode = get_eqc_next(eqcNode); - } while (eqcNode != n); - } - - bool cmpvarnames(expr * lhs, expr * rhs) { - symbol lhs_name = to_app(lhs)->get_decl()->get_name(); - symbol rhs_name = to_app(rhs)->get_decl()->get_name(); - return lhs_name.str() < rhs_name.str(); - } - - void theory_str::init_model(model_generator & mg) { - //TRACE(str, tout << "initializing model" << std::endl; display(tout);); - m_factory = alloc(str_value_factory, get_manager(), get_family_id()); - mg.register_factory(m_factory); - } - - /* - * Helper function for mk_value(). - * Attempts to resolve the expression 'n' to a string constant. - * Stronger than get_eqc_value() in that it will perform recursive descent - * through every subexpression and attempt to resolve those to concrete values as well. - * Returns the concrete value obtained from this process, - * guaranteed to satisfy m_strutil.is_string(), - * if one could be obtained, - * or else returns NULL if no concrete value was derived. - */ - app * theory_str::mk_value_helper(app * n) { - if (u.str.is_string(n)) { - return n; - } else if (u.str.is_concat(n)) { - // recursively call this function on each argument - SASSERT(n->get_num_args() == 2); - expr * a0 = n->get_arg(0); - expr * a1 = n->get_arg(1); - - app * a0_conststr = mk_value_helper(to_app(a0)); - app * a1_conststr = mk_value_helper(to_app(a1)); - - if (a0_conststr != nullptr && a1_conststr != nullptr) { - zstring a0_s, a1_s; - u.str.is_string(a0_conststr, a0_s); - u.str.is_string(a1_conststr, a1_s); - zstring result = a0_s + a1_s; - return to_app(mk_string(result)); - } - } - - zstring assignedValue; - if (candidate_model.find(n, assignedValue)) { - return to_app(mk_string(assignedValue)); - } - - // fallback path - // try to find some constant string, anything, in the equivalence class of n - if (!candidate_model.empty()) { - zstring val; - if (candidate_model.find(n, val)) { - return to_app(mk_string(val)); - } - } - bool hasEqc = false; - expr * n_eqc = get_eqc_value(n, hasEqc); - if (hasEqc) { - return to_app(n_eqc); - } else { - theory_var curr = get_var(n); - if (curr != null_theory_var) { - curr = m_find.find(curr); - theory_var first = curr; - do { - expr* a = get_ast(curr); - zstring val; - if (candidate_model.find(a, val)) { - return to_app(mk_string(val)); - } - curr = m_find.next(curr); - } - while (curr != first && curr != null_theory_var); - } - // fail to find - return nullptr; - } - } - - model_value_proc * theory_str::mk_value(enode * n, model_generator & mg) { - TRACE(str, tout << "mk_value for: " << mk_ismt2_pp(n->get_expr(), get_manager()) << - " (sort " << mk_ismt2_pp(n->get_expr()->get_sort(), get_manager()) << ")" << std::endl;); - ast_manager & m = get_manager(); - app_ref owner(m); - owner = n->get_expr(); - - // If the owner is not internalized, it doesn't have an enode associated. - SASSERT(ctx.e_internalized(owner)); - - app * val = mk_value_helper(owner); - if (val != nullptr) { - return alloc(expr_wrapper_proc, val); - } else { - TRACE(str, tout << "WARNING: failed to find a concrete value, falling back" << std::endl;); - std::ostringstream unused; - unused << "**UNUSED**" << (m_unused_id++); - return alloc(expr_wrapper_proc, to_app(mk_string(unused.str()))); - } - } - - void theory_str::finalize_model(model_generator & mg) {} - - void theory_str::display(std::ostream & out) const { - out << "TODO: theory_str display" << std::endl; - } - - rational theory_str::get_refine_length(expr* ex, expr_ref_vector& extra_deps){ - ast_manager & m = get_manager(); - - TRACE(str_fl, tout << "finding length for " << mk_ismt2_pp(ex, m) << std::endl;); - if (u.str.is_string(ex)) { - bool str_exists; - expr * str = get_eqc_value(ex, str_exists); - SASSERT(str_exists); - zstring str_const; - u.str.is_string(str, str_const); - return rational(str_const.length()); - } else if (u.str.is_itos(ex)) { - expr* fromInt = nullptr; - u.str.is_itos(ex, fromInt); - - arith_value v(m); - v.init(&ctx); - rational val; - VERIFY(v.get_value(fromInt, val)); - - std::string s = std::to_string(val.get_int32()); - extra_deps.push_back(ctx.mk_eq_atom(fromInt, mk_int(val))); - return rational((unsigned)s.length()); - - } else if (u.str.is_at(ex)) { - expr* substrBase = nullptr; - expr* substrPos = nullptr; - u.str.is_at(ex, substrBase, substrPos); - arith_value v(m); - v.init(&ctx); - rational pos; - VERIFY(v.get_value(substrPos, pos)); - - extra_deps.push_back(ctx.mk_eq_atom(substrPos, mk_int(pos))); - return rational::one(); - - } else if (u.str.is_extract(ex)) { - expr* substrBase = nullptr; - expr* substrPos = nullptr; - expr* substrLen = nullptr; - u.str.is_extract(ex, substrBase, substrPos, substrLen); - arith_value v(m); - v.init(&ctx); - rational len, pos; - VERIFY(v.get_value(substrLen, len)); - VERIFY(v.get_value(substrPos, pos)); - - extra_deps.push_back(ctx.mk_eq_atom(substrPos, mk_int(pos))); - return len; - - } else if (u.str.is_replace(ex)) { - TRACE(str_fl, tout << "replace is like contains---not in conjunctive fragment!" << std::endl;); - UNREACHABLE(); - } - //find asserts that it exists - return fixed_length_used_len_terms.find(ex); - } - - expr* theory_str::refine(expr* lhs, expr* rhs, rational offset) { - // TRACE(str, tout << "refine with " << offset.get_unsigned() << std::endl;); - if (offset >= rational(0)) { - ++m_stats.m_refine_eq; - return refine_eq(lhs, rhs, offset.get_unsigned()); - } - // Let's just giveup if we find ourselves in the disjunctive fragment. - if (offset == NEQ) { // negative equation - ++m_stats.m_refine_neq; - return refine_dis(lhs, rhs); - } - if (offset == PFUN) { // function like contains, prefix,... - SASSERT(rhs == lhs); - ++m_stats.m_refine_f; - return refine_function(lhs); - } - if (offset == NFUN) { // negated function - SASSERT(rhs == lhs); - ++m_stats.m_refine_nf; - ast_manager & m = get_manager(); - return refine_function(m.mk_not(lhs)); - } - UNREACHABLE(); - return nullptr; - } - - expr* theory_str::refine_eq(expr* lhs, expr* rhs, unsigned _offset) { - TRACE(str_fl, tout << "refine eq " << _offset << std::endl;); - ast_manager & m = get_manager(); - - expr_ref_vector Gamma(m); - expr_ref_vector Delta(m); - - if (!flatten(lhs, Gamma) || !flatten(rhs, Delta)){ - UNREACHABLE(); - } - - expr_ref_vector extra_deps(m); - rational offset(_offset); - - // find len(Gamma[:i]) - unsigned left_count = 0; - rational left_length(0), last_length(0); - while(left_count < Gamma.size() && left_length <= offset) { - last_length = get_refine_length(Gamma.get(left_count), extra_deps); - left_length += last_length; - left_count++; - } - left_count--; - SASSERT(left_count >= 0 && left_count < Gamma.size()); - left_length -= last_length; - - expr* left_sublen = nullptr; - for (unsigned i = 0; i < left_count; i++) { - expr* len; - if (!u.str.is_string(to_app(Gamma.get(i)))) { - len = u.str.mk_length(Gamma.get(i)); - } else { - rational lenDiff = offset - left_length; - len = mk_int(lenDiff); - } - if (left_sublen == nullptr) { - left_sublen = len; - } else { - left_sublen = m_autil.mk_add(left_sublen, len); - } - } - if (offset - left_length != 0) { - rational lenDiff = offset - left_length; - if (left_sublen == nullptr) { - left_sublen = mk_int(lenDiff); - } else { - left_sublen = m_autil.mk_add(left_sublen, mk_int(lenDiff)); - } - } - expr* extra_left_cond = nullptr; - if (!u.str.is_string(to_app(Gamma.get(left_count)))) { - rational offsetLen = offset - left_length + 1; - extra_left_cond = m_autil.mk_ge(u.str.mk_length(Gamma.get(left_count)), mk_int(offsetLen)); - } - - // find len(Delta[:j]) - unsigned right_count = 0; - rational right_length(0); - last_length = 0; - while(right_count < Delta.size() && right_length <= offset) { - last_length = get_refine_length(Delta.get(right_count), extra_deps); - right_length += last_length; - right_count++; - } - right_count--; - SASSERT(right_count >= 0 && right_count < Delta.size()); - right_length -= last_length; - - expr* right_sublen = nullptr; - for (unsigned i = 0; i < right_count; i++) { - expr* len; - if (!u.str.is_string(to_app(Delta.get(i)))) { - len = u.str.mk_length(Delta.get(i)); - } else { - rational offsetLen = offset - right_length; - len = mk_int(offsetLen); - } - if (right_sublen == nullptr) { - right_sublen = len; - } else { - right_sublen = m_autil.mk_add(right_sublen, len); - } - } - if (offset - right_length != 0) { - rational offsetLen = offset - right_length; - if (right_sublen == nullptr) { - right_sublen = mk_int(offsetLen); - } else { - right_sublen = m_autil.mk_add(right_sublen, mk_int(offsetLen)); - } - } - expr* extra_right_cond = nullptr; - if (!u.str.is_string(to_app(Delta.get(right_count)))) { - rational offsetLen = offset - right_length + 1; - extra_right_cond = m_autil.mk_ge(u.str.mk_length(Delta.get(right_count)), mk_int(offsetLen)); - } - - // Offset tells us that Gamma[i+1:]) != Delta[j+1:] - // so learn that len(Gamma[:i]) != len(Delta[:j]) - expr_ref_vector diseqs(m); - diseqs.push_back(ctx.mk_eq_atom(lhs, rhs)); - if (left_sublen != right_sublen) { //nullptr actually means zero - if (left_sublen == nullptr) { - left_sublen = mk_int(0); - } - if (right_sublen == nullptr) { - right_sublen = mk_int(0); - } - // len(Gamma[:i]) == len(Delta[:j]) - expr* sublen_eq = ctx.mk_eq_atom(left_sublen, right_sublen); - TRACE(str, tout << "sublen_eq " << mk_pp(sublen_eq, m) << std::endl;); - diseqs.push_back(sublen_eq); - } - if (extra_left_cond != nullptr) { - TRACE(str, tout << "extra_left_cond " << mk_pp(extra_left_cond, m) << std::endl;); - diseqs.push_back(extra_left_cond); - } - if (extra_right_cond != nullptr) { - TRACE(str, tout << "extra_right_cond " << mk_pp(extra_right_cond, m) << std::endl;); - diseqs.push_back(extra_right_cond); - } - if (extra_deps.size() > 0) { - diseqs.push_back(m.mk_and(extra_deps.size(), extra_deps.data())); - TRACE(str, tout << "extra_deps " << mk_pp(diseqs.get(diseqs.size()-1), m) << std::endl;); - } - expr* final_diseq = m.mk_and(diseqs.size(), diseqs.data()); - TRACE(str, tout << "learning not " << mk_pp(final_diseq, m) << std::endl;); - return final_diseq; - } - - expr* theory_str::refine_dis(expr* lhs, expr* rhs) { - ast_manager & m = get_manager(); - - expr_ref lesson(m); - lesson = m.mk_not(m.mk_eq(lhs, rhs)); - TRACE(str, tout << "learning not " << mk_pp(lesson, m) << std::endl;); - return lesson; - } - - expr* theory_str::refine_function(expr* f) { - //Can we learn something better? - TRACE(str, tout << "learning not " << mk_pp(f, get_manager()) << std::endl;); - return f; - } - - bool theory_str::flatten(expr* ex, expr_ref_vector & flat) { - - sort * ex_sort = ex->get_sort(); - sort * str_sort = u.str.mk_string_sort(); - - if (ex_sort == str_sort) { - if (is_app(ex)) { - app * ap = to_app(ex); - if(u.str.is_concat(ap)) { - unsigned num_args = ap->get_num_args(); - bool success = true; - for (unsigned i = 0; i < num_args; i++) { - success = success && flatten(ap->get_arg(i), flat); - } - return success; - } else { - flat.push_back(ex); - return true; - } - } - } - TRACE(str, tout << "non string term!" << mk_pp(ex, m) << std::endl;); - return false; - } -}; /* namespace smt */ diff --git a/src/smt/theory_str.h b/src/smt/theory_str.h deleted file mode 100644 index f462beff7..000000000 --- a/src/smt/theory_str.h +++ /dev/null @@ -1,779 +0,0 @@ -/*++ - Module Name: - - theory_str.h - - Abstract: - - String Theory Plugin - - Author: - - Murphy Berzish and Yunhui Zheng - - Revision History: - - --*/ -#pragma once - -#include "util/trail.h" -#include "util/union_find.h" -#include "util/scoped_ptr_vector.h" -#include "util/hashtable.h" -#include "ast/ast_pp.h" -#include "ast/arith_decl_plugin.h" -#include "ast/rewriter/th_rewriter.h" -#include "ast/rewriter/seq_rewriter.h" -#include "ast/seq_decl_plugin.h" -#include "model/value_factory.h" -#include "smt/smt_theory.h" -#include "params/theory_str_params.h" -#include "smt/smt_model_generator.h" -#include "smt/smt_arith_value.h" -#include "smt/smt_kernel.h" -#include -#include -#include -#include -#include - -namespace smt { - -typedef hashtable symbol_set; -typedef int_hashtable > integer_set; - -class str_value_factory : public value_factory { - seq_util u; - symbol_set m_strings; - std::string delim; - unsigned m_next; -public: - str_value_factory(ast_manager & m, family_id fid) : - value_factory(m, fid), - u(m), delim("!"), m_next(0) {} - expr * get_some_value(sort * s) override { - return u.str.mk_string("some value"); - } - bool get_some_values(sort * s, expr_ref & v1, expr_ref & v2) override { - v1 = u.str.mk_string("value 1"); - v2 = u.str.mk_string("value 2"); - return true; - } - expr * get_fresh_value(sort * s) override { - if (u.is_string(s)) { - while (true) { - std::ostringstream strm; - strm << delim << std::hex << (m_next++) << std::dec << delim; - std::string s(strm.str()); - symbol sym(s); - if (m_strings.contains(sym)) continue; - m_strings.insert(sym); - return u.str.mk_string(s); - } - } - sort* seq = nullptr; - if (u.is_re(s, seq)) { - expr* v0 = get_fresh_value(seq); - return u.re.mk_to_re(v0); - } - TRACE(t_str, tout << "unexpected sort in get_fresh_value(): " << mk_pp(s, m_manager) << std::endl;); - UNREACHABLE(); return nullptr; - } - void register_value(expr * n) override { /* Ignore */ } -}; - -// NSB: added operator[] and contains to obj_pair_hashtable -class theory_str_contain_pair_bool_map_t : public obj_pair_map {}; - -template -class binary_search_trail : public trail { - obj_map > & target; - expr * entry; -public: - binary_search_trail(obj_map > & target, expr * entry) : - target(target), entry(entry) {} - void undo() override { - TRACE(t_str_binary_search, tout << "in binary_search_trail::undo()" << std::endl;); - if (target.contains(entry)) { - if (!target[entry].empty()) { - target[entry].pop_back(); - } else { - TRACE(t_str_binary_search, tout << "WARNING: attempt to remove length tester from an empty stack" << std::endl;); - } - } else { - TRACE(t_str_binary_search, tout << "WARNING: attempt to access length tester map via invalid key" << std::endl;); - } - } -}; - -class regex_automaton_under_assumptions { -protected: - expr * re_term; - eautomaton * aut; - bool polarity; - - bool assume_lower_bound; - rational lower_bound; - - bool assume_upper_bound; - rational upper_bound; -public: - regex_automaton_under_assumptions() : - re_term(nullptr), aut(nullptr), polarity(false), - assume_lower_bound(false), assume_upper_bound(false) {} - - regex_automaton_under_assumptions(expr * re_term, eautomaton * aut, bool polarity) : - re_term(re_term), aut(aut), polarity(polarity), - assume_lower_bound(false), assume_upper_bound(false) {} - - void set_lower_bound(rational & lb) { - lower_bound = lb; - assume_lower_bound = true; - } - void unset_lower_bound() { - assume_lower_bound = false; - } - - void set_upper_bound(rational & ub) { - upper_bound = ub; - assume_upper_bound = true; - } - void unset_upper_bound() { - assume_upper_bound = false; - } - - bool get_lower_bound(rational & lb) const { - if (assume_lower_bound) { - lb = lower_bound; - return true; - } else { - return false; - } - } - - bool get_upper_bound(rational & ub) const { - if (assume_upper_bound) { - ub = upper_bound; - return true; - } else { - return false; - } - } - - eautomaton * get_automaton() const { return aut; } - expr * get_regex_term() const { return re_term; } - bool get_polarity() const { return polarity; } -}; - -class char_union_find { - unsigned_vector m_find; - unsigned_vector m_size; - unsigned_vector m_next; - - integer_set char_const_set; - - u_map > m_justification; // representative -> list of formulas justifying EQC - - void ensure_size(unsigned v) { - while (v >= get_num_vars()) { - mk_var(); - } - } - public: - unsigned mk_var() { - unsigned r = m_find.size(); - m_find.push_back(r); - m_size.push_back(1); - m_next.push_back(r); - return r; - } - unsigned get_num_vars() const { return m_find.size(); } - void mark_as_char_const(unsigned r) { - char_const_set.insert((int)r); - } - bool is_char_const(unsigned r) { - return char_const_set.contains((int)r); - } - - unsigned find(unsigned v) const { - if (v >= get_num_vars()) { - return v; - } - while (true) { - unsigned new_v = m_find[v]; - if (new_v == v) - return v; - v = new_v; - } - } - - unsigned next(unsigned v) const { - if (v >= get_num_vars()) { - return v; - } - return m_next[v]; - } - - bool is_root(unsigned v) const { - return v >= get_num_vars() || m_find[v] == v; - } - - svector get_justification(unsigned v) { - unsigned r = find(v); - svector retval; - if (m_justification.find(r, retval)) { - return retval; - } else { - return svector(); - } - } - - void merge(unsigned v1, unsigned v2, expr * justification) { - unsigned r1 = find(v1); - unsigned r2 = find(v2); - if (r1 == r2) - return; - ensure_size(v1); - ensure_size(v2); - // swap r1 and r2 if: - // 1. EQC of r1 is bigger than EQC of r2 - // 2. r1 is a character constant and r2 is not. - // this maintains the invariant that if a character constant is in an eqc then it is the root of that eqc - if (m_size[r1] > m_size[r2] || (is_char_const(r1) && !is_char_const(r2))) { - std::swap(r1, r2); - } - m_find[r1] = r2; - m_size[r2] += m_size[r1]; - std::swap(m_next[r1], m_next[r2]); - - if (m_justification.contains(r1)) { - // add r1's justifications to r2 - if (!m_justification.contains(r2)) { - m_justification.insert(r2, m_justification[r1]); - } else { - m_justification[r2].append(m_justification[r1]); - } - m_justification.remove(r1); - } - if (justification != nullptr) { - if (!m_justification.contains(r2)) { - m_justification.insert(r2, svector()); - } - m_justification[r2].push_back(justification); - } - } - - void reset() { - m_find.reset(); - m_next.reset(); - m_size.reset(); - char_const_set.reset(); - m_justification.reset(); - } -}; - -class theory_str : public theory { - struct T_cut - { - int level; - obj_map vars; - - T_cut() { - level = -100; - } - }; - - typedef union_find th_union_find; - - typedef map, default_eq > rational_map; - struct zstring_hash_proc { - unsigned operator()(zstring const & s) const { - auto str = s.encode(); - return string_hash(str.c_str(), static_cast(s.length()), 17); - } - }; - typedef map > string_map; - - struct stats { - stats() { reset(); } - void reset() { memset(this, 0, sizeof(stats)); } - unsigned m_refine_eq; - unsigned m_refine_neq; - unsigned m_refine_f; - unsigned m_refine_nf; - unsigned m_solved_by; - unsigned m_fixed_length_iterations; - }; - -protected: - theory_str_params const & m_params; - - /* - * Setting EagerStringConstantLengthAssertions to true allows some methods, - * in particular internalize_term(), to add - * length assertions about relevant string constants. - * Note that currently this should always be set to 'true', or else *no* length assertions - * will be made about string constants. - */ - bool opt_EagerStringConstantLengthAssertions; - - /* - * If VerifyFinalCheckProgress is set to true, continuing after final check is invoked - * without asserting any new axioms is considered a bug and will throw an exception. - */ - bool opt_VerifyFinalCheckProgress; - - /* - * This constant controls how eagerly we expand unrolls in unbounded regex membership tests. - */ - int opt_LCMUnrollStep; - - /* - * If NoQuickReturn_IntegerTheory is set to true, - * integer theory integration checks that assert axioms - * will not return from the function after asserting their axioms. - * The default behaviour of Z3str2 is to set this to 'false'. This may be incorrect. - */ - bool opt_NoQuickReturn_IntegerTheory; - - /* - * If DisableIntegerTheoryIntegration is set to true, - * ALL calls to the integer theory integration methods - * (get_arith_value, get_len_value, lower_bound, upper_bound) - * will ignore what the arithmetic solver believes about length terms, - * and will return no information. - * - * This reduces performance significantly, but can be useful to enable - * if it is suspected that string-integer integration, or the arithmetic solver itself, - * might have a bug. - * - * The default behaviour of Z3str2 is to set this to 'false'. - */ - bool opt_DisableIntegerTheoryIntegration; - - /* - * If DeferEQCConsistencyCheck is set to true, - * expensive calls to new_eq_check() will be deferred until final check, - * at which time the consistency of *all* string equivalence classes will be validated. - */ - bool opt_DeferEQCConsistencyCheck; - - /* - * If CheckVariableScope is set to true, - * pop_scope_eh() and final_check_eh() will run extra checks - * to determine whether the current assignment - * contains references to any internal variables that are no longer in scope. - */ - bool opt_CheckVariableScope; - - /* - * If ConcatOverlapAvoid is set to true, - * the check to simplify Concat = Concat in handle_equality() will - * avoid simplifying wrt. pairs of Concat terms that will immediately - * result in an overlap. (false = Z3str2 behaviour) - */ - bool opt_ConcatOverlapAvoid; - - bool search_started; - arith_util m_autil; - seq_util u; - int sLevel; - - bool finalCheckProgressIndicator; - - expr_ref_vector m_trail; // trail for generated terms - - str_value_factory * m_factory; - - re2automaton m_mk_aut; - - // Unique identifier appended to unused variables to ensure that model construction - // does not introduce equalities when they weren't enforced. - unsigned m_unused_id; - - const char* newOverlapStr = "!!NewOverlapAssumption!!"; - - // terms we couldn't go through set_up_axioms() with because they weren't internalized - expr_ref_vector m_delayed_axiom_setup_terms; - - ptr_vector m_basicstr_axiom_todo; - ptr_vector m_concat_axiom_todo; - ptr_vector m_string_constant_length_todo; - ptr_vector m_concat_eval_todo; - expr_ref_vector m_delayed_assertions_todo; - - // enode lists for library-aware/high-level string terms (e.g. substr, contains) - ptr_vector m_library_aware_axiom_todo; - - // list of axioms that are re-asserted every time the scope is popped - expr_ref_vector m_persisted_axioms; - expr_ref_vector m_persisted_axiom_todo; - - // hashtable of all exprs for which we've already set up term-specific axioms -- - // this prevents infinite recursive descent with respect to axioms that - // include an occurrence of the term for which axioms are being generated - obj_hashtable axiomatized_terms; - - // hashtable of all top-level exprs for which set_up_axioms() has been called - obj_hashtable existing_toplevel_exprs; - - int tmpStringVarCount; - int tmpXorVarCount; - // obj_pair_map > varForBreakConcat; - std::map, std::map > varForBreakConcat; - bool avoidLoopCut; - bool loopDetected; - obj_map > cut_var_map; - scoped_ptr_vector m_cut_allocs; - expr_ref m_theoryStrOverlapAssumption_term; - - obj_hashtable variable_set; - obj_hashtable internal_variable_set; - std::map > internal_variable_scope_levels; - - expr_ref_vector contains_map; - - theory_str_contain_pair_bool_map_t contain_pair_bool_map; - obj_map > > contain_pair_idx_map; - - // regex automata - scoped_ptr_vector m_automata; - ptr_vector regex_automata; - obj_hashtable regex_terms; - obj_map > regex_terms_by_string; // S --> [ (str.in.re S *) ] - obj_map > regex_automaton_assumptions; // RegEx --> [ aut+assumptions ] - obj_hashtable regex_terms_with_path_constraints; // set of string terms which have had path constraints asserted in the current scope - obj_hashtable regex_terms_with_length_constraints; // set of regex terms which had had length constraints asserted in the current scope - obj_map regex_term_to_length_constraint; // (str.in.re S R) -> (length constraint over S wrt. R) - obj_map > regex_term_to_extra_length_vars; // extra length vars used in regex_term_to_length_constraint entries - - // keep track of the last lower/upper bound we saw for each string term - // so we don't perform duplicate work - obj_map regex_last_lower_bound; - obj_map regex_last_upper_bound; - - // each counter maps a (str.in.re) expression to an integer. - // use helper functions regex_inc_counter() and regex_get_counter() to access - obj_map regex_length_attempt_count; - obj_map regex_fail_count; - obj_map regex_intersection_fail_count; - - obj_map > string_chars; // S --> [S_0, S_1, ...] for character terms S_i - - obj_pair_map concat_astNode_map; - - // all (str.to-int) and (int.to-str) terms - expr_ref_vector string_int_conversion_terms; - obj_hashtable string_int_axioms; - - string_map stringConstantCache; - unsigned long totalCacheAccessCount; - unsigned long cacheHitCount; - unsigned long cacheMissCount; - - unsigned m_fresh_id; - - // cache mapping each string S to Length(S) - obj_map length_ast_map; - - trail_stack m_trail_stack; - trail_stack m_library_aware_trail_stack; - th_union_find m_find; - theory_var get_var(expr * n) const; - expr * get_eqc_next(expr * n); - app * get_ast(theory_var i); - - // fixed length model construction - expr_ref_vector fixed_length_subterm_trail; // trail for subterms generated *in the subsolver* - expr_ref_vector fixed_length_assumptions; // cache of boolean terms to assert *into the subsolver*, unsat core is a subset of these - obj_map fixed_length_used_len_terms; // constraints used in generating fixed length model - obj_map var_to_char_subterm_map; // maps a var to a list of character terms *in the subsolver* - obj_map uninterpreted_to_char_subterm_map; // maps an "uninterpreted" string term to a list of character terms *in the subsolver* - obj_map> fixed_length_lesson; //keep track of information for the lesson - unsigned preprocessing_iteration_count; // number of attempts we've made to solve by preprocessing length information - obj_map candidate_model; - - stats m_stats; - -protected: - void reset_internal_data_structures(); - - void assert_axiom(expr * e); - void assert_implication(expr * premise, expr * conclusion); - expr * rewrite_implication(expr * premise, expr * conclusion); - // Use the rewriter to simplify an axiom, then assert it. - void assert_axiom_rw(expr * e); - - expr * mk_string(zstring const& str); - expr * mk_string(const char * str); - - app * mk_strlen(expr * e); - expr * mk_concat(expr * n1, expr * n2); - expr * mk_concat_const_str(expr * n1, expr * n2); - app * mk_contains(expr * haystack, expr * needle); - app * mk_indexof(expr * haystack, expr * needle); - app * mk_fresh_const(char const* name, sort* s); - - literal mk_literal(expr* _e); - app * mk_int(int n); - app * mk_int(rational & q); - - void check_and_init_cut_var(expr * node); - void add_cut_info_one_node(expr * baseNode, int slevel, expr * node); - void add_cut_info_merge(expr * destNode, int slevel, expr * srcNode); - bool has_self_cut(expr * n1, expr * n2); - - // for ConcatOverlapAvoid - bool will_result_in_overlap(expr * lhs, expr * rhs); - - void track_variable_scope(expr * var); - app * mk_str_var(std::string name); - app * mk_int_var(std::string name); - app_ref mk_nonempty_str_var(); - app * mk_internal_xor_var(); - void add_nonempty_constraint(expr * s); - - void instantiate_concat_axiom(enode * cat); - void try_eval_concat(enode * cat); - void instantiate_basic_string_axioms(enode * str); - void instantiate_str_eq_length_axiom(enode * lhs, enode * rhs); - - // for count abstraction and refinement - expr* refine(expr* lhs, expr* rhs, rational offset); - expr* refine_eq(expr* lhs, expr* rhs, unsigned offset); - expr* refine_dis(expr* lhs, expr* rhs); - expr* refine_function(expr* f); - bool flatten(expr* ex, expr_ref_vector & flat); - rational get_refine_length(expr* ex, expr_ref_vector& extra_deps); - - void instantiate_axiom_CharAt(enode * e); - void instantiate_axiom_prefixof(enode * e); - void instantiate_axiom_suffixof(enode * e); - void instantiate_axiom_Contains(enode * e); - void instantiate_axiom_Indexof(enode * e); - void instantiate_axiom_Indexof_extended(enode * e); - void instantiate_axiom_LastIndexof(enode * e); - void instantiate_axiom_Substr(enode * e); - void instantiate_axiom_Replace(enode * e); - void instantiate_axiom_str_to_int(enode * e); - void instantiate_axiom_int_to_str(enode * e); - void instantiate_axiom_is_digit(enode * e); - void instantiate_axiom_str_to_code(enode * e); - void instantiate_axiom_str_from_code(enode * e); - - void add_persisted_axiom(expr * a); - - expr * mk_RegexIn(expr * str, expr * regexp); - void instantiate_axiom_RegexIn(enode * e); - - // regex automata and length-aware regex - bool solve_regex_automata(); - unsigned estimate_regex_complexity(expr * re); - unsigned estimate_regex_complexity_under_complement(expr * re); - unsigned estimate_automata_intersection_difficulty(eautomaton * aut1, eautomaton * aut2); - bool check_regex_length_linearity(expr * re); - bool check_regex_length_linearity_helper(expr * re, bool already_star); - expr_ref infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables); - void check_subterm_lengths(expr * re, integer_set & lens); - void find_automaton_initial_bounds(expr * str_in_re, eautomaton * aut); - bool refine_automaton_lower_bound(eautomaton * aut, rational current_lower_bound, rational & refined_lower_bound); - bool refine_automaton_upper_bound(eautomaton * aut, rational current_upper_bound, rational & refined_upper_bound); - expr_ref generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints); - void aut_path_add_next(u_map& next, expr_ref_vector& trail, unsigned idx, expr* cond); - expr_ref aut_path_rewrite_constraint(expr * cond, expr * ch_var); - void regex_inc_counter(obj_map & counter_map, expr * key); - unsigned regex_get_counter(obj_map & counter_map, expr * key); - - void set_up_axioms(expr * ex); - void handle_equality(expr * lhs, expr * rhs); - - app * mk_value_helper(app * n); - expr * get_eqc_value(expr * n, bool & hasEqcValue); - bool get_string_constant_eqc(expr * n, zstring & stringVal); - expr * z3str2_get_eqc_value(expr * n , bool & hasEqcValue); - bool in_same_eqc(expr * n1, expr * n2); - expr * collect_eq_nodes(expr * n, expr_ref_vector & eqcSet); - bool is_var(expr * e) const; - - bool get_arith_value(expr* e, rational& val) const; - bool get_len_value(expr* e, rational& val); - bool lower_bound(expr* _e, rational& lo); - bool upper_bound(expr* _e, rational& hi); - - bool can_two_nodes_eq(expr * n1, expr * n2); - bool can_concat_eq_str(expr * concat, zstring& str); - bool can_concat_eq_concat(expr * concat1, expr * concat2); - bool check_concat_len_in_eqc(expr * concat); - void check_eqc_empty_string(expr * lhs, expr * rhs); - void check_eqc_concat_concat(std::set & eqc_concat_lhs, std::set & eqc_concat_rhs); - bool check_length_consistency(expr * n1, expr * n2); - bool check_length_const_string(expr * n1, expr * constStr); - bool check_length_eq_var_concat(expr * n1, expr * n2); - bool check_length_concat_concat(expr * n1, expr * n2); - bool check_length_concat_var(expr * concat, expr * var); - bool check_length_var_var(expr * var1, expr * var2); - void check_contain_in_new_eq(expr * n1, expr * n2); - void check_contain_by_eqc_val(expr * varNode, expr * constNode); - void check_contain_by_substr(expr * varNode, expr_ref_vector & willEqClass); - void check_contain_by_eq_nodes(expr * n1, expr * n2); - bool in_contain_idx_map(expr * n); - void compute_contains(std::map & varAliasMap, - std::map & concatAliasMap, std::map & varConstMap, - std::map & concatConstMap, std::map > & varEqConcatMap); - expr * dealias_node(expr * node, std::map & varAliasMap, std::map & concatAliasMap); - void get_grounded_concats(unsigned depth, - expr* node, std::map & varAliasMap, - std::map & concatAliasMap, std::map & varConstMap, - std::map & concatConstMap, std::map > & varEqConcatMap, - std::map, std::set > > & groundedMap); - void print_grounded_concat(expr * node, std::map, std::set > > & groundedMap); - void check_subsequence(expr* str, expr* strDeAlias, expr* subStr, expr* subStrDeAlias, expr* boolVar, - std::map, std::set > > & groundedMap); - bool is_partial_in_grounded_concat(const std::vector & strVec, const std::vector & subStrVec); - - void get_nodes_in_concat(expr * node, ptr_vector & nodeList); - expr * simplify_concat(expr * node); - - void simplify_parent(expr * nn, expr * eq_str); - - void simplify_concat_equality(expr * lhs, expr * rhs); - void solve_concat_eq_str(expr * concat, expr * str); - - void infer_len_concat_equality(expr * nn1, expr * nn2); - bool infer_len_concat(expr * n, rational & nLen); - void infer_len_concat_arg(expr * n, rational len); - - bool is_concat_eq_type1(expr * concatAst1, expr * concatAst2); - bool is_concat_eq_type2(expr * concatAst1, expr * concatAst2); - bool is_concat_eq_type3(expr * concatAst1, expr * concatAst2); - bool is_concat_eq_type4(expr * concatAst1, expr * concatAst2); - bool is_concat_eq_type5(expr * concatAst1, expr * concatAst2); - bool is_concat_eq_type6(expr * concatAst1, expr * concatAst2); - - void process_concat_eq_type1(expr * concatAst1, expr * concatAst2); - void process_concat_eq_type2(expr * concatAst1, expr * concatAst2); - void process_concat_eq_type3(expr * concatAst1, expr * concatAst2); - void process_concat_eq_type4(expr * concatAst1, expr * concatAst2); - void process_concat_eq_type5(expr * concatAst1, expr * concatAst2); - void process_concat_eq_type6(expr * concatAst1, expr * concatAst2); - - void print_cut_var(expr * node, std::ofstream & xout); - - void generate_mutual_exclusion(expr_ref_vector & exprs); - void add_theory_aware_branching_info(expr * term, double priority, lbool phase); - - bool new_eq_check(expr * lhs, expr * rhs); - void group_terms_by_eqc(expr * n, std::set & concats, std::set & vars, std::set & consts); - - void check_consistency_prefix(expr * e, bool is_true); - void check_consistency_suffix(expr * e, bool is_true); - void check_consistency_contains(expr * e, bool is_true); - - int ctx_dep_analysis(std::map & strVarMap, std::map & freeVarMap, - std::map > & var_eq_concat_map); - void trace_ctx_dep(std::ofstream & tout, - std::map & aliasIndexMap, - std::map & var_eq_constStr_map, - std::map > & var_eq_concat_map, - std::map > & var_eq_unroll_map, - std::map & concat_eq_constStr_map, - std::map > & concat_eq_concat_map); - - bool term_appears_as_subterm(expr * needle, expr * haystack); - void classify_ast_by_type(expr * node, std::map & varMap, - std::map & concatMap, std::map & unrollMap); - void classify_ast_by_type_in_positive_context(std::map & varMap, - std::map & concatMap, std::map & unrollMap); - - expr * get_alias_index_ast(std::map & aliasIndexMap, expr * node); - expr * getMostLeftNodeInConcat(expr * node); - expr * getMostRightNodeInConcat(expr * node); - void get_var_in_eqc(expr * n, std::set & varSet); - void get_concats_in_eqc(expr * n, std::set & concats); - void get_const_str_asts_in_node(expr * node, expr_ref_vector & constList); - expr * eval_concat(expr * n1, expr * n2); - - bool finalcheck_str2int(app * a); - bool finalcheck_int2str(app * a); - bool string_integer_conversion_valid(zstring str, rational& converted) const; - - lbool fixed_length_model_construction(expr_ref_vector formulas, expr_ref_vector &precondition, - expr_ref_vector& free_variables, - obj_map &model, expr_ref_vector &cex); - bool fixed_length_reduce_string_term(smt::kernel & subsolver, expr * term, expr_ref_vector & term_chars, expr_ref & cex); - bool fixed_length_get_len_value(expr * e, rational & val); - bool fixed_length_reduce_eq(smt::kernel & subsolver, expr_ref lhs, expr_ref rhs, expr_ref & cex); - bool fixed_length_reduce_diseq(smt::kernel & subsolver, expr_ref lhs, expr_ref rhs, expr_ref & cex); - bool fixed_length_reduce_contains(smt::kernel & subsolver, expr_ref f, expr_ref & cex); - bool fixed_length_reduce_negative_contains(smt::kernel & subsolver, expr_ref f, expr_ref & cex); - bool fixed_length_reduce_prefix(smt::kernel & subsolver, expr_ref f, expr_ref & cex); - bool fixed_length_reduce_negative_prefix(smt::kernel & subsolver, expr_ref f, expr_ref & cex); - bool fixed_length_reduce_suffix(smt::kernel & subsolver, expr_ref f, expr_ref & cex); - bool fixed_length_reduce_negative_suffix(smt::kernel & subsolver, expr_ref f, expr_ref & cex); - bool fixed_length_reduce_regex_membership(smt::kernel & subsolver, expr_ref f, expr_ref & cex, bool polarity); - - void dump_assignments(); - - void check_variable_scope(); - void recursive_check_variable_scope(expr * ex); - - void collect_var_concat(expr * node, std::set & varSet, std::set & concatSet); - bool propagate_length(std::set & varSet, std::set & concatSet, std::map & exprLenMap); - void get_unique_non_concat_nodes(expr * node, std::set & argSet); - bool propagate_length_within_eqc(expr * var); - - - const rational NEQ = rational(-1); // negative word equation lesson - const rational PFUN = rational(-2); // positive function lesson - const rational NFUN = rational(-3); // negative function lesson - - // TESTING - void refresh_theory_var(expr * e); - -public: - theory_str(context& ctx, ast_manager & m, theory_str_params const & params); - ~theory_str() override; - - char const * get_name() const override { return "seq"; } - void init() override; - void display(std::ostream & out) const override; - - void collect_statistics(::statistics & st) const override; - - bool overlapping_variables_detected() const { return loopDetected; } - - trail_stack& get_trail_stack() { return m_trail_stack; } - void merge_eh(theory_var, theory_var, theory_var v1, theory_var v2) {} - void after_merge_eh(theory_var r1, theory_var r2, theory_var v1, theory_var v2) { } - void unmerge_eh(theory_var v1, theory_var v2) {} -protected: - bool internalize_atom(app * atom, bool gate_ctx) override; - bool internalize_term(app * term) override; - virtual enode* ensure_enode(expr* e); - theory_var mk_var(enode * n) override; - - void new_eq_eh(theory_var, theory_var) override; - void new_diseq_eh(theory_var, theory_var) override; - - theory* mk_fresh(context* c) override { return alloc(theory_str, *c, c->get_manager(), m_params); } - void init_search_eh() override; - void add_theory_assumptions(expr_ref_vector & assumptions) override; - lbool validate_unsat_core(expr_ref_vector & unsat_core) override; - void relevant_eh(app * n) override; - void assign_eh(bool_var v, bool is_true) override; - void push_scope_eh() override; - void pop_scope_eh(unsigned num_scopes) override; - void reset_eh() override; - - bool can_propagate() override; - void propagate() override; - - final_check_status final_check_eh() override; - virtual void attach_new_th_var(enode * n); - - void init_model(model_generator & m) override; - model_value_proc * mk_value(enode * n, model_generator & mg) override; - void finalize_model(model_generator & mg) override; -}; - -}; diff --git a/src/smt/theory_str_mc.cpp b/src/smt/theory_str_mc.cpp deleted file mode 100644 index f434363e2..000000000 --- a/src/smt/theory_str_mc.cpp +++ /dev/null @@ -1,1549 +0,0 @@ -/*++ - Module Name: - - theory_str_mc.cpp - - Abstract: - - Model Construction for String Theory Plugin - - Author: - - Murphy Berzish and Yunhui Zheng - - Revision History: - - --*/ -#include "ast/ast_smt2_pp.h" -#include "smt/smt_context.h" -#include "smt/theory_str.h" -#include "smt/smt_model_generator.h" -#include "ast/ast_pp.h" -#include "ast/ast_ll_pp.h" -#include -#include -#include "smt/theory_seq_empty.h" -#include "smt/theory_arith.h" -#include "ast/ast_util.h" -#include "ast/rewriter/seq_rewriter.h" -#include "ast/rewriter/expr_replacer.h" -#include "smt_kernel.h" -#include "model/model_smt2_pp.h" - -namespace smt { - - /* - * Use the current model in the arithmetic solver to get the length of a term. - * Returns true if this could be done, placing result in 'termLen', or false otherwise. - * Works like get_len_value() except uses arithmetic solver model instead of EQCs. - */ - bool theory_str::fixed_length_get_len_value(expr * e, rational & val) { - ast_manager & m = get_manager(); - - rational val1; - expr_ref len(m), len_val(m); - expr* e1 = nullptr, *e2 = nullptr; - expr_ref_vector todo(m); - todo.push_back(e); - val.reset(); - while (!todo.empty()) { - expr* c = todo.back(); - todo.pop_back(); - zstring tmp; - if (u.str.is_concat(c, e1, e2)) { - todo.push_back(e1); - todo.push_back(e2); - } - else if (u.str.is_string(c, tmp)) { - unsigned int sl = tmp.length(); - val += rational(sl); - } - else { - len = mk_strlen(c); - arith_value v(get_manager()); - v.init(&get_context()); - if (v.get_value(len, val1)) { - val += val1; - } else { - return false; - } - } - } - return val.is_int(); - } - - - bool theory_str::fixed_length_reduce_suffix(smt::kernel & subsolver, expr_ref f, expr_ref & cex) { - ast_manager & m = get_manager(); - - ast_manager & sub_m = subsolver.m(); - - expr * full = nullptr; - expr * suff = nullptr; - VERIFY(u.str.is_suffix(f, suff, full)); - - expr_ref haystack(full, m); - expr_ref needle(suff, m); - - expr_ref_vector full_chars(m), suff_chars(m); - - if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex) - || !fixed_length_reduce_string_term(subsolver, needle, suff_chars, cex)) { - return false; - } - - if (suff_chars.size() == 0) { - // all strings endwith the empty one - return true; - } - - if (full_chars.size() == 0 && suff_chars.size() > 0) { - // the empty string doesn't "endwith" any non-empty string - cex = m.mk_or(m.mk_not(f), ctx.mk_eq_atom(mk_strlen(suff), mk_int(0)), - m_autil.mk_ge(mk_strlen(full), mk_int(0))); - th_rewriter m_rw(m); - m_rw(cex); - return false; - } - - if (full_chars.size() < suff_chars.size()) { - // a string can't endwith a longer one - // X startswith Y -> len(X) >= len(Y) - expr_ref minus_one(m_autil.mk_numeral(rational::minus_one(), true), m); - expr_ref zero(m_autil.mk_numeral(rational::zero(), true), m); - expr_ref lens(m_autil.mk_add(mk_strlen(full), m_autil.mk_mul(minus_one, mk_strlen(suff))), m); - cex = m.mk_or(m.mk_not(f), m_autil.mk_ge(lens, zero)); - th_rewriter m_rw(m); - m_rw(cex); - return false; - } - - expr_ref_vector branch(sub_m); - for (unsigned j = 0; j < suff_chars.size(); ++j) { - // full[j] == suff[j] - expr_ref cLHS(full_chars.get(full_chars.size() - j - 1), sub_m); - expr_ref cRHS(suff_chars.get(suff_chars.size() - j - 1), sub_m); - expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m); - branch.push_back(_e); - } - - expr_ref final_diseq(mk_and(branch), sub_m); - fixed_length_assumptions.push_back(final_diseq); - TRACE(str_fl, tout << "inserting into fixed_lesson" < 0) { - // the empty string doesn't "endwith" any non-empty string - return true; - } - - if (full_chars.size() < suff_chars.size()) { - // a string can't endwith a longer one - // X startswith Y -> len(X) >= len(Y) - return true; - } - - expr_ref_vector branch(sub_m); - for (unsigned j = 0; j < suff_chars.size(); ++j) { - // full[j] == suff[j] - expr_ref cLHS(full_chars.get(full_chars.size() - j - 1), sub_m); - expr_ref cRHS(suff_chars.get(suff_chars.size() - j - 1), sub_m); - expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m); - branch.push_back(_e); - } - - expr_ref final_diseq(mk_not(sub_m, mk_and(branch)), sub_m); - fixed_length_assumptions.push_back(final_diseq); - TRACE(str_fl, tout << "inserting into fixed_lesson" < 0) { - // the empty string doesn't "stratwith" any non-empty string - cex = m.mk_or(m.mk_not(f), ctx.mk_eq_atom(mk_strlen(pref), mk_int(0)), - m_autil.mk_ge(mk_strlen(full), mk_int(0))); - th_rewriter m_rw(m); - m_rw(cex); - return false; - } - - if (full_chars.size() < pref_chars.size()) { - // a string can't startwith a longer one - // X startswith Y -> len(X) >= len(Y) - expr_ref minus_one(m_autil.mk_numeral(rational::minus_one(), true), m); - expr_ref zero(m_autil.mk_numeral(rational::zero(), true), m); - expr_ref lens(m_autil.mk_add(mk_strlen(full), m_autil.mk_mul(minus_one, mk_strlen(pref))), m); - cex = m.mk_or(m.mk_not(f), m_autil.mk_ge(lens, zero)); - th_rewriter m_rw(m); - m_rw(cex); - return false; - } - - expr_ref_vector branch(m); - for (unsigned j = 0; j < pref_chars.size(); ++j) { - // full[j] == pref[j] - expr_ref cLHS(full_chars.get(j), sub_m); - expr_ref cRHS(pref_chars.get(j), sub_m); - expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m); - branch.push_back(_e); - } - - expr_ref final_diseq(mk_and(branch), sub_m); - fixed_length_assumptions.push_back(final_diseq); - TRACE(str_fl, tout << "inserting into fixed_lesson" < 0) { - // the empty string doesn't "stratwith" any non-empty string - return true; - } - - if (full_chars.size() < pref_chars.size()) { - // a string can't startwith a longer one - // X startswith Y -> len(X) >= len(Y) - return true; - } - - expr_ref_vector branch(m); - for (unsigned j = 0; j < pref_chars.size(); ++j) { - // full[j] == pref[j] - expr_ref cLHS(full_chars.get(j), sub_m); - expr_ref cRHS(pref_chars.get(j), sub_m); - expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m); - branch.push_back(_e); - } - - expr_ref final_diseq(mk_not(sub_m, mk_and(branch)), sub_m); - fixed_length_assumptions.push_back(final_diseq); - TRACE(str_fl, tout << "inserting into fixed_lesson" < 0) { - // the empty string doesn't "contain" any non-empty string - cex = m.mk_or(m.mk_not(f), ctx.mk_eq_atom(mk_strlen(needle), mk_int(0)), - m_autil.mk_ge(mk_strlen(haystack), mk_int(0))); - th_rewriter m_rw(m); - m_rw(cex); - return false; - } - - if (needle_chars.size() > haystack_chars.size()) { - // a string can't contain a longer one - // X contains Y -> len(X) >= len(Y) - expr_ref minus_one(m_autil.mk_numeral(rational::minus_one(), true), m); - expr_ref zero(m_autil.mk_numeral(rational::zero(), true), m); - expr_ref lens(m_autil.mk_add(mk_strlen(haystack), m_autil.mk_mul(minus_one, mk_strlen(needle))), m); - cex = m.mk_or(m.mk_not(f), m_autil.mk_ge(lens, zero)); - th_rewriter m_rw(m); - m_rw(cex); - return false; - } - // find all positions at which `needle` could occur in `haystack` - expr_ref_vector branches(m); - for (unsigned i = 0; i <= (haystack_chars.size() - needle_chars.size()); ++i) { - // i defines the offset into haystack_chars - expr_ref_vector branch(m); - for (unsigned j = 0; j < needle_chars.size(); ++j) { - // needle[j] == haystack[i+j] - ENSURE(i+j < haystack_chars.size()); - expr_ref cLHS(needle_chars.get(j), sub_m); - expr_ref cRHS(haystack_chars.get(i+j), sub_m); - expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m); - branch.push_back(_e); - } - branches.push_back(mk_and(branch)); - } - - expr_ref final_diseq(mk_or(branches), sub_m); - fixed_length_assumptions.push_back(final_diseq); - TRACE(str_fl, tout << "inserting into fixed_lesson" < 0) { - // the empty string doesn't "contain" any non-empty string - return true; - } - - if (needle_chars.size() > haystack_chars.size()) { - // a string can't contain a longer one - // X contains Y -> len(X) >= len(Y) - return true; - } - - - // find all positions at which `needle` could occur in `haystack` - expr_ref_vector branches(m); - for (unsigned i = 0; i <= (haystack_chars.size() - needle_chars.size()); ++i) { - // i defines the offset into haystack_chars - expr_ref_vector branch(m); - for (unsigned j = 0; j < needle_chars.size(); ++j) { - // needle[j] == haystack[i+j] - ENSURE(i+j < haystack_chars.size()); - expr_ref cLHS(needle_chars.get(j), sub_m); - expr_ref cRHS(haystack_chars.get(i+j), sub_m); - expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m); - branch.push_back(_e); - } - branches.push_back(mk_and(branch)); - } - - expr_ref final_diseq(mk_not(sub_m, mk_or(branches)), sub_m); - fixed_length_assumptions.push_back(final_diseq); - TRACE(str_fl, tout << "inserting into fixed_lesson" <& next, expr_ref_vector& trail, unsigned idx, expr* cond, ast_manager & m) { - expr* acc; - if (!m.is_true(cond) && next.find(idx, acc)) { - expr* args[2] = { cond, acc }; - cond = mk_or(m, 2, args); - } - trail.push_back(cond); - next.insert(idx, cond); - - } - - bool theory_str::fixed_length_reduce_regex_membership(smt::kernel & subsolver, expr_ref f, expr_ref & cex, bool polarity) { - ast_manager & m = get_manager(); - - ast_manager & sub_m = subsolver.m(); - - expr * str = nullptr, *re = nullptr; - VERIFY(u.str.is_in_re(f, str, re)); - - // TODO reuse some of the automaton framework from theory_str_regex - eautomaton * aut = m_mk_aut(re); - aut->compress(); - - expr_ref_vector str_chars(m); - if (!fixed_length_reduce_string_term(subsolver, str, str_chars, cex)) { - return false; - } - - if (str_chars.empty()) { - // check 0-length solution - bool zero_solution = false; - unsigned initial_state = aut->init(); - if (aut->is_final_state(initial_state)) { - zero_solution = true; - } else { - unsigned_vector eps_states; - aut->get_epsilon_closure(initial_state, eps_states); - for (unsigned state : eps_states) { - if (aut->is_final_state(state)) { - zero_solution = true; - break; - } - } - } - if (!zero_solution && polarity) { - TRACE(str_fl, tout << "contradiction: regex has no zero-length solutions, but our string must be a solution" << std::endl;); - cex = m.mk_or(m.mk_not(f), m.mk_not(ctx.mk_eq_atom(mk_strlen(str), mk_int(0)))); - ctx.get_rewriter()(cex); - return false; - } else if (zero_solution && !polarity) { - TRACE(str_fl, tout << "contradiction: regex has zero-length solutions, but our string must not be a solution" << std::endl;); - cex = m.mk_or(f, m.mk_not(ctx.mk_eq_atom(mk_strlen(str), mk_int(0)))); - ctx.get_rewriter()(cex); - return false; - } else { - TRACE(str_fl, tout << "regex constraint satisfied without asserting constraints to subsolver" << std::endl;); - return true; - } - } else { - expr_ref_vector trail(m); - u_map maps[2]; - bool select_map = false; - expr_ref cond(m); - eautomaton::moves mvs; - maps[0].insert(aut->init(), m.mk_true()); - // is_accepted(a, aut) & some state in frontier is final. - - for (auto& ch : str_chars) { - u_map& frontier = maps[select_map]; - u_map& next = maps[!select_map]; - select_map = !select_map; - next.reset(); - u_map::iterator it = frontier.begin(), end = frontier.end(); - for (; it != end; ++it) { - mvs.reset(); - unsigned state = it->m_key; - expr* acc = it->m_value; - aut->get_moves_from(state, mvs, false); - for (eautomaton::move& mv : mvs) { - SASSERT(mv.t()); - if (mv.t()->is_char() && m.is_value(mv.t()->get_char()) && m.is_value(ch)) { - if (mv.t()->get_char() == ch) { - add_next(next, trail, mv.dst(), acc, sub_m); - } - else { - continue; - } - } - else { - cond = mv.t()->accept(ch); - if (m.is_false(cond)) { - continue; - } - if (m.is_true(cond)) { - add_next(next, trail, mv.dst(), acc, sub_m); - continue; - } - expr* args[2] = { cond, acc }; - cond = mk_and(m, 2, args); - add_next(next, trail, mv.dst(), cond, sub_m); - } - } - } - } - u_map const& frontier = maps[select_map]; - expr_ref_vector ors(sub_m); - for (auto const& kv : frontier) { - unsigned_vector states; - bool has_final = false; - aut->get_epsilon_closure(kv.m_key, states); - for (unsigned i = 0; i < states.size() && !has_final; ++i) { - has_final = aut->is_final_state(states[i]); - } - if (has_final) { - ors.push_back(kv.m_value); - } - } - expr_ref result(mk_or(ors), sub_m); - th_rewriter rw(sub_m); - rw(result); - TRACE(str_fl, tout << "regex path constraint: " << mk_pp(result, sub_m) << std::endl;); - - if (sub_m.is_false(result)) { - // There are no solutions of that length in the automaton. - // If the membership constraint is true, we assert a conflict clause. - // If the membership constraint is false, we ignore the constraint. - if (polarity) { - // Decompose `str` into its components if it is a concatenation of terms. - // This fixes cases where the length of S in (S in RE) might be correct - // if the lengths of components of S are assigned in a different way. - expr_ref_vector str_terms(m); - expr_ref_vector str_terms_eq_len(m); - str_terms.push_back(str); - while (!str_terms.empty()) { - expr* str_term = str_terms.back(); - str_terms.pop_back(); - expr* arg0; - expr* arg1; - if (u.str.is_concat(str_term, arg0, arg1)) { - str_terms.push_back(arg0); - str_terms.push_back(arg1); - } else { - rational termLen; - if (fixed_length_get_len_value(str_term, termLen)) { - str_terms_eq_len.push_back(ctx.mk_eq_atom(mk_strlen(str_term), mk_int(termLen))); - } else { - // this is strange, since we knew the length of `str` in order to get here - cex = expr_ref(m_autil.mk_ge(mk_strlen(str_term), mk_int(0)), m); - return false; - } - } - } - - cex = m.mk_or(m.mk_not(f), m.mk_not(mk_and(str_terms_eq_len))); - ctx.get_rewriter()(cex); - return false; - } else { - TRACE(str_fl, tout << "regex constraint satisfied without asserting constraints to subsolver" << std::endl;); - return true; - } - } else { - if (polarity) { - fixed_length_assumptions.push_back(result); - fixed_length_lesson.insert(result, std::make_tuple(PFUN, f, f)); - } else { - fixed_length_assumptions.push_back(sub_m.mk_not(result)); - fixed_length_lesson.insert(sub_m.mk_not(result), std::make_tuple(NFUN, f, f)); - } - return true; - } - } - } - - /* - * Expressions in the vector eqc_chars exist only in the subsolver. - * If this method returns false, a conflict clause is returned in cex; - * this conflict clause exists in the main solver. - */ - bool theory_str::fixed_length_reduce_string_term(smt::kernel & subsolver, expr * term, - expr_ref_vector & eqc_chars, expr_ref & cex) { - ast_manager & m = get_manager(); - - ast_manager & sub_m = subsolver.m(); - - expr * arg0; - expr * arg1; - expr * arg2; - - zstring strConst; - if (u.str.is_string(term, strConst)) { - for (unsigned i = 0; i < strConst.length(); ++i) { - expr_ref chTerm(u.mk_char(strConst[i]), m); - eqc_chars.push_back(chTerm); - fixed_length_subterm_trail.push_back(chTerm); - } - } else if (to_app(term)->get_num_args() == 0 && !u.str.is_string(term)) { - // this is a variable; get its length and create/reuse character terms - expr_ref_vector * chars = nullptr; - if (!var_to_char_subterm_map.find(term, chars)) { - rational varLen_value; - bool var_hasLen = fixed_length_get_len_value(term, varLen_value); - if (!var_hasLen || varLen_value.is_neg()) { - TRACE(str_fl, tout << "variable " << mk_pp(term, m) << " has no length assignment or impossible length assignment - asserting conflict axiom" << std::endl;); - cex = expr_ref(m_autil.mk_ge(mk_strlen(term), mk_int(0)), m); - return false; - } - TRACE(str_fl, tout << "creating character terms for variable " << mk_pp(term, m) << ", length = " << varLen_value << std::endl;); - chars = alloc(expr_ref_vector, m); - for (rational i = rational::zero(); i < varLen_value; ++i) { - // TODO we can probably name these better for the sake of debugging - expr_ref ch(mk_fresh_const("char", u.mk_char_sort()), m); - chars->push_back(ch); - fixed_length_subterm_trail.push_back(ch); - } - var_to_char_subterm_map.insert(term, chars); - fixed_length_used_len_terms.insert(term, varLen_value); - } - for (auto c : *chars) { - eqc_chars.push_back(c); - } - } else if (u.str.is_concat(term, arg0, arg1)) { - expr_ref first(arg0, sub_m); - expr_ref second(arg1, sub_m); - expr_ref_vector chars0(m), chars1(m); - if (!fixed_length_reduce_string_term(subsolver, first, chars0, cex) - || !fixed_length_reduce_string_term(subsolver, second, chars1, cex)) { - return false; - } - eqc_chars.append(chars0); - eqc_chars.append(chars1); - } else if (u.str.is_extract(term, arg0, arg1, arg2)) { - // (str.substr Base Pos Len) - expr_ref first(arg0, sub_m); - expr_ref second(arg1, sub_m); - expr_ref third(arg2, sub_m); - expr_ref_vector base_chars(m); - if (!fixed_length_reduce_string_term(subsolver, first, base_chars, cex)) { - return false; - } - arith_value v(m); - v.init(&get_context()); - rational pos, len; - bool pos_exists = v.get_value(arg1, pos); - bool len_exists = v.get_value(arg2, len); - if (!pos_exists) { - cex = expr_ref(m.mk_or(m_autil.mk_ge(arg1, mk_int(0)), m_autil.mk_le(arg1, mk_int(0))), m); - return false; - } - if (!len_exists) { - cex = expr_ref(m.mk_or(m_autil.mk_ge(arg2, mk_int(0)), m_autil.mk_le(arg2, mk_int(0))), m); - return false; - } - TRACE(str_fl, tout << "reduce substring term: base=" << mk_pp(term, m) << " (length="<= rational(base_chars.size()) || len.is_neg()) { - eqc_chars.reset(); - return true; - } - else if (!pos.is_unsigned() || !len.is_unsigned()) { - return false; - } else { - unsigned _pos = pos.get_unsigned(); - unsigned _len = len.get_unsigned(); - if (_pos + _len < _pos) - return false; - if (_pos + _len >= base_chars.size()) { - // take as many characters as possible up to the end of base_chars - for (unsigned i = _pos; i < base_chars.size(); ++i) { - eqc_chars.push_back(base_chars.get(i)); - } - } else { - for (unsigned i = _pos; i < _pos + _len; ++i) { - eqc_chars.push_back(base_chars.get(i)); - } - } - } - } else if (u.str.is_at(term, arg0, arg1)) { - // (str.at Base Pos) - expr_ref base(arg0, sub_m); - expr_ref pos(arg1, sub_m); - expr_ref_vector base_chars(m); - if (!fixed_length_reduce_string_term(subsolver, base, base_chars, cex)) { - return false; - } - arith_value v(m); - v.init(&get_context()); - rational pos_value; - bool pos_exists = v.get_value(pos, pos_value); - if (!pos_exists) { - cex = m.mk_or(m_autil.mk_ge(pos, mk_int(0)), m_autil.mk_le(pos, mk_int(0))); - return false; - } - TRACE(str_fl, tout << "reduce str.at: base=" << mk_pp(base, m) << ", pos=" << pos_value.to_string() << std::endl;); - if (pos_value.is_neg() || pos_value >= rational(base_chars.size())) { - // return the empty string - eqc_chars.reset(); - } - else if (!pos_value.is_unsigned()) { - return false; - } else { - eqc_chars.push_back(base_chars.get(pos_value.get_unsigned())); - } - return true; - } else if (u.str.is_itos(term, arg0)) { - expr_ref i(arg0, m); - arith_value v(m); - v.init(&get_context()); - rational iValue; - bool iValue_exists = v.get_value(i, iValue); - if (!iValue_exists) { - cex = expr_ref(m.mk_or(m_autil.mk_ge(arg0, mk_int(0)), m_autil.mk_le(arg0, mk_int(0))), m); - return false; - } - rational termLen; - bool termLen_exists = v.get_value(mk_strlen(term), termLen); - if(!termLen_exists) { - cex = expr_ref(m.mk_or(m_autil.mk_ge(mk_strlen(term), mk_int(0)), m_autil.mk_le(mk_strlen(term), mk_int(0))), m); - return false; - } - TRACE(str_fl, tout << "reduce int.to.str: n=" << iValue << std::endl;); - if (iValue.is_neg()) { - if (!termLen.is_zero()) { - // conflict - cex = expr_ref(m.mk_not(m.mk_and(m_autil.mk_le(arg0, mk_int(-1)), m.mk_not(mk_strlen(term)))), m); - return false; - } - // return the empty string - eqc_chars.reset(); - return true; - } else { - if (termLen != iValue.get_num_decimal()) { - // conflict - cex = expr_ref(m.mk_not(m.mk_and(get_context().mk_eq_atom(mk_strlen(term), mk_int(termLen)), get_context().mk_eq_atom(arg0, mk_int(iValue)))), m); - return false; - } - // convert iValue to a constant - zstring iValue_str(iValue.to_string()); - for (unsigned idx = 0; idx < iValue_str.length(); ++idx) { - expr_ref chTerm(u.mk_char(iValue_str[idx]), m); - eqc_chars.push_back(chTerm); - } - return true; - } - } else { - TRACE(str_fl, tout << "string term " << mk_pp(term, m) << " handled as uninterpreted function" << std::endl;); - expr_ref_vector *chars = nullptr; - if (!uninterpreted_to_char_subterm_map.find(term, chars)) { - rational ufLen_value; - bool uf_hasLen = fixed_length_get_len_value(term, ufLen_value); - if (!uf_hasLen || ufLen_value.is_neg()) { - TRACE(str_fl, tout << "uninterpreted function " << mk_pp(term, m) << " has no length assignment or impossible length assignment - asserting conflict axiom" << std::endl;); - cex = expr_ref(m_autil.mk_ge(mk_strlen(term), mk_int(0)), m); - return false; - } - TRACE(str_fl, tout << "creating character terms for uninterpreted function " << mk_pp(term, m) << ", length = " << ufLen_value << std::endl;); - chars = alloc(expr_ref_vector, m); - for (rational i = rational::zero(); i < ufLen_value; ++i) { - expr_ref ch(mk_fresh_const("char", u.mk_char_sort()), m); - chars->push_back(ch); - fixed_length_subterm_trail.push_back(ch); - } - uninterpreted_to_char_subterm_map.insert(term, chars); - fixed_length_used_len_terms.insert(term, ufLen_value); - } - for (auto c : *chars) { - eqc_chars.push_back(c); - } - } - return true; - } - - bool theory_str::fixed_length_reduce_eq(smt::kernel & subsolver, expr_ref lhs, expr_ref rhs, expr_ref & cex) { - ast_manager & m = get_manager(); - - ast_manager & sub_m = subsolver.m(); - - expr_ref_vector lhs_chars(m), rhs_chars(m); - - if (!fixed_length_reduce_string_term(subsolver, lhs, lhs_chars, cex) - || !fixed_length_reduce_string_term(subsolver, rhs, rhs_chars, cex)) { - return false; - } - - if (lhs_chars.size() != rhs_chars.size()) { - TRACE(str_fl, tout << "length information inconsistent: " << mk_pp(lhs, m) << " has " << lhs_chars.size() << - " chars, " << mk_pp(rhs, m) << " has " << rhs_chars.size() << " chars" << std::endl;); - // equal strings ought to have equal lengths - cex = m.mk_or(m.mk_not(ctx.mk_eq_atom(lhs, rhs)), ctx.mk_eq_atom(mk_strlen(lhs), mk_strlen(rhs))); - return false; - } - for (unsigned i = 0; i < lhs_chars.size(); ++i) { - expr_ref cLHS(lhs_chars.get(i), sub_m); - expr_ref cRHS(rhs_chars.get(i), sub_m); - expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m); - fixed_length_assumptions.push_back(_e); - TRACE(str_fl, tout << "inserting into fixed_lesson" < &model, expr_ref_vector &cex) { - - ast_manager & m = get_manager(); - - TRACE(str, - ast_manager & m = get_manager(); - tout << "dumping all formulas:" << std::endl; - for (expr_ref_vector::iterator i = formulas.begin(); i != formulas.end(); ++i) { - expr * ex = *i; - tout << mk_pp(ex, m) << (ctx.is_relevant(ex) ? "" : " (NOT REL)") << std::endl; - } - ); - - fixed_length_subterm_trail.reset(); - fixed_length_used_len_terms.reset(); - fixed_length_assumptions.reset(); - - for (auto& kv: var_to_char_subterm_map) dealloc(kv.m_value); - var_to_char_subterm_map.reset(); - for (auto& kv: uninterpreted_to_char_subterm_map) dealloc(kv.m_value); - uninterpreted_to_char_subterm_map.reset(); - fixed_length_lesson.reset(); - - // All reduced Boolean formulas in the current assignment - expr_ref_vector fixed_length_reduced_boolean_formulas(m); - - // Boolean formulas on which to apply abstraction refinement. - expr_ref_vector abstracted_boolean_formulas(m); - - smt_params subsolver_params; - subsolver_params.m_string_solver = symbol("char"); - smt::kernel subsolver(m, subsolver_params); - subsolver.set_logic(symbol("QF_S")); - sort * str_sort = u.str.mk_string_sort(); - sort * bool_sort = m.mk_bool_sort(); - - for (expr * var : free_variables) { - TRACE(str_fl, tout << "initialize free variable " << mk_pp(var, m) << std::endl;); - rational var_lenVal; - if (!fixed_length_get_len_value(var, var_lenVal)) { - TRACE(str_fl, tout << "free variable " << mk_pp(var, m) << " has no length assignment" << std::endl;); - expr_ref var_len_assertion(m_autil.mk_ge(mk_strlen(var), mk_int(0)), m); - assert_axiom(var_len_assertion); - add_persisted_axiom(var_len_assertion); - return l_undef; - } - expr_ref_vector var_chars(m); - expr_ref str_counterexample(m); - if (!fixed_length_reduce_string_term(subsolver, var, var_chars, str_counterexample)) { - TRACE(str_fl, tout << "free variable " << mk_pp(var, m) << " caused a conflict; asserting and continuing" << std::endl;); - assert_axiom(str_counterexample); - return l_undef; - } - } - - for (expr * f : formulas) { - if (!get_context().is_relevant(f)) { - expr * subformula = nullptr; - if (m.is_not(f, subformula)) { - if (!get_context().is_relevant(subformula)) { - TRACE(str_fl, tout << "skip reducing formula " << mk_pp(f, m) << ", not relevant (and neither is its subformula)" << std::endl;); - continue; - } else { - TRACE(str_fl, tout << "considering formula " << mk_pp(f, m) << ", its subformula is relevant but it is not" << std::endl;); - } - } else { - TRACE(str_fl, tout << "skip reducing formula " << mk_pp(f, m) << ", not relevant" << std::endl;); - continue; - } - } - // reduce string formulas only. ignore others - sort * fSort = f->get_sort(); - if (fSort == bool_sort && !is_quantifier(f)) { - // extracted terms - expr * subterm; - expr * lhs; - expr * rhs; - if (m.is_eq(f, lhs, rhs)) { - sort * lhs_sort = lhs->get_sort(); - if (lhs_sort == str_sort) { - TRACE(str_fl, tout << "reduce string equality: " << mk_pp(lhs, m) << " == " << mk_pp(rhs, m) << std::endl;); - expr_ref cex(m); - expr_ref left(lhs, m); - expr_ref right(rhs, m); - if (!fixed_length_reduce_eq(subsolver, left, right, cex)) { - // missing a side condition. assert it and return unknown - assert_axiom(cex); - add_persisted_axiom(cex); - return l_undef; - } - fixed_length_reduced_boolean_formulas.push_back(f); - } else { - TRACE(str_fl, tout << "skip reducing formula " << mk_pp(f, m) << ", not an equality over strings" << std::endl;); - } - } else if (u.str.is_in_re(f)) { - TRACE(str_fl, tout << "reduce regex membership: " << mk_pp(f, m) << std::endl;); - expr_ref cex_clause(m); - expr_ref re(f, m); - if (!fixed_length_reduce_regex_membership(subsolver, re, cex_clause, true)) { - assert_axiom(cex_clause); - add_persisted_axiom(cex_clause); - return l_undef; - } - fixed_length_reduced_boolean_formulas.push_back(f); - } else if (u.str.is_contains(f)) { - // TODO in some cases (e.g. len(haystack) is only slightly greater than len(needle)) - // we might be okay to assert the full disjunction because there are very few disjuncts - if (m_params.m_FixedLengthRefinement) { - TRACE(str_fl, tout << "abstracting out positive contains: " << mk_pp(f, m) << std::endl;); - abstracted_boolean_formulas.push_back(f); - } else { - TRACE(str_fl, tout << "reduce positive contains: " << mk_pp(f, m) << std::endl;); - expr_ref cex(m); - expr_ref cont(f, m); - if (!fixed_length_reduce_contains(subsolver, cont, cex)) { - assert_axiom(cex); - add_persisted_axiom(cex); - return l_undef; - } - fixed_length_reduced_boolean_formulas.push_back(f); - } - } else if (u.str.is_prefix(f)) { - TRACE(str_fl, tout << "reduce positive prefix: " << mk_pp(f, m) << std::endl;); - expr_ref cex(m); - expr_ref pref(f, m); - if (!fixed_length_reduce_prefix(subsolver, pref, cex)) { - assert_axiom(cex); - add_persisted_axiom(cex); - return l_undef; - } - fixed_length_reduced_boolean_formulas.push_back(f); - } else if (u.str.is_suffix(f)) { - TRACE(str_fl, tout << "reduce positive suffix: " << mk_pp(f, m) << std::endl;); - expr_ref cex(m); - expr_ref suf(f, m); - if (!fixed_length_reduce_suffix(subsolver, suf, cex)) { - assert_axiom(cex); - add_persisted_axiom(cex); - return l_undef; - } - fixed_length_reduced_boolean_formulas.push_back(f); - }else if (m.is_not(f, subterm)) { - // if subterm is a string formula such as an equality, reduce it as a disequality - if (m.is_eq(subterm, lhs, rhs)) { - sort * lhs_sort = lhs->get_sort(); - if (lhs_sort == str_sort) { - TRACE(str_fl, tout << "reduce string disequality: " << mk_pp(lhs, m) << " != " << mk_pp(rhs, m) << std::endl;); - expr_ref cex(m); - expr_ref left(lhs, m); - expr_ref right(rhs, m); - if (!fixed_length_reduce_diseq(subsolver, left, right, cex)) { - // missing a side condition. assert it and return unknown - assert_axiom(cex); - add_persisted_axiom(cex); - return l_undef; - } - fixed_length_reduced_boolean_formulas.push_back(f); - } - } else if (u.str.is_in_re(subterm)) { - TRACE(str_fl, tout << "reduce negative regex membership: " << mk_pp(f, m) << std::endl;); - expr_ref cex_clause(m); - expr_ref re(subterm, m); - if (!fixed_length_reduce_regex_membership(subsolver, re, cex_clause, false)) { - assert_axiom(cex_clause); - add_persisted_axiom(cex_clause); - return l_undef; - } - fixed_length_reduced_boolean_formulas.push_back(f); - } else if (u.str.is_contains(subterm)) { - TRACE(str_fl, tout << "reduce negative contains: " << mk_pp(subterm, m) << std::endl;); - expr_ref cex(m); - expr_ref cont(subterm, m); - if (!fixed_length_reduce_negative_contains(subsolver, cont, cex)) { - assert_axiom(cex); - add_persisted_axiom(cex); - return l_undef; - } - fixed_length_reduced_boolean_formulas.push_back(f); - } else if (u.str.is_prefix(subterm)) { - TRACE(str_fl, tout << "reduce negative prefix: " << mk_pp(subterm, m) << std::endl;); - expr_ref cex(m); - expr_ref pref(subterm, m); - if (!fixed_length_reduce_negative_prefix(subsolver, pref, cex)) { - assert_axiom(cex); - add_persisted_axiom(cex); - return l_undef; - } - fixed_length_reduced_boolean_formulas.push_back(f); - } else if (u.str.is_suffix(subterm)) { - TRACE(str_fl, tout << "reduce negative suffix: " << mk_pp(subterm, m) << std::endl;); - expr_ref cex(m); - expr_ref suf(subterm, m); - if (!fixed_length_reduce_negative_suffix(subsolver, suf, cex)) { - assert_axiom(cex); - add_persisted_axiom(cex); - return l_undef; - } - fixed_length_reduced_boolean_formulas.push_back(f); - } else { - TRACE(str_fl, tout << "skip reducing formula " << mk_pp(f, m) << ", not a boolean formula we handle" << std::endl;); - } - } else { - TRACE(str_fl, tout << "skip reducing formula " << mk_pp(f, m) << ", not a boolean formula we handle" << std::endl;); - continue; - } - } else { - TRACE(str_fl, tout << "skip reducing formula " << mk_pp(f, m) << ", not relevant to strings" << std::endl;); - continue; - } - } - - // Check consistency of all string-integer conversion terms wrt. integer theory before we solve, - // possibly generating additional constraints for the bit-vector solver. - { - arith_value v(get_manager()); - v.init(&get_context()); - for (auto e : string_int_conversion_terms) { - TRACE(str_fl, tout << "pre-run check str-int term " << mk_pp(e, get_manager()) << std::endl;); - expr* _arg; - if (u.str.is_stoi(e, _arg)) { - expr_ref arg(_arg, m); - rational slen; - if (!fixed_length_get_len_value(arg, slen)) { - expr_ref stoi_cex(m_autil.mk_ge(mk_strlen(arg), mk_int(0)), m); - assert_axiom(stoi_cex); - add_persisted_axiom(stoi_cex); - return l_undef; - } - TRACE(str_fl, tout << "length of term is " << slen << std::endl;); - - rational ival; - if (v.get_value(e, ival)) { - TRACE(str_fl, tout << "integer theory assigns " << ival << " to " << mk_pp(e, get_manager()) << std::endl;); - // if ival is non-negative, because we know the length of arg, we can add a character constraint for arg - if (ival.is_nonneg()) { - zstring ival_str(ival.to_string()); - zstring padding; - for (rational i = rational::zero(); i < slen - rational(ival_str.length()); ++i) { - padding = padding + zstring("0"); - } - zstring arg_val = padding + ival_str; - expr_ref stoi_cex(m); - expr_ref arg_char_expr(mk_string(arg_val), m); - - // Add (e == ival) as a precondition. - precondition.push_back(m.mk_eq(e, mk_int(ival))); - // Assert (arg == arg_chars) in the subsolver. - if (!fixed_length_reduce_eq(subsolver, arg, arg_char_expr, stoi_cex)) { - // Counterexample: (str.to_int S) == ival AND len(S) == slen cannot both be true. - stoi_cex = expr_ref(m.mk_not(m.mk_and( - m.mk_eq(e, mk_int(ival)), - m.mk_eq(mk_strlen(arg), mk_int(slen)) - )), m); - assert_axiom(stoi_cex); - add_persisted_axiom(stoi_cex); - - return l_undef; - } - - fixed_length_reduced_boolean_formulas.push_back(m.mk_eq(e, mk_int(ival))); - } - } else { - TRACE(str_fl, tout << "integer theory has no assignment for " << mk_pp(e, get_manager()) << std::endl;); - // consistency needs to be checked after the string is assigned - } - } else if (u.str.is_to_code(e, _arg)) { - expr_ref arg(_arg, m); - rational ival; - if (v.get_value(e, ival)) { - TRACE(str_fl, tout << "integer theory assigns " << ival << " to " << mk_pp(e, m) << std::endl;); - if (ival >= rational::zero() && ival <= rational(u.max_char())) { - zstring ival_str(ival.get_unsigned()); - expr_ref arg_char_expr(mk_string(ival_str), m); - expr_ref stoi_cex(m); - // Add (e == ival) as a precondition - precondition.push_back(m.mk_eq(e, mk_int(ival))); - if (!fixed_length_reduce_eq(subsolver, arg, arg_char_expr, stoi_cex)) { - // Counterexample: (str.to_code arg) == ival AND arg == arg_char_expr cannot both be true. - stoi_cex = expr_ref(m.mk_not(m.mk_and(m.mk_eq(e, mk_int(ival)), m.mk_eq(arg, arg_char_expr))), m); - assert_axiom(stoi_cex); - add_persisted_axiom(stoi_cex); - return l_undef; - } - fixed_length_reduced_boolean_formulas.push_back(m.mk_eq(e, mk_int(ival))); - } - } else { - TRACE(str_fl, tout << "integer theory has no assignment for " << mk_pp(e, m) << std::endl;); - // consistency needs to be checked after the string is assigned - } - } else if (u.str.is_itos(e, _arg)) { - expr_ref arg(_arg, m); - rational slen; - if (!fixed_length_get_len_value(e, slen)) { - expr_ref stoi_cex(m_autil.mk_ge(mk_strlen(e), mk_int(0)), m); - assert_axiom(stoi_cex); - add_persisted_axiom(stoi_cex); - return l_undef; - } - TRACE(str_fl, tout << "length of term is " << slen << std::endl;); - rational ival; - if (v.get_value(arg, ival)) { - TRACE(str_fl, tout << "integer theory assigns " << ival << " to " << mk_pp(arg, get_manager()) << std::endl;); - zstring ival_str; - if (ival.is_neg()) { - // e must be the empty string, i.e. have length 0 - ival_str = zstring(""); - } else { - // e must be equal to the string representation of ival - ival_str = zstring(ival.to_string()); - } - // Add (arg == ival) as a precondition. - precondition.push_back(m.mk_eq(arg, mk_int(ival))); - // Assert (e == ival_str) in the subsolver. - expr_ref itos_cex(m); - expr_ref _e(e, m); - expr_ref arg_char_expr(mk_string(ival_str), m); - if (!fixed_length_reduce_eq(subsolver, _e, arg_char_expr, itos_cex)) { - // Counterexample: N in (str.from_int N) == ival AND len(str.from_int N) == slen cannot both be true. - itos_cex = expr_ref(m.mk_not(m.mk_and( - m.mk_eq(arg, mk_int(ival)), - m.mk_eq(mk_strlen(e), mk_int(slen)) - )), m); - assert_axiom(itos_cex); - add_persisted_axiom(itos_cex); - return l_undef; - } - fixed_length_reduced_boolean_formulas.push_back(m.mk_eq(arg, mk_int(ival))); - } else { - TRACE(str_fl, tout << "integer theory has no assignment for " << mk_pp(arg, get_manager()) << std::endl;); - // consistency needs to be checked after the string is assigned - } - } else if (u.str.is_from_code(e, _arg)) { - expr_ref arg(_arg, m); - rational ival; - if (v.get_value(arg, ival)) { - TRACE(str_fl, tout << "integer theory assigns " << ival << " to " << mk_pp(arg, m) << std::endl;); - if (ival >= rational::zero() && ival <= rational(u.max_char())) { - zstring ival_str(ival.get_unsigned()); - expr_ref arg_char_expr(mk_string(ival_str), m); - expr_ref itos_cex(m); - // Add (arg == ival) as a precondition - precondition.push_back(m.mk_eq(arg, mk_int(ival))); - expr_ref _e(e, m); - if (!fixed_length_reduce_eq(subsolver, _e, arg_char_expr, itos_cex)) { - // Counterexample: (str.from_code arg) == arg_char AND arg == ival cannot both be true. - itos_cex = expr_ref(m.mk_not(m.mk_and(m.mk_eq(arg, mk_int(ival)), m.mk_eq(e, arg_char_expr))), m); - assert_axiom(itos_cex); - add_persisted_axiom(itos_cex); - return l_undef; - } - fixed_length_reduced_boolean_formulas.push_back(m.mk_eq(e, mk_int(ival))); - } - } else { - TRACE(str_fl, tout << "integer theory has no assignment for " << mk_pp(arg, m) << std::endl;); - // consistency needs to be checked after the string is assigned - } - } - } - } - - for (auto e : fixed_length_used_len_terms) { - expr * var = &e.get_key(); - rational val = e.get_value(); - precondition.push_back(m.mk_eq(u.str.mk_length(var), mk_int(val))); - } - - TRACE(str_fl, - tout << "formulas asserted to subsolver:" << std::endl; - for (auto e : fixed_length_assumptions) { - tout << mk_pp(e, subsolver.m()) << std::endl; - } - tout << "variable to character mappings:" << std::endl; - for (auto &entry : var_to_char_subterm_map) { - tout << mk_pp(entry.m_key, get_manager()) << ":"; - for (auto e : *entry.m_value) { - tout << " " << mk_pp(e, subsolver.m()); - } - tout << std::endl; - } - tout << "reduced boolean formulas:" << std::endl; - for (expr* e : fixed_length_reduced_boolean_formulas) { - tout << mk_pp(e, m) << std::endl; - } - ); - - TRACE(str_fl, tout << "calling subsolver" << std::endl;); - - lbool subproblem_status = subsolver.check(fixed_length_assumptions); - - if (subproblem_status == l_true) { - TRACE(str_fl, tout << "subsolver found SAT; reconstructing model" << std::endl;); - model_ref subModel; - subsolver.get_model(subModel); - - expr_substitution subst(m); - - //model_smt2_pp(std::cout, m, *subModel, 2); - for (auto entry : var_to_char_subterm_map) { - svector assignment; - expr * var = entry.m_key; - for (expr * chExpr : *(entry.m_value)) { - expr_ref chAssignment(subModel->get_const_interp(to_app(chExpr)->get_decl()), m); - unsigned n = 0; - if (chAssignment != nullptr && u.is_const_char(chAssignment, n)) { - assignment.push_back(n); - } else { - assignment.push_back((unsigned)'?'); - } - } - zstring strValue(assignment.size(), assignment.data()); - model.insert(var, strValue); - subst.insert(var, mk_string(strValue)); - } - TRACE(str_fl, - for (auto entry : model) { - tout << mk_pp(entry.m_key, m) << " = " << entry.m_value << std::endl; - } - ); - for (auto entry : uninterpreted_to_char_subterm_map) { - svector assignment; - expr * var = entry.m_key; - for (expr * chExpr : *(entry.m_value)) { - expr_ref chAssignment(subModel->get_const_interp(to_app(chExpr)->get_decl()), m); - unsigned n = 0; - if (chAssignment != nullptr && u.is_const_char(chAssignment, n)) { - assignment.push_back(n); - } else { - assignment.push_back((unsigned)'?'); - } - } - zstring strValue(assignment.size(), assignment.data()); - model.insert(var, strValue); - subst.insert(var, mk_string(strValue)); - } - - // Check consistency of string-integer conversion terms after the search. - { - scoped_ptr replacer = mk_default_expr_replacer(m, false); - replacer->set_substitution(&subst); - th_rewriter rw(m); - arith_value v(get_manager()); - v.init(&get_context()); - for (auto e : string_int_conversion_terms) { - TRACE(str_fl, tout << "post-run check str-int term " << mk_pp(e, get_manager()) << std::endl;); - expr* _arg; - if (u.str.is_stoi(e, _arg)) { - expr_ref arg(_arg, m); - rational ival; - if (v.get_value(e, ival)) { - expr_ref arg_subst(arg, m); - (*replacer)(arg, arg_subst); - rw(arg_subst); - TRACE(str_fl, tout << "ival = " << ival << ", string arg evaluates to " << mk_pp(arg_subst, m) << std::endl;); - - zstring arg_zstr; - if (u.str.is_string(arg_subst, arg_zstr)) { - rational arg_value; - if (string_integer_conversion_valid(arg_zstr, arg_value)) { - if (ival != arg_value) { - // contradiction - expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_string(arg_zstr)), ctx.mk_eq_atom(e, mk_int(ival)))), m); - assert_axiom(cex); - return l_undef; - } - } else { - if (!ival.is_minus_one()) { - expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_string(arg_zstr)), ctx.mk_eq_atom(e, mk_int(ival)))), m); - assert_axiom(cex); - return l_undef; - } - } - } - } - } else if (u.str.is_to_code(e, _arg)) { - expr_ref arg(_arg, m); - rational ival; - if (v.get_value(e, ival)) { - expr_ref arg_subst(arg, m); - (*replacer)(arg, arg_subst); - rw(arg_subst); - TRACE(str_fl, tout << "ival = " << ival << ", string arg evaluates to " << mk_pp(arg_subst, m) << std::endl;); - zstring arg_zstr; - if (u.str.is_string(arg_subst, arg_zstr)) { - if (ival >= rational::zero() && ival <= rational(u.max_char())) { - // check that arg_subst has length 1 and that the codepoints are the same - if (arg_zstr.length() != 1 || rational(arg_zstr[0]) != ival) { - // contradiction - expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_string(arg_zstr)), ctx.mk_eq_atom(e, mk_int(ival)))), m); - assert_axiom(cex); - return l_undef; - } - } else { - // arg_subst must not be a singleton char - if (arg_zstr.length() == 1) { - // contradiction - expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_string(arg_zstr)), ctx.mk_eq_atom(e, mk_int(ival)))), m); - assert_axiom(cex); - return l_undef; - } - } - } - } - } else if (u.str.is_itos(e, _arg)) { - expr_ref arg(_arg, m); - rational ival; - if (v.get_value(arg, ival)) { - expr_ref e_subst(e, m); - (*replacer)(e, e_subst); - rw(e_subst); - TRACE(str_fl, tout << "ival = " << ival << ", string arg evaluates to " << mk_pp(e_subst, m) << std::endl;); - - zstring e_zstr; - if (u.str.is_string(e_subst, e_zstr)) { - // if arg is negative, e must be empty - // if arg is non-negative, e must be valid AND cannot contain leading zeroes - - if (ival.is_neg()) { - if (!e_zstr.empty()) { - // contradiction - expr_ref cex(ctx.mk_eq_atom(m_autil.mk_le(arg, mk_int(-1)), ctx.mk_eq_atom(e, mk_string(""))), m); - assert_axiom(cex); - return l_undef; - } - } else { - rational e_value; - if (string_integer_conversion_valid(e_zstr, e_value)) { - // e contains leading zeroes if its first character is 0 but converted to something other than 0 - if (e_zstr[0] == '0' && !e_value.is_zero()) { - // contradiction - expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_int(ival)), ctx.mk_eq_atom(e, mk_string(e_zstr)))), m); - assert_axiom(cex); - return l_undef; - } - } else { - // contradiction - expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_int(ival)), ctx.mk_eq_atom(e, mk_string(e_zstr)))), m); - assert_axiom(cex); - return l_undef; - } - } - } - } - } else if (u.str.is_from_code(e, _arg)) { - expr_ref arg(_arg, m); - rational ival; - if (v.get_value(arg, ival)) { - expr_ref e_subst(e, m); - (*replacer)(e, e_subst); - rw(e_subst); - TRACE(str_fl, tout << "ival = " << ival << ", string arg evaluates to " << mk_pp(e_subst, m) << std::endl;); - zstring e_zstr; - if (u.str.is_string(e_subst, e_zstr)) { - // if arg is out of range, e must be empty - // if arg is in range, e must be valid - if (ival <= rational::zero() || ival >= rational(u.max_char())) { - if (!e_zstr.empty()) { - // contradiction - expr_ref cex(ctx.mk_eq_atom( - m.mk_or(m_autil.mk_le(arg, mk_int(0)), m_autil.mk_ge(arg, mk_int(u.max_char() + 1))), - ctx.mk_eq_atom(e, mk_string("")) - ), m); - assert_axiom(cex); - return l_undef; - } - } else { - if (e_zstr.length() != 1 || e_zstr[0] != ival.get_unsigned()) { - // contradiction - expr_ref premise(ctx.mk_eq_atom(arg, mk_int(ival)), m); - expr_ref conclusion(ctx.mk_eq_atom(e, mk_string(zstring(ival.get_unsigned()))), m); - expr_ref cex(rewrite_implication(premise, conclusion), m); - assert_axiom(cex); - return l_undef; - } - } - } - } - } - } - } - - // TODO insert length values into substitution table as well? - if (m_params.m_FixedLengthRefinement) { - scoped_ptr replacer = mk_default_expr_replacer(m, false); - replacer->set_substitution(&subst); - th_rewriter rw(m); - if (!abstracted_boolean_formulas.empty()) { - for (auto f : abstracted_boolean_formulas) { - TRACE(str_fl, tout << "refinement of boolean formula: " << mk_pp(f, m) << std::endl;); - expr_ref f_new(m); - (*replacer)(f, f_new); - rw(f_new); - TRACE(str_fl, tout << "after substitution and simplification, evaluates to: " << mk_pp(f_new, m) << std::endl;); - // now there are three cases, depending on what f_new evaluates to: - // true -> OK, do nothing - // false -> refine abstraction by generating conflict clause - // anything else -> error, probably our substitution was incomplete - if (m.is_true(f_new)) { - // do nothing - } else if (m.is_false(f_new)) { - expr * needle = nullptr, *haystack = nullptr; - if (u.str.is_contains(f, haystack, needle)) { - expr_ref haystack_assignment(m); - expr_ref needle_assignment(m); - (*replacer)(haystack, haystack_assignment); - (*replacer)(needle, needle_assignment); - cex.push_back(f); - cex.push_back(ctx.mk_eq_atom(haystack, haystack_assignment)); - cex.push_back(ctx.mk_eq_atom(needle, needle_assignment)); - return l_false; - } else { - TRACE(str_fl, tout << "error: unhandled refinement term " << mk_pp(f, m) << std::endl;); - NOT_IMPLEMENTED_YET(); - } - } else { - NOT_IMPLEMENTED_YET(); - } - } - } - } - - return l_true; - } else if (subproblem_status == l_false) { - if (m_params.m_FixedLengthNaiveCounterexamples) { - TRACE(str_fl, tout << "subsolver found UNSAT; constructing length counterexample" << std::endl;); - for (auto e : fixed_length_used_len_terms) { - expr * var = &e.get_key(); - rational val = e.get_value(); - cex.push_back(m.mk_eq(u.str.mk_length(var), mk_int(val))); - } - for (auto e : fixed_length_reduced_boolean_formulas) { - cex.push_back(e); - } - return l_false; - } else { - TRACE(str_fl, tout << "subsolver found UNSAT; reconstructing unsat core" << std::endl;); - TRACE(str_fl, tout << "unsat core has size " << subsolver.get_unsat_core_size() << std::endl;); - bool negate_pre = false; - for (unsigned i = 0; i < subsolver.get_unsat_core_size(); ++i) { - TRACE(str, tout << "entry " << i << " = " << mk_pp(subsolver.get_unsat_core_expr(i), m) << std::endl;); - rational index; - expr* lhs; - expr* rhs; - TRACE(str_fl, tout << fixed_length_lesson.size() << std::endl;); - std::tie(index, lhs, rhs) = fixed_length_lesson.find(subsolver.get_unsat_core_expr(i)); - TRACE(str_fl, tout << "lesson: " << mk_pp(lhs, m) << " == " << mk_pp(rhs, m) << " at index " << index << std::endl;); - cex.push_back(refine(lhs, rhs, index)); - if (index < rational(0)) { - negate_pre = true; - } - } - if (negate_pre || subsolver.get_unsat_core_size() == 0){ - for (auto ex : precondition) { - cex.push_back(ex); - } - } - return l_false; - } - } else { // l_undef - TRACE(str_fl, tout << "WARNING: subsolver found UNKNOWN" << std::endl;); - return l_undef; - } - } - -}; // namespace smt diff --git a/src/smt/theory_str_regex.cpp b/src/smt/theory_str_regex.cpp deleted file mode 100644 index 094220d7b..000000000 --- a/src/smt/theory_str_regex.cpp +++ /dev/null @@ -1,1526 +0,0 @@ -/*++ - Module Name: - - theory_str_regex.cpp - - Abstract: - - Regular expression components for Z3str3 (theory_str). - - Author: - - Murphy Berzish (2019-10-25) - - Revision History: - - --*/ - -#include "smt/theory_str.h" - -namespace smt { - - // saturating unsigned addition - unsigned inline _qadd(unsigned a, unsigned b) { - if (a == UINT_MAX || b == UINT_MAX) { - return UINT_MAX; - } - unsigned result = a + b; - if (result < a || result < b) { - return UINT_MAX; - } - return result; - } - - // saturating unsigned multiply - unsigned inline _qmul(unsigned a, unsigned b) { - if (a == UINT_MAX || b == UINT_MAX) { - return UINT_MAX; - } - uint64_t result = static_cast(a) * static_cast(b); - if (result > UINT_MAX) { - return UINT_MAX; - } - return static_cast(result); - } - - // Returns false if we need to give up solving, e.g. because we found symbolic expressions in an automaton. - bool theory_str::solve_regex_automata() { - for (auto str_in_re : regex_terms) { - expr * str = nullptr; - expr * re = nullptr; - u.str.is_in_re(str_in_re, str, re); - if (!ctx.b_internalized(str_in_re)) { - TRACE(str, tout << "regex term " << mk_pp(str_in_re, m) << " not internalized; fixing and continuing" << std::endl;); - ctx.internalize(str_in_re, false); - finalCheckProgressIndicator = true; - continue; - } - lbool current_assignment = ctx.get_assignment(str_in_re); - TRACE(str, tout << "regex term: " << mk_pp(str, m) << " in " << mk_pp(re, m) << " : " << current_assignment << std::endl;); - if (current_assignment == l_undef) { - continue; - } - - if (!regex_terms_with_length_constraints.contains(str_in_re)) { - if (current_assignment == l_true && check_regex_length_linearity(re)) { - TRACE(str, tout << "regex length constraints expected to be linear -- generating and asserting them" << std::endl;); - - if (regex_term_to_length_constraint.contains(str_in_re)) { - // use existing length constraint - expr * top_level_length_constraint = nullptr; - regex_term_to_length_constraint.find(str_in_re, top_level_length_constraint); - - ptr_vector extra_length_vars; - regex_term_to_extra_length_vars.find(str_in_re, extra_length_vars); - - assert_axiom(top_level_length_constraint); - for(auto v : extra_length_vars) { - refresh_theory_var(v); - expr_ref len_constraint(m_autil.mk_ge(v, m_autil.mk_numeral(rational::zero(), true)), m); - assert_axiom(len_constraint); - } - } else { - // generate new length constraint - expr_ref_vector extra_length_vars(m); - expr_ref _top_level_length_constraint = infer_all_regex_lengths(mk_strlen(str), re, extra_length_vars); - expr_ref premise(str_in_re, m); - expr_ref top_level_length_constraint(m.mk_implies(premise, _top_level_length_constraint), m); - th_rewriter rw(m); - rw(top_level_length_constraint); - TRACE(str, tout << "top-level length constraint: " << mk_pp(top_level_length_constraint, m) << std::endl;); - // assert and track length constraint - assert_axiom(top_level_length_constraint); - for(auto v : extra_length_vars) { - expr_ref len_constraint(m_autil.mk_ge(v, m_autil.mk_numeral(rational::zero(), true)), m); - assert_axiom(len_constraint); - } - - regex_term_to_length_constraint.insert(str_in_re, top_level_length_constraint); - ptr_vector vtmp; - for(auto v : extra_length_vars) { - vtmp.push_back(v); - } - regex_term_to_extra_length_vars.insert(str_in_re, vtmp); - } - - regex_terms_with_length_constraints.insert(str_in_re); - m_trail_stack.push(insert_obj_trail(regex_terms_with_length_constraints, str_in_re)); - } - } // re not in regex_terms_with_length_constraints - - rational exact_length_value; - if (get_len_value(str, exact_length_value)) { - TRACE(str, tout << "exact length of " << mk_pp(str, m) << " is " << exact_length_value << std::endl;); - - if (regex_terms_with_path_constraints.contains(str_in_re)) { - TRACE(str, tout << "term " << mk_pp(str_in_re, m) << " already has path constraints set up" << std::endl;); - continue; - } - - // find a consistent automaton for this term - bool found = false; - regex_automaton_under_assumptions assumption; - if (regex_automaton_assumptions.contains(re) && - !regex_automaton_assumptions[re].empty()){ - for (auto autA : regex_automaton_assumptions[re]) { - rational assumed_upper_bound, assumed_lower_bound; - bool assumes_upper_bound = autA.get_upper_bound(assumed_upper_bound); - bool assumes_lower_bound = autA.get_lower_bound(assumed_lower_bound); - if (!assumes_upper_bound && !assumes_lower_bound) { - // automaton with no assumptions is always usable - assumption = autA; - found = true; - break; - } - // TODO check consistency of bounds assumptions - } // foreach(a in regex_automaton_assumptions) - } - if (found) { - if (exact_length_value.is_zero()) { - // check consistency of 0-length solution with automaton - eautomaton * aut = assumption.get_automaton(); - bool zero_solution = false; - unsigned initial_state = aut->init(); - if (aut->is_final_state(initial_state)) { - zero_solution = true; - } else { - unsigned_vector eps_states; - aut->get_epsilon_closure(initial_state, eps_states); - for (unsigned_vector::iterator it = eps_states.begin(); it != eps_states.end(); ++it) { - unsigned state = *it; - if (aut->is_final_state(state)) { - zero_solution = true; - break; - } - } - } - - // now check polarity of automaton wrt. original term - if ( (current_assignment == l_true && !assumption.get_polarity()) - || (current_assignment == l_false && assumption.get_polarity())) { - // invert sense - zero_solution = !zero_solution; - } - - if (zero_solution) { - TRACE(str, tout << "zero-length solution OK -- asserting empty path constraint" << std::endl;); - expr_ref_vector lhs_terms(m); - if (current_assignment == l_true) { - lhs_terms.push_back(str_in_re); - } else { - lhs_terms.push_back(m.mk_not(str_in_re)); - } - lhs_terms.push_back(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(exact_length_value, true))); - expr_ref lhs(mk_and(lhs_terms), m); - expr_ref rhs(ctx.mk_eq_atom(str, mk_string("")), m); - assert_implication(lhs, rhs); - regex_terms_with_path_constraints.insert(str_in_re); - m_trail_stack.push(insert_obj_trail(regex_terms_with_path_constraints, str_in_re)); - } else { - TRACE(str, tout << "zero-length solution not admitted by this automaton -- asserting conflict clause" << std::endl;); - expr_ref_vector lhs_terms(m); - if (current_assignment == l_true) { - lhs_terms.push_back(str_in_re); - } else { - lhs_terms.push_back(m.mk_not(str_in_re)); - } - lhs_terms.push_back(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(exact_length_value, true))); - expr_ref lhs(mk_and(lhs_terms), m); - expr_ref conflict(m.mk_not(lhs), m); - assert_axiom(conflict); - } - regex_inc_counter(regex_length_attempt_count, re); - continue; - } else { - // fixed-length model construction handles path constraints on our behalf, and with a better reduction - continue; - } - } else { - // no automata available, or else all bounds assumptions are invalid - unsigned expected_complexity = estimate_regex_complexity(re); - if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold || regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold) { - CTRACE(str, regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold, - tout << "failed automaton threshold reached for " << mk_pp(str_in_re, m) << " -- automatically constructing full automaton" << std::endl;); - eautomaton * aut = m_mk_aut(re); - if (aut == nullptr) { - TRACE(str, tout << "ERROR: symbolic automaton construction failed, likely due to non-constant term in regex" << std::endl;); - return false; - } - aut->compress(); - regex_automata.push_back(aut); - regex_automaton_under_assumptions new_aut(re, aut, true); - if (!regex_automaton_assumptions.contains(re)) { - regex_automaton_assumptions.insert(re, svector()); - } - regex_automaton_assumptions[re].push_back(new_aut); - TRACE(str, tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); - find_automaton_initial_bounds(str_in_re, aut); - } else { - regex_inc_counter(regex_fail_count, str_in_re); - } - continue; - } - } // get_len_value() - expr_ref str_len(mk_strlen(str), m); - rational lower_bound_value; - rational upper_bound_value; - bool lower_bound_exists = lower_bound(str_len, lower_bound_value); - bool upper_bound_exists = upper_bound(str_len, upper_bound_value); - CTRACE(str, lower_bound_exists, tout << "lower bound of " << mk_pp(str, m) << " is " << lower_bound_value << std::endl;); - CTRACE(str, upper_bound_exists, tout << "upper bound of " << mk_pp(str, m) << " is " << upper_bound_value << std::endl;); - - bool new_lower_bound_info = true; - bool new_upper_bound_info = true; - // check last seen lower/upper bound to avoid performing duplicate work - if (regex_last_lower_bound.contains(str)) { - rational last_lb_value; - regex_last_lower_bound.find(str, last_lb_value); - if (last_lb_value == lower_bound_value) { - new_lower_bound_info = false; - } - } - if (regex_last_upper_bound.contains(str)) { - rational last_ub_value; - regex_last_upper_bound.find(str, last_ub_value); - if (last_ub_value == upper_bound_value) { - new_upper_bound_info = false; - } - } - - if (new_lower_bound_info) { - regex_last_lower_bound.insert(str, lower_bound_value); - } - if (new_upper_bound_info) { - regex_last_upper_bound.insert(str, upper_bound_value); - } - - if (upper_bound_exists && new_upper_bound_info) { - // check current assumptions - if (regex_automaton_assumptions.contains(re) && - !regex_automaton_assumptions[re].empty()){ - // one or more existing assumptions. - // see if the (current best) upper bound can be refined - // (note that if we have an automaton with no assumption, - // this automatically counts as best) - bool need_assumption = true; - regex_automaton_under_assumptions last_assumption; - rational last_ub = rational::minus_one(); - for (auto autA : regex_automaton_assumptions[re]) { - if ((current_assignment == l_true && autA.get_polarity() == false) - || (current_assignment == l_false && autA.get_polarity() == true)) { - // automaton uses incorrect polarity - continue; - } - rational this_ub; - if (autA.get_upper_bound(this_ub)) { - if (last_ub == rational::minus_one() || this_ub < last_ub) { - last_ub = this_ub; - last_assumption = autA; - } - } else { - need_assumption = false; - last_assumption = autA; - break; - } - } - if (!last_ub.is_minus_one() || !need_assumption) { - CTRACE(str, !need_assumption, tout << "using automaton with full length information" << std::endl;); - CTRACE(str, need_assumption, tout << "using automaton with assumed upper bound of " << last_ub << std::endl;); - - rational refined_upper_bound; - bool solution_at_upper_bound = refine_automaton_upper_bound(last_assumption.get_automaton(), - upper_bound_value, refined_upper_bound); - TRACE(str, tout << "refined upper bound is " << refined_upper_bound << - (solution_at_upper_bound?", solution at upper bound":", no solution at upper bound") << std::endl;); - - expr_ref_vector lhs(m); - if (current_assignment == l_false) { - lhs.push_back(m.mk_not(str_in_re)); - } else { - lhs.push_back(str_in_re); - } - if (need_assumption) { - lhs.push_back(m_autil.mk_le(str_len, m_autil.mk_numeral(last_ub, true))); - } - lhs.push_back(m_autil.mk_le(str_len, m_autil.mk_numeral(upper_bound_value, true))); - - expr_ref_vector rhs(m); - - if (solution_at_upper_bound) { - if (refined_upper_bound.is_minus_one()) { - // If there are solutions at the upper bound but not below it, make the bound exact. - rhs.push_back(ctx.mk_eq_atom(str_len, m_autil.mk_numeral(upper_bound_value, true))); - } else { - // If there are solutions at and below the upper bound, add an additional bound. - rhs.push_back(m.mk_or( - ctx.mk_eq_atom(str_len, m_autil.mk_numeral(upper_bound_value, true)), - m_autil.mk_le(str_len, m_autil.mk_numeral(refined_upper_bound, true)) - )); - } - } else { - if (refined_upper_bound.is_minus_one()) { - // If there are no solutions at or below the upper bound, assert a conflict clause. - rhs.push_back(m.mk_not(m_autil.mk_le(str_len, m_autil.mk_numeral(upper_bound_value, true)))); - } else { - // If there are solutions below the upper bound but not at it, refine the bound. - rhs.push_back(m_autil.mk_le(str_len, m_autil.mk_numeral(refined_upper_bound, true))); - } - } - - if (!rhs.empty()) { - expr_ref lhs_terms(mk_and(lhs), m); - expr_ref rhs_terms(mk_and(rhs), m); - assert_implication(lhs_terms, rhs_terms); - } - } - } else { - // no existing automata/assumptions. - // if it's easy to construct a full automaton for R, do so - unsigned expected_complexity = estimate_regex_complexity(re); - bool failureThresholdExceeded = (regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold); - if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold || failureThresholdExceeded) { - eautomaton * aut = m_mk_aut(re); - if (aut == nullptr) { - TRACE(str, tout << "ERROR: symbolic automaton construction failed, likely due to non-constant term in regex" << std::endl;); - return false; - } - aut->compress(); - regex_automata.push_back(aut); - regex_automaton_under_assumptions new_aut(re, aut, true); - if (!regex_automaton_assumptions.contains(re)) { - regex_automaton_assumptions.insert(re, svector()); - } - regex_automaton_assumptions[re].push_back(new_aut); - TRACE(str, tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); - find_automaton_initial_bounds(str_in_re, aut); - } else { - regex_inc_counter(regex_fail_count, str_in_re); - } - continue; - } - } else { // !upper_bound_exists - // no upper bound information - if (lower_bound_exists && !lower_bound_value.is_zero() && new_lower_bound_info) { - // nonzero lower bound, no upper bound - - // check current assumptions - if (regex_automaton_assumptions.contains(re) && - !regex_automaton_assumptions[re].empty()){ - // one or more existing assumptions. - // see if the (current best) lower bound can be refined - // (note that if we have an automaton with no assumption, - // this automatically counts as best) - bool need_assumption = true; - regex_automaton_under_assumptions last_assumption; - rational last_lb = rational::zero(); // the default - for (auto autA : regex_automaton_assumptions[re]) { - if ((current_assignment == l_true && autA.get_polarity() == false) - || (current_assignment == l_false && autA.get_polarity() == true)) { - // automaton uses incorrect polarity - continue; - } - rational this_lb; - if (autA.get_lower_bound(this_lb)) { - if (this_lb > last_lb) { - last_lb = this_lb; - last_assumption = autA; - } - } else { - need_assumption = false; - last_assumption = autA; - break; - } - } - if (!last_lb.is_zero() || !need_assumption) { - CTRACE(str, !need_assumption, tout << "using automaton with full length information" << std::endl;); - CTRACE(str, need_assumption, tout << "using automaton with assumed lower bound of " << last_lb << std::endl;); - rational refined_lower_bound; - bool solution_at_lower_bound = refine_automaton_lower_bound(last_assumption.get_automaton(), - lower_bound_value, refined_lower_bound); - TRACE(str, tout << "refined lower bound is " << refined_lower_bound << - (solution_at_lower_bound?", solution at lower bound":", no solution at lower bound") << std::endl;); - - expr_ref_vector lhs(m); - if (current_assignment == l_false) { - lhs.push_back(m.mk_not(str_in_re)); - } else { - lhs.push_back(str_in_re); - } - if (need_assumption) { - lhs.push_back(m_autil.mk_ge(str_len, m_autil.mk_numeral(last_lb, true))); - } - lhs.push_back(m_autil.mk_ge(str_len, m_autil.mk_numeral(lower_bound_value, true))); - - expr_ref_vector rhs(m); - - if (solution_at_lower_bound) { - if (refined_lower_bound.is_minus_one()) { - // If there are solutions at the lower bound but not above it, make the bound exact. - rhs.push_back(ctx.mk_eq_atom(str_len, m_autil.mk_numeral(lower_bound_value, true))); - } else { - // If there are solutions at and above the lower bound, add an additional bound. - // DISABLED as this is causing non-termination in the integer solver. --mtrberzi - /* - rhs.push_back(m.mk_or( - ctx.mk_eq_atom(str_len, m_autil.mk_numeral(lower_bound_value, true)), - m_autil.mk_ge(str_len, m_autil.mk_numeral(refined_lower_bound, true)) - )); - */ - } - } else { - if (refined_lower_bound.is_minus_one()) { - // If there are no solutions at or above the lower bound, assert a conflict clause. - rhs.push_back(m.mk_not(m_autil.mk_ge(str_len, m_autil.mk_numeral(lower_bound_value, true)))); - } else { - // If there are solutions above the lower bound but not at it, refine the bound. - rhs.push_back(m_autil.mk_ge(str_len, m_autil.mk_numeral(refined_lower_bound, true))); - } - } - - if (!rhs.empty()) { - expr_ref lhs_terms(mk_and(lhs), m); - expr_ref rhs_terms(mk_and(rhs), m); - assert_implication(lhs_terms, rhs_terms); - } - } - } else { - // no existing automata/assumptions. - // if it's easy to construct a full automaton for R, do so - unsigned expected_complexity = estimate_regex_complexity(re); - bool failureThresholdExceeded = (regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold); - if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold || failureThresholdExceeded) { - eautomaton * aut = m_mk_aut(re); - if (aut == nullptr) { - TRACE(str, tout << "ERROR: symbolic automaton construction failed, likely due to non-constant term in regex" << std::endl;); - return false; - } - aut->compress(); - regex_automata.push_back(aut); - regex_automaton_under_assumptions new_aut(re, aut, true); - if (!regex_automaton_assumptions.contains(re)) { - regex_automaton_assumptions.insert(re, svector()); - } - regex_automaton_assumptions[re].push_back(new_aut); - TRACE(str, tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); - find_automaton_initial_bounds(str_in_re, aut); - } else { - // TODO check negation? - // TODO construct a partial automaton for R to the given lower bound? - if (false) { - - } else { - regex_inc_counter(regex_fail_count, str_in_re); - } - } - continue; - } - } else { // !lower_bound_exists - // no bounds information - // check for existing automata; - // try to construct an automaton if we don't have one yet - // and doing so without bounds is not difficult - bool existingAutomata = (regex_automaton_assumptions.contains(re) && !regex_automaton_assumptions[re].empty()); - bool failureThresholdExceeded = (regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold); - if (!existingAutomata) { - unsigned expected_complexity = estimate_regex_complexity(re); - if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold - || failureThresholdExceeded) { - eautomaton * aut = m_mk_aut(re); - if (aut == nullptr) { - TRACE(str, tout << "ERROR: symbolic automaton construction failed, likely due to non-constant term in regex" << std::endl;); - return false; - } - aut->compress(); - regex_automata.push_back(aut); - regex_automaton_under_assumptions new_aut(re, aut, true); - if (!regex_automaton_assumptions.contains(re)) { - regex_automaton_assumptions.insert(re, svector()); - } - regex_automaton_assumptions[re].push_back(new_aut); - TRACE(str, tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); - find_automaton_initial_bounds(str_in_re, aut); - } else { - regex_inc_counter(regex_fail_count, str_in_re); - } - } else { - regex_inc_counter(regex_fail_count, str_in_re); - } - } - } - } // foreach (entry in regex_terms) - - for (auto entry : regex_terms_by_string) { - // TODO do we need to check equivalence classes of strings here? - - expr* str = entry.m_key; - ptr_vector str_in_re_terms = entry.m_value; - - svector intersect_constraints; - // we may find empty intersection before checking every constraint; - // this vector keeps track of which ones actually take part in intersection - svector used_intersect_constraints; - - // choose an automaton/assumption for each assigned (str.in.re) - // that's consistent with the current length information - for (auto str_in_re_term : str_in_re_terms) { - expr * _unused = nullptr; - expr * re = nullptr; - SASSERT(u.str.is_in_re(str_in_re_term)); - u.str.is_in_re(str_in_re_term, _unused, re); - - rational exact_len; - bool has_exact_len = get_len_value(str, exact_len); - - rational lb, ub; - bool has_lower_bound = lower_bound(mk_strlen(str), lb); - bool has_upper_bound = upper_bound(mk_strlen(str), ub); - - if (regex_automaton_assumptions.contains(re) && - !regex_automaton_assumptions[re].empty()){ - for (auto aut : regex_automaton_assumptions[re]) { - rational aut_ub; - bool assume_ub = aut.get_upper_bound(aut_ub); - rational aut_lb; - bool assume_lb = aut.get_lower_bound(aut_lb); - bool consistent = true; - - if (assume_ub) { - // check consistency of assumed upper bound - if (has_exact_len) { - if (exact_len > aut_ub) { - consistent = false; - } - } else { - if (has_upper_bound && ub > aut_ub) { - consistent = false; - } - } - } - - if (assume_lb) { - // check consistency of assumed lower bound - if (has_exact_len) { - if (exact_len < aut_lb) { - consistent = false; - } - } else { - if (has_lower_bound && lb < aut_lb) { - consistent = false; - } - } - } - - if (consistent) { - intersect_constraints.push_back(aut); - break; - } - } - } - } // foreach(term in str_in_re_terms) - - eautomaton * aut_inter = nullptr; - CTRACE(str, !intersect_constraints.empty(), tout << "check intersection of automata constraints for " << mk_pp(str, m) << std::endl;); - for (auto aut : intersect_constraints) { - TRACE(str, - { - unsigned v = regex_get_counter(regex_length_attempt_count, aut.get_regex_term()); - tout << "length attempt count of " << mk_pp(aut.get_regex_term(), m) << " is " << v - << ", threshold is " << m_params.m_RegexAutomata_LengthAttemptThreshold << std::endl; - }); - - if (regex_get_counter(regex_length_attempt_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_LengthAttemptThreshold) { - unsigned intersectionDifficulty = 0; - if (aut_inter != nullptr) { - intersectionDifficulty = estimate_automata_intersection_difficulty(aut_inter, aut.get_automaton()); - } - TRACE(str, tout << "intersection difficulty is " << intersectionDifficulty << std::endl;); - if (intersectionDifficulty <= m_params.m_RegexAutomata_IntersectionDifficultyThreshold - || regex_get_counter(regex_intersection_fail_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_FailedIntersectionThreshold) { - - expr * str_in_re_term(u.re.mk_in_re(str, aut.get_regex_term())); - lbool current_assignment = ctx.get_assignment(str_in_re_term); - // if the assignment is consistent with our assumption, use the automaton directly; - // otherwise, complement it (and save that automaton for next time) - // TODO we should cache these intermediate results - // TODO do we need to push the intermediates into a vector for deletion anyway? - if ( (current_assignment == l_true && aut.get_polarity()) - || (current_assignment == l_false && !aut.get_polarity())) { - if (aut_inter == nullptr) { - aut_inter = aut.get_automaton(); - } else { - aut_inter = m_mk_aut.mk_product(aut_inter, aut.get_automaton()); - m_automata.push_back(aut_inter); - } - } else { - // need to complement first - expr_ref rc(u.re.mk_complement(aut.get_regex_term()), m); - eautomaton * aut_c = m_mk_aut(rc); - if (aut_c == nullptr) { - TRACE(str, tout << "ERROR: symbolic automaton construction failed, likely due to non-constant term in regex" << std::endl;); - return false; - } - regex_automata.push_back(aut_c); - // TODO is there any way to build a complement automaton from an existing one? - // this discards length information - if (aut_inter == nullptr) { - aut_inter = aut_c; - } else { - aut_inter = m_mk_aut.mk_product(aut_inter, aut_c); - m_automata.push_back(aut_inter); - } - } - used_intersect_constraints.push_back(aut); - if (aut_inter->is_empty()) { - break; - } - } else { - // failed intersection - regex_inc_counter(regex_intersection_fail_count, aut.get_regex_term()); - } - } - } // foreach(entry in intersect_constraints) - if (aut_inter != nullptr) { - aut_inter->compress(); - } - TRACE(str, tout << "intersected " << used_intersect_constraints.size() << " constraints" << std::endl;); - - expr_ref_vector conflict_terms(m); - expr_ref conflict_lhs(m); - for (auto aut : used_intersect_constraints) { - expr * str_in_re_term(u.re.mk_in_re(str, aut.get_regex_term())); - lbool current_assignment = ctx.get_assignment(str_in_re_term); - if (current_assignment == l_true) { - conflict_terms.push_back(str_in_re_term); - } else if (current_assignment == l_false) { - conflict_terms.push_back(m.mk_not(str_in_re_term)); - } - // add length assumptions, if any - rational ub; - if (aut.get_upper_bound(ub)) { - expr_ref ub_term(m_autil.mk_le(mk_strlen(str), m_autil.mk_numeral(ub, true)), m); - conflict_terms.push_back(ub_term); - } - rational lb; - if (aut.get_lower_bound(lb)) { - expr_ref lb_term(m_autil.mk_ge(mk_strlen(str), m_autil.mk_numeral(lb, true)), m); - conflict_terms.push_back(lb_term); - } - } - conflict_lhs = mk_and(conflict_terms); - TRACE(str, tout << "conflict lhs: " << mk_pp(conflict_lhs, m) << std::endl;); - - if (used_intersect_constraints.size() > 1 && aut_inter != nullptr) { - // check whether the intersection is only the empty string - unsigned initial_state = aut_inter->init(); - if (aut_inter->final_states().size() == 1 && aut_inter->is_final_state(initial_state)) { - // initial state is final and it is the only final state - // if there are no moves from the initial state, - // the only solution is the empty string - if (aut_inter->get_moves_from(initial_state).empty()) { - TRACE(str, tout << "product automaton only accepts empty string" << std::endl;); - expr_ref rhs1(ctx.mk_eq_atom(str, mk_string("")), m); - expr_ref rhs2(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(rational::zero(), true)), m); - expr_ref rhs(m.mk_and(rhs1, rhs2), m); - assert_implication(conflict_lhs, rhs); - } - } - } - - if (aut_inter != nullptr && aut_inter->is_empty()) { - TRACE(str, tout << "product automaton is empty; asserting conflict clause" << std::endl;); - expr_ref conflict_clause(m.mk_not(mk_and(conflict_terms)), m); - assert_axiom(conflict_clause); - add_persisted_axiom(conflict_clause); - } - } // foreach (entry in regex_terms_by_string) - return true; - } - - unsigned theory_str::estimate_regex_complexity(expr * re) { - ENSURE(u.is_re(re)); - expr * sub1; - expr * sub2; - unsigned lo, hi; - if (u.re.is_to_re(re, sub1)) { - if (!u.str.is_string(sub1)) - throw default_exception("regular expressions must be built from string literals"); - zstring str; - u.str.is_string(sub1, str); - return str.length(); - } else if (u.re.is_complement(re, sub1)) { - return estimate_regex_complexity_under_complement(sub1); - } else if (u.re.is_concat(re, sub1, sub2)) { - unsigned cx1 = estimate_regex_complexity(sub1); - unsigned cx2 = estimate_regex_complexity(sub2); - return _qadd(cx1, cx2); - } else if (u.re.is_union(re, sub1, sub2)) { - unsigned cx1 = estimate_regex_complexity(sub1); - unsigned cx2 = estimate_regex_complexity(sub2); - return _qadd(cx1, cx2); - } else if (u.re.is_star(re, sub1) || u.re.is_plus(re, sub1)) { - unsigned cx = estimate_regex_complexity(sub1); - return _qmul(2, cx); - } else if (u.re.is_loop(re, sub1, lo, hi) || u.re.is_loop(re, sub1, lo)) { - unsigned cx = estimate_regex_complexity(sub1); - return _qadd(lo, cx); - } else if (u.re.is_range(re, sub1, sub2)) { - if (!u.re.is_range(re, lo, hi)) throw default_exception("regular expressions must be built from string literals"); - zstring str1, str2; - u.str.is_string(sub1, str1); - u.str.is_string(sub2, str2); - if (str1.length() == 1 && str2.length() == 1) { - return 1 + str2[0] - str1[0]; - } else { - return 1; - } - } else if (u.re.is_full_char(re) || u.re.is_full_seq(re)) { - return 1; - } else { - TRACE(str, tout << "WARNING: unknown regex term " << mk_pp(re, get_manager()) << std::endl;); - return 1; - } - } - - unsigned theory_str::estimate_regex_complexity_under_complement(expr * re) { - ENSURE(u.is_re(re)); - expr * sub1; - expr * sub2; - zstring str; - unsigned lo, hi; - if (u.re.is_to_re(re, sub1) && u.str.is_string(sub1)) { - return str.length(); - } else if (u.re.is_complement(re, sub1)) { - // Why don't we return the regular complexity here? - // We could, but this might be called from under another complemented subexpression. - // It's better to give a worst-case complexity. - return estimate_regex_complexity_under_complement(sub1); - } else if (u.re.is_concat(re, sub1, sub2)) { - unsigned cx1 = estimate_regex_complexity_under_complement(sub1); - unsigned cx2 = estimate_regex_complexity_under_complement(sub2); - return _qadd(_qmul(2, cx1), cx2); - } else if (u.re.is_union(re, sub1, sub2)) { - unsigned cx1 = estimate_regex_complexity_under_complement(sub1); - unsigned cx2 = estimate_regex_complexity_under_complement(sub2); - return _qmul(cx1, cx2); - } else if (u.re.is_star(re, sub1) || u.re.is_plus(re, sub1) || u.re.is_loop(re, sub1, lo, hi) || u.re.is_loop(re, sub1, lo)) { - unsigned cx = estimate_regex_complexity_under_complement(sub1); - return _qmul(2, cx); - } else if (u.re.is_range(re, sub1, sub2)) { - if (!u.re.is_range(re, lo, hi)) throw default_exception("regular expressions must be built from string literals"); - zstring str1, str2; - u.str.is_string(sub1, str1); - u.str.is_string(sub2, str2); - SASSERT(str1.length() == 1); - SASSERT(str2.length() == 1); - return 1 + str2[0] - str1[0]; - } else if (u.re.is_full_char(re) || u.re.is_full_seq(re)) { - return 1; - } else { - TRACE(str, tout << "WARNING: unknown regex term " << mk_pp(re, get_manager()) << std::endl;); - return 1; - } - } - - unsigned theory_str::estimate_automata_intersection_difficulty(eautomaton * aut1, eautomaton * aut2) { - ENSURE(aut1 != nullptr); - ENSURE(aut2 != nullptr); - return _qmul(aut1->num_states(), aut2->num_states()); - } - - // Check whether a regex translates well to a linear set of length constraints. - bool theory_str::check_regex_length_linearity(expr * re) { - return check_regex_length_linearity_helper(re, false); - } - - bool theory_str::check_regex_length_linearity_helper(expr * re, bool already_star) { - expr * sub1; - expr * sub2; - unsigned lo, hi; - if (u.re.is_to_re(re)) { - return true; - } else if (u.re.is_concat(re, sub1, sub2)) { - return check_regex_length_linearity_helper(sub1, already_star) && check_regex_length_linearity_helper(sub2, already_star); - } else if (u.re.is_union(re, sub1, sub2)) { - return check_regex_length_linearity_helper(sub1, already_star) && check_regex_length_linearity_helper(sub2, already_star); - } else if (u.re.is_star(re, sub1) || u.re.is_plus(re, sub1)) { - if (already_star) { - return false; - } else { - return check_regex_length_linearity_helper(sub1, true); - } - } else if (u.re.is_range(re)) { - return true; - } else if (u.re.is_full_char(re)) { - return true; - } else if (u.re.is_full_seq(re)) { - return true; - } else if (u.re.is_complement(re)) { - // TODO can we do better? - return false; - } else if (u.re.is_intersection(re)) { - return false; - } else if (u.re.is_loop(re, sub1, lo, hi) || u.re.is_loop(re, sub1, lo)) { - return check_regex_length_linearity_helper(sub1, already_star); - } else { - TRACE(str, tout << "WARNING: unknown regex term " << mk_pp(re, get_manager()) << std::endl;); - return false; - } - } - - // note: returns an empty set `lens` if something went wrong - void theory_str::check_subterm_lengths(expr * re, integer_set & lens) { - expr * sub1; - expr * sub2; - unsigned lo, hi; - if (u.re.is_to_re(re, sub1)) { - SASSERT(u.str.is_string(sub1)); - zstring str; - u.str.is_string(sub1, str); - lens.insert(str.length()); - } else if (u.re.is_concat(re, sub1, sub2)) { - integer_set lens_1, lens_2; - check_subterm_lengths(sub1, lens_1); - check_subterm_lengths(sub2, lens_2); - if (lens_1.empty() || lens_2.empty()) { - lens.reset(); - } else { - // take all pairwise lengths - for (integer_set::iterator it1 = lens_1.begin(); it1 != lens_1.end(); ++it1) { - for(integer_set::iterator it2 = lens_2.begin(); it2 != lens_2.end(); ++it2) { - int l1 = *it1; - int l2 = *it2; - lens.insert(l1 + l2); - } - } - } - } else if (u.re.is_union(re, sub1, sub2)) { - integer_set lens_1, lens_2; - check_subterm_lengths(sub1, lens_1); - check_subterm_lengths(sub2, lens_2); - if (lens_1.empty() || lens_2.empty()) { - lens.reset(); - } else { - // take all possibilities from either side - for (integer_set::iterator it1 = lens_1.begin(); it1 != lens_1.end(); ++it1) { - lens.insert(*it1); - } - for (integer_set::iterator it2 = lens_2.begin(); it2 != lens_2.end(); ++it2) { - lens.insert(*it2); - } - } - } else if (u.re.is_star(re, sub1) || u.re.is_plus(re, sub1)) { - // this is bad -- term generation requires this not to appear - lens.reset(); - } else if (u.re.is_range(re, sub1, sub2)) { - if (!u.re.is_range(re, lo, hi)) throw default_exception("regular expressions must be built from string literals"); - zstring str1, str2; - u.str.is_string(sub1, str1); - u.str.is_string(sub2, str2); - // re.range is a language of singleton strings if both of its arguments are; - // otherwise it is the empty language - if (str1.length() == 1 && str2.length() == 1) { - lens.insert(1); - } else { - lens.insert(0); - } - } else if (u.re.is_full_char(re)) { - lens.insert(1); - } else if (u.re.is_full_seq(re)) { - lens.reset(); - } else if (u.re.is_complement(re)) { - lens.reset(); - } else if (u.re.is_loop(re, sub1, lo, hi)) { - integer_set lens_1; - check_subterm_lengths(sub1, lens_1); - for (unsigned i = lo; i <= hi; ++i) { - for (auto j : lens_1) { - lens.insert(i * j); - } - } - } else { - TRACE(str, tout << "WARNING: unknown regex term " << mk_pp(re, get_manager()) << std::endl;); - lens.reset(); - } - } - - /* - * Infer all length constraints implied by the given regular expression `re` - * in order to constrain `lenVar` (which must be of sort Int). - * This assumes that `re` appears in a positive context. - * Returns a Boolean formula expressing the appropriate constraints over `lenVar`. - * In some cases, the returned formula requires one or more free integer variables to be created. - * These variables are returned in the reference parameter `freeVariables`. - * Extra assertions should be made for these free variables constraining them to be non-negative. - */ - expr_ref theory_str::infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables) { - ENSURE(u.is_re(re)); - expr * sub1; - expr * sub2; - unsigned lo, hi; - if (u.re.is_to_re(re, sub1)) { - if (!u.str.is_string(sub1)) - throw default_exception("regular expressions must be built from string literals"); - zstring str; - u.str.is_string(sub1, str); - rational strlen(str.length()); - expr_ref retval(ctx.mk_eq_atom(lenVar, m_autil.mk_numeral(strlen, true)), m); - return retval; - } else if (u.re.is_union(re, sub1, sub2)) { - expr_ref r1 = infer_all_regex_lengths(lenVar, sub1, freeVariables); - expr_ref r2 = infer_all_regex_lengths(lenVar, sub2, freeVariables); - expr_ref retval(m.mk_or(r1, r2), m); - return retval; - } else if (u.re.is_concat(re, sub1, sub2)) { - expr * v1 = mk_int_var("rlen1"); - expr * v2 = mk_int_var("rlen2"); - freeVariables.push_back(v1); - freeVariables.push_back(v2); - expr_ref r1 = infer_all_regex_lengths(v1, sub1, freeVariables); - expr_ref r2 = infer_all_regex_lengths(v2, sub2, freeVariables); - expr_ref_vector finalResult(m); - finalResult.push_back(ctx.mk_eq_atom(lenVar, m_autil.mk_add(v1, v2))); - finalResult.push_back(r1); - finalResult.push_back(r2); - expr_ref retval(mk_and(finalResult), m); - return retval; - } else if (u.re.is_star(re, sub1) || u.re.is_plus(re, sub1)) { - // stars are generated as a linear combination of all possible subterm lengths; - // this requires that there are no stars under this one - /* - expr * v = mk_int_var("rlen"); - expr * n = mk_int_var("rstar"); - freeVariables.push_back(v); - freeVariables.push_back(n); - expr_ref rsub = infer_all_regex_lengths(v, sub1, freeVariables); - expr_ref_vector finalResult(m); - finalResult.push_back(rsub); - finalResult.push_back(ctx.mk_eq_atom(lenVar, m_autil.mk_mul(v, n))); - expr_ref retval(mk_and(finalResult), m); - return retval; - */ - integer_set subterm_lens; - check_subterm_lengths(sub1, subterm_lens); - if (subterm_lens.empty()) { - // somehow generation was impossible - expr_ref retval(m_autil.mk_ge(lenVar, m_autil.mk_numeral(rational::zero(), true)), m); - return retval; - } else { - TRACE(str, tout << "subterm lengths:"; - for(integer_set::iterator it = subterm_lens.begin(); it != subterm_lens.end(); ++it) { - tout << " " << *it; - } - tout << std::endl;); - expr_ref_vector sum_terms(m); - for (integer_set::iterator it = subterm_lens.begin(); it != subterm_lens.end(); ++it) { - rational lenOption(*it); - expr * n = mk_int_var("rstar"); - freeVariables.push_back(n); - expr_ref term(m_autil.mk_mul(m_autil.mk_numeral(lenOption, true), n), m); - expr_ref term2(term, m); - if (u.re.is_plus(re)) { - // n effectively starts at 1 - term2 = m_autil.mk_add(m_autil.mk_numeral(lenOption, true), term); - } - sum_terms.push_back(term2); - } - expr_ref retval(ctx.mk_eq_atom(lenVar, m_autil.mk_add_simplify(sum_terms)), m); - return retval; - } - } else if (u.re.is_loop(re, sub1, lo, hi)) { - expr * v1 = mk_int_var("rlen"); - freeVariables.push_back(v1); - expr_ref r1 = infer_all_regex_lengths(v1, sub1, freeVariables); - expr_ref_vector v1_choices(m); - for (unsigned i = lo; i <= hi; ++i) { - rational rI(i); - expr_ref v1_i(ctx.mk_eq_atom(lenVar, m_autil.mk_mul(m_autil.mk_numeral(rI, true), v1)), m); - v1_choices.push_back(v1_i); - } - expr_ref_vector finalResult(m); - finalResult.push_back(r1); - finalResult.push_back(mk_or(v1_choices)); - expr_ref retval(mk_and(finalResult), m); - SASSERT(retval); - return retval; - } else if (u.re.is_range(re, sub1, sub2)) { - if (!u.re.is_range(re, lo, hi)) throw default_exception("regular expressions must be built from string literals"); - zstring str1, str2; - u.str.is_string(sub1, str1); - u.str.is_string(sub2, str2); - SASSERT(str1.length() == 1); - SASSERT(str2.length() == 1); - expr_ref retval(ctx.mk_eq_atom(lenVar, m_autil.mk_numeral(rational::one(), true)), m); - return retval; - } else if (u.re.is_full_char(re)) { - expr_ref retval(ctx.mk_eq_atom(lenVar, m_autil.mk_numeral(rational::one(), true)), m); - return retval; - } else if (u.re.is_full_seq(re)) { - // match any unbounded string - expr_ref retval(m_autil.mk_ge(lenVar, m_autil.mk_numeral(rational::zero(), true)), m); - return retval; - } else if (u.re.is_complement(re)) { - // skip complement for now, in general this is difficult to predict - expr_ref retval(m_autil.mk_ge(lenVar, m_autil.mk_numeral(rational::zero(), true)), m); - return retval; - } else { - TRACE(str, tout << "WARNING: unknown regex term " << mk_pp(re, m) << std::endl;); - expr_ref retval(m_autil.mk_ge(lenVar, m_autil.mk_numeral(rational::zero(), true)), m); - return retval; - } - } - - /* - * Assert initial lower and upper bounds for the positive constraint (str in re) corresponding - * to the automaton `aut`. - * This asserts a constraint of the form: - * str_in_re --> (len(str) ?= 0 OR len(str) >= lb) AND len(str) <= ub - * where the upper bound clause is omitted if the upper bound doesn't exist - * and the equality with 0 is based on whether solutions of length 0 are allowed. - */ - void theory_str::find_automaton_initial_bounds(expr * str_in_re, eautomaton * aut) { - ENSURE(aut != nullptr); - - expr_ref_vector rhs(m); - expr * str = nullptr; - expr * re = nullptr; - u.str.is_in_re(str_in_re, str, re); - expr_ref strlen(mk_strlen(str), m); - - // lower bound first - rational nonzero_lower_bound; - bool zero_sol_exists = refine_automaton_lower_bound(aut, rational::zero(), nonzero_lower_bound); - if (zero_sol_exists) { - regex_last_lower_bound.insert(str, rational::zero()); - // solution at 0 - if (!nonzero_lower_bound.is_minus_one()) { - expr_ref rhs1(ctx.mk_eq_atom(strlen, m_autil.mk_numeral(rational::zero(), true)), m); - expr_ref rhs2(m_autil.mk_ge(strlen, m_autil.mk_numeral(nonzero_lower_bound, true)), m); - rhs.push_back(m.mk_or(rhs1, rhs2)); - } else { - // length of solution can ONLY be 0 - expr_ref rhs1(ctx.mk_eq_atom(strlen, m_autil.mk_numeral(rational::zero(), true)), m); - rhs.push_back(rhs1); - } - } else { - // no solution at 0 - if (!nonzero_lower_bound.is_minus_one()) { - regex_last_lower_bound.insert(str, nonzero_lower_bound); - expr_ref rhs2(m_autil.mk_ge(strlen, m_autil.mk_numeral(nonzero_lower_bound, true)), m); - rhs.push_back(rhs2); - } else { - // probably no solutions at all; just assume that 0 is a (safe) lower bound - regex_last_lower_bound.insert(str, rational::zero()); - rhs.reset(); - } - } - // TODO upper bound check - - if (!rhs.empty()) { - expr_ref lhs(str_in_re, m); - expr_ref _rhs(mk_and(rhs), m); - assert_implication(lhs, _rhs); - } - } - - /* - * Refine the lower bound on the length of a solution to a given automaton. - * The method returns TRUE if a solution of length `current_lower_bound` exists, - * and FALSE otherwise. In addition, the reference parameter `refined_lower_bound` - * is assigned the length of the shortest solution longer than `current_lower_bound` - * if it exists, or -1 otherwise. - */ - bool theory_str::refine_automaton_lower_bound(eautomaton * aut, rational current_lower_bound, rational & refined_lower_bound) { - ENSURE(aut != nullptr); - - if (aut->final_states().empty()) { - // no solutions at all - refined_lower_bound = rational::minus_one(); - return false; - } - - // from here we assume that there is a final state reachable from the initial state - - unsigned_vector search_queue; - // populate search_queue with all states reachable from the epsilon-closure of start state - aut->get_epsilon_closure(aut->init(), search_queue); - - unsigned search_depth = 0; - hashtable> next_states; - unsigned_vector next_search_queue; - - bool found_solution_at_lower_bound = false; - - while (!search_queue.empty()) { - // if we are at the lower bound, check for final states - if (search_depth == current_lower_bound.get_unsigned()) { - for (unsigned_vector::iterator it = search_queue.begin(); it != search_queue.end(); ++it) { - unsigned state = *it; - if (aut->is_final_state(state)) { - found_solution_at_lower_bound = true; - break; - } - } - // end phase 1 - break; - } - next_states.reset(); - next_search_queue.clear(); - // move one step along all states - for (unsigned_vector::iterator it = search_queue.begin(); it != search_queue.end(); ++it) { - unsigned src = *it; - eautomaton::moves next_moves; - aut->get_moves_from(src, next_moves, true); - for (eautomaton::moves::iterator move_it = next_moves.begin(); - move_it != next_moves.end(); ++move_it) { - unsigned dst = move_it->dst(); - if (!next_states.contains(dst)) { - next_states.insert(dst); - next_search_queue.push_back(dst); - } - } - } - search_queue.clear(); - search_queue.append(next_search_queue); - search_depth += 1; - } // !search_queue.empty() - - // if we got here before reaching the lower bound, - // there aren't any solutions at or above it, so stop - if (search_depth < current_lower_bound.get_unsigned()) { - refined_lower_bound = rational::minus_one(); - return false; - } - - // phase 2: continue exploring the automaton above the lower bound - SASSERT(search_depth == current_lower_bound.get_unsigned()); - - while (!search_queue.empty()) { - if (search_depth > current_lower_bound.get_unsigned()) { - // check if we have found a solution above the lower bound - for (unsigned_vector::iterator it = search_queue.begin(); it != search_queue.end(); ++it) { - unsigned state = *it; - if (aut->is_final_state(state)) { - // this is a solution at a depth higher than the lower bound - refined_lower_bound = rational(search_depth); - return found_solution_at_lower_bound; - } - } - } - next_states.reset(); - next_search_queue.clear(); - // move one step along all states - for (unsigned_vector::iterator it = search_queue.begin(); it != search_queue.end(); ++it) { - unsigned src = *it; - eautomaton::moves next_moves; - aut->get_moves_from(src, next_moves, true); - for (eautomaton::moves::iterator move_it = next_moves.begin(); - move_it != next_moves.end(); ++move_it) { - unsigned dst = move_it->dst(); - if (!next_states.contains(dst)) { - next_states.insert(dst); - next_search_queue.push_back(dst); - } - } - } - search_queue.clear(); - search_queue.append(next_search_queue); - search_depth += 1; - } - // if we reached this point, we explored the whole automaton and didn't find any - // solutions above the lower bound - refined_lower_bound = rational::minus_one(); - return found_solution_at_lower_bound; - } - - /* - * Refine the upper bound on the length of a solution to a given automaton. - * The method returns TRUE if a solution of length `current_upper_bound` exists, - * and FALSE otherwise. In addition, the reference parameter `refined_upper_bound` - * is assigned the length of the longest solution shorter than `current_upper_bound`, - * if a shorter solution exists, or -1 otherwise. - */ - bool theory_str::refine_automaton_upper_bound(eautomaton * aut, rational current_upper_bound, rational & refined_upper_bound) { - ENSURE(aut != nullptr); - - if (aut->final_states().empty()) { - // no solutions at all! - refined_upper_bound = rational::minus_one(); - return false; - } - - // from here we assume there is a final state reachable from the initial state - unsigned_vector search_queue; - // populate search queue with all states reachable from the epsilon-closure of the start state - aut->get_epsilon_closure(aut->init(), search_queue); - - rational last_solution_depth = rational::minus_one(); - bool found_solution_at_upper_bound = false; - - unsigned search_depth = 0; - hashtable > next_states; - unsigned_vector next_search_queue; - - while(!search_queue.empty()) { - // see if any of the current states are final - for (unsigned_vector::iterator it = search_queue.begin(); it != search_queue.end(); ++it) { - unsigned src = *it; - if (aut->is_final_state(src)) { - if (search_depth == current_upper_bound.get_unsigned()) { - found_solution_at_upper_bound = true; - } else { - last_solution_depth = rational(search_depth); - } - break; - } - } - - if (search_depth == current_upper_bound.get_unsigned()) { - break; - } - - next_states.reset(); - next_search_queue.clear(); - // move one step along all states - for (unsigned_vector::iterator it = search_queue.begin(); it != search_queue.end(); ++it) { - unsigned src = *it; - eautomaton::moves next_moves; - aut->get_moves_from(src, next_moves, true); - for (eautomaton::moves::iterator moves_it = next_moves.begin(); - moves_it != next_moves.end(); ++moves_it) { - unsigned dst = moves_it->dst(); - if (!next_states.contains(dst)) { - next_states.insert(dst); - next_search_queue.push_back(dst); - } - } - } - search_queue.clear(); - search_queue.append(next_search_queue); - search_depth += 1; - } //!search_queue.empty() - - refined_upper_bound = last_solution_depth; - return found_solution_at_upper_bound; - } - - void theory_str::aut_path_add_next(u_map& next, expr_ref_vector& trail, unsigned idx, expr* cond) { - expr* acc; - if (!get_manager().is_true(cond) && next.find(idx, acc)) { - expr* args[2] = { cond, acc }; - cond = mk_or(get_manager(), 2, args); - } - trail.push_back(cond); - next.insert(idx, cond); - } - - expr_ref theory_str::aut_path_rewrite_constraint(expr * cond, expr * ch_var) { - - expr_ref retval(m); - - unsigned char_val = 0; - - expr * lhs; - expr * rhs; - - if (u.is_const_char(cond, char_val)) { - SASSERT(char_val < 256); - TRACE(str, tout << "rewrite character constant " << char_val << std::endl;); - zstring str_const(char_val); - retval = u.str.mk_string(str_const); - return retval; - } else if (is_var(cond)) { - TRACE(str, tout << "substitute var" << std::endl;); - retval = ch_var; - return retval; - } else if (m.is_eq(cond, lhs, rhs)) { - // handle this specially because the sort of the equality will change - expr_ref new_lhs(aut_path_rewrite_constraint(lhs, ch_var), m); - SASSERT(new_lhs); - expr_ref new_rhs(aut_path_rewrite_constraint(rhs, ch_var), m); - SASSERT(new_rhs); - retval = ctx.mk_eq_atom(new_lhs, new_rhs); - return retval; - } else if (m.is_bool(cond)) { - TRACE(str, tout << "rewrite boolean term " << mk_pp(cond, m) << std::endl;); - app * a_cond = to_app(cond); - expr_ref_vector rewritten_args(m); - for (unsigned i = 0; i < a_cond->get_num_args(); ++i) { - expr * argI = a_cond->get_arg(i); - expr_ref new_arg(aut_path_rewrite_constraint(argI, ch_var), m); - SASSERT(new_arg); - rewritten_args.push_back(new_arg); - } - retval = m.mk_app(a_cond->get_decl(), rewritten_args.data()); - TRACE(str, tout << "final rewritten term is " << mk_pp(retval, m) << std::endl;); - return retval; - } else { - TRACE(str, tout << "ERROR: unrecognized automaton path constraint " << mk_pp(cond, m) << ", cannot translate" << std::endl;); - retval = nullptr; - return retval; - } - } - - /* - * Create finite path constraints for the string variable `str` with respect to the automaton `aut`. - * The returned expression is the right-hand side of a constraint of the form - * (str in re) AND (|str| = len) AND (any applicable length assumptions on aut) -> (rhs AND character constraints). - * The character constraints, which are (str = c0 . c1 . (...) . cn) and (|c0| = 1, ...), - * are returned in `characterConstraints`. - */ - expr_ref theory_str::generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints) { - ENSURE(aut != nullptr); - - if (lenVal.is_zero()) { - // if any state in the epsilon-closure of the start state is accepting, - // then the empty string is in this language - unsigned_vector states; - bool has_final = false; - aut->get_epsilon_closure(aut->init(), states); - for (unsigned i = 0; i < states.size() && !has_final; ++i) { - has_final = aut->is_final_state(states[i]); - } - if (has_final) { - // empty string is OK, assert axiom - expr_ref rhs(ctx.mk_eq_atom(stringTerm, mk_string("")), m); - SASSERT(rhs); - //regex_automata_assertions.insert(stringTerm, final_axiom); - //m_trail_stack.push(insert_obj_map(regex_automata_assertions, stringTerm) ); - return rhs; - } else { - // negate -- the empty string isn't in the language - //expr_ref conflict(m.mk_not(mk_and(toplevel_lhs)), m); - //assert_axiom(conflict); - expr_ref conflict(m.mk_false(), m); - return conflict; - } - } // lenVal.is_zero() - - expr_ref_vector pathChars(m); - expr_ref_vector pathChars_len_constraints(m); - - // reuse character terms over the same string - if (string_chars.contains(stringTerm)) { - // find out whether we have enough characters already - ptr_vector old_chars; - string_chars.find(stringTerm, old_chars); - if (old_chars.size() < lenVal.get_unsigned()) { - for (unsigned i = old_chars.size(); i < lenVal.get_unsigned(); ++i) { - std::stringstream ss; - ss << "ch" << i; - expr_ref ch(mk_str_var(ss.str()), m); - m_trail.push_back(ch); - old_chars.push_back(ch); - } - } - string_chars.insert(stringTerm, old_chars); - // now we're guaranteed to have at least the right number of characters in old_chars - for (unsigned i = 0; i < lenVal.get_unsigned(); ++i) { - expr_ref ch(old_chars.get(i), m); - refresh_theory_var(ch); - pathChars.push_back(ch); - pathChars_len_constraints.push_back(ctx.mk_eq_atom(mk_strlen(ch), m_autil.mk_numeral(rational::one(), true))); - } - } else { - ptr_vector new_chars; - for (unsigned i = 0; i < lenVal.get_unsigned(); ++i) { - std::stringstream ss; - ss << "ch" << i; - expr_ref ch(mk_str_var(ss.str()), m); - pathChars.push_back(ch); - pathChars_len_constraints.push_back(ctx.mk_eq_atom(mk_strlen(ch), m_autil.mk_numeral(rational::one(), true))); - new_chars.push_back(ch); - } - string_chars.insert(stringTerm, new_chars); - } - - // modification of code in seq_rewriter::mk_str_in_regexp() - expr_ref_vector trail(m); - u_map maps[2]; - bool select_map = false; - expr_ref ch(m), cond(m); - eautomaton::moves mvs; - maps[0].insert(aut->init(), m.mk_true()); - // is_accepted(a, aut) & some state in frontier is final. - for (unsigned i = 0; i < lenVal.get_unsigned(); ++i) { - u_map& frontier = maps[select_map]; - u_map& next = maps[!select_map]; - select_map = !select_map; - ch = pathChars.get(i); - next.reset(); - u_map::iterator it = frontier.begin(), end = frontier.end(); - for (; it != end; ++it) { - mvs.reset(); - unsigned state = it->m_key; - expr* acc = it->m_value; - aut->get_moves_from(state, mvs, false); - for (unsigned j = 0; j < mvs.size(); ++j) { - eautomaton::move const& mv = mvs[j]; - SASSERT(mv.t()); - if (mv.t()->is_char() && m.is_value(mv.t()->get_char())) { - // change this to a string constraint - expr_ref cond_rhs = aut_path_rewrite_constraint(mv.t()->get_char(), ch); - SASSERT(cond_rhs); - cond = ctx.mk_eq_atom(ch, cond_rhs); - SASSERT(cond); - expr * args[2] = {cond, acc}; - cond = mk_and(m, 2, args); - aut_path_add_next(next, trail, mv.dst(), cond); - } else if (mv.t()->is_range()) { - expr_ref range_lo(mv.t()->get_lo(), m); - expr_ref range_hi(mv.t()->get_hi(), m); - - unsigned lo_val, hi_val; - - if (u.is_const_char(range_lo, lo_val) && u.is_const_char(range_hi, hi_val)) { - TRACE(str, tout << "make range predicate from " << lo_val << " to " << hi_val << std::endl;); - expr_ref cond_rhs(m); - expr_ref_vector cond_rhs_terms(m); - for (unsigned i = lo_val; i <= hi_val; ++i) { - zstring str_const(i); - expr_ref str_expr(u.str.mk_string(str_const), m); - cond_rhs_terms.push_back(ctx.mk_eq_atom(ch, str_expr)); - } - cond_rhs = mk_or(cond_rhs_terms); - SASSERT(cond_rhs); - expr * args[2] = {cond_rhs, acc}; - cond = mk_and(m, 2, args); - aut_path_add_next(next, trail, mv.dst(), cond); - } else { - TRACE(str, tout << "warning: non-bitvectors in automaton range predicate" << std::endl;); - UNREACHABLE(); - } - } else if (mv.t()->is_pred()) { - // rewrite this constraint over string terms - expr_ref cond_rhs = aut_path_rewrite_constraint(mv.t()->get_pred(), ch); - SASSERT(cond_rhs); - - if (m.is_false(cond_rhs)) { - continue; - } else if (m.is_true(cond_rhs)) { - aut_path_add_next(next, trail, mv.dst(), acc); - continue; - } - expr * args[2] = {cond_rhs, acc}; - cond = mk_and(m, 2, args); - aut_path_add_next(next, trail, mv.dst(), cond); - } - } - } - } - u_map const& frontier = maps[select_map]; - u_map::iterator it = frontier.begin(), end = frontier.end(); - expr_ref_vector ors(m); - for (; it != end; ++it) { - unsigned_vector states; - bool has_final = false; - aut->get_epsilon_closure(it->m_key, states); - for (unsigned i = 0; i < states.size() && !has_final; ++i) { - has_final = aut->is_final_state(states[i]); - } - if (has_final) { - ors.push_back(it->m_value); - } - } - expr_ref result(mk_or(ors)); - TRACE(str, tout << "regex path constraint: " << mk_pp(result, m) << "\n";); - - expr_ref concat_rhs(m); - if (pathChars.size() == 1) { - concat_rhs = ctx.mk_eq_atom(stringTerm, pathChars.get(0)); - } else { - expr_ref acc(pathChars.get(0), m); - for (unsigned i = 1; i < pathChars.size(); ++i) { - acc = mk_concat(acc, pathChars.get(i)); - } - concat_rhs = ctx.mk_eq_atom(stringTerm, acc); - } - - //expr_ref toplevel_rhs(m.mk_and(result, mk_and(pathChars_len_constraints), concat_rhs), m); - characterConstraints = m.mk_and(mk_and(pathChars_len_constraints), concat_rhs); - //expr_ref final_axiom(rewrite_implication(mk_and(toplevel_lhs), toplevel_rhs), m); - //regex_automata_assertions.insert(stringTerm, final_axiom); - //m_trail_stack.push(insert_obj_map(regex_automata_assertions, stringTerm) ); - return result; - } - - void theory_str::regex_inc_counter(obj_map & counter_map, expr * key) { - unsigned old_v; - if (counter_map.find(key, old_v)) { - unsigned new_v = old_v += 1; - counter_map.insert(key, new_v); - } else { - counter_map.insert(key, 1); - } - } - - unsigned theory_str::regex_get_counter(obj_map & counter_map, expr * key) { - unsigned v; - if (counter_map.find(key, v)) { - return v; - } else { - counter_map.insert(key, 0); - return 0; - } - } - -}; /* namespace smt */