From 7e419137b1bda69a816164c40ae61b7e01137dd3 Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Mon, 17 Sep 2018 16:13:34 -0400 Subject: [PATCH] Z3str3: refactor regex automata to subroutine, use arith_value --- src/smt/smt_arith_value.cpp | 6 + src/smt/smt_arith_value.h | 1 + src/smt/smt_theory.h | 1 + src/smt/theory_str.cpp | 1511 +++++++++++++++++------------------ src/smt/theory_str.h | 2 + 5 files changed, 745 insertions(+), 776 deletions(-) diff --git a/src/smt/smt_arith_value.cpp b/src/smt/smt_arith_value.cpp index ce4c0d9a9..fcd6a3c4f 100644 --- a/src/smt/smt_arith_value.cpp +++ b/src/smt/smt_arith_value.cpp @@ -96,4 +96,10 @@ namespace smt { while (next != n); return false; } + + final_check_status arith_value::final_check() { + family_id afid = a.get_family_id(); + theory * th = m_ctx.get_theory(afid); + return th->final_check_eh(); + } }; diff --git a/src/smt/smt_arith_value.h b/src/smt/smt_arith_value.h index 9b0f833ac..b3748923e 100644 --- a/src/smt/smt_arith_value.h +++ b/src/smt/smt_arith_value.h @@ -33,5 +33,6 @@ namespace smt { bool get_lo(expr* e, rational& lo, bool& strict); bool get_up(expr* e, rational& up, bool& strict); bool get_value(expr* e, rational& value); + final_check_status final_check(); }; }; diff --git a/src/smt/smt_theory.h b/src/smt/smt_theory.h index 9d5a4f49f..b791d890e 100644 --- a/src/smt/smt_theory.h +++ b/src/smt/smt_theory.h @@ -36,6 +36,7 @@ namespace smt { unsigned_vector m_var2enode_lim; friend class context; + friend class arith_value; protected: virtual void init(context * ctx); diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index aadcb63a7..436892a20 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -4835,40 +4835,9 @@ namespace smt { return n; } - // from Z3: theory_seq.cpp - - static theory_mi_arith* get_th_arith(context& ctx, theory_id afid, expr* e) { - theory* th = ctx.get_theory(afid); - if (th && ctx.e_internalized(e)) { - return dynamic_cast(th); - } - else { - return nullptr; - } - } - bool theory_str::get_arith_value(expr* e, rational& val) const { - context& ctx = get_context(); - ast_manager & m = get_manager(); - // safety - if (!ctx.e_internalized(e)) { - return false; - } - // if an integer constant exists in the eqc, it should be the root - enode * en_e = ctx.get_enode(e); - enode * root_e = en_e->get_root(); - if (m_autil.is_numeral(root_e->get_owner(), val) && val.is_int()) { - TRACE("str", tout << mk_pp(e, m) << " ~= " << mk_pp(root_e->get_owner(), m) << std::endl;); - return true; - } else { - TRACE("str", tout << "root of eqc of " << mk_pp(e, m) << " is not a numeral" << std::endl;); - return false; - theory_mi_arith* tha = get_th_arith(ctx, m_autil.get_family_id(), e); - if (!tha) return false; - expr_ref val_e(m); - if (tha->get_value(root_e, val_e) && m_autil.is_numeral(val_e, val) && val.is_int()) return true; - return false; - } + arith_value v(get_context()); + return v.get_value(e, val); } bool theory_str::lower_bound(expr* _e, rational& lo) { @@ -4877,12 +4846,9 @@ namespace smt { return false; } - context& ctx = get_context(); - ast_manager & m = get_manager(); - theory_mi_arith* tha = get_th_arith(ctx, m_autil.get_family_id(), _e); - expr_ref _lo(m); - if (!tha || !tha->get_lower(ctx.get_enode(_e), _lo)) return false; - return m_autil.is_numeral(_lo, lo) && lo.is_int(); + arith_value v(get_context()); + bool strict; + return v.get_lo(_e, lo, strict); } bool theory_str::upper_bound(expr* _e, rational& hi) { @@ -4891,12 +4857,9 @@ namespace smt { return false; } - context& ctx = get_context(); - ast_manager & m = get_manager(); - theory_mi_arith* tha = get_th_arith(ctx, m_autil.get_family_id(), _e); - expr_ref _hi(m); - if (!tha || !tha->get_upper(ctx.get_enode(_e), _hi)) return false; - return m_autil.is_numeral(_hi, hi) && hi.is_int(); + arith_value v(get_context()); + bool strict; + return v.get_up(_e, hi, strict); } bool theory_str::get_len_value(expr* e, rational& val) { @@ -4908,17 +4871,6 @@ namespace smt { context& ctx = get_context(); ast_manager & m = get_manager(); - theory* th = ctx.get_theory(m_autil.get_family_id()); - if (!th) { - TRACE("str", tout << "oops, can't get m_autil's theory" << std::endl;); - return false; - } - theory_mi_arith* tha = dynamic_cast(th); - if (!tha) { - TRACE("str", tout << "oops, can't cast to theory_mi_arith" << std::endl;); - return false; - } - TRACE("str", tout << "checking len value of " << mk_ismt2_pp(e, m) << std::endl;); rational val1; @@ -9830,6 +9782,732 @@ namespace smt { } } + void theory_str::solve_regex_automata() { + context & ctx = get_context(); + ast_manager & m = get_manager(); + + // TODO since heuristics might fail, the "no progress" flag might need to be handled specially here + bool regex_axiom_add = false; + for (obj_hashtable::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) { + expr * str_in_re = *it; + expr * str = nullptr; + expr * re = nullptr; + u.str.is_in_re(str_in_re, str, re); + lbool current_assignment = ctx.get_assignment(str_in_re); + TRACE("str", tout << "regex term: " << mk_pp(str, m) << " in " << mk_pp(re, m) << " : " << current_assignment << std::endl;); + if (current_assignment == l_undef) { + continue; + } + + if (!regex_terms_with_length_constraints.contains(str_in_re)) { + if (current_assignment == l_true && check_regex_length_linearity(re)) { + TRACE("str", tout << "regex length constraints expected to be linear -- generating and asserting them" << std::endl;); + + if (regex_term_to_length_constraint.contains(str_in_re)) { + // use existing length constraint + expr * top_level_length_constraint = nullptr; + regex_term_to_length_constraint.find(str_in_re, top_level_length_constraint); + + ptr_vector extra_length_vars; + regex_term_to_extra_length_vars.find(str_in_re, extra_length_vars); + + assert_axiom(top_level_length_constraint); + for(ptr_vector::iterator it = extra_length_vars.begin(); it != extra_length_vars.end(); ++it) { + expr * v = *it; + refresh_theory_var(v); + expr_ref len_constraint(m_autil.mk_ge(v, m_autil.mk_numeral(rational::zero(), true)), m); + assert_axiom(len_constraint); + } + } else { + // generate new length constraint + expr_ref_vector extra_length_vars(m); + expr_ref _top_level_length_constraint = infer_all_regex_lengths(mk_strlen(str), re, extra_length_vars); + expr_ref top_level_length_constraint(_top_level_length_constraint, m); + th_rewriter rw(m); + rw(top_level_length_constraint); + TRACE("str", tout << "top-level length constraint: " << mk_pp(top_level_length_constraint, m) << std::endl;); + // assert and track length constraint + assert_axiom(top_level_length_constraint); + for(expr_ref_vector::iterator it = extra_length_vars.begin(); it != extra_length_vars.end(); ++it) { + expr * v = *it; + expr_ref len_constraint(m_autil.mk_ge(v, m_autil.mk_numeral(rational::zero(), true)), m); + assert_axiom(len_constraint); + } + + regex_term_to_length_constraint.insert(str_in_re, top_level_length_constraint); + ptr_vector vtmp; + for(expr_ref_vector::iterator it = extra_length_vars.begin(); it != extra_length_vars.end(); ++it) { + vtmp.push_back(*it); + } + regex_term_to_extra_length_vars.insert(str_in_re, vtmp); + } + + regex_terms_with_length_constraints.insert(str_in_re); + m_trail_stack.push(insert_obj_trail(regex_terms_with_length_constraints, str_in_re)); + regex_axiom_add = true; + } + } // re not in regex_terms_with_length_constraints + + rational exact_length_value; + if (get_len_value(str, exact_length_value)) { + TRACE("str", tout << "exact length of " << mk_pp(str, m) << " is " << exact_length_value << std::endl;); + + if (regex_terms_with_path_constraints.contains(str_in_re)) { + TRACE("str", tout << "term " << mk_pp(str_in_re, m) << " already has path constraints set up" << std::endl;); + continue; + } + + // find a consistent automaton for this term + bool found = false; + regex_automaton_under_assumptions assumption; + if (regex_automaton_assumptions.contains(re) && + !regex_automaton_assumptions[re].empty()){ + for (svector::iterator it = regex_automaton_assumptions[re].begin(); + it != regex_automaton_assumptions[re].end(); ++it) { + regex_automaton_under_assumptions autA = *it; + rational assumed_upper_bound, assumed_lower_bound; + bool assumes_upper_bound = autA.get_upper_bound(assumed_upper_bound); + bool assumes_lower_bound = autA.get_lower_bound(assumed_lower_bound); + if (!assumes_upper_bound && !assumes_lower_bound) { + // automaton with no assumptions is always usable + assumption = autA; + found = true; + break; + } + // TODO check consistency of bounds assumptions + } // foreach(a in regex_automaton_assumptions) + } + if (found) { + if (exact_length_value.is_zero()) { + // check consistency of 0-length solution with automaton + eautomaton * aut = assumption.get_automaton(); + bool zero_solution = false; + unsigned initial_state = aut->init(); + if (aut->is_final_state(initial_state)) { + zero_solution = true; + } else { + unsigned_vector eps_states; + aut->get_epsilon_closure(initial_state, eps_states); + for (unsigned_vector::iterator it = eps_states.begin(); it != eps_states.end(); ++it) { + unsigned state = *it; + if (aut->is_final_state(state)) { + zero_solution = true; + break; + } + } + } + + // now check polarity of automaton wrt. original term + if ( (current_assignment == l_true && !assumption.get_polarity()) + || (current_assignment == l_false && assumption.get_polarity())) { + // invert sense + zero_solution = !zero_solution; + } + + if (zero_solution) { + TRACE("str", tout << "zero-length solution OK -- asserting empty path constraint" << std::endl;); + expr_ref_vector lhs_terms(m); + if (current_assignment == l_true) { + lhs_terms.push_back(str_in_re); + } else { + lhs_terms.push_back(m.mk_not(str_in_re)); + } + lhs_terms.push_back(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(exact_length_value, true))); + expr_ref lhs(mk_and(lhs_terms), m); + expr_ref rhs(ctx.mk_eq_atom(str, mk_string("")), m); + assert_implication(lhs, rhs); + regex_terms_with_path_constraints.insert(str_in_re); + m_trail_stack.push(insert_obj_trail(regex_terms_with_path_constraints, str_in_re)); + } else { + TRACE("str", tout << "zero-length solution not admitted by this automaton -- asserting conflict clause" << std::endl;); + expr_ref_vector lhs_terms(m); + if (current_assignment == l_true) { + lhs_terms.push_back(str_in_re); + } else { + lhs_terms.push_back(m.mk_not(str_in_re)); + } + lhs_terms.push_back(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(exact_length_value, true))); + expr_ref lhs(mk_and(lhs_terms), m); + expr_ref conflict(m.mk_not(lhs), m); + assert_axiom(conflict); + } + regex_axiom_add = true; + regex_inc_counter(regex_length_attempt_count, re); + continue; + } else { + expr_ref pathConstraint(m); + expr_ref characterConstraints(m); + pathConstraint = generate_regex_path_constraints(str, assumption.get_automaton(), exact_length_value, characterConstraints); + TRACE("str", tout << "generated regex path constraint " << mk_pp(pathConstraint, m) << std::endl;); + TRACE("str", tout << "character constraints are " << mk_pp(characterConstraints, m) << std::endl;); + + expr_ref_vector lhs_terms(m); + if (current_assignment == l_true) { + lhs_terms.push_back(str_in_re); + } else { + lhs_terms.push_back(m.mk_not(str_in_re)); + } + lhs_terms.push_back(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(exact_length_value, true))); + expr_ref lhs(mk_and(lhs_terms), m); + + // If the path constraint comes out as "false", this means there are no paths of that length + // in the automaton. If the polarity is the same, we can assert a conflict clause. + // If the polarity is opposite, we ignore the path constraint. + + if (m.is_false(pathConstraint)) { + if ( (current_assignment == l_true && assumption.get_polarity()) + || (current_assignment == l_false && !assumption.get_polarity())) { + // automaton and constraint have same polarity -- assert conflict clause + TRACE("str", tout << "path constraint is false with matching polarity; asserting conflict clause" << std::endl;); + expr_ref conflict(m.mk_not(mk_and(lhs_terms)), m); + assert_axiom(conflict); + // don't set up "regex_terms_with_path_constraints" as a conflict clause is not a path constraint + } else { + // automaton and constraint have opposite polarity -- ignore path constraint + TRACE("str", tout << "path constraint is false with opposite polarity; ignoring path constraint" << std::endl;); + assert_implication(lhs, characterConstraints); + regex_terms_with_path_constraints.insert(str_in_re); + m_trail_stack.push(insert_obj_trail(regex_terms_with_path_constraints, str_in_re)); + } + regex_axiom_add = true; + } else { + // If the automaton was built with the same polarity as the constraint, + // assert directly. Otherwise, negate the path constraint + if ( (current_assignment == l_true && assumption.get_polarity()) + || (current_assignment == l_false && !assumption.get_polarity())) { + TRACE("str", tout << "automaton and regex term have same polarity" << std::endl;); + expr_ref rhs(m.mk_and(pathConstraint, characterConstraints), m); + assert_implication(lhs, rhs); + } else { + TRACE("str", tout << "automaton and regex term have opposite polarity" << std::endl;); + expr_ref rhs(m.mk_and(m.mk_not(pathConstraint), characterConstraints), m); + assert_implication(lhs, rhs); + } + regex_terms_with_path_constraints.insert(str_in_re); + m_trail_stack.push(insert_obj_trail(regex_terms_with_path_constraints, str_in_re)); + regex_axiom_add = true; + } + + // increment LengthAttemptCount + regex_inc_counter(regex_length_attempt_count, re); + + TRACE("str", + { + unsigned v = regex_get_counter(regex_length_attempt_count, re); + tout << "length attempt count for " << mk_pp(re, m) << " is " << v << std::endl; + }); + + continue; + } + } else { + // no automata available, or else all bounds assumptions are invalid + unsigned expected_complexity = estimate_regex_complexity(re); + if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold || regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold) { + CTRACE("str", regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold, + tout << "failed automaton threshold reached for " << mk_pp(str_in_re, m) << " -- automatically constructing full automaton" << std::endl;); + eautomaton * aut = m_mk_aut(re); + aut->compress(); + regex_automata.push_back(aut); + regex_automaton_under_assumptions new_aut(re, aut, true); + if (!regex_automaton_assumptions.contains(re)) { + regex_automaton_assumptions.insert(re, svector()); + } + regex_automaton_assumptions[re].push_back(new_aut); + TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); + regex_axiom_add = true; + find_automaton_initial_bounds(str_in_re, aut); + } else { + regex_inc_counter(regex_fail_count, str_in_re); + } + continue; + } + } // get_len_value() + expr_ref str_len(mk_strlen(str), m); + rational lower_bound_value; + rational upper_bound_value; + bool lower_bound_exists = lower_bound(str_len, lower_bound_value); + bool upper_bound_exists = upper_bound(str_len, upper_bound_value); + CTRACE("str", lower_bound_exists, tout << "lower bound of " << mk_pp(str, m) << " is " << lower_bound_value << std::endl;); + CTRACE("str", upper_bound_exists, tout << "upper bound of " << mk_pp(str, m) << " is " << upper_bound_value << std::endl;); + + bool new_lower_bound_info = true; + bool new_upper_bound_info = true; + // check last seen lower/upper bound to avoid performing duplicate work + if (regex_last_lower_bound.contains(str)) { + rational last_lb_value; + regex_last_lower_bound.find(str, last_lb_value); + if (last_lb_value == lower_bound_value) { + new_lower_bound_info = false; + } + } + if (regex_last_upper_bound.contains(str)) { + rational last_ub_value; + regex_last_upper_bound.find(str, last_ub_value); + if (last_ub_value == upper_bound_value) { + new_upper_bound_info = false; + } + } + + if (new_lower_bound_info) { + regex_last_lower_bound.insert(str, lower_bound_value); + } + if (new_upper_bound_info) { + regex_last_upper_bound.insert(str, upper_bound_value); + } + + if (upper_bound_exists && new_upper_bound_info) { + // check current assumptions + if (regex_automaton_assumptions.contains(re) && + !regex_automaton_assumptions[re].empty()){ + // one or more existing assumptions. + // see if the (current best) upper bound can be refined + // (note that if we have an automaton with no assumption, + // this automatically counts as best) + bool need_assumption = true; + regex_automaton_under_assumptions last_assumption; + rational last_ub = rational::minus_one(); + for (svector::iterator it = regex_automaton_assumptions[re].begin(); + it != regex_automaton_assumptions[re].end(); ++it) { + regex_automaton_under_assumptions autA = *it; + if ((current_assignment == l_true && autA.get_polarity() == false) + || (current_assignment == l_false && autA.get_polarity() == true)) { + // automaton uses incorrect polarity + continue; + } + rational this_ub; + if (autA.get_upper_bound(this_ub)) { + if (last_ub == rational::minus_one() || this_ub < last_ub) { + last_ub = this_ub; + last_assumption = autA; + } + } else { + need_assumption = false; + last_assumption = autA; + break; + } + } + if (!last_ub.is_minus_one() || !need_assumption) { + CTRACE("str", !need_assumption, tout << "using automaton with full length information" << std::endl;); + CTRACE("str", need_assumption, tout << "using automaton with assumed upper bound of " << last_ub << std::endl;); + + rational refined_upper_bound; + bool solution_at_upper_bound = refine_automaton_upper_bound(last_assumption.get_automaton(), + upper_bound_value, refined_upper_bound); + TRACE("str", tout << "refined upper bound is " << refined_upper_bound << + (solution_at_upper_bound?", solution at upper bound":", no solution at upper bound") << std::endl;); + + expr_ref_vector lhs(m); + if (current_assignment == l_false) { + lhs.push_back(m.mk_not(str_in_re)); + } else { + lhs.push_back(str_in_re); + } + if (need_assumption) { + lhs.push_back(m_autil.mk_le(str_len, m_autil.mk_numeral(last_ub, true))); + } + lhs.push_back(m_autil.mk_le(str_len, m_autil.mk_numeral(upper_bound_value, true))); + + expr_ref_vector rhs(m); + + if (solution_at_upper_bound) { + if (refined_upper_bound.is_minus_one()) { + // If there are solutions at the upper bound but not below it, make the bound exact. + rhs.push_back(ctx.mk_eq_atom(str_len, m_autil.mk_numeral(upper_bound_value, true))); + } else { + // If there are solutions at and below the upper bound, add an additional bound. + rhs.push_back(m.mk_or( + ctx.mk_eq_atom(str_len, m_autil.mk_numeral(upper_bound_value, true)), + m_autil.mk_le(str_len, m_autil.mk_numeral(refined_upper_bound, true)) + )); + } + } else { + if (refined_upper_bound.is_minus_one()) { + // If there are no solutions at or below the upper bound, assert a conflict clause. + rhs.push_back(m.mk_not(m_autil.mk_le(str_len, m_autil.mk_numeral(upper_bound_value, true)))); + } else { + // If there are solutions below the upper bound but not at it, refine the bound. + rhs.push_back(m_autil.mk_le(str_len, m_autil.mk_numeral(refined_upper_bound, true))); + } + } + + if (!rhs.empty()) { + expr_ref lhs_terms(mk_and(lhs), m); + expr_ref rhs_terms(mk_and(rhs), m); + assert_implication(lhs_terms, rhs_terms); + regex_axiom_add = true; + } + } + } else { + // no existing automata/assumptions. + // if it's easy to construct a full automaton for R, do so + unsigned expected_complexity = estimate_regex_complexity(re); + bool failureThresholdExceeded = (regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold); + if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold || failureThresholdExceeded) { + eautomaton * aut = m_mk_aut(re); + aut->compress(); + regex_automata.push_back(aut); + regex_automaton_under_assumptions new_aut(re, aut, true); + if (!regex_automaton_assumptions.contains(re)) { + regex_automaton_assumptions.insert(re, svector()); + } + regex_automaton_assumptions[re].push_back(new_aut); + TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); + regex_axiom_add = true; + find_automaton_initial_bounds(str_in_re, aut); + } else { + // TODO check negation? + // TODO construct a partial automaton for R to the given upper bound? + if (false) { + + } else { + regex_inc_counter(regex_fail_count, str_in_re); + } + } + continue; + } + // if we have *any* automaton for R, and the upper bound is not too large, + // finitize the automaton (if we have not already done so) and assert all solutions + if (upper_bound_value < 50) { // TODO better metric for threshold + // NOT_IMPLEMENTED_YET(); // TODO(mtrberzi) + } + } else { // !upper_bound_exists + // no upper bound information + if (lower_bound_exists && !lower_bound_value.is_zero() && new_lower_bound_info) { + // nonzero lower bound, no upper bound + + // check current assumptions + if (regex_automaton_assumptions.contains(re) && + !regex_automaton_assumptions[re].empty()){ + // one or more existing assumptions. + // see if the (current best) lower bound can be refined + // (note that if we have an automaton with no assumption, + // this automatically counts as best) + bool need_assumption = true; + regex_automaton_under_assumptions last_assumption; + rational last_lb = rational::zero(); // the default + for (svector::iterator it = regex_automaton_assumptions[re].begin(); + it != regex_automaton_assumptions[re].end(); ++it) { + regex_automaton_under_assumptions autA = *it; + if ((current_assignment == l_true && autA.get_polarity() == false) + || (current_assignment == l_false && autA.get_polarity() == true)) { + // automaton uses incorrect polarity + continue; + } + rational this_lb; + if (autA.get_lower_bound(this_lb)) { + if (this_lb > last_lb) { + last_lb = this_lb; + last_assumption = autA; + } + } else { + need_assumption = false; + last_assumption = autA; + break; + } + } + if (!last_lb.is_zero() || !need_assumption) { + CTRACE("str", !need_assumption, tout << "using automaton with full length information" << std::endl;); + CTRACE("str", need_assumption, tout << "using automaton with assumed lower bound of " << last_lb << std::endl;); + rational refined_lower_bound; + bool solution_at_lower_bound = refine_automaton_lower_bound(last_assumption.get_automaton(), + lower_bound_value, refined_lower_bound); + TRACE("str", tout << "refined lower bound is " << refined_lower_bound << + (solution_at_lower_bound?", solution at lower bound":", no solution at lower bound") << std::endl;); + + expr_ref_vector lhs(m); + if (current_assignment == l_false) { + lhs.push_back(m.mk_not(str_in_re)); + } else { + lhs.push_back(str_in_re); + } + if (need_assumption) { + lhs.push_back(m_autil.mk_ge(str_len, m_autil.mk_numeral(last_lb, true))); + } + lhs.push_back(m_autil.mk_ge(str_len, m_autil.mk_numeral(lower_bound_value, true))); + + expr_ref_vector rhs(m); + + if (solution_at_lower_bound) { + if (refined_lower_bound.is_minus_one()) { + // If there are solutions at the lower bound but not above it, make the bound exact. + rhs.push_back(ctx.mk_eq_atom(str_len, m_autil.mk_numeral(lower_bound_value, true))); + } else { + // If there are solutions at and above the lower bound, add an additional bound. + // DISABLED as this is causing non-termination in the integer solver. --mtrberzi + /* + rhs.push_back(m.mk_or( + ctx.mk_eq_atom(str_len, m_autil.mk_numeral(lower_bound_value, true)), + m_autil.mk_ge(str_len, m_autil.mk_numeral(refined_lower_bound, true)) + )); + */ + } + } else { + if (refined_lower_bound.is_minus_one()) { + // If there are no solutions at or above the lower bound, assert a conflict clause. + rhs.push_back(m.mk_not(m_autil.mk_ge(str_len, m_autil.mk_numeral(lower_bound_value, true)))); + } else { + // If there are solutions above the lower bound but not at it, refine the bound. + rhs.push_back(m_autil.mk_ge(str_len, m_autil.mk_numeral(refined_lower_bound, true))); + } + } + + if (!rhs.empty()) { + expr_ref lhs_terms(mk_and(lhs), m); + expr_ref rhs_terms(mk_and(rhs), m); + assert_implication(lhs_terms, rhs_terms); + regex_axiom_add = true; + } + } + } else { + // no existing automata/assumptions. + // if it's easy to construct a full automaton for R, do so + unsigned expected_complexity = estimate_regex_complexity(re); + bool failureThresholdExceeded = (regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold); + if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold || failureThresholdExceeded) { + eautomaton * aut = m_mk_aut(re); + aut->compress(); + regex_automata.push_back(aut); + regex_automaton_under_assumptions new_aut(re, aut, true); + if (!regex_automaton_assumptions.contains(re)) { + regex_automaton_assumptions.insert(re, svector()); + } + regex_automaton_assumptions[re].push_back(new_aut); + TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); + regex_axiom_add = true; + find_automaton_initial_bounds(str_in_re, aut); + } else { + // TODO check negation? + // TODO construct a partial automaton for R to the given lower bound? + if (false) { + + } else { + regex_inc_counter(regex_fail_count, str_in_re); + } + } + continue; + } + } else { // !lower_bound_exists + // no bounds information + // check for existing automata; + // try to construct an automaton if we don't have one yet + // and doing so without bounds is not difficult + bool existingAutomata = (regex_automaton_assumptions.contains(re) && !regex_automaton_assumptions[re].empty()); + bool failureThresholdExceeded = (regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold); + if (!existingAutomata) { + unsigned expected_complexity = estimate_regex_complexity(re); + if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold + || failureThresholdExceeded) { + eautomaton * aut = m_mk_aut(re); + aut->compress(); + regex_automata.push_back(aut); + regex_automaton_under_assumptions new_aut(re, aut, true); + if (!regex_automaton_assumptions.contains(re)) { + regex_automaton_assumptions.insert(re, svector()); + } + regex_automaton_assumptions[re].push_back(new_aut); + TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); + regex_axiom_add = true; + find_automaton_initial_bounds(str_in_re, aut); + } else { + regex_inc_counter(regex_fail_count, str_in_re); + } + } else { + regex_inc_counter(regex_fail_count, str_in_re); + } + } + } + } // foreach (entry in regex_terms) + + for (obj_map >::iterator it = regex_terms_by_string.begin(); + it != regex_terms_by_string.end(); ++it) { + // TODO do we need to check equivalence classes of strings here? + + expr * str = it->m_key; + ptr_vector str_in_re_terms = it->m_value; + + svector intersect_constraints; + // we may find empty intersection before checking every constraint; + // this vector keeps track of which ones actually take part in intersection + svector used_intersect_constraints; + + // choose an automaton/assumption for each assigned (str.in.re) + // that's consistent with the current length information + for (ptr_vector::iterator term_it = str_in_re_terms.begin(); + term_it != str_in_re_terms.end(); ++term_it) { + expr * _unused = nullptr; + expr * re = nullptr; + SASSERT(u.str.is_in_re(*term_it)); + u.str.is_in_re(*term_it, _unused, re); + + rational exact_len; + bool has_exact_len = get_len_value(str, exact_len); + + rational lb, ub; + bool has_lower_bound = lower_bound(mk_strlen(str), lb); + bool has_upper_bound = upper_bound(mk_strlen(str), ub); + + if (regex_automaton_assumptions.contains(re) && + !regex_automaton_assumptions[re].empty()){ + for (svector::iterator aut_it = regex_automaton_assumptions[re].begin(); + aut_it != regex_automaton_assumptions[re].end(); ++aut_it) { + regex_automaton_under_assumptions aut = *aut_it; + rational aut_ub; + bool assume_ub = aut.get_upper_bound(aut_ub); + rational aut_lb; + bool assume_lb = aut.get_lower_bound(aut_lb); + bool consistent = true; + + if (assume_ub) { + // check consistency of assumed upper bound + if (has_exact_len) { + if (exact_len > aut_ub) { + consistent = false; + } + } else { + if (has_upper_bound && ub > aut_ub) { + consistent = false; + } + } + } + + if (assume_lb) { + // check consistency of assumed lower bound + if (has_exact_len) { + if (exact_len < aut_lb) { + consistent = false; + } + } else { + if (has_lower_bound && lb < aut_lb) { + consistent = false; + } + } + } + + if (consistent) { + intersect_constraints.push_back(aut); + break; + } + } + } + } // foreach(term in str_in_re_terms) + + eautomaton * aut_inter = NULL; + CTRACE("str", !intersect_constraints.empty(), tout << "check intersection of automata constraints for " << mk_pp(str, m) << std::endl;); + for (svector::iterator aut_it = intersect_constraints.begin(); + aut_it != intersect_constraints.end(); ++aut_it) { + regex_automaton_under_assumptions aut = *aut_it; + if (aut_inter == NULL) { + // start somewhere + aut_inter = aut.get_automaton(); + used_intersect_constraints.push_back(aut); + continue; + } + + TRACE("str", + { + unsigned v = regex_get_counter(regex_length_attempt_count, aut.get_regex_term()); + tout << "length attempt count of " << mk_pp(aut.get_regex_term(), m) << " is " << v + << ", threshold is " << m_params.m_RegexAutomata_LengthAttemptThreshold << std::endl; + }); + + if (regex_get_counter(regex_length_attempt_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_LengthAttemptThreshold) { + unsigned intersectionDifficulty = estimate_automata_intersection_difficulty(aut_inter, aut.get_automaton()); + TRACE("str", tout << "intersection difficulty is " << intersectionDifficulty << std::endl;); + if (intersectionDifficulty <= m_params.m_RegexAutomata_IntersectionDifficultyThreshold + || regex_get_counter(regex_intersection_fail_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_FailedIntersectionThreshold) { + + expr * str_in_re_term(u.re.mk_in_re(str, aut.get_regex_term())); + lbool current_assignment = ctx.get_assignment(str_in_re_term); + // if the assignment is consistent with our assumption, use the automaton directly; + // otherwise, complement it (and save that automaton for next time) + // TODO we should cache these intermediate results + // TODO do we need to push the intermediates into a vector for deletion anyway? + if ( (current_assignment == l_true && aut.get_polarity()) + || (current_assignment == l_false && !aut.get_polarity())) { + aut_inter = m_mk_aut.mk_product(aut_inter, aut.get_automaton()); + m_automata.push_back(aut_inter); + } else { + // need to complement first + expr_ref rc(u.re.mk_complement(aut.get_regex_term()), m); + eautomaton * aut_c = m_mk_aut(rc); + regex_automata.push_back(aut_c); + // TODO is there any way to build a complement automaton from an existing one? + // this discards length information + aut_inter = m_mk_aut.mk_product(aut_inter, aut_c); + m_automata.push_back(aut_inter); + } + used_intersect_constraints.push_back(aut); + if (aut_inter->is_empty()) { + break; + } + } else { + // failed intersection + regex_inc_counter(regex_intersection_fail_count, aut.get_regex_term()); + } + } + } // foreach(entry in intersect_constraints) + if (aut_inter != NULL) { + aut_inter->compress(); + } + TRACE("str", tout << "intersected " << used_intersect_constraints.size() << " constraints" << std::endl;); + + expr_ref_vector conflict_terms(m); + expr_ref conflict_lhs(m); + for (svector::iterator aut_it = used_intersect_constraints.begin(); + aut_it != used_intersect_constraints.end(); ++aut_it) { + regex_automaton_under_assumptions aut = *aut_it; + expr * str_in_re_term(u.re.mk_in_re(str, aut.get_regex_term())); + lbool current_assignment = ctx.get_assignment(str_in_re_term); + if (current_assignment == l_true) { + conflict_terms.push_back(str_in_re_term); + } else if (current_assignment == l_false) { + conflict_terms.push_back(m.mk_not(str_in_re_term)); + } + // add length assumptions, if any + rational ub; + if (aut.get_upper_bound(ub)) { + expr_ref ub_term(m_autil.mk_le(mk_strlen(str), m_autil.mk_numeral(ub, true)), m); + conflict_terms.push_back(ub_term); + } + rational lb; + if (aut.get_lower_bound(lb)) { + expr_ref lb_term(m_autil.mk_ge(mk_strlen(str), m_autil.mk_numeral(lb, true)), m); + conflict_terms.push_back(lb_term); + } + } + conflict_lhs = mk_and(conflict_terms); + + if (used_intersect_constraints.size() > 1 && aut_inter != NULL) { + // check whether the intersection is only the empty string + unsigned initial_state = aut_inter->init(); + if (aut_inter->final_states().size() == 1 && aut_inter->is_final_state(initial_state)) { + // initial state is final and it is the only final state + // if there are no moves from the initial state, + // the only solution is the empty string + if (aut_inter->get_moves_from(initial_state).empty()) { + TRACE("str", tout << "product automaton only accepts empty string" << std::endl;); + expr_ref rhs1(ctx.mk_eq_atom(str, mk_string("")), m); + expr_ref rhs2(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(rational::zero(), true)), m); + expr_ref rhs(m.mk_and(rhs1, rhs2), m); + assert_implication(conflict_lhs, rhs); + regex_axiom_add = true; + } + } + } + + if (aut_inter != NULL && aut_inter->is_empty()) { + TRACE("str", tout << "product automaton is empty; asserting conflict clause" << std::endl;); + expr_ref conflict_clause(m.mk_not(mk_and(conflict_terms)), m); + assert_axiom(conflict_clause); + add_persisted_axiom(conflict_clause); + regex_axiom_add = true; + } + } // foreach (entry in regex_terms_by_string) + if (regex_axiom_add) { + //return FC_CONTINUE; + } + } + final_check_status theory_str::final_check_eh() { context & ctx = get_context(); ast_manager & m = get_manager(); @@ -9977,726 +10655,7 @@ namespace smt { // regex automata if (m_params.m_RegexAutomata) { - // TODO since heuristics might fail, the "no progress" flag might need to be handled specially here - bool regex_axiom_add = false; - for (obj_hashtable::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) { - expr * str_in_re = *it; - expr * str = nullptr; - expr * re = nullptr; - u.str.is_in_re(str_in_re, str, re); - lbool current_assignment = ctx.get_assignment(str_in_re); - TRACE("str", tout << "regex term: " << mk_pp(str, m) << " in " << mk_pp(re, m) << " : " << current_assignment << std::endl;); - if (current_assignment == l_undef) { - continue; - } - - if (!regex_terms_with_length_constraints.contains(str_in_re)) { - if (current_assignment == l_true && check_regex_length_linearity(re)) { - TRACE("str", tout << "regex length constraints expected to be linear -- generating and asserting them" << std::endl;); - - if (regex_term_to_length_constraint.contains(str_in_re)) { - // use existing length constraint - expr * top_level_length_constraint = nullptr; - regex_term_to_length_constraint.find(str_in_re, top_level_length_constraint); - - ptr_vector extra_length_vars; - regex_term_to_extra_length_vars.find(str_in_re, extra_length_vars); - - assert_axiom(top_level_length_constraint); - for(ptr_vector::iterator it = extra_length_vars.begin(); it != extra_length_vars.end(); ++it) { - expr * v = *it; - refresh_theory_var(v); - expr_ref len_constraint(m_autil.mk_ge(v, m_autil.mk_numeral(rational::zero(), true)), m); - assert_axiom(len_constraint); - } - } else { - // generate new length constraint - expr_ref_vector extra_length_vars(m); - expr_ref _top_level_length_constraint = infer_all_regex_lengths(mk_strlen(str), re, extra_length_vars); - expr_ref top_level_length_constraint(_top_level_length_constraint, m); - th_rewriter rw(m); - rw(top_level_length_constraint); - TRACE("str", tout << "top-level length constraint: " << mk_pp(top_level_length_constraint, m) << std::endl;); - // assert and track length constraint - assert_axiom(top_level_length_constraint); - for(expr_ref_vector::iterator it = extra_length_vars.begin(); it != extra_length_vars.end(); ++it) { - expr * v = *it; - expr_ref len_constraint(m_autil.mk_ge(v, m_autil.mk_numeral(rational::zero(), true)), m); - assert_axiom(len_constraint); - } - - regex_term_to_length_constraint.insert(str_in_re, top_level_length_constraint); - ptr_vector vtmp; - for(expr_ref_vector::iterator it = extra_length_vars.begin(); it != extra_length_vars.end(); ++it) { - vtmp.push_back(*it); - } - regex_term_to_extra_length_vars.insert(str_in_re, vtmp); - } - - regex_terms_with_length_constraints.insert(str_in_re); - m_trail_stack.push(insert_obj_trail(regex_terms_with_length_constraints, str_in_re)); - regex_axiom_add = true; - } - } // re not in regex_terms_with_length_constraints - - rational exact_length_value; - if (get_len_value(str, exact_length_value)) { - TRACE("str", tout << "exact length of " << mk_pp(str, m) << " is " << exact_length_value << std::endl;); - - if (regex_terms_with_path_constraints.contains(str_in_re)) { - TRACE("str", tout << "term " << mk_pp(str_in_re, m) << " already has path constraints set up" << std::endl;); - continue; - } - - // find a consistent automaton for this term - bool found = false; - regex_automaton_under_assumptions assumption; - if (regex_automaton_assumptions.contains(re) && - !regex_automaton_assumptions[re].empty()){ - for (svector::iterator it = regex_automaton_assumptions[re].begin(); - it != regex_automaton_assumptions[re].end(); ++it) { - regex_automaton_under_assumptions autA = *it; - rational assumed_upper_bound, assumed_lower_bound; - bool assumes_upper_bound = autA.get_upper_bound(assumed_upper_bound); - bool assumes_lower_bound = autA.get_lower_bound(assumed_lower_bound); - if (!assumes_upper_bound && !assumes_lower_bound) { - // automaton with no assumptions is always usable - assumption = autA; - found = true; - break; - } - // TODO check consistency of bounds assumptions - } // foreach(a in regex_automaton_assumptions) - } - if (found) { - if (exact_length_value.is_zero()) { - // check consistency of 0-length solution with automaton - eautomaton * aut = assumption.get_automaton(); - bool zero_solution = false; - unsigned initial_state = aut->init(); - if (aut->is_final_state(initial_state)) { - zero_solution = true; - } else { - unsigned_vector eps_states; - aut->get_epsilon_closure(initial_state, eps_states); - for (unsigned_vector::iterator it = eps_states.begin(); it != eps_states.end(); ++it) { - unsigned state = *it; - if (aut->is_final_state(state)) { - zero_solution = true; - break; - } - } - } - - // now check polarity of automaton wrt. original term - if ( (current_assignment == l_true && !assumption.get_polarity()) - || (current_assignment == l_false && assumption.get_polarity())) { - // invert sense - zero_solution = !zero_solution; - } - - if (zero_solution) { - TRACE("str", tout << "zero-length solution OK -- asserting empty path constraint" << std::endl;); - expr_ref_vector lhs_terms(m); - if (current_assignment == l_true) { - lhs_terms.push_back(str_in_re); - } else { - lhs_terms.push_back(m.mk_not(str_in_re)); - } - lhs_terms.push_back(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(exact_length_value, true))); - expr_ref lhs(mk_and(lhs_terms), m); - expr_ref rhs(ctx.mk_eq_atom(str, mk_string("")), m); - assert_implication(lhs, rhs); - regex_terms_with_path_constraints.insert(str_in_re); - m_trail_stack.push(insert_obj_trail(regex_terms_with_path_constraints, str_in_re)); - } else { - TRACE("str", tout << "zero-length solution not admitted by this automaton -- asserting conflict clause" << std::endl;); - expr_ref_vector lhs_terms(m); - if (current_assignment == l_true) { - lhs_terms.push_back(str_in_re); - } else { - lhs_terms.push_back(m.mk_not(str_in_re)); - } - lhs_terms.push_back(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(exact_length_value, true))); - expr_ref lhs(mk_and(lhs_terms), m); - expr_ref conflict(m.mk_not(lhs), m); - assert_axiom(conflict); - } - regex_axiom_add = true; - regex_inc_counter(regex_length_attempt_count, re); - continue; - } else { - expr_ref pathConstraint(m); - expr_ref characterConstraints(m); - pathConstraint = generate_regex_path_constraints(str, assumption.get_automaton(), exact_length_value, characterConstraints); - TRACE("str", tout << "generated regex path constraint " << mk_pp(pathConstraint, m) << std::endl;); - TRACE("str", tout << "character constraints are " << mk_pp(characterConstraints, m) << std::endl;); - - expr_ref_vector lhs_terms(m); - if (current_assignment == l_true) { - lhs_terms.push_back(str_in_re); - } else { - lhs_terms.push_back(m.mk_not(str_in_re)); - } - lhs_terms.push_back(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(exact_length_value, true))); - expr_ref lhs(mk_and(lhs_terms), m); - - // If the path constraint comes out as "false", this means there are no paths of that length - // in the automaton. If the polarity is the same, we can assert a conflict clause. - // If the polarity is opposite, we ignore the path constraint. - - if (m.is_false(pathConstraint)) { - if ( (current_assignment == l_true && assumption.get_polarity()) - || (current_assignment == l_false && !assumption.get_polarity())) { - // automaton and constraint have same polarity -- assert conflict clause - TRACE("str", tout << "path constraint is false with matching polarity; asserting conflict clause" << std::endl;); - expr_ref conflict(m.mk_not(mk_and(lhs_terms)), m); - assert_axiom(conflict); - // don't set up "regex_terms_with_path_constraints" as a conflict clause is not a path constraint - } else { - // automaton and constraint have opposite polarity -- ignore path constraint - TRACE("str", tout << "path constraint is false with opposite polarity; ignoring path constraint" << std::endl;); - assert_implication(lhs, characterConstraints); - regex_terms_with_path_constraints.insert(str_in_re); - m_trail_stack.push(insert_obj_trail(regex_terms_with_path_constraints, str_in_re)); - } - regex_axiom_add = true; - } else { - // If the automaton was built with the same polarity as the constraint, - // assert directly. Otherwise, negate the path constraint - if ( (current_assignment == l_true && assumption.get_polarity()) - || (current_assignment == l_false && !assumption.get_polarity())) { - TRACE("str", tout << "automaton and regex term have same polarity" << std::endl;); - expr_ref rhs(m.mk_and(pathConstraint, characterConstraints), m); - assert_implication(lhs, rhs); - } else { - TRACE("str", tout << "automaton and regex term have opposite polarity" << std::endl;); - expr_ref rhs(m.mk_and(m.mk_not(pathConstraint), characterConstraints), m); - assert_implication(lhs, rhs); - } - regex_terms_with_path_constraints.insert(str_in_re); - m_trail_stack.push(insert_obj_trail(regex_terms_with_path_constraints, str_in_re)); - regex_axiom_add = true; - } - - // increment LengthAttemptCount - regex_inc_counter(regex_length_attempt_count, re); - - TRACE("str", - { - unsigned v = regex_get_counter(regex_length_attempt_count, re); - tout << "length attempt count for " << mk_pp(re, m) << " is " << v << std::endl; - }); - - continue; - } - } else { - // no automata available, or else all bounds assumptions are invalid - unsigned expected_complexity = estimate_regex_complexity(re); - if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold || regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold) { - CTRACE("str", regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold, - tout << "failed automaton threshold reached for " << mk_pp(str_in_re, m) << " -- automatically constructing full automaton" << std::endl;); - eautomaton * aut = m_mk_aut(re); - aut->compress(); - regex_automata.push_back(aut); - regex_automaton_under_assumptions new_aut(re, aut, true); - if (!regex_automaton_assumptions.contains(re)) { - regex_automaton_assumptions.insert(re, svector()); - } - regex_automaton_assumptions[re].push_back(new_aut); - TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); - regex_axiom_add = true; - find_automaton_initial_bounds(str_in_re, aut); - } else { - regex_inc_counter(regex_fail_count, str_in_re); - } - continue; - } - } // get_len_value() - expr_ref str_len(mk_strlen(str), m); - rational lower_bound_value; - rational upper_bound_value; - bool lower_bound_exists = lower_bound(str_len, lower_bound_value); - bool upper_bound_exists = upper_bound(str_len, upper_bound_value); - CTRACE("str", lower_bound_exists, tout << "lower bound of " << mk_pp(str, m) << " is " << lower_bound_value << std::endl;); - CTRACE("str", upper_bound_exists, tout << "upper bound of " << mk_pp(str, m) << " is " << upper_bound_value << std::endl;); - - bool new_lower_bound_info = true; - bool new_upper_bound_info = true; - // check last seen lower/upper bound to avoid performing duplicate work - if (regex_last_lower_bound.contains(str)) { - rational last_lb_value; - regex_last_lower_bound.find(str, last_lb_value); - if (last_lb_value == lower_bound_value) { - new_lower_bound_info = false; - } - } - if (regex_last_upper_bound.contains(str)) { - rational last_ub_value; - regex_last_upper_bound.find(str, last_ub_value); - if (last_ub_value == upper_bound_value) { - new_upper_bound_info = false; - } - } - - if (new_lower_bound_info) { - regex_last_lower_bound.insert(str, lower_bound_value); - } - if (new_upper_bound_info) { - regex_last_upper_bound.insert(str, upper_bound_value); - } - - if (upper_bound_exists && new_upper_bound_info) { - // check current assumptions - if (regex_automaton_assumptions.contains(re) && - !regex_automaton_assumptions[re].empty()){ - // one or more existing assumptions. - // see if the (current best) upper bound can be refined - // (note that if we have an automaton with no assumption, - // this automatically counts as best) - bool need_assumption = true; - regex_automaton_under_assumptions last_assumption; - rational last_ub = rational::minus_one(); - for (svector::iterator it = regex_automaton_assumptions[re].begin(); - it != regex_automaton_assumptions[re].end(); ++it) { - regex_automaton_under_assumptions autA = *it; - if ((current_assignment == l_true && autA.get_polarity() == false) - || (current_assignment == l_false && autA.get_polarity() == true)) { - // automaton uses incorrect polarity - continue; - } - rational this_ub; - if (autA.get_upper_bound(this_ub)) { - if (last_ub == rational::minus_one() || this_ub < last_ub) { - last_ub = this_ub; - last_assumption = autA; - } - } else { - need_assumption = false; - last_assumption = autA; - break; - } - } - if (!last_ub.is_minus_one() || !need_assumption) { - CTRACE("str", !need_assumption, tout << "using automaton with full length information" << std::endl;); - CTRACE("str", need_assumption, tout << "using automaton with assumed upper bound of " << last_ub << std::endl;); - - rational refined_upper_bound; - bool solution_at_upper_bound = refine_automaton_upper_bound(last_assumption.get_automaton(), - upper_bound_value, refined_upper_bound); - TRACE("str", tout << "refined upper bound is " << refined_upper_bound << - (solution_at_upper_bound?", solution at upper bound":", no solution at upper bound") << std::endl;); - - expr_ref_vector lhs(m); - if (current_assignment == l_false) { - lhs.push_back(m.mk_not(str_in_re)); - } else { - lhs.push_back(str_in_re); - } - if (need_assumption) { - lhs.push_back(m_autil.mk_le(str_len, m_autil.mk_numeral(last_ub, true))); - } - lhs.push_back(m_autil.mk_le(str_len, m_autil.mk_numeral(upper_bound_value, true))); - - expr_ref_vector rhs(m); - - if (solution_at_upper_bound) { - if (refined_upper_bound.is_minus_one()) { - // If there are solutions at the upper bound but not below it, make the bound exact. - rhs.push_back(ctx.mk_eq_atom(str_len, m_autil.mk_numeral(upper_bound_value, true))); - } else { - // If there are solutions at and below the upper bound, add an additional bound. - rhs.push_back(m.mk_or( - ctx.mk_eq_atom(str_len, m_autil.mk_numeral(upper_bound_value, true)), - m_autil.mk_le(str_len, m_autil.mk_numeral(refined_upper_bound, true)) - )); - } - } else { - if (refined_upper_bound.is_minus_one()) { - // If there are no solutions at or below the upper bound, assert a conflict clause. - rhs.push_back(m.mk_not(m_autil.mk_le(str_len, m_autil.mk_numeral(upper_bound_value, true)))); - } else { - // If there are solutions below the upper bound but not at it, refine the bound. - rhs.push_back(m_autil.mk_le(str_len, m_autil.mk_numeral(refined_upper_bound, true))); - } - } - - if (!rhs.empty()) { - expr_ref lhs_terms(mk_and(lhs), m); - expr_ref rhs_terms(mk_and(rhs), m); - assert_implication(lhs_terms, rhs_terms); - regex_axiom_add = true; - } - } - } else { - // no existing automata/assumptions. - // if it's easy to construct a full automaton for R, do so - unsigned expected_complexity = estimate_regex_complexity(re); - bool failureThresholdExceeded = (regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold); - if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold || failureThresholdExceeded) { - eautomaton * aut = m_mk_aut(re); - aut->compress(); - regex_automata.push_back(aut); - regex_automaton_under_assumptions new_aut(re, aut, true); - if (!regex_automaton_assumptions.contains(re)) { - regex_automaton_assumptions.insert(re, svector()); - } - regex_automaton_assumptions[re].push_back(new_aut); - TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); - regex_axiom_add = true; - find_automaton_initial_bounds(str_in_re, aut); - } else { - // TODO check negation? - // TODO construct a partial automaton for R to the given upper bound? - if (false) { - - } else { - regex_inc_counter(regex_fail_count, str_in_re); - } - } - continue; - } - // if we have *any* automaton for R, and the upper bound is not too large, - // finitize the automaton (if we have not already done so) and assert all solutions - if (upper_bound_value < 50) { // TODO better metric for threshold - // NOT_IMPLEMENTED_YET(); // TODO(mtrberzi) - } - } else { // !upper_bound_exists - // no upper bound information - if (lower_bound_exists && !lower_bound_value.is_zero() && new_lower_bound_info) { - // nonzero lower bound, no upper bound - - // check current assumptions - if (regex_automaton_assumptions.contains(re) && - !regex_automaton_assumptions[re].empty()){ - // one or more existing assumptions. - // see if the (current best) lower bound can be refined - // (note that if we have an automaton with no assumption, - // this automatically counts as best) - bool need_assumption = true; - regex_automaton_under_assumptions last_assumption; - rational last_lb = rational::zero(); // the default - for (svector::iterator it = regex_automaton_assumptions[re].begin(); - it != regex_automaton_assumptions[re].end(); ++it) { - regex_automaton_under_assumptions autA = *it; - if ((current_assignment == l_true && autA.get_polarity() == false) - || (current_assignment == l_false && autA.get_polarity() == true)) { - // automaton uses incorrect polarity - continue; - } - rational this_lb; - if (autA.get_lower_bound(this_lb)) { - if (this_lb > last_lb) { - last_lb = this_lb; - last_assumption = autA; - } - } else { - need_assumption = false; - last_assumption = autA; - break; - } - } - if (!last_lb.is_zero() || !need_assumption) { - CTRACE("str", !need_assumption, tout << "using automaton with full length information" << std::endl;); - CTRACE("str", need_assumption, tout << "using automaton with assumed lower bound of " << last_lb << std::endl;); - rational refined_lower_bound; - bool solution_at_lower_bound = refine_automaton_lower_bound(last_assumption.get_automaton(), - lower_bound_value, refined_lower_bound); - TRACE("str", tout << "refined lower bound is " << refined_lower_bound << - (solution_at_lower_bound?", solution at lower bound":", no solution at lower bound") << std::endl;); - - expr_ref_vector lhs(m); - if (current_assignment == l_false) { - lhs.push_back(m.mk_not(str_in_re)); - } else { - lhs.push_back(str_in_re); - } - if (need_assumption) { - lhs.push_back(m_autil.mk_ge(str_len, m_autil.mk_numeral(last_lb, true))); - } - lhs.push_back(m_autil.mk_ge(str_len, m_autil.mk_numeral(lower_bound_value, true))); - - expr_ref_vector rhs(m); - - if (solution_at_lower_bound) { - if (refined_lower_bound.is_minus_one()) { - // If there are solutions at the lower bound but not above it, make the bound exact. - rhs.push_back(ctx.mk_eq_atom(str_len, m_autil.mk_numeral(lower_bound_value, true))); - } else { - // If there are solutions at and above the lower bound, add an additional bound. - // DISABLED as this is causing non-termination in the integer solver. --mtrberzi - /* - rhs.push_back(m.mk_or( - ctx.mk_eq_atom(str_len, m_autil.mk_numeral(lower_bound_value, true)), - m_autil.mk_ge(str_len, m_autil.mk_numeral(refined_lower_bound, true)) - )); - */ - } - } else { - if (refined_lower_bound.is_minus_one()) { - // If there are no solutions at or above the lower bound, assert a conflict clause. - rhs.push_back(m.mk_not(m_autil.mk_ge(str_len, m_autil.mk_numeral(lower_bound_value, true)))); - } else { - // If there are solutions above the lower bound but not at it, refine the bound. - rhs.push_back(m_autil.mk_ge(str_len, m_autil.mk_numeral(refined_lower_bound, true))); - } - } - - if (!rhs.empty()) { - expr_ref lhs_terms(mk_and(lhs), m); - expr_ref rhs_terms(mk_and(rhs), m); - assert_implication(lhs_terms, rhs_terms); - regex_axiom_add = true; - } - } - } else { - // no existing automata/assumptions. - // if it's easy to construct a full automaton for R, do so - unsigned expected_complexity = estimate_regex_complexity(re); - bool failureThresholdExceeded = (regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold); - if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold || failureThresholdExceeded) { - eautomaton * aut = m_mk_aut(re); - aut->compress(); - regex_automata.push_back(aut); - regex_automaton_under_assumptions new_aut(re, aut, true); - if (!regex_automaton_assumptions.contains(re)) { - regex_automaton_assumptions.insert(re, svector()); - } - regex_automaton_assumptions[re].push_back(new_aut); - TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); - regex_axiom_add = true; - find_automaton_initial_bounds(str_in_re, aut); - } else { - // TODO check negation? - // TODO construct a partial automaton for R to the given lower bound? - if (false) { - - } else { - regex_inc_counter(regex_fail_count, str_in_re); - } - } - continue; - } - } else { // !lower_bound_exists - // no bounds information - // check for existing automata; - // try to construct an automaton if we don't have one yet - // and doing so without bounds is not difficult - bool existingAutomata = (regex_automaton_assumptions.contains(re) && !regex_automaton_assumptions[re].empty()); - bool failureThresholdExceeded = (regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold); - if (!existingAutomata) { - unsigned expected_complexity = estimate_regex_complexity(re); - if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold - || failureThresholdExceeded) { - eautomaton * aut = m_mk_aut(re); - aut->compress(); - regex_automata.push_back(aut); - regex_automaton_under_assumptions new_aut(re, aut, true); - if (!regex_automaton_assumptions.contains(re)) { - regex_automaton_assumptions.insert(re, svector()); - } - regex_automaton_assumptions[re].push_back(new_aut); - TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); - regex_axiom_add = true; - find_automaton_initial_bounds(str_in_re, aut); - } else { - regex_inc_counter(regex_fail_count, str_in_re); - } - } else { - regex_inc_counter(regex_fail_count, str_in_re); - } - } - } - } // foreach (entry in regex_terms) - - for (obj_map >::iterator it = regex_terms_by_string.begin(); - it != regex_terms_by_string.end(); ++it) { - // TODO do we need to check equivalence classes of strings here? - - expr * str = it->m_key; - ptr_vector str_in_re_terms = it->m_value; - - svector intersect_constraints; - // we may find empty intersection before checking every constraint; - // this vector keeps track of which ones actually take part in intersection - svector used_intersect_constraints; - - // choose an automaton/assumption for each assigned (str.in.re) - // that's consistent with the current length information - for (ptr_vector::iterator term_it = str_in_re_terms.begin(); - term_it != str_in_re_terms.end(); ++term_it) { - expr * _unused = nullptr; - expr * re = nullptr; - SASSERT(u.str.is_in_re(*term_it)); - u.str.is_in_re(*term_it, _unused, re); - - rational exact_len; - bool has_exact_len = get_len_value(str, exact_len); - - rational lb, ub; - bool has_lower_bound = lower_bound(mk_strlen(str), lb); - bool has_upper_bound = upper_bound(mk_strlen(str), ub); - - if (regex_automaton_assumptions.contains(re) && - !regex_automaton_assumptions[re].empty()){ - for (svector::iterator aut_it = regex_automaton_assumptions[re].begin(); - aut_it != regex_automaton_assumptions[re].end(); ++aut_it) { - regex_automaton_under_assumptions aut = *aut_it; - rational aut_ub; - bool assume_ub = aut.get_upper_bound(aut_ub); - rational aut_lb; - bool assume_lb = aut.get_lower_bound(aut_lb); - bool consistent = true; - - if (assume_ub) { - // check consistency of assumed upper bound - if (has_exact_len) { - if (exact_len > aut_ub) { - consistent = false; - } - } else { - if (has_upper_bound && ub > aut_ub) { - consistent = false; - } - } - } - - if (assume_lb) { - // check consistency of assumed lower bound - if (has_exact_len) { - if (exact_len < aut_lb) { - consistent = false; - } - } else { - if (has_lower_bound && lb < aut_lb) { - consistent = false; - } - } - } - - if (consistent) { - intersect_constraints.push_back(aut); - break; - } - } - } - } // foreach(term in str_in_re_terms) - - eautomaton * aut_inter = NULL; - CTRACE("str", !intersect_constraints.empty(), tout << "check intersection of automata constraints for " << mk_pp(str, m) << std::endl;); - for (svector::iterator aut_it = intersect_constraints.begin(); - aut_it != intersect_constraints.end(); ++aut_it) { - regex_automaton_under_assumptions aut = *aut_it; - if (aut_inter == NULL) { - // start somewhere - aut_inter = aut.get_automaton(); - used_intersect_constraints.push_back(aut); - continue; - } - - TRACE("str", - { - unsigned v = regex_get_counter(regex_length_attempt_count, aut.get_regex_term()); - tout << "length attempt count of " << mk_pp(aut.get_regex_term(), m) << " is " << v - << ", threshold is " << m_params.m_RegexAutomata_LengthAttemptThreshold << std::endl; - }); - - if (regex_get_counter(regex_length_attempt_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_LengthAttemptThreshold) { - unsigned intersectionDifficulty = estimate_automata_intersection_difficulty(aut_inter, aut.get_automaton()); - TRACE("str", tout << "intersection difficulty is " << intersectionDifficulty << std::endl;); - if (intersectionDifficulty <= m_params.m_RegexAutomata_IntersectionDifficultyThreshold - || regex_get_counter(regex_intersection_fail_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_FailedIntersectionThreshold) { - - expr * str_in_re_term(u.re.mk_in_re(str, aut.get_regex_term())); - lbool current_assignment = ctx.get_assignment(str_in_re_term); - // if the assignment is consistent with our assumption, use the automaton directly; - // otherwise, complement it (and save that automaton for next time) - // TODO we should cache these intermediate results - // TODO do we need to push the intermediates into a vector for deletion anyway? - if ( (current_assignment == l_true && aut.get_polarity()) - || (current_assignment == l_false && !aut.get_polarity())) { - aut_inter = m_mk_aut.mk_product(aut_inter, aut.get_automaton()); - m_automata.push_back(aut_inter); - } else { - // need to complement first - expr_ref rc(u.re.mk_complement(aut.get_regex_term()), m); - eautomaton * aut_c = m_mk_aut(rc); - regex_automata.push_back(aut_c); - // TODO is there any way to build a complement automaton from an existing one? - // this discards length information - aut_inter = m_mk_aut.mk_product(aut_inter, aut_c); - m_automata.push_back(aut_inter); - } - used_intersect_constraints.push_back(aut); - if (aut_inter->is_empty()) { - break; - } - } else { - // failed intersection - regex_inc_counter(regex_intersection_fail_count, aut.get_regex_term()); - } - } - } // foreach(entry in intersect_constraints) - if (aut_inter != NULL) { - aut_inter->compress(); - } - TRACE("str", tout << "intersected " << used_intersect_constraints.size() << " constraints" << std::endl;); - - expr_ref_vector conflict_terms(m); - expr_ref conflict_lhs(m); - for (svector::iterator aut_it = used_intersect_constraints.begin(); - aut_it != used_intersect_constraints.end(); ++aut_it) { - regex_automaton_under_assumptions aut = *aut_it; - expr * str_in_re_term(u.re.mk_in_re(str, aut.get_regex_term())); - lbool current_assignment = ctx.get_assignment(str_in_re_term); - if (current_assignment == l_true) { - conflict_terms.push_back(str_in_re_term); - } else if (current_assignment == l_false) { - conflict_terms.push_back(m.mk_not(str_in_re_term)); - } - // add length assumptions, if any - rational ub; - if (aut.get_upper_bound(ub)) { - expr_ref ub_term(m_autil.mk_le(mk_strlen(str), m_autil.mk_numeral(ub, true)), m); - conflict_terms.push_back(ub_term); - } - rational lb; - if (aut.get_lower_bound(lb)) { - expr_ref lb_term(m_autil.mk_ge(mk_strlen(str), m_autil.mk_numeral(lb, true)), m); - conflict_terms.push_back(lb_term); - } - } - conflict_lhs = mk_and(conflict_terms); - - if (used_intersect_constraints.size() > 1 && aut_inter != NULL) { - // check whether the intersection is only the empty string - unsigned initial_state = aut_inter->init(); - if (aut_inter->final_states().size() == 1 && aut_inter->is_final_state(initial_state)) { - // initial state is final and it is the only final state - // if there are no moves from the initial state, - // the only solution is the empty string - if (aut_inter->get_moves_from(initial_state).empty()) { - TRACE("str", tout << "product automaton only accepts empty string" << std::endl;); - expr_ref rhs1(ctx.mk_eq_atom(str, mk_string("")), m); - expr_ref rhs2(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(rational::zero(), true)), m); - expr_ref rhs(m.mk_and(rhs1, rhs2), m); - assert_implication(conflict_lhs, rhs); - regex_axiom_add = true; - } - } - } - - if (aut_inter != NULL && aut_inter->is_empty()) { - TRACE("str", tout << "product automaton is empty; asserting conflict clause" << std::endl;); - expr_ref conflict_clause(m.mk_not(mk_and(conflict_terms)), m); - assert_axiom(conflict_clause); - add_persisted_axiom(conflict_clause); - regex_axiom_add = true; - } - } // foreach (entry in regex_terms_by_string) - if (regex_axiom_add) { - //return FC_CONTINUE; - } + solve_regex_automata(); } // RegexAutomata bool needToAssignFreeVars = false; diff --git a/src/smt/theory_str.h b/src/smt/theory_str.h index 419084091..6ee6866b3 100644 --- a/src/smt/theory_str.h +++ b/src/smt/theory_str.h @@ -30,6 +30,7 @@ #include "smt/params/theory_str_params.h" #include "smt/proto_model/value_factory.h" #include "smt/smt_model_generator.h" +#include "smt/smt_arith_value.h" #include #include #include @@ -546,6 +547,7 @@ protected: void process_concat_eq_unroll(expr * concat, expr * unroll); // regex automata and length-aware regex + void solve_regex_automata(); unsigned estimate_regex_complexity(expr * re); unsigned estimate_regex_complexity_under_complement(expr * re); unsigned estimate_automata_intersection_difficulty(eautomaton * aut1, eautomaton * aut2);