From 0ac7082c8092c25efb3b9f893b2982a9e0ab556e Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Thu, 21 Dec 2017 17:13:39 -0500 Subject: [PATCH] add upper bound refinement (WIP) --- src/smt/theory_str.cpp | 239 ++++++++++++++++++++++++++++++++++++++++- src/smt/theory_str.h | 69 ++++++++++++ 2 files changed, 305 insertions(+), 3 deletions(-) diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index 88180352c..2f41a6617 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -74,6 +74,7 @@ namespace smt { theory_str::~theory_str() { m_trail_stack.reset(); + regex_automata.clear(); } expr * theory_str::mk_string(zstring const& str) { @@ -1741,14 +1742,19 @@ namespace smt { regex_in_var_reg_str_map[ex->get_arg(0)].insert(regexStr); } + expr_ref str(ex->get_arg(0), m); + app * regex = to_app(ex->get_arg(1)); + if (m_params.m_RegexAutomata) { + regex_terms.insert(ex); + if (!regex_terms_by_string.contains(str)) { + regex_terms_by_string.insert(str, ptr_vector()); + } + regex_terms_by_string[str].push_back(ex); // stop setting up axioms here, we do this differently return; } - expr_ref str(ex->get_arg(0), m); - app * regex = to_app(ex->get_arg(1)); - // quick reference for the following code: // - ex: top-level regex membership term // - str: string term appearing in ex @@ -6665,6 +6671,77 @@ namespace smt { } } + /* + * Refine the upper bound on the length of a solution to a given automaton. + * The method returns TRUE if a solution of length `current_upper_bound` exists, + * and FALSE otherwise. In addition, the reference parameter `refined_upper_bound` + * is assigned the length of the longest solution shorter than `current_upper_bound`, + * if a shorter solution exists, or -1 otherwise. + */ + bool theory_str::refine_automaton_upper_bound(eautomaton * aut, rational current_upper_bound, rational & refined_upper_bound) { + ENSURE(aut != NULL); + + if (aut->final_states().empty()) { + // no solutions at all! + refined_upper_bound = rational::minus_one(); + return false; + } + + // from here we assume there is a final state reachable from the initial state + unsigned_vector search_queue; + // populate search queue with all states reachable from the epsilon-closure of the start state + aut->get_epsilon_closure(aut->init(), search_queue); + + rational last_solution_depth = rational::minus_one(); + bool found_solution_at_upper_bound = false; + + unsigned search_depth = 0; + hashtable > next_states; + unsigned_vector next_search_queue; + + while(!search_queue.empty()) { + // see if any of the current states are final + for (unsigned_vector::iterator it = search_queue.begin(); it != search_queue.end(); ++it) { + unsigned src = *it; + if (aut->is_final_state(src)) { + if (search_depth == current_upper_bound.get_unsigned()) { + found_solution_at_upper_bound = true; + } else { + last_solution_depth = rational(search_depth); + } + break; + } + } + + if (search_depth == current_upper_bound.get_unsigned()) { + break; + } + + next_states.reset(); + next_search_queue.clear(); + // move one step along all states + for (unsigned_vector::iterator it = search_queue.begin(); it != search_queue.end(); ++it) { + unsigned src = *it; + eautomaton::moves next_moves; + aut->get_moves_from(src, next_moves, true); + for (eautomaton::moves::iterator moves_it = next_moves.begin(); + moves_it != next_moves.end(); ++moves_it) { + unsigned dst = moves_it->dst(); + if (!next_states.contains(dst)) { + next_states.insert(dst); + next_search_queue.push_back(dst); + } + } + } + search_queue.clear(); + search_queue.append(next_search_queue); + search_depth += 1; + } //!search_queue.empty() + + refined_upper_bound = last_solution_depth; + return found_solution_at_upper_bound; + } + /* * strArgmt::solve_concat_eq_str() * Solve concatenations of the form: @@ -9076,6 +9153,155 @@ namespace smt { } } + // regex automata + if (m_params.m_RegexAutomata) { + bool regex_axiom_add = false; + for (obj_hashtable::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) { + expr * str_in_re = *it; + expr * str; + expr * re; + u.str.is_in_re(str_in_re, str, re); + lbool current_assignment = ctx.get_assignment(str_in_re); + TRACE("str", tout << "regex term: " << mk_pp(str, m) << " in " << mk_pp(re, m) << " : " << current_assignment << std::endl;); + if (current_assignment == l_undef) { + continue; + } + rational exact_length_value; + if (get_len_value(str, exact_length_value)) { + TRACE("str", tout << "exact length of " << mk_pp(str, m) << " is " << exact_length_value << std::endl;); + NOT_IMPLEMENTED_YET(); + } + expr_ref str_len(mk_strlen(str), m); + rational lower_bound_value; + rational upper_bound_value; + bool lower_bound_exists = lower_bound(str_len, lower_bound_value); + bool upper_bound_exists = upper_bound(str_len, upper_bound_value); + CTRACE("str", lower_bound_exists, tout << "lower bound of " << mk_pp(str, m) << " is " << lower_bound_value << std::endl;); + CTRACE("str", upper_bound_exists, tout << "upper bound of " << mk_pp(str, m) << " is " << upper_bound_value << std::endl;); + + if (upper_bound_exists) { + // check current assumptions + if (regex_automaton_assumptions.contains(re) && + !regex_automaton_assumptions[re].empty()){ + // one or more existing assumptions. + // see if the (current best) upper bound can be refined + // (note that if we have an automaton with no assumption, + // this automatically counts as best) + bool need_assumption = true; + regex_automaton_under_assumptions last_assumption; + rational last_ub = rational::minus_one(); + for (svector::iterator it = regex_automaton_assumptions[re].begin(); + it != regex_automaton_assumptions[re].end(); ++it) { + regex_automaton_under_assumptions autA = *it; + if ((current_assignment == l_true && autA.get_polarity() == false) + || (current_assignment == l_false && autA.get_polarity() == true)) { + // automaton uses incorrect polarity + continue; + } + rational this_ub; + if (autA.get_upper_bound(this_ub)) { + if (last_ub == rational::minus_one() || this_ub < last_ub) { + last_ub = this_ub; + last_assumption = autA; + } + } else { + need_assumption = false; + last_assumption = autA; + break; + } + } + if (!last_ub.is_minus_one() || !need_assumption) { + CTRACE("str", !need_assumption, tout << "using automaton with full length information" << std::endl;); + CTRACE("str", need_assumption, tout << "using automaton with assumed upper bound of " << last_ub << std::endl;); + + rational refined_upper_bound; + bool solution_at_upper_bound = refine_automaton_upper_bound(last_assumption.get_automaton(), + upper_bound_value, refined_upper_bound); + TRACE("str", tout << "refined upper bound is " << refined_upper_bound << + (solution_at_upper_bound?", solution at upper bound":", no solution at upper bound") << std::endl;); + + expr_ref_vector lhs(m); + if (current_assignment == l_false) { + lhs.push_back(m.mk_not(str_in_re)); + } else { + lhs.push_back(str_in_re); + } + if (need_assumption) { + lhs.push_back(m_autil.mk_le(str_len, m_autil.mk_numeral(last_ub, true))); + } + lhs.push_back(m_autil.mk_le(str_len, m_autil.mk_numeral(upper_bound_value, true))); + + expr_ref_vector rhs(m); + + if (solution_at_upper_bound) { + if (refined_upper_bound.is_minus_one()) { + // If there are solutions at the upper bound but not below it, make the bound exact. + NOT_IMPLEMENTED_YET(); + } else { + // If there are solutions at and below the upper bound, add an additional bound. + NOT_IMPLEMENTED_YET(); + } + } else { + if (refined_upper_bound.is_minus_one()) { + // If there are no solutions at or below the upper bound, assert a conflict clause. + rhs.push_back(m.mk_not(m_autil.mk_le(str_len, m_autil.mk_numeral(upper_bound_value, true)))); + } else { + // If there are solutions below the upper bound but not at it, refine the bound. + NOT_IMPLEMENTED_YET(); + } + } + + expr_ref lhs_terms(mk_and(lhs), m); + expr_ref rhs_terms(mk_and(rhs), m); + assert_implication(lhs_terms, rhs_terms); + regex_axiom_add = true; + } + } else { + // no existing automata/assumptions. + // if it's easy to construct a full automaton for R, do so + unsigned expected_complexity = estimate_regex_complexity(re); + unsigned threshold = 1000; // TODO better metric + if (expected_complexity <= threshold) { + eautomaton * aut = m_mk_aut(re); + aut->compress(); + regex_automata.push_back(aut); + regex_automaton_under_assumptions new_aut(re, aut, true); + if (!regex_automaton_assumptions.contains(re)) { + regex_automaton_assumptions.insert(re, svector()); + } + regex_automaton_assumptions[re].push_back(new_aut); + TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); + regex_axiom_add = true; + // TODO immediately attempt to learn lower/upper bound info here + } else { + // TODO check negation? + // construct a partial automaton for R to the given upper bound + NOT_IMPLEMENTED_YET(); + } + } + // if we have *any* automaton for R, and the upper bound is not too large, + // finitize the automaton (if we have not already done so) and assert all solutions + if (upper_bound_value < 50) { // TODO better metric for threshold + // NOT_IMPLEMENTED_YET(); // TODO(mtrberzi) + } + } else { // !upper_bound_exists + // no upper bound information + if (lower_bound_exists) { + // lower bound, no upper bound + NOT_IMPLEMENTED_YET(); + } else { // !lower_bound_exists + // no bounds information + NOT_IMPLEMENTED_YET(); + } + } + + // NOT_IMPLEMENTED_YET(); + } + if (regex_axiom_add) { + return FC_CONTINUE; + } + } // RegexAutomata + bool needToAssignFreeVars = false; std::set free_variables; std::set unused_internal_variables; @@ -10567,6 +10793,13 @@ namespace smt { SASSERT(lenTestAssert != NULL); return lenTestAssert; } else { + // if we are performing automata-based reasoning and the term associated with + // this length tester is in any way constrained by regex terms, + // do not perform value testing -- this term is not a free variable. + if (m_params.m_RegexAutomata) { + NOT_IMPLEMENTED_YET(); // TODO + } + TRACE("str", tout << "length is fixed; generating models for free var" << std::endl;); // length is fixed expr * valueAssert = gen_free_var_options(freeVar, effectiveLenInd, effectiveLenIndiStr, NULL, zstring("")); diff --git a/src/smt/theory_str.h b/src/smt/theory_str.h index 602f9feab..57fa48142 100644 --- a/src/smt/theory_str.h +++ b/src/smt/theory_str.h @@ -166,6 +166,69 @@ public: bool matches(zstring input); }; +class regex_automaton_under_assumptions { +protected: + expr * str_in_re; + eautomaton * aut; + bool polarity; + + bool assume_lower_bound; + rational lower_bound; + + bool assume_upper_bound; + rational upper_bound; +public: + regex_automaton_under_assumptions() : + str_in_re(NULL), aut(NULL), polarity(false), + assume_lower_bound(false), assume_upper_bound(false) {} + + regex_automaton_under_assumptions(expr * str_in_re, eautomaton * aut, bool polarity) : + str_in_re(str_in_re), aut(aut), polarity(polarity), + assume_lower_bound(false), assume_upper_bound(false) {} + + void set_lower_bound(rational & lb) { + lower_bound = lb; + assume_lower_bound = true; + } + void unset_lower_bound() { + assume_lower_bound = false; + } + + void set_upper_bound(rational & ub) { + upper_bound = ub; + assume_upper_bound = true; + } + void unset_upper_bound() { + assume_upper_bound = false; + } + + bool get_lower_bound(rational & lb) const { + if (assume_lower_bound) { + lb = lower_bound; + return true; + } else { + return false; + } + } + + bool get_upper_bound(rational & ub) const { + if (assume_upper_bound) { + ub = upper_bound; + return true; + } else { + return false; + } + } + + eautomaton * get_automaton() const { return aut; } + bool get_polarity() const { return polarity; } + + virtual ~regex_automaton_under_assumptions() { + // don't free str_in_re or aut; + // they are managed separately + } +}; + class theory_str : public theory { struct T_cut { @@ -338,6 +401,11 @@ protected: std::map, expr*> regex_in_bool_map; std::map > regex_in_var_reg_str_map; + // regex automata + ptr_vector regex_automata; + obj_hashtable regex_terms; + obj_map > regex_terms_by_string; // S --> [ (str.in.re S *) ] + obj_map > regex_automaton_assumptions; // RegEx --> [ aut+assumptions ] std::map regex_nfa_cache; // Regex term --> NFA svector char_set; @@ -468,6 +536,7 @@ protected: unsigned estimate_regex_complexity(expr * re); unsigned estimate_regex_complexity_under_complement(expr * re); expr_ref infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables); + bool refine_automaton_upper_bound(eautomaton * aut, rational current_upper_bound, rational & refined_upper_bound); void set_up_axioms(expr * ex); void handle_equality(expr * lhs, expr * rhs);