3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-14 21:08:46 +00:00

add upper bound refinement (WIP)

This commit is contained in:
Murphy Berzish 2017-12-21 17:13:39 -05:00
parent a5c828f6f2
commit 0ac7082c80
2 changed files with 305 additions and 3 deletions

View file

@ -74,6 +74,7 @@ namespace smt {
theory_str::~theory_str() {
m_trail_stack.reset();
regex_automata.clear();
}
expr * theory_str::mk_string(zstring const& str) {
@ -1741,14 +1742,19 @@ namespace smt {
regex_in_var_reg_str_map[ex->get_arg(0)].insert(regexStr);
}
expr_ref str(ex->get_arg(0), m);
app * regex = to_app(ex->get_arg(1));
if (m_params.m_RegexAutomata) {
regex_terms.insert(ex);
if (!regex_terms_by_string.contains(str)) {
regex_terms_by_string.insert(str, ptr_vector<expr>());
}
regex_terms_by_string[str].push_back(ex);
// stop setting up axioms here, we do this differently
return;
}
expr_ref str(ex->get_arg(0), m);
app * regex = to_app(ex->get_arg(1));
// quick reference for the following code:
// - ex: top-level regex membership term
// - str: string term appearing in ex
@ -6665,6 +6671,77 @@ namespace smt {
}
}
/*
* Refine the upper bound on the length of a solution to a given automaton.
* The method returns TRUE if a solution of length `current_upper_bound` exists,
* and FALSE otherwise. In addition, the reference parameter `refined_upper_bound`
* is assigned the length of the longest solution shorter than `current_upper_bound`,
* if a shorter solution exists, or -1 otherwise.
*/
bool theory_str::refine_automaton_upper_bound(eautomaton * aut, rational current_upper_bound, rational & refined_upper_bound) {
ENSURE(aut != NULL);
if (aut->final_states().empty()) {
// no solutions at all!
refined_upper_bound = rational::minus_one();
return false;
}
// from here we assume there is a final state reachable from the initial state
unsigned_vector search_queue;
// populate search queue with all states reachable from the epsilon-closure of the start state
aut->get_epsilon_closure(aut->init(), search_queue);
rational last_solution_depth = rational::minus_one();
bool found_solution_at_upper_bound = false;
unsigned search_depth = 0;
hashtable<unsigned, unsigned_hash, default_eq<unsigned> > next_states;
unsigned_vector next_search_queue;
while(!search_queue.empty()) {
// see if any of the current states are final
for (unsigned_vector::iterator it = search_queue.begin(); it != search_queue.end(); ++it) {
unsigned src = *it;
if (aut->is_final_state(src)) {
if (search_depth == current_upper_bound.get_unsigned()) {
found_solution_at_upper_bound = true;
} else {
last_solution_depth = rational(search_depth);
}
break;
}
}
if (search_depth == current_upper_bound.get_unsigned()) {
break;
}
next_states.reset();
next_search_queue.clear();
// move one step along all states
for (unsigned_vector::iterator it = search_queue.begin(); it != search_queue.end(); ++it) {
unsigned src = *it;
eautomaton::moves next_moves;
aut->get_moves_from(src, next_moves, true);
for (eautomaton::moves::iterator moves_it = next_moves.begin();
moves_it != next_moves.end(); ++moves_it) {
unsigned dst = moves_it->dst();
if (!next_states.contains(dst)) {
next_states.insert(dst);
next_search_queue.push_back(dst);
}
}
}
search_queue.clear();
search_queue.append(next_search_queue);
search_depth += 1;
} //!search_queue.empty()
refined_upper_bound = last_solution_depth;
return found_solution_at_upper_bound;
}
/*
* strArgmt::solve_concat_eq_str()
* Solve concatenations of the form:
@ -9076,6 +9153,155 @@ namespace smt {
}
}
// regex automata
if (m_params.m_RegexAutomata) {
bool regex_axiom_add = false;
for (obj_hashtable<expr>::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) {
expr * str_in_re = *it;
expr * str;
expr * re;
u.str.is_in_re(str_in_re, str, re);
lbool current_assignment = ctx.get_assignment(str_in_re);
TRACE("str", tout << "regex term: " << mk_pp(str, m) << " in " << mk_pp(re, m) << " : " << current_assignment << std::endl;);
if (current_assignment == l_undef) {
continue;
}
rational exact_length_value;
if (get_len_value(str, exact_length_value)) {
TRACE("str", tout << "exact length of " << mk_pp(str, m) << " is " << exact_length_value << std::endl;);
NOT_IMPLEMENTED_YET();
}
expr_ref str_len(mk_strlen(str), m);
rational lower_bound_value;
rational upper_bound_value;
bool lower_bound_exists = lower_bound(str_len, lower_bound_value);
bool upper_bound_exists = upper_bound(str_len, upper_bound_value);
CTRACE("str", lower_bound_exists, tout << "lower bound of " << mk_pp(str, m) << " is " << lower_bound_value << std::endl;);
CTRACE("str", upper_bound_exists, tout << "upper bound of " << mk_pp(str, m) << " is " << upper_bound_value << std::endl;);
if (upper_bound_exists) {
// check current assumptions
if (regex_automaton_assumptions.contains(re) &&
!regex_automaton_assumptions[re].empty()){
// one or more existing assumptions.
// see if the (current best) upper bound can be refined
// (note that if we have an automaton with no assumption,
// this automatically counts as best)
bool need_assumption = true;
regex_automaton_under_assumptions last_assumption;
rational last_ub = rational::minus_one();
for (svector<regex_automaton_under_assumptions>::iterator it = regex_automaton_assumptions[re].begin();
it != regex_automaton_assumptions[re].end(); ++it) {
regex_automaton_under_assumptions autA = *it;
if ((current_assignment == l_true && autA.get_polarity() == false)
|| (current_assignment == l_false && autA.get_polarity() == true)) {
// automaton uses incorrect polarity
continue;
}
rational this_ub;
if (autA.get_upper_bound(this_ub)) {
if (last_ub == rational::minus_one() || this_ub < last_ub) {
last_ub = this_ub;
last_assumption = autA;
}
} else {
need_assumption = false;
last_assumption = autA;
break;
}
}
if (!last_ub.is_minus_one() || !need_assumption) {
CTRACE("str", !need_assumption, tout << "using automaton with full length information" << std::endl;);
CTRACE("str", need_assumption, tout << "using automaton with assumed upper bound of " << last_ub << std::endl;);
rational refined_upper_bound;
bool solution_at_upper_bound = refine_automaton_upper_bound(last_assumption.get_automaton(),
upper_bound_value, refined_upper_bound);
TRACE("str", tout << "refined upper bound is " << refined_upper_bound <<
(solution_at_upper_bound?", solution at upper bound":", no solution at upper bound") << std::endl;);
expr_ref_vector lhs(m);
if (current_assignment == l_false) {
lhs.push_back(m.mk_not(str_in_re));
} else {
lhs.push_back(str_in_re);
}
if (need_assumption) {
lhs.push_back(m_autil.mk_le(str_len, m_autil.mk_numeral(last_ub, true)));
}
lhs.push_back(m_autil.mk_le(str_len, m_autil.mk_numeral(upper_bound_value, true)));
expr_ref_vector rhs(m);
if (solution_at_upper_bound) {
if (refined_upper_bound.is_minus_one()) {
// If there are solutions at the upper bound but not below it, make the bound exact.
NOT_IMPLEMENTED_YET();
} else {
// If there are solutions at and below the upper bound, add an additional bound.
NOT_IMPLEMENTED_YET();
}
} else {
if (refined_upper_bound.is_minus_one()) {
// If there are no solutions at or below the upper bound, assert a conflict clause.
rhs.push_back(m.mk_not(m_autil.mk_le(str_len, m_autil.mk_numeral(upper_bound_value, true))));
} else {
// If there are solutions below the upper bound but not at it, refine the bound.
NOT_IMPLEMENTED_YET();
}
}
expr_ref lhs_terms(mk_and(lhs), m);
expr_ref rhs_terms(mk_and(rhs), m);
assert_implication(lhs_terms, rhs_terms);
regex_axiom_add = true;
}
} else {
// no existing automata/assumptions.
// if it's easy to construct a full automaton for R, do so
unsigned expected_complexity = estimate_regex_complexity(re);
unsigned threshold = 1000; // TODO better metric
if (expected_complexity <= threshold) {
eautomaton * aut = m_mk_aut(re);
aut->compress();
regex_automata.push_back(aut);
regex_automaton_under_assumptions new_aut(re, aut, true);
if (!regex_automaton_assumptions.contains(re)) {
regex_automaton_assumptions.insert(re, svector<regex_automaton_under_assumptions>());
}
regex_automaton_assumptions[re].push_back(new_aut);
TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
regex_axiom_add = true;
// TODO immediately attempt to learn lower/upper bound info here
} else {
// TODO check negation?
// construct a partial automaton for R to the given upper bound
NOT_IMPLEMENTED_YET();
}
}
// if we have *any* automaton for R, and the upper bound is not too large,
// finitize the automaton (if we have not already done so) and assert all solutions
if (upper_bound_value < 50) { // TODO better metric for threshold
// NOT_IMPLEMENTED_YET(); // TODO(mtrberzi)
}
} else { // !upper_bound_exists
// no upper bound information
if (lower_bound_exists) {
// lower bound, no upper bound
NOT_IMPLEMENTED_YET();
} else { // !lower_bound_exists
// no bounds information
NOT_IMPLEMENTED_YET();
}
}
// NOT_IMPLEMENTED_YET();
}
if (regex_axiom_add) {
return FC_CONTINUE;
}
} // RegexAutomata
bool needToAssignFreeVars = false;
std::set<expr*> free_variables;
std::set<expr*> unused_internal_variables;
@ -10567,6 +10793,13 @@ namespace smt {
SASSERT(lenTestAssert != NULL);
return lenTestAssert;
} else {
// if we are performing automata-based reasoning and the term associated with
// this length tester is in any way constrained by regex terms,
// do not perform value testing -- this term is not a free variable.
if (m_params.m_RegexAutomata) {
NOT_IMPLEMENTED_YET(); // TODO
}
TRACE("str", tout << "length is fixed; generating models for free var" << std::endl;);
// length is fixed
expr * valueAssert = gen_free_var_options(freeVar, effectiveLenInd, effectiveLenIndiStr, NULL, zstring(""));

View file

@ -166,6 +166,69 @@ public:
bool matches(zstring input);
};
class regex_automaton_under_assumptions {
protected:
expr * str_in_re;
eautomaton * aut;
bool polarity;
bool assume_lower_bound;
rational lower_bound;
bool assume_upper_bound;
rational upper_bound;
public:
regex_automaton_under_assumptions() :
str_in_re(NULL), aut(NULL), polarity(false),
assume_lower_bound(false), assume_upper_bound(false) {}
regex_automaton_under_assumptions(expr * str_in_re, eautomaton * aut, bool polarity) :
str_in_re(str_in_re), aut(aut), polarity(polarity),
assume_lower_bound(false), assume_upper_bound(false) {}
void set_lower_bound(rational & lb) {
lower_bound = lb;
assume_lower_bound = true;
}
void unset_lower_bound() {
assume_lower_bound = false;
}
void set_upper_bound(rational & ub) {
upper_bound = ub;
assume_upper_bound = true;
}
void unset_upper_bound() {
assume_upper_bound = false;
}
bool get_lower_bound(rational & lb) const {
if (assume_lower_bound) {
lb = lower_bound;
return true;
} else {
return false;
}
}
bool get_upper_bound(rational & ub) const {
if (assume_upper_bound) {
ub = upper_bound;
return true;
} else {
return false;
}
}
eautomaton * get_automaton() const { return aut; }
bool get_polarity() const { return polarity; }
virtual ~regex_automaton_under_assumptions() {
// don't free str_in_re or aut;
// they are managed separately
}
};
class theory_str : public theory {
struct T_cut
{
@ -338,6 +401,11 @@ protected:
std::map<std::pair<expr*, zstring>, expr*> regex_in_bool_map;
std::map<expr*, std::set<zstring> > regex_in_var_reg_str_map;
// regex automata
ptr_vector<eautomaton> regex_automata;
obj_hashtable<expr> regex_terms;
obj_map<expr, ptr_vector<expr> > regex_terms_by_string; // S --> [ (str.in.re S *) ]
obj_map<expr, svector<regex_automaton_under_assumptions> > regex_automaton_assumptions; // RegEx --> [ aut+assumptions ]
std::map<expr*, nfa> regex_nfa_cache; // Regex term --> NFA
svector<char> char_set;
@ -468,6 +536,7 @@ protected:
unsigned estimate_regex_complexity(expr * re);
unsigned estimate_regex_complexity_under_complement(expr * re);
expr_ref infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables);
bool refine_automaton_upper_bound(eautomaton * aut, rational current_upper_bound, rational & refined_upper_bound);
void set_up_axioms(expr * ex);
void handle_equality(expr * lhs, expr * rhs);