3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-08-12 06:00:53 +00:00

short path for length-0 regex terms

This commit is contained in:
Murphy Berzish 2018-01-17 18:26:31 -05:00
parent c0ed683882
commit c2b268c645
2 changed files with 206 additions and 119 deletions

View file

@ -6646,6 +6646,9 @@ namespace smt {
* In some cases, the returned formula requires one or more free integer variables to be created. * In some cases, the returned formula requires one or more free integer variables to be created.
* These variables are returned in the reference parameter `freeVariables`. * These variables are returned in the reference parameter `freeVariables`.
* Extra assertions should be made for these free variables constraining them to be non-negative. * Extra assertions should be made for these free variables constraining them to be non-negative.
*
* TODO: star unrolling?
* TODO: generate stars "correctly" as a linear combination of all possible subterm lengths
*/ */
expr_ref theory_str::infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables) { expr_ref theory_str::infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables) {
ENSURE(u.is_re(re)); ENSURE(u.is_re(re));
@ -6725,6 +6728,59 @@ namespace smt {
} }
} }
/*
 * Assert initial length bounds for the positive constraint (str in re) corresponding
 * to the automaton `aut`.
 *
 * This asserts a constraint of the form:
 *   str_in_re --> (len(str) ?= 0 OR len(str) >= lb)
 * where the equality with 0 is included only if the automaton accepts a solution of
 * length 0, and the `>= lb` disjunct is included only if the automaton accepts some
 * solution of non-zero length. If the automaton accepts ONLY a solution of length 0,
 * the constraint degenerates to
 *   str_in_re --> len(str) = 0
 * and if no solution length could be determined at all, nothing is asserted.
 *
 * The lower bound that was found is also recorded in `regex_last_lower_bound` for `str`.
 *
 * NOTE: an upper bound clause (len(str) <= ub) is not asserted yet; see the TODO below.
 *
 * str_in_re: a regex membership term (str in re); it is checked with ENSURE.
 * aut: the automaton for the regex; must not be NULL.
 */
void theory_str::find_automaton_initial_bounds(expr * str_in_re, eautomaton * aut) {
    ENSURE(aut != NULL);
    context & ctx = get_context();
    ast_manager & m = get_manager();

    expr_ref_vector rhs(m);
    expr * str = NULL;
    expr * re = NULL;
    // Fail loudly instead of reading uninitialized pointers if the term is not
    // actually a regex membership predicate.
    ENSURE(u.str.is_in_re(str_in_re, str, re));
    expr_ref strlen(mk_strlen(str), m);

    // lower bound first
    // refine_automaton_lower_bound() tells us whether a solution of length 0 exists;
    // `nonzero_lower_bound` is tested against -1 below, which is taken to mean
    // "no solution longer than 0 was found".
    rational nonzero_lower_bound;
    bool zero_sol_exists = refine_automaton_lower_bound(aut, rational::zero(), nonzero_lower_bound);
    bool longer_sol_exists = !nonzero_lower_bound.is_minus_one();

    if (zero_sol_exists) {
        // solution at 0
        regex_last_lower_bound.insert(str, rational::zero());
        expr_ref len_is_zero(ctx.mk_eq_atom(strlen, m_autil.mk_numeral(rational::zero(), true)), m);
        if (longer_sol_exists) {
            expr_ref len_ge_lb(m_autil.mk_ge(strlen, m_autil.mk_numeral(nonzero_lower_bound, true)), m);
            rhs.push_back(m.mk_or(len_is_zero, len_ge_lb));
        } else {
            // The automaton accepts a solution of length 0 and nothing longer
            // (e.g. the regex matches only the empty string): the length can ONLY be 0.
            rhs.push_back(len_is_zero);
        }
    } else {
        // no solution at 0
        if (longer_sol_exists) {
            regex_last_lower_bound.insert(str, nonzero_lower_bound);
            expr_ref len_ge_lb(m_autil.mk_ge(strlen, m_autil.mk_numeral(nonzero_lower_bound, true)), m);
            rhs.push_back(len_ge_lb);
        } else {
            // No solution length was found at all (presumably the automaton is empty).
            // 0 is always a safe lower bound on a string length, so record that and
            // assert nothing here rather than aborting.
            regex_last_lower_bound.insert(str, rational::zero());
        }
    }

    // TODO upper bound check

    if (!rhs.empty()) {
        expr_ref lhs(str_in_re, m);
        expr_ref _rhs(mk_and(rhs), m);
        assert_implication(lhs, _rhs);
    }
}
/* /*
* Refine the lower bound on the length of a solution to a given automaton. * Refine the lower bound on the length of a solution to a given automaton.
* The method returns TRUE if a solution of length `current_lower_bound` exists, * The method returns TRUE if a solution of length `current_lower_bound` exists,
@ -9596,7 +9652,6 @@ namespace smt {
// regex automata // regex automata
if (m_params.m_RegexAutomata) { if (m_params.m_RegexAutomata) {
// TODO since heuristics might fail, the "no progress" flag might need to be handled specially here // TODO since heuristics might fail, the "no progress" flag might need to be handled specially here
// TODO learning of linear length constraints in the style of length automata, if possible?
bool regex_axiom_add = false; bool regex_axiom_add = false;
for (obj_hashtable<expr>::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) { for (obj_hashtable<expr>::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) {
expr * str_in_re = *it; expr * str_in_re = *it;
@ -9667,6 +9722,12 @@ namespace smt {
continue; continue;
} }
if (exact_length_value.is_zero()) {
// shortcut
expr_ref lhs(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(rational::zero(), true)), m);
expr_ref rhs(ctx.mk_eq_atom(str, mk_string("")), m);
assert_implication(lhs, rhs);
} else {
// find a consistent automaton for this term // find a consistent automaton for this term
bool found = false; bool found = false;
regex_automaton_under_assumptions assumption; regex_automaton_under_assumptions assumption;
@ -9743,13 +9804,14 @@ namespace smt {
regex_automaton_assumptions[re].push_back(new_aut); regex_automaton_assumptions[re].push_back(new_aut);
TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
regex_axiom_add = true; regex_axiom_add = true;
// TODO immediately attempt to learn lower/upper bound info here find_automaton_initial_bounds(str_in_re, aut);
} else { } else {
regex_inc_counter(regex_fail_count, str_in_re); regex_inc_counter(regex_fail_count, str_in_re);
} }
continue; continue;
} }
} } // !length is zero
} // get_len_value()
expr_ref str_len(mk_strlen(str), m); expr_ref str_len(mk_strlen(str), m);
rational lower_bound_value; rational lower_bound_value;
rational upper_bound_value; rational upper_bound_value;
@ -9858,11 +9920,13 @@ namespace smt {
} }
} }
if (!rhs.empty()) {
expr_ref lhs_terms(mk_and(lhs), m); expr_ref lhs_terms(mk_and(lhs), m);
expr_ref rhs_terms(mk_and(rhs), m); expr_ref rhs_terms(mk_and(rhs), m);
assert_implication(lhs_terms, rhs_terms); assert_implication(lhs_terms, rhs_terms);
regex_axiom_add = true; regex_axiom_add = true;
} }
}
} else { } else {
// no existing automata/assumptions. // no existing automata/assumptions.
// if it's easy to construct a full automaton for R, do so // if it's easy to construct a full automaton for R, do so
@ -9878,7 +9942,7 @@ namespace smt {
regex_automaton_assumptions[re].push_back(new_aut); regex_automaton_assumptions[re].push_back(new_aut);
TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
regex_axiom_add = true; regex_axiom_add = true;
// TODO immediately attempt to learn lower/upper bound info here find_automaton_initial_bounds(str_in_re, aut);
} else { } else {
// TODO check negation? // TODO check negation?
// TODO construct a partial automaton for R to the given upper bound? // TODO construct a partial automaton for R to the given upper bound?
@ -9976,11 +10040,13 @@ namespace smt {
} }
} }
if (!rhs.empty()) {
expr_ref lhs_terms(mk_and(lhs), m); expr_ref lhs_terms(mk_and(lhs), m);
expr_ref rhs_terms(mk_and(rhs), m); expr_ref rhs_terms(mk_and(rhs), m);
assert_implication(lhs_terms, rhs_terms); assert_implication(lhs_terms, rhs_terms);
regex_axiom_add = true; regex_axiom_add = true;
} }
}
} else { } else {
// no existing automata/assumptions. // no existing automata/assumptions.
// if it's easy to construct a full automaton for R, do so // if it's easy to construct a full automaton for R, do so
@ -9996,7 +10062,7 @@ namespace smt {
regex_automaton_assumptions[re].push_back(new_aut); regex_automaton_assumptions[re].push_back(new_aut);
TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
regex_axiom_add = true; regex_axiom_add = true;
// TODO immediately attempt to learn lower/upper bound info here find_automaton_initial_bounds(str_in_re, aut);
} else { } else {
// TODO check negation? // TODO check negation?
// TODO construct a partial automaton for R to the given lower bound? // TODO construct a partial automaton for R to the given lower bound?
@ -10016,7 +10082,7 @@ namespace smt {
// and doing so without bounds is not difficult // and doing so without bounds is not difficult
bool existingAutomata = (regex_automaton_assumptions.contains(re) && !regex_automaton_assumptions[re].empty()); bool existingAutomata = (regex_automaton_assumptions.contains(re) && !regex_automaton_assumptions[re].empty());
bool failureThresholdExceeded = (regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold); bool failureThresholdExceeded = (regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold);
if (!existingAutomata || failureThresholdExceeded) { if (!existingAutomata) {
unsigned expected_complexity = estimate_regex_complexity(re); unsigned expected_complexity = estimate_regex_complexity(re);
if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold
|| failureThresholdExceeded) { || failureThresholdExceeded) {
@ -10030,7 +10096,7 @@ namespace smt {
regex_automaton_assumptions[re].push_back(new_aut); regex_automaton_assumptions[re].push_back(new_aut);
TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;); TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
regex_axiom_add = true; regex_axiom_add = true;
// TODO immediately attempt to learn lower/upper bound info here find_automaton_initial_bounds(str_in_re, aut);
} else { } else {
regex_inc_counter(regex_fail_count, str_in_re); regex_inc_counter(regex_fail_count, str_in_re);
} }
@ -10165,11 +10231,11 @@ namespace smt {
} }
} }
} // foreach(entry in intersect_constraints) } // foreach(entry in intersect_constraints)
aut_inter->compress();
TRACE("str", tout << "intersected " << used_intersect_constraints.size() << " constraints" << std::endl;); TRACE("str", tout << "intersected " << used_intersect_constraints.size() << " constraints" << std::endl;);
if (aut_inter != NULL && aut_inter->is_empty()) {
TRACE("str", tout << "product automaton is empty; asserting conflict clause" << std::endl;);
expr_ref_vector conflict_terms(m);
expr_ref_vector conflict_terms(m);
expr_ref conflict_lhs(m);
for (svector<regex_automaton_under_assumptions>::iterator aut_it = used_intersect_constraints.begin(); for (svector<regex_automaton_under_assumptions>::iterator aut_it = used_intersect_constraints.begin();
aut_it != used_intersect_constraints.end(); ++aut_it) { aut_it != used_intersect_constraints.end(); ++aut_it) {
regex_automaton_under_assumptions aut = *aut_it; regex_automaton_under_assumptions aut = *aut_it;
@ -10192,15 +10258,35 @@ namespace smt {
conflict_terms.push_back(lb_term); conflict_terms.push_back(lb_term);
} }
} }
conflict_lhs = mk_and(conflict_terms);
if (used_intersect_constraints.size() > 1 && aut_inter != NULL) {
// check whether the intersection is only the empty string
unsigned initial_state = aut_inter->init();
if (aut_inter->final_states().size() == 1 && aut_inter->is_final_state(initial_state)) {
// initial state is final and it is the only final state
// if there are no moves from the initial state,
// the only solution is the empty string
if (aut_inter->get_moves_from(initial_state).empty()) {
TRACE("str", tout << "product automaton only accepts empty string" << std::endl;);
expr_ref rhs1(ctx.mk_eq_atom(str, mk_string("")), m);
expr_ref rhs2(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(rational::zero(), true)), m);
expr_ref rhs(m.mk_and(rhs1, rhs2), m);
assert_implication(conflict_lhs, rhs);
regex_axiom_add = true;
}
}
}
if (aut_inter != NULL && aut_inter->is_empty()) {
TRACE("str", tout << "product automaton is empty; asserting conflict clause" << std::endl;);
expr_ref conflict_clause(m.mk_not(mk_and(conflict_terms)), m); expr_ref conflict_clause(m.mk_not(mk_and(conflict_terms)), m);
assert_axiom(conflict_clause); assert_axiom(conflict_clause);
regex_axiom_add = true; regex_axiom_add = true;
} }
} // foreach (entry in regex_terms_by_string) } // foreach (entry in regex_terms_by_string)
if (regex_axiom_add) { if (regex_axiom_add) {
return FC_CONTINUE; //return FC_CONTINUE;
} }
} // RegexAutomata } // RegexAutomata

View file

@ -556,6 +556,7 @@ protected:
bool check_regex_length_linearity(expr * re); bool check_regex_length_linearity(expr * re);
bool check_regex_length_linearity_helper(expr * re, bool already_star); bool check_regex_length_linearity_helper(expr * re, bool already_star);
expr_ref infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables); expr_ref infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables);
void find_automaton_initial_bounds(expr * str_in_re, eautomaton * aut);
bool refine_automaton_lower_bound(eautomaton * aut, rational current_lower_bound, rational & refined_lower_bound); bool refine_automaton_lower_bound(eautomaton * aut, rational current_lower_bound, rational & refined_lower_bound);
bool refine_automaton_upper_bound(eautomaton * aut, rational current_upper_bound, rational & refined_upper_bound); bool refine_automaton_upper_bound(eautomaton * aut, rational current_upper_bound, rational & refined_upper_bound);
expr_ref generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints); expr_ref generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints);