mirror of
https://github.com/Z3Prover/z3
synced 2025-08-12 06:00:53 +00:00
short path for length-0 regex terms
This commit is contained in:
parent
c0ed683882
commit
c2b268c645
2 changed files with 206 additions and 119 deletions
|
@ -6646,6 +6646,9 @@ namespace smt {
|
||||||
* In some cases, the returned formula requires one or more free integer variables to be created.
|
* In some cases, the returned formula requires one or more free integer variables to be created.
|
||||||
* These variables are returned in the reference parameter `freeVariables`.
|
* These variables are returned in the reference parameter `freeVariables`.
|
||||||
* Extra assertions should be made for these free variables constraining them to be non-negative.
|
* Extra assertions should be made for these free variables constraining them to be non-negative.
|
||||||
|
*
|
||||||
|
* TODO: star unrolling?
|
||||||
|
* TODO: generate stars "correctly" as a linear combination of all possible subterm lengths
|
||||||
*/
|
*/
|
||||||
expr_ref theory_str::infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables) {
|
expr_ref theory_str::infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables) {
|
||||||
ENSURE(u.is_re(re));
|
ENSURE(u.is_re(re));
|
||||||
|
@ -6725,6 +6728,59 @@ namespace smt {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Assert initial lower and upper bounds for the positive constraint (str in re) corresponding
|
||||||
|
* to the automaton `aut`.
|
||||||
|
* This asserts a constraint of the form:
|
||||||
|
* str_in_re --> (len(str) ?= 0 OR len(str) >= lb) AND len(str) <= ub
|
||||||
|
* where the upper bound clause is omitted if the upper bound doesn't exist
|
||||||
|
* and the equality with 0 is based on whether solutions of length 0 are allowed.
|
||||||
|
*/
|
||||||
|
void theory_str::find_automaton_initial_bounds(expr * str_in_re, eautomaton * aut) {
|
||||||
|
ENSURE(aut != NULL);
|
||||||
|
context & ctx = get_context();
|
||||||
|
ast_manager & m = get_manager();
|
||||||
|
|
||||||
|
expr_ref_vector rhs(m);
|
||||||
|
expr * str;
|
||||||
|
expr * re;
|
||||||
|
u.str.is_in_re(str_in_re, str, re);
|
||||||
|
expr_ref strlen(mk_strlen(str), m);
|
||||||
|
|
||||||
|
// lower bound first
|
||||||
|
rational nonzero_lower_bound;
|
||||||
|
bool zero_sol_exists = refine_automaton_lower_bound(aut, rational::zero(), nonzero_lower_bound);
|
||||||
|
if (zero_sol_exists) {
|
||||||
|
regex_last_lower_bound.insert(str, rational::zero());
|
||||||
|
// solution at 0
|
||||||
|
if (!nonzero_lower_bound.is_minus_one()) {
|
||||||
|
expr_ref rhs1(ctx.mk_eq_atom(strlen, m_autil.mk_numeral(rational::zero(), true)), m);
|
||||||
|
expr_ref rhs2(m_autil.mk_ge(strlen, m_autil.mk_numeral(nonzero_lower_bound, true)), m);
|
||||||
|
rhs.push_back(m.mk_or(rhs1, rhs2));
|
||||||
|
} else {
|
||||||
|
// shouldn't happen
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// no solution at 0
|
||||||
|
if (!nonzero_lower_bound.is_minus_one()) {
|
||||||
|
regex_last_lower_bound.insert(str, nonzero_lower_bound);
|
||||||
|
expr_ref rhs2(m_autil.mk_ge(strlen, m_autil.mk_numeral(nonzero_lower_bound, true)), m);
|
||||||
|
rhs.push_back(rhs2);
|
||||||
|
} else {
|
||||||
|
// shouldn't happen
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// TODO upper bound check
|
||||||
|
|
||||||
|
if (!rhs.empty()) {
|
||||||
|
expr_ref lhs(str_in_re, m);
|
||||||
|
expr_ref _rhs(mk_and(rhs), m);
|
||||||
|
assert_implication(lhs, _rhs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Refine the lower bound on the length of a solution to a given automaton.
|
* Refine the lower bound on the length of a solution to a given automaton.
|
||||||
* The method returns TRUE if a solution of length `current_lower_bound` exists,
|
* The method returns TRUE if a solution of length `current_lower_bound` exists,
|
||||||
|
@ -9596,7 +9652,6 @@ namespace smt {
|
||||||
// regex automata
|
// regex automata
|
||||||
if (m_params.m_RegexAutomata) {
|
if (m_params.m_RegexAutomata) {
|
||||||
// TODO since heuristics might fail, the "no progress" flag might need to be handled specially here
|
// TODO since heuristics might fail, the "no progress" flag might need to be handled specially here
|
||||||
// TODO learning of linear length constraints in the style of length automata, if possible?
|
|
||||||
bool regex_axiom_add = false;
|
bool regex_axiom_add = false;
|
||||||
for (obj_hashtable<expr>::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) {
|
for (obj_hashtable<expr>::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) {
|
||||||
expr * str_in_re = *it;
|
expr * str_in_re = *it;
|
||||||
|
@ -9667,6 +9722,12 @@ namespace smt {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (exact_length_value.is_zero()) {
|
||||||
|
// shortcut
|
||||||
|
expr_ref lhs(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(rational::zero(), true)), m);
|
||||||
|
expr_ref rhs(ctx.mk_eq_atom(str, mk_string("")), m);
|
||||||
|
assert_implication(lhs, rhs);
|
||||||
|
} else {
|
||||||
// find a consistent automaton for this term
|
// find a consistent automaton for this term
|
||||||
bool found = false;
|
bool found = false;
|
||||||
regex_automaton_under_assumptions assumption;
|
regex_automaton_under_assumptions assumption;
|
||||||
|
@ -9743,13 +9804,14 @@ namespace smt {
|
||||||
regex_automaton_assumptions[re].push_back(new_aut);
|
regex_automaton_assumptions[re].push_back(new_aut);
|
||||||
TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
|
TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
|
||||||
regex_axiom_add = true;
|
regex_axiom_add = true;
|
||||||
// TODO immediately attempt to learn lower/upper bound info here
|
find_automaton_initial_bounds(str_in_re, aut);
|
||||||
} else {
|
} else {
|
||||||
regex_inc_counter(regex_fail_count, str_in_re);
|
regex_inc_counter(regex_fail_count, str_in_re);
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
} // !length is zero
|
||||||
|
} // get_len_value()
|
||||||
expr_ref str_len(mk_strlen(str), m);
|
expr_ref str_len(mk_strlen(str), m);
|
||||||
rational lower_bound_value;
|
rational lower_bound_value;
|
||||||
rational upper_bound_value;
|
rational upper_bound_value;
|
||||||
|
@ -9858,11 +9920,13 @@ namespace smt {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!rhs.empty()) {
|
||||||
expr_ref lhs_terms(mk_and(lhs), m);
|
expr_ref lhs_terms(mk_and(lhs), m);
|
||||||
expr_ref rhs_terms(mk_and(rhs), m);
|
expr_ref rhs_terms(mk_and(rhs), m);
|
||||||
assert_implication(lhs_terms, rhs_terms);
|
assert_implication(lhs_terms, rhs_terms);
|
||||||
regex_axiom_add = true;
|
regex_axiom_add = true;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// no existing automata/assumptions.
|
// no existing automata/assumptions.
|
||||||
// if it's easy to construct a full automaton for R, do so
|
// if it's easy to construct a full automaton for R, do so
|
||||||
|
@ -9878,7 +9942,7 @@ namespace smt {
|
||||||
regex_automaton_assumptions[re].push_back(new_aut);
|
regex_automaton_assumptions[re].push_back(new_aut);
|
||||||
TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
|
TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
|
||||||
regex_axiom_add = true;
|
regex_axiom_add = true;
|
||||||
// TODO immediately attempt to learn lower/upper bound info here
|
find_automaton_initial_bounds(str_in_re, aut);
|
||||||
} else {
|
} else {
|
||||||
// TODO check negation?
|
// TODO check negation?
|
||||||
// TODO construct a partial automaton for R to the given upper bound?
|
// TODO construct a partial automaton for R to the given upper bound?
|
||||||
|
@ -9976,11 +10040,13 @@ namespace smt {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!rhs.empty()) {
|
||||||
expr_ref lhs_terms(mk_and(lhs), m);
|
expr_ref lhs_terms(mk_and(lhs), m);
|
||||||
expr_ref rhs_terms(mk_and(rhs), m);
|
expr_ref rhs_terms(mk_and(rhs), m);
|
||||||
assert_implication(lhs_terms, rhs_terms);
|
assert_implication(lhs_terms, rhs_terms);
|
||||||
regex_axiom_add = true;
|
regex_axiom_add = true;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// no existing automata/assumptions.
|
// no existing automata/assumptions.
|
||||||
// if it's easy to construct a full automaton for R, do so
|
// if it's easy to construct a full automaton for R, do so
|
||||||
|
@ -9996,7 +10062,7 @@ namespace smt {
|
||||||
regex_automaton_assumptions[re].push_back(new_aut);
|
regex_automaton_assumptions[re].push_back(new_aut);
|
||||||
TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
|
TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
|
||||||
regex_axiom_add = true;
|
regex_axiom_add = true;
|
||||||
// TODO immediately attempt to learn lower/upper bound info here
|
find_automaton_initial_bounds(str_in_re, aut);
|
||||||
} else {
|
} else {
|
||||||
// TODO check negation?
|
// TODO check negation?
|
||||||
// TODO construct a partial automaton for R to the given lower bound?
|
// TODO construct a partial automaton for R to the given lower bound?
|
||||||
|
@ -10016,7 +10082,7 @@ namespace smt {
|
||||||
// and doing so without bounds is not difficult
|
// and doing so without bounds is not difficult
|
||||||
bool existingAutomata = (regex_automaton_assumptions.contains(re) && !regex_automaton_assumptions[re].empty());
|
bool existingAutomata = (regex_automaton_assumptions.contains(re) && !regex_automaton_assumptions[re].empty());
|
||||||
bool failureThresholdExceeded = (regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold);
|
bool failureThresholdExceeded = (regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold);
|
||||||
if (!existingAutomata || failureThresholdExceeded) {
|
if (!existingAutomata) {
|
||||||
unsigned expected_complexity = estimate_regex_complexity(re);
|
unsigned expected_complexity = estimate_regex_complexity(re);
|
||||||
if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold
|
if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold
|
||||||
|| failureThresholdExceeded) {
|
|| failureThresholdExceeded) {
|
||||||
|
@ -10030,7 +10096,7 @@ namespace smt {
|
||||||
regex_automaton_assumptions[re].push_back(new_aut);
|
regex_automaton_assumptions[re].push_back(new_aut);
|
||||||
TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
|
TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
|
||||||
regex_axiom_add = true;
|
regex_axiom_add = true;
|
||||||
// TODO immediately attempt to learn lower/upper bound info here
|
find_automaton_initial_bounds(str_in_re, aut);
|
||||||
} else {
|
} else {
|
||||||
regex_inc_counter(regex_fail_count, str_in_re);
|
regex_inc_counter(regex_fail_count, str_in_re);
|
||||||
}
|
}
|
||||||
|
@ -10165,11 +10231,11 @@ namespace smt {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // foreach(entry in intersect_constraints)
|
} // foreach(entry in intersect_constraints)
|
||||||
|
aut_inter->compress();
|
||||||
TRACE("str", tout << "intersected " << used_intersect_constraints.size() << " constraints" << std::endl;);
|
TRACE("str", tout << "intersected " << used_intersect_constraints.size() << " constraints" << std::endl;);
|
||||||
if (aut_inter != NULL && aut_inter->is_empty()) {
|
|
||||||
TRACE("str", tout << "product automaton is empty; asserting conflict clause" << std::endl;);
|
|
||||||
expr_ref_vector conflict_terms(m);
|
|
||||||
|
|
||||||
|
expr_ref_vector conflict_terms(m);
|
||||||
|
expr_ref conflict_lhs(m);
|
||||||
for (svector<regex_automaton_under_assumptions>::iterator aut_it = used_intersect_constraints.begin();
|
for (svector<regex_automaton_under_assumptions>::iterator aut_it = used_intersect_constraints.begin();
|
||||||
aut_it != used_intersect_constraints.end(); ++aut_it) {
|
aut_it != used_intersect_constraints.end(); ++aut_it) {
|
||||||
regex_automaton_under_assumptions aut = *aut_it;
|
regex_automaton_under_assumptions aut = *aut_it;
|
||||||
|
@ -10192,15 +10258,35 @@ namespace smt {
|
||||||
conflict_terms.push_back(lb_term);
|
conflict_terms.push_back(lb_term);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
conflict_lhs = mk_and(conflict_terms);
|
||||||
|
|
||||||
|
if (used_intersect_constraints.size() > 1 && aut_inter != NULL) {
|
||||||
|
// check whether the intersection is only the empty string
|
||||||
|
unsigned initial_state = aut_inter->init();
|
||||||
|
if (aut_inter->final_states().size() == 1 && aut_inter->is_final_state(initial_state)) {
|
||||||
|
// initial state is final and it is the only final state
|
||||||
|
// if there are no moves from the initial state,
|
||||||
|
// the only solution is the empty string
|
||||||
|
if (aut_inter->get_moves_from(initial_state).empty()) {
|
||||||
|
TRACE("str", tout << "product automaton only accepts empty string" << std::endl;);
|
||||||
|
expr_ref rhs1(ctx.mk_eq_atom(str, mk_string("")), m);
|
||||||
|
expr_ref rhs2(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(rational::zero(), true)), m);
|
||||||
|
expr_ref rhs(m.mk_and(rhs1, rhs2), m);
|
||||||
|
assert_implication(conflict_lhs, rhs);
|
||||||
|
regex_axiom_add = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aut_inter != NULL && aut_inter->is_empty()) {
|
||||||
|
TRACE("str", tout << "product automaton is empty; asserting conflict clause" << std::endl;);
|
||||||
expr_ref conflict_clause(m.mk_not(mk_and(conflict_terms)), m);
|
expr_ref conflict_clause(m.mk_not(mk_and(conflict_terms)), m);
|
||||||
assert_axiom(conflict_clause);
|
assert_axiom(conflict_clause);
|
||||||
regex_axiom_add = true;
|
regex_axiom_add = true;
|
||||||
}
|
}
|
||||||
} // foreach (entry in regex_terms_by_string)
|
} // foreach (entry in regex_terms_by_string)
|
||||||
|
|
||||||
if (regex_axiom_add) {
|
if (regex_axiom_add) {
|
||||||
return FC_CONTINUE;
|
//return FC_CONTINUE;
|
||||||
}
|
}
|
||||||
} // RegexAutomata
|
} // RegexAutomata
|
||||||
|
|
||||||
|
|
|
@ -556,6 +556,7 @@ protected:
|
||||||
bool check_regex_length_linearity(expr * re);
|
bool check_regex_length_linearity(expr * re);
|
||||||
bool check_regex_length_linearity_helper(expr * re, bool already_star);
|
bool check_regex_length_linearity_helper(expr * re, bool already_star);
|
||||||
expr_ref infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables);
|
expr_ref infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables);
|
||||||
|
void find_automaton_initial_bounds(expr * str_in_re, eautomaton * aut);
|
||||||
bool refine_automaton_lower_bound(eautomaton * aut, rational current_lower_bound, rational & refined_lower_bound);
|
bool refine_automaton_lower_bound(eautomaton * aut, rational current_lower_bound, rational & refined_lower_bound);
|
||||||
bool refine_automaton_upper_bound(eautomaton * aut, rational current_upper_bound, rational & refined_upper_bound);
|
bool refine_automaton_upper_bound(eautomaton * aut, rational current_upper_bound, rational & refined_upper_bound);
|
||||||
expr_ref generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints);
|
expr_ref generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue