mirror of
https://github.com/Z3Prover/z3
synced 2025-04-16 13:58:45 +00:00
z3str3: make counterexamples less naive, and check regex membership more efficiently (#4358)
* z3str3: make counterexamples less naive, and check regex membership more efficiently * z3str3: construct even better counterexamples for regex membership
This commit is contained in:
parent
56bf4c144b
commit
f3b2a082ae
|
@ -8633,11 +8633,9 @@ namespace smt {
|
|||
|
||||
// We must be be 100% certain that if there are any regex constraints,
|
||||
// the string assignment for each variable is consistent with the automaton.
|
||||
// The (probably) easiest way to do this is to ensure
|
||||
// that we have path constraints set up for every assigned regex term.
|
||||
bool regexOK = true;
|
||||
if (!regex_terms.empty()) {
|
||||
for (obj_hashtable<expr>::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) {
|
||||
expr * str_in_re = *it;
|
||||
for (auto& str_in_re : regex_terms) {
|
||||
expr * str;
|
||||
expr * re;
|
||||
u.str.is_in_re(str_in_re, str, re);
|
||||
|
@ -8645,25 +8643,53 @@ namespace smt {
|
|||
if (current_assignment == l_undef) {
|
||||
continue;
|
||||
}
|
||||
if (!regex_terms_with_path_constraints.contains(str_in_re)) {
|
||||
TRACE("str", tout << "assigned regex term " << mk_pp(str_in_re, m) << " has no path constraints -- continuing search" << std::endl;);
|
||||
return FC_CONTINUE;
|
||||
zstring strValue;
|
||||
if (get_string_constant_eqc(str, strValue)) {
|
||||
// try substituting the current assignment and solving the regex
|
||||
expr_ref valueInRe(u.re.mk_in_re(mk_string(strValue), re), m);
|
||||
ctx.get_rewriter()(valueInRe);
|
||||
if (m.is_true(valueInRe)) {
|
||||
if (current_assignment == l_false) {
|
||||
TRACE("str", tout << "regex conflict: " << mk_pp(str, m) << " = \"" << strValue << "\" but must not be in the language " << mk_pp(re, m) << std::endl;);
|
||||
expr_ref conflictClause(m.mk_or(m.mk_not(ctx.mk_eq_atom(str, mk_string(strValue))), str_in_re), m);
|
||||
assert_axiom(conflictClause);
|
||||
add_persisted_axiom(conflictClause);
|
||||
return FC_CONTINUE;
|
||||
}
|
||||
} else if (m.is_false(valueInRe)) {
|
||||
if (current_assignment == l_true) {
|
||||
TRACE("str", tout << "regex conflict: " << mk_pp(str, m) << " = \"" << strValue << "\" but must be in the language " << mk_pp(re, m) << std::endl;);
|
||||
expr_ref conflictClause(m.mk_or(m.mk_not(ctx.mk_eq_atom(str, mk_string(strValue))), m.mk_not(str_in_re)), m);
|
||||
assert_axiom(conflictClause);
|
||||
add_persisted_axiom(conflictClause);
|
||||
return FC_CONTINUE;
|
||||
}
|
||||
} else {
|
||||
// try to keep going, but don't assume the current assignment is right or wrong
|
||||
regexOK = false;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
regexOK = false;
|
||||
break;
|
||||
}
|
||||
} // foreach (str.in.re in regex_terms)
|
||||
}
|
||||
|
||||
if (unused_internal_variables.empty()) {
|
||||
TRACE("str", tout << "All variables are assigned. Done!" << std::endl;);
|
||||
m_stats.m_solved_by = 2;
|
||||
return FC_DONE;
|
||||
} else {
|
||||
TRACE("str", tout << "Assigning decoy values to free internal variables." << std::endl;);
|
||||
for (std::set<expr*>::iterator it = unused_internal_variables.begin(); it != unused_internal_variables.end(); ++it) {
|
||||
expr * var = *it;
|
||||
expr_ref assignment(m.mk_eq(var, mk_string("**unused**")), m);
|
||||
assert_axiom(assignment);
|
||||
// we're not done if some variable in a regex membership predicate was unassigned
|
||||
if (regexOK) {
|
||||
if (unused_internal_variables.empty()) {
|
||||
TRACE("str", tout << "All variables are assigned. Done!" << std::endl;);
|
||||
m_stats.m_solved_by = 2;
|
||||
return FC_DONE;
|
||||
} else {
|
||||
TRACE("str", tout << "Assigning decoy values to free internal variables." << std::endl;);
|
||||
for (std::set<expr*>::iterator it = unused_internal_variables.begin(); it != unused_internal_variables.end(); ++it) {
|
||||
expr * var = *it;
|
||||
expr_ref assignment(m.mk_eq(var, mk_string("**unused**")), m);
|
||||
assert_axiom(assignment);
|
||||
}
|
||||
return FC_CONTINUE;
|
||||
}
|
||||
return FC_CONTINUE;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -571,7 +571,33 @@ namespace smt {
|
|||
// If the membership constraint is true, we assert a conflict clause.
|
||||
// If the membership constraint is false, we ignore the constraint.
|
||||
if (polarity) {
|
||||
cex = m.mk_or(m.mk_not(f), m.mk_not(ctx.mk_eq_atom(mk_strlen(str), mk_int(str_chars.size()))));
|
||||
// Decompose `str` into its components if it is a concatenation of terms.
|
||||
// This fixes cases where the length of S in (S in RE) might be correct
|
||||
// if the lengths of components of S are assigned in a different way.
|
||||
expr_ref_vector str_terms(m);
|
||||
expr_ref_vector str_terms_eq_len(m);
|
||||
str_terms.push_back(str);
|
||||
while (!str_terms.empty()) {
|
||||
expr* str_term = str_terms.back();
|
||||
str_terms.pop_back();
|
||||
expr* arg0;
|
||||
expr* arg1;
|
||||
if (u.str.is_concat(str_term, arg0, arg1)) {
|
||||
str_terms.push_back(arg0);
|
||||
str_terms.push_back(arg1);
|
||||
} else {
|
||||
rational termLen;
|
||||
if (fixed_length_get_len_value(str_term, termLen)) {
|
||||
str_terms_eq_len.push_back(ctx.mk_eq_atom(mk_strlen(str_term), mk_int(termLen)));
|
||||
} else {
|
||||
// this is strange, since we knew the length of `str` in order to get here
|
||||
cex = expr_ref(m_autil.mk_ge(mk_strlen(str_term), mk_int(0)), m);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cex = m.mk_or(m.mk_not(f), m.mk_not(mk_and(str_terms_eq_len)));
|
||||
ctx.get_rewriter()(cex);
|
||||
return false;
|
||||
} else {
|
||||
|
@ -887,6 +913,9 @@ namespace smt {
|
|||
uninterpreted_to_char_subterm_map.reset();
|
||||
fixed_length_lesson.reset();
|
||||
|
||||
// All reduced Boolean formulas in the current assignment
|
||||
expr_ref_vector fixed_length_reduced_boolean_formulas(m);
|
||||
|
||||
// Boolean formulas on which to apply abstraction refinement.
|
||||
expr_ref_vector abstracted_boolean_formulas(m);
|
||||
|
||||
|
@ -951,6 +980,7 @@ namespace smt {
|
|||
add_persisted_axiom(cex);
|
||||
return l_undef;
|
||||
}
|
||||
fixed_length_reduced_boolean_formulas.push_back(f);
|
||||
} else {
|
||||
TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not an equality over strings" << std::endl;);
|
||||
}
|
||||
|
@ -963,6 +993,7 @@ namespace smt {
|
|||
add_persisted_axiom(cex_clause);
|
||||
return l_undef;
|
||||
}
|
||||
fixed_length_reduced_boolean_formulas.push_back(f);
|
||||
} else if (u.str.is_contains(f)) {
|
||||
// TODO in some cases (e.g. len(haystack) is only slightly greater than len(needle))
|
||||
// we might be okay to assert the full disjunction because there are very few disjuncts
|
||||
|
@ -978,6 +1009,7 @@ namespace smt {
|
|||
add_persisted_axiom(cex);
|
||||
return l_undef;
|
||||
}
|
||||
fixed_length_reduced_boolean_formulas.push_back(f);
|
||||
}
|
||||
} else if (u.str.is_prefix(f)) {
|
||||
TRACE("str_fl", tout << "reduce positive prefix: " << mk_pp(f, m) << std::endl;);
|
||||
|
@ -988,6 +1020,7 @@ namespace smt {
|
|||
add_persisted_axiom(cex);
|
||||
return l_undef;
|
||||
}
|
||||
fixed_length_reduced_boolean_formulas.push_back(f);
|
||||
} else if (u.str.is_suffix(f)) {
|
||||
TRACE("str_fl", tout << "reduce positive suffix: " << mk_pp(f, m) << std::endl;);
|
||||
expr_ref cex(m);
|
||||
|
@ -997,6 +1030,7 @@ namespace smt {
|
|||
add_persisted_axiom(cex);
|
||||
return l_undef;
|
||||
}
|
||||
fixed_length_reduced_boolean_formulas.push_back(f);
|
||||
}else if (m.is_not(f, subterm)) {
|
||||
// if subterm is a string formula such as an equality, reduce it as a disequality
|
||||
if (m.is_eq(subterm, lhs, rhs)) {
|
||||
|
@ -1012,6 +1046,7 @@ namespace smt {
|
|||
add_persisted_axiom(cex);
|
||||
return l_undef;
|
||||
}
|
||||
fixed_length_reduced_boolean_formulas.push_back(f);
|
||||
}
|
||||
} else if (u.str.is_in_re(subterm)) {
|
||||
TRACE("str_fl", tout << "reduce negative regex membership: " << mk_pp(f, m) << std::endl;);
|
||||
|
@ -1022,6 +1057,7 @@ namespace smt {
|
|||
add_persisted_axiom(cex_clause);
|
||||
return l_undef;
|
||||
}
|
||||
fixed_length_reduced_boolean_formulas.push_back(f);
|
||||
} else if (u.str.is_contains(subterm)) {
|
||||
TRACE("str_fl", tout << "reduce negative contains: " << mk_pp(subterm, m) << std::endl;);
|
||||
expr_ref cex(m);
|
||||
|
@ -1031,6 +1067,7 @@ namespace smt {
|
|||
add_persisted_axiom(cex);
|
||||
return l_undef;
|
||||
}
|
||||
fixed_length_reduced_boolean_formulas.push_back(f);
|
||||
} else if (u.str.is_prefix(subterm)) {
|
||||
TRACE("str_fl", tout << "reduce negative prefix: " << mk_pp(subterm, m) << std::endl;);
|
||||
expr_ref cex(m);
|
||||
|
@ -1040,6 +1077,7 @@ namespace smt {
|
|||
add_persisted_axiom(cex);
|
||||
return l_undef;
|
||||
}
|
||||
fixed_length_reduced_boolean_formulas.push_back(f);
|
||||
} else if (u.str.is_suffix(subterm)) {
|
||||
TRACE("str_fl", tout << "reduce negative suffix: " << mk_pp(subterm, m) << std::endl;);
|
||||
expr_ref cex(m);
|
||||
|
@ -1049,6 +1087,7 @@ namespace smt {
|
|||
add_persisted_axiom(cex);
|
||||
return l_undef;
|
||||
}
|
||||
fixed_length_reduced_boolean_formulas.push_back(f);
|
||||
} else {
|
||||
TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not a boolean formula we handle" << std::endl;);
|
||||
}
|
||||
|
@ -1081,6 +1120,10 @@ namespace smt {
|
|||
}
|
||||
tout << std::endl;
|
||||
}
|
||||
tout << "reduced boolean formulas:" << std::endl;
|
||||
for (auto e : fixed_length_reduced_boolean_formulas) {
|
||||
tout << mk_pp(e, m) << std::endl;
|
||||
}
|
||||
);
|
||||
|
||||
TRACE("str_fl", tout << "calling subsolver" << std::endl;);
|
||||
|
@ -1184,6 +1227,9 @@ namespace smt {
|
|||
rational val = e.get_value();
|
||||
cex.push_back(m.mk_eq(u.str.mk_length(var), mk_int(val)));
|
||||
}
|
||||
for (auto e : fixed_length_reduced_boolean_formulas) {
|
||||
cex.push_back(e);
|
||||
}
|
||||
return l_false;
|
||||
} else {
|
||||
TRACE("str_fl", tout << "subsolver found UNSAT; reconstructing unsat core" << std::endl;);
|
||||
|
|
|
@ -206,69 +206,8 @@ namespace smt {
|
|||
regex_inc_counter(regex_length_attempt_count, re);
|
||||
continue;
|
||||
} else {
|
||||
expr_ref pathConstraint(m);
|
||||
expr_ref characterConstraints(m);
|
||||
pathConstraint = generate_regex_path_constraints(str, assumption.get_automaton(), exact_length_value, characterConstraints);
|
||||
TRACE("str", tout << "generated regex path constraint " << mk_pp(pathConstraint, m) << std::endl;);
|
||||
TRACE("str", tout << "character constraints are " << mk_pp(characterConstraints, m) << std::endl;);
|
||||
|
||||
expr_ref_vector lhs_terms(m);
|
||||
if (current_assignment == l_true) {
|
||||
lhs_terms.push_back(str_in_re);
|
||||
} else {
|
||||
lhs_terms.push_back(m.mk_not(str_in_re));
|
||||
}
|
||||
lhs_terms.push_back(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(exact_length_value, true)));
|
||||
expr_ref lhs(mk_and(lhs_terms), m);
|
||||
|
||||
// If the path constraint comes out as "false", this means there are no paths of that length
|
||||
// in the automaton. If the polarity is the same, we can assert a conflict clause.
|
||||
// If the polarity is opposite, we ignore the path constraint.
|
||||
|
||||
if (m.is_false(pathConstraint)) {
|
||||
if ( (current_assignment == l_true && assumption.get_polarity())
|
||||
|| (current_assignment == l_false && !assumption.get_polarity())) {
|
||||
// automaton and constraint have same polarity -- assert conflict clause
|
||||
TRACE("str", tout << "path constraint is false with matching polarity; asserting conflict clause" << std::endl;);
|
||||
expr_ref conflict(m.mk_not(mk_and(lhs_terms)), m);
|
||||
assert_axiom(conflict);
|
||||
// don't set up "regex_terms_with_path_constraints" as a conflict clause is not a path constraint
|
||||
} else {
|
||||
// automaton and constraint have opposite polarity -- ignore path constraint
|
||||
TRACE("str", tout << "path constraint is false with opposite polarity; ignoring path constraint" << std::endl;);
|
||||
assert_implication(lhs, characterConstraints);
|
||||
regex_terms_with_path_constraints.insert(str_in_re);
|
||||
m_trail_stack.push(insert_obj_trail<theory_str, expr>(regex_terms_with_path_constraints, str_in_re));
|
||||
}
|
||||
regex_axiom_add = true;
|
||||
} else {
|
||||
// If the automaton was built with the same polarity as the constraint,
|
||||
// assert directly. Otherwise, negate the path constraint
|
||||
if ( (current_assignment == l_true && assumption.get_polarity())
|
||||
|| (current_assignment == l_false && !assumption.get_polarity())) {
|
||||
TRACE("str", tout << "automaton and regex term have same polarity" << std::endl;);
|
||||
expr_ref rhs(m.mk_and(pathConstraint, characterConstraints), m);
|
||||
assert_implication(lhs, rhs);
|
||||
} else {
|
||||
TRACE("str", tout << "automaton and regex term have opposite polarity" << std::endl;);
|
||||
expr_ref rhs(m.mk_and(m.mk_not(pathConstraint), characterConstraints), m);
|
||||
assert_implication(lhs, rhs);
|
||||
}
|
||||
regex_terms_with_path_constraints.insert(str_in_re);
|
||||
m_trail_stack.push(insert_obj_trail<theory_str, expr>(regex_terms_with_path_constraints, str_in_re));
|
||||
regex_axiom_add = true;
|
||||
}
|
||||
|
||||
// increment LengthAttemptCount
|
||||
regex_inc_counter(regex_length_attempt_count, re);
|
||||
|
||||
TRACE("str",
|
||||
{
|
||||
unsigned v = regex_get_counter(regex_length_attempt_count, re);
|
||||
tout << "length attempt count for " << mk_pp(re, m) << " is " << v << std::endl;
|
||||
});
|
||||
|
||||
continue;
|
||||
// fixed-length model construction handles path constraints on our behalf, and with a better reduction
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// no automata available, or else all bounds assumptions are invalid
|
||||
|
|
Loading…
Reference in a new issue