3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-10-24 00:14:35 +00:00

z3str3: make counterexamples less naive, and check regex membership more efficiently (#4358)

* z3str3: make counterexamples less naive, and check regex membership more efficiently

* z3str3: construct even better counterexamples for regex membership
This commit is contained in:
Murphy Berzish 2020-05-28 11:57:08 -05:00 committed by GitHub
parent 56bf4c144b
commit f3b2a082ae
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 94 additions and 83 deletions

View file

@ -8633,11 +8633,9 @@ namespace smt {
// We must be be 100% certain that if there are any regex constraints,
// the string assignment for each variable is consistent with the automaton.
// The (probably) easiest way to do this is to ensure
// that we have path constraints set up for every assigned regex term.
bool regexOK = true;
if (!regex_terms.empty()) {
for (obj_hashtable<expr>::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) {
expr * str_in_re = *it;
for (auto& str_in_re : regex_terms) {
expr * str;
expr * re;
u.str.is_in_re(str_in_re, str, re);
@ -8645,25 +8643,53 @@ namespace smt {
if (current_assignment == l_undef) {
continue;
}
if (!regex_terms_with_path_constraints.contains(str_in_re)) {
TRACE("str", tout << "assigned regex term " << mk_pp(str_in_re, m) << " has no path constraints -- continuing search" << std::endl;);
return FC_CONTINUE;
zstring strValue;
if (get_string_constant_eqc(str, strValue)) {
// try substituting the current assignment and solving the regex
expr_ref valueInRe(u.re.mk_in_re(mk_string(strValue), re), m);
ctx.get_rewriter()(valueInRe);
if (m.is_true(valueInRe)) {
if (current_assignment == l_false) {
TRACE("str", tout << "regex conflict: " << mk_pp(str, m) << " = \"" << strValue << "\" but must not be in the language " << mk_pp(re, m) << std::endl;);
expr_ref conflictClause(m.mk_or(m.mk_not(ctx.mk_eq_atom(str, mk_string(strValue))), str_in_re), m);
assert_axiom(conflictClause);
add_persisted_axiom(conflictClause);
return FC_CONTINUE;
}
} else if (m.is_false(valueInRe)) {
if (current_assignment == l_true) {
TRACE("str", tout << "regex conflict: " << mk_pp(str, m) << " = \"" << strValue << "\" but must be in the language " << mk_pp(re, m) << std::endl;);
expr_ref conflictClause(m.mk_or(m.mk_not(ctx.mk_eq_atom(str, mk_string(strValue))), m.mk_not(str_in_re)), m);
assert_axiom(conflictClause);
add_persisted_axiom(conflictClause);
return FC_CONTINUE;
}
} else {
// try to keep going, but don't assume the current assignment is right or wrong
regexOK = false;
break;
}
} else {
regexOK = false;
break;
}
} // foreach (str.in.re in regex_terms)
}
if (unused_internal_variables.empty()) {
TRACE("str", tout << "All variables are assigned. Done!" << std::endl;);
m_stats.m_solved_by = 2;
return FC_DONE;
} else {
TRACE("str", tout << "Assigning decoy values to free internal variables." << std::endl;);
for (std::set<expr*>::iterator it = unused_internal_variables.begin(); it != unused_internal_variables.end(); ++it) {
expr * var = *it;
expr_ref assignment(m.mk_eq(var, mk_string("**unused**")), m);
assert_axiom(assignment);
// we're not done if some variable in a regex membership predicate was unassigned
if (regexOK) {
if (unused_internal_variables.empty()) {
TRACE("str", tout << "All variables are assigned. Done!" << std::endl;);
m_stats.m_solved_by = 2;
return FC_DONE;
} else {
TRACE("str", tout << "Assigning decoy values to free internal variables." << std::endl;);
for (std::set<expr*>::iterator it = unused_internal_variables.begin(); it != unused_internal_variables.end(); ++it) {
expr * var = *it;
expr_ref assignment(m.mk_eq(var, mk_string("**unused**")), m);
assert_axiom(assignment);
}
return FC_CONTINUE;
}
return FC_CONTINUE;
}
}

View file

@ -571,7 +571,33 @@ namespace smt {
// If the membership constraint is true, we assert a conflict clause.
// If the membership constraint is false, we ignore the constraint.
if (polarity) {
cex = m.mk_or(m.mk_not(f), m.mk_not(ctx.mk_eq_atom(mk_strlen(str), mk_int(str_chars.size()))));
// Decompose `str` into its components if it is a concatenation of terms.
// This fixes cases where the length of S in (S in RE) might be correct
// if the lengths of components of S are assigned in a different way.
expr_ref_vector str_terms(m);
expr_ref_vector str_terms_eq_len(m);
str_terms.push_back(str);
while (!str_terms.empty()) {
expr* str_term = str_terms.back();
str_terms.pop_back();
expr* arg0;
expr* arg1;
if (u.str.is_concat(str_term, arg0, arg1)) {
str_terms.push_back(arg0);
str_terms.push_back(arg1);
} else {
rational termLen;
if (fixed_length_get_len_value(str_term, termLen)) {
str_terms_eq_len.push_back(ctx.mk_eq_atom(mk_strlen(str_term), mk_int(termLen)));
} else {
// this is strange, since we knew the length of `str` in order to get here
cex = expr_ref(m_autil.mk_ge(mk_strlen(str_term), mk_int(0)), m);
return false;
}
}
}
cex = m.mk_or(m.mk_not(f), m.mk_not(mk_and(str_terms_eq_len)));
ctx.get_rewriter()(cex);
return false;
} else {
@ -887,6 +913,9 @@ namespace smt {
uninterpreted_to_char_subterm_map.reset();
fixed_length_lesson.reset();
// All reduced Boolean formulas in the current assignment
expr_ref_vector fixed_length_reduced_boolean_formulas(m);
// Boolean formulas on which to apply abstraction refinement.
expr_ref_vector abstracted_boolean_formulas(m);
@ -951,6 +980,7 @@ namespace smt {
add_persisted_axiom(cex);
return l_undef;
}
fixed_length_reduced_boolean_formulas.push_back(f);
} else {
TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not an equality over strings" << std::endl;);
}
@ -963,6 +993,7 @@ namespace smt {
add_persisted_axiom(cex_clause);
return l_undef;
}
fixed_length_reduced_boolean_formulas.push_back(f);
} else if (u.str.is_contains(f)) {
// TODO in some cases (e.g. len(haystack) is only slightly greater than len(needle))
// we might be okay to assert the full disjunction because there are very few disjuncts
@ -978,6 +1009,7 @@ namespace smt {
add_persisted_axiom(cex);
return l_undef;
}
fixed_length_reduced_boolean_formulas.push_back(f);
}
} else if (u.str.is_prefix(f)) {
TRACE("str_fl", tout << "reduce positive prefix: " << mk_pp(f, m) << std::endl;);
@ -988,6 +1020,7 @@ namespace smt {
add_persisted_axiom(cex);
return l_undef;
}
fixed_length_reduced_boolean_formulas.push_back(f);
} else if (u.str.is_suffix(f)) {
TRACE("str_fl", tout << "reduce positive suffix: " << mk_pp(f, m) << std::endl;);
expr_ref cex(m);
@ -997,6 +1030,7 @@ namespace smt {
add_persisted_axiom(cex);
return l_undef;
}
fixed_length_reduced_boolean_formulas.push_back(f);
}else if (m.is_not(f, subterm)) {
// if subterm is a string formula such as an equality, reduce it as a disequality
if (m.is_eq(subterm, lhs, rhs)) {
@ -1012,6 +1046,7 @@ namespace smt {
add_persisted_axiom(cex);
return l_undef;
}
fixed_length_reduced_boolean_formulas.push_back(f);
}
} else if (u.str.is_in_re(subterm)) {
TRACE("str_fl", tout << "reduce negative regex membership: " << mk_pp(f, m) << std::endl;);
@ -1022,6 +1057,7 @@ namespace smt {
add_persisted_axiom(cex_clause);
return l_undef;
}
fixed_length_reduced_boolean_formulas.push_back(f);
} else if (u.str.is_contains(subterm)) {
TRACE("str_fl", tout << "reduce negative contains: " << mk_pp(subterm, m) << std::endl;);
expr_ref cex(m);
@ -1031,6 +1067,7 @@ namespace smt {
add_persisted_axiom(cex);
return l_undef;
}
fixed_length_reduced_boolean_formulas.push_back(f);
} else if (u.str.is_prefix(subterm)) {
TRACE("str_fl", tout << "reduce negative prefix: " << mk_pp(subterm, m) << std::endl;);
expr_ref cex(m);
@ -1040,6 +1077,7 @@ namespace smt {
add_persisted_axiom(cex);
return l_undef;
}
fixed_length_reduced_boolean_formulas.push_back(f);
} else if (u.str.is_suffix(subterm)) {
TRACE("str_fl", tout << "reduce negative suffix: " << mk_pp(subterm, m) << std::endl;);
expr_ref cex(m);
@ -1049,6 +1087,7 @@ namespace smt {
add_persisted_axiom(cex);
return l_undef;
}
fixed_length_reduced_boolean_formulas.push_back(f);
} else {
TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not a boolean formula we handle" << std::endl;);
}
@ -1081,6 +1120,10 @@ namespace smt {
}
tout << std::endl;
}
tout << "reduced boolean formulas:" << std::endl;
for (auto e : fixed_length_reduced_boolean_formulas) {
tout << mk_pp(e, m) << std::endl;
}
);
TRACE("str_fl", tout << "calling subsolver" << std::endl;);
@ -1184,6 +1227,9 @@ namespace smt {
rational val = e.get_value();
cex.push_back(m.mk_eq(u.str.mk_length(var), mk_int(val)));
}
for (auto e : fixed_length_reduced_boolean_formulas) {
cex.push_back(e);
}
return l_false;
} else {
TRACE("str_fl", tout << "subsolver found UNSAT; reconstructing unsat core" << std::endl;);

View file

@ -206,69 +206,8 @@ namespace smt {
regex_inc_counter(regex_length_attempt_count, re);
continue;
} else {
expr_ref pathConstraint(m);
expr_ref characterConstraints(m);
pathConstraint = generate_regex_path_constraints(str, assumption.get_automaton(), exact_length_value, characterConstraints);
TRACE("str", tout << "generated regex path constraint " << mk_pp(pathConstraint, m) << std::endl;);
TRACE("str", tout << "character constraints are " << mk_pp(characterConstraints, m) << std::endl;);
expr_ref_vector lhs_terms(m);
if (current_assignment == l_true) {
lhs_terms.push_back(str_in_re);
} else {
lhs_terms.push_back(m.mk_not(str_in_re));
}
lhs_terms.push_back(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(exact_length_value, true)));
expr_ref lhs(mk_and(lhs_terms), m);
// If the path constraint comes out as "false", this means there are no paths of that length
// in the automaton. If the polarity is the same, we can assert a conflict clause.
// If the polarity is opposite, we ignore the path constraint.
if (m.is_false(pathConstraint)) {
if ( (current_assignment == l_true && assumption.get_polarity())
|| (current_assignment == l_false && !assumption.get_polarity())) {
// automaton and constraint have same polarity -- assert conflict clause
TRACE("str", tout << "path constraint is false with matching polarity; asserting conflict clause" << std::endl;);
expr_ref conflict(m.mk_not(mk_and(lhs_terms)), m);
assert_axiom(conflict);
// don't set up "regex_terms_with_path_constraints" as a conflict clause is not a path constraint
} else {
// automaton and constraint have opposite polarity -- ignore path constraint
TRACE("str", tout << "path constraint is false with opposite polarity; ignoring path constraint" << std::endl;);
assert_implication(lhs, characterConstraints);
regex_terms_with_path_constraints.insert(str_in_re);
m_trail_stack.push(insert_obj_trail<theory_str, expr>(regex_terms_with_path_constraints, str_in_re));
}
regex_axiom_add = true;
} else {
// If the automaton was built with the same polarity as the constraint,
// assert directly. Otherwise, negate the path constraint
if ( (current_assignment == l_true && assumption.get_polarity())
|| (current_assignment == l_false && !assumption.get_polarity())) {
TRACE("str", tout << "automaton and regex term have same polarity" << std::endl;);
expr_ref rhs(m.mk_and(pathConstraint, characterConstraints), m);
assert_implication(lhs, rhs);
} else {
TRACE("str", tout << "automaton and regex term have opposite polarity" << std::endl;);
expr_ref rhs(m.mk_and(m.mk_not(pathConstraint), characterConstraints), m);
assert_implication(lhs, rhs);
}
regex_terms_with_path_constraints.insert(str_in_re);
m_trail_stack.push(insert_obj_trail<theory_str, expr>(regex_terms_with_path_constraints, str_in_re));
regex_axiom_add = true;
}
// increment LengthAttemptCount
regex_inc_counter(regex_length_attempt_count, re);
TRACE("str",
{
unsigned v = regex_get_counter(regex_length_attempt_count, re);
tout << "length attempt count for " << mk_pp(re, m) << " is " << v << std::endl;
});
continue;
// fixed-length model construction handles path constraints on our behalf, and with a better reduction
return;
}
} else {
// no automata available, or else all bounds assumptions are invalid