From f3b2a082ae344f3aa719d537c002f02d14c15847 Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Thu, 28 May 2020 11:57:08 -0500 Subject: [PATCH] z3str3: make counterexamples less naive, and check regex membership more efficiently (#4358) * z3str3: make counterexamples less naive, and check regex membership more efficiently * z3str3: construct even better counterexamples for regex membership --- src/smt/theory_str.cpp | 64 ++++++++++++++++++++++++----------- src/smt/theory_str_mc.cpp | 48 +++++++++++++++++++++++++- src/smt/theory_str_regex.cpp | 65 ++---------------------------------- 3 files changed, 94 insertions(+), 83 deletions(-) diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index 1cbbf083f..9349258c0 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -8633,11 +8633,9 @@ namespace smt { // We must be be 100% certain that if there are any regex constraints, // the string assignment for each variable is consistent with the automaton. - // The (probably) easiest way to do this is to ensure - // that we have path constraints set up for every assigned regex term. + bool regexOK = true; if (!regex_terms.empty()) { - for (obj_hashtable::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) { - expr * str_in_re = *it; + for (auto& str_in_re : regex_terms) { expr * str; expr * re; u.str.is_in_re(str_in_re, str, re); @@ -8645,25 +8643,53 @@ namespace smt { if (current_assignment == l_undef) { continue; } - if (!regex_terms_with_path_constraints.contains(str_in_re)) { - TRACE("str", tout << "assigned regex term " << mk_pp(str_in_re, m) << " has no path constraints -- continuing search" << std::endl;); - return FC_CONTINUE; + zstring strValue; + if (get_string_constant_eqc(str, strValue)) { + // try substituting the current assignment and solving the regex + expr_ref valueInRe(u.re.mk_in_re(mk_string(strValue), re), m); + ctx.get_rewriter()(valueInRe); + if (m.is_true(valueInRe)) { + if (current_assignment == l_false) { + TRACE("str", tout << "regex conflict: " << mk_pp(str, m) << " = \"" << strValue << "\" but must not be in the language " << mk_pp(re, m) << std::endl;); + expr_ref conflictClause(m.mk_or(m.mk_not(ctx.mk_eq_atom(str, mk_string(strValue))), str_in_re), m); + assert_axiom(conflictClause); + add_persisted_axiom(conflictClause); + return FC_CONTINUE; + } + } else if (m.is_false(valueInRe)) { + if (current_assignment == l_true) { + TRACE("str", tout << "regex conflict: " << mk_pp(str, m) << " = \"" << strValue << "\" but must be in the language " << mk_pp(re, m) << std::endl;); + expr_ref conflictClause(m.mk_or(m.mk_not(ctx.mk_eq_atom(str, mk_string(strValue))), m.mk_not(str_in_re)), m); + assert_axiom(conflictClause); + add_persisted_axiom(conflictClause); + return FC_CONTINUE; + } + } else { + // try to keep going, but don't assume the current assignment is right or wrong + regexOK = false; + break; + } + } else { + regexOK = false; + break; } } // foreach (str.in.re in regex_terms) } - - if (unused_internal_variables.empty()) { - TRACE("str", tout << "All variables are assigned. Done!" << std::endl;); - m_stats.m_solved_by = 2; - return FC_DONE; - } else { - TRACE("str", tout << "Assigning decoy values to free internal variables." << std::endl;); - for (std::set::iterator it = unused_internal_variables.begin(); it != unused_internal_variables.end(); ++it) { - expr * var = *it; - expr_ref assignment(m.mk_eq(var, mk_string("**unused**")), m); - assert_axiom(assignment); + // we're not done if some variable in a regex membership predicate was unassigned + if (regexOK) { + if (unused_internal_variables.empty()) { + TRACE("str", tout << "All variables are assigned. Done!" << std::endl;); + m_stats.m_solved_by = 2; + return FC_DONE; + } else { + TRACE("str", tout << "Assigning decoy values to free internal variables." << std::endl;); + for (std::set::iterator it = unused_internal_variables.begin(); it != unused_internal_variables.end(); ++it) { + expr * var = *it; + expr_ref assignment(m.mk_eq(var, mk_string("**unused**")), m); + assert_axiom(assignment); + } + return FC_CONTINUE; } - return FC_CONTINUE; } } diff --git a/src/smt/theory_str_mc.cpp b/src/smt/theory_str_mc.cpp index aae8234b0..8ebd34245 100644 --- a/src/smt/theory_str_mc.cpp +++ b/src/smt/theory_str_mc.cpp @@ -571,7 +571,33 @@ namespace smt { // If the membership constraint is true, we assert a conflict clause. // If the membership constraint is false, we ignore the constraint. if (polarity) { - cex = m.mk_or(m.mk_not(f), m.mk_not(ctx.mk_eq_atom(mk_strlen(str), mk_int(str_chars.size())))); + // Decompose `str` into its components if it is a concatenation of terms. + // This fixes cases where the length of S in (S in RE) might be correct + // if the lengths of components of S are assigned in a different way. + expr_ref_vector str_terms(m); + expr_ref_vector str_terms_eq_len(m); + str_terms.push_back(str); + while (!str_terms.empty()) { + expr* str_term = str_terms.back(); + str_terms.pop_back(); + expr* arg0; + expr* arg1; + if (u.str.is_concat(str_term, arg0, arg1)) { + str_terms.push_back(arg0); + str_terms.push_back(arg1); + } else { + rational termLen; + if (fixed_length_get_len_value(str_term, termLen)) { + str_terms_eq_len.push_back(ctx.mk_eq_atom(mk_strlen(str_term), mk_int(termLen))); + } else { + // this is strange, since we knew the length of `str` in order to get here + cex = expr_ref(m_autil.mk_ge(mk_strlen(str_term), mk_int(0)), m); + return false; + } + } + } + + cex = m.mk_or(m.mk_not(f), m.mk_not(mk_and(str_terms_eq_len))); ctx.get_rewriter()(cex); return false; } else { @@ -887,6 +913,9 @@ namespace smt { uninterpreted_to_char_subterm_map.reset(); fixed_length_lesson.reset(); + // All reduced Boolean formulas in the current assignment + expr_ref_vector fixed_length_reduced_boolean_formulas(m); + // Boolean formulas on which to apply abstraction refinement. expr_ref_vector abstracted_boolean_formulas(m); @@ -951,6 +980,7 @@ namespace smt { add_persisted_axiom(cex); return l_undef; } + fixed_length_reduced_boolean_formulas.push_back(f); } else { TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not an equality over strings" << std::endl;); } @@ -963,6 +993,7 @@ namespace smt { add_persisted_axiom(cex_clause); return l_undef; } + fixed_length_reduced_boolean_formulas.push_back(f); } else if (u.str.is_contains(f)) { // TODO in some cases (e.g. len(haystack) is only slightly greater than len(needle)) // we might be okay to assert the full disjunction because there are very few disjuncts @@ -978,6 +1009,7 @@ namespace smt { add_persisted_axiom(cex); return l_undef; } + fixed_length_reduced_boolean_formulas.push_back(f); } } else if (u.str.is_prefix(f)) { TRACE("str_fl", tout << "reduce positive prefix: " << mk_pp(f, m) << std::endl;); @@ -988,6 +1020,7 @@ namespace smt { add_persisted_axiom(cex); return l_undef; } + fixed_length_reduced_boolean_formulas.push_back(f); } else if (u.str.is_suffix(f)) { TRACE("str_fl", tout << "reduce positive suffix: " << mk_pp(f, m) << std::endl;); expr_ref cex(m); @@ -997,6 +1030,7 @@ namespace smt { add_persisted_axiom(cex); return l_undef; } + fixed_length_reduced_boolean_formulas.push_back(f); }else if (m.is_not(f, subterm)) { // if subterm is a string formula such as an equality, reduce it as a disequality if (m.is_eq(subterm, lhs, rhs)) { @@ -1012,6 +1046,7 @@ namespace smt { add_persisted_axiom(cex); return l_undef; } + fixed_length_reduced_boolean_formulas.push_back(f); } } else if (u.str.is_in_re(subterm)) { TRACE("str_fl", tout << "reduce negative regex membership: " << mk_pp(f, m) << std::endl;); @@ -1022,6 +1057,7 @@ namespace smt { add_persisted_axiom(cex_clause); return l_undef; } + fixed_length_reduced_boolean_formulas.push_back(f); } else if (u.str.is_contains(subterm)) { TRACE("str_fl", tout << "reduce negative contains: " << mk_pp(subterm, m) << std::endl;); expr_ref cex(m); @@ -1031,6 +1067,7 @@ namespace smt { add_persisted_axiom(cex); return l_undef; } + fixed_length_reduced_boolean_formulas.push_back(f); } else if (u.str.is_prefix(subterm)) { TRACE("str_fl", tout << "reduce negative prefix: " << mk_pp(subterm, m) << std::endl;); expr_ref cex(m); @@ -1040,6 +1077,7 @@ namespace smt { add_persisted_axiom(cex); return l_undef; } + fixed_length_reduced_boolean_formulas.push_back(f); } else if (u.str.is_suffix(subterm)) { TRACE("str_fl", tout << "reduce negative suffix: " << mk_pp(subterm, m) << std::endl;); expr_ref cex(m); @@ -1049,6 +1087,7 @@ namespace smt { add_persisted_axiom(cex); return l_undef; } + fixed_length_reduced_boolean_formulas.push_back(f); } else { TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not a boolean formula we handle" << std::endl;); } @@ -1081,6 +1120,10 @@ namespace smt { } tout << std::endl; } + tout << "reduced boolean formulas:" << std::endl; + for (auto e : fixed_length_reduced_boolean_formulas) { + tout << mk_pp(e, m) << std::endl; + } ); TRACE("str_fl", tout << "calling subsolver" << std::endl;); @@ -1184,6 +1227,9 @@ namespace smt { rational val = e.get_value(); cex.push_back(m.mk_eq(u.str.mk_length(var), mk_int(val))); } + for (auto e : fixed_length_reduced_boolean_formulas) { + cex.push_back(e); + } return l_false; } else { TRACE("str_fl", tout << "subsolver found UNSAT; reconstructing unsat core" << std::endl;); diff --git a/src/smt/theory_str_regex.cpp b/src/smt/theory_str_regex.cpp index 8ce201752..b26ce197c 100644 --- a/src/smt/theory_str_regex.cpp +++ b/src/smt/theory_str_regex.cpp @@ -206,69 +206,8 @@ namespace smt { regex_inc_counter(regex_length_attempt_count, re); continue; } else { - expr_ref pathConstraint(m); - expr_ref characterConstraints(m); - pathConstraint = generate_regex_path_constraints(str, assumption.get_automaton(), exact_length_value, characterConstraints); - TRACE("str", tout << "generated regex path constraint " << mk_pp(pathConstraint, m) << std::endl;); - TRACE("str", tout << "character constraints are " << mk_pp(characterConstraints, m) << std::endl;); - - expr_ref_vector lhs_terms(m); - if (current_assignment == l_true) { - lhs_terms.push_back(str_in_re); - } else { - lhs_terms.push_back(m.mk_not(str_in_re)); - } - lhs_terms.push_back(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(exact_length_value, true))); - expr_ref lhs(mk_and(lhs_terms), m); - - // If the path constraint comes out as "false", this means there are no paths of that length - // in the automaton. If the polarity is the same, we can assert a conflict clause. - // If the polarity is opposite, we ignore the path constraint. - - if (m.is_false(pathConstraint)) { - if ( (current_assignment == l_true && assumption.get_polarity()) - || (current_assignment == l_false && !assumption.get_polarity())) { - // automaton and constraint have same polarity -- assert conflict clause - TRACE("str", tout << "path constraint is false with matching polarity; asserting conflict clause" << std::endl;); - expr_ref conflict(m.mk_not(mk_and(lhs_terms)), m); - assert_axiom(conflict); - // don't set up "regex_terms_with_path_constraints" as a conflict clause is not a path constraint - } else { - // automaton and constraint have opposite polarity -- ignore path constraint - TRACE("str", tout << "path constraint is false with opposite polarity; ignoring path constraint" << std::endl;); - assert_implication(lhs, characterConstraints); - regex_terms_with_path_constraints.insert(str_in_re); - m_trail_stack.push(insert_obj_trail(regex_terms_with_path_constraints, str_in_re)); - } - regex_axiom_add = true; - } else { - // If the automaton was built with the same polarity as the constraint, - // assert directly. Otherwise, negate the path constraint - if ( (current_assignment == l_true && assumption.get_polarity()) - || (current_assignment == l_false && !assumption.get_polarity())) { - TRACE("str", tout << "automaton and regex term have same polarity" << std::endl;); - expr_ref rhs(m.mk_and(pathConstraint, characterConstraints), m); - assert_implication(lhs, rhs); - } else { - TRACE("str", tout << "automaton and regex term have opposite polarity" << std::endl;); - expr_ref rhs(m.mk_and(m.mk_not(pathConstraint), characterConstraints), m); - assert_implication(lhs, rhs); - } - regex_terms_with_path_constraints.insert(str_in_re); - m_trail_stack.push(insert_obj_trail(regex_terms_with_path_constraints, str_in_re)); - regex_axiom_add = true; - } - - // increment LengthAttemptCount - regex_inc_counter(regex_length_attempt_count, re); - - TRACE("str", - { - unsigned v = regex_get_counter(regex_length_attempt_count, re); - tout << "length attempt count for " << mk_pp(re, m) << " is " << v << std::endl; - }); - - continue; + // fixed-length model construction handles path constraints on our behalf, and with a better reduction + return; } } else { // no automata available, or else all bounds assumptions are invalid