3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-15 13:28:47 +00:00

regex failsafe and intersect WIP

This commit is contained in:
Murphy Berzish 2018-01-12 13:53:02 -05:00
parent 6b799706b5
commit ca3784449f
5 changed files with 136 additions and 5 deletions

View file

@ -81,6 +81,10 @@ def_module_params(module_name='smt',
('str.overlap_priority', DOUBLE, -0.1, 'theory-aware priority for overlapping variable cases; use smt.theory_aware_branching=true'),
('str.regex_automata', BOOL, True, 'use automata-based reasoning for regular expressions (Z3str3 only)'),
('str.regex_automata_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex automata heuristics'),
('str.regex_automata_intersection_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex intersection heuristics'),
('str.regex_automata_failed_automaton_threshold', UINT, 10, 'number of failed automaton construction attempts after which a full automaton is automatically built'),
('str.regex_automata_failed_intersection_threshold', UINT, 10, 'number of failed automaton intersection attempts after which intersection is always computed'),
('str.regex_automata_length_attempt_threshold', UINT, 10, 'number of length/path constraint attempts before checking unsatisfiability of regex terms'),
('core.minimize', BOOL, False, 'minimize unsat core produced by SMT context'),
('core.extend_patterns', BOOL, False, 'extend unsat core with literals that trigger (potential) quantifier instances'),
('core.extend_patterns.max_distance', UINT, UINT_MAX, 'limits the distance of a pattern-extended unsat core'),

View file

@ -33,4 +33,8 @@ void theory_str_params::updt_params(params_ref const & _p) {
m_OverlapTheoryAwarePriority = p.str_overlap_priority();
m_RegexAutomata = p.str_regex_automata();
m_RegexAutomata_DifficultyThreshold = p.str_regex_automata_difficulty_threshold();
m_RegexAutomata_IntersectionDifficultyThreshold = p.str_regex_automata_intersection_difficulty_threshold();
m_RegexAutomata_FailedAutomatonThreshold = p.str_regex_automata_failed_automaton_threshold();
m_RegexAutomata_FailedIntersectionThreshold = p.str_regex_automata_failed_intersection_threshold();
m_RegexAutomata_LengthAttemptThreshold = p.str_regex_automata_length_attempt_threshold();
}

View file

@ -93,6 +93,30 @@ struct theory_str_params {
*/
unsigned m_RegexAutomata_DifficultyThreshold;
/*
* RegexAutomata_IntersectionDifficultyThreshold is the lowest difficulty above which Z3str3
* will not eagerly intersect automata to check unsatisfiability.
*/
unsigned m_RegexAutomata_IntersectionDifficultyThreshold;
/*
* RegexAutomata_FailedAutomatonThreshold is the number of failed attempts to build an automaton
* after which a full automaton (i.e. with no length information) will be built regardless of difficulty.
*/
unsigned m_RegexAutomata_FailedAutomatonThreshold;
/*
* RegexAutomaton_FailedIntersectionThreshold is the number of failed attempts to perform automaton
* intersection after which intersection will always be performed regardless of difficulty.
*/
unsigned m_RegexAutomata_FailedIntersectionThreshold;
/*
* RegexAutomaton_LengthAttemptThreshold is the number of attempts to satisfy length/path constraints
* before which we begin checking unsatisfiability of a regex term.
*/
unsigned m_RegexAutomata_LengthAttemptThreshold;
theory_str_params(params_ref const & p = params_ref()):
m_StrongArrangements(true),
m_AggressiveLengthTesting(false),
@ -106,7 +130,11 @@ struct theory_str_params {
m_BinarySearchInitialUpperBound(64),
m_OverlapTheoryAwarePriority(-0.1),
m_RegexAutomata(true),
m_RegexAutomata_DifficultyThreshold(1000)
m_RegexAutomata_DifficultyThreshold(1000),
m_RegexAutomata_IntersectionDifficultyThreshold(1000),
m_RegexAutomata_FailedAutomatonThreshold(10),
m_RegexAutomata_FailedIntersectionThreshold(10),
m_RegexAutomata_LengthAttemptThreshold(10)
{
updt_params(p);
}

View file

@ -6480,6 +6480,26 @@ namespace smt {
}
}
void theory_str::regex_inc_counter(obj_map<expr, unsigned> & counter_map, expr * key) {
unsigned old_v;
if (counter_map.find(key, old_v)) {
unsigned new_v = old_v += 1;
counter_map.insert(key, new_v);
} else {
counter_map.insert(key, 1);
}
}
unsigned theory_str::regex_get_counter(obj_map<expr, unsigned> & counter_map, expr * key) {
unsigned v;
if (counter_map.find(key, v)) {
return v;
} else {
counter_map.insert(key, 0);
return 0;
}
}
// saturating unsigned addition
unsigned inline _qadd(unsigned a, unsigned b) {
if (a == UINT_MAX || b == UINT_MAX) {
@ -9515,6 +9535,7 @@ namespace smt {
// regex automata
if (m_params.m_RegexAutomata) {
// TODO since heuristics might fail, the "no progress" flag might need to be handled specially here
bool regex_axiom_add = false;
for (obj_hashtable<expr>::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) {
expr * str_in_re = *it;
@ -9579,11 +9600,16 @@ namespace smt {
regex_terms_with_path_constraints.insert(str_in_re);
m_trail_stack.push(insert_obj_trail<theory_str, expr>(regex_terms_with_path_constraints, str_in_re));
regex_axiom_add = true;
// increment LengthAttemptCount
regex_inc_counter(regex_length_attempt_count, str_in_re);
continue;
} else {
// no automata available, or else all bounds assumptions are invalid
unsigned expected_complexity = estimate_regex_complexity(re);
if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold) {
if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold || regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold) {
CTRACE("str", regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold,
tout << "failed automaton threshold reached for " << mk_pp(str_in_re, m) << " -- automatically constructing full automaton" << std::endl;);
eautomaton * aut = m_mk_aut(re);
aut->compress();
regex_automata.push_back(aut);
@ -9596,8 +9622,7 @@ namespace smt {
regex_axiom_add = true;
// TODO immediately attempt to learn lower/upper bound info here
} else {
// TODO increment failure count
NOT_IMPLEMENTED_YET();
regex_inc_counter(regex_fail_count, str_in_re);
}
continue;
}
@ -9852,7 +9877,68 @@ namespace smt {
}
// NOT_IMPLEMENTED_YET();
}
} // foreach (entry in regex_terms)
for (obj_map<expr, ptr_vector<expr> >::iterator it = regex_terms_by_string.begin();
it != regex_terms_by_string.end(); ++it) {
// TODO do we need to check equivalence classes of strings here?
expr * str = it->m_key;
ptr_vector<expr> str_in_re_terms = it->m_value;
svector<regex_automaton_under_assumptions> intersect_constraints;
// we may find empty intersection before checking every constraint;
// this vector keeps track of which ones actually take part in intersection
svector<regex_automaton_under_assumptions> used_intersect_constraints;
// choose an automaton/assumption for each assigned (str.in.re)
// that's consistent with the current length information
NOT_IMPLEMENTED_YET();
eautomaton * aut_inter = NULL;
CTRACE("str", !intersect_constraints.empty(), tout << "check intersection of automata constraints for " << mk_pp(str, m) << std::endl;);
for (svector<regex_automaton_under_assumptions>::iterator aut_it = intersect_constraints.begin();
aut_it != intersect_constraints.end(); ++aut_it) {
regex_automaton_under_assumptions aut = *aut_it;
if (aut_inter == NULL) {
// start somewhere
aut_inter = aut.get_automaton();
used_intersect_constraints.push_back(aut);
continue;
}
if (regex_get_counter(regex_length_attempt_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_LengthAttemptThreshold) {
unsigned intersectionDifficulty = 0; // TODO EstimateIntersectionDifficulty
if (intersectionDifficulty <= m_params.m_RegexAutomata_IntersectionDifficultyThreshold
|| regex_get_counter(regex_intersection_fail_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_FailedIntersectionThreshold) {
lbool current_assignment = ctx.get_assignment(aut.get_regex_term());
// if the assignment is consistent with our assumption, use the automaton directly;
// otherwise, complement it (and save that automaton for next time)
// TODO we should cache these intermediate results
// TODO do we need to push the intermediates into a vector for deletion anyway?
if ( (current_assignment == l_true && aut.get_polarity())
|| (current_assignment == l_false && !aut.get_polarity())) {
aut_inter = m_mk_aut.mk_product(aut_inter, aut.get_automaton());
} else {
// need to complement first
NOT_IMPLEMENTED_YET();
}
used_intersect_constraints.push_back(aut);
if (aut_inter->is_empty()) {
break;
}
} else {
// failed intersection
regex_inc_counter(regex_intersection_fail_count, aut.get_regex_term());
}
}
} // foreach(entry in intersect_constraints)
if (aut_inter->is_empty()) {
TRACE("str", tout << "product automaton is empty; asserting conflict clause" << std::endl;);
NOT_IMPLEMENTED_YET();
}
} // foreach (entry in regex_terms_by_string)
if (regex_axiom_add) {
return FC_CONTINUE;
}

View file

@ -221,6 +221,7 @@ public:
}
eautomaton * get_automaton() const { return aut; }
expr * get_regex_term() const { return str_in_re; }
bool get_polarity() const { return polarity; }
virtual ~regex_automaton_under_assumptions() {
@ -409,6 +410,12 @@ protected:
std::map<expr*, nfa> regex_nfa_cache; // Regex term --> NFA
obj_hashtable<expr> regex_terms_with_path_constraints; // set of string terms which have had path constraints asserted in the current scope
// each counter maps a (str.in.re) expression to an integer.
// use helper functions regex_inc_counter() and regex_get_counter() to access
obj_map<expr, unsigned> regex_length_attempt_count;
obj_map<expr, unsigned> regex_fail_count;
obj_map<expr, unsigned> regex_intersection_fail_count;
svector<char> char_set;
std::map<char, int> charSetLookupTable;
int charSetSize;
@ -542,6 +549,8 @@ protected:
expr_ref generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints);
void aut_path_add_next(u_map<expr*>& next, expr_ref_vector& trail, unsigned idx, expr* cond);
expr_ref aut_path_rewrite_constraint(expr * cond, expr * ch_var);
void regex_inc_counter(obj_map<expr, unsigned> & counter_map, expr * key);
unsigned regex_get_counter(obj_map<expr, unsigned> & counter_map, expr * key);
void set_up_axioms(expr * ex);
void handle_equality(expr * lhs, expr * rhs);