mirror of
https://github.com/Z3Prover/z3
synced 2025-04-15 13:28:47 +00:00
regex failsafe and intersect WIP
This commit is contained in:
parent
6b799706b5
commit
ca3784449f
|
@ -81,6 +81,10 @@ def_module_params(module_name='smt',
|
|||
('str.overlap_priority', DOUBLE, -0.1, 'theory-aware priority for overlapping variable cases; use smt.theory_aware_branching=true'),
|
||||
('str.regex_automata', BOOL, True, 'use automata-based reasoning for regular expressions (Z3str3 only)'),
|
||||
('str.regex_automata_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex automata heuristics'),
|
||||
('str.regex_automata_intersection_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex intersection heuristics'),
|
||||
('str.regex_automata_failed_automaton_threshold', UINT, 10, 'number of failed automaton construction attempts after which a full automaton is automatically built'),
|
||||
('str.regex_automata_failed_intersection_threshold', UINT, 10, 'number of failed automaton intersection attempts after which intersection is always computed'),
|
||||
('str.regex_automata_length_attempt_threshold', UINT, 10, 'number of length/path constraint attempts before checking unsatisfiability of regex terms'),
|
||||
('core.minimize', BOOL, False, 'minimize unsat core produced by SMT context'),
|
||||
('core.extend_patterns', BOOL, False, 'extend unsat core with literals that trigger (potential) quantifier instances'),
|
||||
('core.extend_patterns.max_distance', UINT, UINT_MAX, 'limits the distance of a pattern-extended unsat core'),
|
||||
|
|
|
@ -33,4 +33,8 @@ void theory_str_params::updt_params(params_ref const & _p) {
|
|||
m_OverlapTheoryAwarePriority = p.str_overlap_priority();
|
||||
m_RegexAutomata = p.str_regex_automata();
|
||||
m_RegexAutomata_DifficultyThreshold = p.str_regex_automata_difficulty_threshold();
|
||||
m_RegexAutomata_IntersectionDifficultyThreshold = p.str_regex_automata_intersection_difficulty_threshold();
|
||||
m_RegexAutomata_FailedAutomatonThreshold = p.str_regex_automata_failed_automaton_threshold();
|
||||
m_RegexAutomata_FailedIntersectionThreshold = p.str_regex_automata_failed_intersection_threshold();
|
||||
m_RegexAutomata_LengthAttemptThreshold = p.str_regex_automata_length_attempt_threshold();
|
||||
}
|
||||
|
|
|
@ -93,6 +93,30 @@ struct theory_str_params {
|
|||
*/
|
||||
unsigned m_RegexAutomata_DifficultyThreshold;
|
||||
|
||||
/*
|
||||
* RegexAutomata_IntersectionDifficultyThreshold is the lowest difficulty above which Z3str3
|
||||
* will not eagerly intersect automata to check unsatisfiability.
|
||||
*/
|
||||
unsigned m_RegexAutomata_IntersectionDifficultyThreshold;
|
||||
|
||||
/*
|
||||
* RegexAutomata_FailedAutomatonThreshold is the number of failed attempts to build an automaton
|
||||
* after which a full automaton (i.e. with no length information) will be built regardless of difficulty.
|
||||
*/
|
||||
unsigned m_RegexAutomata_FailedAutomatonThreshold;
|
||||
|
||||
/*
|
||||
* RegexAutomaton_FailedIntersectionThreshold is the number of failed attempts to perform automaton
|
||||
* intersection after which intersection will always be performed regardless of difficulty.
|
||||
*/
|
||||
unsigned m_RegexAutomata_FailedIntersectionThreshold;
|
||||
|
||||
/*
|
||||
* RegexAutomaton_LengthAttemptThreshold is the number of attempts to satisfy length/path constraints
|
||||
* before which we begin checking unsatisfiability of a regex term.
|
||||
*/
|
||||
unsigned m_RegexAutomata_LengthAttemptThreshold;
|
||||
|
||||
theory_str_params(params_ref const & p = params_ref()):
|
||||
m_StrongArrangements(true),
|
||||
m_AggressiveLengthTesting(false),
|
||||
|
@ -106,7 +130,11 @@ struct theory_str_params {
|
|||
m_BinarySearchInitialUpperBound(64),
|
||||
m_OverlapTheoryAwarePriority(-0.1),
|
||||
m_RegexAutomata(true),
|
||||
m_RegexAutomata_DifficultyThreshold(1000)
|
||||
m_RegexAutomata_DifficultyThreshold(1000),
|
||||
m_RegexAutomata_IntersectionDifficultyThreshold(1000),
|
||||
m_RegexAutomata_FailedAutomatonThreshold(10),
|
||||
m_RegexAutomata_FailedIntersectionThreshold(10),
|
||||
m_RegexAutomata_LengthAttemptThreshold(10)
|
||||
{
|
||||
updt_params(p);
|
||||
}
|
||||
|
|
|
@ -6480,6 +6480,26 @@ namespace smt {
|
|||
}
|
||||
}
|
||||
|
||||
void theory_str::regex_inc_counter(obj_map<expr, unsigned> & counter_map, expr * key) {
|
||||
unsigned old_v;
|
||||
if (counter_map.find(key, old_v)) {
|
||||
unsigned new_v = old_v += 1;
|
||||
counter_map.insert(key, new_v);
|
||||
} else {
|
||||
counter_map.insert(key, 1);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned theory_str::regex_get_counter(obj_map<expr, unsigned> & counter_map, expr * key) {
|
||||
unsigned v;
|
||||
if (counter_map.find(key, v)) {
|
||||
return v;
|
||||
} else {
|
||||
counter_map.insert(key, 0);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// saturating unsigned addition
|
||||
unsigned inline _qadd(unsigned a, unsigned b) {
|
||||
if (a == UINT_MAX || b == UINT_MAX) {
|
||||
|
@ -9515,6 +9535,7 @@ namespace smt {
|
|||
|
||||
// regex automata
|
||||
if (m_params.m_RegexAutomata) {
|
||||
// TODO since heuristics might fail, the "no progress" flag might need to be handled specially here
|
||||
bool regex_axiom_add = false;
|
||||
for (obj_hashtable<expr>::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) {
|
||||
expr * str_in_re = *it;
|
||||
|
@ -9579,11 +9600,16 @@ namespace smt {
|
|||
regex_terms_with_path_constraints.insert(str_in_re);
|
||||
m_trail_stack.push(insert_obj_trail<theory_str, expr>(regex_terms_with_path_constraints, str_in_re));
|
||||
regex_axiom_add = true;
|
||||
|
||||
// increment LengthAttemptCount
|
||||
regex_inc_counter(regex_length_attempt_count, str_in_re);
|
||||
continue;
|
||||
} else {
|
||||
// no automata available, or else all bounds assumptions are invalid
|
||||
unsigned expected_complexity = estimate_regex_complexity(re);
|
||||
if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold) {
|
||||
if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold || regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold) {
|
||||
CTRACE("str", regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold,
|
||||
tout << "failed automaton threshold reached for " << mk_pp(str_in_re, m) << " -- automatically constructing full automaton" << std::endl;);
|
||||
eautomaton * aut = m_mk_aut(re);
|
||||
aut->compress();
|
||||
regex_automata.push_back(aut);
|
||||
|
@ -9596,8 +9622,7 @@ namespace smt {
|
|||
regex_axiom_add = true;
|
||||
// TODO immediately attempt to learn lower/upper bound info here
|
||||
} else {
|
||||
// TODO increment failure count
|
||||
NOT_IMPLEMENTED_YET();
|
||||
regex_inc_counter(regex_fail_count, str_in_re);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
@ -9852,7 +9877,68 @@ namespace smt {
|
|||
}
|
||||
|
||||
// NOT_IMPLEMENTED_YET();
|
||||
}
|
||||
} // foreach (entry in regex_terms)
|
||||
|
||||
for (obj_map<expr, ptr_vector<expr> >::iterator it = regex_terms_by_string.begin();
|
||||
it != regex_terms_by_string.end(); ++it) {
|
||||
// TODO do we need to check equivalence classes of strings here?
|
||||
|
||||
expr * str = it->m_key;
|
||||
ptr_vector<expr> str_in_re_terms = it->m_value;
|
||||
|
||||
svector<regex_automaton_under_assumptions> intersect_constraints;
|
||||
// we may find empty intersection before checking every constraint;
|
||||
// this vector keeps track of which ones actually take part in intersection
|
||||
svector<regex_automaton_under_assumptions> used_intersect_constraints;
|
||||
|
||||
// choose an automaton/assumption for each assigned (str.in.re)
|
||||
// that's consistent with the current length information
|
||||
NOT_IMPLEMENTED_YET();
|
||||
|
||||
eautomaton * aut_inter = NULL;
|
||||
CTRACE("str", !intersect_constraints.empty(), tout << "check intersection of automata constraints for " << mk_pp(str, m) << std::endl;);
|
||||
for (svector<regex_automaton_under_assumptions>::iterator aut_it = intersect_constraints.begin();
|
||||
aut_it != intersect_constraints.end(); ++aut_it) {
|
||||
regex_automaton_under_assumptions aut = *aut_it;
|
||||
if (aut_inter == NULL) {
|
||||
// start somewhere
|
||||
aut_inter = aut.get_automaton();
|
||||
used_intersect_constraints.push_back(aut);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (regex_get_counter(regex_length_attempt_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_LengthAttemptThreshold) {
|
||||
unsigned intersectionDifficulty = 0; // TODO EstimateIntersectionDifficulty
|
||||
if (intersectionDifficulty <= m_params.m_RegexAutomata_IntersectionDifficultyThreshold
|
||||
|| regex_get_counter(regex_intersection_fail_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_FailedIntersectionThreshold) {
|
||||
lbool current_assignment = ctx.get_assignment(aut.get_regex_term());
|
||||
// if the assignment is consistent with our assumption, use the automaton directly;
|
||||
// otherwise, complement it (and save that automaton for next time)
|
||||
// TODO we should cache these intermediate results
|
||||
// TODO do we need to push the intermediates into a vector for deletion anyway?
|
||||
if ( (current_assignment == l_true && aut.get_polarity())
|
||||
|| (current_assignment == l_false && !aut.get_polarity())) {
|
||||
aut_inter = m_mk_aut.mk_product(aut_inter, aut.get_automaton());
|
||||
} else {
|
||||
// need to complement first
|
||||
NOT_IMPLEMENTED_YET();
|
||||
}
|
||||
used_intersect_constraints.push_back(aut);
|
||||
if (aut_inter->is_empty()) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// failed intersection
|
||||
regex_inc_counter(regex_intersection_fail_count, aut.get_regex_term());
|
||||
}
|
||||
}
|
||||
} // foreach(entry in intersect_constraints)
|
||||
if (aut_inter->is_empty()) {
|
||||
TRACE("str", tout << "product automaton is empty; asserting conflict clause" << std::endl;);
|
||||
NOT_IMPLEMENTED_YET();
|
||||
}
|
||||
} // foreach (entry in regex_terms_by_string)
|
||||
|
||||
if (regex_axiom_add) {
|
||||
return FC_CONTINUE;
|
||||
}
|
||||
|
|
|
@ -221,6 +221,7 @@ public:
|
|||
}
|
||||
|
||||
eautomaton * get_automaton() const { return aut; }
|
||||
expr * get_regex_term() const { return str_in_re; }
|
||||
bool get_polarity() const { return polarity; }
|
||||
|
||||
virtual ~regex_automaton_under_assumptions() {
|
||||
|
@ -409,6 +410,12 @@ protected:
|
|||
std::map<expr*, nfa> regex_nfa_cache; // Regex term --> NFA
|
||||
obj_hashtable<expr> regex_terms_with_path_constraints; // set of string terms which have had path constraints asserted in the current scope
|
||||
|
||||
// each counter maps a (str.in.re) expression to an integer.
|
||||
// use helper functions regex_inc_counter() and regex_get_counter() to access
|
||||
obj_map<expr, unsigned> regex_length_attempt_count;
|
||||
obj_map<expr, unsigned> regex_fail_count;
|
||||
obj_map<expr, unsigned> regex_intersection_fail_count;
|
||||
|
||||
svector<char> char_set;
|
||||
std::map<char, int> charSetLookupTable;
|
||||
int charSetSize;
|
||||
|
@ -542,6 +549,8 @@ protected:
|
|||
expr_ref generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints);
|
||||
void aut_path_add_next(u_map<expr*>& next, expr_ref_vector& trail, unsigned idx, expr* cond);
|
||||
expr_ref aut_path_rewrite_constraint(expr * cond, expr * ch_var);
|
||||
void regex_inc_counter(obj_map<expr, unsigned> & counter_map, expr * key);
|
||||
unsigned regex_get_counter(obj_map<expr, unsigned> & counter_map, expr * key);
|
||||
|
||||
void set_up_axioms(expr * ex);
|
||||
void handle_equality(expr * lhs, expr * rhs);
|
||||
|
|
Loading…
Reference in a new issue