mirror of
https://github.com/Z3Prover/z3
synced 2025-06-07 06:33:23 +00:00
regex failsafe and intersect WIP
This commit is contained in:
parent
6b799706b5
commit
ca3784449f
5 changed files with 136 additions and 5 deletions
|
@ -81,6 +81,10 @@ def_module_params(module_name='smt',
|
||||||
('str.overlap_priority', DOUBLE, -0.1, 'theory-aware priority for overlapping variable cases; use smt.theory_aware_branching=true'),
|
('str.overlap_priority', DOUBLE, -0.1, 'theory-aware priority for overlapping variable cases; use smt.theory_aware_branching=true'),
|
||||||
('str.regex_automata', BOOL, True, 'use automata-based reasoning for regular expressions (Z3str3 only)'),
|
('str.regex_automata', BOOL, True, 'use automata-based reasoning for regular expressions (Z3str3 only)'),
|
||||||
('str.regex_automata_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex automata heuristics'),
|
('str.regex_automata_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex automata heuristics'),
|
||||||
|
('str.regex_automata_intersection_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex intersection heuristics'),
|
||||||
|
('str.regex_automata_failed_automaton_threshold', UINT, 10, 'number of failed automaton construction attempts after which a full automaton is automatically built'),
|
||||||
|
('str.regex_automata_failed_intersection_threshold', UINT, 10, 'number of failed automaton intersection attempts after which intersection is always computed'),
|
||||||
|
('str.regex_automata_length_attempt_threshold', UINT, 10, 'number of length/path constraint attempts before checking unsatisfiability of regex terms'),
|
||||||
('core.minimize', BOOL, False, 'minimize unsat core produced by SMT context'),
|
('core.minimize', BOOL, False, 'minimize unsat core produced by SMT context'),
|
||||||
('core.extend_patterns', BOOL, False, 'extend unsat core with literals that trigger (potential) quantifier instances'),
|
('core.extend_patterns', BOOL, False, 'extend unsat core with literals that trigger (potential) quantifier instances'),
|
||||||
('core.extend_patterns.max_distance', UINT, UINT_MAX, 'limits the distance of a pattern-extended unsat core'),
|
('core.extend_patterns.max_distance', UINT, UINT_MAX, 'limits the distance of a pattern-extended unsat core'),
|
||||||
|
|
|
@ -33,4 +33,8 @@ void theory_str_params::updt_params(params_ref const & _p) {
|
||||||
m_OverlapTheoryAwarePriority = p.str_overlap_priority();
|
m_OverlapTheoryAwarePriority = p.str_overlap_priority();
|
||||||
m_RegexAutomata = p.str_regex_automata();
|
m_RegexAutomata = p.str_regex_automata();
|
||||||
m_RegexAutomata_DifficultyThreshold = p.str_regex_automata_difficulty_threshold();
|
m_RegexAutomata_DifficultyThreshold = p.str_regex_automata_difficulty_threshold();
|
||||||
|
m_RegexAutomata_IntersectionDifficultyThreshold = p.str_regex_automata_intersection_difficulty_threshold();
|
||||||
|
m_RegexAutomata_FailedAutomatonThreshold = p.str_regex_automata_failed_automaton_threshold();
|
||||||
|
m_RegexAutomata_FailedIntersectionThreshold = p.str_regex_automata_failed_intersection_threshold();
|
||||||
|
m_RegexAutomata_LengthAttemptThreshold = p.str_regex_automata_length_attempt_threshold();
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,6 +93,30 @@ struct theory_str_params {
|
||||||
*/
|
*/
|
||||||
unsigned m_RegexAutomata_DifficultyThreshold;
|
unsigned m_RegexAutomata_DifficultyThreshold;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* RegexAutomata_IntersectionDifficultyThreshold is the lowest difficulty above which Z3str3
|
||||||
|
* will not eagerly intersect automata to check unsatisfiability.
|
||||||
|
*/
|
||||||
|
unsigned m_RegexAutomata_IntersectionDifficultyThreshold;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* RegexAutomata_FailedAutomatonThreshold is the number of failed attempts to build an automaton
|
||||||
|
* after which a full automaton (i.e. with no length information) will be built regardless of difficulty.
|
||||||
|
*/
|
||||||
|
unsigned m_RegexAutomata_FailedAutomatonThreshold;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* RegexAutomaton_FailedIntersectionThreshold is the number of failed attempts to perform automaton
|
||||||
|
* intersection after which intersection will always be performed regardless of difficulty.
|
||||||
|
*/
|
||||||
|
unsigned m_RegexAutomata_FailedIntersectionThreshold;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* RegexAutomaton_LengthAttemptThreshold is the number of attempts to satisfy length/path constraints
|
||||||
|
* before which we begin checking unsatisfiability of a regex term.
|
||||||
|
*/
|
||||||
|
unsigned m_RegexAutomata_LengthAttemptThreshold;
|
||||||
|
|
||||||
theory_str_params(params_ref const & p = params_ref()):
|
theory_str_params(params_ref const & p = params_ref()):
|
||||||
m_StrongArrangements(true),
|
m_StrongArrangements(true),
|
||||||
m_AggressiveLengthTesting(false),
|
m_AggressiveLengthTesting(false),
|
||||||
|
@ -106,7 +130,11 @@ struct theory_str_params {
|
||||||
m_BinarySearchInitialUpperBound(64),
|
m_BinarySearchInitialUpperBound(64),
|
||||||
m_OverlapTheoryAwarePriority(-0.1),
|
m_OverlapTheoryAwarePriority(-0.1),
|
||||||
m_RegexAutomata(true),
|
m_RegexAutomata(true),
|
||||||
m_RegexAutomata_DifficultyThreshold(1000)
|
m_RegexAutomata_DifficultyThreshold(1000),
|
||||||
|
m_RegexAutomata_IntersectionDifficultyThreshold(1000),
|
||||||
|
m_RegexAutomata_FailedAutomatonThreshold(10),
|
||||||
|
m_RegexAutomata_FailedIntersectionThreshold(10),
|
||||||
|
m_RegexAutomata_LengthAttemptThreshold(10)
|
||||||
{
|
{
|
||||||
updt_params(p);
|
updt_params(p);
|
||||||
}
|
}
|
||||||
|
|
|
@ -6480,6 +6480,26 @@ namespace smt {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void theory_str::regex_inc_counter(obj_map<expr, unsigned> & counter_map, expr * key) {
|
||||||
|
unsigned old_v;
|
||||||
|
if (counter_map.find(key, old_v)) {
|
||||||
|
unsigned new_v = old_v += 1;
|
||||||
|
counter_map.insert(key, new_v);
|
||||||
|
} else {
|
||||||
|
counter_map.insert(key, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned theory_str::regex_get_counter(obj_map<expr, unsigned> & counter_map, expr * key) {
|
||||||
|
unsigned v;
|
||||||
|
if (counter_map.find(key, v)) {
|
||||||
|
return v;
|
||||||
|
} else {
|
||||||
|
counter_map.insert(key, 0);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// saturating unsigned addition
|
// saturating unsigned addition
|
||||||
unsigned inline _qadd(unsigned a, unsigned b) {
|
unsigned inline _qadd(unsigned a, unsigned b) {
|
||||||
if (a == UINT_MAX || b == UINT_MAX) {
|
if (a == UINT_MAX || b == UINT_MAX) {
|
||||||
|
@ -9515,6 +9535,7 @@ namespace smt {
|
||||||
|
|
||||||
// regex automata
|
// regex automata
|
||||||
if (m_params.m_RegexAutomata) {
|
if (m_params.m_RegexAutomata) {
|
||||||
|
// TODO since heuristics might fail, the "no progress" flag might need to be handled specially here
|
||||||
bool regex_axiom_add = false;
|
bool regex_axiom_add = false;
|
||||||
for (obj_hashtable<expr>::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) {
|
for (obj_hashtable<expr>::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) {
|
||||||
expr * str_in_re = *it;
|
expr * str_in_re = *it;
|
||||||
|
@ -9579,11 +9600,16 @@ namespace smt {
|
||||||
regex_terms_with_path_constraints.insert(str_in_re);
|
regex_terms_with_path_constraints.insert(str_in_re);
|
||||||
m_trail_stack.push(insert_obj_trail<theory_str, expr>(regex_terms_with_path_constraints, str_in_re));
|
m_trail_stack.push(insert_obj_trail<theory_str, expr>(regex_terms_with_path_constraints, str_in_re));
|
||||||
regex_axiom_add = true;
|
regex_axiom_add = true;
|
||||||
|
|
||||||
|
// increment LengthAttemptCount
|
||||||
|
regex_inc_counter(regex_length_attempt_count, str_in_re);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
// no automata available, or else all bounds assumptions are invalid
|
// no automata available, or else all bounds assumptions are invalid
|
||||||
unsigned expected_complexity = estimate_regex_complexity(re);
|
unsigned expected_complexity = estimate_regex_complexity(re);
|
||||||
if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold) {
|
if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold || regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold) {
|
||||||
|
CTRACE("str", regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold,
|
||||||
|
tout << "failed automaton threshold reached for " << mk_pp(str_in_re, m) << " -- automatically constructing full automaton" << std::endl;);
|
||||||
eautomaton * aut = m_mk_aut(re);
|
eautomaton * aut = m_mk_aut(re);
|
||||||
aut->compress();
|
aut->compress();
|
||||||
regex_automata.push_back(aut);
|
regex_automata.push_back(aut);
|
||||||
|
@ -9596,8 +9622,7 @@ namespace smt {
|
||||||
regex_axiom_add = true;
|
regex_axiom_add = true;
|
||||||
// TODO immediately attempt to learn lower/upper bound info here
|
// TODO immediately attempt to learn lower/upper bound info here
|
||||||
} else {
|
} else {
|
||||||
// TODO increment failure count
|
regex_inc_counter(regex_fail_count, str_in_re);
|
||||||
NOT_IMPLEMENTED_YET();
|
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -9852,7 +9877,68 @@ namespace smt {
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOT_IMPLEMENTED_YET();
|
// NOT_IMPLEMENTED_YET();
|
||||||
|
} // foreach (entry in regex_terms)
|
||||||
|
|
||||||
|
for (obj_map<expr, ptr_vector<expr> >::iterator it = regex_terms_by_string.begin();
|
||||||
|
it != regex_terms_by_string.end(); ++it) {
|
||||||
|
// TODO do we need to check equivalence classes of strings here?
|
||||||
|
|
||||||
|
expr * str = it->m_key;
|
||||||
|
ptr_vector<expr> str_in_re_terms = it->m_value;
|
||||||
|
|
||||||
|
svector<regex_automaton_under_assumptions> intersect_constraints;
|
||||||
|
// we may find empty intersection before checking every constraint;
|
||||||
|
// this vector keeps track of which ones actually take part in intersection
|
||||||
|
svector<regex_automaton_under_assumptions> used_intersect_constraints;
|
||||||
|
|
||||||
|
// choose an automaton/assumption for each assigned (str.in.re)
|
||||||
|
// that's consistent with the current length information
|
||||||
|
NOT_IMPLEMENTED_YET();
|
||||||
|
|
||||||
|
eautomaton * aut_inter = NULL;
|
||||||
|
CTRACE("str", !intersect_constraints.empty(), tout << "check intersection of automata constraints for " << mk_pp(str, m) << std::endl;);
|
||||||
|
for (svector<regex_automaton_under_assumptions>::iterator aut_it = intersect_constraints.begin();
|
||||||
|
aut_it != intersect_constraints.end(); ++aut_it) {
|
||||||
|
regex_automaton_under_assumptions aut = *aut_it;
|
||||||
|
if (aut_inter == NULL) {
|
||||||
|
// start somewhere
|
||||||
|
aut_inter = aut.get_automaton();
|
||||||
|
used_intersect_constraints.push_back(aut);
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (regex_get_counter(regex_length_attempt_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_LengthAttemptThreshold) {
|
||||||
|
unsigned intersectionDifficulty = 0; // TODO EstimateIntersectionDifficulty
|
||||||
|
if (intersectionDifficulty <= m_params.m_RegexAutomata_IntersectionDifficultyThreshold
|
||||||
|
|| regex_get_counter(regex_intersection_fail_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_FailedIntersectionThreshold) {
|
||||||
|
lbool current_assignment = ctx.get_assignment(aut.get_regex_term());
|
||||||
|
// if the assignment is consistent with our assumption, use the automaton directly;
|
||||||
|
// otherwise, complement it (and save that automaton for next time)
|
||||||
|
// TODO we should cache these intermediate results
|
||||||
|
// TODO do we need to push the intermediates into a vector for deletion anyway?
|
||||||
|
if ( (current_assignment == l_true && aut.get_polarity())
|
||||||
|
|| (current_assignment == l_false && !aut.get_polarity())) {
|
||||||
|
aut_inter = m_mk_aut.mk_product(aut_inter, aut.get_automaton());
|
||||||
|
} else {
|
||||||
|
// need to complement first
|
||||||
|
NOT_IMPLEMENTED_YET();
|
||||||
|
}
|
||||||
|
used_intersect_constraints.push_back(aut);
|
||||||
|
if (aut_inter->is_empty()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// failed intersection
|
||||||
|
regex_inc_counter(regex_intersection_fail_count, aut.get_regex_term());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // foreach(entry in intersect_constraints)
|
||||||
|
if (aut_inter->is_empty()) {
|
||||||
|
TRACE("str", tout << "product automaton is empty; asserting conflict clause" << std::endl;);
|
||||||
|
NOT_IMPLEMENTED_YET();
|
||||||
|
}
|
||||||
|
} // foreach (entry in regex_terms_by_string)
|
||||||
|
|
||||||
if (regex_axiom_add) {
|
if (regex_axiom_add) {
|
||||||
return FC_CONTINUE;
|
return FC_CONTINUE;
|
||||||
}
|
}
|
||||||
|
|
|
@ -221,6 +221,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
eautomaton * get_automaton() const { return aut; }
|
eautomaton * get_automaton() const { return aut; }
|
||||||
|
expr * get_regex_term() const { return str_in_re; }
|
||||||
bool get_polarity() const { return polarity; }
|
bool get_polarity() const { return polarity; }
|
||||||
|
|
||||||
virtual ~regex_automaton_under_assumptions() {
|
virtual ~regex_automaton_under_assumptions() {
|
||||||
|
@ -409,6 +410,12 @@ protected:
|
||||||
std::map<expr*, nfa> regex_nfa_cache; // Regex term --> NFA
|
std::map<expr*, nfa> regex_nfa_cache; // Regex term --> NFA
|
||||||
obj_hashtable<expr> regex_terms_with_path_constraints; // set of string terms which have had path constraints asserted in the current scope
|
obj_hashtable<expr> regex_terms_with_path_constraints; // set of string terms which have had path constraints asserted in the current scope
|
||||||
|
|
||||||
|
// each counter maps a (str.in.re) expression to an integer.
|
||||||
|
// use helper functions regex_inc_counter() and regex_get_counter() to access
|
||||||
|
obj_map<expr, unsigned> regex_length_attempt_count;
|
||||||
|
obj_map<expr, unsigned> regex_fail_count;
|
||||||
|
obj_map<expr, unsigned> regex_intersection_fail_count;
|
||||||
|
|
||||||
svector<char> char_set;
|
svector<char> char_set;
|
||||||
std::map<char, int> charSetLookupTable;
|
std::map<char, int> charSetLookupTable;
|
||||||
int charSetSize;
|
int charSetSize;
|
||||||
|
@ -542,6 +549,8 @@ protected:
|
||||||
expr_ref generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints);
|
expr_ref generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints);
|
||||||
void aut_path_add_next(u_map<expr*>& next, expr_ref_vector& trail, unsigned idx, expr* cond);
|
void aut_path_add_next(u_map<expr*>& next, expr_ref_vector& trail, unsigned idx, expr* cond);
|
||||||
expr_ref aut_path_rewrite_constraint(expr * cond, expr * ch_var);
|
expr_ref aut_path_rewrite_constraint(expr * cond, expr * ch_var);
|
||||||
|
void regex_inc_counter(obj_map<expr, unsigned> & counter_map, expr * key);
|
||||||
|
unsigned regex_get_counter(obj_map<expr, unsigned> & counter_map, expr * key);
|
||||||
|
|
||||||
void set_up_axioms(expr * ex);
|
void set_up_axioms(expr * ex);
|
||||||
void handle_equality(expr * lhs, expr * rhs);
|
void handle_equality(expr * lhs, expr * rhs);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue