From ca3784449f02ce860736a741befb62ff33d9549b Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Fri, 12 Jan 2018 13:53:02 -0500 Subject: [PATCH] regex failsafe and intersect WIP --- src/smt/params/smt_params_helper.pyg | 4 ++ src/smt/params/theory_str_params.cpp | 4 ++ src/smt/params/theory_str_params.h | 30 ++++++++- src/smt/theory_str.cpp | 94 ++++++++++++++++++++++++++-- src/smt/theory_str.h | 9 +++ 5 files changed, 136 insertions(+), 5 deletions(-) diff --git a/src/smt/params/smt_params_helper.pyg b/src/smt/params/smt_params_helper.pyg index 7ab0d52f2..1f691d587 100644 --- a/src/smt/params/smt_params_helper.pyg +++ b/src/smt/params/smt_params_helper.pyg @@ -81,6 +81,10 @@ def_module_params(module_name='smt', ('str.overlap_priority', DOUBLE, -0.1, 'theory-aware priority for overlapping variable cases; use smt.theory_aware_branching=true'), ('str.regex_automata', BOOL, True, 'use automata-based reasoning for regular expressions (Z3str3 only)'), ('str.regex_automata_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex automata heuristics'), + ('str.regex_automata_intersection_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex intersection heuristics'), + ('str.regex_automata_failed_automaton_threshold', UINT, 10, 'number of failed automaton construction attempts after which a full automaton is automatically built'), + ('str.regex_automata_failed_intersection_threshold', UINT, 10, 'number of failed automaton intersection attempts after which intersection is always computed'), + ('str.regex_automata_length_attempt_threshold', UINT, 10, 'number of length/path constraint attempts before checking unsatisfiability of regex terms'), ('core.minimize', BOOL, False, 'minimize unsat core produced by SMT context'), ('core.extend_patterns', BOOL, False, 'extend unsat core with literals that trigger (potential) quantifier instances'), ('core.extend_patterns.max_distance', UINT, UINT_MAX, 'limits the distance of a pattern-extended unsat core'), diff --git a/src/smt/params/theory_str_params.cpp b/src/smt/params/theory_str_params.cpp index fa6862161..92478bcd9 100644 --- a/src/smt/params/theory_str_params.cpp +++ b/src/smt/params/theory_str_params.cpp @@ -33,4 +33,8 @@ void theory_str_params::updt_params(params_ref const & _p) { m_OverlapTheoryAwarePriority = p.str_overlap_priority(); m_RegexAutomata = p.str_regex_automata(); m_RegexAutomata_DifficultyThreshold = p.str_regex_automata_difficulty_threshold(); + m_RegexAutomata_IntersectionDifficultyThreshold = p.str_regex_automata_intersection_difficulty_threshold(); + m_RegexAutomata_FailedAutomatonThreshold = p.str_regex_automata_failed_automaton_threshold(); + m_RegexAutomata_FailedIntersectionThreshold = p.str_regex_automata_failed_intersection_threshold(); + m_RegexAutomata_LengthAttemptThreshold = p.str_regex_automata_length_attempt_threshold(); } diff --git a/src/smt/params/theory_str_params.h b/src/smt/params/theory_str_params.h index 48c0205ee..8c7816839 100644 --- a/src/smt/params/theory_str_params.h +++ b/src/smt/params/theory_str_params.h @@ -93,6 +93,30 @@ struct theory_str_params { */ unsigned m_RegexAutomata_DifficultyThreshold; + /* + * RegexAutomata_IntersectionDifficultyThreshold is the lowest difficulty above which Z3str3 + * will not eagerly intersect automata to check unsatisfiability. + */ + unsigned m_RegexAutomata_IntersectionDifficultyThreshold; + + /* + * RegexAutomata_FailedAutomatonThreshold is the number of failed attempts to build an automaton + * after which a full automaton (i.e. with no length information) will be built regardless of difficulty. + */ + unsigned m_RegexAutomata_FailedAutomatonThreshold; + + /* + * RegexAutomaton_FailedIntersectionThreshold is the number of failed attempts to perform automaton + * intersection after which intersection will always be performed regardless of difficulty. + */ + unsigned m_RegexAutomata_FailedIntersectionThreshold; + + /* + * RegexAutomaton_LengthAttemptThreshold is the number of attempts to satisfy length/path constraints + * before which we begin checking unsatisfiability of a regex term. + */ + unsigned m_RegexAutomata_LengthAttemptThreshold; + theory_str_params(params_ref const & p = params_ref()): m_StrongArrangements(true), m_AggressiveLengthTesting(false), @@ -106,7 +130,11 @@ struct theory_str_params { m_BinarySearchInitialUpperBound(64), m_OverlapTheoryAwarePriority(-0.1), m_RegexAutomata(true), - m_RegexAutomata_DifficultyThreshold(1000) + m_RegexAutomata_DifficultyThreshold(1000), + m_RegexAutomata_IntersectionDifficultyThreshold(1000), + m_RegexAutomata_FailedAutomatonThreshold(10), + m_RegexAutomata_FailedIntersectionThreshold(10), + m_RegexAutomata_LengthAttemptThreshold(10) { updt_params(p); } diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index 5910d9d95..6e4211f7b 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -6480,6 +6480,26 @@ namespace smt { } } + void theory_str::regex_inc_counter(obj_map & counter_map, expr * key) { + unsigned old_v; + if (counter_map.find(key, old_v)) { + unsigned new_v = old_v += 1; + counter_map.insert(key, new_v); + } else { + counter_map.insert(key, 1); + } + } + + unsigned theory_str::regex_get_counter(obj_map & counter_map, expr * key) { + unsigned v; + if (counter_map.find(key, v)) { + return v; + } else { + counter_map.insert(key, 0); + return 0; + } + } + // saturating unsigned addition unsigned inline _qadd(unsigned a, unsigned b) { if (a == UINT_MAX || b == UINT_MAX) { @@ -9515,6 +9535,7 @@ namespace smt { // regex automata if (m_params.m_RegexAutomata) { + // TODO since heuristics might fail, the "no progress" flag might need to be handled specially here bool regex_axiom_add = false; for (obj_hashtable::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) { expr * str_in_re = *it; @@ -9579,11 +9600,16 @@ namespace smt { regex_terms_with_path_constraints.insert(str_in_re); m_trail_stack.push(insert_obj_trail(regex_terms_with_path_constraints, str_in_re)); regex_axiom_add = true; + + // increment LengthAttemptCount + regex_inc_counter(regex_length_attempt_count, str_in_re); continue; } else { // no automata available, or else all bounds assumptions are invalid unsigned expected_complexity = estimate_regex_complexity(re); - if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold) { + if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold || regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold) { + CTRACE("str", regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold, + tout << "failed automaton threshold reached for " << mk_pp(str_in_re, m) << " -- automatically constructing full automaton" << std::endl;); eautomaton * aut = m_mk_aut(re); aut->compress(); regex_automata.push_back(aut); @@ -9596,8 +9622,7 @@ namespace smt { regex_axiom_add = true; // TODO immediately attempt to learn lower/upper bound info here } else { - // TODO increment failure count - NOT_IMPLEMENTED_YET(); + regex_inc_counter(regex_fail_count, str_in_re); } continue; } @@ -9852,7 +9877,68 @@ namespace smt { } // NOT_IMPLEMENTED_YET(); - } + } // foreach (entry in regex_terms) + + for (obj_map >::iterator it = regex_terms_by_string.begin(); + it != regex_terms_by_string.end(); ++it) { + // TODO do we need to check equivalence classes of strings here? + + expr * str = it->m_key; + ptr_vector str_in_re_terms = it->m_value; + + svector intersect_constraints; + // we may find empty intersection before checking every constraint; + // this vector keeps track of which ones actually take part in intersection + svector used_intersect_constraints; + + // choose an automaton/assumption for each assigned (str.in.re) + // that's consistent with the current length information + NOT_IMPLEMENTED_YET(); + + eautomaton * aut_inter = NULL; + CTRACE("str", !intersect_constraints.empty(), tout << "check intersection of automata constraints for " << mk_pp(str, m) << std::endl;); + for (svector::iterator aut_it = intersect_constraints.begin(); + aut_it != intersect_constraints.end(); ++aut_it) { + regex_automaton_under_assumptions aut = *aut_it; + if (aut_inter == NULL) { + // start somewhere + aut_inter = aut.get_automaton(); + used_intersect_constraints.push_back(aut); + continue; + } + + if (regex_get_counter(regex_length_attempt_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_LengthAttemptThreshold) { + unsigned intersectionDifficulty = 0; // TODO EstimateIntersectionDifficulty + if (intersectionDifficulty <= m_params.m_RegexAutomata_IntersectionDifficultyThreshold + || regex_get_counter(regex_intersection_fail_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_FailedIntersectionThreshold) { + lbool current_assignment = ctx.get_assignment(aut.get_regex_term()); + // if the assignment is consistent with our assumption, use the automaton directly; + // otherwise, complement it (and save that automaton for next time) + // TODO we should cache these intermediate results + // TODO do we need to push the intermediates into a vector for deletion anyway? + if ( (current_assignment == l_true && aut.get_polarity()) + || (current_assignment == l_false && !aut.get_polarity())) { + aut_inter = m_mk_aut.mk_product(aut_inter, aut.get_automaton()); + } else { + // need to complement first + NOT_IMPLEMENTED_YET(); + } + used_intersect_constraints.push_back(aut); + if (aut_inter->is_empty()) { + break; + } + } else { + // failed intersection + regex_inc_counter(regex_intersection_fail_count, aut.get_regex_term()); + } + } + } // foreach(entry in intersect_constraints) + if (aut_inter->is_empty()) { + TRACE("str", tout << "product automaton is empty; asserting conflict clause" << std::endl;); + NOT_IMPLEMENTED_YET(); + } + } // foreach (entry in regex_terms_by_string) + if (regex_axiom_add) { return FC_CONTINUE; } diff --git a/src/smt/theory_str.h b/src/smt/theory_str.h index c64eec838..45df675fb 100644 --- a/src/smt/theory_str.h +++ b/src/smt/theory_str.h @@ -221,6 +221,7 @@ public: } eautomaton * get_automaton() const { return aut; } + expr * get_regex_term() const { return str_in_re; } bool get_polarity() const { return polarity; } virtual ~regex_automaton_under_assumptions() { @@ -409,6 +410,12 @@ protected: std::map regex_nfa_cache; // Regex term --> NFA obj_hashtable regex_terms_with_path_constraints; // set of string terms which have had path constraints asserted in the current scope + // each counter maps a (str.in.re) expression to an integer. + // use helper functions regex_inc_counter() and regex_get_counter() to access + obj_map regex_length_attempt_count; + obj_map regex_fail_count; + obj_map regex_intersection_fail_count; + svector char_set; std::map charSetLookupTable; int charSetSize; @@ -542,6 +549,8 @@ protected: expr_ref generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints); void aut_path_add_next(u_map& next, expr_ref_vector& trail, unsigned idx, expr* cond); expr_ref aut_path_rewrite_constraint(expr * cond, expr * ch_var); + void regex_inc_counter(obj_map & counter_map, expr * key); + unsigned regex_get_counter(obj_map & counter_map, expr * key); void set_up_axioms(expr * ex); void handle_equality(expr * lhs, expr * rhs);