regex failsafe and intersect WIP

2025-08-20 10:10:21 +00:00 · 2018-01-12 13:53:02 -05:00 · 2018-01-12 13:53:02 -05:00 · ca3784449f
commit ca3784449f
parent 6b799706b5
5 changed files with 136 additions and 5 deletions
--- a/src/smt/params/smt_params_helper.pyg
+++ b/src/smt/params/smt_params_helper.pyg
@ -81,6 +81,10 @@ def_module_params(module_name='smt',
                          ('str.overlap_priority', DOUBLE, -0.1, 'theory-aware priority for overlapping variable cases; use smt.theory_aware_branching=true'),
                          ('str.regex_automata', BOOL, True, 'use automata-based reasoning for regular expressions (Z3str3 only)'),
                          ('str.regex_automata_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex automata heuristics'),
+                          ('str.regex_automata_intersection_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex intersection heuristics'),
+                          ('str.regex_automata_failed_automaton_threshold', UINT, 10, 'number of failed automaton construction attempts after which a full automaton is automatically built'),
+                          ('str.regex_automata_failed_intersection_threshold', UINT, 10, 'number of failed automaton intersection attempts after which intersection is always computed'),
+                          ('str.regex_automata_length_attempt_threshold', UINT, 10, 'number of length/path constraint attempts before checking unsatisfiability of regex terms'),
                          ('core.minimize', BOOL, False, 'minimize unsat core produced by SMT context'),
                          ('core.extend_patterns', BOOL, False, 'extend unsat core with literals that trigger (potential) quantifier instances'),
                          ('core.extend_patterns.max_distance', UINT, UINT_MAX, 'limits the distance of a pattern-extended unsat core'),
--- a/src/smt/params/theory_str_params.cpp
+++ b/src/smt/params/theory_str_params.cpp
@ -33,4 +33,8 @@ void theory_str_params::updt_params(params_ref const & _p) {
    m_OverlapTheoryAwarePriority = p.str_overlap_priority();
    m_RegexAutomata = p.str_regex_automata();
    m_RegexAutomata_DifficultyThreshold = p.str_regex_automata_difficulty_threshold();
+    m_RegexAutomata_IntersectionDifficultyThreshold = p.str_regex_automata_intersection_difficulty_threshold();
+    m_RegexAutomata_FailedAutomatonThreshold = p.str_regex_automata_failed_automaton_threshold();
+    m_RegexAutomata_FailedIntersectionThreshold = p.str_regex_automata_failed_intersection_threshold();
+    m_RegexAutomata_LengthAttemptThreshold = p.str_regex_automata_length_attempt_threshold();
 }
--- a/src/smt/params/theory_str_params.h
+++ b/src/smt/params/theory_str_params.h
@ -93,6 +93,30 @@ struct theory_str_params {
     */
    unsigned m_RegexAutomata_DifficultyThreshold;

+    /*
+     * RegexAutomata_IntersectionDifficultyThreshold is the lowest difficulty above which Z3str3
+     * will not eagerly intersect automata to check unsatisfiability.
+     */
+    unsigned m_RegexAutomata_IntersectionDifficultyThreshold;
+
+    /*
+     * RegexAutomata_FailedAutomatonThreshold is the number of failed attempts to build an automaton
+     * after which a full automaton (i.e. with no length information) will be built regardless of difficulty.
+     */
+    unsigned m_RegexAutomata_FailedAutomatonThreshold;
+
+    /*
+     * RegexAutomaton_FailedIntersectionThreshold is the number of failed attempts to perform automaton
+     * intersection after which intersection will always be performed regardless of difficulty.
+     */
+    unsigned m_RegexAutomata_FailedIntersectionThreshold;
+
+    /*
+     * RegexAutomaton_LengthAttemptThreshold is the number of attempts to satisfy length/path constraints
+     * before which we begin checking unsatisfiability of a regex term.
+     */
+    unsigned m_RegexAutomata_LengthAttemptThreshold;
+
    theory_str_params(params_ref const & p = params_ref()):
        m_StrongArrangements(true),
        m_AggressiveLengthTesting(false),
@ -106,7 +130,11 @@ struct theory_str_params {
        m_BinarySearchInitialUpperBound(64),
        m_OverlapTheoryAwarePriority(-0.1),
        m_RegexAutomata(true),
-        m_RegexAutomata_DifficultyThreshold(1000)
+        m_RegexAutomata_DifficultyThreshold(1000),
+        m_RegexAutomata_IntersectionDifficultyThreshold(1000),
+        m_RegexAutomata_FailedAutomatonThreshold(10),
+        m_RegexAutomata_FailedIntersectionThreshold(10),
+        m_RegexAutomata_LengthAttemptThreshold(10)
    {
        updt_params(p);
    }
--- a/src/smt/theory_str.cpp
+++ b/src/smt/theory_str.cpp
@ -6480,6 +6480,26 @@ namespace smt {
        }
    }

+    void theory_str::regex_inc_counter(obj_map<expr, unsigned> & counter_map, expr * key) {
+        unsigned old_v;
+        if (counter_map.find(key, old_v)) {
+            unsigned new_v = old_v += 1;
+            counter_map.insert(key, new_v);
+        } else {
+            counter_map.insert(key, 1);
+        }
+    }
+
+    unsigned theory_str::regex_get_counter(obj_map<expr, unsigned> & counter_map, expr * key) {
+        unsigned v;
+        if (counter_map.find(key, v)) {
+            return v;
+        } else {
+            counter_map.insert(key, 0);
+            return 0;
+        }
+    }
+
    // saturating unsigned addition
    unsigned inline _qadd(unsigned a, unsigned b) {
        if (a == UINT_MAX || b == UINT_MAX) {
@ -9515,6 +9535,7 @@ namespace smt {

        // regex automata
        if (m_params.m_RegexAutomata) {
+            // TODO since heuristics might fail, the "no progress" flag might need to be handled specially here
            bool regex_axiom_add = false;
            for (obj_hashtable<expr>::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) {
                expr * str_in_re = *it;
@ -9579,11 +9600,16 @@ namespace smt {
                        regex_terms_with_path_constraints.insert(str_in_re);
                        m_trail_stack.push(insert_obj_trail<theory_str, expr>(regex_terms_with_path_constraints, str_in_re));
                        regex_axiom_add = true;
+
+                        // increment LengthAttemptCount
+                        regex_inc_counter(regex_length_attempt_count, str_in_re);
                        continue;
                    } else {
                        // no automata available, or else all bounds assumptions are invalid
                        unsigned expected_complexity = estimate_regex_complexity(re);
-                        if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold) {
+                        if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold || regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold) {
+                            CTRACE("str", regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold,
+                                    tout << "failed automaton threshold reached for " << mk_pp(str_in_re, m) << " -- automatically constructing full automaton" << std::endl;);
                            eautomaton * aut = m_mk_aut(re);
                            aut->compress();
                            regex_automata.push_back(aut);
@ -9596,8 +9622,7 @@ namespace smt {
                            regex_axiom_add = true;
                            // TODO immediately attempt to learn lower/upper bound info here
                        } else {
-                            // TODO increment failure count
-                            NOT_IMPLEMENTED_YET();
+                            regex_inc_counter(regex_fail_count, str_in_re);
                        }
                        continue;
                    }
@ -9852,7 +9877,68 @@ namespace smt {
                }

                // NOT_IMPLEMENTED_YET();
-            }
+            } // foreach (entry in regex_terms)
+
+            for (obj_map<expr, ptr_vector<expr> >::iterator it = regex_terms_by_string.begin();
+                    it != regex_terms_by_string.end(); ++it) {
+                // TODO do we need to check equivalence classes of strings here?
+
+                expr * str = it->m_key;
+                ptr_vector<expr> str_in_re_terms = it->m_value;
+
+                svector<regex_automaton_under_assumptions> intersect_constraints;
+                // we may find empty intersection before checking every constraint;
+                // this vector keeps track of which ones actually take part in intersection
+                svector<regex_automaton_under_assumptions> used_intersect_constraints;
+
+                // choose an automaton/assumption for each assigned (str.in.re)
+                // that's consistent with the current length information
+                NOT_IMPLEMENTED_YET();
+
+                eautomaton * aut_inter = NULL;
+                CTRACE("str", !intersect_constraints.empty(), tout << "check intersection of automata constraints for " << mk_pp(str, m) << std::endl;);
+                for (svector<regex_automaton_under_assumptions>::iterator aut_it = intersect_constraints.begin();
+                        aut_it != intersect_constraints.end(); ++aut_it) {
+                    regex_automaton_under_assumptions aut = *aut_it;
+                    if (aut_inter == NULL) {
+                        // start somewhere
+                        aut_inter = aut.get_automaton();
+                        used_intersect_constraints.push_back(aut);
+                        continue;
+                    }
+
+                    if (regex_get_counter(regex_length_attempt_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_LengthAttemptThreshold) {
+                        unsigned intersectionDifficulty = 0; // TODO EstimateIntersectionDifficulty
+                        if (intersectionDifficulty <= m_params.m_RegexAutomata_IntersectionDifficultyThreshold
+                                || regex_get_counter(regex_intersection_fail_count, aut.get_regex_term()) >= m_params.m_RegexAutomata_FailedIntersectionThreshold) {
+                            lbool current_assignment = ctx.get_assignment(aut.get_regex_term());
+                            // if the assignment is consistent with our assumption, use the automaton directly;
+                            // otherwise, complement it (and save that automaton for next time)
+                            // TODO we should cache these intermediate results
+                            // TODO do we need to push the intermediates into a vector for deletion anyway?
+                            if ( (current_assignment == l_true && aut.get_polarity())
+                                    || (current_assignment == l_false && !aut.get_polarity())) {
+                                aut_inter = m_mk_aut.mk_product(aut_inter, aut.get_automaton());
+                            } else {
+                                // need to complement first
+                                NOT_IMPLEMENTED_YET();
+                            }
+                            used_intersect_constraints.push_back(aut);
+                            if (aut_inter->is_empty()) {
+                                break;
+                            }
+                        } else {
+                            // failed intersection
+                            regex_inc_counter(regex_intersection_fail_count, aut.get_regex_term());
+                        }
+                    }
+                } // foreach(entry in intersect_constraints)
+                if (aut_inter->is_empty()) {
+                    TRACE("str", tout << "product automaton is empty; asserting conflict clause" << std::endl;);
+                    NOT_IMPLEMENTED_YET();
+                }
+            } // foreach (entry in regex_terms_by_string)
+
            if (regex_axiom_add) {
                return FC_CONTINUE;
            }
--- a/src/smt/theory_str.h
+++ b/src/smt/theory_str.h
@ -221,6 +221,7 @@ public:
    }

    eautomaton * get_automaton() const { return aut; }
+    expr * get_regex_term() const { return str_in_re; }
    bool get_polarity() const { return polarity; }

    virtual ~regex_automaton_under_assumptions() {
@ -409,6 +410,12 @@ protected:
    std::map<expr*, nfa> regex_nfa_cache; // Regex term --> NFA
    obj_hashtable<expr> regex_terms_with_path_constraints; // set of string terms which have had path constraints asserted in the current scope

+    // each counter maps a (str.in.re) expression to an integer.
+    // use helper functions regex_inc_counter() and regex_get_counter() to access
+    obj_map<expr, unsigned> regex_length_attempt_count;
+    obj_map<expr, unsigned> regex_fail_count;
+    obj_map<expr, unsigned> regex_intersection_fail_count;
+
    svector<char> char_set;
    std::map<char, int>  charSetLookupTable;
    int           charSetSize;
@ -542,6 +549,8 @@ protected:
    expr_ref generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints);
    void aut_path_add_next(u_map<expr*>& next, expr_ref_vector& trail, unsigned idx, expr* cond);
    expr_ref aut_path_rewrite_constraint(expr * cond, expr * ch_var);
+    void regex_inc_counter(obj_map<expr, unsigned> & counter_map, expr * key);
+    unsigned regex_get_counter(obj_map<expr, unsigned> & counter_map, expr * key);

    void set_up_axioms(expr * ex);
    void handle_equality(expr * lhs, expr * rhs);