short path for length-0 regex terms

2025-11-28 16:29:50 +00:00 · 2018-01-17 18:26:31 -05:00 · 2018-01-17 18:26:31 -05:00 · c2b268c645
commit c2b268c645
parent c0ed683882
2 changed files with 206 additions and 119 deletions
--- a/src/smt/theory_str.cpp
+++ b/src/smt/theory_str.cpp
@ -6646,6 +6646,9 @@ namespace smt {
     * In some cases, the returned formula requires one or more free integer variables to be created.
     * These variables are returned in the reference parameter `freeVariables`.
     * Extra assertions should be made for these free variables constraining them to be non-negative.
     *
     * TODO: star unrolling?
     * TODO: generate stars "correctly" as a linear combination of all possible subterm lengths
     */
    expr_ref theory_str::infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables) {
        ENSURE(u.is_re(re));
@ -6725,6 +6728,59 @@ namespace smt {
        }
    }
    /*
     * Assert initial length bounds for the positive constraint (str in re) corresponding
     * to the automaton `aut`.
     * The intended constraint has the form:
     *   str_in_re --> (len(str) ?= 0 OR len(str) >= lb) AND len(str) <= ub
     * where the equality with 0 is based on whether solutions of length 0 are allowed.
     * Only the lower-bound part is generated at the moment; the upper-bound clause is
     * still a TODO and is therefore always omitted.
     *
     * Cases, based on the result of refine_automaton_lower_bound(aut, 0, lb):
     *   - length 0 accepted, longer solution found:  len(str) = 0 OR len(str) >= lb
     *   - length 0 accepted, no longer solution:     len(str) = 0
     *   - length 0 rejected, longer solution found:  len(str) >= lb
     *   - length 0 rejected, no longer solution:     nothing is asserted
     * "No longer solution" is detected by lb == -1.
     * NOTE(review): this assumes refine_automaton_lower_bound reports -1 when it cannot
     * find a solution above the current bound -- confirm against its definition.
     *
     * The bound that was found (or 0 when there is none) is also recorded in
     * regex_last_lower_bound for the string term.
     *
     * Parameters:
     *   str_in_re - the membership term (str in re); must be recognized by u.str.is_in_re
     *   aut       - automaton for `re`; must not be NULL
     */
    void theory_str::find_automaton_initial_bounds(expr * str_in_re, eautomaton * aut) {
        ENSURE(aut != NULL);
        context & ctx = get_context();
        ast_manager & m = get_manager();

        // Decompose the membership term. The pointers are initialized and the result is
        // checked so that a malformed term can never lead to a read of garbage pointers.
        expr * str = NULL;
        expr * re = NULL;
        if (!u.str.is_in_re(str_in_re, str, re)) {
            UNREACHABLE();
            return;
        }

        expr_ref strlen(mk_strlen(str), m);
        expr_ref_vector rhs(m);

        // lower bound first
        rational nonzero_lower_bound;
        bool zero_sol_exists = refine_automaton_lower_bound(aut, rational::zero(), nonzero_lower_bound);
        bool nonzero_sol_exists = !nonzero_lower_bound.is_minus_one();

        if (zero_sol_exists) {
            // solution at 0
            regex_last_lower_bound.insert(str, rational::zero());
            expr_ref len_is_zero(ctx.mk_eq_atom(strlen, m_autil.mk_numeral(rational::zero(), true)), m);
            if (nonzero_sol_exists) {
                expr_ref len_ge_lb(m_autil.mk_ge(strlen, m_autil.mk_numeral(nonzero_lower_bound, true)), m);
                rhs.push_back(m.mk_or(len_is_zero, len_ge_lb));
            } else {
                // The automaton accepts the empty string and nothing longer
                // (e.g. a regex that matches only ""), so the length can ONLY be 0.
                rhs.push_back(len_is_zero);
            }
        } else if (nonzero_sol_exists) {
            // no solution at 0, but a longer one exists
            regex_last_lower_bound.insert(str, nonzero_lower_bound);
            expr_ref len_ge_lb(m_autil.mk_ge(strlen, m_autil.mk_numeral(nonzero_lower_bound, true)), m);
            rhs.push_back(len_ge_lb);
        } else {
            // No solution was found at any length (the automaton is probably empty).
            // Nothing useful can be said about the length here, so record 0 as a
            // trivially safe lower bound and assert no constraint; emptiness of the
            // language has to be detected elsewhere.
            regex_last_lower_bound.insert(str, rational::zero());
        }
        // TODO upper bound check

        if (!rhs.empty()) {
            expr_ref lhs(str_in_re, m);
            expr_ref _rhs(mk_and(rhs), m);
            assert_implication(lhs, _rhs);
        }
    }
    /*
     * Refine the lower bound on the length of a solution to a given automaton.
     * The method returns TRUE if a solution of length `current_lower_bound` exists,
@ -9596,7 +9652,6 @@ namespace smt {
        // regex automata
        if (m_params.m_RegexAutomata) {
            // TODO since heuristics might fail, the "no progress" flag might need to be handled specially here
            // TODO learning of linear length constraints in the style of length automata, if possible?
            bool regex_axiom_add = false;
            for (obj_hashtable<expr>::iterator it = regex_terms.begin(); it != regex_terms.end(); ++it) {
                expr * str_in_re = *it;
@ -9667,6 +9722,12 @@ namespace smt {
                        continue;
                    }
                    if (exact_length_value.is_zero()) {
                        // shortcut
                        expr_ref lhs(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(rational::zero(), true)), m);
                        expr_ref rhs(ctx.mk_eq_atom(str, mk_string("")), m);
                        assert_implication(lhs, rhs);
                    } else {
                        // find a consistent automaton for this term
                        bool found = false;
                        regex_automaton_under_assumptions assumption;
@ -9743,13 +9804,14 @@ namespace smt {
                                regex_automaton_assumptions[re].push_back(new_aut);
                                TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
                                regex_axiom_add = true;
-                            // TODO immediately attempt to learn lower/upper bound info here
+                                find_automaton_initial_bounds(str_in_re, aut);
                            } else {
                                regex_inc_counter(regex_fail_count, str_in_re);
                            }
                            continue;
                        }
-                }
+                    } // !length is zero
                } // get_len_value()
                expr_ref str_len(mk_strlen(str), m);
                rational lower_bound_value;
                rational upper_bound_value;
@ -9858,11 +9920,13 @@ namespace smt {
                                }
                            }
                            if (!rhs.empty()) {
                                expr_ref lhs_terms(mk_and(lhs), m);
                                expr_ref rhs_terms(mk_and(rhs), m);
                                assert_implication(lhs_terms, rhs_terms);
                                regex_axiom_add = true;
                            }
                        }
                    } else {
                        // no existing automata/assumptions.
                        // if it's easy to construct a full automaton for R, do so
@ -9878,7 +9942,7 @@ namespace smt {
                            regex_automaton_assumptions[re].push_back(new_aut);
                            TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
                            regex_axiom_add = true;
-                            // TODO immediately attempt to learn lower/upper bound info here
+                            find_automaton_initial_bounds(str_in_re, aut);
                        } else {
                            // TODO check negation?
                            // TODO construct a partial automaton for R to the given upper bound?
@ -9976,11 +10040,13 @@ namespace smt {
                                    }
                                }
                                if (!rhs.empty()) {
                                    expr_ref lhs_terms(mk_and(lhs), m);
                                    expr_ref rhs_terms(mk_and(rhs), m);
                                    assert_implication(lhs_terms, rhs_terms);
                                    regex_axiom_add = true;
                                }
                            }
                        } else {
                            // no existing automata/assumptions.
                            // if it's easy to construct a full automaton for R, do so
@ -9996,7 +10062,7 @@ namespace smt {
                                regex_automaton_assumptions[re].push_back(new_aut);
                                TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
                                regex_axiom_add = true;
-                                // TODO immediately attempt to learn lower/upper bound info here
+                                find_automaton_initial_bounds(str_in_re, aut);
                            } else {
                                // TODO check negation?
                                // TODO construct a partial automaton for R to the given lower bound?
@ -10016,7 +10082,7 @@ namespace smt {
                        // and doing so without bounds is not difficult
                        bool existingAutomata = (regex_automaton_assumptions.contains(re) && !regex_automaton_assumptions[re].empty());
                        bool failureThresholdExceeded = (regex_get_counter(regex_fail_count, str_in_re) >= m_params.m_RegexAutomata_FailedAutomatonThreshold);
-                        if (!existingAutomata || failureThresholdExceeded) {
+                        if (!existingAutomata) {
                            unsigned expected_complexity = estimate_regex_complexity(re);
                            if (expected_complexity <= m_params.m_RegexAutomata_DifficultyThreshold
                                    || failureThresholdExceeded) {
@ -10030,7 +10096,7 @@ namespace smt {
                                regex_automaton_assumptions[re].push_back(new_aut);
                                TRACE("str", tout << "add new automaton for " << mk_pp(re, m) << ": no assumptions" << std::endl;);
                                regex_axiom_add = true;
-                                // TODO immediately attempt to learn lower/upper bound info here
+                                find_automaton_initial_bounds(str_in_re, aut);
                            } else {
                                regex_inc_counter(regex_fail_count, str_in_re);
                            }
@ -10165,11 +10231,11 @@ namespace smt {
                        }
                    }
                } // foreach(entry in intersect_constraints)
                aut_inter->compress();
                TRACE("str", tout << "intersected " << used_intersect_constraints.size() << " constraints" << std::endl;);
                if (aut_inter != NULL && aut_inter->is_empty()) {
                    TRACE("str", tout << "product automaton is empty; asserting conflict clause" << std::endl;);
                    expr_ref_vector conflict_terms(m);
                expr_ref_vector conflict_terms(m);
                expr_ref conflict_lhs(m);
                for (svector<regex_automaton_under_assumptions>::iterator aut_it = used_intersect_constraints.begin();
                        aut_it != used_intersect_constraints.end(); ++aut_it) {
                    regex_automaton_under_assumptions aut = *aut_it;
@ -10192,15 +10258,35 @@ namespace smt {
                        conflict_terms.push_back(lb_term);
                    }
                }
                conflict_lhs = mk_and(conflict_terms);
                if (used_intersect_constraints.size() > 1 && aut_inter != NULL) {
                    // check whether the intersection is only the empty string
                    unsigned initial_state = aut_inter->init();
                    if (aut_inter->final_states().size() == 1 && aut_inter->is_final_state(initial_state)) {
                        // initial state is final and it is the only final state
                        // if there are no moves from the initial state,
                        // the only solution is the empty string
                        if (aut_inter->get_moves_from(initial_state).empty()) {
                            TRACE("str", tout << "product automaton only accepts empty string" << std::endl;);
                            expr_ref rhs1(ctx.mk_eq_atom(str, mk_string("")), m);
                            expr_ref rhs2(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(rational::zero(), true)), m);
                            expr_ref rhs(m.mk_and(rhs1, rhs2), m);
                            assert_implication(conflict_lhs, rhs);
                            regex_axiom_add = true;
                        }
                    }
                }
                if (aut_inter != NULL && aut_inter->is_empty()) {
                    TRACE("str", tout << "product automaton is empty; asserting conflict clause" << std::endl;);
                    expr_ref conflict_clause(m.mk_not(mk_and(conflict_terms)), m);
                    assert_axiom(conflict_clause);
                    regex_axiom_add = true;
                }
            } // foreach (entry in regex_terms_by_string)
            if (regex_axiom_add) {
-                return FC_CONTINUE;
+                //return FC_CONTINUE;
            }
        } // RegexAutomata
--- a/src/smt/theory_str.h
+++ b/src/smt/theory_str.h
@ -556,6 +556,7 @@ protected:
    bool check_regex_length_linearity(expr * re);
    bool check_regex_length_linearity_helper(expr * re, bool already_star);
    expr_ref infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables);
    void find_automaton_initial_bounds(expr * str_in_re, eautomaton * aut);
    bool refine_automaton_lower_bound(eautomaton * aut, rational current_lower_bound, rational & refined_lower_bound);
    bool refine_automaton_upper_bound(eautomaton * aut, rational current_upper_bound, rational & refined_upper_bound);
    expr_ref generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints);