mirror of
				https://github.com/Z3Prover/z3
				synced 2025-11-04 13:29:11 +00:00 
			
		
		
		
	simplify extended contains patterns
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
		
							parent
							
								
									6a90072a98
								
							
						
					
					
						commit
						ea1f50b77e
					
				
					 8 changed files with 163 additions and 6 deletions
				
			
		| 
						 | 
				
			
			@ -2297,6 +2297,147 @@ expr_ref seq_rewriter::derivative(expr* elem, expr* r) {
 | 
			
		|||
    return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * pattern match against all ++ "abc" ++ all ++ "def" ++ all regexes.
 | 
			
		||||
*/
 | 
			
		||||
bool seq_rewriter::is_re_contains_pattern(expr* r, vector<expr_ref_vector>& patterns) {
 | 
			
		||||
    expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
 | 
			
		||||
    if (re().is_concat(r, r1, r2) && re().is_full_seq(r1)) {
 | 
			
		||||
        r = r2;
 | 
			
		||||
        patterns.push_back(expr_ref_vector(m()));
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
    while (re().is_concat(r, r1, r2)) {
 | 
			
		||||
        if (re().is_to_re(r1, s))
 | 
			
		||||
            patterns.back().push_back(s);
 | 
			
		||||
        else if (re().is_full_seq(r1))
 | 
			
		||||
            patterns.push_back(expr_ref_vector(m()));
 | 
			
		||||
        else
 | 
			
		||||
            return false;
 | 
			
		||||
        r = r2;
 | 
			
		||||
    }
 | 
			
		||||
    return re().is_full_seq(r);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * return true if the sequences p1, p2 cannot overlap in any way.
 | 
			
		||||
 * assume |p1| <= |p2|
 | 
			
		||||
 *   no suffix of p1 is a prefix of p2
 | 
			
		||||
 *   no prefix of p1 is a suffix of p2
 | 
			
		||||
 *   p1 is not properly contained in p2
 | 
			
		||||
 */
 | 
			
		||||
bool seq_rewriter::non_overlap(zstring const& s1, zstring const& s2) const {
 | 
			
		||||
    unsigned sz1 = s1.length(), sz2 = s2.length();
 | 
			
		||||
    if (sz1 > sz2) 
 | 
			
		||||
        return non_overlap(s2, s1);
 | 
			
		||||
    auto can_overlap = [&](unsigned start1, unsigned end1, unsigned start2) {
 | 
			
		||||
        for (unsigned i = start1; i < end1; ++i) {
 | 
			
		||||
            if (s1[i] != s2[start2 + i])
 | 
			
		||||
                return false;
 | 
			
		||||
        }
 | 
			
		||||
        return true;
 | 
			
		||||
    };
 | 
			
		||||
    for (unsigned i = 1; i < sz1; ++i) 
 | 
			
		||||
        if (can_overlap(i, sz1, 0))
 | 
			
		||||
            return false;
 | 
			
		||||
    for (unsigned j = 0; j + sz1 < sz2; ++j) 
 | 
			
		||||
        if (can_overlap(0, sz1, j))
 | 
			
		||||
            return false;
 | 
			
		||||
    for (unsigned j = sz2-sz1+1; j < sz2; ++j) 
 | 
			
		||||
        if (can_overlap(0, sz2 - j, j))
 | 
			
		||||
            return false;
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool seq_rewriter::non_overlap(expr_ref_vector const& p1, expr_ref_vector const& p2) const {
 | 
			
		||||
    unsigned sz1 = p1.size(), sz2 = p2.size();
 | 
			
		||||
    if (sz1 > sz2) 
 | 
			
		||||
        return non_overlap(p2, p1);
 | 
			
		||||
    if (sz1 == 0 || sz2 == 0)
 | 
			
		||||
        return false;
 | 
			
		||||
    zstring s1, s2;
 | 
			
		||||
    if (sz1 == 1 && sz2 == 1 && str().is_string(p1[0], s1) && str().is_string(p2[0], s2))
 | 
			
		||||
        return non_overlap(s1, s2);
 | 
			
		||||
    for (expr* e : p1) 
 | 
			
		||||
        if (!str().is_unit(e))
 | 
			
		||||
            return false;
 | 
			
		||||
    for (expr* e : p2) 
 | 
			
		||||
        if (!str().is_unit(e))
 | 
			
		||||
            return false;
 | 
			
		||||
    auto can_overlap = [&](unsigned start1, unsigned end1, unsigned start2) {
 | 
			
		||||
        for (unsigned i = start1; i < end1; ++i) {
 | 
			
		||||
            if (m().are_distinct(p1[i], p2[start2 + i]))
 | 
			
		||||
                return false;
 | 
			
		||||
            if (!m().are_equal(p1[i], p2[start2 + i]))
 | 
			
		||||
                return true;
 | 
			
		||||
        }
 | 
			
		||||
        return true;
 | 
			
		||||
    };
 | 
			
		||||
    for (unsigned i = 1; i < sz1; ++i) 
 | 
			
		||||
        if (can_overlap(i, sz1, 0))
 | 
			
		||||
            return false;
 | 
			
		||||
    for (unsigned j = 0; j + sz1 < sz2; ++j) 
 | 
			
		||||
        if (can_overlap(0, sz1, j))
 | 
			
		||||
            return false;
 | 
			
		||||
    for (unsigned j = sz2-sz1+1; j < sz2; ++j) 
 | 
			
		||||
        if (can_overlap(0, sz2 - j, j))
 | 
			
		||||
            return false;
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool seq_rewriter::rewrite_contains(expr* a, expr* b, expr_ref& result) {
 | 
			
		||||
    
 | 
			
		||||
    // 
 | 
			
		||||
    // simplify extended contains patterns into simpler membership constraints
 | 
			
		||||
    // 
 | 
			
		||||
    // x ++ "abc" ++ s in all ++ "de" ++ all ++ "ee" ++ all ++ "ff" ++ all
 | 
			
		||||
    // => 
 | 
			
		||||
    //     "abc" ++ s in (all ++ "de" ++ all ++ "ee" ++ all ++ "ff" ++ all)
 | 
			
		||||
    // or  x in all ++ "de" ++ all & "abc" ++ s in (all ++ "ee" ++ all ++ "ff" ++ all)
 | 
			
		||||
    // or  x in (all ++ "de" ++ all ++ "ee" ++ all) & "abc" ++ s in (all ++ "ff" ++ all)
 | 
			
		||||
    // or  x in (all ++ "de" ++ all ++ "ee" ++ all ++ "ff" ++ all) & .. simplifies to true ..
 | 
			
		||||
    // 
 | 
			
		||||
 | 
			
		||||
    vector<expr_ref_vector> patterns;
 | 
			
		||||
    expr* x = nullptr, *y = nullptr, *z = nullptr, *u = nullptr;
 | 
			
		||||
    if (!str().is_concat(a, x, y))
 | 
			
		||||
        return false;
 | 
			
		||||
    if (!is_re_contains_pattern(b, patterns)) 
 | 
			
		||||
        return false;
 | 
			
		||||
    m_lhs.reset();        
 | 
			
		||||
    u = y;
 | 
			
		||||
    while (str().is_concat(u, z, u) && (str().is_unit(z) || str().is_string(z))) {
 | 
			
		||||
        m_lhs.push_back(z);
 | 
			
		||||
    }
 | 
			
		||||
    bool no_overlaps = true;
 | 
			
		||||
    for (auto const& p : patterns)
 | 
			
		||||
        if (!non_overlap(p, m_lhs))
 | 
			
		||||
            return false;
 | 
			
		||||
 | 
			
		||||
    expr_ref_vector fmls(m());
 | 
			
		||||
    sort* rs = m().get_sort(b);
 | 
			
		||||
    expr_ref full(re().mk_full_seq(rs), m()), prefix(m()), suffix(m());
 | 
			
		||||
    fmls.push_back(re().mk_in_re(y, b));
 | 
			
		||||
    prefix = full;
 | 
			
		||||
    for (unsigned i = 0; i < patterns.size(); ++i) {
 | 
			
		||||
        for (expr* e : patterns[i])
 | 
			
		||||
            prefix = re().mk_concat(prefix, re().mk_to_re(e));
 | 
			
		||||
        prefix = re().mk_concat(prefix, full);
 | 
			
		||||
        suffix = full;
 | 
			
		||||
        for (unsigned j = i + 1; j < patterns.size(); ++j) {
 | 
			
		||||
            for (expr* e : patterns[j])
 | 
			
		||||
                suffix = re().mk_concat(suffix, re().mk_to_re(e));
 | 
			
		||||
            suffix = re().mk_concat(suffix, full);
 | 
			
		||||
        }
 | 
			
		||||
        fmls.push_back(m().mk_and(re().mk_in_re(x, prefix),
 | 
			
		||||
                                  re().mk_in_re(y, suffix)));
 | 
			
		||||
    }
 | 
			
		||||
    result = mk_or(fmls);
 | 
			
		||||
    return true;    
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
 | 
			
		||||
 | 
			
		||||
    if (re().is_empty(b)) {
 | 
			
		||||
| 
						 | 
				
			
			@ -2329,6 +2470,9 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
 | 
			
		|||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (rewrite_contains(a, b, result)) 
 | 
			
		||||
        return BR_REWRITE_FULL;
 | 
			
		||||
 | 
			
		||||
    return BR_FAILED; 
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -179,6 +179,12 @@ class seq_rewriter {
 | 
			
		|||
    br_status reduce_re_eq(expr* a, expr* b, expr_ref& result);
 | 
			
		||||
    br_status reduce_re_is_empty(expr* r, expr_ref& result);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    bool is_re_contains_pattern(expr* r, vector<expr_ref_vector>& patterns);
 | 
			
		||||
    bool non_overlap(expr_ref_vector const& p1, expr_ref_vector const& p2) const;
 | 
			
		||||
    bool non_overlap(zstring const& p1, zstring const& p2) const;
 | 
			
		||||
    bool rewrite_contains(expr* a, expr* b, expr_ref& result);
 | 
			
		||||
 | 
			
		||||
    br_status mk_bool_app_helper(bool is_and, unsigned n, expr* const* args, expr_ref& result);
 | 
			
		||||
 | 
			
		||||
    bool cannot_contain_prefix(expr* a, expr* b);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1051,6 +1051,7 @@ new_lemma& new_lemma::operator|=(ineq const& ineq) {
 | 
			
		|||
 | 
			
		||||
new_lemma::~new_lemma() {
 | 
			
		||||
    static int i = 0;
 | 
			
		||||
    (void)i;
 | 
			
		||||
    // code for checking lemma can be added here
 | 
			
		||||
    TRACE("nla_solver", tout << name << " " << (++i) << "\n" << *this; );
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -466,7 +466,7 @@ namespace datalog {
 | 
			
		|||
        expr_ref_vector conjs = get_tail_conjs(r);
 | 
			
		||||
        for (expr * e : conjs) {
 | 
			
		||||
            expr_ref r(m);
 | 
			
		||||
            unsigned v;
 | 
			
		||||
            unsigned v = 0;
 | 
			
		||||
            if (is_eq(e, v, r) && is_output(v) && m_var_is_sliceable[v]) {
 | 
			
		||||
                TRACE("dl", tout << "is_eq: " << mk_pp(e, m) << " " << (m_solved_vars[v].get()?"solved":"new") << "\n";);
 | 
			
		||||
                add_var(v);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -41,10 +41,13 @@ namespace sat {
 | 
			
		|||
        uint64_t table_mask() const { return (1ull << (1ull << m_size)) - 1ull; }
 | 
			
		||||
 | 
			
		||||
    public:
 | 
			
		||||
        cut(): m_filter(0), m_size(0), m_table(0), m_dont_care(0) {}
 | 
			
		||||
        cut(): m_filter(0), m_size(0), m_table(0), m_dont_care(0) {
 | 
			
		||||
            m_elems[0] = m_elems[1] = m_elems[2] = m_elems[3] = m_elems[4] = 0;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        cut(unsigned id): m_filter(1u << (id & 0x1F)), m_size(1), m_table(2), m_dont_care(0) { 
 | 
			
		||||
            m_elems[0] = id; 
 | 
			
		||||
            m_elems[1] = m_elems[2] = m_elems[3] = m_elems[4] = 0;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        cut(cut const& other) {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1803,7 +1803,7 @@ namespace smt {
 | 
			
		|||
        // axiom 2: expr = 0 <==> S in "0+"
 | 
			
		||||
        // axiom 3: expr >= 1 ==> S in "0*[1-9][0-9]*"
 | 
			
		||||
 | 
			
		||||
        expr * S = ex->get_arg(0);
 | 
			
		||||
        // expr * S = ex->get_arg(0);
 | 
			
		||||
        {
 | 
			
		||||
            expr_ref axiom1(m_autil.mk_ge(ex, m_autil.mk_numeral(rational::minus_one(), true)), m);
 | 
			
		||||
            SASSERT(axiom1);
 | 
			
		||||
| 
						 | 
				
			
			@ -8676,9 +8676,9 @@ namespace smt {
 | 
			
		|||
            bool regexOK = true;
 | 
			
		||||
            if (!regex_terms.empty()) {
 | 
			
		||||
                for (auto& str_in_re : regex_terms) {
 | 
			
		||||
                    expr * str;
 | 
			
		||||
                    expr * re;
 | 
			
		||||
                    u.str.is_in_re(str_in_re, str, re);
 | 
			
		||||
                    expr * str = nullptr;
 | 
			
		||||
                    expr * re = nullptr;
 | 
			
		||||
                    VERIFY(u.str.is_in_re(str_in_re, str, re));
 | 
			
		||||
                    lbool current_assignment = ctx.get_assignment(str_in_re);
 | 
			
		||||
                    if (current_assignment == l_undef) {
 | 
			
		||||
                        continue;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -729,6 +729,8 @@ namespace smt {
 | 
			
		|||
                regex_axiom_add = true;
 | 
			
		||||
            }
 | 
			
		||||
        } // foreach (entry in regex_terms_by_string)
 | 
			
		||||
        // NSB: compiler warns that regex_axiom_add is set but not used.
 | 
			
		||||
        (void)regex_axiom_add;
 | 
			
		||||
        return true;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -834,6 +834,7 @@ namespace smt {
 | 
			
		|||
            default:
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            (void)ok;
 | 
			
		||||
            CTRACE("utvpi", !ok, 
 | 
			
		||||
                   tout << "validation failed:\n";
 | 
			
		||||
                   tout << "Assignment: " << assign << "\n";
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue