mirror of
				https://github.com/Z3Prover/z3
				synced 2025-11-04 05:19:11 +00:00 
			
		
		
		
	add (disabled) stubs for decomposing re-membership on regex
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
		
							parent
							
								
									7f7663a3b4
								
							
						
					
					
						commit
						084cd335eb
					
				
					 4 changed files with 143 additions and 19 deletions
				
			
		| 
						 | 
				
			
			@ -2109,6 +2109,34 @@ bool seq_rewriter::get_head_tail_reversed(expr* s, expr_ref& head, expr_ref& tai
 | 
			
		|||
    return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool seq_rewriter::get_re_head_tail(expr* r, expr_ref& head, expr_ref& tail) {
 | 
			
		||||
    expr* r1 = nullptr, *r2 = nullptr;
 | 
			
		||||
    if (re().is_concat(r, r1, r2)) {
 | 
			
		||||
        head = r1;
 | 
			
		||||
        tail = r2;
 | 
			
		||||
        return re().min_length(r1) != UINT_MAX && re().max_length(r1) == re().min_length(r1);
 | 
			
		||||
    }
 | 
			
		||||
    return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool seq_rewriter::get_re_head_tail_reversed(expr* r, expr_ref& head, expr_ref& tail) {
 | 
			
		||||
    expr* r1 = nullptr, *r2 = nullptr;
 | 
			
		||||
    if (re().is_concat(r, r1, r2)) {
 | 
			
		||||
        unsigned len = re().min_length(r2);
 | 
			
		||||
        if (len != UINT_MAX && re().max_length(r2) == len) {
 | 
			
		||||
            head = r1;
 | 
			
		||||
            tail = r2;
 | 
			
		||||
            return true;
 | 
			
		||||
        }
 | 
			
		||||
        if (get_re_head_tail_reversed(r2, head, tail)) {
 | 
			
		||||
            head = re().mk_concat(r1, head);
 | 
			
		||||
            return true;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
expr_ref seq_rewriter::re_and(expr* cond, expr* r) {
 | 
			
		||||
    if (m().is_true(cond))
 | 
			
		||||
        return expr_ref(r, m());    
 | 
			
		||||
| 
						 | 
				
			
			@ -2586,6 +2614,21 @@ bool seq_rewriter::rewrite_contains_pattern(expr* a, expr* b, expr_ref& result)
 | 
			
		|||
    "" in b -> is_nullable(b)
 | 
			
		||||
    (ele + tail) in b -> tail in (derivative e b)
 | 
			
		||||
    (head + ele) in b -> head in (right-derivative e b)
 | 
			
		||||
 | 
			
		||||
Other rewrites:
 | 
			
		||||
    s in b1 ++ b2, min_len(b1) = max_len(b2) != UINT_MAX -> 
 | 
			
		||||
           (seq.len s) >= min_len(b1) & 
 | 
			
		||||
           (seq.extract s 0 min_len(b1)) in b1 & 
 | 
			
		||||
           (seq.extract s min_len(b1) (- (seq.len s) min_len(b1))) in b2
 | 
			
		||||
 | 
			
		||||
    similar for tail of regex
 | 
			
		||||
 | 
			
		||||
Disabled rewrite:
 | 
			
		||||
   s + "ab" + t in all ++ "c" ++ all ++ .... ++ "z" ++ all
 | 
			
		||||
   => 
 | 
			
		||||
   disjunctions that cover cases where s overlaps given that "ab" does
 | 
			
		||||
   not overlap with any of the sequences.
 | 
			
		||||
   It is disabled because the solver doesn't handle disjunctions of regexes well.
 | 
			
		||||
*/
 | 
			
		||||
br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -2615,14 +2658,35 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
 | 
			
		|||
        result = re().mk_in_re(tl, re().mk_derivative(hd, b));
 | 
			
		||||
        return BR_REWRITE2;
 | 
			
		||||
    }
 | 
			
		||||
    else if (get_head_tail_reversed(a, hd, tl)) {
 | 
			
		||||
        result = re().mk_in_re(
 | 
			
		||||
            hd,
 | 
			
		||||
            re().mk_reverse(re().mk_derivative(tl, re().mk_reverse(b)))
 | 
			
		||||
        );
 | 
			
		||||
 | 
			
		||||
    if (get_head_tail_reversed(a, hd, tl)) {
 | 
			
		||||
        result = re().mk_reverse(re().mk_derivative(tl, re().mk_reverse(b)));
 | 
			
		||||
        result = re().mk_in_re(hd, result);
 | 
			
		||||
        return BR_REWRITE_FULL;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
    if (get_re_head_tail(b, hd, tl)) {
 | 
			
		||||
        SASSERT(re().min_length(hd) == re().max_length(hd));
 | 
			
		||||
        expr_ref len_hd(m_autil.mk_int(re().min_length(hd)), m()); 
 | 
			
		||||
        expr_ref len_a(str().mk_length(a), m());
 | 
			
		||||
        expr_ref len_tl(m_autil.mk_sub(len_a, len_hd), m());
 | 
			
		||||
        result = m().mk_and(m_autil.mk_ge(len_a, len_hd),
 | 
			
		||||
                            re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd),
 | 
			
		||||
                            re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl));
 | 
			
		||||
        return BR_REWRITE_FULL;
 | 
			
		||||
    }
 | 
			
		||||
    if (get_re_head_tail_reversed(b, hd, tl)) {
 | 
			
		||||
        SASSERT(re().min_length(tl) == re().max_length(tl));
 | 
			
		||||
        expr_ref len_tl(m_autil.mk_int(re().min_length(tl)), m()); 
 | 
			
		||||
        expr_ref len_a(str().mk_length(a), m());
 | 
			
		||||
        expr_ref len_hd(m_autil.mk_sub(len_a, len_tl), m());
 | 
			
		||||
        result = m().mk_and(m_autil.mk_ge(len_a, len_tl),
 | 
			
		||||
                            re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd),
 | 
			
		||||
                            re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl));
 | 
			
		||||
        return BR_REWRITE_FULL;
 | 
			
		||||
    }
 | 
			
		||||
#endif
 | 
			
		||||
    if (false && rewrite_contains_pattern(a, b, result))
 | 
			
		||||
        return BR_REWRITE_FULL;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -136,6 +136,8 @@ class seq_rewriter {
 | 
			
		|||
    // Support for regular expression derivatives
 | 
			
		||||
    bool get_head_tail(expr* e, expr_ref& head, expr_ref& tail);
 | 
			
		||||
    bool get_head_tail_reversed(expr* e, expr_ref& head, expr_ref& tail);
 | 
			
		||||
    bool get_re_head_tail(expr* e, expr_ref& head, expr_ref& tail);
 | 
			
		||||
    bool get_re_head_tail_reversed(expr* e, expr_ref& head, expr_ref& tail);
 | 
			
		||||
    expr_ref re_and(expr* cond, expr* r);
 | 
			
		||||
    expr_ref re_predicate(expr* cond, sort* seq_sort);
 | 
			
		||||
    expr_ref mk_seq_concat(expr* a, expr* b);    
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1155,6 +1155,13 @@ bv_util& seq_util::bv() const {
 | 
			
		|||
    return *m_bv.get();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
unsigned seq_util::max_plus(unsigned x, unsigned y) const {
 | 
			
		||||
    if (x + y < x || x + y < y)
 | 
			
		||||
        return UINT_MAX;
 | 
			
		||||
    return x + y;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
bool seq_util::is_const_char(expr* e, unsigned& c) const {
 | 
			
		||||
#if Z3_USE_UNICODE
 | 
			
		||||
    return is_app_of(e, m_fid, OP_CHAR_CONST) && (c = to_app(e)->get_parameter(0).get_int(), true);
 | 
			
		||||
| 
						 | 
				
			
			@ -1243,15 +1250,49 @@ app* seq_util::str::mk_is_empty(expr* s) const {
 | 
			
		|||
    return m.mk_eq(s, mk_empty(get_sort(s)));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
unsigned seq_util::str::min_length(expr* s) const {
 | 
			
		||||
    SASSERT(u.is_seq(s));
 | 
			
		||||
    expr_ref_vector es(m);
 | 
			
		||||
    unsigned result = 0;
 | 
			
		||||
    get_concat_units(s, es);
 | 
			
		||||
    for (expr* arg : es) {
 | 
			
		||||
        if (is_unit(arg))
 | 
			
		||||
            result++;
 | 
			
		||||
    expr* s1 = nullptr, *s2 = nullptr;
 | 
			
		||||
    auto get_length = [&](expr* s1) {
 | 
			
		||||
        zstring st;
 | 
			
		||||
        if (is_unit(s1))
 | 
			
		||||
            return 1u;
 | 
			
		||||
        else if (is_string(s1, st))
 | 
			
		||||
            return st.length();
 | 
			
		||||
        else
 | 
			
		||||
            return 0u;
 | 
			
		||||
    };
 | 
			
		||||
    while (is_concat(s, s1, s2)) {
 | 
			
		||||
        result += get_length(s1);
 | 
			
		||||
        s = s2;
 | 
			
		||||
    }
 | 
			
		||||
    result += get_length(s);
 | 
			
		||||
    return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
unsigned seq_util::str::max_length(expr* s) const {
 | 
			
		||||
    SASSERT(u.is_seq(s));
 | 
			
		||||
    unsigned result = 0;
 | 
			
		||||
    expr* s1 = nullptr, *s2 = nullptr;
 | 
			
		||||
    zstring st;
 | 
			
		||||
    auto get_length = [&](expr* s1) {
 | 
			
		||||
        if (is_empty(s1))
 | 
			
		||||
            return 0u;
 | 
			
		||||
        else if (is_unit(s1))
 | 
			
		||||
            return 1u;
 | 
			
		||||
        else if (is_string(s1, st))
 | 
			
		||||
            return st.length();
 | 
			
		||||
        else
 | 
			
		||||
            return UINT_MAX;
 | 
			
		||||
    };
 | 
			
		||||
    while (is_concat(s, s1, s2)) {
 | 
			
		||||
        result = u.max_plus(get_length(s), result);
 | 
			
		||||
        s = s2;
 | 
			
		||||
    }
 | 
			
		||||
    result = u.max_plus(get_length(s), result);
 | 
			
		||||
    return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1260,15 +1301,8 @@ unsigned seq_util::re::min_length(expr* r) const {
 | 
			
		|||
    expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
 | 
			
		||||
    if (is_empty(r))
 | 
			
		||||
        return UINT_MAX;
 | 
			
		||||
    if (is_concat(r, r1, r2)) {
 | 
			
		||||
        unsigned l1 = min_length(r1);
 | 
			
		||||
        if (l1 == UINT_MAX)
 | 
			
		||||
            return l1;
 | 
			
		||||
        unsigned l2 = min_length(r2);
 | 
			
		||||
        if (l2 == UINT_MAX)
 | 
			
		||||
            return l2;
 | 
			
		||||
        return l1 + l2;
 | 
			
		||||
    }
 | 
			
		||||
    if (is_concat(r, r1, r2)) 
 | 
			
		||||
        return u.max_plus(min_length(r1), min_length(r2));
 | 
			
		||||
    if (m.is_ite(r, s, r1, r2)) 
 | 
			
		||||
        return std::min(min_length(r1), min_length(r2));
 | 
			
		||||
    if (is_diff(r, r1, r2))
 | 
			
		||||
| 
						 | 
				
			
			@ -1282,6 +1316,26 @@ unsigned seq_util::re::min_length(expr* r) const {
 | 
			
		|||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
unsigned seq_util::re::max_length(expr* r) const {
 | 
			
		||||
    SASSERT(u.is_re(r));
 | 
			
		||||
    expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
 | 
			
		||||
    if (is_empty(r))
 | 
			
		||||
        return 0;
 | 
			
		||||
    if (is_concat(r, r1, r2)) 
 | 
			
		||||
        return u.max_plus(max_length(r1), max_length(r2));
 | 
			
		||||
    if (m.is_ite(r, s, r1, r2)) 
 | 
			
		||||
        return std::max(max_length(r1), max_length(r2));
 | 
			
		||||
    if (is_diff(r, r1, r2))
 | 
			
		||||
        return max_length(r1);
 | 
			
		||||
    if (is_union(r, r1, r2)) 
 | 
			
		||||
        return std::max(max_length(r1), max_length(r2));
 | 
			
		||||
    if (is_intersection(r, r1, r2)) 
 | 
			
		||||
        return std::min(max_length(r1), max_length(r2));
 | 
			
		||||
    if (is_to_re(r, s)) 
 | 
			
		||||
        return u.str.max_length(s);
 | 
			
		||||
    return UINT_MAX;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sort* seq_util::re::to_seq(sort* re) {
 | 
			
		||||
    (void)u;
 | 
			
		||||
    SASSERT(u.is_re(re));
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -236,6 +236,8 @@ class seq_util {
 | 
			
		|||
    family_id m_fid;
 | 
			
		||||
    mutable scoped_ptr<bv_util> m_bv;
 | 
			
		||||
    bv_util& bv() const;
 | 
			
		||||
 | 
			
		||||
    unsigned max_plus(unsigned x, unsigned y) const;
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
    ast_manager& get_manager() const { return m; }
 | 
			
		||||
| 
						 | 
				
			
			@ -400,6 +402,7 @@ public:
 | 
			
		|||
        expr* get_rightmost_concat(expr* e) const { expr* e1, *e2; while (is_concat(e, e1, e2)) e = e2; return e; }
 | 
			
		||||
 | 
			
		||||
        unsigned min_length(expr* s) const;
 | 
			
		||||
        unsigned max_length(expr* s) const;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    class re {
 | 
			
		||||
| 
						 | 
				
			
			@ -469,6 +472,7 @@ public:
 | 
			
		|||
        bool is_loop(expr const* n, expr*& body, expr*& lo, expr*& hi);
 | 
			
		||||
        bool is_loop(expr const* n, expr*& body, expr*& lo);
 | 
			
		||||
        unsigned min_length(expr* r) const;
 | 
			
		||||
        unsigned max_length(expr* r) const;
 | 
			
		||||
    };
 | 
			
		||||
    str str;
 | 
			
		||||
    re  re;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue