mirror of
				https://github.com/Z3Prover/z3
				synced 2025-11-04 05:19:11 +00:00 
			
		
		
		
	string to regex approximation used to strengthen membership constraints (#4610)
* string to regex approximation used to strengthen membership constraints * fixed pull request comments
This commit is contained in:
		
							parent
							
								
									fb035c0634
								
							
						
					
					
						commit
						8137143ada
					
				
					 4 changed files with 98 additions and 3 deletions
				
			
		| 
						 | 
				
			
			@ -1449,3 +1449,17 @@ bool seq_util::re::is_loop(expr const* n, expr*& body, expr*& lo) const {
 | 
			
		|||
    }
 | 
			
		||||
    return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
   Returns true iff e is the epsilon regex.
 | 
			
		||||
 */
 | 
			
		||||
bool seq_util::re::is_epsilon(expr* r) const {
 | 
			
		||||
    expr* s;
 | 
			
		||||
    return is_to_re(r, s) && u.str.is_empty(s);
 | 
			
		||||
}
 | 
			
		||||
/**
 | 
			
		||||
   Makes the epsilon regex for a given sequence sort.
 | 
			
		||||
 */
 | 
			
		||||
app* seq_util::re::mk_epsilon(sort* seq_sort) {
 | 
			
		||||
    return mk_to_re(u.str.mk_empty(seq_sort));
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -474,6 +474,8 @@ public:
 | 
			
		|||
        bool is_loop(expr const* n, expr*& body, expr*& lo) const;
 | 
			
		||||
        unsigned min_length(expr* r) const;
 | 
			
		||||
        unsigned max_length(expr* r) const;
 | 
			
		||||
        bool is_epsilon(expr* r) const;
 | 
			
		||||
        app* mk_epsilon(sort* seq_sort);
 | 
			
		||||
    };
 | 
			
		||||
    str str;
 | 
			
		||||
    re  re;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -109,11 +109,37 @@ namespace smt {
 | 
			
		|||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (coallesce_in_re(lit))
 | 
			
		||||
            return;
 | 
			
		||||
        // Convert a non-ground sequence into an additional regex and
 | 
			
		||||
        // strengthen the original regex constraint into an intersection
 | 
			
		||||
        // for example:
 | 
			
		||||
        //     (x ++ "a" ++ y) in b*
 | 
			
		||||
        // is coverted to
 | 
			
		||||
        //     (x ++ "a" ++ y) in intersect((.* ++ "a" ++ .*), b*)
 | 
			
		||||
        if (!m.is_value(s)) {
 | 
			
		||||
            expr_ref s_approx = get_overapprox_regex(s);
 | 
			
		||||
            if (!re().is_full_seq(s_approx)) {
 | 
			
		||||
                r = re().mk_inter(r, s_approx);
 | 
			
		||||
                TRACE("seq_regex", tout
 | 
			
		||||
                    << "get_overapprox_regex(" << mk_pp(s, m)
 | 
			
		||||
                    << ") = " << mk_pp(s_approx, m) << std::endl;);
 | 
			
		||||
                STRACE("seq_regex_brief", tout
 | 
			
		||||
                    << "overapprox=" << state_str(r) << " ";);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (is_string_equality(lit))
 | 
			
		||||
        if (coallesce_in_re(lit)) {
 | 
			
		||||
            TRACE("seq_regex", tout
 | 
			
		||||
                << "simplified conjunctions to an intersection" << std::endl;);
 | 
			
		||||
            STRACE("seq_regex_brief", tout << "coallesce_in_re ";);
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (is_string_equality(lit)) {
 | 
			
		||||
            TRACE("seq_regex", tout
 | 
			
		||||
                << "simplified regex using string equality" << std::endl;);
 | 
			
		||||
            STRACE("seq_regex_brief", tout << "string_eq ";);
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        expr_ref zero(a().mk_int(0), m);
 | 
			
		||||
        expr_ref acc = sk().mk_accept(s, zero, r);
 | 
			
		||||
| 
						 | 
				
			
			@ -124,6 +150,56 @@ namespace smt {
 | 
			
		|||
        th.propagate_lit(nullptr, 1, &lit, acc_lit);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
    * Gets an overapproximating regex s_approx for the input string expression s.
 | 
			
		||||
    * such that for any valuation v(s) of s, v(s) in L(s_approx).
 | 
			
		||||
    * If the overapproximation is trivial then dotstar is returned.
 | 
			
		||||
    */
 | 
			
		||||
    expr_ref seq_regex::get_overapprox_regex(expr* s) {
 | 
			
		||||
        expr_ref s_to_re(re().mk_to_re(s), m);
 | 
			
		||||
        expr_ref epsilon(re().mk_epsilon(m.get_sort(s)), m);
 | 
			
		||||
        expr_ref dotstar(re().mk_full_seq(m.get_sort(s_to_re)), m);
 | 
			
		||||
        if (m.is_value(s)) {
 | 
			
		||||
            return s_to_re;
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            expr_ref_vector es(m);
 | 
			
		||||
            expr_ref s_approx(m);
 | 
			
		||||
            if (str().is_concat(s)) {
 | 
			
		||||
                str().get_concat(s, es);
 | 
			
		||||
                //here it is known that es is nonempty or else s would be a value
 | 
			
		||||
                expr_ref e_approx(m), last(m);
 | 
			
		||||
                for (expr* e : es) {
 | 
			
		||||
                    e_approx = get_overapprox_regex(e);
 | 
			
		||||
                    if (!s_approx)
 | 
			
		||||
                        s_approx = e_approx;
 | 
			
		||||
                    else if (last != dotstar || e_approx != dotstar)
 | 
			
		||||
                        s_approx = re().mk_concat(e_approx, s_approx);
 | 
			
		||||
                        last = e_approx;
 | 
			
		||||
                }
 | 
			
		||||
                return s_approx;
 | 
			
		||||
            }
 | 
			
		||||
            else if (m.is_ite(s)) {
 | 
			
		||||
                s_approx = get_overapprox_regex(to_app(s)->get_arg(1));
 | 
			
		||||
                //if either branch approximates to .* then the result is also .*
 | 
			
		||||
                if (!re().is_full_seq(s_approx)) {
 | 
			
		||||
                    expr_ref r2 = get_overapprox_regex(to_app(s)->get_arg(2));
 | 
			
		||||
                    if (re().is_full_seq(r2)) {
 | 
			
		||||
                        s_approx = r2;
 | 
			
		||||
                    }
 | 
			
		||||
                    else if (s_approx != r2) {
 | 
			
		||||
                        s_approx = re().mk_union(s_approx, r2);
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
                return s_approx;
 | 
			
		||||
            }
 | 
			
		||||
            else {
 | 
			
		||||
                // TBD: other app expressions that can be approximated
 | 
			
		||||
                return dotstar;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Propagate the atom (accept s i r)
 | 
			
		||||
     *
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -75,6 +75,9 @@ namespace smt {
 | 
			
		|||
 | 
			
		||||
        bool is_string_equality(literal lit);
 | 
			
		||||
 | 
			
		||||
        // Get a regex which overapproximates a given string
 | 
			
		||||
        expr_ref get_overapprox_regex(expr* s);
 | 
			
		||||
 | 
			
		||||
        void rewrite(expr_ref& e);
 | 
			
		||||
 | 
			
		||||
        bool coallesce_in_re(literal lit);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue