diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 3040ee899..d359f010c 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1449,3 +1449,17 @@ bool seq_util::re::is_loop(expr const* n, expr*& body, expr*& lo) const { } return false; } + +/** + Returns true iff e is the epsilon regex. + */ +bool seq_util::re::is_epsilon(expr* r) const { + expr* s; + return is_to_re(r, s) && u.str.is_empty(s); +} +/** + Makes the epsilon regex for a given sequence sort. + */ +app* seq_util::re::mk_epsilon(sort* seq_sort) { + return mk_to_re(u.str.mk_empty(seq_sort)); +} diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 6edfc2007..78286fac0 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -474,6 +474,8 @@ public: bool is_loop(expr const* n, expr*& body, expr*& lo) const; unsigned min_length(expr* r) const; unsigned max_length(expr* r) const; + bool is_epsilon(expr* r) const; + app* mk_epsilon(sort* seq_sort); }; str str; re re; diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index af5617745..f24b744bd 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -109,11 +109,37 @@ namespace smt { return; } - if (coallesce_in_re(lit)) - return; + // Convert a non-ground sequence into an additional regex and + // strengthen the original regex constraint into an intersection + // for example: + // (x ++ "a" ++ y) in b* + // is coverted to + // (x ++ "a" ++ y) in intersect((.* ++ "a" ++ .*), b*) + if (!m.is_value(s)) { + expr_ref s_approx = get_overapprox_regex(s); + if (!re().is_full_seq(s_approx)) { + r = re().mk_inter(r, s_approx); + TRACE("seq_regex", tout + << "get_overapprox_regex(" << mk_pp(s, m) + << ") = " << mk_pp(s_approx, m) << std::endl;); + STRACE("seq_regex_brief", tout + << "overapprox=" << state_str(r) << " ";); + } + } - if (is_string_equality(lit)) + if (coallesce_in_re(lit)) { + TRACE("seq_regex", tout + << "simplified conjunctions to an intersection" << std::endl;); + STRACE("seq_regex_brief", tout << "coallesce_in_re ";); return; + } + + if (is_string_equality(lit)) { + TRACE("seq_regex", tout + << "simplified regex using string equality" << std::endl;); + STRACE("seq_regex_brief", tout << "string_eq ";); + return; + } expr_ref zero(a().mk_int(0), m); expr_ref acc = sk().mk_accept(s, zero, r); @@ -124,6 +150,56 @@ namespace smt { th.propagate_lit(nullptr, 1, &lit, acc_lit); } + /** + * Gets an overapproximating regex s_approx for the input string expression s. + * such that for any valuation v(s) of s, v(s) in L(s_approx). + * If the overapproximation is trivial then dotstar is returned. + */ + expr_ref seq_regex::get_overapprox_regex(expr* s) { + expr_ref s_to_re(re().mk_to_re(s), m); + expr_ref epsilon(re().mk_epsilon(m.get_sort(s)), m); + expr_ref dotstar(re().mk_full_seq(m.get_sort(s_to_re)), m); + if (m.is_value(s)) { + return s_to_re; + } + else { + expr_ref_vector es(m); + expr_ref s_approx(m); + if (str().is_concat(s)) { + str().get_concat(s, es); + //here it is known that es is nonempty or else s would be a value + expr_ref e_approx(m), last(m); + for (expr* e : es) { + e_approx = get_overapprox_regex(e); + if (!s_approx) + s_approx = e_approx; + else if (last != dotstar || e_approx != dotstar) + s_approx = re().mk_concat(e_approx, s_approx); + last = e_approx; + } + return s_approx; + } + else if (m.is_ite(s)) { + s_approx = get_overapprox_regex(to_app(s)->get_arg(1)); + //if either branch approximates to .* then the result is also .* + if (!re().is_full_seq(s_approx)) { + expr_ref r2 = get_overapprox_regex(to_app(s)->get_arg(2)); + if (re().is_full_seq(r2)) { + s_approx = r2; + } + else if (s_approx != r2) { + s_approx = re().mk_union(s_approx, r2); + } + } + return s_approx; + } + else { + // TBD: other app expressions that can be approximated + return dotstar; + } + } + } + /** * Propagate the atom (accept s i r) * diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 7d4cfb651..1f49d7003 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -75,6 +75,9 @@ namespace smt { bool is_string_equality(literal lit); + // Get a regex which overapproximates a given string + expr_ref get_overapprox_regex(expr* s); + void rewrite(expr_ref& e); bool coallesce_in_re(literal lit);