3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-08 18:31:49 +00:00

first-class re.range support in theory_str

This commit is contained in:
Murphy Berzish 2017-05-05 15:29:58 -04:00
parent 75ba4d5a4d
commit 7ddd43e16d

View file

@ -1673,6 +1673,13 @@ zstring theory_str::get_std_regex_str(expr * regex) {
expr * reg1Ast = a_regex->get_arg(0);
zstring reg1Str = get_std_regex_str(reg1Ast);
return zstring("(") + reg1Str + zstring(")*");
} else if (u.re.is_range(a_regex)) {
expr * range1 = a_regex->get_arg(0);
expr * range2 = a_regex->get_arg(1);
zstring range1val, range2val;
u.str.is_string(range1, range1val);
u.str.is_string(range2, range2val);
return zstring("[") + range1val + zstring("-") + range2val + zstring("]");
} else {
TRACE("str", tout << "BUG: unrecognized regex term " << mk_pp(regex, get_manager()) << std::endl;);
UNREACHABLE(); return zstring("");
@ -1752,6 +1759,36 @@ void theory_str::instantiate_axiom_RegexIn(enode * e) {
expr_ref finalAxiom(mk_and(items), m);
SASSERT(finalAxiom);
assert_axiom(finalAxiom);
} else if (u.re.is_range(regex)) {
// (re.range "A" "Z") unfolds to (re.union "A" "B" ... "Z");
// we rewrite to expr IFF (str = "A" or str = "B" or ... or str = "Z")
expr_ref lo(regex->get_arg(0), m);
expr_ref hi(regex->get_arg(1), m);
zstring str_lo, str_hi;
SASSERT(u.str.is_string(lo));
SASSERT(u.str.is_string(hi));
u.str.is_string(lo, str_lo);
u.str.is_string(hi, str_hi);
SASSERT(str_lo.length() == 1);
SASSERT(str_hi.length() == 1);
unsigned int c1 = str_lo[0];
unsigned int c2 = str_hi[0];
if (c1 > c2) {
// exchange
unsigned int tmp = c1;
c1 = c2;
c2 = tmp;
}
expr_ref_vector range_cases(m);
for (unsigned int ch = c1; ch <= c2; ++ch) {
zstring s_ch(ch);
expr_ref rhs(ctx.mk_eq_atom(str, u.str.mk_string(s_ch)), m);
range_cases.push_back(rhs);
}
expr_ref rhs(mk_or(range_cases), m);
expr_ref finalAxiom(m.mk_iff(ex, rhs), m);
SASSERT(finalAxiom);
assert_axiom(finalAxiom);
} else {
TRACE("str", tout << "ERROR: unknown regex expression " << mk_pp(regex, m) << "!" << std::endl;);
NOT_IMPLEMENTED_YET();
@ -6165,7 +6202,6 @@ void nfa::convert_re(expr * e, unsigned & start, unsigned & end, seq_util & u) {
zstring str;
if (u.str.is_string(arg_str, str)) {
TRACE("str", tout << "build NFA for '" << str << "'" << "\n";);
/*
* For an n-character string, we make (n-1) intermediate states,
* labelled i_(0) through i_(n-2).
@ -6227,6 +6263,33 @@ void nfa::convert_re(expr * e, unsigned & start, unsigned & end, seq_util & u) {
make_epsilon_move(end_subex, start_subex);
make_epsilon_move(end_subex, end);
TRACE("str", tout << "star NFA: start = " << start << ", end = " << end << std::endl;);
} else if (u.re.is_range(e)) {
// range('a', 'z')
// start --'a'--> end
// start --'b'--> end
// ...
// start --'z'--> end
app * a = to_app(e);
expr * c1 = a->get_arg(0);
expr * c2 = a->get_arg(1);
zstring s_c1, s_c2;
u.str.is_string(c1, s_c1);
u.str.is_string(c2, s_c2);
unsigned int id1 = s_c1[0];
unsigned int id2 = s_c2[0];
if (id1 > id2) {
unsigned int tmp = id1;
id1 = id2;
id2 = tmp;
}
for (unsigned int i = id1; i <= id2; ++i) {
char ch = (char)i;
make_transition(start, ch, end);
}
TRACE("str", tout << "range NFA: start = " << start << ", end = " << end << std::endl;);
} else {
TRACE("str", tout << "invalid regular expression" << std::endl;);
m_valid = false;
@ -9429,6 +9492,39 @@ void theory_str::reduce_virtual_regex_in(expr * var, expr * regex, expr_ref_vect
items.push_back(ctx.mk_eq_atom(var, unrollFunc));
items.push_back(ctx.mk_eq_atom(mk_strlen(var), mk_strlen(unrollFunc)));
return;
}
// re.range
else if (u.re.is_range(regexFuncDecl)) {
// var in range("a", "z")
// ==>
// (var = "a" or var = "b" or ... or var = "z")
expr_ref lo(regexFuncDecl->get_arg(0), mgr);
expr_ref hi(regexFuncDecl->get_arg(1), mgr);
zstring str_lo, str_hi;
SASSERT(u.str.is_string(lo));
SASSERT(u.str.is_string(hi));
u.str.is_string(lo, str_lo);
u.str.is_string(hi, str_hi);
SASSERT(str_lo.length() == 1);
SASSERT(str_hi.length() == 1);
unsigned int c1 = str_lo[0];
unsigned int c2 = str_hi[0];
if (c1 > c2) {
// exchange
unsigned int tmp = c1;
c1 = c2;
c2 = tmp;
}
expr_ref_vector range_cases(mgr);
for (unsigned int ch = c1; ch <= c2; ++ch) {
zstring s_ch(ch);
expr_ref rhs(ctx.mk_eq_atom(var, u.str.mk_string(s_ch)), mgr);
range_cases.push_back(rhs);
}
expr_ref rhs(mk_or(range_cases), mgr);
SASSERT(rhs);
assert_axiom(rhs);
return;
} else {
get_manager().raise_exception("unrecognized regex operator");
UNREACHABLE();