From cadde94017b235ef4c18b3164261be75be1dc626 Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Mon, 7 Aug 2017 23:02:55 -0400 Subject: [PATCH] fix indentation and add support for re.allchar --- src/smt/theory_str.cpp | 70 ++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index b869abb0a..727e7e131 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -1685,6 +1685,8 @@ namespace smt { u.str.is_string(range1, range1val); u.str.is_string(range2, range2val); return zstring("[") + range1val + zstring("-") + range2val + zstring("]"); + } else if (u.re.is_full(a_regex)) { + return zstring("."); } else { TRACE("str", tout << "BUG: unrecognized regex term " << mk_pp(regex, get_manager()) << std::endl;); UNREACHABLE(); return zstring(""); @@ -1794,6 +1796,13 @@ namespace smt { expr_ref finalAxiom(m.mk_iff(ex, rhs), m); SASSERT(finalAxiom); assert_axiom(finalAxiom); + } else if (u.re.is_full(regex)) { + // equivalent to regex "." operator, match any one character. + // we rewrite to expr IFF len(str) == 1 + expr_ref rhs(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(rational::one(), true)), m); + expr_ref finalAxiom(m.mk_iff(ex, rhs), m); + SASSERT(finalAxiom); + assert_axiom(finalAxiom); } else { TRACE("str", tout << "ERROR: unknown regex expression " << mk_pp(regex, m) << "!" << std::endl;); NOT_IMPLEMENTED_YET(); @@ -6190,32 +6199,32 @@ namespace smt { zstring str; if (u.str.is_string(arg_str, str)) { if (str.length() == 0) { - // transitioning on the empty string is handled specially - TRACE("str", tout << "empty string epsilon-move " << start << " --> " << end << std::endl;); - make_epsilon_move(start, end); - } else { - TRACE("str", tout << "build NFA for '" << str << "'" << "\n";); - /* - * For an n-character string, we make (n-1) intermediate states, - * labelled i_(0) through i_(n-2). - * Then we construct the following transitions: - * start --str[0]--> i_(0) --str[1]--> i_(1) --...--> i_(n-2) --str[n-1]--> final - */ - unsigned last = start; - for (int i = 0; i <= ((int)str.length()) - 2; ++i) { - unsigned i_state = next_id(); - make_transition(last, str[i], i_state); - TRACE("str", tout << "string transition " << last << "--" << str[i] << "--> " << i_state << "\n";); - last = i_state; - } - make_transition(last, str[(str.length() - 1)], end); - TRACE("str", tout << "string transition " << last << "--" << str[(str.length() - 1)] << "--> " << end << "\n";); - } - } else { // ! u.str.is_string(arg_str, str) - TRACE("str", tout << "invalid string constant in Str2Reg" << std::endl;); - m_valid = false; - return; - } + // transitioning on the empty string is handled specially + TRACE("str", tout << "empty string epsilon-move " << start << " --> " << end << std::endl;); + make_epsilon_move(start, end); + } else { + TRACE("str", tout << "build NFA for '" << str << "'" << "\n";); + /* + * For an n-character string, we make (n-1) intermediate states, + * labelled i_(0) through i_(n-2). + * Then we construct the following transitions: + * start --str[0]--> i_(0) --str[1]--> i_(1) --...--> i_(n-2) --str[n-1]--> final + */ + unsigned last = start; + for (int i = 0; i <= ((int)str.length()) - 2; ++i) { + unsigned i_state = next_id(); + make_transition(last, str[i], i_state); + TRACE("str", tout << "string transition " << last << "--" << str[i] << "--> " << i_state << "\n";); + last = i_state; + } + make_transition(last, str[(str.length() - 1)], end); + TRACE("str", tout << "string transition " << last << "--" << str[(str.length() - 1)] << "--> " << end << "\n";); + } + } else { // ! u.str.is_string(arg_str, str) + TRACE("str", tout << "invalid string constant in Str2Reg" << std::endl;); + m_valid = false; + return; + } } else if (u.re.is_concat(e)){ app * a = to_app(e); expr * re1 = a->get_arg(0); @@ -6284,6 +6293,15 @@ namespace smt { } TRACE("str", tout << "range NFA: start = " << start << ", end = " << end << std::endl;); + } else if (u.re.is_full(e)) { + // equivalent to "transition on each character in the alphabet" + + // TODO this hard-codes something about theory_str's char set here + + for (unsigned i = 1; i <= 255; ++i) { + char ch = (char)i; + make_transition(start, ch, end); + } } else { TRACE("str", tout << "invalid regular expression" << std::endl;); m_valid = false;