From f4c0e0b28d1e9aa4fc6452506112222dd270eb70 Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Sun, 6 Aug 2017 17:17:04 -0400 Subject: [PATCH 1/7] fix regex bug in theory_str for empty string match. need to fix indents --- src/smt/theory_str.cpp | 48 ++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index 89e0123a8..b869abb0a 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -6189,27 +6189,33 @@ namespace smt { expr * arg_str = a->get_arg(0); zstring str; if (u.str.is_string(arg_str, str)) { - TRACE("str", tout << "build NFA for '" << str << "'" << "\n";); - /* - * For an n-character string, we make (n-1) intermediate states, - * labelled i_(0) through i_(n-2). - * Then we construct the following transitions: - * start --str[0]--> i_(0) --str[1]--> i_(1) --...--> i_(n-2) --str[n-1]--> final - */ - unsigned last = start; - for (int i = 0; i <= ((int)str.length()) - 2; ++i) { - unsigned i_state = next_id(); - make_transition(last, str[i], i_state); - TRACE("str", tout << "string transition " << last << "--" << str[i] << "--> " << i_state << "\n";); - last = i_state; - } - make_transition(last, str[(str.length() - 1)], end); - TRACE("str", tout << "string transition " << last << "--" << str[(str.length() - 1)] << "--> " << end << "\n";); - } else { - TRACE("str", tout << "invalid string constant in Str2Reg" << std::endl;); - m_valid = false; - return; - } + if (str.length() == 0) { + // transitioning on the empty string is handled specially + TRACE("str", tout << "empty string epsilon-move " << start << " --> " << end << std::endl;); + make_epsilon_move(start, end); + } else { + TRACE("str", tout << "build NFA for '" << str << "'" << "\n";); + /* + * For an n-character string, we make (n-1) intermediate states, + * labelled i_(0) through i_(n-2). + * Then we construct the following transitions: + * start --str[0]--> i_(0) --str[1]--> i_(1) --...--> i_(n-2) --str[n-1]--> final + */ + unsigned last = start; + for (int i = 0; i <= ((int)str.length()) - 2; ++i) { + unsigned i_state = next_id(); + make_transition(last, str[i], i_state); + TRACE("str", tout << "string transition " << last << "--" << str[i] << "--> " << i_state << "\n";); + last = i_state; + } + make_transition(last, str[(str.length() - 1)], end); + TRACE("str", tout << "string transition " << last << "--" << str[(str.length() - 1)] << "--> " << end << "\n";); + } + } else { // ! u.str.is_string(arg_str, str) + TRACE("str", tout << "invalid string constant in Str2Reg" << std::endl;); + m_valid = false; + return; + } } else if (u.re.is_concat(e)){ app * a = to_app(e); expr * re1 = a->get_arg(0); From cadde94017b235ef4c18b3164261be75be1dc626 Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Mon, 7 Aug 2017 23:02:55 -0400 Subject: [PATCH 2/7] fix indentation and add support for re.allchar --- src/smt/theory_str.cpp | 70 ++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index b869abb0a..727e7e131 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -1685,6 +1685,8 @@ namespace smt { u.str.is_string(range1, range1val); u.str.is_string(range2, range2val); return zstring("[") + range1val + zstring("-") + range2val + zstring("]"); + } else if (u.re.is_full(a_regex)) { + return zstring("."); } else { TRACE("str", tout << "BUG: unrecognized regex term " << mk_pp(regex, get_manager()) << std::endl;); UNREACHABLE(); return zstring(""); @@ -1794,6 +1796,13 @@ namespace smt { expr_ref finalAxiom(m.mk_iff(ex, rhs), m); SASSERT(finalAxiom); assert_axiom(finalAxiom); + } else if (u.re.is_full(regex)) { + // equivalent to regex "." operator, match any one character. + // we rewrite to expr IFF len(str) == 1 + expr_ref rhs(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(rational::one(), true)), m); + expr_ref finalAxiom(m.mk_iff(ex, rhs), m); + SASSERT(finalAxiom); + assert_axiom(finalAxiom); } else { TRACE("str", tout << "ERROR: unknown regex expression " << mk_pp(regex, m) << "!" << std::endl;); NOT_IMPLEMENTED_YET(); @@ -6190,32 +6199,32 @@ namespace smt { zstring str; if (u.str.is_string(arg_str, str)) { if (str.length() == 0) { - // transitioning on the empty string is handled specially - TRACE("str", tout << "empty string epsilon-move " << start << " --> " << end << std::endl;); - make_epsilon_move(start, end); - } else { - TRACE("str", tout << "build NFA for '" << str << "'" << "\n";); - /* - * For an n-character string, we make (n-1) intermediate states, - * labelled i_(0) through i_(n-2). - * Then we construct the following transitions: - * start --str[0]--> i_(0) --str[1]--> i_(1) --...--> i_(n-2) --str[n-1]--> final - */ - unsigned last = start; - for (int i = 0; i <= ((int)str.length()) - 2; ++i) { - unsigned i_state = next_id(); - make_transition(last, str[i], i_state); - TRACE("str", tout << "string transition " << last << "--" << str[i] << "--> " << i_state << "\n";); - last = i_state; - } - make_transition(last, str[(str.length() - 1)], end); - TRACE("str", tout << "string transition " << last << "--" << str[(str.length() - 1)] << "--> " << end << "\n";); - } - } else { // ! u.str.is_string(arg_str, str) - TRACE("str", tout << "invalid string constant in Str2Reg" << std::endl;); - m_valid = false; - return; - } + // transitioning on the empty string is handled specially + TRACE("str", tout << "empty string epsilon-move " << start << " --> " << end << std::endl;); + make_epsilon_move(start, end); + } else { + TRACE("str", tout << "build NFA for '" << str << "'" << "\n";); + /* + * For an n-character string, we make (n-1) intermediate states, + * labelled i_(0) through i_(n-2). + * Then we construct the following transitions: + * start --str[0]--> i_(0) --str[1]--> i_(1) --...--> i_(n-2) --str[n-1]--> final + */ + unsigned last = start; + for (int i = 0; i <= ((int)str.length()) - 2; ++i) { + unsigned i_state = next_id(); + make_transition(last, str[i], i_state); + TRACE("str", tout << "string transition " << last << "--" << str[i] << "--> " << i_state << "\n";); + last = i_state; + } + make_transition(last, str[(str.length() - 1)], end); + TRACE("str", tout << "string transition " << last << "--" << str[(str.length() - 1)] << "--> " << end << "\n";); + } + } else { // ! u.str.is_string(arg_str, str) + TRACE("str", tout << "invalid string constant in Str2Reg" << std::endl;); + m_valid = false; + return; + } } else if (u.re.is_concat(e)){ app * a = to_app(e); expr * re1 = a->get_arg(0); @@ -6284,6 +6293,15 @@ namespace smt { } TRACE("str", tout << "range NFA: start = " << start << ", end = " << end << std::endl;); + } else if (u.re.is_full(e)) { + // equivalent to "transition on each character in the alphabet" + + // TODO this hard-codes something about theory_str's char set here + + for (unsigned i = 1; i <= 255; ++i) { + char ch = (char)i; + make_transition(start, ch, end); + } } else { TRACE("str", tout << "invalid regular expression" << std::endl;); m_valid = false; From fce35fdb611c9018a32855a2e46b9ea05161c26c Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Wed, 9 Aug 2017 15:37:52 -0400 Subject: [PATCH 3/7] Revert "fix indentation and add support for re.allchar" This reverts commit cadde94017b235ef4c18b3164261be75be1dc626. --- src/smt/theory_str.cpp | 70 ++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 44 deletions(-) diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index 727e7e131..b869abb0a 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -1685,8 +1685,6 @@ namespace smt { u.str.is_string(range1, range1val); u.str.is_string(range2, range2val); return zstring("[") + range1val + zstring("-") + range2val + zstring("]"); - } else if (u.re.is_full(a_regex)) { - return zstring("."); } else { TRACE("str", tout << "BUG: unrecognized regex term " << mk_pp(regex, get_manager()) << std::endl;); UNREACHABLE(); return zstring(""); @@ -1796,13 +1794,6 @@ namespace smt { expr_ref finalAxiom(m.mk_iff(ex, rhs), m); SASSERT(finalAxiom); assert_axiom(finalAxiom); - } else if (u.re.is_full(regex)) { - // equivalent to regex "." operator, match any one character. - // we rewrite to expr IFF len(str) == 1 - expr_ref rhs(ctx.mk_eq_atom(mk_strlen(str), m_autil.mk_numeral(rational::one(), true)), m); - expr_ref finalAxiom(m.mk_iff(ex, rhs), m); - SASSERT(finalAxiom); - assert_axiom(finalAxiom); } else { TRACE("str", tout << "ERROR: unknown regex expression " << mk_pp(regex, m) << "!" << std::endl;); NOT_IMPLEMENTED_YET(); @@ -6199,32 +6190,32 @@ namespace smt { zstring str; if (u.str.is_string(arg_str, str)) { if (str.length() == 0) { - // transitioning on the empty string is handled specially - TRACE("str", tout << "empty string epsilon-move " << start << " --> " << end << std::endl;); - make_epsilon_move(start, end); - } else { - TRACE("str", tout << "build NFA for '" << str << "'" << "\n";); - /* - * For an n-character string, we make (n-1) intermediate states, - * labelled i_(0) through i_(n-2). - * Then we construct the following transitions: - * start --str[0]--> i_(0) --str[1]--> i_(1) --...--> i_(n-2) --str[n-1]--> final - */ - unsigned last = start; - for (int i = 0; i <= ((int)str.length()) - 2; ++i) { - unsigned i_state = next_id(); - make_transition(last, str[i], i_state); - TRACE("str", tout << "string transition " << last << "--" << str[i] << "--> " << i_state << "\n";); - last = i_state; - } - make_transition(last, str[(str.length() - 1)], end); - TRACE("str", tout << "string transition " << last << "--" << str[(str.length() - 1)] << "--> " << end << "\n";); - } - } else { // ! u.str.is_string(arg_str, str) - TRACE("str", tout << "invalid string constant in Str2Reg" << std::endl;); - m_valid = false; - return; - } + // transitioning on the empty string is handled specially + TRACE("str", tout << "empty string epsilon-move " << start << " --> " << end << std::endl;); + make_epsilon_move(start, end); + } else { + TRACE("str", tout << "build NFA for '" << str << "'" << "\n";); + /* + * For an n-character string, we make (n-1) intermediate states, + * labelled i_(0) through i_(n-2). + * Then we construct the following transitions: + * start --str[0]--> i_(0) --str[1]--> i_(1) --...--> i_(n-2) --str[n-1]--> final + */ + unsigned last = start; + for (int i = 0; i <= ((int)str.length()) - 2; ++i) { + unsigned i_state = next_id(); + make_transition(last, str[i], i_state); + TRACE("str", tout << "string transition " << last << "--" << str[i] << "--> " << i_state << "\n";); + last = i_state; + } + make_transition(last, str[(str.length() - 1)], end); + TRACE("str", tout << "string transition " << last << "--" << str[(str.length() - 1)] << "--> " << end << "\n";); + } + } else { // ! u.str.is_string(arg_str, str) + TRACE("str", tout << "invalid string constant in Str2Reg" << std::endl;); + m_valid = false; + return; + } } else if (u.re.is_concat(e)){ app * a = to_app(e); expr * re1 = a->get_arg(0); @@ -6293,15 +6284,6 @@ namespace smt { } TRACE("str", tout << "range NFA: start = " << start << ", end = " << end << std::endl;); - } else if (u.re.is_full(e)) { - // equivalent to "transition on each character in the alphabet" - - // TODO this hard-codes something about theory_str's char set here - - for (unsigned i = 1; i <= 255; ++i) { - char ch = (char)i; - make_transition(start, ch, end); - } } else { TRACE("str", tout << "invalid regular expression" << std::endl;); m_valid = false; From 84abdae5f755e0e703bf8e4d378712b843140627 Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Wed, 9 Aug 2017 15:38:56 -0400 Subject: [PATCH 4/7] fix indentation --- src/smt/theory_str.cpp | 52 +++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index b869abb0a..2016a6501 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -6190,32 +6190,32 @@ namespace smt { zstring str; if (u.str.is_string(arg_str, str)) { if (str.length() == 0) { - // transitioning on the empty string is handled specially - TRACE("str", tout << "empty string epsilon-move " << start << " --> " << end << std::endl;); - make_epsilon_move(start, end); - } else { - TRACE("str", tout << "build NFA for '" << str << "'" << "\n";); - /* - * For an n-character string, we make (n-1) intermediate states, - * labelled i_(0) through i_(n-2). - * Then we construct the following transitions: - * start --str[0]--> i_(0) --str[1]--> i_(1) --...--> i_(n-2) --str[n-1]--> final - */ - unsigned last = start; - for (int i = 0; i <= ((int)str.length()) - 2; ++i) { - unsigned i_state = next_id(); - make_transition(last, str[i], i_state); - TRACE("str", tout << "string transition " << last << "--" << str[i] << "--> " << i_state << "\n";); - last = i_state; - } - make_transition(last, str[(str.length() - 1)], end); - TRACE("str", tout << "string transition " << last << "--" << str[(str.length() - 1)] << "--> " << end << "\n";); - } - } else { // ! u.str.is_string(arg_str, str) - TRACE("str", tout << "invalid string constant in Str2Reg" << std::endl;); - m_valid = false; - return; - } + // transitioning on the empty string is handled specially + TRACE("str", tout << "empty string epsilon-move " << start << " --> " << end << std::endl;); + make_epsilon_move(start, end); + } else { + TRACE("str", tout << "build NFA for '" << str << "'" << "\n";); + /* + * For an n-character string, we make (n-1) intermediate states, + * labelled i_(0) through i_(n-2). + * Then we construct the following transitions: + * start --str[0]--> i_(0) --str[1]--> i_(1) --...--> i_(n-2) --str[n-1]--> final + */ + unsigned last = start; + for (int i = 0; i <= ((int)str.length()) - 2; ++i) { + unsigned i_state = next_id(); + make_transition(last, str[i], i_state); + TRACE("str", tout << "string transition " << last << "--" << str[i] << "--> " << i_state << "\n";); + last = i_state; + } + make_transition(last, str[(str.length() - 1)], end); + TRACE("str", tout << "string transition " << last << "--" << str[(str.length() - 1)] << "--> " << end << "\n";); + } + } else { // ! u.str.is_string(arg_str, str) + TRACE("str", tout << "invalid string constant in Str2Reg" << std::endl;); + m_valid = false; + return; + } } else if (u.re.is_concat(e)){ app * a = to_app(e); expr * re1 = a->get_arg(0); From b2388464e46e3e44f5448a2b285ce0b35878422d Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Wed, 9 Aug 2017 22:03:26 -0400 Subject: [PATCH 5/7] add re.all to theory_str --- src/smt/theory_str.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index 2016a6501..5642339f4 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -1685,6 +1685,8 @@ namespace smt { u.str.is_string(range1, range1val); u.str.is_string(range2, range2val); return zstring("[") + range1val + zstring("-") + range2val + zstring("]"); + } else if (u.re.is_full(a_regex)) { + return zstring("(.*)"); } else { TRACE("str", tout << "BUG: unrecognized regex term " << mk_pp(regex, get_manager()) << std::endl;); UNREACHABLE(); return zstring(""); @@ -1715,6 +1717,12 @@ namespace smt { expr_ref str(ex->get_arg(0), m); app * regex = to_app(ex->get_arg(1)); + // quick reference for the following code: + // - ex: top-level regex membership term + // - str: string term appearing in ex + // - regex: regex term appearing in ex + // ex ::= (str.in.re str regex) + if (u.re.is_to_re(regex)) { expr_ref rxStr(regex->get_arg(0), m); // want to assert 'expr IFF (str == rxStr)' @@ -1794,6 +1802,9 @@ namespace smt { expr_ref finalAxiom(m.mk_iff(ex, rhs), m); SASSERT(finalAxiom); assert_axiom(finalAxiom); + } else if (u.re.is_full(regex)) { + // trivially true for any string! + assert_axiom(ex); } else { TRACE("str", tout << "ERROR: unknown regex expression " << mk_pp(regex, m) << "!" << std::endl;); NOT_IMPLEMENTED_YET(); @@ -6284,6 +6295,10 @@ namespace smt { } TRACE("str", tout << "range NFA: start = " << start << ", end = " << end << std::endl;); + } else if (u.re.is_full(e)) { + NOT_IMPLEMENTED_YET(); + m_valid = false; + return; } else { TRACE("str", tout << "invalid regular expression" << std::endl;); m_valid = false; From 1e445a62d4d37009cdd537d22bff2d0826efcf1d Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Fri, 18 Aug 2017 17:31:40 -0400 Subject: [PATCH 6/7] improve error message in theory_str when an invalid term in str.to.re is encountered addresses #871 --- src/smt/theory_str.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index 5642339f4..7fbc15e8a 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -6223,7 +6223,8 @@ namespace smt { TRACE("str", tout << "string transition " << last << "--" << str[(str.length() - 1)] << "--> " << end << "\n";); } } else { // ! u.str.is_string(arg_str, str) - TRACE("str", tout << "invalid string constant in Str2Reg" << std::endl;); + TRACE("str", tout << "WARNING: invalid string constant in str.to.re! Cancelling." << std::endl;); + u.get_manager().raise_exception("invalid term in str.to.re, argument must be a string constant"); m_valid = false; return; } From adae32f7efeb5c4bcf4777792f04e53c03e1e564 Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Sat, 19 Aug 2017 23:25:34 -0400 Subject: [PATCH 7/7] add re.all to NFA in theory_str --- src/smt/theory_str.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/smt/theory_str.cpp b/src/smt/theory_str.cpp index 7fbc15e8a..127409efe 100644 --- a/src/smt/theory_str.cpp +++ b/src/smt/theory_str.cpp @@ -6297,9 +6297,18 @@ namespace smt { TRACE("str", tout << "range NFA: start = " << start << ", end = " << end << std::endl;); } else if (u.re.is_full(e)) { - NOT_IMPLEMENTED_YET(); - m_valid = false; - return; + // effectively the same as .* where . can be any single character + // start --e--> tmp + // tmp --e--> end + // tmp --C--> tmp for every character C + unsigned tmp = next_id(); + make_epsilon_move(start, tmp); + make_epsilon_move(tmp, end); + for (unsigned int i = 0; i < 256; ++i) { + char ch = (char)i; + make_transition(tmp, ch, tmp); + } + TRACE("str", tout << "re.all NFA: start = " << start << ", end = " << end << std::endl;); } else { TRACE("str", tout << "invalid regular expression" << std::endl;); m_valid = false;