diff --git a/src/ast/rewriter/str_rewriter.cpp b/src/ast/rewriter/str_rewriter.cpp
index a40d52aa1..1449afcc3 100644
--- a/src/ast/rewriter/str_rewriter.cpp
+++ b/src/ast/rewriter/str_rewriter.cpp
@@ -200,8 +200,7 @@ br_status str_rewriter::mk_str_Replace(expr * base, expr * source, expr * target
 
 br_status str_rewriter::mk_re_Str2Reg(expr * str, expr_ref & result) {
     // the argument to Str2Reg *must* be a string constant
-    // TODO is an assertion error too strict here? this basically crashes the solver
-    VERIFY(m_strutil.is_string(str));
+    ENSURE(m_strutil.is_string(str));
     return BR_FAILED;
 }
 
@@ -211,7 +210,7 @@ br_status str_rewriter::mk_re_RegexIn(expr * str, expr * re, expr_ref & result)
     if (m_strutil.is_re_Str2Reg(re)) {
         TRACE("t_str_rw", tout << "RegexIn fast path: " << mk_pp(str, m()) << " in " << mk_pp(re, m()) << std::endl;);
         expr * regexStr = to_app(re)->get_arg(0);
-        VERIFY(m_strutil.is_string(regexStr));
+        ENSURE(m_strutil.is_string(regexStr));
         result = m().mk_eq(str, regexStr);
         return BR_REWRITE_FULL;
     }
@@ -240,6 +239,39 @@ br_status str_rewriter::mk_re_RegexPlus(expr * re, expr_ref & result) {
     }
 }
 
+/*
+ * Rewrite (RegexCharRange start end) into an explicit RegexUnion of
+ * single-character Str2Reg terms, one per character between the two
+ * endpoints (inclusive). Both arguments must be string constants of
+ * length 1; endpoints given in descending order are swapped.
+ */
+br_status str_rewriter::mk_re_RegexCharRange(expr * start, expr * end, expr_ref & result) {
+    TRACE("t_str_rw", tout << "rewrite (RegexCharRange " << mk_pp(start, m()) << " " << mk_pp(end, m()) << ")" << std::endl;);
+    // both 'start' and 'end' must be string constants
+    ENSURE(m_strutil.is_string(start) && m_strutil.is_string(end));
+    std::string arg0Value = m_strutil.get_string_constant_value(start);
+    std::string arg1Value = m_strutil.get_string_constant_value(end);
+    ENSURE(arg0Value.length() == 1 && arg1Value.length() == 1);
+    // Compare and iterate on the unsigned character codes held in a wider
+    // type: with a plain 'char' counter, 'c <= high' can never become false
+    // once 'high' is the largest char value, and characters above 0x7f
+    // compare as negative wherever 'char' is signed.
+    unsigned low = static_cast<unsigned char>(arg0Value[0]);
+    unsigned high = static_cast<unsigned char>(arg1Value[0]);
+    if (low > high) {
+        unsigned t = low;
+        low = high;
+        high = t;
+    }
+
+    expr * res = m_strutil.mk_re_Str2Reg(std::string(1, static_cast<char>(low)));
+    for (unsigned c = low + 1; c <= high; c++) {
+        res = m_strutil.mk_re_RegexUnion(res, m_strutil.mk_re_Str2Reg(std::string(1, static_cast<char>(c))));
+    }
+    result = res;
+    return BR_DONE;
+}
+
 br_status str_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result) {
     SASSERT(f->get_family_id() == get_fid());
 
@@ -280,6 +312,9 @@ br_status str_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con
     case OP_RE_REGEXPLUS:
         SASSERT(num_args == 1);
         return mk_re_RegexPlus(args[0], result);
+    case OP_RE_REGEXCHARRANGE:
+        SASSERT(num_args == 2);
+        return mk_re_RegexCharRange(args[0], args[1], result);
     default:
         return BR_FAILED;
     }
diff --git a/src/ast/rewriter/str_rewriter.h b/src/ast/rewriter/str_rewriter.h
index bd79ed7a1..dccf4a6bd 100644
--- a/src/ast/rewriter/str_rewriter.h
+++ b/src/ast/rewriter/str_rewriter.h
@@ -52,6 +52,7 @@ public:
     br_status mk_re_Str2Reg(expr * str, expr_ref & result);
     br_status mk_re_RegexIn(expr * str, expr * re, expr_ref & result);
     br_status mk_re_RegexPlus(expr * re, expr_ref & result);
+    br_status mk_re_RegexCharRange(expr * start, expr * end, expr_ref & result);
 
     bool reduce_eq(expr * l, expr * r, expr_ref_vector & lhs, expr_ref_vector & rhs, bool & change);
     bool reduce_eq(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_vector& lhs, expr_ref_vector& rhs, bool& change);
diff --git a/src/ast/str_decl_plugin.cpp b/src/ast/str_decl_plugin.cpp
index 45ff37b0f..08358d46b 100644
--- a/src/ast/str_decl_plugin.cpp
+++ b/src/ast/str_decl_plugin.cpp
@@ -43,6 +43,7 @@ str_decl_plugin::str_decl_plugin():
     m_re_regexunion_decl(0),
     m_re_unroll_decl(0),
     m_re_regexplus_decl(0),
+    m_re_regexcharrange_decl(0),
     m_arith_plugin(0),
     m_arith_fid(0),
     m_int_sort(0){
@@ -72,6 +73,7 @@ void str_decl_plugin::finalize(void) {
     DEC_REF(m_re_regexstar_decl);
     DEC_REF(m_re_regexunion_decl);
     DEC_REF(m_re_regexplus_decl);
+    DEC_REF(m_re_regexcharrange_decl);
     DEC_REF(m_re_unroll_decl);
     DEC_REF(m_int_sort);
 }
@@ -164,6 +166,9 @@ void str_decl_plugin::set_manager(ast_manager * m, family_id id) {
     m_re_unroll_decl = m->mk_func_decl(symbol("Unroll"), re, i, s, func_decl_info(id, OP_RE_UNROLL));
     m_manager->inc_ref(m_re_unroll_decl);
 
+    m_re_regexcharrange_decl = m->mk_func_decl(symbol("RegexCharRange"), s, s, re, func_decl_info(id, OP_RE_REGEXCHARRANGE));
+    m_manager->inc_ref(m_re_regexcharrange_decl);
+
 }
 
 decl_plugin * str_decl_plugin::mk_fresh() {
@@ -198,6 +203,7 @@ func_decl * str_decl_plugin::mk_func_decl(decl_kind k) {
     case OP_RE_REGEXPLUS: return m_re_regexplus_decl;
     case OP_RE_REGEXUNION: return m_re_regexunion_decl;
     case OP_RE_UNROLL: return m_re_unroll_decl;
+    case OP_RE_REGEXCHARRANGE: return m_re_regexcharrange_decl;
     default: return 0;
     }
 }
@@ -270,6 +276,7 @@ void str_decl_plugin::get_op_names(svector<builtin_name> & op_names, symbol cons
     op_names.push_back(builtin_name("RegexUnion", OP_RE_REGEXUNION));
     op_names.push_back(builtin_name("RegexPlus", OP_RE_REGEXPLUS));
     op_names.push_back(builtin_name("Unroll", OP_RE_UNROLL));
+    op_names.push_back(builtin_name("RegexCharRange", OP_RE_REGEXCHARRANGE));
 }
 
 void str_decl_plugin::get_sort_names(svector<builtin_name> & sort_names, symbol const & logic) {
diff --git a/src/ast/str_decl_plugin.h b/src/ast/str_decl_plugin.h
index 902e2208f..4b7a8858e 100644
--- a/src/ast/str_decl_plugin.h
+++ b/src/ast/str_decl_plugin.h
@@ -50,6 +50,7 @@ enum str_op_kind {
     OP_RE_UNROLL,
     // higher-level regex operators
     OP_RE_REGEXPLUS,
+    OP_RE_REGEXCHARRANGE,
     // end
     LAST_STR_OP
 };
@@ -80,6 +81,7 @@ protected:
     func_decl * m_re_regexunion_decl;
     func_decl * m_re_unroll_decl;
     func_decl * m_re_regexplus_decl;
+    func_decl * m_re_regexcharrange_decl;
 
     arith_decl_plugin * m_arith_plugin;
     family_id m_arith_fid;
@@ -148,6 +150,23 @@ public:
         return m_plugin->mk_fresh_string();
     }
 
+    // wrap the expression 's' in a Str2Reg application
+    app * mk_re_Str2Reg(expr * s) {
+        expr * es[1] = {s};
+        return m_manager.mk_app(get_fid(), OP_RE_STR2REGEX, 1, es);
+    }
+
+    // convenience overload: build the term for 's' with mk_string first
+    app * mk_re_Str2Reg(std::string s) {
+        return mk_re_Str2Reg(mk_string(s));
+    }
+
+    // build the binary RegexUnion application over 'e1' and 'e2'
+    app * mk_re_RegexUnion(expr * e1, expr * e2) {
+        expr * es[2] = {e1, e2};
+        return m_manager.mk_app(get_fid(), OP_RE_REGEXUNION, 2, es);
+    }
+
     app * mk_re_RegexConcat(expr * e1, expr * e2) {
         expr * es[2] = {e1, e2};
         return m_manager.mk_app(get_fid(), OP_RE_REGEXCONCAT, 2, es);