From 8021d63539b697476f510dbd79abeaeca06b9a7e Mon Sep 17 00:00:00 2001 From: Murphy Berzish Date: Wed, 15 Mar 2017 15:25:48 -0400 Subject: [PATCH] remove legacy str_decl_plugin and str_rewriter classes; these have been unified with sequence-compatible equivalents --- src/ast/ast_smt2_pp.h | 1 - src/ast/ast_smt_pp.cpp | 1 - src/ast/rewriter/str_rewriter.cpp | 703 ------------------------------ src/ast/rewriter/str_rewriter.h | 120 ----- src/ast/str_decl_plugin.cpp | 501 --------------------- src/ast/str_decl_plugin.h | 218 --------- 6 files changed, 1544 deletions(-) delete mode 100644 src/ast/rewriter/str_rewriter.cpp delete mode 100644 src/ast/rewriter/str_rewriter.h delete mode 100644 src/ast/str_decl_plugin.cpp delete mode 100644 src/ast/str_decl_plugin.h diff --git a/src/ast/ast_smt2_pp.h b/src/ast/ast_smt2_pp.h index 2f79ebaec..244594461 100644 --- a/src/ast/ast_smt2_pp.h +++ b/src/ast/ast_smt2_pp.h @@ -30,7 +30,6 @@ Revision History: #include"fpa_decl_plugin.h" #include"dl_decl_plugin.h" #include"seq_decl_plugin.h" -#include"str_decl_plugin.h" #include"smt2_util.h" class smt2_pp_environment { diff --git a/src/ast/ast_smt_pp.cpp b/src/ast/ast_smt_pp.cpp index de6ae6cc3..c3f1523b1 100644 --- a/src/ast/ast_smt_pp.cpp +++ b/src/ast/ast_smt_pp.cpp @@ -24,7 +24,6 @@ Revision History: #include"ast_smt_pp.h" #include"arith_decl_plugin.h" #include"bv_decl_plugin.h" -#include"str_decl_plugin.h" #include"array_decl_plugin.h" #include"datatype_decl_plugin.h" #include"fpa_decl_plugin.h" diff --git a/src/ast/rewriter/str_rewriter.cpp b/src/ast/rewriter/str_rewriter.cpp deleted file mode 100644 index 3933e7fdb..000000000 --- a/src/ast/rewriter/str_rewriter.cpp +++ /dev/null @@ -1,703 +0,0 @@ -/*++ -Copyright (c) 2016 Microsoft Corporation - -Module Name: - - str_rewriter.cpp - -Abstract: - - AST rewriting rules for string terms. - -Author: - - Murphy Berzish - -Notes: - ---*/ - -#if 0 - -#include"str_rewriter.h" -#include"arith_decl_plugin.h" -#include"ast_pp.h" -#include"ast_util.h" -#include"well_sorted.h" -#include -#include -#include -#include - -// Convert a regular expression to an e-NFA using Thompson's construction -void nfa::convert_re(expr * e, unsigned & start, unsigned & end, str_util & m_strutil) { - start = next_id(); - end = next_id(); - if (m_strutil.is_re_Str2Reg(e)) { - app * a = to_app(e); - expr * arg_str = a->get_arg(0); - if (m_strutil.is_string(arg_str)) { - std::string str = m_strutil.get_string_constant_value(arg_str); - TRACE("t_str_rw", tout << "build NFA for '" << str << "'" << std::endl;); - - /* - * For an n-character string, we make (n-1) intermediate states, - * labelled i_(0) through i_(n-2). - * Then we construct the following transitions: - * start --str[0]--> i_(0) --str[1]--> i_(1) --...--> i_(n-2) --str[n-1]--> final - */ - unsigned last = start; - for (int i = 0; i <= ((int)str.length()) - 2; ++i) { - unsigned i_state = next_id(); - make_transition(last, str.at(i), i_state); - TRACE("t_str_rw", tout << "string transition " << last << "--" << str.at(i) << "--> " << i_state << std::endl;); - last = i_state; - } - make_transition(last, str.at(str.length() - 1), end); - TRACE("t_str_rw", tout << "string transition " << last << "--" << str.at(str.length() - 1) << "--> " << end << std::endl;); - TRACE("t_str_rw", tout << "string NFA: start = " << start << ", end = " << end << std::endl;); - } else { - TRACE("t_str_rw", tout << "invalid string constant in Str2Reg" << std::endl;); - m_valid = false; - return; - } - } else if (m_strutil.is_re_RegexConcat(e)){ - app * a = to_app(e); - expr * re1 = a->get_arg(0); - expr * re2 = a->get_arg(1); - unsigned start1, end1; - convert_re(re1, start1, end1, m_strutil); - unsigned start2, end2; - convert_re(re2, start2, end2, m_strutil); - // start --e--> start1 --...--> end1 --e--> start2 --...--> end2 --e--> end - make_epsilon_move(start, start1); - make_epsilon_move(end1, start2); - make_epsilon_move(end2, end); - TRACE("t_str_rw", tout << "concat NFA: start = " << start << ", end = " << end << std::endl;); - } else if (m_strutil.is_re_RegexUnion(e)) { - app * a = to_app(e); - expr * re1 = a->get_arg(0); - expr * re2 = a->get_arg(1); - unsigned start1, end1; - convert_re(re1, start1, end1, m_strutil); - unsigned start2, end2; - convert_re(re2, start2, end2, m_strutil); - - // start --e--> start1 ; start --e--> start2 - // end1 --e--> end ; end2 --e--> end - make_epsilon_move(start, start1); - make_epsilon_move(start, start2); - make_epsilon_move(end1, end); - make_epsilon_move(end2, end); - TRACE("t_str_rw", tout << "union NFA: start = " << start << ", end = " << end << std::endl;); - } else if (m_strutil.is_re_RegexStar(e)) { - app * a = to_app(e); - expr * subex = a->get_arg(0); - unsigned start_subex, end_subex; - convert_re(subex, start_subex, end_subex, m_strutil); - // start --e--> start_subex, start --e--> end - // end_subex --e--> start_subex, end_subex --e--> end - make_epsilon_move(start, start_subex); - make_epsilon_move(start, end); - make_epsilon_move(end_subex, start_subex); - make_epsilon_move(end_subex, end); - TRACE("t_str_rw", tout << "star NFA: start = " << start << ", end = " << end << std::endl;); - } else { - TRACE("t_str_rw", tout << "invalid regular expression" << std::endl;); - m_valid = false; - return; - } -} - -void nfa::epsilon_closure(unsigned start, std::set & closure) { - std::deque worklist; - closure.insert(start); - worklist.push_back(start); - - while(!worklist.empty()) { - unsigned state = worklist.front(); - worklist.pop_front(); - if (epsilon_map.find(state) != epsilon_map.end()) { - for (std::set::iterator it = epsilon_map[state].begin(); - it != epsilon_map[state].end(); ++it) { - unsigned new_state = *it; - if (closure.find(new_state) == closure.end()) { - closure.insert(new_state); - worklist.push_back(new_state); - } - } - } - } -} - -bool nfa::matches(std::string input) { - /* - * Keep a set of all states the NFA can currently be in. - * Initially this is the e-closure of m_start_state - * For each character A in the input string, - * the set of next states contains - * all states in transition_map[S][A] for each S in current_states, - * and all states in epsilon_map[S] for each S in current_states. - * After consuming the entire input string, - * the match is successful iff current_states contains m_end_state. - */ - std::set current_states; - epsilon_closure(m_start_state, current_states); - for (unsigned i = 0; i < input.length(); ++i) { - char A = input.at(i); - std::set next_states; - for (std::set::iterator it = current_states.begin(); - it != current_states.end(); ++it) { - unsigned S = *it; - // check transition_map - if (transition_map[S].find(A) != transition_map[S].end()) { - next_states.insert(transition_map[S][A]); - } - } - - // take e-closure over next_states to compute the actual next_states - std::set epsilon_next_states; - for (std::set::iterator it = next_states.begin(); it != next_states.end(); ++it) { - unsigned S = *it; - std::set closure; - epsilon_closure(S, closure); - epsilon_next_states.insert(closure.begin(), closure.end()); - } - current_states = epsilon_next_states; - } - if (current_states.find(m_end_state) != current_states.end()) { - return true; - } else { - return false; - } -} - - -br_status str_rewriter::mk_str_Concat(expr * arg0, expr * arg1, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (Concat " << mk_pp(arg0, m()) << " " << mk_pp(arg1, m()) << ")" << std::endl;); - if(m_strutil.is_string(arg0) && m_strutil.is_string(arg1)) { - TRACE("t_str_rw", tout << "evaluating concat of two constant strings" << std::endl;); - std::string arg0Str = m_strutil.get_string_constant_value(arg0); - std::string arg1Str = m_strutil.get_string_constant_value(arg1); - std::string resultStr = arg0Str + arg1Str; - result = m_strutil.mk_string(resultStr); - return BR_DONE; - } else { - return BR_FAILED; - } -} - -br_status str_rewriter::mk_str_Length(expr * arg0, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (Length " << mk_pp(arg0, m()) << ")" << std::endl;); - if (m_strutil.is_string(arg0)) { - TRACE("t_str_rw", tout << "evaluating length of constant string" << std::endl;); - std::string arg0Str = m_strutil.get_string_constant_value(arg0); - rational arg0Len((unsigned)arg0Str.length()); - result = m_autil.mk_numeral(arg0Len, true); - TRACE("t_str_rw", tout << "result is " << mk_pp(result, m()) << std::endl;); - return BR_DONE; - } else { - return BR_FAILED; - } -} - -br_status str_rewriter::mk_str_CharAt(expr * arg0, expr * arg1, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (CharAt " << mk_pp(arg0, m()) << " " << mk_pp(arg1, m()) << ")" << std::endl;); - // if arg0 is a string constant and arg1 is an integer constant, - // we can rewrite this by evaluating the expression - rational arg1Int; - if (m_strutil.is_string(arg0) && m_autil.is_numeral(arg1, arg1Int)) { - TRACE("t_str_rw", tout << "evaluating constant CharAt expression" << std::endl;); - std::string arg0Str = m_strutil.get_string_constant_value(arg0); - std::string resultStr; - if (arg1Int >= rational(0) && arg1Int <= rational((unsigned)arg0Str.length())) { - resultStr = arg0Str.at(arg1Int.get_unsigned()); - TRACE("t_str_rw", tout << "result is '" << resultStr << "'" << std::endl;); - } else { - resultStr = ""; - TRACE("t_str_rw", tout << "bogus length argument, result is empty string" << std::endl;); - } - result = m_strutil.mk_string(resultStr); - return BR_DONE; - } else { - return BR_FAILED; - } -} - -br_status str_rewriter::mk_str_StartsWith(expr * haystack, expr * needle, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (StartsWith " << mk_pp(haystack, m()) << " " << mk_pp(needle, m()) << ")" << std::endl;); - if (m_strutil.is_string(haystack) && m_strutil.is_string(needle)) { - TRACE("t_str_rw", tout << "evaluating constant StartsWith predicate" << std::endl;); - std::string haystackStr = m_strutil.get_string_constant_value(haystack); - std::string needleStr = m_strutil.get_string_constant_value(needle); - if (haystackStr.length() < needleStr.length()) { - result = m().mk_false(); - return BR_DONE; - } else { - if (haystackStr.substr(0, needleStr.length()) == needleStr) { - result = m().mk_true(); - } else { - result = m().mk_false(); - } - return BR_DONE; - } - } else { - return BR_FAILED; - } -} - -br_status str_rewriter::mk_str_EndsWith(expr * haystack, expr * needle, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (EndsWith " << mk_pp(haystack, m()) << " " << mk_pp(needle, m()) << ")" << std::endl;); - if (m_strutil.is_string(haystack) && m_strutil.is_string(needle)) { - TRACE("t_str_rw", tout << "evaluating constant EndsWith predicate" << std::endl;); - std::string haystackStr = m_strutil.get_string_constant_value(haystack); - std::string needleStr = m_strutil.get_string_constant_value(needle); - if (haystackStr.length() < needleStr.length()) { - result = m().mk_false(); - return BR_DONE; - } else { - if (haystackStr.substr(haystackStr.length() - needleStr.length(), needleStr.length()) == needleStr) { - result = m().mk_true(); - } else { - result = m().mk_false(); - } - return BR_DONE; - } - } else { - return BR_FAILED; - } -} - -br_status str_rewriter::mk_str_Contains(expr * haystack, expr * needle, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (Contains " << mk_pp(haystack, m()) << " " << mk_pp(needle, m()) << ")" << std::endl;); - if (haystack == needle) { - TRACE("t_str_rw", tout << "eliminate (Contains) over identical terms" << std::endl;); - result = m().mk_true(); - return BR_DONE; - } else if (m_strutil.is_string(haystack) && m_strutil.is_string(needle)) { - TRACE("t_str_rw", tout << "evaluating constant Contains predicate" << std::endl;); - std::string haystackStr = m_strutil.get_string_constant_value(haystack); - std::string needleStr = m_strutil.get_string_constant_value(needle); - if (haystackStr.find(needleStr) != std::string::npos) { - result = m().mk_true(); - } else { - result = m().mk_false(); - } - return BR_DONE; - } else { - return BR_FAILED; - } -} - -br_status str_rewriter::mk_str_Indexof(expr * haystack, expr * needle, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (Indexof " << mk_pp(haystack, m()) << " " << mk_pp(needle, m()) << ")" << std::endl;); - if (m_strutil.is_string(haystack) && m_strutil.is_string(needle)) { - TRACE("t_str_rw", tout << "evaluating constant Indexof expression" << std::endl;); - std::string haystackStr = m_strutil.get_string_constant_value(haystack); - std::string needleStr = m_strutil.get_string_constant_value(needle); - if (haystackStr.find(needleStr) != std::string::npos) { - int index = haystackStr.find(needleStr); - result = m_autil.mk_numeral(rational(index), true); - } else { - result = m_autil.mk_numeral(rational(-1), true); - } - return BR_DONE; - } else { - return BR_FAILED; - } -} - -br_status str_rewriter::mk_str_Indexof2(expr * arg0, expr * arg1, expr * arg2, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (Indexof2 " << mk_pp(arg0, m()) << " " << mk_pp(arg1, m()) << " " << mk_pp(arg2, m()) << ")" << std::endl;); - //if (getNodeType(t, args[0]) == my_Z3_ConstStr && getNodeType(t, args[1]) == my_Z3_ConstStr && getNodeType(t, args[2]) == my_Z3_Num) { - rational arg2Int; - if (m_strutil.is_string(arg0) && m_strutil.is_string(arg1) && m_autil.is_numeral(arg2, arg2Int)) { - TRACE("t_str_rw", tout << "evaluating constant Indexof2 expression" << std::endl;); - std::string arg0str = m_strutil.get_string_constant_value(arg0); - std::string arg1str = m_strutil.get_string_constant_value(arg1); - if (arg2Int >= rational((unsigned)arg0str.length())) { - result = m_autil.mk_numeral(rational(-1), true); - } else if (arg2Int < rational(0)) { - int index = arg0str.find(arg1str); - result = m_autil.mk_numeral(rational(index), true); - } else { - std::string suffixStr = arg0str.substr(arg2Int.get_unsigned(), arg0str.length() - arg2Int.get_unsigned()); - if (suffixStr.find(arg1str) != std::string::npos) { - int index = suffixStr.find(arg1str) + arg2Int.get_unsigned(); - result = m_autil.mk_numeral(rational(index), true); - } else { - result = m_autil.mk_numeral(rational(-1), true); - } - } - return BR_DONE; - } else { - return BR_FAILED; - } -} - -br_status str_rewriter::mk_str_LastIndexof(expr * haystack, expr * needle, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (LastIndexof " << mk_pp(haystack, m()) << " " << mk_pp(needle, m()) << ")" << std::endl;); - if (m_strutil.is_string(haystack) && m_strutil.is_string(needle)) { - TRACE("t_str_rw", tout << "evaluating constant LastIndexof expression" << std::endl;); - std::string arg0Str = m_strutil.get_string_constant_value(haystack); - std::string arg1Str = m_strutil.get_string_constant_value(needle); - if (arg0Str.rfind(arg1Str) != std::string::npos) { - int index = arg0Str.rfind(arg1Str); - result = m_autil.mk_numeral(rational(index), true); - } else { - result = m_autil.mk_numeral(rational(-1), true); - } - return BR_DONE; - } else { - return BR_FAILED; - } -} - -br_status str_rewriter::mk_str_Replace(expr * base, expr * source, expr * target, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (Replace " << mk_pp(base, m()) << " " << mk_pp(source, m()) << " " << mk_pp(target, m()) << ")" << std::endl;); - if (m_strutil.is_string(base) && m_strutil.is_string(source) && m_strutil.is_string(target)) { - std::string arg0Str = m_strutil.get_string_constant_value(base); - std::string arg1Str = m_strutil.get_string_constant_value(source); - std::string arg2Str = m_strutil.get_string_constant_value(target); - if (arg0Str.find(arg1Str) != std::string::npos) { - int index1 = arg0Str.find(arg1Str); - int index2 = index1 + arg1Str.length(); - std::string substr0 = arg0Str.substr(0, index1); - std::string substr2 = arg0Str.substr(index2); - std::string replaced = substr0 + arg2Str + substr2; - result = m_strutil.mk_string(replaced); - } else { - result = base; - } - return BR_DONE; - } else { - return BR_FAILED; - } -} - -br_status str_rewriter::mk_str_prefixof(expr * pre, expr * full, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (str.prefixof " << mk_pp(pre, m()) << " " << mk_pp(full, m()) << ")" << std::endl;); - result = m_strutil.mk_str_StartsWith(full, pre); - return BR_REWRITE_FULL; -} - -br_status str_rewriter::mk_str_suffixof(expr * post, expr * full, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (str.suffixof" << mk_pp(post, m()) << " " << mk_pp(full, m()) << ")" << std::endl;); - result = m_strutil.mk_str_EndsWith(full, post); - return BR_REWRITE_FULL; -} - -br_status str_rewriter::mk_str_to_int(expr * arg0, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (str.to-int " << mk_pp(arg0, m()) << ")" << std::endl;); - - if (m_strutil.is_string(arg0)) { - std::string str = m_strutil.get_string_constant_value(arg0); - if (str.length() == 0) { - result = m_autil.mk_numeral(rational::zero(), true); - return BR_DONE; - } - - // interpret str as a natural number and rewrite to the corresponding integer. - // if this is not valid, rewrite to -1 - rational convertedRepresentation(0); - rational ten(10); - for (unsigned i = 0; i < str.length(); ++i) { - char digit = str.at(i); - if (isdigit((int)digit)) { - std::string sDigit(1, digit); - int val = atoi(sDigit.c_str()); - convertedRepresentation = (ten * convertedRepresentation) + rational(val); - } else { - // not a digit, invalid - TRACE("t_str_rw", tout << "str.to-int argument contains non-digit character '" << digit << "'" << std::endl;); - convertedRepresentation = rational::minus_one(); - break; - } - } - result = m_autil.mk_numeral(convertedRepresentation, true); - return BR_DONE; - } - return BR_FAILED; - -} - -br_status str_rewriter::mk_str_from_int(expr * arg0, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (str.from-int " << mk_pp(arg0, m()) << ")" << std::endl;); - rational arg0Int; - if (m_autil.is_numeral(arg0, arg0Int)) { - // (str.from-int N) with N non-negative is the corresponding string in decimal notation. - // otherwise it is the empty string - if (arg0Int.is_nonneg()) { - std::string str = arg0Int.to_string(); - result = m_strutil.mk_string(str); - TRACE("t_str_rw", tout << "convert non-negative integer constant to " << str << std::endl;); - } else { - result = m_strutil.mk_string(""); - TRACE("t_str_rw", tout << "convert invalid integer constant to empty string" << std::endl;); - } - return BR_DONE; - } - return BR_FAILED; -} - -br_status str_rewriter::mk_str_Substr(expr * base, expr * start, expr * len, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (Substr " << mk_pp(base, m()) << " " << mk_pp(start, m()) << " " << mk_pp(len, m()) << ")" << std::endl;); - - bool constant_base = m_strutil.is_string(base); - std::string baseStr; - if (constant_base) { - baseStr = m_strutil.get_string_constant_value(base); - } - rational startVal; - bool constant_start = m_autil.is_numeral(start, startVal); - rational lenVal; - bool constant_len = m_autil.is_numeral(len, lenVal); - - // case 1: start < 0 or len < 0 - if ( (constant_start && startVal.is_neg()) || (constant_len && lenVal.is_neg()) ) { - TRACE("t_str_rw", tout << "start/len of substr is negative" << std::endl;); - result = m_strutil.mk_string(""); - return BR_DONE; - } - // case 1.1: start >= length(base) - if (constant_start && constant_base) { - rational baseStrlen((unsigned int)baseStr.length()); - if (startVal >= baseStrlen) { - TRACE("t_str_rw", tout << "start >= strlen for substr" << std::endl;); - result = m_strutil.mk_string(""); - return BR_DONE; - } - } - - if (constant_base && constant_start && constant_len) { - rational baseStrlen((unsigned int)baseStr.length()); - std::string retval; - if (startVal + lenVal >= baseStrlen) { - // case 2: pos+len goes past the end of the string - retval = baseStr.substr(startVal.get_unsigned(), std::string::npos); - } else { - // case 3: pos+len still within string - retval = baseStr.substr(startVal.get_unsigned(), lenVal.get_unsigned()); - } - result = m_strutil.mk_string(retval); - return BR_DONE; - } - - return BR_FAILED; -} - -br_status str_rewriter::mk_re_Str2Reg(expr * str, expr_ref & result) { - // the argument to Str2Reg *must* be a string constant - ENSURE(m_strutil.is_string(str)); - return BR_FAILED; -} - -br_status str_rewriter::mk_re_RegexIn(expr * str, expr * re, expr_ref & result) { - // fast path: - // (RegexIn E (Str2Reg S)) --> (= E S) - if (m_strutil.is_re_Str2Reg(re)) { - expr * regexStr = to_app(re)->get_arg(0); - ENSURE(m_strutil.is_string(regexStr)); - result = m().mk_eq(str, regexStr); - TRACE("t_str_rw", tout << "RegexIn fast path: " << mk_pp(str, m()) << " in " << mk_pp(re, m()) << " ==> " << mk_pp(result, m()) << std::endl;); - return BR_REWRITE_FULL; - } - - // necessary for model validation - if (m_strutil.is_string(str)) { - TRACE("t_str_rw", tout << "RegexIn with constant string argument" << std::endl;); - nfa regex_nfa(m_strutil, re); - ENSURE(regex_nfa.is_valid()); - std::string input = m_strutil.get_string_constant_value(str); - if (regex_nfa.matches(input)) { - result = m().mk_true(); - } else { - result = m().mk_false(); - } - return BR_DONE; - } - - return BR_FAILED; -} - -br_status str_rewriter::mk_re_RegexStar(expr * re, expr_ref & result) { - if (m_strutil.is_re_RegexStar(re)) { - result = re; - return BR_REWRITE_FULL; - } else { - return BR_FAILED; - } -} - -br_status str_rewriter::mk_re_RegexConcat(expr * r0, expr * r1, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (RegexConcat " << mk_pp(r0, m()) << " " << mk_pp(r1, m()) << ")" << std::endl;); - // (RegexConcat (Str2Reg "A") (Str2Reg "B")) --> (Str2Reg "AB") - if (m_strutil.is_re_Str2Reg(r0) && m_strutil.is_re_Str2Reg(r1)) { - expr * r0str = to_app(r0)->get_arg(0); - expr * r1str = to_app(r1)->get_arg(0); - ENSURE(m_strutil.is_string(r0str)); - ENSURE(m_strutil.is_string(r1str)); - std::string r0val = m_strutil.get_string_constant_value(r0str); - std::string r1val = m_strutil.get_string_constant_value(r1str); - std::string simplifyVal = r0val + r1val; - TRACE("t_str_rw", tout << "RegexConcat fast path: both sides are Str2Reg, simplify to (Str2Reg \"" << simplifyVal << "\")" << std::endl;); - result = m_strutil.mk_re_Str2Reg(simplifyVal); - return BR_DONE; - } - - return BR_FAILED; -} - -br_status str_rewriter::mk_re_RegexPlus(expr * re, expr_ref & result) { - /* - * Two optimizations are possible if we inspect 're'. - * If 're' is (RegexPlus X), then reduce to 're'. - * If 're' is (RegexStar X), then reduce to 're'. - * Otherwise, reduce to (RegexConcat re (RegexStar re)). - */ - - if (m_strutil.is_re_RegexPlus(re)) { - result = re; - return BR_REWRITE_FULL; - } else if (m_strutil.is_re_RegexStar(re)) { - // Z3str2 re-created the AST under 're' here, but I don't think we need to do that - result = re; - return BR_REWRITE_FULL; - } else { - result = m_strutil.mk_re_RegexConcat(re, m_strutil.mk_re_RegexStar(re)); - return BR_REWRITE_FULL; - } -} - -br_status str_rewriter::mk_re_RegexCharRange(expr * start, expr * end, expr_ref & result) { - TRACE("t_str_rw", tout << "rewrite (RegexCharRange " << mk_pp(start, m()) << " " << mk_pp(end, m()) << ")" << std::endl;); - // both 'start' and 'end' must be string constants - ENSURE(m_strutil.is_string(start) && m_strutil.is_string(end)); - std::string arg0Value = m_strutil.get_string_constant_value(start); - std::string arg1Value = m_strutil.get_string_constant_value(end); - ENSURE(arg0Value.length() == 1 && arg1Value.length() == 1); - char low = arg0Value[0]; - char high = arg1Value[0]; - if (low > high) { - char t = low; - low = high; - high = t; - } - - char c = low; - std::string cStr; - cStr.push_back(c); - expr * res = m_strutil.mk_re_Str2Reg(cStr); - c++; - for (; c <= high; c++) { - cStr.clear(); - cStr.push_back(c); - res = m_strutil.mk_re_RegexUnion(res, m_strutil.mk_re_Str2Reg(cStr)); - } - result = res; - return BR_DONE; -} - -br_status str_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result) { - SASSERT(f->get_family_id() == get_fid()); - - TRACE("t_str_rw", tout << "rewrite app: " << f->get_name() << std::endl;); - - switch(f->get_decl_kind()) { - case OP_STRCAT: - SASSERT(num_args == 2); - return mk_str_Concat(args[0], args[1], result); - case OP_STRLEN: - SASSERT(num_args == 1); - return mk_str_Length(args[0], result); - case OP_STR_CHARAT: - SASSERT(num_args == 2); - return mk_str_CharAt(args[0], args[1], result); - case OP_STR_STARTSWITH: - SASSERT(num_args == 2); - return mk_str_StartsWith(args[0], args[1], result); - case OP_STR_ENDSWITH: - SASSERT(num_args == 2); - return mk_str_EndsWith(args[0], args[1], result); - case OP_STR_CONTAINS: - SASSERT(num_args == 2); - return mk_str_Contains(args[0], args[1], result); - case OP_STR_INDEXOF: - SASSERT(num_args == 2); - return mk_str_Indexof(args[0], args[1], result); - case OP_STR_INDEXOF2: - SASSERT(num_args == 3); - return mk_str_Indexof2(args[0], args[1], args[2], result); - case OP_STR_LASTINDEXOF: - SASSERT(num_args == 2); - return mk_str_LastIndexof(args[0], args[1], result); - case OP_STR_REPLACE: - SASSERT(num_args == 3); - return mk_str_Replace(args[0], args[1], args[2], result); - case OP_STR_PREFIXOF: - SASSERT(num_args == 2); - return mk_str_prefixof(args[0], args[1], result); - case OP_STR_SUFFIXOF: - SASSERT(num_args == 2); - return mk_str_suffixof(args[0], args[1], result); - case OP_STR_STR2INT: - SASSERT(num_args == 1); - return mk_str_to_int(args[0], result); - case OP_STR_INT2STR: - SASSERT(num_args == 1); - return mk_str_from_int(args[0], result); - case OP_STR_SUBSTR: - SASSERT(num_args == 3); - return mk_str_Substr(args[0], args[1], args[2], result); - case OP_RE_STR2REGEX: - SASSERT(num_args == 1); - return mk_re_Str2Reg(args[0], result); - case OP_RE_REGEXIN: - SASSERT(num_args == 2); - return mk_re_RegexIn(args[0], args[1], result); - case OP_RE_REGEXPLUS: - SASSERT(num_args == 1); - return mk_re_RegexPlus(args[0], result); - case OP_RE_REGEXSTAR: - SASSERT(num_args == 1); - return mk_re_RegexStar(args[0], result); - case OP_RE_REGEXCONCAT: - SASSERT(num_args == 2); - return mk_re_RegexConcat(args[0], args[1], result); - case OP_RE_REGEXCHARRANGE: - SASSERT(num_args == 2); - return mk_re_RegexCharRange(args[0], args[1], result); - default: - return BR_FAILED; - } -} - -br_status str_rewriter::mk_eq_core(expr * l, expr * r, expr_ref & result) { - // from seq_rewriter - expr_ref_vector lhs(m()), rhs(m()), res(m()); - bool changed = false; - if (!reduce_eq(l, r, lhs, rhs, changed)) { - result = m().mk_false(); - return BR_DONE; - } - if (!changed) { - return BR_FAILED; - } - for (unsigned i = 0; i < lhs.size(); ++i) { - res.push_back(m().mk_eq(lhs[i].get(), rhs[i].get())); - } - result = mk_and(res); - return BR_REWRITE3; -} - -bool str_rewriter::reduce_eq(expr * l, expr * r, expr_ref_vector & lhs, expr_ref_vector & rhs, bool & change) { - change = false; - return true; -} - -bool str_rewriter::reduce_eq(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_vector& lhs, expr_ref_vector& rhs, bool& change) { - change = false; - return true; -} - -#endif /* disable */ diff --git a/src/ast/rewriter/str_rewriter.h b/src/ast/rewriter/str_rewriter.h deleted file mode 100644 index 8d6041a51..000000000 --- a/src/ast/rewriter/str_rewriter.h +++ /dev/null @@ -1,120 +0,0 @@ -/*++ -Copyright (c) 2016 Microsoft Corporation - -Module Name: - - str_rewriter.h - -Abstract: - - AST rewriting rules for string terms. - -Author: - - Murphy Berzish - -Notes: - ---*/ - -#if 0 - -#include"str_decl_plugin.h" -#include"arith_decl_plugin.h" -#include"rewriter_types.h" -#include"params.h" -#include -#include - -class str_rewriter { - str_util m_strutil; - arith_util m_autil; - -public: - str_rewriter(ast_manager & m, params_ref const & p = params_ref()) : - m_strutil(m), m_autil(m) { - } - - ast_manager & m() const { return m_strutil.get_manager(); } - family_id get_fid() const { return m_strutil.get_family_id(); } - - void updt_params(params_ref const & p) {} - static void get_param_descrs(param_descrs & r) {} - - br_status mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result); - br_status mk_eq_core(expr * lhs, expr * rhs, expr_ref & result); - - br_status mk_str_Concat(expr * arg0, expr * arg1, expr_ref & result); - br_status mk_str_Length(expr * arg0, expr_ref & result); - br_status mk_str_CharAt(expr * arg0, expr * arg1, expr_ref & result); - br_status mk_str_StartsWith(expr * haystack, expr * needle, expr_ref & result); - br_status mk_str_EndsWith(expr * haystack, expr * needle, expr_ref & result); - br_status mk_str_Contains(expr * haystack, expr * needle, expr_ref & result); - br_status mk_str_Indexof(expr * haystack, expr * needle, expr_ref & result); - br_status mk_str_Indexof2(expr * arg0, expr * arg1, expr * arg2, expr_ref & result); - br_status mk_str_LastIndexof(expr * haystack, expr * needle, expr_ref & result); - br_status mk_str_Replace(expr * base, expr * source, expr * target, expr_ref & result); - br_status mk_str_Substr(expr * base, expr * start, expr * len, expr_ref & result); - br_status mk_str_prefixof(expr * pre, expr * full, expr_ref & result); - br_status mk_str_suffixof(expr * post, expr * full, expr_ref & result); - br_status mk_str_to_int(expr * arg0, expr_ref & result); - br_status mk_str_from_int(expr * arg0, expr_ref & result); - - br_status mk_re_Str2Reg(expr * str, expr_ref & result); - br_status mk_re_RegexIn(expr * str, expr * re, expr_ref & result); - br_status mk_re_RegexPlus(expr * re, expr_ref & result); - br_status mk_re_RegexStar(expr * re, expr_ref & result); - br_status mk_re_RegexConcat(expr * r0, expr * r1, expr_ref & result); - br_status mk_re_RegexCharRange(expr * start, expr * end, expr_ref & result); - - bool reduce_eq(expr * l, expr * r, expr_ref_vector & lhs, expr_ref_vector & rhs, bool & change); - bool reduce_eq(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_vector& lhs, expr_ref_vector& rhs, bool& change); - -}; - -class nfa { -protected: - bool m_valid; - unsigned m_next_id; - - unsigned next_id() { - unsigned retval = m_next_id; - ++m_next_id; - return retval; - } - - unsigned m_start_state; - unsigned m_end_state; - - std::map > transition_map; - std::map > epsilon_map; - - void make_transition(unsigned start, char symbol, unsigned end) { - transition_map[start][symbol] = end; - } - - void make_epsilon_move(unsigned start, unsigned end) { - epsilon_map[start].insert(end); - } - - // Convert a regular expression to an e-NFA using Thompson's construction - void convert_re(expr * e, unsigned & start, unsigned & end, str_util & m_strutil); - -public: - nfa(str_util & m_strutil, expr * e) -: m_valid(true), m_next_id(0), m_start_state(0), m_end_state(0) { - convert_re(e, m_start_state, m_end_state, m_strutil); - } - - nfa() : m_valid(false), m_next_id(0), m_start_state(0), m_end_state(0) {} - - bool is_valid() const { - return m_valid; - } - - void epsilon_closure(unsigned start, std::set & closure); - - bool matches(std::string input); -}; - -#endif /* disable */ diff --git a/src/ast/str_decl_plugin.cpp b/src/ast/str_decl_plugin.cpp deleted file mode 100644 index 067420f04..000000000 --- a/src/ast/str_decl_plugin.cpp +++ /dev/null @@ -1,501 +0,0 @@ -/*++ -Module Name: - - str_decl_plugin.h - -Abstract: - - - -Author: - - Murphy Berzish (mtrberzi) 2015-09-02. - -Revision History: - ---*/ - -#if 0 - -#include -#include"str_decl_plugin.h" -#include"string_buffer.h" -#include"warning.h" -#include"ast_pp.h" -#include"ast_smt2_pp.h" - -str_decl_plugin::str_decl_plugin(): - m_strv_sym("String"), - m_str_decl(0), - m_regex_decl(0), - m_concat_decl(0), - m_length_decl(0), - m_charat_decl(0), - m_startswith_decl(0), - m_endswith_decl(0), - m_contains_decl(0), - m_indexof_decl(0), - m_indexof2_decl(0), - m_lastindexof_decl(0), - m_substr_decl(0), - m_replace_decl(0), - m_str2int_decl(0), - m_int2str_decl(0), - m_prefixof_decl(0), - m_suffixof_decl(0), - m_re_str2regex_decl(0), - m_re_regexin_decl(0), - m_re_regexconcat_decl(0), - m_re_regexstar_decl(0), - m_re_regexunion_decl(0), - m_re_unroll_decl(0), - m_re_regexplus_decl(0), - m_re_regexcharrange_decl(0), - m_arith_plugin(0), - m_arith_fid(0), - m_int_sort(0){ -} - -str_decl_plugin::~str_decl_plugin(){ -} - -void str_decl_plugin::finalize(void) { - #define DEC_REF(decl) if (decl) { m_manager->dec_ref(decl); } ((void) 0) - DEC_REF(m_str_decl); - DEC_REF(m_regex_decl); - DEC_REF(m_concat_decl); - DEC_REF(m_length_decl); - DEC_REF(m_charat_decl); - DEC_REF(m_startswith_decl); - DEC_REF(m_endswith_decl); - DEC_REF(m_contains_decl); - DEC_REF(m_indexof_decl); - DEC_REF(m_indexof2_decl); - DEC_REF(m_lastindexof_decl); - DEC_REF(m_substr_decl); - DEC_REF(m_replace_decl); - DEC_REF(m_prefixof_decl); - DEC_REF(m_suffixof_decl); - DEC_REF(m_str2int_decl); - DEC_REF(m_int2str_decl); - DEC_REF(m_re_str2regex_decl); - DEC_REF(m_re_regexin_decl); - DEC_REF(m_re_regexconcat_decl); - DEC_REF(m_re_regexstar_decl); - DEC_REF(m_re_regexunion_decl); - DEC_REF(m_re_regexplus_decl); - DEC_REF(m_re_regexcharrange_decl); - DEC_REF(m_re_unroll_decl); - DEC_REF(m_int_sort); -} - -void str_decl_plugin::set_manager(ast_manager * m, family_id id) { - decl_plugin::set_manager(m, id); - m_str_decl = m->mk_sort(symbol("String"), sort_info(id, STRING_SORT)); - m->inc_ref(m_str_decl); - sort * s = m_str_decl; - - m_regex_decl = m->mk_sort(symbol("Regex"), sort_info(id, REGEX_SORT)); - m->inc_ref(m_regex_decl); - sort * re = m_regex_decl; - - SASSERT(m_manager->has_plugin(symbol("arith"))); - m_arith_fid = m_manager->mk_family_id("arith"); - m_arith_plugin = static_cast(m_manager->get_plugin(m_arith_fid)); - SASSERT(m_arith_plugin); - - m_int_sort = m_manager->mk_sort(m_arith_fid, INT_SORT); - SASSERT(m_int_sort != 0); // arith_decl_plugin must be installed before str_decl_plugin. - m_manager->inc_ref(m_int_sort); - sort * i = m_int_sort; - - sort* boolT = m_manager->mk_bool_sort(); - -#define MK_OP(FIELD, NAME, KIND, SORT) \ - FIELD = m->mk_func_decl(symbol(NAME), SORT, SORT, SORT, func_decl_info(id, KIND)); \ - m->inc_ref(FIELD) - - MK_OP(m_concat_decl, "str.++", OP_STRCAT, s); - - m_length_decl = m->mk_func_decl(symbol("str.len"), s, i, func_decl_info(id, OP_STRLEN)); - m_manager->inc_ref(m_length_decl); - - m_charat_decl = m->mk_func_decl(symbol("str.at"), s, i, s, func_decl_info(id, OP_STR_CHARAT)); - m_manager->inc_ref(m_charat_decl); - - m_startswith_decl = m->mk_func_decl(symbol("StartsWith"), s, s, boolT, func_decl_info(id, OP_STR_STARTSWITH)); - m_manager->inc_ref(m_startswith_decl); - - m_endswith_decl = m->mk_func_decl(symbol("EndsWith"), s, s, boolT, func_decl_info(id, OP_STR_ENDSWITH)); - m_manager->inc_ref(m_endswith_decl); - - m_contains_decl = m->mk_func_decl(symbol("str.contains"), s, s, boolT, func_decl_info(id, OP_STR_CONTAINS)); - m_manager->inc_ref(m_contains_decl); - - m_indexof_decl = m->mk_func_decl(symbol("str.indexof"), s, s, i, func_decl_info(id, OP_STR_INDEXOF)); - m_manager->inc_ref(m_indexof_decl); - - { - sort * d[3] = { s, s, i }; - m_indexof2_decl = m->mk_func_decl(symbol("Indexof2"), 3, d, i, func_decl_info(id, OP_STR_INDEXOF2)); - m_manager->inc_ref(m_indexof2_decl); - } - - m_lastindexof_decl = m->mk_func_decl(symbol("str.lastindexof"), s, s, i, func_decl_info(id, OP_STR_LASTINDEXOF)); - m_manager->inc_ref(m_lastindexof_decl); - - { - sort * d[3] = {s, i, i }; - m_substr_decl = m->mk_func_decl(symbol("str.substr"), 3, d, s, func_decl_info(id, OP_STR_SUBSTR)); - m_manager->inc_ref(m_substr_decl); - } - - { - sort * d[3] = {s, s, s}; - m_replace_decl = m->mk_func_decl(symbol("str.replace"), 3, d, s, func_decl_info(id, OP_STR_REPLACE)); - m_manager->inc_ref(m_replace_decl); - } - - m_prefixof_decl = m->mk_func_decl(symbol("str.prefixof"), s, s, boolT, func_decl_info(id, OP_STR_PREFIXOF)); - m_manager->inc_ref(m_prefixof_decl); - - m_suffixof_decl = m->mk_func_decl(symbol("str.suffixof"), s, s, boolT, func_decl_info(id, OP_STR_SUFFIXOF)); - m_manager->inc_ref(m_suffixof_decl); - - m_str2int_decl = m->mk_func_decl(symbol("str.to-int"), s, i, func_decl_info(id, OP_STR_STR2INT)); - m_manager->inc_ref(m_str2int_decl); - - m_int2str_decl = m->mk_func_decl(symbol("str.from-int"), i, s, func_decl_info(id, OP_STR_INT2STR)); - m_manager->inc_ref(m_int2str_decl); - - m_re_str2regex_decl = m->mk_func_decl(symbol("str.to.re"), s, re, func_decl_info(id, OP_RE_STR2REGEX)); - m_manager->inc_ref(m_re_str2regex_decl); - - m_re_regexin_decl = m->mk_func_decl(symbol("str.in.re"), s, re, boolT, func_decl_info(id, OP_RE_REGEXIN)); - m_manager->inc_ref(m_re_regexin_decl); - - m_re_regexconcat_decl = m->mk_func_decl(symbol("re.++"), re, re, re, func_decl_info(id, OP_RE_REGEXCONCAT)); - m_manager->inc_ref(m_re_regexconcat_decl); - - m_re_regexstar_decl = m->mk_func_decl(symbol("re.*"), re, re, func_decl_info(id, OP_RE_REGEXSTAR)); - m_manager->inc_ref(m_re_regexstar_decl); - - m_re_regexplus_decl = m->mk_func_decl(symbol("re.+"), re, re, func_decl_info(id, OP_RE_REGEXPLUS)); - m_manager->inc_ref(m_re_regexplus_decl); - - m_re_regexunion_decl = m->mk_func_decl(symbol("re.union"), re, re, re, func_decl_info(id, OP_RE_REGEXUNION)); - m_manager->inc_ref(m_re_regexunion_decl); - - m_re_unroll_decl = m->mk_func_decl(symbol("Unroll"), re, i, s, func_decl_info(id, OP_RE_UNROLL)); - m_manager->inc_ref(m_re_unroll_decl); - - m_re_regexcharrange_decl = m->mk_func_decl(symbol("re.range"), s, s, re, func_decl_info(id, OP_RE_REGEXCHARRANGE)); - m_manager->inc_ref(m_re_regexcharrange_decl); - -} - -decl_plugin * str_decl_plugin::mk_fresh() { - return alloc(str_decl_plugin); -} - -sort * str_decl_plugin::mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters) { - switch (k) { - case STRING_SORT: return m_str_decl; - case REGEX_SORT: return m_regex_decl; - default: return 0; - } -} - -func_decl * str_decl_plugin::mk_func_decl(decl_kind k) { - switch(k) { - case OP_STRCAT: return m_concat_decl; - case OP_STRLEN: return m_length_decl; - case OP_STR_CHARAT: return m_charat_decl; - case OP_STR_STARTSWITH: return m_startswith_decl; - case OP_STR_ENDSWITH: return m_endswith_decl; - case OP_STR_CONTAINS: return m_contains_decl; - case OP_STR_INDEXOF: return m_indexof_decl; - case OP_STR_INDEXOF2: return m_indexof2_decl; - case OP_STR_LASTINDEXOF: return m_lastindexof_decl; - case OP_STR_SUBSTR: return m_substr_decl; - case OP_STR_REPLACE: return m_replace_decl; - case OP_STR_PREFIXOF: return m_prefixof_decl; - case OP_STR_SUFFIXOF: return m_suffixof_decl; - case OP_STR_STR2INT: return m_str2int_decl; - case OP_STR_INT2STR: return m_int2str_decl; - case OP_RE_STR2REGEX: return m_re_str2regex_decl; - case OP_RE_REGEXIN: return m_re_regexin_decl; - case OP_RE_REGEXCONCAT: return m_re_regexconcat_decl; - case OP_RE_REGEXSTAR: return m_re_regexstar_decl; - case OP_RE_REGEXPLUS: return m_re_regexplus_decl; - case OP_RE_REGEXUNION: return m_re_regexunion_decl; - case OP_RE_UNROLL: return m_re_unroll_decl; - case OP_RE_REGEXCHARRANGE: return m_re_regexcharrange_decl; - default: return 0; - } -} - -func_decl * str_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, parameter const * parameters, - unsigned arity, sort * const * domain, sort * range) { - if (k == OP_STR) { - m_manager->raise_exception("OP_STR not yet implemented in mk_func_decl!"); - return 0; - } - if (arity == 0) { - m_manager->raise_exception("no arguments supplied to string operator"); - return 0; - } - return mk_func_decl(k); -} - -app * str_decl_plugin::mk_string(std::string & val) { - std::map::iterator it = string_cache.find(val); - //if (it == string_cache.end()) { - if (true) { - char * new_buffer = alloc_svect(char, (val.length() + 1)); - strcpy(new_buffer, val.c_str()); - parameter p[1] = {parameter(new_buffer)}; - func_decl * d; - d = m_manager->mk_const_decl(m_strv_sym, m_str_decl, func_decl_info(m_family_id, OP_STR, 1, p)); - app * str = m_manager->mk_const(d); - string_cache[val] = str; - return str; - } else { - return it->second; - } -} - -app * str_decl_plugin::mk_string(const char * val) { - std::string key(val); - return mk_string(key); -} - -app * str_decl_plugin::mk_fresh_string() { - // cheating. - // take the longest string in the cache, append the letter "A", and call it fresh. - std::string longestString = ""; - std::map::iterator it = string_cache.begin(); - for (; it != string_cache.end(); ++it) { - if (it->first.length() > longestString.length()) { - longestString = it->first; - } - } - longestString += "A"; - return mk_string(longestString); -} - -void str_decl_plugin::get_op_names(svector & op_names, symbol const & logic) { - op_names.push_back(builtin_name("str.++", OP_STRCAT)); - op_names.push_back(builtin_name("str.len", OP_STRLEN)); - op_names.push_back(builtin_name("str.at", OP_STR_CHARAT)); - op_names.push_back(builtin_name("StartsWith", OP_STR_STARTSWITH)); - op_names.push_back(builtin_name("EndsWith", OP_STR_ENDSWITH)); - op_names.push_back(builtin_name("str.contains", OP_STR_CONTAINS)); - op_names.push_back(builtin_name("str.indexof", OP_STR_INDEXOF)); - op_names.push_back(builtin_name("Indexof2", OP_STR_INDEXOF2)); - op_names.push_back(builtin_name("str.lastindexof", OP_STR_LASTINDEXOF)); - op_names.push_back(builtin_name("str.substr", OP_STR_SUBSTR)); - op_names.push_back(builtin_name("str.replace", OP_STR_REPLACE)); - op_names.push_back(builtin_name("str.prefixof", OP_STR_PREFIXOF)); - op_names.push_back(builtin_name("str.suffixof", OP_STR_SUFFIXOF)); - op_names.push_back(builtin_name("str.to-int", OP_STR_STR2INT)); - op_names.push_back(builtin_name("str.from-int", OP_STR_INT2STR)); - op_names.push_back(builtin_name("str.to.re", OP_RE_STR2REGEX)); - op_names.push_back(builtin_name("str.in.re", OP_RE_REGEXIN)); - op_names.push_back(builtin_name("re.++", OP_RE_REGEXCONCAT)); - op_names.push_back(builtin_name("re.*", OP_RE_REGEXSTAR)); - op_names.push_back(builtin_name("re.union", OP_RE_REGEXUNION)); - op_names.push_back(builtin_name("re.+", OP_RE_REGEXPLUS)); - op_names.push_back(builtin_name("Unroll", OP_RE_UNROLL)); - op_names.push_back(builtin_name("re.range", OP_RE_REGEXCHARRANGE)); -} - -void str_decl_plugin::get_sort_names(svector & sort_names, symbol const & logic) { - sort_names.push_back(builtin_name("String", STRING_SORT)); - sort_names.push_back(builtin_name("Regex", REGEX_SORT)); -} - -bool str_decl_plugin::is_value(app * e) const { - if (e->get_family_id() != m_family_id) { - return false; - } - switch (e->get_decl_kind()) { - case OP_STR: - return true; - default: - return false; - } -} - -bool str_recognizers::is_string(expr const * n, const char ** val) const { - if (!is_app_of(n, m_afid, OP_STR)) - return false; - func_decl * decl = to_app(n)->get_decl(); - *val = decl->get_parameter(0).get_string(); - return true; -} - -bool str_recognizers::is_string(expr const * n) const { - const char * tmp = 0; - return is_string(n, & tmp); -} - -std::string str_recognizers::get_string_constant_value(expr const *n) const { - const char * cstr = 0; - bool isString = is_string(n, & cstr); - SASSERT(isString); - std::string strval(cstr); - return strval; -} - -str_util::str_util(ast_manager &m) : - str_recognizers(m.mk_family_id(symbol("str"))), - m_manager(m) { - SASSERT(m.has_plugin(symbol("str"))); - m_plugin = static_cast(m.get_plugin(m.mk_family_id(symbol("str")))); - m_fid = m_plugin->get_family_id(); -} - -/* - * Scan through the string 'val' and interpret each instance of "backslash followed by a character" - * as a possible escape sequence. Emit all other characters as-is. - * This exists because the SMT-LIB 2.5 standard does not recognize escape sequences other than "" -> " . - * The escape sequences recognized are as follows: - * \a \b \e \f \n \r \t \v \\ : as specified by the C++ standard - * \ooo : produces the ASCII character corresponding to the octal value "ooo", where each "o" is a - * single octal digit and between 1 and 3 valid digits are given - * \xhh : produces the ASCII character corresponding to the hexadecimal value "hh", where each "h" is a - * single case-insensitive hex digit (0-9A-F) and exactly 2 digits are given - * \C, for any character C that does not start a legal escape sequence : the backslash is ignored and "C" is produced. - */ -app * str_util::mk_string_with_escape_characters(std::string & val) { - std::string parsedStr; - parsedStr.reserve(val.length()); - for (unsigned i = 0; i < val.length(); ++i) { - char nextChar = val.at(i); - - if (nextChar == '\\') { - // check escape sequence - i++; - if (i >= val.length()) { - get_manager().raise_exception("invalid escape sequence"); - } - char escapeChar1 = val.at(i); - if (escapeChar1 == 'a') { - parsedStr.push_back('\a'); - } else if (escapeChar1 == 'b') { - parsedStr.push_back('\b'); - } else if (escapeChar1 == 'e') { - parsedStr.push_back('\e'); - } else if (escapeChar1 == 'f') { - parsedStr.push_back('\f'); - } else if (escapeChar1 == 'n') { - parsedStr.push_back('\n'); - } else if (escapeChar1 == 'r') { - parsedStr.push_back('\r'); - } else if (escapeChar1 == 't') { - parsedStr.push_back('\t'); - } else if (escapeChar1 == 'v') { - parsedStr.push_back('\v'); - } else if (escapeChar1 == '\\') { - parsedStr.push_back('\\'); - } else if (escapeChar1 == 'x') { - // hex escape: we expect 'x' to be followed by exactly two hex digits - // which means that i+2 must be a valid index - if (i+2 >= val.length()) { - get_manager().raise_exception("invalid hex escape: \\x must be followed by exactly two hex digits"); - } - char hexDigitHi = val.at(i+1); - char hexDigitLo = val.at(i+2); - i += 2; - if (!isxdigit((int)hexDigitHi) || !isxdigit((int)hexDigitLo)) { - get_manager().raise_exception("invalid hex escape: \\x must be followed by exactly two hex digits"); - } - char tmp[3] = {hexDigitHi, hexDigitLo, '\0'}; - long converted = strtol(tmp, NULL, 16); - unsigned char convChar = (unsigned char)converted; - parsedStr.push_back(convChar); - } else if (escapeChar1 == '0' || escapeChar1 == '1' || escapeChar1 == '2' || escapeChar1 == '3' || - escapeChar1 == '4' || escapeChar1 == '5' || escapeChar1 == '6' || escapeChar1 == '7') { - // octal escape: we expect exactly three octal digits - // which means that val[i], val[i+1], val[i+2] must all be octal digits - // and that i+2 must be a valid index - if (i+2 >= val.length()) { - get_manager().raise_exception("invalid octal escape: exactly three octal digits required"); - } - char c2 = escapeChar1; - char c1 = val.at(i+1); - char c0 = val.at(i+2); - i += 2; - - if (!isdigit(c2) || !isdigit(c1) || !isdigit(c0)) { - get_manager().raise_exception("invalid octal escape: exactly three octal digits required"); - } - - if (c2 == '8' || c2 == '9' || c1 == '8' || c1 == '9' || c0 == '8' || c0 == '9') { - get_manager().raise_exception("invalid octal escape: exactly three octal digits required"); - } - - char tmp[4] = {c2, c1, c0, '\0'}; - long converted = strtol(tmp, NULL, 8); - unsigned char convChar = (unsigned char)converted; - parsedStr.push_back(convChar); - } else { - // unrecognized escape sequence -- just emit that character - parsedStr.push_back(escapeChar1); - } - } else { - parsedStr.push_back(nextChar); - } - - // i is incremented at the end of this loop. - // If it is modified, ensure that it points to the index before - // the next character. - } - return mk_string(parsedStr); -} - -static std::string str2RegexStr(std::string str) { - std::string res = ""; - int len = str.size(); - for (int i = 0; i < len; i++) { - char nc = str[i]; - // 12 special chars - if (nc == '\\' || nc == '^' || nc == '$' || nc == '.' || nc == '|' || nc == '?' - || nc == '*' || nc == '+' || nc == '(' || nc == ')' || nc == '[' || nc == '{') { - res.append(1, '\\'); - } - res.append(1, str[i]); - } - return res; -} - -std::string str_util::get_std_regex_str(expr * regex) { - app * a_regex = to_app(regex); - if (is_re_Str2Reg(a_regex)) { - expr * regAst = a_regex->get_arg(0); - std::string regStr = str2RegexStr(get_string_constant_value(regAst)); - return regStr; - } else if (is_re_RegexConcat(a_regex)) { - expr * reg1Ast = a_regex->get_arg(0); - expr * reg2Ast = a_regex->get_arg(1); - std::string reg1Str = get_std_regex_str(reg1Ast); - std::string reg2Str = get_std_regex_str(reg2Ast); - return "(" + reg1Str + ")(" + reg2Str + ")"; - } else if (is_re_RegexUnion(a_regex)) { - expr * reg1Ast = a_regex->get_arg(0); - expr * reg2Ast = a_regex->get_arg(1); - std::string reg1Str = get_std_regex_str(reg1Ast); - std::string reg2Str = get_std_regex_str(reg2Ast); - return "(" + reg1Str + ")|(" + reg2Str + ")"; - } else if (is_re_RegexStar(a_regex)) { - expr * reg1Ast = a_regex->get_arg(0); - std::string reg1Str = get_std_regex_str(reg1Ast); - return "(" + reg1Str + ")*"; - } else { - TRACE("t_str", tout << "BUG: unrecognized regex term " << mk_pp(regex, get_manager()) << std::endl;); - UNREACHABLE(); return ""; - } -} - -#endif /* disable */ diff --git a/src/ast/str_decl_plugin.h b/src/ast/str_decl_plugin.h deleted file mode 100644 index 28ecd1e43..000000000 --- a/src/ast/str_decl_plugin.h +++ /dev/null @@ -1,218 +0,0 @@ -/*++ -Module Name: - - str_decl_plugin.h - -Abstract: - - - -Author: - - Murphy Berzish (mtrberzi) 2015-09-02. - -Revision History: - ---*/ - -#if 0 - -#ifndef _STR_DECL_PLUGIN_H_ -#define _STR_DECL_PLUGIN_H_ - -#include"ast.h" -#include"arith_decl_plugin.h" -#include - -enum str_sort_kind { - STRING_SORT, - REGEX_SORT, -}; - -enum str_op_kind { - OP_STR, /* string constants */ - // basic string operators - OP_STRCAT, - OP_STRLEN, - // higher-level string functions -- these are reduced to basic operations - OP_STR_CHARAT, - OP_STR_STARTSWITH, - OP_STR_ENDSWITH, - OP_STR_CONTAINS, - OP_STR_INDEXOF, - OP_STR_INDEXOF2, - OP_STR_LASTINDEXOF, - OP_STR_SUBSTR, - OP_STR_REPLACE, - // SMT-LIB 2.5 standard operators -- these are rewritten to internal ones - OP_STR_PREFIXOF, - OP_STR_SUFFIXOF, - // string-integer conversion - OP_STR_STR2INT, - OP_STR_INT2STR, OP_STR_PLACEHOLDER1, OP_STR_PLACEHOLDER2, - // regular expression operators - OP_RE_STR2REGEX, - OP_RE_REGEXIN, - OP_RE_REGEXCONCAT, - OP_RE_REGEXSTAR, - OP_RE_REGEXUNION, - OP_RE_UNROLL, - // higher-level regex operators - OP_RE_REGEXPLUS, - OP_RE_REGEXCHARRANGE, - // end - LAST_STR_OP -}; - -class str_decl_plugin : public decl_plugin { -protected: - symbol m_strv_sym; - sort * m_str_decl; - sort * m_regex_decl; - - func_decl * m_concat_decl; - func_decl * m_length_decl; - - func_decl * m_charat_decl; - func_decl * m_startswith_decl; - func_decl * m_endswith_decl; - func_decl * m_contains_decl; - func_decl * m_indexof_decl; - func_decl * m_indexof2_decl; - func_decl * m_lastindexof_decl; - func_decl * m_substr_decl; - func_decl * m_replace_decl; - func_decl * m_str2int_decl; - func_decl * m_int2str_decl; - func_decl * m_prefixof_decl; - func_decl * m_suffixof_decl; - - func_decl * m_re_str2regex_decl; - func_decl * m_re_regexin_decl; - func_decl * m_re_regexconcat_decl; - func_decl * m_re_regexstar_decl; - func_decl * m_re_regexunion_decl; - func_decl * m_re_unroll_decl; - func_decl * m_re_regexplus_decl; - func_decl * m_re_regexcharrange_decl; - - arith_decl_plugin * m_arith_plugin; - family_id m_arith_fid; - sort * m_int_sort; - - std::map string_cache; - - virtual void set_manager(ast_manager * m, family_id id); - - func_decl * mk_func_decl(decl_kind k); -public: - str_decl_plugin(); - virtual ~str_decl_plugin(); - virtual void finalize(); - - virtual decl_plugin * mk_fresh(); - virtual sort * mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters); - virtual func_decl * mk_func_decl(decl_kind k, unsigned num_parameters, parameter const * parameters, - unsigned arity, sort * const * domain, sort * range); - - app * mk_string(const char * val); - app * mk_string(std::string & val); - app * mk_fresh_string(); - - virtual void get_op_names(svector & op_names, symbol const & logic); - virtual void get_sort_names(svector & sort_names, symbol const & logic); - - virtual bool is_value(app * e) const; - virtual bool is_unique_value(app * e) const { return is_value(e); } -}; - -class str_recognizers { - family_id m_afid; -public: - str_recognizers(family_id fid):m_afid(fid) {} - family_id get_fid() const { return m_afid; } - family_id get_family_id() const { return get_fid(); } - - bool is_str_sort(sort* s) const { return is_sort_of(s, m_afid, STRING_SORT); } - - bool is_string(expr const * n, const char ** val) const; - bool is_string(expr const * n) const; - - bool is_re_Str2Reg(expr const * n) const { return is_app_of(n, get_fid(), OP_RE_STR2REGEX); } - bool is_re_RegexConcat(expr const * n) const { return is_app_of(n, get_fid(), OP_RE_REGEXCONCAT); } - bool is_re_RegexUnion(expr const * n) const { return is_app_of(n, get_fid(), OP_RE_REGEXUNION); } - bool is_re_RegexStar(expr const * n) const { return is_app_of(n, get_fid(), OP_RE_REGEXSTAR); } - bool is_re_RegexPlus(expr const * n) const { return is_app_of(n, get_fid(), OP_RE_REGEXPLUS); } - - std::string get_string_constant_value(expr const *n) const; -}; - -class str_util : public str_recognizers { - ast_manager & m_manager; - str_decl_plugin * m_plugin; - family_id m_fid; -public: - str_util(ast_manager & m); - ast_manager & get_manager() const { return m_manager; } - str_decl_plugin & plugin() { return *m_plugin; } - - sort* mk_string_sort() const { return get_manager().mk_sort(m_fid, STRING_SORT, 0, 0); } - - app * mk_string(const char * val) { - return m_plugin->mk_string(val); - } - app * mk_string(std::string & val) { - return m_plugin->mk_string(val); - } - - app * mk_fresh_string() { - return m_plugin->mk_fresh_string(); - } - - app * mk_string_with_escape_characters(const char * val) { - std::string str(val); - return mk_string_with_escape_characters(str); - } - app * mk_string_with_escape_characters(std::string & val); - - app * mk_str_StartsWith(expr * haystack, expr * needle) { - expr * es[2] = {haystack, needle}; - return m_manager.mk_app(get_fid(), OP_STR_STARTSWITH, 2, es); - } - - app * mk_str_EndsWith(expr * haystack, expr * needle) { - expr * es[2] = {haystack, needle}; - return m_manager.mk_app(get_fid(), OP_STR_ENDSWITH, 2, es); - } - - app * mk_re_Str2Reg(expr * s) { - expr * es[1] = {s}; - return m_manager.mk_app(get_fid(), OP_RE_STR2REGEX, 1, es); - } - - app * mk_re_Str2Reg(std::string s) { - return mk_re_Str2Reg(mk_string(s)); - } - - app * mk_re_RegexUnion(expr * e1, expr * e2) { - expr * es[2] = {e1, e2}; - return m_manager.mk_app(get_fid(), OP_RE_REGEXUNION, 2, es); - } - - app * mk_re_RegexConcat(expr * e1, expr * e2) { - expr * es[2] = {e1, e2}; - return m_manager.mk_app(get_fid(), OP_RE_REGEXCONCAT, 2, es); - } - - app * mk_re_RegexStar(expr * r) { - expr * es[1] = {r}; - return m_manager.mk_app(get_fid(), OP_RE_REGEXSTAR, 1, es); - } - - std::string get_std_regex_str(expr * regex); - -}; - -#endif /* _STR_DECL_PLUGIN_H_ */ - -#endif /* disable */