3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-08 10:25:18 +00:00

remove legacy str_decl_plugin and str_rewriter classes; these have been unified with sequence-compatible equivalents

This commit is contained in:
Murphy Berzish 2017-03-15 15:25:48 -04:00
parent 5917a34226
commit 8021d63539
6 changed files with 0 additions and 1544 deletions

View file

@ -30,7 +30,6 @@ Revision History:
#include"fpa_decl_plugin.h"
#include"dl_decl_plugin.h"
#include"seq_decl_plugin.h"
#include"str_decl_plugin.h"
#include"smt2_util.h"
class smt2_pp_environment {

View file

@ -24,7 +24,6 @@ Revision History:
#include"ast_smt_pp.h"
#include"arith_decl_plugin.h"
#include"bv_decl_plugin.h"
#include"str_decl_plugin.h"
#include"array_decl_plugin.h"
#include"datatype_decl_plugin.h"
#include"fpa_decl_plugin.h"

View file

@ -1,703 +0,0 @@
/*++
Copyright (c) 2016 Microsoft Corporation
Module Name:
str_rewriter.cpp
Abstract:
AST rewriting rules for string terms.
Author:
Murphy Berzish
Notes:
--*/
#if 0
#include"str_rewriter.h"
#include"arith_decl_plugin.h"
#include"ast_pp.h"
#include"ast_util.h"
#include"well_sorted.h"
#include<map>
#include<set>
#include<deque>
#include<cctype>
// Convert a regular expression to an e-NFA using Thompson's construction
void nfa::convert_re(expr * e, unsigned & start, unsigned & end, str_util & m_strutil) {
start = next_id();
end = next_id();
if (m_strutil.is_re_Str2Reg(e)) {
app * a = to_app(e);
expr * arg_str = a->get_arg(0);
if (m_strutil.is_string(arg_str)) {
std::string str = m_strutil.get_string_constant_value(arg_str);
TRACE("t_str_rw", tout << "build NFA for '" << str << "'" << std::endl;);
/*
* For an n-character string, we make (n-1) intermediate states,
* labelled i_(0) through i_(n-2).
* Then we construct the following transitions:
* start --str[0]--> i_(0) --str[1]--> i_(1) --...--> i_(n-2) --str[n-1]--> final
*/
unsigned last = start;
for (int i = 0; i <= ((int)str.length()) - 2; ++i) {
unsigned i_state = next_id();
make_transition(last, str.at(i), i_state);
TRACE("t_str_rw", tout << "string transition " << last << "--" << str.at(i) << "--> " << i_state << std::endl;);
last = i_state;
}
make_transition(last, str.at(str.length() - 1), end);
TRACE("t_str_rw", tout << "string transition " << last << "--" << str.at(str.length() - 1) << "--> " << end << std::endl;);
TRACE("t_str_rw", tout << "string NFA: start = " << start << ", end = " << end << std::endl;);
} else {
TRACE("t_str_rw", tout << "invalid string constant in Str2Reg" << std::endl;);
m_valid = false;
return;
}
} else if (m_strutil.is_re_RegexConcat(e)){
app * a = to_app(e);
expr * re1 = a->get_arg(0);
expr * re2 = a->get_arg(1);
unsigned start1, end1;
convert_re(re1, start1, end1, m_strutil);
unsigned start2, end2;
convert_re(re2, start2, end2, m_strutil);
// start --e--> start1 --...--> end1 --e--> start2 --...--> end2 --e--> end
make_epsilon_move(start, start1);
make_epsilon_move(end1, start2);
make_epsilon_move(end2, end);
TRACE("t_str_rw", tout << "concat NFA: start = " << start << ", end = " << end << std::endl;);
} else if (m_strutil.is_re_RegexUnion(e)) {
app * a = to_app(e);
expr * re1 = a->get_arg(0);
expr * re2 = a->get_arg(1);
unsigned start1, end1;
convert_re(re1, start1, end1, m_strutil);
unsigned start2, end2;
convert_re(re2, start2, end2, m_strutil);
// start --e--> start1 ; start --e--> start2
// end1 --e--> end ; end2 --e--> end
make_epsilon_move(start, start1);
make_epsilon_move(start, start2);
make_epsilon_move(end1, end);
make_epsilon_move(end2, end);
TRACE("t_str_rw", tout << "union NFA: start = " << start << ", end = " << end << std::endl;);
} else if (m_strutil.is_re_RegexStar(e)) {
app * a = to_app(e);
expr * subex = a->get_arg(0);
unsigned start_subex, end_subex;
convert_re(subex, start_subex, end_subex, m_strutil);
// start --e--> start_subex, start --e--> end
// end_subex --e--> start_subex, end_subex --e--> end
make_epsilon_move(start, start_subex);
make_epsilon_move(start, end);
make_epsilon_move(end_subex, start_subex);
make_epsilon_move(end_subex, end);
TRACE("t_str_rw", tout << "star NFA: start = " << start << ", end = " << end << std::endl;);
} else {
TRACE("t_str_rw", tout << "invalid regular expression" << std::endl;);
m_valid = false;
return;
}
}
void nfa::epsilon_closure(unsigned start, std::set<unsigned> & closure) {
std::deque<unsigned> worklist;
closure.insert(start);
worklist.push_back(start);
while(!worklist.empty()) {
unsigned state = worklist.front();
worklist.pop_front();
if (epsilon_map.find(state) != epsilon_map.end()) {
for (std::set<unsigned>::iterator it = epsilon_map[state].begin();
it != epsilon_map[state].end(); ++it) {
unsigned new_state = *it;
if (closure.find(new_state) == closure.end()) {
closure.insert(new_state);
worklist.push_back(new_state);
}
}
}
}
}
bool nfa::matches(std::string input) {
/*
* Keep a set of all states the NFA can currently be in.
* Initially this is the e-closure of m_start_state
* For each character A in the input string,
* the set of next states contains
* all states in transition_map[S][A] for each S in current_states,
* and all states in epsilon_map[S] for each S in current_states.
* After consuming the entire input string,
* the match is successful iff current_states contains m_end_state.
*/
std::set<unsigned> current_states;
epsilon_closure(m_start_state, current_states);
for (unsigned i = 0; i < input.length(); ++i) {
char A = input.at(i);
std::set<unsigned> next_states;
for (std::set<unsigned>::iterator it = current_states.begin();
it != current_states.end(); ++it) {
unsigned S = *it;
// check transition_map
if (transition_map[S].find(A) != transition_map[S].end()) {
next_states.insert(transition_map[S][A]);
}
}
// take e-closure over next_states to compute the actual next_states
std::set<unsigned> epsilon_next_states;
for (std::set<unsigned>::iterator it = next_states.begin(); it != next_states.end(); ++it) {
unsigned S = *it;
std::set<unsigned> closure;
epsilon_closure(S, closure);
epsilon_next_states.insert(closure.begin(), closure.end());
}
current_states = epsilon_next_states;
}
if (current_states.find(m_end_state) != current_states.end()) {
return true;
} else {
return false;
}
}
br_status str_rewriter::mk_str_Concat(expr * arg0, expr * arg1, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (Concat " << mk_pp(arg0, m()) << " " << mk_pp(arg1, m()) << ")" << std::endl;);
if(m_strutil.is_string(arg0) && m_strutil.is_string(arg1)) {
TRACE("t_str_rw", tout << "evaluating concat of two constant strings" << std::endl;);
std::string arg0Str = m_strutil.get_string_constant_value(arg0);
std::string arg1Str = m_strutil.get_string_constant_value(arg1);
std::string resultStr = arg0Str + arg1Str;
result = m_strutil.mk_string(resultStr);
return BR_DONE;
} else {
return BR_FAILED;
}
}
br_status str_rewriter::mk_str_Length(expr * arg0, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (Length " << mk_pp(arg0, m()) << ")" << std::endl;);
if (m_strutil.is_string(arg0)) {
TRACE("t_str_rw", tout << "evaluating length of constant string" << std::endl;);
std::string arg0Str = m_strutil.get_string_constant_value(arg0);
rational arg0Len((unsigned)arg0Str.length());
result = m_autil.mk_numeral(arg0Len, true);
TRACE("t_str_rw", tout << "result is " << mk_pp(result, m()) << std::endl;);
return BR_DONE;
} else {
return BR_FAILED;
}
}
br_status str_rewriter::mk_str_CharAt(expr * arg0, expr * arg1, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (CharAt " << mk_pp(arg0, m()) << " " << mk_pp(arg1, m()) << ")" << std::endl;);
// if arg0 is a string constant and arg1 is an integer constant,
// we can rewrite this by evaluating the expression
rational arg1Int;
if (m_strutil.is_string(arg0) && m_autil.is_numeral(arg1, arg1Int)) {
TRACE("t_str_rw", tout << "evaluating constant CharAt expression" << std::endl;);
std::string arg0Str = m_strutil.get_string_constant_value(arg0);
std::string resultStr;
if (arg1Int >= rational(0) && arg1Int <= rational((unsigned)arg0Str.length())) {
resultStr = arg0Str.at(arg1Int.get_unsigned());
TRACE("t_str_rw", tout << "result is '" << resultStr << "'" << std::endl;);
} else {
resultStr = "";
TRACE("t_str_rw", tout << "bogus length argument, result is empty string" << std::endl;);
}
result = m_strutil.mk_string(resultStr);
return BR_DONE;
} else {
return BR_FAILED;
}
}
br_status str_rewriter::mk_str_StartsWith(expr * haystack, expr * needle, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (StartsWith " << mk_pp(haystack, m()) << " " << mk_pp(needle, m()) << ")" << std::endl;);
if (m_strutil.is_string(haystack) && m_strutil.is_string(needle)) {
TRACE("t_str_rw", tout << "evaluating constant StartsWith predicate" << std::endl;);
std::string haystackStr = m_strutil.get_string_constant_value(haystack);
std::string needleStr = m_strutil.get_string_constant_value(needle);
if (haystackStr.length() < needleStr.length()) {
result = m().mk_false();
return BR_DONE;
} else {
if (haystackStr.substr(0, needleStr.length()) == needleStr) {
result = m().mk_true();
} else {
result = m().mk_false();
}
return BR_DONE;
}
} else {
return BR_FAILED;
}
}
br_status str_rewriter::mk_str_EndsWith(expr * haystack, expr * needle, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (EndsWith " << mk_pp(haystack, m()) << " " << mk_pp(needle, m()) << ")" << std::endl;);
if (m_strutil.is_string(haystack) && m_strutil.is_string(needle)) {
TRACE("t_str_rw", tout << "evaluating constant EndsWith predicate" << std::endl;);
std::string haystackStr = m_strutil.get_string_constant_value(haystack);
std::string needleStr = m_strutil.get_string_constant_value(needle);
if (haystackStr.length() < needleStr.length()) {
result = m().mk_false();
return BR_DONE;
} else {
if (haystackStr.substr(haystackStr.length() - needleStr.length(), needleStr.length()) == needleStr) {
result = m().mk_true();
} else {
result = m().mk_false();
}
return BR_DONE;
}
} else {
return BR_FAILED;
}
}
br_status str_rewriter::mk_str_Contains(expr * haystack, expr * needle, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (Contains " << mk_pp(haystack, m()) << " " << mk_pp(needle, m()) << ")" << std::endl;);
if (haystack == needle) {
TRACE("t_str_rw", tout << "eliminate (Contains) over identical terms" << std::endl;);
result = m().mk_true();
return BR_DONE;
} else if (m_strutil.is_string(haystack) && m_strutil.is_string(needle)) {
TRACE("t_str_rw", tout << "evaluating constant Contains predicate" << std::endl;);
std::string haystackStr = m_strutil.get_string_constant_value(haystack);
std::string needleStr = m_strutil.get_string_constant_value(needle);
if (haystackStr.find(needleStr) != std::string::npos) {
result = m().mk_true();
} else {
result = m().mk_false();
}
return BR_DONE;
} else {
return BR_FAILED;
}
}
br_status str_rewriter::mk_str_Indexof(expr * haystack, expr * needle, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (Indexof " << mk_pp(haystack, m()) << " " << mk_pp(needle, m()) << ")" << std::endl;);
if (m_strutil.is_string(haystack) && m_strutil.is_string(needle)) {
TRACE("t_str_rw", tout << "evaluating constant Indexof expression" << std::endl;);
std::string haystackStr = m_strutil.get_string_constant_value(haystack);
std::string needleStr = m_strutil.get_string_constant_value(needle);
if (haystackStr.find(needleStr) != std::string::npos) {
int index = haystackStr.find(needleStr);
result = m_autil.mk_numeral(rational(index), true);
} else {
result = m_autil.mk_numeral(rational(-1), true);
}
return BR_DONE;
} else {
return BR_FAILED;
}
}
br_status str_rewriter::mk_str_Indexof2(expr * arg0, expr * arg1, expr * arg2, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (Indexof2 " << mk_pp(arg0, m()) << " " << mk_pp(arg1, m()) << " " << mk_pp(arg2, m()) << ")" << std::endl;);
//if (getNodeType(t, args[0]) == my_Z3_ConstStr && getNodeType(t, args[1]) == my_Z3_ConstStr && getNodeType(t, args[2]) == my_Z3_Num) {
rational arg2Int;
if (m_strutil.is_string(arg0) && m_strutil.is_string(arg1) && m_autil.is_numeral(arg2, arg2Int)) {
TRACE("t_str_rw", tout << "evaluating constant Indexof2 expression" << std::endl;);
std::string arg0str = m_strutil.get_string_constant_value(arg0);
std::string arg1str = m_strutil.get_string_constant_value(arg1);
if (arg2Int >= rational((unsigned)arg0str.length())) {
result = m_autil.mk_numeral(rational(-1), true);
} else if (arg2Int < rational(0)) {
int index = arg0str.find(arg1str);
result = m_autil.mk_numeral(rational(index), true);
} else {
std::string suffixStr = arg0str.substr(arg2Int.get_unsigned(), arg0str.length() - arg2Int.get_unsigned());
if (suffixStr.find(arg1str) != std::string::npos) {
int index = suffixStr.find(arg1str) + arg2Int.get_unsigned();
result = m_autil.mk_numeral(rational(index), true);
} else {
result = m_autil.mk_numeral(rational(-1), true);
}
}
return BR_DONE;
} else {
return BR_FAILED;
}
}
br_status str_rewriter::mk_str_LastIndexof(expr * haystack, expr * needle, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (LastIndexof " << mk_pp(haystack, m()) << " " << mk_pp(needle, m()) << ")" << std::endl;);
if (m_strutil.is_string(haystack) && m_strutil.is_string(needle)) {
TRACE("t_str_rw", tout << "evaluating constant LastIndexof expression" << std::endl;);
std::string arg0Str = m_strutil.get_string_constant_value(haystack);
std::string arg1Str = m_strutil.get_string_constant_value(needle);
if (arg0Str.rfind(arg1Str) != std::string::npos) {
int index = arg0Str.rfind(arg1Str);
result = m_autil.mk_numeral(rational(index), true);
} else {
result = m_autil.mk_numeral(rational(-1), true);
}
return BR_DONE;
} else {
return BR_FAILED;
}
}
br_status str_rewriter::mk_str_Replace(expr * base, expr * source, expr * target, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (Replace " << mk_pp(base, m()) << " " << mk_pp(source, m()) << " " << mk_pp(target, m()) << ")" << std::endl;);
if (m_strutil.is_string(base) && m_strutil.is_string(source) && m_strutil.is_string(target)) {
std::string arg0Str = m_strutil.get_string_constant_value(base);
std::string arg1Str = m_strutil.get_string_constant_value(source);
std::string arg2Str = m_strutil.get_string_constant_value(target);
if (arg0Str.find(arg1Str) != std::string::npos) {
int index1 = arg0Str.find(arg1Str);
int index2 = index1 + arg1Str.length();
std::string substr0 = arg0Str.substr(0, index1);
std::string substr2 = arg0Str.substr(index2);
std::string replaced = substr0 + arg2Str + substr2;
result = m_strutil.mk_string(replaced);
} else {
result = base;
}
return BR_DONE;
} else {
return BR_FAILED;
}
}
br_status str_rewriter::mk_str_prefixof(expr * pre, expr * full, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (str.prefixof " << mk_pp(pre, m()) << " " << mk_pp(full, m()) << ")" << std::endl;);
result = m_strutil.mk_str_StartsWith(full, pre);
return BR_REWRITE_FULL;
}
br_status str_rewriter::mk_str_suffixof(expr * post, expr * full, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (str.suffixof" << mk_pp(post, m()) << " " << mk_pp(full, m()) << ")" << std::endl;);
result = m_strutil.mk_str_EndsWith(full, post);
return BR_REWRITE_FULL;
}
br_status str_rewriter::mk_str_to_int(expr * arg0, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (str.to-int " << mk_pp(arg0, m()) << ")" << std::endl;);
if (m_strutil.is_string(arg0)) {
std::string str = m_strutil.get_string_constant_value(arg0);
if (str.length() == 0) {
result = m_autil.mk_numeral(rational::zero(), true);
return BR_DONE;
}
// interpret str as a natural number and rewrite to the corresponding integer.
// if this is not valid, rewrite to -1
rational convertedRepresentation(0);
rational ten(10);
for (unsigned i = 0; i < str.length(); ++i) {
char digit = str.at(i);
if (isdigit((int)digit)) {
std::string sDigit(1, digit);
int val = atoi(sDigit.c_str());
convertedRepresentation = (ten * convertedRepresentation) + rational(val);
} else {
// not a digit, invalid
TRACE("t_str_rw", tout << "str.to-int argument contains non-digit character '" << digit << "'" << std::endl;);
convertedRepresentation = rational::minus_one();
break;
}
}
result = m_autil.mk_numeral(convertedRepresentation, true);
return BR_DONE;
}
return BR_FAILED;
}
br_status str_rewriter::mk_str_from_int(expr * arg0, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (str.from-int " << mk_pp(arg0, m()) << ")" << std::endl;);
rational arg0Int;
if (m_autil.is_numeral(arg0, arg0Int)) {
// (str.from-int N) with N non-negative is the corresponding string in decimal notation.
// otherwise it is the empty string
if (arg0Int.is_nonneg()) {
std::string str = arg0Int.to_string();
result = m_strutil.mk_string(str);
TRACE("t_str_rw", tout << "convert non-negative integer constant to " << str << std::endl;);
} else {
result = m_strutil.mk_string("");
TRACE("t_str_rw", tout << "convert invalid integer constant to empty string" << std::endl;);
}
return BR_DONE;
}
return BR_FAILED;
}
br_status str_rewriter::mk_str_Substr(expr * base, expr * start, expr * len, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (Substr " << mk_pp(base, m()) << " " << mk_pp(start, m()) << " " << mk_pp(len, m()) << ")" << std::endl;);
bool constant_base = m_strutil.is_string(base);
std::string baseStr;
if (constant_base) {
baseStr = m_strutil.get_string_constant_value(base);
}
rational startVal;
bool constant_start = m_autil.is_numeral(start, startVal);
rational lenVal;
bool constant_len = m_autil.is_numeral(len, lenVal);
// case 1: start < 0 or len < 0
if ( (constant_start && startVal.is_neg()) || (constant_len && lenVal.is_neg()) ) {
TRACE("t_str_rw", tout << "start/len of substr is negative" << std::endl;);
result = m_strutil.mk_string("");
return BR_DONE;
}
// case 1.1: start >= length(base)
if (constant_start && constant_base) {
rational baseStrlen((unsigned int)baseStr.length());
if (startVal >= baseStrlen) {
TRACE("t_str_rw", tout << "start >= strlen for substr" << std::endl;);
result = m_strutil.mk_string("");
return BR_DONE;
}
}
if (constant_base && constant_start && constant_len) {
rational baseStrlen((unsigned int)baseStr.length());
std::string retval;
if (startVal + lenVal >= baseStrlen) {
// case 2: pos+len goes past the end of the string
retval = baseStr.substr(startVal.get_unsigned(), std::string::npos);
} else {
// case 3: pos+len still within string
retval = baseStr.substr(startVal.get_unsigned(), lenVal.get_unsigned());
}
result = m_strutil.mk_string(retval);
return BR_DONE;
}
return BR_FAILED;
}
br_status str_rewriter::mk_re_Str2Reg(expr * str, expr_ref & result) {
// the argument to Str2Reg *must* be a string constant
ENSURE(m_strutil.is_string(str));
return BR_FAILED;
}
br_status str_rewriter::mk_re_RegexIn(expr * str, expr * re, expr_ref & result) {
// fast path:
// (RegexIn E (Str2Reg S)) --> (= E S)
if (m_strutil.is_re_Str2Reg(re)) {
expr * regexStr = to_app(re)->get_arg(0);
ENSURE(m_strutil.is_string(regexStr));
result = m().mk_eq(str, regexStr);
TRACE("t_str_rw", tout << "RegexIn fast path: " << mk_pp(str, m()) << " in " << mk_pp(re, m()) << " ==> " << mk_pp(result, m()) << std::endl;);
return BR_REWRITE_FULL;
}
// necessary for model validation
if (m_strutil.is_string(str)) {
TRACE("t_str_rw", tout << "RegexIn with constant string argument" << std::endl;);
nfa regex_nfa(m_strutil, re);
ENSURE(regex_nfa.is_valid());
std::string input = m_strutil.get_string_constant_value(str);
if (regex_nfa.matches(input)) {
result = m().mk_true();
} else {
result = m().mk_false();
}
return BR_DONE;
}
return BR_FAILED;
}
br_status str_rewriter::mk_re_RegexStar(expr * re, expr_ref & result) {
if (m_strutil.is_re_RegexStar(re)) {
result = re;
return BR_REWRITE_FULL;
} else {
return BR_FAILED;
}
}
br_status str_rewriter::mk_re_RegexConcat(expr * r0, expr * r1, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (RegexConcat " << mk_pp(r0, m()) << " " << mk_pp(r1, m()) << ")" << std::endl;);
// (RegexConcat (Str2Reg "A") (Str2Reg "B")) --> (Str2Reg "AB")
if (m_strutil.is_re_Str2Reg(r0) && m_strutil.is_re_Str2Reg(r1)) {
expr * r0str = to_app(r0)->get_arg(0);
expr * r1str = to_app(r1)->get_arg(0);
ENSURE(m_strutil.is_string(r0str));
ENSURE(m_strutil.is_string(r1str));
std::string r0val = m_strutil.get_string_constant_value(r0str);
std::string r1val = m_strutil.get_string_constant_value(r1str);
std::string simplifyVal = r0val + r1val;
TRACE("t_str_rw", tout << "RegexConcat fast path: both sides are Str2Reg, simplify to (Str2Reg \"" << simplifyVal << "\")" << std::endl;);
result = m_strutil.mk_re_Str2Reg(simplifyVal);
return BR_DONE;
}
return BR_FAILED;
}
br_status str_rewriter::mk_re_RegexPlus(expr * re, expr_ref & result) {
/*
* Two optimizations are possible if we inspect 're'.
* If 're' is (RegexPlus X), then reduce to 're'.
* If 're' is (RegexStar X), then reduce to 're'.
* Otherwise, reduce to (RegexConcat re (RegexStar re)).
*/
if (m_strutil.is_re_RegexPlus(re)) {
result = re;
return BR_REWRITE_FULL;
} else if (m_strutil.is_re_RegexStar(re)) {
// Z3str2 re-created the AST under 're' here, but I don't think we need to do that
result = re;
return BR_REWRITE_FULL;
} else {
result = m_strutil.mk_re_RegexConcat(re, m_strutil.mk_re_RegexStar(re));
return BR_REWRITE_FULL;
}
}
br_status str_rewriter::mk_re_RegexCharRange(expr * start, expr * end, expr_ref & result) {
TRACE("t_str_rw", tout << "rewrite (RegexCharRange " << mk_pp(start, m()) << " " << mk_pp(end, m()) << ")" << std::endl;);
// both 'start' and 'end' must be string constants
ENSURE(m_strutil.is_string(start) && m_strutil.is_string(end));
std::string arg0Value = m_strutil.get_string_constant_value(start);
std::string arg1Value = m_strutil.get_string_constant_value(end);
ENSURE(arg0Value.length() == 1 && arg1Value.length() == 1);
char low = arg0Value[0];
char high = arg1Value[0];
if (low > high) {
char t = low;
low = high;
high = t;
}
char c = low;
std::string cStr;
cStr.push_back(c);
expr * res = m_strutil.mk_re_Str2Reg(cStr);
c++;
for (; c <= high; c++) {
cStr.clear();
cStr.push_back(c);
res = m_strutil.mk_re_RegexUnion(res, m_strutil.mk_re_Str2Reg(cStr));
}
result = res;
return BR_DONE;
}
br_status str_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result) {
SASSERT(f->get_family_id() == get_fid());
TRACE("t_str_rw", tout << "rewrite app: " << f->get_name() << std::endl;);
switch(f->get_decl_kind()) {
case OP_STRCAT:
SASSERT(num_args == 2);
return mk_str_Concat(args[0], args[1], result);
case OP_STRLEN:
SASSERT(num_args == 1);
return mk_str_Length(args[0], result);
case OP_STR_CHARAT:
SASSERT(num_args == 2);
return mk_str_CharAt(args[0], args[1], result);
case OP_STR_STARTSWITH:
SASSERT(num_args == 2);
return mk_str_StartsWith(args[0], args[1], result);
case OP_STR_ENDSWITH:
SASSERT(num_args == 2);
return mk_str_EndsWith(args[0], args[1], result);
case OP_STR_CONTAINS:
SASSERT(num_args == 2);
return mk_str_Contains(args[0], args[1], result);
case OP_STR_INDEXOF:
SASSERT(num_args == 2);
return mk_str_Indexof(args[0], args[1], result);
case OP_STR_INDEXOF2:
SASSERT(num_args == 3);
return mk_str_Indexof2(args[0], args[1], args[2], result);
case OP_STR_LASTINDEXOF:
SASSERT(num_args == 2);
return mk_str_LastIndexof(args[0], args[1], result);
case OP_STR_REPLACE:
SASSERT(num_args == 3);
return mk_str_Replace(args[0], args[1], args[2], result);
case OP_STR_PREFIXOF:
SASSERT(num_args == 2);
return mk_str_prefixof(args[0], args[1], result);
case OP_STR_SUFFIXOF:
SASSERT(num_args == 2);
return mk_str_suffixof(args[0], args[1], result);
case OP_STR_STR2INT:
SASSERT(num_args == 1);
return mk_str_to_int(args[0], result);
case OP_STR_INT2STR:
SASSERT(num_args == 1);
return mk_str_from_int(args[0], result);
case OP_STR_SUBSTR:
SASSERT(num_args == 3);
return mk_str_Substr(args[0], args[1], args[2], result);
case OP_RE_STR2REGEX:
SASSERT(num_args == 1);
return mk_re_Str2Reg(args[0], result);
case OP_RE_REGEXIN:
SASSERT(num_args == 2);
return mk_re_RegexIn(args[0], args[1], result);
case OP_RE_REGEXPLUS:
SASSERT(num_args == 1);
return mk_re_RegexPlus(args[0], result);
case OP_RE_REGEXSTAR:
SASSERT(num_args == 1);
return mk_re_RegexStar(args[0], result);
case OP_RE_REGEXCONCAT:
SASSERT(num_args == 2);
return mk_re_RegexConcat(args[0], args[1], result);
case OP_RE_REGEXCHARRANGE:
SASSERT(num_args == 2);
return mk_re_RegexCharRange(args[0], args[1], result);
default:
return BR_FAILED;
}
}
br_status str_rewriter::mk_eq_core(expr * l, expr * r, expr_ref & result) {
// from seq_rewriter
expr_ref_vector lhs(m()), rhs(m()), res(m());
bool changed = false;
if (!reduce_eq(l, r, lhs, rhs, changed)) {
result = m().mk_false();
return BR_DONE;
}
if (!changed) {
return BR_FAILED;
}
for (unsigned i = 0; i < lhs.size(); ++i) {
res.push_back(m().mk_eq(lhs[i].get(), rhs[i].get()));
}
result = mk_and(res);
return BR_REWRITE3;
}
bool str_rewriter::reduce_eq(expr * l, expr * r, expr_ref_vector & lhs, expr_ref_vector & rhs, bool & change) {
change = false;
return true;
}
bool str_rewriter::reduce_eq(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_vector& lhs, expr_ref_vector& rhs, bool& change) {
change = false;
return true;
}
#endif /* disable */

View file

@ -1,120 +0,0 @@
/*++
Copyright (c) 2016 Microsoft Corporation
Module Name:
str_rewriter.h
Abstract:
AST rewriting rules for string terms.
Author:
Murphy Berzish
Notes:
--*/
#if 0
#include"str_decl_plugin.h"
#include"arith_decl_plugin.h"
#include"rewriter_types.h"
#include"params.h"
#include<set>
#include<map>
class str_rewriter {
str_util m_strutil;
arith_util m_autil;
public:
str_rewriter(ast_manager & m, params_ref const & p = params_ref()) :
m_strutil(m), m_autil(m) {
}
ast_manager & m() const { return m_strutil.get_manager(); }
family_id get_fid() const { return m_strutil.get_family_id(); }
void updt_params(params_ref const & p) {}
static void get_param_descrs(param_descrs & r) {}
br_status mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result);
br_status mk_eq_core(expr * lhs, expr * rhs, expr_ref & result);
br_status mk_str_Concat(expr * arg0, expr * arg1, expr_ref & result);
br_status mk_str_Length(expr * arg0, expr_ref & result);
br_status mk_str_CharAt(expr * arg0, expr * arg1, expr_ref & result);
br_status mk_str_StartsWith(expr * haystack, expr * needle, expr_ref & result);
br_status mk_str_EndsWith(expr * haystack, expr * needle, expr_ref & result);
br_status mk_str_Contains(expr * haystack, expr * needle, expr_ref & result);
br_status mk_str_Indexof(expr * haystack, expr * needle, expr_ref & result);
br_status mk_str_Indexof2(expr * arg0, expr * arg1, expr * arg2, expr_ref & result);
br_status mk_str_LastIndexof(expr * haystack, expr * needle, expr_ref & result);
br_status mk_str_Replace(expr * base, expr * source, expr * target, expr_ref & result);
br_status mk_str_Substr(expr * base, expr * start, expr * len, expr_ref & result);
br_status mk_str_prefixof(expr * pre, expr * full, expr_ref & result);
br_status mk_str_suffixof(expr * post, expr * full, expr_ref & result);
br_status mk_str_to_int(expr * arg0, expr_ref & result);
br_status mk_str_from_int(expr * arg0, expr_ref & result);
br_status mk_re_Str2Reg(expr * str, expr_ref & result);
br_status mk_re_RegexIn(expr * str, expr * re, expr_ref & result);
br_status mk_re_RegexPlus(expr * re, expr_ref & result);
br_status mk_re_RegexStar(expr * re, expr_ref & result);
br_status mk_re_RegexConcat(expr * r0, expr * r1, expr_ref & result);
br_status mk_re_RegexCharRange(expr * start, expr * end, expr_ref & result);
bool reduce_eq(expr * l, expr * r, expr_ref_vector & lhs, expr_ref_vector & rhs, bool & change);
bool reduce_eq(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_vector& lhs, expr_ref_vector& rhs, bool& change);
};
class nfa {
protected:
bool m_valid;
unsigned m_next_id;
unsigned next_id() {
unsigned retval = m_next_id;
++m_next_id;
return retval;
}
unsigned m_start_state;
unsigned m_end_state;
std::map<unsigned, std::map<char, unsigned> > transition_map;
std::map<unsigned, std::set<unsigned> > epsilon_map;
void make_transition(unsigned start, char symbol, unsigned end) {
transition_map[start][symbol] = end;
}
void make_epsilon_move(unsigned start, unsigned end) {
epsilon_map[start].insert(end);
}
// Convert a regular expression to an e-NFA using Thompson's construction
void convert_re(expr * e, unsigned & start, unsigned & end, str_util & m_strutil);
public:
nfa(str_util & m_strutil, expr * e)
: m_valid(true), m_next_id(0), m_start_state(0), m_end_state(0) {
convert_re(e, m_start_state, m_end_state, m_strutil);
}
nfa() : m_valid(false), m_next_id(0), m_start_state(0), m_end_state(0) {}
bool is_valid() const {
return m_valid;
}
void epsilon_closure(unsigned start, std::set<unsigned> & closure);
bool matches(std::string input);
};
#endif /* disable */

View file

@ -1,501 +0,0 @@
/*++
Module Name:
str_decl_plugin.h
Abstract:
<abstract>
Author:
Murphy Berzish (mtrberzi) 2015-09-02.
Revision History:
--*/
#if 0
#include<sstream>
#include"str_decl_plugin.h"
#include"string_buffer.h"
#include"warning.h"
#include"ast_pp.h"
#include"ast_smt2_pp.h"
str_decl_plugin::str_decl_plugin():
m_strv_sym("String"),
m_str_decl(0),
m_regex_decl(0),
m_concat_decl(0),
m_length_decl(0),
m_charat_decl(0),
m_startswith_decl(0),
m_endswith_decl(0),
m_contains_decl(0),
m_indexof_decl(0),
m_indexof2_decl(0),
m_lastindexof_decl(0),
m_substr_decl(0),
m_replace_decl(0),
m_str2int_decl(0),
m_int2str_decl(0),
m_prefixof_decl(0),
m_suffixof_decl(0),
m_re_str2regex_decl(0),
m_re_regexin_decl(0),
m_re_regexconcat_decl(0),
m_re_regexstar_decl(0),
m_re_regexunion_decl(0),
m_re_unroll_decl(0),
m_re_regexplus_decl(0),
m_re_regexcharrange_decl(0),
m_arith_plugin(0),
m_arith_fid(0),
m_int_sort(0){
}
str_decl_plugin::~str_decl_plugin(){
}
void str_decl_plugin::finalize(void) {
#define DEC_REF(decl) if (decl) { m_manager->dec_ref(decl); } ((void) 0)
DEC_REF(m_str_decl);
DEC_REF(m_regex_decl);
DEC_REF(m_concat_decl);
DEC_REF(m_length_decl);
DEC_REF(m_charat_decl);
DEC_REF(m_startswith_decl);
DEC_REF(m_endswith_decl);
DEC_REF(m_contains_decl);
DEC_REF(m_indexof_decl);
DEC_REF(m_indexof2_decl);
DEC_REF(m_lastindexof_decl);
DEC_REF(m_substr_decl);
DEC_REF(m_replace_decl);
DEC_REF(m_prefixof_decl);
DEC_REF(m_suffixof_decl);
DEC_REF(m_str2int_decl);
DEC_REF(m_int2str_decl);
DEC_REF(m_re_str2regex_decl);
DEC_REF(m_re_regexin_decl);
DEC_REF(m_re_regexconcat_decl);
DEC_REF(m_re_regexstar_decl);
DEC_REF(m_re_regexunion_decl);
DEC_REF(m_re_regexplus_decl);
DEC_REF(m_re_regexcharrange_decl);
DEC_REF(m_re_unroll_decl);
DEC_REF(m_int_sort);
}
void str_decl_plugin::set_manager(ast_manager * m, family_id id) {
decl_plugin::set_manager(m, id);
m_str_decl = m->mk_sort(symbol("String"), sort_info(id, STRING_SORT));
m->inc_ref(m_str_decl);
sort * s = m_str_decl;
m_regex_decl = m->mk_sort(symbol("Regex"), sort_info(id, REGEX_SORT));
m->inc_ref(m_regex_decl);
sort * re = m_regex_decl;
SASSERT(m_manager->has_plugin(symbol("arith")));
m_arith_fid = m_manager->mk_family_id("arith");
m_arith_plugin = static_cast<arith_decl_plugin*>(m_manager->get_plugin(m_arith_fid));
SASSERT(m_arith_plugin);
m_int_sort = m_manager->mk_sort(m_arith_fid, INT_SORT);
SASSERT(m_int_sort != 0); // arith_decl_plugin must be installed before str_decl_plugin.
m_manager->inc_ref(m_int_sort);
sort * i = m_int_sort;
sort* boolT = m_manager->mk_bool_sort();
#define MK_OP(FIELD, NAME, KIND, SORT) \
FIELD = m->mk_func_decl(symbol(NAME), SORT, SORT, SORT, func_decl_info(id, KIND)); \
m->inc_ref(FIELD)
MK_OP(m_concat_decl, "str.++", OP_STRCAT, s);
m_length_decl = m->mk_func_decl(symbol("str.len"), s, i, func_decl_info(id, OP_STRLEN));
m_manager->inc_ref(m_length_decl);
m_charat_decl = m->mk_func_decl(symbol("str.at"), s, i, s, func_decl_info(id, OP_STR_CHARAT));
m_manager->inc_ref(m_charat_decl);
m_startswith_decl = m->mk_func_decl(symbol("StartsWith"), s, s, boolT, func_decl_info(id, OP_STR_STARTSWITH));
m_manager->inc_ref(m_startswith_decl);
m_endswith_decl = m->mk_func_decl(symbol("EndsWith"), s, s, boolT, func_decl_info(id, OP_STR_ENDSWITH));
m_manager->inc_ref(m_endswith_decl);
m_contains_decl = m->mk_func_decl(symbol("str.contains"), s, s, boolT, func_decl_info(id, OP_STR_CONTAINS));
m_manager->inc_ref(m_contains_decl);
m_indexof_decl = m->mk_func_decl(symbol("str.indexof"), s, s, i, func_decl_info(id, OP_STR_INDEXOF));
m_manager->inc_ref(m_indexof_decl);
{
sort * d[3] = { s, s, i };
m_indexof2_decl = m->mk_func_decl(symbol("Indexof2"), 3, d, i, func_decl_info(id, OP_STR_INDEXOF2));
m_manager->inc_ref(m_indexof2_decl);
}
m_lastindexof_decl = m->mk_func_decl(symbol("str.lastindexof"), s, s, i, func_decl_info(id, OP_STR_LASTINDEXOF));
m_manager->inc_ref(m_lastindexof_decl);
{
sort * d[3] = {s, i, i };
m_substr_decl = m->mk_func_decl(symbol("str.substr"), 3, d, s, func_decl_info(id, OP_STR_SUBSTR));
m_manager->inc_ref(m_substr_decl);
}
{
sort * d[3] = {s, s, s};
m_replace_decl = m->mk_func_decl(symbol("str.replace"), 3, d, s, func_decl_info(id, OP_STR_REPLACE));
m_manager->inc_ref(m_replace_decl);
}
m_prefixof_decl = m->mk_func_decl(symbol("str.prefixof"), s, s, boolT, func_decl_info(id, OP_STR_PREFIXOF));
m_manager->inc_ref(m_prefixof_decl);
m_suffixof_decl = m->mk_func_decl(symbol("str.suffixof"), s, s, boolT, func_decl_info(id, OP_STR_SUFFIXOF));
m_manager->inc_ref(m_suffixof_decl);
m_str2int_decl = m->mk_func_decl(symbol("str.to-int"), s, i, func_decl_info(id, OP_STR_STR2INT));
m_manager->inc_ref(m_str2int_decl);
m_int2str_decl = m->mk_func_decl(symbol("str.from-int"), i, s, func_decl_info(id, OP_STR_INT2STR));
m_manager->inc_ref(m_int2str_decl);
m_re_str2regex_decl = m->mk_func_decl(symbol("str.to.re"), s, re, func_decl_info(id, OP_RE_STR2REGEX));
m_manager->inc_ref(m_re_str2regex_decl);
m_re_regexin_decl = m->mk_func_decl(symbol("str.in.re"), s, re, boolT, func_decl_info(id, OP_RE_REGEXIN));
m_manager->inc_ref(m_re_regexin_decl);
m_re_regexconcat_decl = m->mk_func_decl(symbol("re.++"), re, re, re, func_decl_info(id, OP_RE_REGEXCONCAT));
m_manager->inc_ref(m_re_regexconcat_decl);
m_re_regexstar_decl = m->mk_func_decl(symbol("re.*"), re, re, func_decl_info(id, OP_RE_REGEXSTAR));
m_manager->inc_ref(m_re_regexstar_decl);
m_re_regexplus_decl = m->mk_func_decl(symbol("re.+"), re, re, func_decl_info(id, OP_RE_REGEXPLUS));
m_manager->inc_ref(m_re_regexplus_decl);
m_re_regexunion_decl = m->mk_func_decl(symbol("re.union"), re, re, re, func_decl_info(id, OP_RE_REGEXUNION));
m_manager->inc_ref(m_re_regexunion_decl);
m_re_unroll_decl = m->mk_func_decl(symbol("Unroll"), re, i, s, func_decl_info(id, OP_RE_UNROLL));
m_manager->inc_ref(m_re_unroll_decl);
m_re_regexcharrange_decl = m->mk_func_decl(symbol("re.range"), s, s, re, func_decl_info(id, OP_RE_REGEXCHARRANGE));
m_manager->inc_ref(m_re_regexcharrange_decl);
}
decl_plugin * str_decl_plugin::mk_fresh() {
return alloc(str_decl_plugin);
}
sort * str_decl_plugin::mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters) {
switch (k) {
case STRING_SORT: return m_str_decl;
case REGEX_SORT: return m_regex_decl;
default: return 0;
}
}
func_decl * str_decl_plugin::mk_func_decl(decl_kind k) {
switch(k) {
case OP_STRCAT: return m_concat_decl;
case OP_STRLEN: return m_length_decl;
case OP_STR_CHARAT: return m_charat_decl;
case OP_STR_STARTSWITH: return m_startswith_decl;
case OP_STR_ENDSWITH: return m_endswith_decl;
case OP_STR_CONTAINS: return m_contains_decl;
case OP_STR_INDEXOF: return m_indexof_decl;
case OP_STR_INDEXOF2: return m_indexof2_decl;
case OP_STR_LASTINDEXOF: return m_lastindexof_decl;
case OP_STR_SUBSTR: return m_substr_decl;
case OP_STR_REPLACE: return m_replace_decl;
case OP_STR_PREFIXOF: return m_prefixof_decl;
case OP_STR_SUFFIXOF: return m_suffixof_decl;
case OP_STR_STR2INT: return m_str2int_decl;
case OP_STR_INT2STR: return m_int2str_decl;
case OP_RE_STR2REGEX: return m_re_str2regex_decl;
case OP_RE_REGEXIN: return m_re_regexin_decl;
case OP_RE_REGEXCONCAT: return m_re_regexconcat_decl;
case OP_RE_REGEXSTAR: return m_re_regexstar_decl;
case OP_RE_REGEXPLUS: return m_re_regexplus_decl;
case OP_RE_REGEXUNION: return m_re_regexunion_decl;
case OP_RE_UNROLL: return m_re_unroll_decl;
case OP_RE_REGEXCHARRANGE: return m_re_regexcharrange_decl;
default: return 0;
}
}
func_decl * str_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, parameter const * parameters,
unsigned arity, sort * const * domain, sort * range) {
if (k == OP_STR) {
m_manager->raise_exception("OP_STR not yet implemented in mk_func_decl!");
return 0;
}
if (arity == 0) {
m_manager->raise_exception("no arguments supplied to string operator");
return 0;
}
return mk_func_decl(k);
}
app * str_decl_plugin::mk_string(std::string & val) {
std::map<std::string, app*>::iterator it = string_cache.find(val);
//if (it == string_cache.end()) {
if (true) {
char * new_buffer = alloc_svect(char, (val.length() + 1));
strcpy(new_buffer, val.c_str());
parameter p[1] = {parameter(new_buffer)};
func_decl * d;
d = m_manager->mk_const_decl(m_strv_sym, m_str_decl, func_decl_info(m_family_id, OP_STR, 1, p));
app * str = m_manager->mk_const(d);
string_cache[val] = str;
return str;
} else {
return it->second;
}
}
app * str_decl_plugin::mk_string(const char * val) {
std::string key(val);
return mk_string(key);
}
app * str_decl_plugin::mk_fresh_string() {
// cheating.
// take the longest string in the cache, append the letter "A", and call it fresh.
std::string longestString = "";
std::map<std::string, app*>::iterator it = string_cache.begin();
for (; it != string_cache.end(); ++it) {
if (it->first.length() > longestString.length()) {
longestString = it->first;
}
}
longestString += "A";
return mk_string(longestString);
}
void str_decl_plugin::get_op_names(svector<builtin_name> & op_names, symbol const & logic) {
op_names.push_back(builtin_name("str.++", OP_STRCAT));
op_names.push_back(builtin_name("str.len", OP_STRLEN));
op_names.push_back(builtin_name("str.at", OP_STR_CHARAT));
op_names.push_back(builtin_name("StartsWith", OP_STR_STARTSWITH));
op_names.push_back(builtin_name("EndsWith", OP_STR_ENDSWITH));
op_names.push_back(builtin_name("str.contains", OP_STR_CONTAINS));
op_names.push_back(builtin_name("str.indexof", OP_STR_INDEXOF));
op_names.push_back(builtin_name("Indexof2", OP_STR_INDEXOF2));
op_names.push_back(builtin_name("str.lastindexof", OP_STR_LASTINDEXOF));
op_names.push_back(builtin_name("str.substr", OP_STR_SUBSTR));
op_names.push_back(builtin_name("str.replace", OP_STR_REPLACE));
op_names.push_back(builtin_name("str.prefixof", OP_STR_PREFIXOF));
op_names.push_back(builtin_name("str.suffixof", OP_STR_SUFFIXOF));
op_names.push_back(builtin_name("str.to-int", OP_STR_STR2INT));
op_names.push_back(builtin_name("str.from-int", OP_STR_INT2STR));
op_names.push_back(builtin_name("str.to.re", OP_RE_STR2REGEX));
op_names.push_back(builtin_name("str.in.re", OP_RE_REGEXIN));
op_names.push_back(builtin_name("re.++", OP_RE_REGEXCONCAT));
op_names.push_back(builtin_name("re.*", OP_RE_REGEXSTAR));
op_names.push_back(builtin_name("re.union", OP_RE_REGEXUNION));
op_names.push_back(builtin_name("re.+", OP_RE_REGEXPLUS));
op_names.push_back(builtin_name("Unroll", OP_RE_UNROLL));
op_names.push_back(builtin_name("re.range", OP_RE_REGEXCHARRANGE));
}
void str_decl_plugin::get_sort_names(svector<builtin_name> & sort_names, symbol const & logic) {
sort_names.push_back(builtin_name("String", STRING_SORT));
sort_names.push_back(builtin_name("Regex", REGEX_SORT));
}
bool str_decl_plugin::is_value(app * e) const {
if (e->get_family_id() != m_family_id) {
return false;
}
switch (e->get_decl_kind()) {
case OP_STR:
return true;
default:
return false;
}
}
bool str_recognizers::is_string(expr const * n, const char ** val) const {
if (!is_app_of(n, m_afid, OP_STR))
return false;
func_decl * decl = to_app(n)->get_decl();
*val = decl->get_parameter(0).get_string();
return true;
}
bool str_recognizers::is_string(expr const * n) const {
const char * tmp = 0;
return is_string(n, & tmp);
}
std::string str_recognizers::get_string_constant_value(expr const *n) const {
const char * cstr = 0;
bool isString = is_string(n, & cstr);
SASSERT(isString);
std::string strval(cstr);
return strval;
}
str_util::str_util(ast_manager &m) :
str_recognizers(m.mk_family_id(symbol("str"))),
m_manager(m) {
SASSERT(m.has_plugin(symbol("str")));
m_plugin = static_cast<str_decl_plugin*>(m.get_plugin(m.mk_family_id(symbol("str"))));
m_fid = m_plugin->get_family_id();
}
/*
* Scan through the string 'val' and interpret each instance of "backslash followed by a character"
* as a possible escape sequence. Emit all other characters as-is.
* This exists because the SMT-LIB 2.5 standard does not recognize escape sequences other than "" -> " .
* The escape sequences recognized are as follows:
* \a \b \e \f \n \r \t \v \\ : as specified by the C++ standard
* \ooo : produces the ASCII character corresponding to the octal value "ooo", where each "o" is a
* single octal digit and between 1 and 3 valid digits are given
* \xhh : produces the ASCII character corresponding to the hexadecimal value "hh", where each "h" is a
* single case-insensitive hex digit (0-9A-F) and exactly 2 digits are given
* \C, for any character C that does not start a legal escape sequence : the backslash is ignored and "C" is produced.
*/
app * str_util::mk_string_with_escape_characters(std::string & val) {
std::string parsedStr;
parsedStr.reserve(val.length());
for (unsigned i = 0; i < val.length(); ++i) {
char nextChar = val.at(i);
if (nextChar == '\\') {
// check escape sequence
i++;
if (i >= val.length()) {
get_manager().raise_exception("invalid escape sequence");
}
char escapeChar1 = val.at(i);
if (escapeChar1 == 'a') {
parsedStr.push_back('\a');
} else if (escapeChar1 == 'b') {
parsedStr.push_back('\b');
} else if (escapeChar1 == 'e') {
parsedStr.push_back('\e');
} else if (escapeChar1 == 'f') {
parsedStr.push_back('\f');
} else if (escapeChar1 == 'n') {
parsedStr.push_back('\n');
} else if (escapeChar1 == 'r') {
parsedStr.push_back('\r');
} else if (escapeChar1 == 't') {
parsedStr.push_back('\t');
} else if (escapeChar1 == 'v') {
parsedStr.push_back('\v');
} else if (escapeChar1 == '\\') {
parsedStr.push_back('\\');
} else if (escapeChar1 == 'x') {
// hex escape: we expect 'x' to be followed by exactly two hex digits
// which means that i+2 must be a valid index
if (i+2 >= val.length()) {
get_manager().raise_exception("invalid hex escape: \\x must be followed by exactly two hex digits");
}
char hexDigitHi = val.at(i+1);
char hexDigitLo = val.at(i+2);
i += 2;
if (!isxdigit((int)hexDigitHi) || !isxdigit((int)hexDigitLo)) {
get_manager().raise_exception("invalid hex escape: \\x must be followed by exactly two hex digits");
}
char tmp[3] = {hexDigitHi, hexDigitLo, '\0'};
long converted = strtol(tmp, NULL, 16);
unsigned char convChar = (unsigned char)converted;
parsedStr.push_back(convChar);
} else if (escapeChar1 == '0' || escapeChar1 == '1' || escapeChar1 == '2' || escapeChar1 == '3' ||
escapeChar1 == '4' || escapeChar1 == '5' || escapeChar1 == '6' || escapeChar1 == '7') {
// octal escape: we expect exactly three octal digits
// which means that val[i], val[i+1], val[i+2] must all be octal digits
// and that i+2 must be a valid index
if (i+2 >= val.length()) {
get_manager().raise_exception("invalid octal escape: exactly three octal digits required");
}
char c2 = escapeChar1;
char c1 = val.at(i+1);
char c0 = val.at(i+2);
i += 2;
if (!isdigit(c2) || !isdigit(c1) || !isdigit(c0)) {
get_manager().raise_exception("invalid octal escape: exactly three octal digits required");
}
if (c2 == '8' || c2 == '9' || c1 == '8' || c1 == '9' || c0 == '8' || c0 == '9') {
get_manager().raise_exception("invalid octal escape: exactly three octal digits required");
}
char tmp[4] = {c2, c1, c0, '\0'};
long converted = strtol(tmp, NULL, 8);
unsigned char convChar = (unsigned char)converted;
parsedStr.push_back(convChar);
} else {
// unrecognized escape sequence -- just emit that character
parsedStr.push_back(escapeChar1);
}
} else {
parsedStr.push_back(nextChar);
}
// i is incremented at the end of this loop.
// If it is modified, ensure that it points to the index before
// the next character.
}
return mk_string(parsedStr);
}
static std::string str2RegexStr(std::string str) {
std::string res = "";
int len = str.size();
for (int i = 0; i < len; i++) {
char nc = str[i];
// 12 special chars
if (nc == '\\' || nc == '^' || nc == '$' || nc == '.' || nc == '|' || nc == '?'
|| nc == '*' || nc == '+' || nc == '(' || nc == ')' || nc == '[' || nc == '{') {
res.append(1, '\\');
}
res.append(1, str[i]);
}
return res;
}
std::string str_util::get_std_regex_str(expr * regex) {
app * a_regex = to_app(regex);
if (is_re_Str2Reg(a_regex)) {
expr * regAst = a_regex->get_arg(0);
std::string regStr = str2RegexStr(get_string_constant_value(regAst));
return regStr;
} else if (is_re_RegexConcat(a_regex)) {
expr * reg1Ast = a_regex->get_arg(0);
expr * reg2Ast = a_regex->get_arg(1);
std::string reg1Str = get_std_regex_str(reg1Ast);
std::string reg2Str = get_std_regex_str(reg2Ast);
return "(" + reg1Str + ")(" + reg2Str + ")";
} else if (is_re_RegexUnion(a_regex)) {
expr * reg1Ast = a_regex->get_arg(0);
expr * reg2Ast = a_regex->get_arg(1);
std::string reg1Str = get_std_regex_str(reg1Ast);
std::string reg2Str = get_std_regex_str(reg2Ast);
return "(" + reg1Str + ")|(" + reg2Str + ")";
} else if (is_re_RegexStar(a_regex)) {
expr * reg1Ast = a_regex->get_arg(0);
std::string reg1Str = get_std_regex_str(reg1Ast);
return "(" + reg1Str + ")*";
} else {
TRACE("t_str", tout << "BUG: unrecognized regex term " << mk_pp(regex, get_manager()) << std::endl;);
UNREACHABLE(); return "";
}
}
#endif /* disable */

View file

@ -1,218 +0,0 @@
/*++
Module Name:
str_decl_plugin.h
Abstract:
<abstract>
Author:
Murphy Berzish (mtrberzi) 2015-09-02.
Revision History:
--*/
#if 0
#ifndef _STR_DECL_PLUGIN_H_
#define _STR_DECL_PLUGIN_H_
#include"ast.h"
#include"arith_decl_plugin.h"
#include<map>
enum str_sort_kind {
STRING_SORT,
REGEX_SORT,
};
enum str_op_kind {
OP_STR, /* string constants */
// basic string operators
OP_STRCAT,
OP_STRLEN,
// higher-level string functions -- these are reduced to basic operations
OP_STR_CHARAT,
OP_STR_STARTSWITH,
OP_STR_ENDSWITH,
OP_STR_CONTAINS,
OP_STR_INDEXOF,
OP_STR_INDEXOF2,
OP_STR_LASTINDEXOF,
OP_STR_SUBSTR,
OP_STR_REPLACE,
// SMT-LIB 2.5 standard operators -- these are rewritten to internal ones
OP_STR_PREFIXOF,
OP_STR_SUFFIXOF,
// string-integer conversion
OP_STR_STR2INT,
OP_STR_INT2STR, OP_STR_PLACEHOLDER1, OP_STR_PLACEHOLDER2,
// regular expression operators
OP_RE_STR2REGEX,
OP_RE_REGEXIN,
OP_RE_REGEXCONCAT,
OP_RE_REGEXSTAR,
OP_RE_REGEXUNION,
OP_RE_UNROLL,
// higher-level regex operators
OP_RE_REGEXPLUS,
OP_RE_REGEXCHARRANGE,
// end
LAST_STR_OP
};
class str_decl_plugin : public decl_plugin {
protected:
symbol m_strv_sym;
sort * m_str_decl;
sort * m_regex_decl;
func_decl * m_concat_decl;
func_decl * m_length_decl;
func_decl * m_charat_decl;
func_decl * m_startswith_decl;
func_decl * m_endswith_decl;
func_decl * m_contains_decl;
func_decl * m_indexof_decl;
func_decl * m_indexof2_decl;
func_decl * m_lastindexof_decl;
func_decl * m_substr_decl;
func_decl * m_replace_decl;
func_decl * m_str2int_decl;
func_decl * m_int2str_decl;
func_decl * m_prefixof_decl;
func_decl * m_suffixof_decl;
func_decl * m_re_str2regex_decl;
func_decl * m_re_regexin_decl;
func_decl * m_re_regexconcat_decl;
func_decl * m_re_regexstar_decl;
func_decl * m_re_regexunion_decl;
func_decl * m_re_unroll_decl;
func_decl * m_re_regexplus_decl;
func_decl * m_re_regexcharrange_decl;
arith_decl_plugin * m_arith_plugin;
family_id m_arith_fid;
sort * m_int_sort;
std::map<std::string, app*> string_cache;
virtual void set_manager(ast_manager * m, family_id id);
func_decl * mk_func_decl(decl_kind k);
public:
str_decl_plugin();
virtual ~str_decl_plugin();
virtual void finalize();
virtual decl_plugin * mk_fresh();
virtual sort * mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters);
virtual func_decl * mk_func_decl(decl_kind k, unsigned num_parameters, parameter const * parameters,
unsigned arity, sort * const * domain, sort * range);
app * mk_string(const char * val);
app * mk_string(std::string & val);
app * mk_fresh_string();
virtual void get_op_names(svector<builtin_name> & op_names, symbol const & logic);
virtual void get_sort_names(svector<builtin_name> & sort_names, symbol const & logic);
virtual bool is_value(app * e) const;
virtual bool is_unique_value(app * e) const { return is_value(e); }
};
class str_recognizers {
family_id m_afid;
public:
str_recognizers(family_id fid):m_afid(fid) {}
family_id get_fid() const { return m_afid; }
family_id get_family_id() const { return get_fid(); }
bool is_str_sort(sort* s) const { return is_sort_of(s, m_afid, STRING_SORT); }
bool is_string(expr const * n, const char ** val) const;
bool is_string(expr const * n) const;
bool is_re_Str2Reg(expr const * n) const { return is_app_of(n, get_fid(), OP_RE_STR2REGEX); }
bool is_re_RegexConcat(expr const * n) const { return is_app_of(n, get_fid(), OP_RE_REGEXCONCAT); }
bool is_re_RegexUnion(expr const * n) const { return is_app_of(n, get_fid(), OP_RE_REGEXUNION); }
bool is_re_RegexStar(expr const * n) const { return is_app_of(n, get_fid(), OP_RE_REGEXSTAR); }
bool is_re_RegexPlus(expr const * n) const { return is_app_of(n, get_fid(), OP_RE_REGEXPLUS); }
std::string get_string_constant_value(expr const *n) const;
};
class str_util : public str_recognizers {
ast_manager & m_manager;
str_decl_plugin * m_plugin;
family_id m_fid;
public:
str_util(ast_manager & m);
ast_manager & get_manager() const { return m_manager; }
str_decl_plugin & plugin() { return *m_plugin; }
sort* mk_string_sort() const { return get_manager().mk_sort(m_fid, STRING_SORT, 0, 0); }
app * mk_string(const char * val) {
return m_plugin->mk_string(val);
}
app * mk_string(std::string & val) {
return m_plugin->mk_string(val);
}
app * mk_fresh_string() {
return m_plugin->mk_fresh_string();
}
app * mk_string_with_escape_characters(const char * val) {
std::string str(val);
return mk_string_with_escape_characters(str);
}
app * mk_string_with_escape_characters(std::string & val);
app * mk_str_StartsWith(expr * haystack, expr * needle) {
expr * es[2] = {haystack, needle};
return m_manager.mk_app(get_fid(), OP_STR_STARTSWITH, 2, es);
}
app * mk_str_EndsWith(expr * haystack, expr * needle) {
expr * es[2] = {haystack, needle};
return m_manager.mk_app(get_fid(), OP_STR_ENDSWITH, 2, es);
}
app * mk_re_Str2Reg(expr * s) {
expr * es[1] = {s};
return m_manager.mk_app(get_fid(), OP_RE_STR2REGEX, 1, es);
}
app * mk_re_Str2Reg(std::string s) {
return mk_re_Str2Reg(mk_string(s));
}
app * mk_re_RegexUnion(expr * e1, expr * e2) {
expr * es[2] = {e1, e2};
return m_manager.mk_app(get_fid(), OP_RE_REGEXUNION, 2, es);
}
app * mk_re_RegexConcat(expr * e1, expr * e2) {
expr * es[2] = {e1, e2};
return m_manager.mk_app(get_fid(), OP_RE_REGEXCONCAT, 2, es);
}
app * mk_re_RegexStar(expr * r) {
expr * es[1] = {r};
return m_manager.mk_app(get_fid(), OP_RE_REGEXSTAR, 1, es);
}
std::string get_std_regex_str(expr * regex);
};
#endif /* _STR_DECL_PLUGIN_H_ */
#endif /* disable */