mirror of
https://github.com/Z3Prover/z3
synced 2025-04-23 17:15:31 +00:00
re-add regex NFA
This commit is contained in:
parent
9ac0d098ac
commit
8b077ebbe7
2 changed files with 199 additions and 10 deletions
|
@ -598,7 +598,7 @@ app * theory_str::mk_unroll(expr * n, expr * bound) {
|
|||
ast_manager & m = get_manager();
|
||||
|
||||
expr * args[2] = {n, bound};
|
||||
app * unrollFunc = get_manager().mk_app(get_id(), OP_RE_UNROLL, 0, 0, 2, args);
|
||||
app * unrollFunc = get_manager().mk_app(get_id(), _OP_RE_UNROLL, 0, 0, 2, args);
|
||||
m_trail.push_back(unrollFunc);
|
||||
|
||||
expr_ref_vector items(m);
|
||||
|
@ -4428,7 +4428,7 @@ void theory_str::process_concat_eq_type6(expr * concatAst1, expr * concatAst2) {
|
|||
void theory_str::process_unroll_eq_const_str(expr * unrollFunc, expr * constStr) {
|
||||
ast_manager & m = get_manager();
|
||||
|
||||
if (!is_Unroll(to_app(unrollFunc))) {
|
||||
if (!u.re.is_unroll(to_app(unrollFunc))) {
|
||||
return;
|
||||
}
|
||||
if (!u.str.is_string(constStr)) {
|
||||
|
@ -5444,7 +5444,7 @@ void theory_str::get_grounded_concats(expr* node, std::map<expr*, expr*> & varAl
|
|||
std::map<expr*, expr*> & concatAliasMap, std::map<expr*, expr*> & varConstMap,
|
||||
std::map<expr*, expr*> & concatConstMap, std::map<expr*, std::map<expr*, int> > & varEqConcatMap,
|
||||
std::map<expr*, std::map<std::vector<expr*>, std::set<expr*> > > & groundedMap) {
|
||||
if (is_Unroll(to_app(node))) {
|
||||
if (u.re.is_unroll(to_app(node))) {
|
||||
return;
|
||||
}
|
||||
// **************************************************
|
||||
|
@ -6129,6 +6129,149 @@ bool theory_str::check_concat_len_in_eqc(expr * concat) {
|
|||
return no_assertions;
|
||||
}
|
||||
|
||||
// Convert a regular expression to an e-NFA using Thompson's construction
|
||||
void nfa::convert_re(expr * e, unsigned & start, unsigned & end, seq_util & u) {
|
||||
start = next_id();
|
||||
end = next_id();
|
||||
if (u.re.is_to_re(e)) {
|
||||
app * a = to_app(e);
|
||||
expr * arg_str = a->get_arg(0);
|
||||
zstring str;
|
||||
if (u.str.is_string(arg_str, str)) {
|
||||
TRACE("t_str_rw", tout << "build NFA for '" << str << "'" << "\n";);
|
||||
|
||||
/*
|
||||
* For an n-character string, we make (n-1) intermediate states,
|
||||
* labelled i_(0) through i_(n-2).
|
||||
* Then we construct the following transitions:
|
||||
* start --str[0]--> i_(0) --str[1]--> i_(1) --...--> i_(n-2) --str[n-1]--> final
|
||||
*/
|
||||
unsigned last = start;
|
||||
for (int i = 0; i <= ((int)str.length()) - 2; ++i) {
|
||||
unsigned i_state = next_id();
|
||||
make_transition(last, str[i], i_state);
|
||||
TRACE("t_str_rw", tout << "string transition " << last << "--" << str[i] << "--> " << i_state << "\n";);
|
||||
last = i_state;
|
||||
}
|
||||
make_transition(last, str[(str.length() - 1)], end);
|
||||
TRACE("t_str_rw", tout << "string transition " << last << "--" << str[(str.length() - 1)] << "--> " << end << "\n";);
|
||||
TRACE("t_str_rw", tout << "string NFA: start = " << start << ", end = " << end << std::endl;);
|
||||
} else {
|
||||
TRACE("t_str_rw", tout << "invalid string constant in Str2Reg" << std::endl;);
|
||||
m_valid = false;
|
||||
return;
|
||||
}
|
||||
} else if (u.re.is_concat(e)){
|
||||
app * a = to_app(e);
|
||||
expr * re1 = a->get_arg(0);
|
||||
expr * re2 = a->get_arg(1);
|
||||
unsigned start1, end1;
|
||||
convert_re(re1, start1, end1, u);
|
||||
unsigned start2, end2;
|
||||
convert_re(re2, start2, end2, u);
|
||||
// start --e--> start1 --...--> end1 --e--> start2 --...--> end2 --e--> end
|
||||
make_epsilon_move(start, start1);
|
||||
make_epsilon_move(end1, start2);
|
||||
make_epsilon_move(end2, end);
|
||||
TRACE("t_str_rw", tout << "concat NFA: start = " << start << ", end = " << end << std::endl;);
|
||||
} else if (u.re.is_union(e)) {
|
||||
app * a = to_app(e);
|
||||
expr * re1 = a->get_arg(0);
|
||||
expr * re2 = a->get_arg(1);
|
||||
unsigned start1, end1;
|
||||
convert_re(re1, start1, end1, u);
|
||||
unsigned start2, end2;
|
||||
convert_re(re2, start2, end2, u);
|
||||
|
||||
// start --e--> start1 ; start --e--> start2
|
||||
// end1 --e--> end ; end2 --e--> end
|
||||
make_epsilon_move(start, start1);
|
||||
make_epsilon_move(start, start2);
|
||||
make_epsilon_move(end1, end);
|
||||
make_epsilon_move(end2, end);
|
||||
TRACE("t_str_rw", tout << "union NFA: start = " << start << ", end = " << end << std::endl;);
|
||||
} else if (u.re.is_star(e)) {
|
||||
app * a = to_app(e);
|
||||
expr * subex = a->get_arg(0);
|
||||
unsigned start_subex, end_subex;
|
||||
convert_re(subex, start_subex, end_subex, u);
|
||||
// start --e--> start_subex, start --e--> end
|
||||
// end_subex --e--> start_subex, end_subex --e--> end
|
||||
make_epsilon_move(start, start_subex);
|
||||
make_epsilon_move(start, end);
|
||||
make_epsilon_move(end_subex, start_subex);
|
||||
make_epsilon_move(end_subex, end);
|
||||
TRACE("t_str_rw", tout << "star NFA: start = " << start << ", end = " << end << std::endl;);
|
||||
} else {
|
||||
TRACE("t_str_rw", tout << "invalid regular expression" << std::endl;);
|
||||
m_valid = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void nfa::epsilon_closure(unsigned start, std::set<unsigned> & closure) {
|
||||
std::deque<unsigned> worklist;
|
||||
closure.insert(start);
|
||||
worklist.push_back(start);
|
||||
|
||||
while(!worklist.empty()) {
|
||||
unsigned state = worklist.front();
|
||||
worklist.pop_front();
|
||||
if (epsilon_map.find(state) != epsilon_map.end()) {
|
||||
for (std::set<unsigned>::iterator it = epsilon_map[state].begin();
|
||||
it != epsilon_map[state].end(); ++it) {
|
||||
unsigned new_state = *it;
|
||||
if (closure.find(new_state) == closure.end()) {
|
||||
closure.insert(new_state);
|
||||
worklist.push_back(new_state);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool nfa::matches(zstring input) {
|
||||
/*
|
||||
* Keep a set of all states the NFA can currently be in.
|
||||
* Initially this is the e-closure of m_start_state
|
||||
* For each character A in the input string,
|
||||
* the set of next states contains
|
||||
* all states in transition_map[S][A] for each S in current_states,
|
||||
* and all states in epsilon_map[S] for each S in current_states.
|
||||
* After consuming the entire input string,
|
||||
* the match is successful iff current_states contains m_end_state.
|
||||
*/
|
||||
std::set<unsigned> current_states;
|
||||
epsilon_closure(m_start_state, current_states);
|
||||
for (unsigned i = 0; i < input.length(); ++i) {
|
||||
char A = input.at(i);
|
||||
std::set<unsigned> next_states;
|
||||
for (std::set<unsigned>::iterator it = current_states.begin();
|
||||
it != current_states.end(); ++it) {
|
||||
unsigned S = *it;
|
||||
// check transition_map
|
||||
if (transition_map[S].find(A) != transition_map[S].end()) {
|
||||
next_states.insert(transition_map[S][A]);
|
||||
}
|
||||
}
|
||||
|
||||
// take e-closure over next_states to compute the actual next_states
|
||||
std::set<unsigned> epsilon_next_states;
|
||||
for (std::set<unsigned>::iterator it = next_states.begin(); it != next_states.end(); ++it) {
|
||||
unsigned S = *it;
|
||||
std::set<unsigned> closure;
|
||||
epsilon_closure(S, closure);
|
||||
epsilon_next_states.insert(closure.begin(), closure.end());
|
||||
}
|
||||
current_states = epsilon_next_states;
|
||||
}
|
||||
if (current_states.find(m_end_state) != current_states.end()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void theory_str::check_regex_in(expr * nn1, expr * nn2) {
|
||||
context & ctx = get_context();
|
||||
ast_manager & m = get_manager();
|
||||
|
@ -6159,7 +6302,7 @@ void theory_str::check_regex_in(expr * nn1, expr * nn2) {
|
|||
// TODO figure out regex NFA stuff
|
||||
if (regex_nfa_cache.find(regexTerm) == regex_nfa_cache.end()) {
|
||||
TRACE("t_str_detail", tout << "regex_nfa_cache: cache miss" << std::endl;);
|
||||
regex_nfa_cache[regexTerm] = nfa(m_strutil, regexTerm);
|
||||
regex_nfa_cache[regexTerm] = nfa(u, regexTerm);
|
||||
} else {
|
||||
TRACE("t_str_detail", tout << "regex_nfa_cache: cache hit" << std::endl;);
|
||||
}
|
||||
|
@ -7385,7 +7528,7 @@ void theory_str::classify_ast_by_type(expr * node, std::map<expr*, int> & varMap
|
|||
if (canskip == 0 && concatMap.find(node) == concatMap.end()) {
|
||||
concatMap[node] = 1;
|
||||
}
|
||||
} else if (is_Unroll(aNode)) {
|
||||
} else if (u.re.is_unroll(aNode)) {
|
||||
// Unroll
|
||||
if (unrollMap.find(node) == unrollMap.end()) {
|
||||
unrollMap[node] = 1;
|
||||
|
@ -7658,7 +7801,7 @@ int theory_str::ctx_dep_analysis(std::map<expr*, int> & strVarMap, std::map<expr
|
|||
enode * e_curr = e_currEqc;
|
||||
do {
|
||||
app * curr = e_currEqc->get_owner();
|
||||
if (is_Unroll(curr)) {
|
||||
if (u.re.is_unroll(curr)) {
|
||||
if (aRoot == NULL) {
|
||||
aRoot = curr;
|
||||
}
|
||||
|
@ -7753,7 +7896,7 @@ int theory_str::ctx_dep_analysis(std::map<expr*, int> & strVarMap, std::map<expr
|
|||
if (!is_arg0_emptyStr && !is_arg1_emptyStr) {
|
||||
var_eq_concat_map[deAliasNode][curr] = 1;
|
||||
}
|
||||
} else if (is_Unroll(to_app(curr))) {
|
||||
} else if (u.re.is_unroll(to_app(curr))) {
|
||||
var_eq_unroll_map[deAliasNode][curr] = 1;
|
||||
}
|
||||
|
||||
|
@ -10191,7 +10334,7 @@ void theory_str::get_eqc_allUnroll(expr * n, expr * &constStr, std::set<expr*> &
|
|||
do {
|
||||
if (u.str.is_string(to_app(curr))) {
|
||||
constStr = curr;
|
||||
} else if (is_Unroll(to_app(curr))) {
|
||||
} else if (u.re.is_unroll(to_app(curr))) {
|
||||
if (unrollFuncSet.find(curr) == unrollFuncSet.end()) {
|
||||
unrollFuncSet.insert(curr);
|
||||
}
|
||||
|
@ -10210,7 +10353,7 @@ void theory_str::get_eqc_simpleUnroll(expr * n, expr * &constStr, std::set<expr*
|
|||
do {
|
||||
if (u.str.is_string(to_app(curr))) {
|
||||
constStr = curr;
|
||||
} else if (is_Unroll(to_app(curr))) {
|
||||
} else if (u.re.is_unroll(to_app(curr))) {
|
||||
expr * core = to_app(curr)->get_arg(0);
|
||||
if (u.re.is_to_re(to_app(core))) {
|
||||
if (unrollFuncSet.find(curr) == unrollFuncSet.end()) {
|
||||
|
|
|
@ -110,6 +110,52 @@ namespace smt {
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
class nfa {
|
||||
protected:
|
||||
bool m_valid;
|
||||
unsigned m_next_id;
|
||||
|
||||
unsigned next_id() {
|
||||
unsigned retval = m_next_id;
|
||||
++m_next_id;
|
||||
return retval;
|
||||
}
|
||||
|
||||
unsigned m_start_state;
|
||||
unsigned m_end_state;
|
||||
|
||||
std::map<unsigned, std::map<char, unsigned> > transition_map;
|
||||
std::map<unsigned, std::set<unsigned> > epsilon_map;
|
||||
|
||||
void make_transition(unsigned start, char symbol, unsigned end) {
|
||||
transition_map[start][symbol] = end;
|
||||
}
|
||||
|
||||
void make_epsilon_move(unsigned start, unsigned end) {
|
||||
epsilon_map[start].insert(end);
|
||||
}
|
||||
|
||||
// Convert a regular expression to an e-NFA using Thompson's construction
|
||||
void convert_re(expr * e, unsigned & start, unsigned & end, seq_util & u);
|
||||
|
||||
public:
|
||||
nfa(seq_util & u, expr * e)
|
||||
: m_valid(true), m_next_id(0), m_start_state(0), m_end_state(0) {
|
||||
convert_re(e, m_start_state, m_end_state, u);
|
||||
}
|
||||
|
||||
nfa() : m_valid(false), m_next_id(0), m_start_state(0), m_end_state(0) {}
|
||||
|
||||
bool is_valid() const {
|
||||
return m_valid;
|
||||
}
|
||||
|
||||
void epsilon_closure(unsigned start, std::set<unsigned> & closure);
|
||||
|
||||
bool matches(zstring input);
|
||||
};
|
||||
|
||||
class theory_str : public theory {
|
||||
struct T_cut
|
||||
{
|
||||
|
@ -274,7 +320,7 @@ namespace smt {
|
|||
std::map<std::pair<expr*, zstring>, expr*> regex_in_bool_map;
|
||||
std::map<expr*, std::set<zstring> > regex_in_var_reg_str_map;
|
||||
|
||||
// std::map<expr*, nfa> regex_nfa_cache; // Regex term --> NFA
|
||||
std::map<expr*, nfa> regex_nfa_cache; // Regex term --> NFA
|
||||
|
||||
char * char_set;
|
||||
std::map<char, int> charSetLookupTable;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue