3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-29 11:55:51 +00:00

Integrate new regex solver (#4602)

* std::cout debugging statements

* comment out std::cout debugging as this is now a shared fork

* convert std::cout to TRACE statements for seq_rewriter and seq_regex

* add cases to min_length and max_length for regexes

* bug fix

* update min_length and max_length functions for REs

* initial pass on simplifying derivative normal forms by eliminating redundant predicates locally

* add seq_regex_brief trace statements

* working on debugging ref count issue

* fix ref count bug and convert trace statements to seq_regex_brief

* add compact tracing for cache hits/misses

* seq_regex fix cache hit/miss tracing and wrapper around is_nullable

* minor

* label and disable more experimental changes for testing

* minor documentation / tracing

* a few more @EXP annotations

* dead state elimination skeleton code

* progress on dead state elimination

* more progress on dead state elimination

* refactor dead state class to separate self-contained state_graph class

* finish factoring state_graph to only work with unsigned values, and implement separate functionality for expr* logic

* implement get_all_derivatives, add debug tracing

* trace statements for debugging is_nullable loop bug

* fix is_nullable loop bug

* comment out local nullable change and mark experimental

* pretty printing for state_graph

* rewrite state graph to remove the fragile assumption that all edges from a state are added at a time

* start of general cycle detection check + fix some comments

* implement full cycle detection procedure

* normalize derivative conditions to form 'ele <= a'

* order derivative conditions by character code

* fix confusing names m_to and m_from

* assign increasing state IDs from 1 instead of using get_id on AST node

* remove elim_condition call in get_all_derivatives

* use u_map instead of uint_map to avoid memory leak

* remove unnecessary call to is_ground

* debugging

* small improvements to seq_regex_brief tracing

* fix bug on evil2 example

* save work

* new propagate code

* work in progress on using same seq sort for deriv calls

* avoid re-computing derivatives: use same head var for every derivative call

* use min_length on regexes to prune search

* simple implementation of can_be_in_cycle using rank function idea

* add a disabled experimental change

* minor cleanup comments, etc.

* seq_rewriter cleanup for PR

* typo noticed by Nikolaj

* move state graph to util/state_graph

* re-add accidentally removed line

* clean up seq_regex code removing obsolete functions and comments

* a few more cleanup items

* remove experimental functionality for integration

* fix compilation

* remove some tracing and TODOs

* remove old comment

* update copyright dates to 2020

* feedback from Nikolaj

* use [] for map access

* make state_graph methods constant

* avoid recursion in mark_dead_recursive and mark_live_recursive

* a possible bug fix in propagate_nonempty

* write down list of invariants in state_graph

* implement partial invariant check and insert CASSERT statements

* expand on invariant check and tracing

* finish state graph invariant check

* minor tweaks

* regex propagation: convert first two axioms to propagations

* remove obsolete regex solver functionality

Co-authored-by: calebstanford-msr <t-casta@microsoft.com>
This commit is contained in:
Caleb Stanford 2020-07-30 16:54:49 -04:00 committed by GitHub
parent 293b0b8cc2
commit 976e4c91b0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 922 additions and 257 deletions

410
src/util/state_graph.cpp Normal file
View file

@ -0,0 +1,410 @@
/*++
Copyright (c) 2020 Microsoft Corporation
Module Name:
state_graph.cpp
Abstract:
Data structure for incrementally tracking "live" and "dead" states in an
abstract transition system.
Author:
Caleb Stanford (calebstanford-msr / cdstanford) 2020-7
--*/
#include "state_graph.h"
/*
  Register a state that has not been seen before.
  - Grows the union-find structure until it has a variable for s.
  - Records s as seen and unexplored.
  - Gives s an empty entry in each of the three edge relations.
  Precondition: s is not in m_seen.
*/
void state_graph::add_state_core(state s) {
    STRACE("state_graph", tout << "add(" << s << ") ";);
    SASSERT(!m_seen.contains(s));
    // Union-find variables are created one at a time until index s exists
    while (m_state_ufind.get_num_vars() <= s) {
        m_state_ufind.mk_var();
    }
    // Start with no edges in either direction
    m_sources_maybecycle.insert(s, state_set());
    m_sources.insert(s, state_set());
    m_targets.insert(s, state_set());
    // Classify the new state as seen but not yet explored
    m_unexplored.insert(s);
    m_seen.insert(s);
}
/*
  Partially delete a state: drop its entries in the edge relations and
  take it out of m_unknown. The state stays in m_seen, so it cannot be
  added again later.
  Preconditions:
  - s has been seen and is unknown
  - s is no longer a root in the union-find structure
  - the caller has already renamed every edge to or from s
*/
void state_graph::remove_state_core(state s) {
    STRACE("state_graph", tout << "del(" << s << ") ";);
    SASSERT(m_seen.contains(s));
    SASSERT(!m_state_ufind.is_root(s));
    SASSERT(m_unknown.contains(s));
    m_unknown.remove(s);
    m_sources_maybecycle.remove(s);
    m_sources.remove(s);
    m_targets.remove(s);
}
/*
  Move s from the unexplored set to the unknown set.
  Precondition: s is a union-find root and is currently unexplored.
*/
void state_graph::mark_unknown_core(state s) {
    STRACE("state_graph", tout << "unk(" << s << ") ";);
    SASSERT(m_state_ufind.is_root(s));
    SASSERT(m_unexplored.contains(s));
    m_unknown.insert(s);
    m_unexplored.remove(s);
}
/*
  Move s from the unknown set to the live set.
  Precondition: s is a union-find root and is currently unknown.
*/
void state_graph::mark_live_core(state s) {
    STRACE("state_graph", tout << "live(" << s << ") ";);
    SASSERT(m_state_ufind.is_root(s));
    SASSERT(m_unknown.contains(s));
    m_live.insert(s);
    m_unknown.remove(s);
}
/*
  Move s from the unknown set to the dead set.
  Precondition: s is a union-find root and is currently unknown.
*/
void state_graph::mark_dead_core(state s) {
    STRACE("state_graph", tout << "dead(" << s << ") ";);
    SASSERT(m_state_ufind.is_root(s));
    SASSERT(m_unknown.contains(s));
    m_dead.insert(s);
    m_unknown.remove(s);
}
/*
  Insert the edge s1 -> s2 into the edge relations.
  - Self-loops (s1 == s2) are ignored.
  - 'maybecycle' = false is a promise by the caller that this edge can
    never lie on a cycle; such edges are left out of
    m_sources_maybecycle.
  - If the edge is already present, the only possible change is a
    downgrade: maybecycle = false removes an existing maybecycle mark,
    while maybecycle = true leaves the edge as it was.
  Precondition: s1 and s2 are union-find roots.
*/
void state_graph::add_edge_core(state s1, state s2, bool maybecycle) {
    STRACE("state_graph", tout << "add(" << s1 << "," << s2 << ","
                               << (maybecycle ? "y" : "n") << ") ";);
    SASSERT(m_state_ufind.is_root(s1));
    SASSERT(m_state_ufind.is_root(s2));
    if (s1 == s2) {
        return;
    }
    if (m_targets[s1].contains(s2)) {
        // Existing edge: a "never on a cycle" annotation wins
        if (!maybecycle && m_sources_maybecycle[s2].contains(s1)) {
            m_sources_maybecycle[s2].remove(s1);
        }
        return;
    }
    // New edge: record it in both directions
    m_targets[s1].insert(s2);
    m_sources[s2].insert(s1);
    if (maybecycle) {
        m_sources_maybecycle[s2].insert(s1);
    }
}
/*
  Delete the edge s1 -> s2 from all three edge relations.
  Precondition: the edge is present in both m_targets and m_sources.
*/
void state_graph::remove_edge_core(state s1, state s2) {
    STRACE("state_graph", tout << "del(" << s1 << "," << s2 << ") ";);
    SASSERT(m_targets[s1].contains(s2));
    SASSERT(m_sources[s2].contains(s1));
    // Backward relations first, then the forward one
    m_sources_maybecycle[s2].remove(s1);
    m_sources[s2].remove(s1);
    m_targets[s1].remove(s2);
}
/*
  Replace the edge old1 -> old2 by the edge new1 -> new2, carrying over
  the maybecycle annotation of the old edge. The new edge goes through
  add_edge_core, so a self-loop is dropped and an already existing edge
  is merged with the carried-over annotation.
  Precondition: the edge old1 -> old2 exists.
*/
void state_graph::rename_edge_core(state old1, state old2,
                                   state new1, state new2) {
    SASSERT(m_targets[old1].contains(old2));
    SASSERT(m_sources[old2].contains(old1));
    // Read the annotation before the old edge disappears
    bool const was_maybecycle = m_sources_maybecycle[old2].contains(old1);
    remove_edge_core(old1, old2);
    add_edge_core(new1, new2, was_maybecycle);
}
/*
  Merge two states (or, in the overload taking a set, a whole set of
  states) into a single state and return the surviving state. Edges of
  the absorbed state are re-attached to the survivor.
  Preconditions:
  - The set should be nonempty
  - Every state in the set should be unknown
  - Each state should currently exist
  - If passing a set of states by reference, it should not be a set
    from the edge relations, as merging states modifies edge relations.

  Which of the two states survives is decided by the union-find
  structure: after the merge, the state that is still a root is kept.
*/
auto state_graph::merge_states(state s1, state s2) -> state {
    SASSERT(m_state_ufind.is_root(s1));
    SASSERT(m_state_ufind.is_root(s2));
    SASSERT(m_unknown.contains(s1));
    SASSERT(m_unknown.contains(s2));
    STRACE("state_graph", tout << "merge(" << s1 << "," << s2 << ") ";);
    m_state_ufind.merge(s1, s2);
    bool const s2_survives = m_state_ufind.is_root(s2);
    state const kept = s2_survives ? s2 : s1;
    state const absorbed = s2_survives ? s1 : s2;
    // Re-attach outgoing and then incoming edges of the absorbed state.
    // NOTE(review): each rename removes the current element from the set
    // being iterated; this presumably relies on state_set iterators
    // tolerating removal during iteration -- confirm against state_set.
    for (auto succ: m_targets[absorbed]) {
        rename_edge_core(absorbed, succ, kept, succ);
    }
    for (auto pred: m_sources[absorbed]) {
        rename_edge_core(pred, absorbed, pred, kept);
    }
    remove_state_core(absorbed);
    return kept;
}
/*
  Merge every state in s_set into one state and return it.
  The first element seeds the result; each further element is merged
  into the running result with the two-state overload.
  Precondition: s_set is nonempty.
*/
auto state_graph::merge_states(state_set& s_set) -> state {
    SASSERT(s_set.num_elems() > 0);
    state merged = 0; // overwritten by the first element of the set
    bool seeded = false;
    for (auto s: s_set) {
        merged = seeded ? merge_states(merged, s) : s;
        seeded = true;
    }
    return merged;
}
/*
  Mark s live and propagate liveness backwards: every state with an
  edge into a live state is itself live. The search is iterative (an
  explicit stack) rather than recursive.
  Precondition: s is live or unknown.

  A predecessor reached during the backwards search may still be
  unexplored, because add_edge accepts an unexplored source and leaves
  it unexplored. Such a state is first moved to unknown (exactly as
  mark_live does for its argument) and then to live, so that it never
  ends up in both m_unexplored and m_live.
*/
void state_graph::mark_live_recursive(state s) {
    SASSERT(m_live.contains(s) || m_unknown.contains(s));
    vector<state> to_search;
    to_search.push_back(s);
    while (to_search.size() > 0) {
        state x = to_search.back();
        to_search.pop_back();
        // Already live: its predecessors were handled when it was marked
        if (m_live.contains(x)) continue;
        // Predecessor whose edges are still being added
        if (m_unexplored.contains(x)) mark_unknown_core(x);
        // A dead state only has dead targets, so it cannot be reached here
        SASSERT(m_unknown.contains(x));
        mark_live_core(x);
        for (auto x_from: m_sources[x]) {
            to_search.push_back(x_from);
        }
    }
}
/*
  Report whether no target of s is still undecided, i.e. whether no
  target is unknown or unexplored. Live targets are ruled out by an
  assertion, so a true result means every target is dead.
  Precondition: s is unknown
*/
bool state_graph::all_targets_dead(state s) {
    SASSERT(m_unknown.contains(s));
    for (auto succ: m_targets[s]) {
        // an unknown state with a live target would already be live
        SASSERT(!m_live.contains(succ));
        bool const undecided =
            m_unknown.contains(succ) || m_unexplored.contains(succ);
        if (undecided) {
            return false;
        }
    }
    return true;
}
/*
  Decide whether s can now be classified as dead and, if so, mark it
  and re-examine every state with an edge into it. Implemented with an
  explicit worklist instead of recursion.
  A state is marked dead only if it is unknown and all_targets_dead
  holds for it; any other state taken from the worklist is skipped.
  Precondition: s is live, dead, or unknown
*/
void state_graph::mark_dead_recursive(state s) {
    SASSERT(m_live.contains(s) || m_dead.contains(s) || m_unknown.contains(s));
    vector<state> worklist;
    worklist.push_back(s);
    while (worklist.size() != 0) {
        state cur = worklist.back();
        worklist.pop_back();
        // all_targets_dead requires an unknown state, hence the order
        if (m_unknown.contains(cur) && all_targets_dead(cur)) {
            mark_dead_core(cur);
            // predecessors may have just lost their last undecided target
            for (auto pred: m_sources[cur]) {
                worklist.push_back(pred);
            }
        }
    }
}
/*
  Collapse all cycles of unknown states through s into a single state
  and return that state.
  Precondition: s is unknown.

  The search walks backwards from s, following only maybecycle edges
  and only through unknown states. A state is seen twice on top of the
  stack: the first time its predecessors are pushed, the second time
  (after those predecessors have been processed) it is resolved. On
  resolution it joins the collected set if one of its maybecycle
  predecessors is already in that set. s itself starts out resolved and
  in the set.
*/
auto state_graph::merge_all_cycles(state s) -> state {
    SASSERT(m_unknown.contains(s));
    state_set visited;  // predecessors have been pushed
    state_set resolved; // membership in the collected set is decided
    state_set scc;      // states known to share a cycle with s
    resolved.insert(s);
    scc.insert(s);
    vector<state> stack;
    stack.push_back(s);
    while (stack.size() != 0) {
        state cur = stack.back();
        if (!visited.contains(cur)) {
            // First encounter: keep cur on the stack and expand it
            visited.insert(cur);
            for (auto pred: m_sources_maybecycle[cur]) {
                if (m_unknown.contains(pred)) {
                    stack.push_back(pred);
                }
            }
            continue;
        }
        // Second (or later) encounter: cur leaves the stack
        stack.pop_back();
        if (resolved.contains(cur)) {
            continue;
        }
        resolved.insert(cur);
        for (auto pred: m_sources_maybecycle[cur]) {
            if (scc.contains(pred)) {
                scc.insert(cur);
                break;
            }
        }
    }
    // scc is the union of all cycles containing s
    return merge_states(scc);
}
/*
Exposed methods
*/
/*
  Add state s to the graph. Does nothing (not even tracing) if s has
  already been seen; otherwise registers it and, when the "state_graph"
  check is enabled, re-validates the class invariants.
*/
void state_graph::add_state(state s) {
    if (!m_seen.contains(s)) {
        STRACE("state_graph", tout << "[state_graph] adding state " << s << ": ";);
        add_state_core(s);
        CASSERT("state_graph", check_invariant());
        STRACE("state_graph", tout << std::endl;);
    }
}
/*
  Declare s live and propagate liveness to the states that reach it.
  An unexplored s is first moved to unknown, which is the state
  mark_live_recursive expects; an already live s is accepted as is.
  Precondition: s is a union-find root and is unexplored or live.
*/
void state_graph::mark_live(state s) {
    STRACE("state_graph", tout << "[state_graph] marking live " << s << ": ";);
    SASSERT(m_unexplored.contains(s) || m_live.contains(s));
    SASSERT(m_state_ufind.is_root(s));
    bool const still_unexplored = m_unexplored.contains(s);
    if (still_unexplored) {
        mark_unknown_core(s);
    }
    mark_live_recursive(s);
    CASSERT("state_graph", check_invariant());
    STRACE("state_graph", tout << std::endl;);
}
/*
  Add the edge s1 -> s2. s2 may be any seen state; it is replaced by
  its union-find representative before the edge is stored. If that
  representative is live, s1 is marked live as well.
  'maybecycle' is forwarded to add_edge_core unchanged.
  Preconditions:
  - s1 is a union-find root and is unexplored or live
  - s2 has been seen
*/
void state_graph::add_edge(state s1, state s2, bool maybecycle) {
    STRACE("state_graph", tout << "[state_graph] adding edge "
                               << s1 << "->" << s2 << ": ";);
    SASSERT(m_unexplored.contains(s1) || m_live.contains(s1));
    SASSERT(m_state_ufind.is_root(s1));
    SASSERT(m_seen.contains(s2));
    state const target = m_state_ufind.find(s2);
    add_edge_core(s1, target, maybecycle);
    if (m_live.contains(target)) {
        // an edge into a live state makes its source live
        mark_live(s1);
    }
    CASSERT("state_graph", check_invariant());
    STRACE("state_graph", tout << std::endl;);
}
/*
  Signal that s is finished being explored. A live s is left untouched
  (no tracing, no invariant check). Otherwise s becomes unknown, the
  cycles through it are merged into one state, and that state is
  checked for being dead.
  Precondition: s is a union-find root and is unexplored or live.
*/
void state_graph::mark_done(state s) {
    SASSERT(m_unexplored.contains(s) || m_live.contains(s));
    SASSERT(m_state_ufind.is_root(s));
    if (m_live.contains(s)) {
        return;
    }
    STRACE("state_graph", tout << "[state_graph] marking done " << s << ": ";);
    if (m_unexplored.contains(s)) {
        mark_unknown_core(s);
    }
    // The merge may pick a different representative than s
    state const merged = merge_all_cycles(s);
    mark_dead_recursive(merged);
    CASSERT("state_graph", check_invariant());
    STRACE("state_graph", tout << std::endl;);
}
/*
  Size of the graph, measured as the number of variables in the
  union-find structure. add_state_core creates variables up to the
  state's own index, so this is one more than the largest state added.
*/
unsigned state_graph::get_size() const {
    unsigned const num_vars = m_state_ufind.get_num_vars();
    return num_vars;
}
/*
  True iff s is in m_seen. Merged states stay in m_seen.
*/
bool state_graph::is_seen(state s) const {
    bool const seen = m_seen.contains(s);
    return seen;
}
/*
  True iff the union-find representative of s is in the live set.
*/
bool state_graph::is_live(state s) const {
    state const root = m_state_ufind.find(s);
    return m_live.contains(root);
}
/*
  True iff the union-find representative of s is in the dead set.
*/
bool state_graph::is_dead(state s) const {
    state const root = m_state_ufind.find(s);
    return m_dead.contains(root);
}
/*
  True iff s has been seen and its union-find representative is no
  longer unexplored. The representative is only looked up for seen
  states.
*/
bool state_graph::is_done(state s) const {
    if (!m_seen.contains(s)) {
        return false;
    }
    state const root = m_state_ufind.find(s);
    return !m_unexplored.contains(root);
}
/*
Class invariants check (and associated auxiliary functions)
check_invariant performs a sequence of SASSERT assertions,
then always returns true.
*/
#ifdef Z3DEBUG
/*
  True iff every element of set1 is also an element of set2.
  (Debug-only helper for check_invariant.)
*/
bool state_graph::is_subset(state_set set1, state_set set2) const {
    bool all_contained = true;
    for (auto elem: set1) {
        if (!set2.contains(elem)) {
            all_contained = false;
            break;
        }
    }
    return all_contained;
}
/*
  True iff no element of set1 is an element of set2.
  (Debug-only helper for check_invariant.)
*/
bool state_graph::is_disjoint(state_set set1, state_set set2) const {
    bool overlap_found = false;
    for (auto elem: set1) {
        if (set2.contains(elem)) {
            overlap_found = true;
            break;
        }
    }
    return !overlap_found;
}
/*
  Assertion helpers used by check_invariant.
  - ASSERT_FOR_ALL_STATES binds each element of STATESET to 's' and
    asserts COND.
  - ASSERT_FOR_ALL_EDGES binds the key of each entry of EDGEREL to 's1'
    and each element of that entry's value set to 's2', then asserts
    COND. Entries are bound by const reference so that the value set of
    every entry is not copied on each iteration.
  Both expand to a block followed by ((void) 0), so the use site has to
  supply the terminating semicolon.
*/
#define ASSERT_FOR_ALL_STATES(STATESET, COND) { \
    for (auto s: (STATESET)) { SASSERT(COND); }} ((void) 0)
#define ASSERT_FOR_ALL_EDGES(EDGEREL, COND) { \
    for (auto const& e: (EDGEREL)) { \
        state s1 = e.m_key; \
        for (auto s2: e.m_value) { SASSERT(COND); } \
    }} ((void) 0)
/*
Validate the class invariants with a sequence of SASSERT checks and
return true. The return value carries no information of its own; it
exists so that the call can be wrapped in CASSERT.
In the ASSERT_FOR_ALL_EDGES checks, s1 is the key of the relation being
iterated and s2 an element of the set stored under that key. For
m_targets that is (source, target); for m_sources and
m_sources_maybecycle it is (target, source).
*/
bool state_graph::check_invariant() const {
// Check state invariants
// Each of the four classification sets only contains seen states
SASSERT(is_subset(m_live, m_seen));
SASSERT(is_subset(m_dead, m_seen));
SASSERT(is_subset(m_unknown, m_seen));
SASSERT(is_subset(m_unexplored, m_seen));
// The four classification sets are pairwise disjoint
SASSERT(is_disjoint(m_live, m_dead));
SASSERT(is_disjoint(m_live, m_unknown));
SASSERT(is_disjoint(m_live, m_unexplored));
SASSERT(is_disjoint(m_dead, m_unknown));
SASSERT(is_disjoint(m_dead, m_unexplored));
SASSERT(is_disjoint(m_unknown, m_unexplored));
// Every seen state has a union-find variable
ASSERT_FOR_ALL_STATES(m_seen, s < m_state_ufind.get_num_vars());
// A seen state is a union-find root exactly when it is classified
// as live, dead, unknown or unexplored
ASSERT_FOR_ALL_STATES(m_seen,
(m_state_ufind.is_root(s) ==
(m_live.contains(s) || m_dead.contains(s) ||
m_unknown.contains(s) || m_unexplored.contains(s))));
// Check edge invariants
// maybecycle edges are a subset of all edges
ASSERT_FOR_ALL_EDGES(m_sources_maybecycle, m_sources[s1].contains(s2));
// m_sources and m_targets describe the same edges in opposite directions
ASSERT_FOR_ALL_EDGES(m_sources, m_targets[s2].contains(s1));
ASSERT_FOR_ALL_EDGES(m_targets, m_sources[s2].contains(s1));
// Edges only connect union-find roots
ASSERT_FOR_ALL_EDGES(m_targets,
m_state_ufind.is_root(s1) && m_state_ufind.is_root(s2));
// No self-loops
ASSERT_FOR_ALL_EDGES(m_targets, s1 != s2);
// Check relationship between states and edges
// The source of an edge into a live state is live
ASSERT_FOR_ALL_EDGES(m_targets,
!m_live.contains(s2) || m_live.contains(s1));
// Every target of a dead state is dead
ASSERT_FOR_ALL_STATES(m_dead, is_subset(m_targets[s], m_dead));
// Every unknown state has at least one target that is not dead
ASSERT_FOR_ALL_STATES(m_unknown, !is_subset(m_targets[s], m_dead));
// For the "no cycles" of unknown states on maybecycle edges,
// we only do a partial check for cycles of size 2
ASSERT_FOR_ALL_EDGES(m_sources_maybecycle,
!(m_unknown.contains(s1) && m_unknown.contains(s2) &&
m_sources_maybecycle[s2].contains(s1)));
STRACE("state_graph", tout << "(invariant passed) ";);
return true;
}
#endif
/*
Pretty printing
*/
std::ostream& state_graph::display(std::ostream& o) const {
o << "---------- State Graph ----------" << std::endl
<< "Seen:";
for (auto s: m_seen) {
o << " " << s;
state s_root = m_state_ufind.find(s);
if (s_root != s)
o << "(=" << s_root << ")";
}
o << std::endl
<< "Live:" << m_live << std::endl
<< "Dead:" << m_dead << std::endl
<< "Unknown:" << m_unknown << std::endl
<< "Unexplored:" << m_unexplored << std::endl
<< "Edges:" << std::endl;
for (auto s1: m_seen) {
if (m_state_ufind.is_root(s1)) {
o << " " << s1 << " -> " << m_targets[s1] << std::endl;
}
}
o << "---------------------------------" << std::endl;
return o;
}