From 2c02264a94d2377ebf5f64162813b7e379446619 Mon Sep 17 00:00:00 2001
From: Caleb Stanford <caleb.pirsquared@gmail.com>
Date: Thu, 13 Aug 2020 15:47:36 -0400
Subject: [PATCH] Regex solver updates (#4636)

* std::cout debugging statements

* comment out std::cout debugging as this is now a shared fork

* convert std::cout to TRACE statements for seq_rewriter and seq_regex

* add cases to min_length and max_length for regexes

* bug fix

* update min_length and max_length functions for REs

* initial pass on simplifying derivative normal forms by eliminating redundant predicates locally

* add seq_regex_brief trace statements

* working on debugging ref count issue

* fix ref count bug and convert trace statements to seq_regex_brief

* add compact tracing for cache hits/misses

* seq_regex fix cache hit/miss tracing and wrapper around is_nullable

* minor

* label and disable more experimental changes for testing

* minor documentation / tracing

* a few more @EXP annotations

* dead state elimination skeleton code

* progress on dead state elimination

* more progress on dead state elimination

* refactor dead state class to separate self-contained state_graph class

* finish factoring state_graph to only work with unsigned values, and implement separate functionality for expr* logic

* implement get_all_derivatives, add debug tracing

* trace statements for debugging is_nullable loop bug

* fix is_nullable loop bug

* comment out local nullable change and mark experimental

* pretty printing for state_graph

* rewrite state graph to remove the fragile assumption that all edges from a state are added at a time

* start of general cycle detection check + fix some comments

* implement full cycle detection procedure

* normalize derivative conditions to form 'ele <= a'

* order derivative conditions by character code

* fix confusing names m_to and m_from

* assign increasing state IDs from 1 instead of using get_id on AST node

* remove elim_condition call in get_dall_derivatives

* use u_map instead of uint_map to avoid memory leak

* remove unnecessary call to is_ground

* debugging

* small improvements to seq_regex_brief tracing

* fix bug on evil2 example

* save work

* new propagate code

* work in progress on using same seq sort for deriv calls

* avoid re-computing derivatives: use same head var for every derivative call

* use min_length on regexes to prune search

* simple implementation of can_be_in_cycle using rank function idea

* add a disabled experimental change

* minor cleanup comments, etc.

* seq_rewriter cleanup for PR

* typo noticed by Nikolaj

* move state graph to util/state_graph

* re-add accidentally removed line

* clean up seq_regex code removing obsolete functions and comments

* a few more cleanup items

* oops, missed merge change to fix compilation

* disabled change to lift unions to the top level and treat them seperately in seq_regex solver

* added get_overapprox_regex to over-approximate regex membership constraints

* replace calls to is_epsilon with a centrally available method in seq_decl_plugin

* simplifications and modifications in get_overapprox_regex and related

* added approximation support for sequence expressions that use ite

* removed is_app check that was redundant

* tweak differences with upstream

* rewrite derivative leaves

* enable Antimorov-style derivatives via lifting unions in the solver

* TODO placeholders for outputting state graph

* change order in seq_regex propagate_in_re

* implement a more restricted form of Antimorov derivatives via a special op code to indicate lifting unions

* minor

* new Antimorov optimizations based on BDD compatibility checking

* seq regex tracing for # of derivatives

* fix get_cofactors (currently this fix is buggy)

* partially revert get_cofactors buggy change

* re-implement get_cofactors to more efficiently explore nodes in the derivative expression

* dgml generation for state graph

* fix release build

* improved dgml output

* bug fixes in dgml generation

* dot output support for state_graph and moved dgml and dot output under CASSERT

* updated tracing of what regex corresponds to what state id with /tr:state_graph

* clean up & document Antimorov derivative support

* remove op cache tracing

* remove re_rank experimental idea

* small fix

* fix Antimorov derivative (important change for the good performance)

* remove unused and unnecessary code

* implemented simpler efficient get_cofactors alternative mk_deriv_accept

* simplifications in propagate_accept, and trace unusual cases

* document the various seq_regex tracing & debugging command-line options

* fix debug build (broken tracing)

* guard eager Antimorov lifting for possible disabling

* fix bug in propagate_accept Rule 1

* disable eager version of Antimorov lifting for performance reasons

* remove some remaining obsolete comments

Co-authored-by: calebstanford-msr <t-casta@microsoft.com>
Co-authored-by: Margus Veanes <margus@microsoft.com>
---
 src/ast/rewriter/seq_rewriter.cpp | 252 ++++++++++++++++++++++---
 src/ast/rewriter/seq_rewriter.h   |   6 +-
 src/ast/seq_decl_plugin.cpp       |   2 +
 src/ast/seq_decl_plugin.h         |   7 +-
 src/smt/seq_regex.cpp             | 293 ++++++++++++++++++++++--------
 src/smt/seq_regex.h               |  76 +++++++-
 src/util/state_graph.cpp          |  60 +++---
 7 files changed, 556 insertions(+), 140 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index 5dae6921d..9cd6fe00d 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -557,6 +557,12 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con
             st = mk_re_concat(args[0], args[1], result); 
         }
         break;
+    case _OP_RE_ANTIMOROV_UNION:
+        SASSERT(num_args == 2);
+        // Rewrite Antimorov union to real union
+        result = re().mk_union(args[0], args[1]);
+        st = BR_REWRITE1;
+        break;
     case OP_RE_UNION:
         if (num_args == 1) {
             result = args[0]; 
@@ -2365,11 +2371,26 @@ br_status seq_rewriter::mk_re_reverse(expr* r, expr_ref& result) {
     }
 }
 
+/***************************************************
+ *****          Begin Derivative Code          *****
+ ***************************************************/
+
 /*
     Symbolic derivative: seq -> regex -> regex
     seq should be single char
-*/
 
+    This is the rewriter entrypoint for computing a derivative.
+    Use mk_derivative from seq_decl_plugin instead to create a derivative
+    expression without computing it (simplifying).
+
+    This calls mk_derivative, the main logic which builds a derivative
+    recursively, but mk_derivative doesn't guarantee full simplification.
+    Once the derivative is built, we return BR_REWRITE_FULL so that
+    any remaining possible simplification is performed from the bottom up.
+
+    Rewriting also replaces _OP_RE_ANTIMOROV_UNION, which is produced
+    by is_derivative, with real union.
+*/
 br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) {
     result = mk_derivative(ele, r);
     // TBD: we may even declare BR_DONE here and potentially miss some simplifications
@@ -2377,14 +2398,105 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) {
 }
 
 /*
-    Memoized, recursive implementation of the symbolic derivative such that
-    the result is in an optimized BDD form.
+    Note: Derivative Normal Form
 
-    Definition of BDD form:
-        if-then-elses are pushed outwards
-        and sorted by condition ID (cond->get_id()), from largest on
-        the outside to smallest on the inside.
-        Duplicate nested conditions are eliminated.
+    When computing derivatives recursively, we preserve the following
+    BDD normal form:
+
+    - At the top level, the derivative is a union of Antimorov derivatives
+      (Conceptually each element of the union is a different derivative).
+      We currently express this derivative using an internal op code:
+          _OP_RE_ANTIMOROV_UNION
+    - An Antimorov derivative is a nested if-then-else term.
+      if-then-elses are pushed outwards and sorted by condition ID
+      (cond->get_id()), from largest on the outside to smallest on the
+      inside. Duplicate nested conditions are eliminated.
+    - The leaves of the if-then-else BDD can have unions themselves,
+      but these are interpreted as Regex union, not as separate Antimorov
+      derivatives.
+
+    To debug the normal form, call Z3 with -dbg:seq_regex:
+    this calls check_deriv_normal_form (below) periodically.
+
+    The main logic is in mk_der_op_rec for combining normal forms
+    (some also in mk_der_compl_rec).
+*/
+
+#ifdef Z3DEBUG
+/*
+    Debugging to check the derivative normal form that we assume
+    (see definition above).
+
+    This may fail on unusual/unexpected REs, such as those containing
+    regex variables, but this is by design as this is only checked
+    during debugging, and we have not considered how normal form
+    should apply in such cases.
+*/
+bool seq_rewriter::check_deriv_normal_form(expr* r, int level) {
+    if (level == 3) { // top level
+        STRACE("seq_verbose", tout
+            << "Checking derivative normal form invariant...";);
+    }
+    expr *r1 = nullptr, *r2 = nullptr, *p = nullptr, *s = nullptr;
+    unsigned lo = 0, hi = 0;
+    STRACE("seq_verbose", tout << " (level " << level << ")";);
+    int new_level = 0;
+    if (re().is_antimorov_union(r)) {
+        SASSERT(level >= 2);
+        new_level = 2;
+    }
+    else if (m().is_ite(r)) {
+        SASSERT(level >= 1);
+        new_level = 1;
+    }
+
+    SASSERT(!re().is_diff(r));
+    SASSERT(!re().is_opt(r));
+    SASSERT(!re().is_plus(r));
+
+    if (re().is_antimorov_union(r, r1, r2) ||
+        re().is_concat(r, r1, r2) ||
+        re().is_union(r, r1, r2) ||
+        re().is_intersection(r, r1, r2) ||
+        m().is_ite(r, p, r1, r2)) {
+        check_deriv_normal_form(r1, new_level);
+        check_deriv_normal_form(r2, new_level);
+    }
+    else if (re().is_star(r, r1) ||
+             re().is_complement(r, r1) ||
+             re().is_loop(r, r1, lo) ||
+             re().is_loop(r, r1, lo, hi)) {
+        check_deriv_normal_form(r1, new_level);
+    }
+    else if (re().is_reverse(r, r1)) {
+        SASSERT(re().is_to_re(r1));
+    }
+    else if (re().is_full_seq(r) ||
+             re().is_empty(r) ||
+             re().is_range(r) ||
+             re().is_full_char(r) ||
+             re().is_of_pred(r) ||
+             re().is_to_re(r, s)) {
+        // OK
+    }
+    else {
+        SASSERT(false);
+    }
+    if (level == 3) {
+        STRACE("seq_verbose", tout << " passed!" << std::endl;);
+    }
+    return true;
+}
+#endif
+
+/*
+    Memoized, recursive implementation of the symbolic derivative such that
+    the result is in normal form.
+
+    Functions without _rec are memoized wrappers, which call the _rec
+    version if lookup fails.
+
+    The main logic is in mk_der_op_rec for combining normal forms.
 */
 expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) {
     STRACE("seq_verbose", tout << "derivative: " << mk_pp(ele, m())
@@ -2396,9 +2508,14 @@ expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) {
     }
     STRACE("seq_verbose", tout << "derivative result: "
                                << mk_pp(result, m()) << std::endl;);
+    CASSERT("seq_regex", check_deriv_normal_form(r));
     return result;
 }
 
+expr_ref seq_rewriter::mk_der_antimorov_union(expr* r1, expr* r2) {
+    return mk_der_op(_OP_RE_ANTIMOROV_UNION, r1, r2);
+}
+
 expr_ref seq_rewriter::mk_der_union(expr* r1, expr* r2) {
     return mk_der_op(OP_RE_UNION, r1, r2);
 }
@@ -2466,15 +2583,37 @@ bool seq_rewriter::pred_implies(expr* a, expr* b) {
 }
 
 /*
-    Apply a binary operation, preserving BDD normal form on derivative expressions.
+    Utility function to decide if two BDDs (nested if-then-else terms)
+    have exactly the same structure and conditions.
+*/
+bool seq_rewriter::ite_bdds_compatabile(expr* a, expr* b) {
+    expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr;
+    expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr;
+    if (m().is_ite(a, ca, a1, a2) && m().is_ite(b, cb, b1, b2)) {
+        return (ca == cb) && ite_bdds_compatabile(a1, b1)
+                          && ite_bdds_compatabile(a2, b2);
+    }
+    else if (m().is_ite(a) || m().is_ite(b)) {
+        return false;
+    }
+    else {
+        return true;
+    }
+}
+
+/*
+    Apply a binary operation, preserving normal form on derivative expressions.
 
     Preconditions:
-        - k is a binary op code on REs: one of concat, intersection, or union
-          (not difference)
-        - a and b are in BDD form
+        - k is one of the following binary op codes on REs:
+            OP_RE_INTERSECT
+            OP_RE_UNION
+            OP_RE_CONCAT
+            _OP_RE_ANTIMOROV_UNION
+        - a and b are in normal form (check_deriv_normal_form)
 
     Postcondition:
-        - result is in BDD form
+        - result is in normal form (check_deriv_normal_form)
 */
 expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
     STRACE("seq_verbose", tout << "mk_der_op_rec: " << k
@@ -2483,6 +2622,7 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
     expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr;
     expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr;
     expr_ref result(m());
+
     // Simplify if-then-elses whenever possible
     auto mk_ite = [&](expr* c, expr* a, expr* b) {
         return (a == b) ? a : m().mk_ite(c, a, b);
@@ -2497,6 +2637,46 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
         m().is_not(e, e);
         return e->get_id();
     };
+
+    // Choose when to lift a union to the top level, by converting
+    // it to an Antimorov union
+    // This implements a restricted form of Antimorov derivatives
+    if (k == OP_RE_UNION) {
+        if (re().is_antimorov_union(a) || re().is_antimorov_union(b)) {
+            k = _OP_RE_ANTIMOROV_UNION;
+        }
+        #if 0
+        // Disabled: eager Antimorov lifting unless BDDs are compatible
+        // Note: the check for BDD compatibility could be made more
+        // sophisticated: in an Antimorov union of n terms, we really
+        // want to check if any pair of them is compatible.
+        else if (m().is_ite(a) && m().is_ite(b) &&
+                 !ite_bdds_compatabile(a, b)) {
+            k = _OP_RE_ANTIMOROV_UNION;
+        }
+        #endif
+    }
+    if (k == _OP_RE_ANTIMOROV_UNION) {
+        result = re().mk_antimorov_union(a, b);
+        return result;
+    }
+    if (re().is_antimorov_union(a, a1, a2)) {
+        expr_ref r1(m()), r2(m());
+        r1 = mk_der_op(k, a1, b);
+        r2 = mk_der_op(k, a2, b);
+        result = re().mk_antimorov_union(r1, r2);
+        return result;
+    }
+    if (re().is_antimorov_union(b, b1, b2)) {
+        expr_ref r1(m()), r2(m());
+        r1 = mk_der_op(k, a, b1);
+        r2 = mk_der_op(k, a, b2);
+        result = re().mk_antimorov_union(r1, r2);
+        return result;
+    }
+
+    // Remaining non-union case: combine two if-then-else BDDs
+    // (underneath top-level Antimorov unions)
     if (m().is_ite(a, ca, a1, a2)) {
         expr_ref r1(m()), r2(m());
         expr_ref notca(m().mk_not(ca), m());
@@ -2594,6 +2774,7 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
         result = mk_der_op_rec(k, a, b);
         m_op_cache.insert(k, a, b, result);
     }
+    CASSERT("seq_regex", check_deriv_normal_form(result));
     return result;
 }
 
@@ -2603,13 +2784,22 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) {
     expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m());
     if (!result) {
         expr* c = nullptr, * r1 = nullptr, * r2 = nullptr;
-        if (m().is_ite(r, c, r1, r2)) {
+        if (re().is_antimorov_union(r, r1, r2)) {
+            // Convert union to intersection
+            // Result: Antimorov union at top level is lost, pushed inside ITEs
+            expr_ref res1(m()), res2(m());
+            res1 = mk_der_compl(r1);
+            res2 = mk_der_compl(r2);
+            result = mk_der_inter(res1, res2);
+        }
+        else if (m().is_ite(r, c, r1, r2)) {
             result = m().mk_ite(c, mk_der_compl(r1), mk_der_compl(r2));
         }
         else if (BR_FAILED == mk_re_complement(r, result))
             result = re().mk_complement(r);        
         m_op_cache.insert(OP_RE_COMPLEMENT, r, nullptr, result);
     }
+    CASSERT("seq_regex", check_deriv_normal_form(result));
     return result;
 }
 
@@ -2675,6 +2865,7 @@ expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) {
     }
     STRACE("seq_verbose", tout << "mk_der_cond result: "
         <<  mk_pp(result, m()) << std::endl;);
+    CASSERT("seq_regex", check_deriv_normal_form(result));
     return result;
 }
 
@@ -2696,7 +2887,11 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
         }
         expr_ref dr2 = mk_derivative(ele, r2);
         is_n = re_predicate(is_n, seq_sort);
-        return mk_der_union(result, mk_der_concat(is_n, dr2));        
+        // Instead of mk_der_union here, we use mk_der_antimorov_union to
+        // force the two cases to be considered separately and lifted to
+        // the top level. This avoids blowup in cases where determinization
+        // is expensive.
+        return mk_der_antimorov_union(result, mk_der_concat(is_n, dr2));
     }
     else if (re().is_star(r, r1)) {
         return mk_der_concat(mk_derivative(ele, r1), r);
@@ -2843,6 +3038,11 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
     return expr_ref(re().mk_derivative(ele, r), m());
 }
 
+/*************************************************
+ *****          End Derivative Code          *****
+ *************************************************/
+
+
 /*
  * pattern match against all ++ "abc" ++ all ++ "def" ++ all regexes.
 */
@@ -3128,11 +3328,11 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
         result = b;
         return BR_DONE;
     }
-    if (is_epsilon(a)) {
+    if (re().is_epsilon(a)) {
         result = b;
         return BR_DONE;
     }
-    if (is_epsilon(b)) {
+    if (re().is_epsilon(b)) {
         result = a;
         return BR_DONE;
     }
@@ -3249,11 +3449,11 @@ br_status seq_rewriter::mk_re_union0(expr* a, expr* b, expr_ref& result) {
         result = b;
         return BR_DONE;
     }
-    if (re().is_star(a) && is_epsilon(b)) {
+    if (re().is_star(a) && re().is_epsilon(b)) {
         result = a;
         return BR_DONE;
     }
-    if (re().is_star(b) && is_epsilon(a)) {
+    if (re().is_star(b) && re().is_epsilon(a)) {
         result = b;
         return BR_DONE;
     }
@@ -3558,11 +3758,11 @@ br_status seq_rewriter::mk_re_star(expr* a, expr_ref& result) {
             result = re().mk_star(re().mk_union(b, c1));
             return BR_REWRITE2;
         }
-        if (is_epsilon(b)) {
+        if (re().is_epsilon(b)) {
             result = re().mk_star(c);
             return BR_REWRITE2;
         }
-        if (is_epsilon(c)) {
+        if (re().is_epsilon(c)) {
             result = re().mk_star(b);
             return BR_REWRITE2;
         }
@@ -3599,7 +3799,7 @@ br_status seq_rewriter::mk_re_plus(expr* a, expr_ref& result) {
         result = a;
         return BR_DONE;
     }
-    if (is_epsilon(a)) {
+    if (re().is_epsilon(a)) {
         result = a;
         return BR_DONE;
     }
@@ -4252,12 +4452,6 @@ bool seq_rewriter::reduce_by_length(expr_ref_vector& ls, expr_ref_vector& rs,
     return true;
 }
 
-
-bool seq_rewriter::is_epsilon(expr* e) const {
-    expr* e1;
-    return re().is_to_re(e, e1) && str().is_empty(e1);
-}
-
 /**
    reduce for the case where rs = a constant string, 
    ls contains a substring that matches no substring of rs.
@@ -4357,6 +4551,8 @@ void seq_rewriter::op_cache::cleanup() {
     if (m_table.size() >= m_max_cache_size) {
         m_trail.reset();
         m_table.reset();
+        STRACE("seq_regex", tout << "Op cache reset!" << std::endl;);
+        STRACE("seq_regex_brief", tout << "(OP CACHE RESET) ";);
         STRACE("seq_verbose", tout << "Derivative op cache reset" << std::endl;);
     }
 }
diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h
index b9cc2707b..634eece7d 100644
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@@ -191,6 +191,11 @@ class seq_rewriter {
     expr_ref mk_der_inter(expr* a, expr* b);
     expr_ref mk_der_compl(expr* a);
     expr_ref mk_der_cond(expr* cond, expr* ele, sort* seq_sort);
+    expr_ref mk_der_antimorov_union(expr* r1, expr* r2);
+    bool ite_bdds_compatabile(expr* a, expr* b);
+    #ifdef Z3DEBUG
+    bool check_deriv_normal_form(expr* r, int level = 3);
+    #endif
 
     bool lt_char(expr* ch1, expr* ch2);
     bool eq_char(expr* ch1, expr* ch2);
@@ -277,7 +282,6 @@ class seq_rewriter {
     void add_next(u_map<expr*>& next, expr_ref_vector& trail, unsigned idx, expr* cond);
     bool is_sequence(expr* e, expr_ref_vector& seq);
     bool is_sequence(eautomaton& aut, expr_ref_vector& seq);
-    bool is_epsilon(expr* e) const;
     bool get_lengths(expr* e, expr_ref_vector& lens, rational& pos);
     bool reduce_back(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& new_eqs);
     bool reduce_front(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& new_eqs);
diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp
index d359f010c..afa3a023f 100644
--- a/src/ast/seq_decl_plugin.cpp
+++ b/src/ast/seq_decl_plugin.cpp
@@ -616,6 +616,7 @@ void seq_decl_plugin::init() {
     m_sigs[OP_RE_OF_PRED]        = alloc(psig, m, "re.of.pred", 1, 1, &predA, reA);
     m_sigs[OP_RE_REVERSE]        = alloc(psig, m, "re.reverse", 1, 1, &reA, reA);
     m_sigs[OP_RE_DERIVATIVE]     = alloc(psig, m, "re.derivative", 1, 2, AreA, reA);
+    m_sigs[_OP_RE_ANTIMOROV_UNION] = alloc(psig, m, "re.union", 1, 2, reAreA, reA);
     m_sigs[OP_SEQ_TO_RE]         = alloc(psig, m, "seq.to.re",  1, 1, &seqA, reA);
     m_sigs[OP_SEQ_IN_RE]         = alloc(psig, m, "seq.in.re", 1, 2, seqAreA, boolT);
     m_sigs[OP_SEQ_REPLACE_RE_ALL] = alloc(psig, m, "str.replace_re_all", 1, 3, seqAreAseqA, seqA);
@@ -760,6 +761,7 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters,
     case OP_RE_COMPLEMENT:
     case OP_RE_REVERSE:
     case OP_RE_DERIVATIVE:
+    case _OP_RE_ANTIMOROV_UNION:
         m_has_re = true;
         // fall-through
     case OP_SEQ_UNIT:
diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h
index 78286fac0..eb2feba7a 100644
--- a/src/ast/seq_decl_plugin.h
+++ b/src/ast/seq_decl_plugin.h
@@ -109,6 +109,7 @@ enum seq_op_kind {
     _OP_REGEXP_EMPTY,
     _OP_REGEXP_FULL_CHAR,
     _OP_RE_IS_NULLABLE,
+    _OP_RE_ANTIMOROV_UNION, // Lifted union for antimorov-style derivatives
     _OP_SEQ_SKOLEM,
     LAST_SEQ_OP
 };
@@ -237,9 +238,10 @@ class seq_util {
     mutable scoped_ptr<bv_util> m_bv;
     bv_util& bv() const;
 
+public:
+
     unsigned max_plus(unsigned x, unsigned y) const;
     unsigned max_mul(unsigned x, unsigned y) const;
-public:
 
     ast_manager& get_manager() const { return m; }
 
@@ -437,6 +439,7 @@ public:
         app* mk_of_pred(expr* p);
         app* mk_reverse(expr* r) { return m.mk_app(m_fid, OP_RE_REVERSE, r); }
         app* mk_derivative(expr* ele, expr* r) { return m.mk_app(m_fid, OP_RE_DERIVATIVE, ele, r); }
+        app* mk_antimorov_union(expr* r1, expr* r2) { return m.mk_app(m_fid, _OP_RE_ANTIMOROV_UNION, r1, r2); }
 
         bool is_to_re(expr const* n)    const { return is_app_of(n, m_fid, OP_SEQ_TO_RE); }
         bool is_concat(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_CONCAT); }
@@ -455,6 +458,7 @@ public:
         bool is_of_pred(expr const* n) const { return is_app_of(n, m_fid, OP_RE_OF_PRED); }
         bool is_reverse(expr const* n) const { return is_app_of(n, m_fid, OP_RE_REVERSE); }
         bool is_derivative(expr const* n) const { return is_app_of(n, m_fid, OP_RE_DERIVATIVE); }
+        bool is_antimorov_union(expr const* n) const { return is_app_of(n, m_fid, _OP_RE_ANTIMOROV_UNION); }
         MATCH_UNARY(is_to_re);
         MATCH_BINARY(is_concat);
         MATCH_BINARY(is_union);
@@ -468,6 +472,7 @@ public:
         MATCH_UNARY(is_of_pred);
         MATCH_UNARY(is_reverse);
         MATCH_BINARY(is_derivative);
+        MATCH_BINARY(is_antimorov_union);
         bool is_loop(expr const* n, expr*& body, unsigned& lo, unsigned& hi) const;
         bool is_loop(expr const* n, expr*& body, unsigned& lo) const;
         bool is_loop(expr const* n, expr*& body, expr*& lo, expr*& hi) const;
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 1803f5537..06b5e7f89 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -109,24 +109,6 @@ namespace smt {
             return;
         }
 
-        // Convert a non-ground sequence into an additional regex and
-        // strengthen the original regex constraint into an intersection
-        // for example:
-        //     (x ++ "a" ++ y) in b*
-        // is coverted to
-        //     (x ++ "a" ++ y) in intersect((.* ++ "a" ++ .*), b*)
-        if (!m.is_value(s)) {
-            expr_ref s_approx = get_overapprox_regex(s);
-            if (!re().is_full_seq(s_approx)) {
-                r = re().mk_inter(r, s_approx);
-                TRACE("seq_regex", tout
-                    << "get_overapprox_regex(" << mk_pp(s, m)
-                    << ") = " << mk_pp(s_approx, m) << std::endl;);
-                STRACE("seq_regex_brief", tout
-                    << "overapprox=" << state_str(r) << " ";);
-            }
-        }
-
         if (coallesce_in_re(lit)) {
             TRACE("seq_regex", tout
                 << "simplified conjunctions to an intersection" << std::endl;);
@@ -141,6 +123,26 @@ namespace smt {
             return;
         }
 
+        // Convert a non-ground sequence into an additional regex and
+        // strengthen the original regex constraint into an intersection
+        // for example:
+        //     (x ++ "a" ++ y) in b*
+        // is coverted to
+        //     (x ++ "a" ++ y) in intersect((.* ++ "a" ++ .*), b*)
+        expr_ref _r_temp_owner(m);
+        if (!m.is_value(s)) {
+            expr_ref s_approx = get_overapprox_regex(s);
+            if (!re().is_full_seq(s_approx)) {
+                r = re().mk_inter(r, s_approx);
+                _r_temp_owner = r;
+                TRACE("seq_regex", tout
+                    << "get_overapprox_regex(" << mk_pp(s, m)
+                    << ") = " << mk_pp(s_approx, m) << std::endl;);
+                STRACE("seq_regex_brief", tout
+                    << "overapprox=" << state_str(r) << " ";);
+            }
+        }
+
         expr_ref zero(a().mk_int(0), m);
         expr_ref acc = sk().mk_accept(s, zero, r);
         literal acc_lit = th.mk_literal(acc);
@@ -213,6 +215,7 @@ namespace smt {
      *
      * Rule 1. (accept s i r) => len(s) >= i + min_len(r)
      * Rule 2. (accept s i r) & len(s) <= i => nullable(r)
+     *     (only necessary if min_len fails and returns 0 for non-nullable r)
      * Rule 3. (accept s i r) and len(s) > i =>
      *             (accept s (i + 1) (derivative s[i] r)
      *
@@ -258,24 +261,36 @@ namespace smt {
         STRACE("seq_regex_brief", tout << "(unfold) ";);
 
         // Rule 1: use min_length to prune search
-        expr_ref s_to_re(re().mk_to_re(s), m);
-        expr_ref s_plus_r(re().mk_concat(s_to_re, r), m);
-        unsigned min_len = re().min_length(s_plus_r);
-        literal len_s_ge_min = th.m_ax.mk_ge(th.mk_len(s), min_len);
+        unsigned min_len = re().min_length(r);
+        unsigned min_len_plus_i = u().max_plus(min_len, idx);
+        literal len_s_ge_min = th.m_ax.mk_ge(th.mk_len(s), min_len_plus_i);
         th.propagate_lit(nullptr, 1, &lit, len_s_ge_min);
         // Axiom equivalent to the above: th.add_axiom(~lit, len_s_ge_min);
 
         // Rule 2: nullable check
         literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx);
-        expr_ref is_nullable = is_nullable_wrapper(r);
-        if (m.is_false(is_nullable)) {
-            th.propagate_lit(nullptr, 1, &lit, ~len_s_le_i);
-        }
-        else if (!m.is_true(is_nullable)) {
-            // is_nullable did not simplify
-            literal is_nullable_lit = th.mk_literal(is_nullable_wrapper(r));
-            ctx.mark_as_relevant(is_nullable_lit);
-            th.add_axiom(~lit, ~len_s_le_i, is_nullable_lit);
+        if (min_len == 0) {
+            expr_ref is_nullable = is_nullable_wrapper(r);
+            if (m.is_false(is_nullable)) {
+                STRACE("seq_regex", tout
+                    << "Warning: min_length returned 0 for non-nullable regex"
+                    << std::endl;);
+                STRACE("seq_regex_brief", tout
+                    << " (Warning: min_length returned 0 for"
+                    << " non-nullable regex)";);
+                th.propagate_lit(nullptr, 1, &lit, ~len_s_le_i);
+            }
+            else if (!m.is_true(is_nullable)) {
+                // is_nullable did not simplify
+                STRACE("seq_regex", tout
+                    << "Warning: is_nullable did not simplify to true or false"
+                    << std::endl;);
+                STRACE("seq_regex_brief", tout
+                    << " (Warning: is_nullable did not simplify)";);
+                literal is_nullable_lit = th.mk_literal(is_nullable);
+                ctx.mark_as_relevant(is_nullable_lit);
+                th.add_axiom(~lit, ~len_s_le_i, is_nullable_lit);
+            }
         }
 
         // Rule 3: derivative unfolding
@@ -283,24 +298,11 @@ namespace smt {
         expr_ref hd = th.mk_nth(s, i);
         expr_ref deriv(m);
         deriv = derivative_wrapper(hd, r);
+        expr_ref accept_deriv(m);
+        accept_deriv = mk_deriv_accept(s, idx + 1, deriv);
         accept_next.push_back(~lit);
         accept_next.push_back(len_s_le_i);
-        expr_ref_pair_vector cofactors(m);
-        get_cofactors(deriv, cofactors);
-        for (auto const& p : cofactors) {
-            if (m.is_false(p.first) || re().is_empty(p.second)) continue;
-            expr_ref cond(p.first, m);
-            expr_ref deriv_leaf(p.second, m);
-
-            expr_ref acc = sk().mk_accept(s, a().mk_int(idx + 1), deriv_leaf);
-            expr_ref choice(m.mk_and(cond, acc), m);
-            literal choice_lit = th.mk_literal(choice);
-            accept_next.push_back(choice_lit);
-            // TBD: try prioritizing unvisited states here over visited
-            // ones (in the state graph), to improve performance
-            STRACE("seq_regex_verbose", tout << "added choice: "
-                                           << mk_pp(choice, m) << std::endl;);
-        }
+        accept_next.push_back(th.mk_literal(accept_deriv));
         th.add_axiom(accept_next);
     }
 
@@ -442,7 +444,7 @@ namespace smt {
         expr_ref r = symmetric_diff(r1, r2);       
         expr_ref emp(re().mk_empty(m.get_sort(r)), m);
         expr_ref n(m.mk_fresh_const("re.char", seq_sort), m); 
-        expr_ref is_empty = sk().mk_is_empty(r, emp, n);
+        expr_ref is_empty = sk().mk_is_empty(r, r, n);
         th.add_axiom(~th.mk_eq(r1, r2, false), th.mk_literal(is_empty));
     }
     
@@ -455,7 +457,7 @@ namespace smt {
         expr_ref r = symmetric_diff(r1, r2);
         expr_ref emp(re().mk_empty(m.get_sort(r)), m);
         expr_ref n(m.mk_fresh_const("re.char", seq_sort), m); 
-        expr_ref is_non_empty = sk().mk_is_non_empty(r, emp, n);
+        expr_ref is_non_empty = sk().mk_is_non_empty(r, r, n);
         th.add_axiom(th.mk_eq(r1, r2, false), th.mk_literal(is_non_empty));
     }
 
@@ -517,22 +519,98 @@ namespace smt {
         th.add_axiom(lits);
     }
 
-    void seq_regex::get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result) {
-        expr* cond = nullptr, *th = nullptr, *el = nullptr;
-        if (m.is_ite(r, cond, th, el)) {
-            conds.push_back(cond);
-            get_cofactors(th, conds, result);
-            conds.pop_back();
-            conds.push_back(mk_not(m, cond));
-            get_cofactors(el, conds, result);
-            conds.pop_back();
-        }
-        else {
-            expr_ref conj = mk_and(conds);
-            result.push_back(conj, r);
+    /*
+        Given a string s, index i, and a derivative regex d, return an
+        expression that is equivalent to
+            accept s i r
+        but which pushes accept s i r into the leaves (next derivatives to
+        explore).
+
+        Input r is of type regex; output is of type bool.
+
+        Example:
+            mk_deriv_accept(s, i, (ite a r1 r2) u (ite b r3 r4))
+            = (or (ite a (accept s i r1) (accept s i r2))
+                  (ite b (accept s i r3) (accept s i r4)))
+    */
+    expr_ref seq_regex::mk_deriv_accept(expr* s, unsigned i, expr* r) {
+        vector<expr*> to_visit;
+        to_visit.push_back(r);
+        obj_map<expr, expr*> re_to_bool;
+        expr_ref_vector _temp_bool_owner(m); // temp owner for bools we create
+
+        // DFS
+        while (to_visit.size() > 0) {
+            expr* e = to_visit.back();
+            expr* econd = nullptr, *e1 = nullptr, *e2 = nullptr;
+            if (!re_to_bool.contains(e)) {
+                // First visit: add children
+                STRACE("seq_regex_verbose", tout << "1";);
+                if (m.is_ite(e, econd, e1, e2) ||
+                    re().is_union(e, e1, e2)) {
+                    to_visit.push_back(e1);
+                    to_visit.push_back(e2);
+                }
+                // Mark first visit by adding nullptr to the map
+                re_to_bool.insert(e, nullptr);
+            }
+            else if (re_to_bool.find(e) == nullptr) {
+                // Second visit: set value
+                STRACE("seq_regex_verbose", tout << "2";);
+                to_visit.pop_back();
+                if (m.is_ite(e, econd, e1, e2)) {
+                    expr* b1 = re_to_bool.find(e1);
+                    expr* b2 = re_to_bool.find(e2);
+                    expr* b = m.mk_ite(econd, b1, b2);
+                    _temp_bool_owner.push_back(b);
+                    re_to_bool.find(e) = b;
+                }
+                else if (re().is_union(e, e1, e2)) {
+                    expr* b1 = re_to_bool.find(e1);
+                    expr* b2 = re_to_bool.find(e2);
+                    expr* b = m.mk_or(b1, b2);
+                    _temp_bool_owner.push_back(b);
+                    re_to_bool.find(e) = b;
+                }
+                else {
+                    expr* iplus1 = a().mk_int(i);
+                    _temp_bool_owner.push_back(iplus1);
+                    expr_ref acc_leaf = sk().mk_accept(s, iplus1, e);
+                    _temp_bool_owner.push_back(acc_leaf);
+                    re_to_bool.find(e) = acc_leaf;
+
+                    STRACE("seq_regex_verbose", tout
+                        << "mk_deriv_accept: added accept leaf: "
+                        << mk_pp(acc_leaf, m) << std::endl;);
+                }
+            }
+            else {
+                STRACE("seq_regex_verbose", tout << "3";);
+                // Remaining visits: skip
+                to_visit.pop_back();
+            }
         }
+
+        // Finalize
+        expr_ref result(m);
+        result = re_to_bool.find(r); // Assigns ownership of all exprs in
+                                     // re_to_bool for after this completes
+        rewrite(result);
+        return result;
     }
 
+    /*
+        Return a list of all leaves in the derivative of a regex r,
+        ignoring the conditions along each path.
+
+        Warning: Although the derivative
+        normal form tries to eliminate unsat condition paths, one cannot
+        assume that the path to each leaf is satisfiable in general
+        (e.g. when regexes are created using re.pred).
+        So not all results may correspond to satisfiable predicates.
+        It is OK to rely on the results being satisfiable for completeness,
+        but not soundness.
+    */
     void seq_regex::get_all_derivatives(expr* r, expr_ref_vector& results) {
         // Get derivative
         sort* seq_sort = nullptr;
@@ -541,14 +619,74 @@ namespace smt {
         expr_ref hd = mk_first(r, n);
         expr_ref d(m);
         d = derivative_wrapper(hd, r);
-        // Use get_cofactors method and try to filter out unsatisfiable conds
-        expr_ref_pair_vector cofactors(m);
-        get_cofactors(d, cofactors);
-        STRACE("seq_regex_verbose", tout << "getting all derivatives of: " << mk_pp(r, m) << std::endl;);
-        for (auto const& p : cofactors) {
-            if (m.is_false(p.first) || re().is_empty(p.second)) continue;
-            STRACE("seq_regex_verbose", tout << "adding derivative: " << mk_pp(p.second, m) << std::endl;);
-            results.push_back(p.second);
+
+        // DFS
+        vector<expr*> to_visit;
+        to_visit.push_back(d);
+        obj_map<expr, bool> visited; // set<expr> (bool is used as a unit type)
+        while (to_visit.size() > 0) {
+            expr* e = to_visit.back();
+            to_visit.pop_back();
+            if (visited.contains(e)) continue;
+            visited.insert(e, true);
+            expr* econd = nullptr, *e1 = nullptr, *e2 = nullptr;
+            if (m.is_ite(e, econd, e1, e2) ||
+                re().is_union(e, e1, e2)) {
+                to_visit.push_back(e1);
+                to_visit.push_back(e2);
+            }
+            else if (!re().is_empty(e)) {
+                results.push_back(e);
+                STRACE("seq_regex_verbose", tout
+                    << "get_all_derivatives: added deriv: "
+                    << mk_pp(e, m) << std::endl;);
+            }
+        }
+
+        STRACE("seq_regex", tout << "Number of derivatives: "
+                                 << results.size() << std::endl;);
+        STRACE("seq_regex_brief", tout << "#derivs=" << results.size() << " ";);
+    }
+
+    /*
+        Return a list of all (cond, leaf) pairs in a given derivative
+        expression r.
+
+        Note: this recursive implementation is inefficient, since if nodes
+        are repeated often in the expression DAG, they may be visited
+        many times. For this reason, prefer mk_deriv_accept and
+        get_all_derivatives when possible.
+
+        This method is still used by:
+            propagate_is_empty
+            propagate_is_non_empty
+    */
+    void seq_regex::get_cofactors(expr* r, expr_ref_pair_vector& result) {
+        expr_ref_vector conds(m);
+        get_cofactors_rec(r, conds, result);
+        STRACE("seq_regex", tout << "Number of derivatives: "
+                                 << result.size() << std::endl;);
+        STRACE("seq_regex_brief", tout << "#derivs=" << result.size() << " ";);
+    }
+    void seq_regex::get_cofactors_rec(expr* r, expr_ref_vector& conds,
+                                      expr_ref_pair_vector& result) {
+        expr* cond = nullptr, *r1 = nullptr, *r2 = nullptr;
+        if (m.is_ite(r, cond, r1, r2)) {
+            conds.push_back(cond);
+            get_cofactors_rec(r1, conds, result);
+            conds.pop_back();
+            conds.push_back(mk_not(m, cond));
+            get_cofactors_rec(r2, conds, result);
+            conds.pop_back();
+        }
+        else if (re().is_union(r, r1, r2)) {
+            get_cofactors_rec(r1, conds, result);
+            get_cofactors_rec(r2, conds, result);
+        }
+        else {
+            expr_ref conj = mk_and(conds);
+            if (!m.is_false(conj) && !re().is_empty(r))
+                result.push_back(conj, r);
         }
     }
 
@@ -618,6 +756,9 @@ namespace smt {
             m_expr_to_state.insert(e, new_id);
             STRACE("seq_regex_brief", tout << "new(" << expr_id_str(e)
                                            << ")=" << state_str(e) << " ";);
+            STRACE("seq_regex", tout
+                << "New state ID: " << new_id
+                << " = " << mk_pp(e, m) << std::endl;);
         }
         return m_expr_to_state.find(e);
     }
@@ -627,6 +768,7 @@ namespace smt {
         return m_state_to_expr.get(id);
     }
 
+
     bool seq_regex::can_be_in_cycle(expr *r1, expr *r2) {
         // TBD: This can be used to optimize the state graph:
         // return false here if it is known that r1 -> r2 can never be
@@ -649,10 +791,11 @@ namespace smt {
             STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";);
             return false;
         }
-        STRACE("seq_regex", tout << "Updating state graph for regex "
-                                 << mk_pp(r, m) << ") ";);
         // Add state
         m_state_graph.add_state(r_id);
+        STRACE("state_graph", tout << "regex(" << r_id << ") = " << mk_pp(r, m) << std::endl;);
+        STRACE("seq_regex", tout << "Updating state graph for regex "
+                                 << mk_pp(r, m) << ") " << std::endl;);
         STRACE("seq_regex_brief", tout << std::endl << "USG("
                                        << state_str(r) << ") ";);
         expr_ref r_nullable = is_nullable_wrapper(r);
@@ -663,18 +806,20 @@ namespace smt {
             // Add edges to all derivatives
             expr_ref_vector derivatives(m);
             STRACE("seq_regex_verbose", tout
-                << std::endl << "  getting all derivs: " << r_id << " ";);
+                << "getting all derivs: " << r_id << " " << std::endl;);
             get_all_derivatives(r, derivatives);
             for (auto const& dr: derivatives) {
                 unsigned dr_id = get_state_id(dr);
                 STRACE("seq_regex_verbose", tout
-                    << std::endl << "  traversing deriv: " << dr_id << " ";);
+                    << "  traversing deriv: " << dr_id << " " << std::endl;);
                 m_state_graph.add_state(dr_id);
+                STRACE("state_graph", tout << "regex(" << dr_id << ") = " << mk_pp(dr, m) << std::endl;);
                 bool maybecycle = can_be_in_cycle(r, dr);
                 m_state_graph.add_edge(r_id, dr_id, maybecycle);
             }
             m_state_graph.mark_done(r_id);
         }
+        STRACE("seq_regex", m_state_graph.display(tout););
         STRACE("seq_regex_brief", tout << std::endl;);
         STRACE("seq_regex_brief", m_state_graph.display(tout););
         return true;
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index 1f49d7003..e5782a94f 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -23,6 +23,71 @@ Author:
 #include "smt/smt_context.h"
 #include "smt/seq_skolem.h"
 
+/*
+    *** Tracing and debugging in this module and related modules ***
+
+    Tracing and debugging for the regex solver are split across several
+    command-line flags.
+
+        TRACING
+
+        -tr:seq_regex and -tr:seq_regex_brief
+        These are the main tags to trace what the regex solver is doing.
+        They mostly trace the same things, except that seq_regex_brief
+        avoids printing out expressions and tries to abbreviate the output
+        as much as possible. seq_regex_brief shows the following output:
+            Top-level propagations:
+                PIR:      Propagating an in_re constraint
+                PE/PNE:   Propagating an empty/non-empty constraint
+                PEQ/PNEQ: Propagating a not-equal constraint
+                PA:       Propagating an accept constraint
+            In tracing, arguments are generally put in parentheses.
+            To achieve abbreviated output, expressions are traced in one of two
+            ways:
+                id243 (expr ID):  the regex or expression with id 243
+                3     (state ID): the regex with state ID 3
+            When a regex is newly assigned to a state ID, we print this:
+                new(id606)=4
+            Of these, PA is the most important, and traces as follows:
+                PA(x@i,r): propagate accept for string x at index i, regex r.
+                (empty), (dead), (blocked), (unfold): info about whether this
+                    PA was cut off early, or unfolded into the derivatives
+                    (next states)
+                d(r1)=r2: r2 is the derivative of r1
+                n(r1)=b:  b = whether r1 is nullable or not
+                USG(r):   updating state graph for regex r (add all derivatives)
+
+        -tr:state_graph
+        This is the tracing done by util/state_graph, the data structure
+        that seq_regex uses to track live and dead regexes, which can
+        altneratively be used to get a high-level picture of what states
+        are being explored and updated as the solver progresses.
+
+        -tr:seq_regex_verbose
+        Used for some more frequent tracing (in the style of seq_regex,
+        not in the style of seq_regex_brief)
+
+        -tr:seq and -tr:seq_verbose
+        These are the underlying sequence theory tracing, often used by
+        the rewriter.
+
+        DEBUGGING AND VIEWING STATE GRAPH GRAPHICAL OUTPUT
+
+        -dbg:seq_regex
+        Debugging that checks invariants. Currently, checks that derivative
+        normal form is correctly preserved in the rewriter.
+
+        -dbg:state_graph
+        Debugging for the state graph, which
+        1. Checks state graph invariants, and
+        2. Generates the files .z3-state-graph.dgml and .z3-state-graph.dot
+           which can be used to visually view the state graph being explored,
+           during or after executing Z3.
+           The output can be viewed:
+              - Using Visual Studio for .dgml
+              - Using a tool such as xdot (`xdot .z3-state-graph.dot`) for .dot
+*/
+
 namespace smt {
 
     class theory_seq;
@@ -93,12 +158,13 @@ namespace smt {
         expr_ref is_nullable_wrapper(expr* r);
         expr_ref derivative_wrapper(expr* hd, expr* r);
 
-        void get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result);
-        void get_cofactors(expr* r, expr_ref_pair_vector& result) {
-            expr_ref_vector conds(m);
-            get_cofactors(r, conds, result);
-        }
+        // Various support for unfolding derivative expressions that are
+        // returned by derivative_wrapper
+        expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r);
         void get_all_derivatives(expr* r, expr_ref_vector& results);
+        void get_cofactors(expr* r, expr_ref_pair_vector& result);
+        void get_cofactors_rec(expr* r, expr_ref_vector& conds,
+                               expr_ref_pair_vector& result);
 
     public:
 
diff --git a/src/util/state_graph.cpp b/src/util/state_graph.cpp
index b5f621a00..2b870331e 100644
--- a/src/util/state_graph.cpp
+++ b/src/util/state_graph.cpp
@@ -324,13 +324,41 @@ bool state_graph::is_done(state s) const {
     return m_seen.contains(s) && !m_unexplored.contains(m_state_ufind.find(s));
 }
 
+/*
+    Pretty printing
+*/
+std::ostream& state_graph::display(std::ostream& o) const {
+    o << "---------- State Graph ----------" << std::endl
+        << "Seen:";
+    for (auto s : m_seen) {
+        o << " " << s;
+        state s_root = m_state_ufind.find(s);
+        if (s_root != s)
+            o << "(=" << s_root << ")";
+    }
+    o << std::endl
+        << "Live:" << m_live << std::endl
+        << "Dead:" << m_dead << std::endl
+        << "Unknown:" << m_unknown << std::endl
+        << "Unexplored:" << m_unexplored << std::endl
+        << "Edges:" << std::endl;
+    for (auto s1 : m_seen) {
+        if (m_state_ufind.is_root(s1)) {
+            o << "  " << s1 << " -> " << m_targets[s1] << std::endl;
+        }
+    }
+    o << "---------------------------------" << std::endl;
+
+    return o;
+}
+
+#ifdef Z3DEBUG
 /*
     Class invariants check (and associated auxiliary functions)
 
     check_invariant performs a sequence of SASSERT assertions,
     then always returns true.
 */
-#ifdef Z3DEBUG
 bool state_graph::is_subset(state_set set1, state_set set2) const {
     for (auto s1: set1) {
         if (!set2.contains(s1)) return false;
@@ -387,37 +415,7 @@ bool state_graph::check_invariant() const {
     STRACE("state_graph", tout << "(invariant passed) ";);
     return true;
 }
-#endif
 
-/*
-    Pretty printing
-*/
-std::ostream& state_graph::display(std::ostream& o) const {
-    o << "---------- State Graph ----------" << std::endl
-      << "Seen:";
-    for (auto s: m_seen) {
-        o << " " << s;
-        state s_root = m_state_ufind.find(s);
-        if (s_root != s)
-            o << "(=" << s_root << ")";
-    }
-    o << std::endl
-      << "Live:" << m_live << std::endl
-      << "Dead:" << m_dead << std::endl
-      << "Unknown:" << m_unknown << std::endl
-      << "Unexplored:" << m_unexplored << std::endl
-      << "Edges:" << std::endl;
-    for (auto s1: m_seen) {
-        if (m_state_ufind.is_root(s1)) {
-            o << "  " << s1 << " -> " << m_targets[s1] << std::endl;
-        }
-    }
-    o << "---------------------------------" << std::endl;
-
-    return o;
-}
-
-#ifdef Z3DEBUG
 /*
     Output the whole state graph in dgml format into the file '.z3-state-graph.dgml'
  */