From 738d091b58ff5bd47eae1f9fc7c9d831e548f7bc Mon Sep 17 00:00:00 2001
From: Margus Veanes <margus@microsoft.com>
Date: Thu, 20 Aug 2020 20:46:40 -0700
Subject: [PATCH 1/2] extended calculation of info for regexes, updated tracing
 of state_graph with regex info

---
 src/ast/seq_decl_plugin.cpp | 105 ++++++++++++++++++++++++++++++------
 src/ast/seq_decl_plugin.h   |  57 ++++++++++++++++++--
 src/smt/seq_regex.cpp       |  15 +++---
 3 files changed, 152 insertions(+), 25 deletions(-)

diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp
index 53e0966fc..40119043a 100644
--- a/src/ast/seq_decl_plugin.cpp
+++ b/src/ast/seq_decl_plugin.cpp
@@ -1637,45 +1637,102 @@ seq_util::rex::info seq_util::rex::get_info_rec(expr* e) const {
 */
 seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
     info i1, i2;
+    lbool nullable(l_false);
+    unsigned min_length(0), lower_bound(0);
+    bool normalized(false);
     if (e->get_family_id() == u.get_family_id()) {
         switch (e->get_decl()->get_decl_kind())
         {
         case OP_RE_EMPTY_SET:
-            return info(UINT_MAX);
+            return info(true, true, true, true, true, true, false, l_false, UINT_MAX, 0);
         case OP_RE_FULL_SEQ_SET:
+            return info(true, true, true, true, true, true, false, l_true, 0, 1);
         case OP_RE_STAR:
+            i1 = get_info_rec(e->get_arg(0));
+            return info(i1.classical, i1.classical, i1.interpreted, i1.nonbranching, i1.normalized, i1.monadic, false, l_true, 0, i1.star_height + 1);
         case OP_RE_OPTION:
-            return info(0);
+            i1 = get_info_rec(e->get_arg(0));
+            return info(i1.classical, i1.classical, i1.interpreted, i1.nonbranching, i1.normalized, i1.monadic, false, l_true, 0, i1.star_height);
         case OP_RE_RANGE:
         case OP_RE_FULL_CHAR_SET:
         case OP_RE_OF_PRED:
-            return info(1);
+            //TBD: check if the character predicate contains uninterpreted symbols or is nonground or is unsat
+            //TBD: check if the range is unsat
+            return info(true, true, true, true, true, true, true, l_false, 1, 0);
         case OP_RE_CONCAT:
             i1 = get_info_rec(e->get_arg(0));
             i2 = get_info_rec(e->get_arg(1));
-            return info(u.max_plus(i1.min_length, i2.min_length));
+            return info(i1.classical & i2.classical,
+                i1.classical && i2.classical, //both args of concat must be classical for it to be standard
+                i1.interpreted && i2.interpreted,
+                i1.nonbranching && i2.nonbranching,
+                (i1.normalized && !u.re.is_concat(e->get_arg(0)) && i2.normalized),
+                i1.monadic && i2.monadic,
+                false,
+                ((i1.nullable == l_false || i2.nullable == l_false) ? l_false : ((i1.nullable == l_true && i2.nullable == l_true) ? l_true : l_undef)),
+                u.max_plus(i1.min_length, i2.min_length),
+                std::max(i1.star_height, i2.star_height));
         case OP_RE_UNION:
             i1 = get_info_rec(e->get_arg(0));
             i2 = get_info_rec(e->get_arg(1));
-            return info(std::min(i1.min_length, i2.min_length));
+            return info(i1.classical & i2.classical,
+                i1.standard && i2.standard,
+                i1.interpreted && i2.interpreted,
+                i1.nonbranching && i2.nonbranching,
+                i1.normalized && i2.normalized,
+                i1.monadic && i2.monadic,
+                i1.singleton && i2.singleton,
+                ((i1.nullable == l_true || i2.nullable == l_true) ? l_true : ((i1.nullable == l_false && i2.nullable == l_false) ? l_false : l_undef)),
+                std::min(i1.min_length, i2.min_length),
+                std::max(i1.star_height, i2.star_height));
         case OP_RE_INTERSECT:
             i1 = get_info_rec(e->get_arg(0));
             i2 = get_info_rec(e->get_arg(1));
-            return info(std::max(i1.min_length, i2.min_length));
+            return info(false,
+                i1.standard && i2.standard,
+                i1.interpreted && i2.interpreted,
+                i1.nonbranching && i2.nonbranching,
+                i1.normalized && i2.normalized,
+                i1.monadic && i2.monadic,
+                i1.singleton && i2.singleton,
+                ((i1.nullable == l_true && i2.nullable == l_true) ? l_true : ((i1.nullable == l_false || i2.nullable == l_false) ? l_false : l_undef)),
+                std::max(i1.min_length, i2.min_length),
+                std::max(i1.star_height, i2.star_height));
         case OP_SEQ_TO_RE:
-            return info(u.str.min_length(e->get_arg(0)));
+            //TBD: the sequence is not a concrete value
+            min_length = u.str.min_length(e->get_arg(0));
+            return info(true, true, true, true, true, true, (min_length == 1 && u.str.max_length(e->get_arg(0)) == 1), l_false, min_length, 0);
         case OP_RE_REVERSE:
-        case OP_RE_PLUS:
             return get_info_rec(e->get_arg(0));
+        case OP_RE_PLUS:
+            i1 = get_info_rec(e->get_arg(0));
+            //r+ is not normalized if r is nullable, the normalized form would be r*
+            return info(i1.classical, i1.classical, i1.interpreted, i1.nonbranching, (i1.nullable == l_false ? i1.normalized : false), i1.monadic, false, l_true, 0, i1.star_height + 1);
         case OP_RE_COMPLEMENT:
-            return info(0); 
+            i1 = get_info_rec(e->get_arg(0));
+            nullable = (i1.nullable == l_true ? l_false : (i1.nullable == l_false ? l_true : l_undef));
+            min_length = (nullable == l_false ? 1 : 0);
+            return info(false, i1.standard, i1.interpreted, i1.nonbranching, i1.normalized, i1.monadic, false, nullable, min_length, i1.star_height);
         case OP_RE_LOOP:
             i1 = get_info_rec(e->get_arg(0));
-            return info(u.max_mul(i1.min_length, e->get_decl()->get_parameter(0).get_int()));
+            lower_bound = e->get_decl()->get_parameter(0).get_int();
+            //r{l,m} is not normalized if r is nullable but l > 0
+            normalized = (i1.nullable == l_false && lower_bound > 0 ? false : i1.normalized);
+            nullable = (i1.nullable == l_true || lower_bound == 0 ? l_true : i1.nullable);
+            return info(i1.classical, i1.classical, i1.interpreted, i1.nonbranching, normalized, i1.singleton, false, nullable, u.max_mul(i1.min_length, lower_bound), i1.star_height);
         case OP_RE_DIFF:
             i1 = get_info_rec(e->get_arg(0));
             i2 = get_info_rec(e->get_arg(1));
-            return info(std::max(i1.min_length, i2.min_length > 0 ? 0 : UINT_MAX));
+            return info(false,
+                i1.standard & i2.standard,
+                i1.interpreted & i2.interpreted,
+                i1.nonbranching & i2.nonbranching,
+                i1.normalized & i2.normalized,
+                i1.monadic & i2.monadic,
+                false,
+                ((i1.nullable == l_true && i2.nullable == l_false) ? l_true : ((i1.nullable == l_false || i2.nullable == l_false) ? l_false : l_undef)),
+                std::max(i1.min_length, i2.min_length),
+                std::max(i1.star_height, i2.star_height));
         }
         return invalid_info;
     }
@@ -1683,21 +1740,39 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
     if (u.m.is_ite(e, c, t, f)) {
         i1 = get_info_rec(t);
         i2 = get_info_rec(f);
-        return info(std::min(i1.min_length, i2.min_length));
+        min_length = std::min(i1.min_length, i2.min_length);
+        nullable = (i1.nullable == i2.nullable ? i1.nullable : l_undef);
+        //TBD: whether ite is interpreted or not depends on whether the condition is interpreted and both branches are interpreted
+        return info(false, false, false, false, i1.normalized && i2.normalized, i1.monadic && i2.monadic, i1.singleton && i2.singleton, nullable, min_length, std::max(i1.star_height, i2.star_height));
     }
     return invalid_info;
 }
 
 std::ostream& seq_util::rex::info::display(std::ostream& out) const {
-    if (valid)
-        out << "info(" << "min_length=" << min_length << ")";
+    if (valid) {
+        out << "info("
+            << "nullable=" << (nullable == l_true ? "T" : (nullable == l_false ? "F" : "U")) << ", "
+            << "classical=" << (classical ? "T" : "F") << ", "
+            << "standard=" << (standard ? "T" : "F") << ", "
+            << "nonbranching=" << (nonbranching ? "T" : "F") << ", "
+            << "normalized=" << (normalized ? "T" : "F") << ", "
+            << "monadic=" << (monadic ? "T" : "F") << ", "
+            << "singleton=" << (singleton ? "T" : "F") << ", "
+            << "min_length=" << min_length << ", "
+            << "star_height=" << star_height << ")";
+    }
     else
-        out << "invalid_info";
+        out << "UNDEF";
     return out;
 }
 
+/*
+  String representation of the info.
+*/
 std::string seq_util::rex::info::str() const {
     std::ostringstream out;
     display(out);
     return out.str();
 }
+
+
diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h
index 1110c06c5..eec2dd7af 100644
--- a/src/ast/seq_decl_plugin.h
+++ b/src/ast/seq_decl_plugin.h
@@ -25,6 +25,7 @@ Revision History:
 #include "ast/ast.h"
 #include "ast/bv_decl_plugin.h"
 #include <string>
+#include "util/lbool.h"
 
 #define Z3_USE_UNICODE 0
 
@@ -411,13 +412,62 @@ public:
     class rex {
     public:
         struct info {
+            /* Value is well-defined. */
             bool valid;
+            /* No complement, no intersection, no difference, and no if-then-else is used. Reverse is allowed. */
+            bool classical;
+            /* Boolean-reverse combination of classical regexes (using reverse, union, complement, intersection or difference). */
+            bool standard;
+            /* There are no uninterpreted symbols. */
+            bool interpreted;
+            /* No if-then-else is used. */
+            bool nonbranching;
+            /* Concatenations are right associative and if a loop body is nullable then the lower bound is zero. */
+            bool normalized;
+            /* All bounded loops have a body that is a singleton. */
+            bool monadic;
+            /* Positive Boolean combination of ranges or predicates or singleton sequences. */
+            bool singleton;
+            /* If l_true then empty word is accepted, if l_false then empty word is not accepted. */
+            lbool nullable;
+            /* Lower bound on the length of all accepted words. */
             unsigned min_length;
-            info() : valid(false), min_length(0) {}
-            info(unsigned k) : valid(true), min_length(k) {}
-            bool is_valid() { return valid; }
+            /* Maximum nesting depth of Kleene stars. */
+            unsigned star_height;
+
+            /* 
+              Constructs an invalid info
+            */
+            info() : valid(false) {}
+
+            /* 
+              General info constructor.
+            */
+            info(bool is_classical, 
+                bool is_standard, 
+                bool is_interpreted, 
+                bool is_nonbranching, 
+                bool is_normalized, 
+                bool is_monadic, 
+                bool is_singleton,
+                lbool is_nullable, 
+                unsigned min_l, 
+                unsigned star_h) :
+                valid(true), classical(is_classical), standard(is_standard), interpreted(is_interpreted), nonbranching(is_nonbranching),
+                normalized(is_normalized), monadic(is_monadic), nullable(is_nullable), singleton(is_singleton), 
+                min_length(min_l), star_height(star_h) {}
+
+            /*
+              Appends a string representation of the info into the stream.
+            */
             std::ostream& display(std::ostream&) const;
+
+            /*
+              Returns a string representation of the info.
+            */
             std::string str() const;
+
+            bool is_valid() const { return valid; }
         };
     private:
         seq_util&    u;
@@ -536,3 +586,4 @@ public:
 inline std::ostream& operator<<(std::ostream& out, seq_util::rex::pp const & p) { return p.display(out); }
 
 inline std::ostream& operator<<(std::ostream& out, seq_util::rex::info const& p) { return p.display(out); }
+
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index ff1f57abd..4852347bc 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -793,11 +793,12 @@ namespace smt {
         }
         STRACE("seq_regex", tout << "Updating state graph for regex "
                                  << mk_pp(r, m) << ") ";);
-        if (!m_state_graph.is_seen(r_id))
-            STRACE("state_graph", tout << std::endl << "state(" << r_id << ") = " << seq_util::rex::pp(re(), r) << std::endl;);
+        
+        STRACE("state_graph",
+            if (!m_state_graph.is_seen(r_id))
+                tout << std::endl << "state(" << r_id << ") = " << seq_util::rex::pp(re(), r) << std::endl << "info(" << r_id << ") = " << re().get_info(r) << std::endl;);
         // Add state
         m_state_graph.add_state(r_id);
-        STRACE("state_graph", tout << "regex(" << r_id << ") = " << mk_pp(r, m) << std::endl;);
         STRACE("seq_regex", tout << "Updating state graph for regex "
                                  << mk_pp(r, m) << ") " << std::endl;);
         STRACE("seq_regex_brief", tout << std::endl << "USG("
@@ -815,12 +816,12 @@ namespace smt {
             for (auto const& dr: derivatives) {
                 unsigned dr_id = get_state_id(dr);
                 STRACE("seq_regex_verbose", tout
-                    << std::endl << "  traversing deriv: " << dr_id << " ";);
-                if (!m_state_graph.is_seen(dr_id))
-                    STRACE("state_graph", tout << "state(" << dr_id << ") = " << seq_util::rex::pp(re(), dr) << std::endl;);
+                    << std::endl << "  traversing deriv: " << dr_id << " ";);              
+                STRACE("state_graph",
+                    if (!m_state_graph.is_seen(dr_id))
+                        tout << "state(" << dr_id << ") = " << seq_util::rex::pp(re(), dr) << std::endl << "info(" << dr_id << ") = " << re().get_info(dr) << std::endl;);
                 // Add state
                 m_state_graph.add_state(dr_id);
-                STRACE("state_graph", tout << "regex(" << dr_id << ") = " << mk_pp(dr, m) << std::endl;);
                 bool maybecycle = can_be_in_cycle(r, dr);
                 m_state_graph.add_edge(r_id, dr_id, maybecycle);
             }

From 48792581ebbce36727c9e2c8485cec479a3d2f5d Mon Sep 17 00:00:00 2001
From: Margus Veanes <margus@microsoft.com>
Date: Fri, 21 Aug 2020 05:38:20 -0700
Subject: [PATCH 2/2] took care of PR comments and fixed some info calculation
 bugs

---
 src/ast/seq_decl_plugin.cpp | 229 ++++++++++++++++++++++++++----------
 src/ast/seq_decl_plugin.h   |  77 +++++++-----
 2 files changed, 214 insertions(+), 92 deletions(-)

diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp
index 40119043a..9023d3829 100644
--- a/src/ast/seq_decl_plugin.cpp
+++ b/src/ast/seq_decl_plugin.cpp
@@ -1624,8 +1624,9 @@ seq_util::rex::info seq_util::rex::get_info_rec(expr* e) const {
     if (result.is_valid())
         return result;
     if (!is_app(e))
-        return invalid_info;
-    result = mk_info_rec(to_app(e));
+        result = unknown_info;
+    else 
+        result = mk_info_rec(to_app(e));
     m_infos.setx(e->get_id(), result, invalid_info);
     STRACE("re_info", tout << "compute_info(" << pp(u.re, e) << ")=" << result << std::endl;);
     return result;
@@ -1639,6 +1640,7 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
     info i1, i2;
     lbool nullable(l_false);
     unsigned min_length(0), lower_bound(0);
+    bool is_value(false);
     bool normalized(false);
     if (e->get_family_id() == u.get_family_id()) {
         switch (e->get_decl()->get_decl_kind())
@@ -1649,10 +1651,10 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
             return info(true, true, true, true, true, true, false, l_true, 0, 1);
         case OP_RE_STAR:
             i1 = get_info_rec(e->get_arg(0));
-            return info(i1.classical, i1.classical, i1.interpreted, i1.nonbranching, i1.normalized, i1.monadic, false, l_true, 0, i1.star_height + 1);
+            return i1.star();
         case OP_RE_OPTION:
             i1 = get_info_rec(e->get_arg(0));
-            return info(i1.classical, i1.classical, i1.interpreted, i1.nonbranching, i1.normalized, i1.monadic, false, l_true, 0, i1.star_height);
+            return i1.opt();
         case OP_RE_RANGE:
         case OP_RE_FULL_CHAR_SET:
         case OP_RE_OF_PRED:
@@ -1662,94 +1664,50 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
         case OP_RE_CONCAT:
             i1 = get_info_rec(e->get_arg(0));
             i2 = get_info_rec(e->get_arg(1));
-            return info(i1.classical & i2.classical,
-                i1.classical && i2.classical, //both args of concat must be classical for it to be standard
-                i1.interpreted && i2.interpreted,
-                i1.nonbranching && i2.nonbranching,
-                (i1.normalized && !u.re.is_concat(e->get_arg(0)) && i2.normalized),
-                i1.monadic && i2.monadic,
-                false,
-                ((i1.nullable == l_false || i2.nullable == l_false) ? l_false : ((i1.nullable == l_true && i2.nullable == l_true) ? l_true : l_undef)),
-                u.max_plus(i1.min_length, i2.min_length),
-                std::max(i1.star_height, i2.star_height));
+            return i1.concat(i2, u.re.is_concat(e->get_arg(0)));
         case OP_RE_UNION:
             i1 = get_info_rec(e->get_arg(0));
             i2 = get_info_rec(e->get_arg(1));
-            return info(i1.classical & i2.classical,
-                i1.standard && i2.standard,
-                i1.interpreted && i2.interpreted,
-                i1.nonbranching && i2.nonbranching,
-                i1.normalized && i2.normalized,
-                i1.monadic && i2.monadic,
-                i1.singleton && i2.singleton,
-                ((i1.nullable == l_true || i2.nullable == l_true) ? l_true : ((i1.nullable == l_false && i2.nullable == l_false) ? l_false : l_undef)),
-                std::min(i1.min_length, i2.min_length),
-                std::max(i1.star_height, i2.star_height));
+            return i1.disj(i2);
         case OP_RE_INTERSECT:
             i1 = get_info_rec(e->get_arg(0));
             i2 = get_info_rec(e->get_arg(1));
-            return info(false,
-                i1.standard && i2.standard,
-                i1.interpreted && i2.interpreted,
-                i1.nonbranching && i2.nonbranching,
-                i1.normalized && i2.normalized,
-                i1.monadic && i2.monadic,
-                i1.singleton && i2.singleton,
-                ((i1.nullable == l_true && i2.nullable == l_true) ? l_true : ((i1.nullable == l_false || i2.nullable == l_false) ? l_false : l_undef)),
-                std::max(i1.min_length, i2.min_length),
-                std::max(i1.star_height, i2.star_height));
+            return i1.conj(i2);
         case OP_SEQ_TO_RE:
-            //TBD: the sequence is not a concrete value
             min_length = u.str.min_length(e->get_arg(0));
-            return info(true, true, true, true, true, true, (min_length == 1 && u.str.max_length(e->get_arg(0)) == 1), l_false, min_length, 0);
+            is_value = m.is_value(e->get_arg(0));
+            nullable = (is_value && min_length == 0 ? l_true : (min_length > 0 ? l_false : l_undef));
+            return info(true, true, is_value, true, true, true, (min_length == 1 && u.str.max_length(e->get_arg(0)) == 1), nullable, min_length, 0);
         case OP_RE_REVERSE:
             return get_info_rec(e->get_arg(0));
         case OP_RE_PLUS:
             i1 = get_info_rec(e->get_arg(0));
-            //r+ is not normalized if r is nullable, the normalized form would be r*
-            return info(i1.classical, i1.classical, i1.interpreted, i1.nonbranching, (i1.nullable == l_false ? i1.normalized : false), i1.monadic, false, l_true, 0, i1.star_height + 1);
+            return i1.plus();
         case OP_RE_COMPLEMENT:
             i1 = get_info_rec(e->get_arg(0));
-            nullable = (i1.nullable == l_true ? l_false : (i1.nullable == l_false ? l_true : l_undef));
-            min_length = (nullable == l_false ? 1 : 0);
-            return info(false, i1.standard, i1.interpreted, i1.nonbranching, i1.normalized, i1.monadic, false, nullable, min_length, i1.star_height);
+            return i1.compl();
         case OP_RE_LOOP:
             i1 = get_info_rec(e->get_arg(0));
             lower_bound = e->get_decl()->get_parameter(0).get_int();
-            //r{l,m} is not normalized if r is nullable but l > 0
-            normalized = (i1.nullable == l_false && lower_bound > 0 ? false : i1.normalized);
-            nullable = (i1.nullable == l_true || lower_bound == 0 ? l_true : i1.nullable);
-            return info(i1.classical, i1.classical, i1.interpreted, i1.nonbranching, normalized, i1.singleton, false, nullable, u.max_mul(i1.min_length, lower_bound), i1.star_height);
+            return i1.loop(lower_bound);
         case OP_RE_DIFF:
             i1 = get_info_rec(e->get_arg(0));
             i2 = get_info_rec(e->get_arg(1));
-            return info(false,
-                i1.standard & i2.standard,
-                i1.interpreted & i2.interpreted,
-                i1.nonbranching & i2.nonbranching,
-                i1.normalized & i2.normalized,
-                i1.monadic & i2.monadic,
-                false,
-                ((i1.nullable == l_true && i2.nullable == l_false) ? l_true : ((i1.nullable == l_false || i2.nullable == l_false) ? l_false : l_undef)),
-                std::max(i1.min_length, i2.min_length),
-                std::max(i1.star_height, i2.star_height));
+            return i1.diff(i2);
         }
-        return invalid_info;
+        return unknown_info;
     }
     expr* c, * t, * f;
     if (u.m.is_ite(e, c, t, f)) {
         i1 = get_info_rec(t);
         i2 = get_info_rec(f);
-        min_length = std::min(i1.min_length, i2.min_length);
-        nullable = (i1.nullable == i2.nullable ? i1.nullable : l_undef);
-        //TBD: whether ite is interpreted or not depends on whether the condition is interpreted and both branches are interpreted
-        return info(false, false, false, false, i1.normalized && i2.normalized, i1.monadic && i2.monadic, i1.singleton && i2.singleton, nullable, min_length, std::max(i1.star_height, i2.star_height));
+        return i1.orelse(i2);
     }
-    return invalid_info;
+    return unknown_info;
 }
 
 std::ostream& seq_util::rex::info::display(std::ostream& out) const {
-    if (valid) {
+    if (is_known()) {
         out << "info("
             << "nullable=" << (nullable == l_true ? "T" : (nullable == l_false ? "F" : "U")) << ", "
             << "classical=" << (classical ? "T" : "F") << ", "
@@ -1761,8 +1719,10 @@ std::ostream& seq_util::rex::info::display(std::ostream& out) const {
             << "min_length=" << min_length << ", "
             << "star_height=" << star_height << ")";
     }
+    else if (is_valid())
+        out << "UNKNOWN";
     else
-        out << "UNDEF";
+        out << "INVALID";
     return out;
 }
 
@@ -1775,4 +1735,147 @@ std::string seq_util::rex::info::str() const {
     return out.str();
 }
 
+seq_util::rex::info seq_util::rex::info::star() const {
+    //if is_known() is false then all mentioned properties will remain false
+    return seq_util::rex::info(classical, classical, interpreted, nonbranching, normalized, monadic, false, l_true, 0, star_height + 1);
+}
+
+seq_util::rex::info seq_util::rex::info::plus() const {
+    if (is_known()) {
+        //r+ is not normalized if r is nullable, the normalized form would be r*
+        info(classical, classical, interpreted, nonbranching, (nullable == l_false ? normalized : false), monadic, false, nullable, min_length, star_height + 1);
+    }
+    else
+        return *this;
+}
+
+seq_util::rex::info seq_util::rex::info::opt() const {
+    //if is_known() is false then all mentioned properties will remain false
+    return seq_util::rex::info(classical, classical, interpreted, nonbranching, normalized, monadic, false, l_true, 0, star_height);
+}
+
+seq_util::rex::info seq_util::rex::info::compl() const {
+    if (is_known()) {
+        lbool compl_nullable = (nullable == l_true ? l_false : (nullable == l_false ? l_true : l_undef));
+        unsigned compl_min_length = (compl_nullable == l_false ? 1 : 0);
+        return info(false, standard, interpreted, nonbranching, normalized, monadic, false, compl_nullable, compl_min_length, star_height);
+    }
+    else
+        return *this;
+}
+
+seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info & rhs, bool lhs_is_concat) const {
+    if (is_known()) {
+        if (rhs.is_known()) {
+            unsigned m = min_length + rhs.min_length;
+            if (m < min_length || m < rhs.min_length)
+                m = UINT_MAX;
+            return info(classical & rhs.classical,
+                classical && rhs.classical, //both args of concat must be classical for it to be standard
+                interpreted && rhs.interpreted,
+                nonbranching && rhs.nonbranching,
+                (normalized && !lhs_is_concat && rhs.normalized),
+                monadic && rhs.monadic,
+                false,
+                ((nullable == l_false || rhs.nullable == l_false) ? l_false : ((nullable == l_true && rhs.nullable == l_true) ? l_true : l_undef)),
+                m,
+                std::max(star_height, rhs.star_height));
+        }
+        else
+            return rhs;
+    }
+    else
+        return *this;
+}
+
+seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info& rhs) const {
+    if (is_known() || rhs.is_known()) {
+        //works correctly if one of the arguments is unknown
+        info(classical & rhs.classical,
+            standard && rhs.standard,
+            interpreted && rhs.interpreted,
+            nonbranching && rhs.nonbranching,
+            normalized && rhs.normalized,
+            monadic && rhs.monadic,
+            singleton && rhs.singleton,
+            ((nullable == l_true || rhs.nullable == l_true) ? l_true : ((nullable == l_false && rhs.nullable == l_false) ? l_false : l_undef)),
+            std::min(min_length, rhs.min_length),
+            std::max(star_height, rhs.star_height));
+    }
+    else
+        return rhs;
+}
+
+seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info& rhs) const {
+    if (is_known()) {
+        if (rhs.is_known()) {
+            return info(false,
+                standard && rhs.standard,
+                interpreted && rhs.interpreted,
+                nonbranching && rhs.nonbranching,
+                normalized && rhs.normalized,
+                monadic && rhs.monadic,
+                singleton && rhs.singleton,
+                ((nullable == l_true && rhs.nullable == l_true) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)),
+                std::max(min_length, rhs.min_length),
+                std::max(star_height, rhs.star_height));
+        }
+        else
+            return rhs;
+    }
+    else
+        return *this;
+}
+
+seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info& rhs) const {
+    if (is_known()) {
+        if (rhs.is_known()) {
+            info(false,
+                standard & rhs.standard,
+                interpreted & rhs.interpreted,
+                nonbranching & rhs.nonbranching,
+                normalized & rhs.normalized,
+                monadic & rhs.monadic,
+                false,
+                ((nullable == l_true && rhs.nullable == l_false) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)),
+                std::max(min_length, rhs.min_length),
+                std::max(star_height, rhs.star_height));
+        }
+        else
+            return rhs;
+    }
+    else
+        return *this;
+}
+
+seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info& i) const {
+    if (is_known()) {
+        if (i.is_known()) {
+            unsigned ite_min_length = std::min(min_length, i.min_length);
+            lbool ite_nullable = (nullable == i.nullable ? nullable : l_undef);
+            //TBD: whether ite is interpreted or not depends on whether the condition is interpreted and both branches are interpreted
+            return info(false, false, false, false, normalized && i.normalized, monadic && i.monadic, singleton && i.singleton, nullable, min_length, std::max(star_height, i.star_height));
+        }
+        else
+            return i;
+    }
+    else
+        return *this;
+}
+
+seq_util::rex::info seq_util::rex::info::loop(unsigned lower) const {
+    if (is_known()) {
+        //r{l,m} is not normalized if r is nullable but l > 0
+        unsigned m = min_length * lower;
+        if (m < min_length || m < lower)
+            m = UINT_MAX;
+        bool loop_normalized = (nullable == l_false && lower > 0 ? false : normalized);
+        lbool loop_nullable = (nullable == l_true || lower == 0 ? l_true : nullable);
+        //TBD: pass also upper bound and if upper bound is UINT_MAX then this is * and the loop is thus not normalized
+        return info(classical, classical, interpreted, nonbranching, loop_normalized, singleton, false, loop_nullable, m, star_height);
+    }
+    else
+        return *this;
+}
+
 
diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h
index eec2dd7af..86655b557 100644
--- a/src/ast/seq_decl_plugin.h
+++ b/src/ast/seq_decl_plugin.h
@@ -412,49 +412,54 @@ public:
     class rex {
     public:
         struct info {
-            /* Value is well-defined. */
-            bool valid;
+            /* Value is either undefined (known=l_undef) or defined and known (l_true) or defined but unknown (l_false)*/
+            bool known{ l_undef };
             /* No complement, no intersection, no difference, and no if-then-else is used. Reverse is allowed. */
-            bool classical;
+            bool classical{ false };
             /* Boolean-reverse combination of classical regexes (using reverse, union, complement, intersection or difference). */
-            bool standard;
+            bool standard{ false };
             /* There are no uninterpreted symbols. */
-            bool interpreted;
+            bool interpreted{ false };
             /* No if-then-else is used. */
-            bool nonbranching;
+            bool nonbranching{ false };
             /* Concatenations are right associative and if a loop body is nullable then the lower bound is zero. */
-            bool normalized;
+            bool normalized{ false };
             /* All bounded loops have a body that is a singleton. */
-            bool monadic;
+            bool monadic{ false };
             /* Positive Boolean combination of ranges or predicates or singleton sequences. */
-            bool singleton;
+            bool singleton{ false };
             /* If l_true then empty word is accepted, if l_false then empty word is not accepted. */
-            lbool nullable;
-            /* Lower bound on the length of all accepted words. */
-            unsigned min_length;
+            lbool nullable{ l_false };
+            /* Lower bound  on the length of all accepted words. */
+            unsigned min_length{ 0 };
             /* Maximum nesting depth of Kleene stars. */
-            unsigned star_height;
+            unsigned star_height{ 0 };
 
-            /* 
-              Constructs an invalid info
+            /*
+              Default constructor of invalid info.
             */
-            info() : valid(false) {}
+            info() {}
 
-            /* 
+            /*
+              Used for constructing either an invalid info that is only used to indicate uninitialzed entry, or valid but unknown info value.
+            */
+            info(lbool is_known) : known(is_known) {}
+
+            /*
               General info constructor.
             */
-            info(bool is_classical, 
-                bool is_standard, 
-                bool is_interpreted, 
-                bool is_nonbranching, 
-                bool is_normalized, 
-                bool is_monadic, 
+            info(bool is_classical,
+                bool is_standard,
+                bool is_interpreted,
+                bool is_nonbranching,
+                bool is_normalized,
+                bool is_monadic,
                 bool is_singleton,
-                lbool is_nullable, 
-                unsigned min_l, 
+                lbool is_nullable,
+                unsigned min_l,
                 unsigned star_h) :
-                valid(true), classical(is_classical), standard(is_standard), interpreted(is_interpreted), nonbranching(is_nonbranching),
-                normalized(is_normalized), monadic(is_monadic), nullable(is_nullable), singleton(is_singleton), 
+                known(l_true), classical(is_classical), standard(is_standard), interpreted(is_interpreted), nonbranching(is_nonbranching),
+                normalized(is_normalized), monadic(is_monadic), singleton(is_singleton), nullable(is_nullable),
                 min_length(min_l), star_height(star_h) {}
 
             /*
@@ -467,7 +472,20 @@ public:
             */
             std::string str() const;
 
-            bool is_valid() const { return valid; }
+            bool is_valid() const { return known != l_undef; }
+
+            bool is_known() const { return known == l_true; }
+
+            info star() const;
+            info plus() const;
+            info opt() const;
+            info concat(info & rhs, bool lhs_is_concat) const;
+            info disj(info& rhs) const;
+            info conj(info& rhs) const;
+            info diff(info& rhs) const;
+            info orelse(info& rhs) const;
+            info loop(unsigned lower) const;
+            info compl() const;
         };
     private:
         seq_util&    u;
@@ -475,7 +493,8 @@ public:
         family_id    m_fid;
         vector<info> mutable m_infos;
         expr_ref_vector mutable m_info_pinned;
-        info invalid_info;
+        info invalid_info{ info(l_undef) };
+        info unknown_info{ info(l_false) };
 
         bool has_valid_info(expr* r) const;
         info get_info_rec(expr* r) const;