z3/src/math/automata/automaton.h

/*++
Copyright (c) 2015 Microsoft Corporation

Module Name:

    automaton.h

Abstract:

    Symbolic Automaton, a la Margus Veanes Automata library.

Author:

    Nikolaj Bjorner (nbjorner) 2015-12-23.

Revision History:


--*/

#ifndef AUTOMATON_H_
#define AUTOMATON_H_


#include "util.h"
#include "vector.h"
#include "uint_set.h"

// Manager used when transition labels need no lifetime bookkeeping:
// both reference-count hooks are no-ops.
template<class T>
class default_value_manager {
public:
    // Acquire a reference on a label; deliberately does nothing.
    void inc_ref(T*) { }
    // Release a reference on a label; deliberately does nothing.
    void dec_ref(T*) { }
};

template<class T, class M = default_value_manager<T> >
class automaton {
public:
    class move {
        M&       m;
        T*       m_t;
        unsigned m_src;
        unsigned m_dst;
    public:
        move(M& m, unsigned s, unsigned d, T* t = 0): m(m), m_t(t), m_src(s), m_dst(d) {
            if (t) m.inc_ref(t);
        }
        ~move() {
            if (m_t) m.dec_ref(m_t);
        }

        move(move const& other): m(other.m), m_t(other.m_t), m_src(other.m_src), m_dst(other.m_dst) {
            if (m_t) m.inc_ref(m_t);
        }

        move& operator=(move const& other) {
            SASSERT(&m == &other.m);
            T* t = other.m_t;
            if (t) m.inc_ref(t);
            if (m_t) m.dec_ref(m_t);
            m_t = t;
            m_src = other.m_src;
            m_dst = other.m_dst;
            return *this;
        }

        unsigned dst() const { return m_dst; }
        unsigned src() const { return m_src; }
        T* t() const { return m_t; }

        bool is_epsilon() const { return m_t == 0; }
    };
    // A list of transitions.
    typedef vector<move> moves;
private:
    // Manager used for reference counting of transition labels.
    M&              m;
    // m_delta[s] holds the moves whose source is s (see add()).
    vector<moves>   m_delta;
    // m_delta_inv[d] holds the moves whose destination is d (see add()).
    vector<moves>   m_delta_inv;
    // The initial state.
    unsigned        m_init;
    // Final states, kept both as a set (membership test) and as a list
    // (iteration); both are updated together by add_to_final_states()
    // and remove_from_final_states().
    uint_set        m_final_set;
    unsigned_vector m_final_states;


    // local data-structures
    // Scratch storage used by get_epsilon_closure(); it is left empty
    // again when that function returns.
    mutable uint_set        m_visited;
    mutable unsigned_vector m_todo;

    // Displayer that streams the label pointer itself.
    struct default_display {
        // Write t to out and hand the stream back for chaining.
        std::ostream& display(std::ostream& out, T* t) {
            out << t;
            return out;
        }
    };

public:

    // The empty automaton: a single state 0, which is initial, with no
    // moves and no final states.
    automaton(M& m):
        m(m),
        m_init(0)
    {
        moves no_moves;
        m_delta.push_back(no_moves);
        m_delta_inv.push_back(no_moves);
    }


    // Create an automaton from an initial state, final states, and moves.
    //
    // The transition tables are sized to cover every state that is
    // mentioned: the initial state, each final state, and both endpoints of
    // each move.  Sizing from the moves alone would leave an initial or
    // final state without any move outside the tables, so that degree
    // queries index out of range and num_states() under-counts.
    automaton(M& m, unsigned init, unsigned_vector const& final, moves const& mvs): m(m) {
        m_init = init;
        m_delta.push_back(moves());
        m_delta_inv.push_back(moves());
        unsigned max_state = init;
        for (unsigned i = 0; i < final.size(); ++i) {
            add_to_final_states(final[i]);
            max_state = std::max(max_state, final[i]);
        }
        for (unsigned i = 0; i < mvs.size(); ++i) {
            max_state = std::max(max_state, std::max(mvs[i].src(), mvs[i].dst()));
        }
        // The tables already hold state 0, so this only ever grows them.
        m_delta.resize(max_state + 1, moves());
        m_delta_inv.resize(max_state + 1, moves());
        for (unsigned i = 0; i < mvs.size(); ++i) {
            add(mvs[i]);
        }
    }

    // Create an automaton that accepts exactly the given sequence:
    // states 0..seq.size(), with a move i --seq[i]--> i+1 for every i,
    // state 0 initial and state seq.size() final.
    automaton(M& m, ptr_vector<T> const& seq):
        m(m),
        m_init(0) {
        m_delta.resize(seq.size()+1, moves());
        m_delta_inv.resize(seq.size()+1, moves());
        for (unsigned i = 0; i < seq.size(); ++i) {
            move step(m, i, i + 1, seq[i]);
            // Record the move under its source in the forward table and
            // under its destination in the inverse table.
            m_delta[i].push_back(step);
            m_delta_inv[i + 1].push_back(step);
        }
        add_to_final_states(seq.size());
    }

    // The automaton that accepts t: two states with the single move
    // 0 --t--> 1, where 0 is initial and 1 is final.
    automaton(M& m, T* t):
        m(m),
        m_init(0) {
        unsigned const num = 2;
        m_delta.resize(num, moves());
        m_delta_inv.resize(num, moves());
        add_to_final_states(1);
        move single(m, 0, 1, t);
        add(single);
    }

    // Copy constructor: shares the manager and copies both transition
    // tables, the initial state and the final states.  The scratch members
    // are not copied; they start out default-constructed.
    automaton(automaton const& other)
        : m(other.m)
        , m_delta(other.m_delta)
        , m_delta_inv(other.m_delta_inv)
        , m_init(other.m_init)
        , m_final_set(other.m_final_set)
        , m_final_states(other.m_final_states)
    {
    }

    // Create the automaton that accepts the empty string/sequence only:
    // one state (0) that is both initial and final, and no moves.
    static automaton* mk_epsilon(M& m) {
        unsigned_vector accepting;
        accepting.push_back(0);
        moves no_moves;
        return alloc(automaton, m, 0, accepting, no_moves);
    }

    // Create the automaton with a single state (initial and final) and a
    // self-loop on condition t.
    static automaton* mk_loop(M& m, T* t) {
        unsigned_vector accepting;
        accepting.push_back(0);
        moves loop;
        loop.push_back(move(m, 0, 0, t));
        return alloc(automaton, m, 0, accepting, loop);
    }

    // Allocate a new automaton built from a's moves, final states and
    // initial state (no renumbering: the offset is 0).
    static automaton* clone(automaton const& a) {
        unsigned_vector accepting;
        append_final(0, a, accepting);
        moves copied;
        append_moves(0, a, copied);
        return alloc(automaton, a.m, a.init(), accepting, copied);
    }

    // Allocate a copy of this automaton; see the static clone().
    automaton* clone() const {
        automaton const& self = *this;
        return clone(self);
    }

    // Create the sum of disjoint automata.
    // If either argument has no final states the other one is cloned.
    // Otherwise a fresh initial state 0 gets an epsilon move to the (shifted)
    // initial state of each argument: a's states are shifted by 1 and b's
    // by a.num_states() + 1.
    static automaton* mk_union(automaton const& a, automaton const& b) {
        SASSERT(&a.m == &b.m);
        if (a.is_empty()) return b.clone();
        if (b.is_empty()) return a.clone();

        M& m = a.m;
        unsigned const shift_a = 1;
        unsigned const shift_b = a.num_states() + 1;

        moves mvs;
        mvs.push_back(move(m, 0, a.init() + shift_a));
        mvs.push_back(move(m, 0, b.init() + shift_b));
        append_moves(shift_a, a, mvs);
        append_moves(shift_b, b, mvs);

        unsigned_vector final;
        append_final(shift_a, a, final);
        append_final(shift_b, b, final);
        return alloc(automaton, m, 0, final, mvs);
    }

    // Build an automaton for "a or the empty sequence" by adding an epsilon
    // move from the initial state to a's first final state.  When a's
    // initial state has incoming moves, a fresh initial state 0 is
    // introduced and a's states are shifted by 1.
    // NOTE(review): an automaton without final states is returned as a plain
    // copy, i.e. the result still has no final states - confirm callers
    // expect that rather than an automaton accepting the empty sequence.
    static automaton* mk_opt(automaton const& a) {
        if (a.is_empty()) {
            return a.clone();
        }
        M& m = a.m;
        bool const fresh_init = !a.initial_state_is_source();
        unsigned const offset = fresh_init ? 1u : 0u;
        unsigned const init   = fresh_init ? 0u : a.init();

        moves mvs;
        if (fresh_init) {
            mvs.push_back(move(m, 0, a.init() + offset));
        }
        mvs.push_back(move(m, init, a.final_state() + offset));
        append_moves(offset, a, mvs);

        unsigned_vector final;
        append_final(offset, a, final);
        return alloc(automaton, m, init, final, mvs);
    }

    // Concatenate accepting languages.
    // Shortcuts: if a has no final states a is cloned; if b has no final
    // states, or a is_epsilon(), b is cloned; if b is_epsilon(), a is cloned.
    // Otherwise a fresh initial state 0 leads by epsilon into a (shifted by
    // 1), every final state of a leads by epsilon into b's initial state
    // (b shifted by a.num_states() + 1), and only b's final states are final.
    static automaton* mk_concat(automaton const& a, automaton const& b) {
        SASSERT(&a.m == &b.m);
        if (a.is_empty()) return a.clone();
        if (b.is_empty() || a.is_epsilon()) return b.clone();
        if (b.is_epsilon()) return a.clone();

        M& m = a.m;
        unsigned const shift_a = 1;
        unsigned const shift_b = a.num_states() + shift_a;
        unsigned const b_entry = b.init() + shift_b;

        moves mvs;
        mvs.push_back(move(m, 0, a.init() + shift_a));
        append_moves(shift_a, a, mvs);
        unsigned_vector const& a_final = a.m_final_states;
        for (unsigned k = 0; k < a_final.size(); ++k) {
            mvs.push_back(move(m, a_final[k] + shift_a, b_entry));
        }
        append_moves(shift_b, b, mvs);

        unsigned_vector final;
        append_final(shift_b, b, final);
        return alloc(automaton, m, 0, final, mvs);
    }

    // Build the automaton with every move of a reversed.  a's initial state
    // becomes the only final state.  If a has exactly one final state it
    // becomes the new initial state; otherwise a fresh state numbered
    // a.num_states() is the initial state, with an epsilon move to each of
    // a's final states.  An automaton without final states yields the empty
    // automaton.
    static automaton* mk_reverse(automaton const& a) {
        M& m = a.m;
        if (a.is_empty()) {
            return alloc(automaton, m);
        }
        moves mvs;
        for (unsigned s = 0; s < a.m_delta.size(); ++s) {
            moves const& outgoing = a.m_delta[s];
            for (unsigned k = 0; k < outgoing.size(); ++k) {
                move const& fwd = outgoing[k];
                mvs.push_back(move(m, fwd.dst(), fwd.src(), fwd.t()));
            }
        }
        unsigned_vector final;
        final.push_back(a.init());

        unsigned_vector const& old_final = a.m_final_states;
        unsigned init = a.num_states();
        if (old_final.size() == 1) {
            init = old_final[0];
        }
        else {
            for (unsigned k = 0; k < old_final.size(); ++k) {
                mvs.push_back(move(m, init, old_final[k]));
            }
        }
        return alloc(automaton, m, init, final, mvs);
    }

    // Mark s as final; does nothing when s is already final.
    void add_to_final_states(unsigned s) {
        if (is_final_state(s)) {
            return;
        }
        m_final_set.insert(s);
        m_final_states.push_back(s);
    }

    // Unmark s as final; does nothing when s is not final.
    void remove_from_final_states(unsigned s) {
        if (!is_final_state(s)) {
            return;
        }
        m_final_set.remove(s);
        m_final_states.erase(s);
    }

    void add_init_to_final_states() {
        add_to_final_states(init());
    }

    // For every final state other than the initial state, make sure there
    // is an epsilon move from it back to the initial state.  A move is only
    // added when no such epsilon move is already present.
    void add_final_to_init_moves() {
        for (unsigned i = 0; i < m_final_states.size(); ++i) {
            unsigned state = m_final_states[i];
            if (state == m_init) {
                continue;
            }
            bool found = false;
            moves const& mvs = m_delta[state];
            // Scan the outgoing moves until an epsilon move to the initial
            // state is seen; the loop must keep going while NOT found.
            for (unsigned j = 0; !found && j < mvs.size(); ++j) {
                found = (mvs[j].dst() == m_init) && mvs[j].is_epsilon();
            }
            if (!found) {
                add(move(m, state, m_init));
            }
        }
    }

    // remove epsilon transitions
    // src - e -> dst
    // in_degree(src) = 1, final(src) => final(dst), src0 != src
    //    src0 - t -> src - e -> dst => src0 - t -> dst
    // out_degree(dst) = 1, final(dst) => final(src), dst != dst1
    //    src - e -> dst - t -> dst1 => src - t -> dst1

    // Generalized:
    // Src - E -> dst - t -> dst1 => Src - t -> dst1   if dst is final => each Src is final
    //
    // src - e -> dst - ET -> Dst1 => src - ET -> Dst1  if in_degree(dst) = 1, src != dst
    // Src - E -> dst - et -> dst1 => Src - et -> dst1  if out_degree(dst) = 1, src != dst
    //
    // Some missing:
    //  src - et -> dst - E -> Dst1 => src - et -> Dst1   if in_degree(dst) = 1
    //  Src - ET -> dst - e -> dst1 => Src - ET -> dst1  if out_degree(dst) = 1,
    //
    // Simplify the automaton in place.  Each epsilon move src -> dst is
    // examined and, when one of the rewrite patterns applies, it is removed
    // and the neighbouring moves are re-routed around it.  Afterwards,
    // trailing states that have no incoming moves and are not the initial
    // state are dropped from the end of the state tables.
    void compress() {
        SASSERT(!m_delta.empty());
        for (unsigned i = 0; i < m_delta.size(); ++i) {
            for (unsigned j = 0; j < m_delta[i].size(); ++j) {
                move const& mv = m_delta[i][j];
                unsigned src = mv.src();
                unsigned dst = mv.dst();
                SASSERT(src == i);
                if (mv.is_epsilon()) {
                    if (src == dst) {
                        // just remove this edge.
                    }
                    // src0 - t -> src - e -> dst  =>  src0 - t -> dst
                    else if (1 == in_degree(src) && 1 == out_degree(src) && init() != src && (!is_final_state(src) || is_final_state(dst))) {
                        move const& mv0 = m_delta_inv[src][0];
                        unsigned src0 = mv0.src();
                        T* t = mv0.t();
                        SASSERT(mv0.dst() == src);
                        if (src0 == src) {
                            continue;
                        }
                        // add before remove so the label keeps a reference.
                        add(move(m, src0, dst, t));
                        remove(src0, src, t);

                    }
                    // src - e -> dst - t -> dst1  =>  src - t -> dst1
                    else if (1 == out_degree(dst) && 1 == in_degree(dst) && init() != dst && (!is_final_state(dst) || is_final_state(src))) {
                        move const& mv1 = m_delta[dst][0];
                        unsigned dst1 = mv1.dst();
                        T* t = mv1.t();
                        SASSERT(mv1.src() == dst);
                        if (dst1 == dst) {
                            continue;
                        }
                        add(move(m, src, dst1, t));
                        remove(dst, dst1, t);
                    }
                    // src - e -> dst - ET -> Dst1  =>  src - ET -> Dst1  if in_degree(dst) = 1
                    else if (1 == in_degree(dst) && (!is_final_state(dst) || is_final_state(src)) && init() != dst) {
                        moves const& mvs = m_delta[dst];
                        moves mvs1;
                        // collect first: the loop below modifies m_delta[dst].
                        for (unsigned k = 0; k < mvs.size(); ++k) {
                            mvs1.push_back(move(m, src, mvs[k].dst(), mvs[k].t()));
                        }
                        for (unsigned k = 0; k < mvs1.size(); ++k) {
                            remove(dst, mvs1[k].dst(), mvs1[k].t());
                            add(mvs1[k]);
                        }
                    }
                    //
                    // Src - E -> dst - et -> dst1 => Src - et -> dst1  if out_degree(dst) = 1, src != dst
                    //
                    else if (1 == out_degree(dst) && all_epsilon_in(dst) && init() != dst && !is_final_state(dst)) {
                        move const& mv = m_delta[dst][0];
                        unsigned dst1 = mv.dst();
                        T* t = mv.t();
                        moves const& mvs = m_delta_inv[dst];
                        moves mvs1;
                        for (unsigned k = 0; k < mvs.size(); ++k) {
                            SASSERT(mvs[k].is_epsilon());
                            mvs1.push_back(move(m, mvs[k].src(), dst1, t));
                        }
                        for (unsigned k = 0; k < mvs1.size(); ++k) {
                            remove(mvs1[k].src(), dst, 0);
                            add(mvs1[k]);
                        }
                        remove(dst, dst1, t);
                        // src -> dst was already removed above; revisit
                        // index j, which now holds a different move.
                        --j;
                        continue;
                    }
                    //
                    // Src1 - ET -> src - e -> dst => Src1 - ET -> dst  if out_degree(src) = 1, src != init()
                    //
                    else if (1 == out_degree(src) && init() != src && (!is_final_state(src) || is_final_state(dst))) {
                        moves const& mvs = m_delta_inv[src];
                        moves mvs1;
                        for (unsigned k = 0; k < mvs.size(); ++k) {
                            mvs1.push_back(move(m, mvs[k].src(), dst, mvs[k].t()));
                        }
                        for (unsigned k = 0; k < mvs1.size(); ++k) {
                            remove(mvs1[k].src(), src, mvs1[k].t());
                            add(mvs1[k]);
                        }
                    }
                    else {
                        continue;
                    }
                    // drop the epsilon move itself and revisit index j,
                    // which now holds the move swapped in from the back.
                    remove(src, dst, 0);
                    --j;
                }
            }
        }
        SASSERT(!m_delta.empty());
        while (true) {
            SASSERT(!m_delta.empty());
            unsigned src = m_delta.size() - 1;
            if (in_degree(src) == 0 && init() != src) {
                remove_from_final_states(src);
                // Unlink the outgoing moves of the dropped state from the
                // inverse lists of their targets, so that in_degree() and
                // get_moves_to() do not report moves from a removed state.
                moves const& out = m_delta[src];
                for (unsigned k = 0; k < out.size(); ++k) {
                    moves& inv = m_delta_inv[out[k].dst()];
                    for (unsigned p = 0; p < inv.size(); ++p) {
                        if (inv[p].src() == src && inv[p].t() == out[k].t()) {
                            remove(p, inv);
                            break;
                        }
                    }
                }
                // Shrink both tables together so they stay the same size.
                m_delta.pop_back();
                m_delta_inv.pop_back();
            }
            else {
                break;
            }
        }
    }

    // Check whether the automaton is a single chain of moves from the
    // initial state to a final state, where the last state may additionally
    // carry a loop (see is_loop_state).  On success 'length' is the number
    // of moves on the chain.  'length' may also be written when the result
    // is false.
    bool is_sequence(unsigned& length) const {
        if (is_final_state(m_init)) {
            bool const no_exit   = out_degree(m_init) == 0;
            bool const only_loop = !no_exit && out_degree(m_init) == 1 && is_loop_state(m_init);
            if (no_exit || only_loop) {
                length = 0;
                return true;
            }
        }
        if (is_empty()) {
            return false;
        }
        if (in_degree(m_init) != 0 || out_degree(m_init) != 1) {
            return false;
        }

        length = 1;
        unsigned s = get_move_from(m_init).dst();
        // follow the chain until a final state is reached.
        for (; !is_final_state(s); ++length) {
            if (out_degree(s) != 1 || in_degree(s) != 1) {
                return false;
            }
            s = get_move_from(s).dst();
        }
        if (out_degree(s) == 0) {
            return true;
        }
        return out_degree(s) == 1 && is_loop_state(s);
    }

    // The initial state.
    unsigned init() const {
        return m_init;
    }
    // The list of final states.
    unsigned_vector const& final_states() const {
        return m_final_states;
    }
    // Number of moves recorded as entering 'state'.
    unsigned in_degree(unsigned state) const {
        return m_delta_inv[state].size();
    }
    // Number of moves recorded as leaving 'state'.
    unsigned out_degree(unsigned state) const {
        return m_delta[state].size();
    }
    // The unique move leaving 'state'; requires out_degree(state) == 1.
    move const& get_move_from(unsigned state) const {
        SASSERT(m_delta[state].size() == 1);
        return m_delta[state][0];
    }
    // The unique move entering 'state'; requires in_degree(state) == 1.
    move const& get_move_to(unsigned state) const {
        SASSERT(m_delta_inv[state].size() == 1);
        return m_delta_inv[state][0];
    }
    // All moves leaving 'state' (no epsilon closure applied).
    moves const& get_moves_from(unsigned state) const {
        return m_delta[state];
    }
    // All moves entering 'state' (no epsilon closure applied).
    moves const& get_moves_to(unsigned state) const {
        return m_delta_inv[state];
    }
    // True when no move enters the initial state.
    bool initial_state_is_source() const {
        return m_delta_inv[m_init].empty();
    }
    // True when s is a final state.
    bool is_final_state(unsigned s) const {
        return m_final_set.contains(s);
    }
    bool is_epsilon_free() const {
        for (unsigned i = 0; i < m_delta.size(); ++i) {
            moves const& mvs = m_delta[i];
            for (unsigned j = 0; j < mvs.size(); ++j) {
                if (!mvs[j].t()) return false;
            }
        }
        return true;
    }

    bool all_epsilon_in(unsigned s) {
        moves const& mvs = m_delta_inv[s];
        for (unsigned j = 0; j < mvs.size(); ++j) {
            if (mvs[j].t()) return false;
        }
        return true;
    }

    bool is_empty() const { return m_final_states.empty(); }
    bool is_epsilon() const { return m_final_states.size() == 1 && m_final_states.back() == init() && m_delta.empty(); }
    unsigned final_state() const { return m_final_states[0]; }
    bool has_single_final_sink() const { return m_final_states.size() == 1 && m_delta[final_state()].empty(); }
    unsigned num_states() const { return m_delta.size(); }
    bool is_loop_state(unsigned s) const {
        moves mvs;
        get_moves_from(s, mvs);
        for (unsigned i = 0; i < mvs.size(); ++i) {
            if (s == mvs[i].dst()) return true;
        }
        return false;
    }

    // Total number of moves stored in the forward transition table.
    unsigned move_count() const {
        unsigned total = 0;
        for (unsigned s = 0; s < m_delta.size(); ++s) {
            total += m_delta[s].size();
        }
        return total;
    }
    void get_epsilon_closure(unsigned state, unsigned_vector& states) {
        get_epsilon_closure(state, m_delta, states);
    }
    void get_inv_epsilon_closure(unsigned state, unsigned_vector& states) {
        get_epsilon_closure(state, m_delta_inv, states);
    }
    void get_moves_from(unsigned state, moves& mvs, bool epsilon_closure = true) const {
        get_moves(state, m_delta, mvs, epsilon_closure);
    }
    void get_moves_to(unsigned state, moves& mvs, bool epsilon_closure = true) {
        get_moves(state, m_delta_inv, mvs, epsilon_closure);
    }

    template<class D>
    std::ostream& display(std::ostream& out, D& displayer = D()) const {
        out << "init: " << init() << "\n";
        out << "final: ";
        for (unsigned i = 0; i < m_final_states.size(); ++i) out << m_final_states[i] << " ";
        out << "\n";
        for (unsigned i = 0; i < m_delta.size(); ++i) {
            moves const& mvs = m_delta[i];
            for (unsigned j = 0; j < mvs.size(); ++j) {
                move const& mv = mvs[j];
                out << i << " -> " << mv.dst() << " ";
                if (mv.t()) {
                    out << "if ";
                    displayer.display(out, mv.t());
                }
                out << "\n";
            }
        }
        return out;
    }
private:

    // Currently performs no work: the loop over the states has an empty
    // body and 'remap' is never filled.
    // NOTE(review): looks like an unfinished placeholder - confirm.
    void remove_dead_states() {
        unsigned_vector remap;
        for (unsigned s = 0; s < m_delta.size(); ++s) {
            // intentionally empty
        }
    }


    // Record mv in the forward table under its source and in the inverse
    // table under its destination, unless is_duplicate_cheap(mv) holds.
    void add(move const& mv) {
        if (is_duplicate_cheap(mv)) {
            return;
        }
        m_delta[mv.src()].push_back(mv);
        m_delta_inv[mv.dst()].push_back(mv);
    }

    // Cheap duplicate test: compares mv only against the most recently
    // added move leaving mv.src(); older moves are not inspected.
    bool is_duplicate_cheap(move const& mv) const {
        moves const& outgoing = m_delta[mv.src()];
        if (outgoing.empty()) {
            return false;
        }
        move const& last = outgoing.back();
        if (last.src() != mv.src()) {
            return false;
        }
        if (last.dst() != mv.dst()) {
            return false;
        }
        return last.t() == mv.t();
    }


    unsigned find_move(unsigned src, unsigned dst, T* t, moves const& mvs) {
        for (unsigned i = 0; i < mvs.size(); ++i) {
            move const& mv = mvs[i];
            if (mv.src() == src && mv.dst() == dst && t == mv.t()) {
                return i;
            }
        }
        UNREACHABLE();
        return UINT_MAX;
    }

    void remove(unsigned src, unsigned dst, T* t, moves& mvs) {
        remove(find_move(src, dst, t, mvs), mvs);
    }

    void remove(unsigned src, unsigned dst, T* t) {
        remove(src, dst, t, m_delta[src]);
        remove(src, dst, t, m_delta_inv[dst]);
    }

    void remove(unsigned index, moves& mvs) {
        mvs[index] = mvs.back();
        mvs.pop_back();
    }

    mutable unsigned_vector m_states1, m_states2;

    void get_moves(unsigned state, vector<moves> const& delta, moves& mvs, bool epsilon_closure) const {
        m_states1.reset();
        m_states2.reset();
        get_epsilon_closure(state, delta, m_states1);
        for (unsigned i = 0; i < m_states1.size(); ++i) {
            state = m_states1[i];
            moves const& mv1 = delta[state];
            for (unsigned j = 0; j < mv1.size(); ++j) {
                move const& mv = mv1[j];
                if (!mv.is_epsilon()) {
                    if (epsilon_closure) {
                        m_states2.reset();
                        get_epsilon_closure(mv.dst(), delta, m_states2);
                        for (unsigned k = 0; k < m_states2.size(); ++k) {
                            mvs.push_back(move(m, state, m_states2[k], mv.t()));
                        }
                    }
                    else {
                        mvs.push_back(move(m, state, mv.dst(), mv.t()));
                    }
                }
            }
        }
    }

    void get_epsilon_closure(unsigned state, vector<moves> const& delta, unsigned_vector& states) const {
        m_todo.push_back(state);
        m_visited.insert(state);
        while (!m_todo.empty()) {
            state = m_todo.back();
            states.push_back(state);
            m_todo.pop_back();
            moves const& mvs = delta[state];
            for (unsigned i = 0; i < mvs.size(); ++i) {
                unsigned tgt = mvs[i].dst();
                if (mvs[i].is_epsilon() && !m_visited.contains(tgt)) {
                    m_visited.insert(tgt);
                    m_todo.push_back(tgt);
                }
            }
        }
        m_visited.reset();
        SASSERT(m_todo.empty());
    }

    static void append_moves(unsigned offset, automaton const& a, moves& mvs) {
        for (unsigned i = 0; i < a.num_states(); ++i) {
            moves const& mvs1 = a.m_delta[i];
            for (unsigned j = 0; j < mvs1.size(); ++j) {
                move const& mv = mvs1[j];
                mvs.push_back(move(a.m, mv.src() + offset, mv.dst() + offset, mv.t()));
            }
        }
    }

    static void append_final(unsigned offset, automaton const& a, unsigned_vector& final) {
        for (unsigned i = 0; i < a.m_final_states.size(); ++i) {
            final.push_back(a.m_final_states[i]+offset);
        }
    }

};

typedef automaton<unsigned> uautomaton;


#endif