/* * yosys -- Yosys Open SYnthesis Suite * * Copyright (C) 2023 Catherine * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * */ #ifndef CXXRTL_REPLAY_H #define CXXRTL_REPLAY_H #if !defined(WIN32) #include #define O_BINARY 0 #else #include #endif #include #include #include #include #include #include #include // Theory of operation // =================== // // Log format // ---------- // // The replay log is a simple data format based on a sequence of 32-bit words. The following BNF-like grammar describes // enough detail to understand the overall structure of the log data and be able to read hex dumps. For a greater // degree of detail see the source code. The format is considered fully internal to CXXRTL and is subject to change // without notice. // // ::= + // ::= 0x52585843 0x00004c54 // ::= * // ::= * // ::= 0xc0000000 ... // ::= 0xc0000001 ... // ::= 0x0??????? + | 0x1??????? + | 0x2??????? | 0x3??????? // , ::= 0x???????? // ::= 0xFFFFFFFF // // The replay log contains sample data, however, it does not cover the entire design. Rather, it only contains sample // data for the subset of debug items containing _design state_: inputs and registers/latches. This keeps its size to // a minimum, and recording speed to a maximum. The player samples any missing data by setting the design state items // to the same values they had during recording, and re-evaluating the design. // // Limits // ------ // // The log may contain: // // * Up to 2**28-1 debug items containing design state. // * Up to 2**32 chunks per debug item. // * Up to 2**32 rows per memory. // * Up to 2**32 samples. // // Of these limits, the last two are most likely to be eventually exceeded by practical recordings. However, other // performance considerations will likely limit the size of such practical recordings first, so the log data format // will undergo a breaking change at that point. // // Operations // ---------- // // As suggested by the name "replay log", this format is designed for recording (writing) once and playing (reading) // many times afterwards, such that reading the format can be done linearly and quickly. The log format is designed to // support three primary read operations: // // 1. Initialization // 2. Rewinding (to time T) // 3. Replaying (for N samples) // // During initialization, the player establishes the mapping between debug item names and their 28-bit identifiers in // the log. It is done once. // // During rewinding, the player begins reading at the latest non-incremental sample that still lies before the requested // sample time. It continues reading incremental samples after that point until it reaches the requested sample time. // This process is very cheap as the design is not evaluated; it is essentially a (convoluted) memory copy operation. // // During replaying, the player evaluates the design at the current time, which causes all debug items to assume // the values they had before recording. This process is expensive. Once done, the player advances to the next state // by reading the next (complete or incremental) sample, as above. Since a range of samples is replayed, this process // is repeated several times in a row. // // In principle, when replaying, the player could only read the state of the inputs and the time delta and use a normal // eval/commit loop to progress the simulation, which is fully deterministic so its calculated design state should be // exactly the same as the recorded design state. In practice, it is both faster and more reliable (in presence of e.g. // user-defined black boxes) to read the recorded values instead of calculating them. // // Note: The operations described above are conceptual and do not correspond exactly to methods on `cxxrtl::player`. // The `cxxrtl::player::replay()` method does not evaluate the design. This is so that delta cycles could be ignored // if they are not of interest while replaying. namespace cxxrtl { // A spool stores CXXRTL design state changes in a file. class spool { public: // Unique pointer to a specific sample within a replay log. (Timestamps are not unique.) typedef uint32_t pointer_t; // Numeric identifier assigned to a debug item within a replay log. Range limited to [1, MAXIMUM_IDENT]. typedef uint32_t ident_t; static constexpr uint16_t VERSION = 0x0400; static constexpr uint64_t HEADER_MAGIC = 0x00004c5452585843; static constexpr uint64_t VERSION_MASK = 0xffff000000000000; static constexpr uint32_t PACKET_DEFINE = 0xc0000000; static constexpr uint32_t PACKET_SAMPLE = 0xc0000001; enum sample_flag : uint32_t { EMPTY = 0, INCREMENTAL = 1, }; static constexpr uint32_t MAXIMUM_IDENT = 0x0fffffff; static constexpr uint32_t CHANGE_MASK = 0x30000000; static constexpr uint32_t PACKET_CHANGE = 0x00000000/* | ident */; static constexpr uint32_t PACKET_CHANGEI = 0x10000000/* | ident */; static constexpr uint32_t PACKET_CHANGEL = 0x20000000/* | ident */; static constexpr uint32_t PACKET_CHANGEH = 0x30000000/* | ident */; static constexpr uint32_t PACKET_END = 0xffffffff; // Writing spools. class writer { int fd; size_t position; std::vector buffer; // These functions aren't overloaded because of implicit numeric conversions. void emit_word(uint32_t word) { if (position + 1 == buffer.size()) flush(); buffer[position++] = word; } void emit_dword(uint64_t dword) { emit_word(dword >> 0); emit_word(dword >> 32); } void emit_ident(ident_t ident) { assert(ident <= MAXIMUM_IDENT); emit_word(ident); } void emit_size(size_t size) { assert(size <= std::numeric_limits::max()); emit_word(size); } // Same implementation as `emit_size()`, different declared intent. void emit_index(size_t index) { assert(index <= std::numeric_limits::max()); emit_word(index); } void emit_string(std::string str) { // Align to a word boundary, and add at least one terminating \0. str.resize(str.size() + (sizeof(uint32_t) - (str.size() + sizeof(uint32_t)) % sizeof(uint32_t))); for (size_t index = 0; index < str.size(); index += sizeof(uint32_t)) { uint32_t word; memcpy(&word, &str[index], sizeof(uint32_t)); emit_word(word); } } void emit_time(const time ×tamp) { const value &raw_timestamp(timestamp); emit_word(raw_timestamp.data[0]); emit_word(raw_timestamp.data[1]); emit_word(raw_timestamp.data[2]); } public: // Creates a writer, and transfers ownership of `fd`, which must be open for appending. // // The buffer size is currently fixed to a "reasonably large" size, determined empirically by measuring writer // performance on a representative design; large but not so large it would e.g. cause address space exhaustion // on 32-bit platforms. writer(spool &spool) : fd(spool.take_write()), position(0), buffer(32 * 1024 * 1024) { assert(fd != -1); #if !defined(WIN32) int result = ftruncate(fd, 0); #else int result = _chsize_s(fd, 0); #endif assert(result == 0); } writer(writer &&moved) : fd(moved.fd), position(moved.position), buffer(moved.buffer) { moved.fd = -1; moved.position = 0; } writer(const writer &) = delete; writer &operator=(const writer &) = delete; // Both write() calls and fwrite() calls are too expensive to perform implicitly. The API consumer must determine // the optimal time to flush the writer and do that explicitly for best performance. void flush() { assert(fd != -1); size_t data_size = position * sizeof(uint32_t); size_t data_written = write(fd, buffer.data(), data_size); assert(data_size == data_written); position = 0; } ~writer() { if (fd != -1) { flush(); close(fd); } } void write_magic() { // `CXXRTL` followed by version in binary. This header will read backwards on big-endian machines, which allows // detection of this case, both visually and programmatically. emit_dword(((uint64_t)VERSION << 48) | HEADER_MAGIC); } void write_define(ident_t ident, const std::string &name, size_t part_index, size_t chunks, size_t depth) { emit_word(PACKET_DEFINE); emit_ident(ident); emit_string(name); emit_index(part_index); emit_size(chunks); emit_size(depth); } void write_sample(bool incremental, pointer_t pointer, const time ×tamp) { uint32_t flags = (incremental ? sample_flag::INCREMENTAL : 0); emit_word(PACKET_SAMPLE); emit_word(flags); emit_word(pointer); emit_time(timestamp); } void write_change(ident_t ident, size_t chunks, const chunk_t *data) { assert(ident <= MAXIMUM_IDENT); if (chunks == 1 && *data == 0) { emit_word(PACKET_CHANGEL | ident); } else if (chunks == 1 && *data == 1) { emit_word(PACKET_CHANGEH | ident); } else { emit_word(PACKET_CHANGE | ident); for (size_t offset = 0; offset < chunks; offset++) emit_word(data[offset]); } } void write_change(ident_t ident, size_t chunks, const chunk_t *data, size_t index) { assert(ident <= MAXIMUM_IDENT); emit_word(PACKET_CHANGEI | ident); emit_index(index); for (size_t offset = 0; offset < chunks; offset++) emit_word(data[offset]); } void write_end() { emit_word(PACKET_END); } }; // Reading spools. class reader { FILE *f; uint32_t absorb_word() { // If we're at end of file, `fread` will not write to `word`, and `PACKET_END` will be returned. uint32_t word = PACKET_END; fread(&word, sizeof(word), 1, f); return word; } uint64_t absorb_dword() { uint32_t lo = absorb_word(); uint32_t hi = absorb_word(); return ((uint64_t)hi << 32) | lo; } ident_t absorb_ident() { ident_t ident = absorb_word(); assert(ident <= MAXIMUM_IDENT); return ident; } size_t absorb_size() { return absorb_word(); } size_t absorb_index() { return absorb_word(); } std::string absorb_string() { std::string str; do { size_t end = str.size(); str.resize(end + 4); uint32_t word = absorb_word(); memcpy(&str[end], &word, sizeof(uint32_t)); } while (str.back() != '\0'); // Strings have no embedded zeroes besides the terminating one(s). return str.substr(0, str.find('\0')); } time absorb_time() { value raw_timestamp; raw_timestamp.data[0] = absorb_word(); raw_timestamp.data[1] = absorb_word(); raw_timestamp.data[2] = absorb_word(); return time(raw_timestamp); } public: typedef uint64_t pos_t; // Creates a reader, and transfers ownership of `fd`, which must be open for reading. reader(spool &spool) : f(fdopen(spool.take_read(), "r")) { assert(f != nullptr); } reader(reader &&moved) : f(moved.f) { moved.f = nullptr; } reader(const reader &) = delete; reader &operator=(const reader &) = delete; ~reader() { if (f != nullptr) fclose(f); } pos_t position() { return ftell(f); } void rewind(pos_t position) { fseek(f, position, SEEK_SET); } void read_magic() { uint64_t magic = absorb_dword(); assert((magic & ~VERSION_MASK) == HEADER_MAGIC); assert((magic >> 48) == VERSION); } bool read_define(ident_t &ident, std::string &name, size_t &part_index, size_t &chunks, size_t &depth) { uint32_t header = absorb_word(); if (header == PACKET_END) return false; assert(header == PACKET_DEFINE); ident = absorb_ident(); name = absorb_string(); part_index = absorb_index(); chunks = absorb_size(); depth = absorb_size(); return true; } bool read_sample(bool &incremental, pointer_t &pointer, time ×tamp) { uint32_t header = absorb_word(); if (header == PACKET_END) return false; assert(header == PACKET_SAMPLE); uint32_t flags = absorb_word(); incremental = (flags & sample_flag::INCREMENTAL); pointer = absorb_word(); timestamp = absorb_time(); return true; } bool read_change_header(uint32_t &header, ident_t &ident) { header = absorb_word(); if (header == PACKET_END) return false; assert((header & ~(CHANGE_MASK | MAXIMUM_IDENT)) == 0); ident = header & MAXIMUM_IDENT; return true; } void read_change_data(uint32_t header, size_t chunks, size_t depth, chunk_t *data) { uint32_t index = 0; switch (header & CHANGE_MASK) { case PACKET_CHANGEL: *data = 0; return; case PACKET_CHANGEH: *data = 1; return; case PACKET_CHANGE: break; case PACKET_CHANGEI: index = absorb_word(); assert(index < depth); break; default: assert(false && "Unrecognized change packet"); } for (size_t offset = 0; offset < chunks; offset++) data[chunks * index + offset] = absorb_word(); } }; // Opening spools. For certain uses of the record/replay mechanism, two distinct open files (two open files, i.e. // two distinct file pointers, and not just file descriptors, which share the file pointer if duplicated) are used, // for a reader and writer thread. This class manages the lifetime of the descriptors for these files. When only // one of them is used, the other is closed harmlessly when the spool is destroyed. private: std::atomic writefd; std::atomic readfd; public: spool(const std::string &filename) : writefd(open(filename.c_str(), O_CREAT|O_BINARY|O_WRONLY|O_APPEND, 0644)), readfd(open(filename.c_str(), O_BINARY|O_RDONLY)) { assert(writefd.load() != -1 && readfd.load() != -1); } spool(spool &&moved) : writefd(moved.writefd.exchange(-1)), readfd(moved.readfd.exchange(-1)) {} spool(const spool &) = delete; spool &operator=(const spool &) = delete; ~spool() { if (int fd = writefd.exchange(-1)) close(fd); if (int fd = readfd.exchange(-1)) close(fd); } // Atomically acquire a write file descriptor for the spool. Can be called once, and will return -1 the next time // it is called. Thread-safe. int take_write() { return writefd.exchange(-1); } // Atomically acquire a read file descriptor for the spool. Can be called once, and will return -1 the next time // it is called. Thread-safe. int take_read() { return readfd.exchange(-1); } }; // A CXXRTL recorder samples design state, producing complete or incremental updates, and writes them to a spool. class recorder { struct variable { spool::ident_t ident; /* <= spool::MAXIMUM_IDENT */ size_t chunks; size_t depth; /* == 1 for wires */ chunk_t *curr; bool memory; }; spool::writer writer; std::vector variables; std::vector inputs; // values of inputs must be recorded explicitly, as their changes are not observed std::unordered_map ident_lookup; bool streaming = false; // whether variable definitions have been written spool::pointer_t pointer = 0; time timestamp; public: template recorder(Args &&...args) : writer(std::forward(args)...) {} void start(module &module) { debug_items items; module.debug_info(items); start(items); } void start(const debug_items &items) { assert(!streaming); writer.write_magic(); for (auto item : items.table) for (size_t part_index = 0; part_index < item.second.size(); part_index++) { auto &part = item.second[part_index]; if ((part.flags & debug_item::INPUT) || (part.flags & debug_item::DRIVEN_SYNC) || (part.type == debug_item::MEMORY)) { variable var; var.ident = variables.size() + 1; var.chunks = (part.width + sizeof(chunk_t) * 8 - 1) / (sizeof(chunk_t) * 8); var.depth = part.depth; var.curr = part.curr; var.memory = (part.type == debug_item::MEMORY); ident_lookup[var.curr] = var.ident; assert(variables.size() < spool::MAXIMUM_IDENT); if (part.flags & debug_item::INPUT) inputs.push_back(variables.size()); variables.push_back(var); writer.write_define(var.ident, item.first, part_index, var.chunks, var.depth); } } writer.write_end(); streaming = true; } const time &latest_time() { return timestamp; } const time &advance_time(const time &delta) { assert(!delta.is_negative()); timestamp += delta; return timestamp; } void record_complete() { assert(streaming); writer.write_sample(/*incremental=*/false, pointer++, timestamp); for (auto var : variables) { assert(var.ident != 0); if (!var.memory) writer.write_change(var.ident, var.chunks, var.curr); else for (size_t index = 0; index < var.depth; index++) writer.write_change(var.ident, var.chunks, &var.curr[var.chunks * index], index); } writer.write_end(); } // This function is generic over ModuleT to encourage observer callbacks to be inlined into the commit function. template bool record_incremental(ModuleT &module) { assert(streaming); struct { std::unordered_map *ident_lookup; spool::writer *writer; CXXRTL_ALWAYS_INLINE void on_update(size_t chunks, const chunk_t *base, const chunk_t *value) { writer->write_change(ident_lookup->at(base), chunks, value); } CXXRTL_ALWAYS_INLINE void on_update(size_t chunks, const chunk_t *base, const chunk_t *value, size_t index) { writer->write_change(ident_lookup->at(base), chunks, value, index); } } record_observer = { &ident_lookup, &writer }; writer.write_sample(/*incremental=*/true, pointer++, timestamp); for (auto input_index : inputs) { variable &var = variables.at(input_index); assert(!var.memory); writer.write_change(var.ident, var.chunks, var.curr); } bool changed = module.commit(record_observer); writer.write_end(); return changed; } void flush() { writer.flush(); } }; // A CXXRTL player reads samples from a spool, and changes the design state accordingly. To start reading samples, // a spool must have been initialized: the recorder must have been started and an initial complete sample must have // been written. class player { struct variable { size_t chunks; size_t depth; /* == 1 for wires */ chunk_t *curr; }; spool::reader reader; std::unordered_map variables; bool streaming = false; // whether variable definitions have been read bool initialized = false; // whether a sample has ever been read spool::pointer_t pointer = 0; time timestamp; std::map> index_by_pointer; std::map> index_by_timestamp; bool peek_sample(spool::pointer_t &pointer, time ×tamp) { bool incremental; auto position = reader.position(); bool success = reader.read_sample(incremental, pointer, timestamp); reader.rewind(position); return success; } public: template player(Args &&...args) : reader(std::forward(args)...) {} void start(module &module) { debug_items items; module.debug_info(items); start(items); } void start(const debug_items &items) { assert(!streaming); reader.read_magic(); while (true) { spool::ident_t ident; std::string name; size_t part_index; size_t chunks; size_t depth; if (!reader.read_define(ident, name, part_index, chunks, depth)) break; assert(variables.count(ident) == 0); assert(items.count(name) != 0); assert(part_index < items.count(name)); const debug_item &part = items.parts_at(name).at(part_index); assert(chunks == (part.width + sizeof(chunk_t) * 8 - 1) / (sizeof(chunk_t) * 8)); assert(depth == part.depth); variable &var = variables[ident]; var.chunks = chunks; var.depth = depth; var.curr = part.curr; } assert(variables.size() > 0); streaming = true; // Establish the initial state of the design. initialized = replay(); assert(initialized); } // Returns the pointer of the current sample. spool::pointer_t current_pointer() { assert(initialized); return pointer; } // Returns the time of the current sample. const time ¤t_time() { assert(initialized); return timestamp; } // Returns `true` if there is a next sample to read, and sets `pointer` to its pointer if there is. bool get_next_pointer(spool::pointer_t &pointer) { assert(streaming); time timestamp; return peek_sample(pointer, timestamp); } // Returns `true` if there is a next sample to read, and sets `timestamp` to its time if there is. bool get_next_time(time ×tamp) { assert(streaming); uint32_t pointer; return peek_sample(pointer, timestamp); } // If this function returns `true`, then `current_pointer() == at_pointer`, and the module contains values that // correspond to this pointer in the replay log. To obtain a valid pointer, call `current_pointer()`; while pointers // are monotonically increasing for each consecutive sample, using arithmetic operations to create a new pointer is // not allowed. bool rewind_to(spool::pointer_t at_pointer) { assert(initialized); // The pointers in the replay log start from one that is greater than `at_pointer`. In this case the pointer will // never be reached. assert(index_by_pointer.size() > 0); if (at_pointer < index_by_pointer.rbegin()->first) return false; // Find the last complete sample whose pointer is less than or equal to `at_pointer`. Note that the comparison // function used here is `std::greater`, inverting the direction of `lower_bound`. auto position_it = index_by_pointer.lower_bound(at_pointer); assert(position_it != index_by_pointer.end()); reader.rewind(position_it->second); // Replay samples until eventually arriving to `at_pointer` or encountering end of file. while(replay()) { if (pointer == at_pointer) return true; } return false; } // If this function returns `true`, then `current_time() <= at_or_before_timestamp`, and the module contains values // that correspond to `current_time()` in the replay log. If `current_time() == at_or_before_timestamp` and there // are several consecutive samples with the same time, the module contains values that correspond to the first of // these samples. bool rewind_to_or_before(const time &at_or_before_timestamp) { assert(initialized); // The timestamps in the replay log start from one that is greater than `at_or_before_timestamp`. In this case // the timestamp will never be reached. Otherwise, this function will always succeed. assert(index_by_timestamp.size() > 0); if (at_or_before_timestamp < index_by_timestamp.rbegin()->first) return false; // Find the last complete sample whose timestamp is less than or equal to `at_or_before_timestamp`. Note that // the comparison function used here is `std::greater`, inverting the direction of `lower_bound`. auto position_it = index_by_timestamp.lower_bound(at_or_before_timestamp); assert(position_it != index_by_timestamp.end()); reader.rewind(position_it->second); // Replay samples until eventually arriving to or past `at_or_before_timestamp` or encountering end of file. while (replay()) { if (timestamp == at_or_before_timestamp) break; time next_timestamp; if (!get_next_time(next_timestamp)) break; if (next_timestamp > at_or_before_timestamp) break; } return true; } // If this function returns `true`, then `current_pointer()` and `current_time()` are updated for the next sample // and the module now contains values that correspond to that sample. If it returns `false`, there was no next sample // to read. bool replay() { assert(streaming); bool incremental; auto position = reader.position(); if (!reader.read_sample(incremental, pointer, timestamp)) return false; // The very first sample that is read must be a complete sample. This is required for the rewind functions to work. assert(initialized || !incremental); // It is possible (though not very useful) to have several complete samples with the same timestamp in a row. // Ensure that we associate the timestamp with the position of the first such complete sample. (This condition // works because the player never jumps over a sample.) if (!incremental && !index_by_pointer.count(pointer)) { assert(!index_by_timestamp.count(timestamp)); index_by_pointer[pointer] = position; index_by_timestamp[timestamp] = position; } uint32_t header; spool::ident_t ident; variable var; while (reader.read_change_header(header, ident)) { variable &var = variables.at(ident); reader.read_change_data(header, var.chunks, var.depth, var.curr); } return true; } }; } #endif