// fayalite/crates/fayalite/src/build/graph.rs (801 lines, 28 KiB, Rust)

// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use crate::{
build::{DynJob, JobItem, JobItemName, JobParams, program_name_for_internal_jobs},
intern::Interned,
util::{HashMap, HashSet, job_server::AcquiredJob},
};
use eyre::{ContextCompat, eyre};
use petgraph::{
algo::{DfsSpace, kosaraju_scc, toposort},
graph::DiGraph,
visit::{GraphBase, Visitable},
};
use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Error, ser::SerializeSeq};
use std::{
cell::OnceCell,
collections::{BTreeMap, BTreeSet, VecDeque},
fmt::{self, Write},
panic,
rc::Rc,
sync::mpsc,
thread::{self, ScopedJoinHandle},
};
// Formats into the given destination (intended to be a `String`) and unwraps the result,
// since formatting into a `String` does not produce an error.
macro_rules! write_str {
    ($destination:expr, $($format_args:tt)*) => {
        write!($destination, $($format_args)*).expect("String::write_fmt can't fail")
    };
}
/// A node of the job graph: either a job or an item that jobs consume and/or produce.
///
/// Edges only ever connect a job to one of its output items, or an item to a job that
/// takes it as an input.
#[derive(Clone, Debug)]
enum JobGraphNode {
/// A job.
Job(DynJob),
/// A named item.
Item {
#[allow(dead_code, reason = "name used for debugging")]
name: JobItemName,
/// The job that lists this item among its outputs, or `None` if no job added so far does.
source_job: Option<DynJob>,
},
}
/// The petgraph directed graph that backs [`JobGraph`]; edges carry no data.
type JobGraphInner = DiGraph<JobGraphNode, ()>;
/// A graph of jobs and the items they read and write.
#[derive(Clone, Default)]
pub struct JobGraph {
/// Maps every job that has been added to its node in `graph`.
jobs: HashMap<DynJob, <JobGraphInner as GraphBase>::NodeId>,
/// Maps every known item name to its node in `graph`.
items: HashMap<JobItemName, <JobGraphInner as GraphBase>::NodeId>,
graph: JobGraphInner,
/// The node order produced by the most recent successful `toposort` of `graph`.
topological_order: Vec<<JobGraphInner as GraphBase>::NodeId>,
/// Scratch space handed to `toposort`.
space: DfsSpace<<JobGraphInner as GraphBase>::NodeId, <JobGraphInner as Visitable>::Map>,
}
impl fmt::Debug for JobGraph {
    /// Prints only the graph and its topological order; the lookup maps and the
    /// traversal scratch space are left out and the output is marked non-exhaustive.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Every field is named so that adding a field forces a decision here.
        let Self {
            graph,
            topological_order,
            jobs: _,
            items: _,
            space: _,
        } = self;
        let mut builder = f.debug_struct("JobGraph");
        builder.field("graph", graph);
        builder.field("topological_order", topological_order);
        builder.finish_non_exhaustive()
    }
}
/// Reasons why jobs could not be added to a [`JobGraph`].
#[derive(Clone, Debug)]
pub enum JobGraphError {
/// Adding the jobs would make the graph cyclic.
CycleError {
/// A job that is part of the cycle.
job: DynJob,
/// An output of `job` that is also part of the cycle.
output: JobItemName,
},
/// A new job lists an output that an already-added job also lists as an output.
MultipleJobsCreateSameOutput {
/// The output claimed by both jobs.
output_item: JobItemName,
/// The job that was already recorded as producing `output_item`.
existing_job: DynJob,
/// The job that was being added.
new_job: DynJob,
},
}
impl std::error::Error for JobGraphError {}
impl fmt::Display for JobGraphError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::CycleError { job, output } => write!(
f,
"job can't be added to job graph because it would introduce a cyclic dependency through this job output:\n\
{output:?}\n\
job:\n{job:?}",
),
JobGraphError::MultipleJobsCreateSameOutput {
output_item,
existing_job,
new_job,
} => write!(
f,
"job can't be added to job graph because the new job has an output that is also produced by an existing job.\n\
conflicting output:\n\
{output_item:?}\n\
existing job:\n\
{existing_job:?}\n\
new job:\n\
{new_job:?}",
),
}
}
}
/// The quoting style in effect while an escaped shell word is being emitted.
#[derive(Copy, Clone, Debug)]
enum EscapeForUnixShellState {
    /// Inside `$'...'`; bytes are written with backslash escapes.
    DollarSingleQuote,
    /// Inside `'...'`.
    SingleQuote,
    /// Outside of any quotes; special characters are prefixed with a backslash.
    Unquoted,
}

/// Escapes a byte string so that it forms a single word on a Unix shell command line.
///
/// The value is both an [`Iterator`] over the escaped characters and `Display`/`Debug`,
/// which print those same characters.
#[derive(Clone)]
pub struct EscapeForUnixShell<'a> {
    state: EscapeForUnixShellState,
    /// Output bytes queued ahead of the remaining input; a `0` byte marks an empty slot.
    prefix: [u8; 3],
    /// Input bytes that have not been consumed yet.
    bytes: &'a [u8],
}

impl<'a> fmt::Debug for EscapeForUnixShell<'a> {
    /// Same output as `Display`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <Self as fmt::Display>::fmt(self, f)
    }
}

impl<'a> fmt::Display for EscapeForUnixShell<'a> {
    /// Writes every escaped character, working on a copy so `self` is left untouched.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.clone().try_for_each(|c| f.write_char(c))
    }
}

impl<'a> EscapeForUnixShell<'a> {
    /// Escapes the UTF-8 bytes of `s`.
    pub fn new(s: &'a str) -> Self {
        Self::from_bytes(s.as_bytes())
    }

    /// Builds a queue holding `bytes` (at most three, none of them zero) in its leading slots.
    fn make_prefix(bytes: &[u8]) -> [u8; 3] {
        let mut queue = [0u8; 3];
        let (filled, _unused) = queue.split_at_mut(bytes.len());
        filled.copy_from_slice(bytes);
        queue
    }

    /// Picks the quoting style for `bytes`:
    ///
    /// * any control byte or byte `>= 0x7F` selects `$'...'`;
    /// * otherwise empty input or any of `!`, `'`, `"`, space selects `'...'`;
    /// * otherwise the word is written unquoted with backslash escapes.
    pub fn from_bytes(bytes: &'a [u8]) -> Self {
        let needs_dollar_quote = bytes
            .iter()
            .any(|&b| matches!(b, 0..=0x1F | 0x7F..=0xFF));
        let needs_single_quote = bytes.is_empty()
            || bytes
                .iter()
                .any(|&b| matches!(b, b'!' | b'\'' | b'\"' | b' '));
        let (state, opener) = if needs_dollar_quote {
            (EscapeForUnixShellState::DollarSingleQuote, &b"$'"[..])
        } else if needs_single_quote {
            (EscapeForUnixShellState::SingleQuote, &b"'"[..])
        } else {
            (EscapeForUnixShellState::Unquoted, &b""[..])
        };
        Self {
            state,
            prefix: Self::make_prefix(opener),
            bytes,
        }
    }
}

impl Iterator for EscapeForUnixShell<'_> {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        /// Lower-case hexadecimal digits, indexed by nibble value.
        const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
        /// Bytes that get a backslash in front of them when written unquoted.
        const UNQUOTED_SPECIALS: &[u8] = b" !\"#$&'()*,;<>?[\\]^`{|}~";
        /// A queue with nothing in it.
        const EMPTY_QUEUE: [u8; 3] = [0; 3];

        // Drain queued output first: emit the first non-zero slot and clear it.
        if let Some(slot) = self.prefix.iter_mut().find(|slot| **slot != 0) {
            return Some(std::mem::take(slot) as char);
        }

        let Some((&byte, remaining)) = self.bytes.split_first() else {
            // Input exhausted: close an open quote exactly once, then finish.
            return match self.state {
                EscapeForUnixShellState::Unquoted => None,
                EscapeForUnixShellState::DollarSingleQuote
                | EscapeForUnixShellState::SingleQuote => {
                    self.state = EscapeForUnixShellState::Unquoted;
                    Some('\'')
                }
            };
        };
        self.bytes = remaining;

        // Each input byte yields one character now plus up to three queued bytes.
        // The queue is known to be empty here, so storing `EMPTY_QUEUE` changes nothing.
        let (emitted, queued) = match self.state {
            EscapeForUnixShellState::DollarSingleQuote => match byte {
                b'\'' | b'\\' => ('\\', Self::make_prefix(&[byte])),
                b'\t' => ('\\', Self::make_prefix(b"t")),
                b'\n' => ('\\', Self::make_prefix(b"n")),
                b'\r' => ('\\', Self::make_prefix(b"r")),
                0x20..=0x7E => (byte as char, EMPTY_QUEUE),
                _ => (
                    '\\',
                    [
                        b'x',
                        HEX_DIGITS[usize::from(byte >> 4)],
                        HEX_DIGITS[usize::from(byte & 0xF)],
                    ],
                ),
            },
            EscapeForUnixShellState::SingleQuote => {
                if byte == b'\'' {
                    // close the quote, emit an escaped quote, reopen the quote: '\''
                    ('\'', Self::make_prefix(b"\\''"))
                } else {
                    (byte as char, EMPTY_QUEUE)
                }
            }
            EscapeForUnixShellState::Unquoted => {
                if UNQUOTED_SPECIALS.contains(&byte) {
                    ('\\', Self::make_prefix(&[byte]))
                } else {
                    (byte as char, EMPTY_QUEUE)
                }
            }
        };
        self.prefix = queued;
        Some(emitted)
    }
}
/// Selects which escaping rules [`EscapeForUnixMakefile`] applies.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
#[non_exhaustive]
pub enum UnixMakefileEscapeKind {
    /// Text outside of a recipe line, such as a target or prerequisite name.
    NonRecipe,
    /// Text inside a recipe line; only Makefile escaping is applied.
    RecipeWithoutShellEscaping,
    /// Text inside a recipe line; it is shell-escaped first and then Makefile-escaped.
    RecipeWithShellEscaping,
}

/// Displays a string escaped for inclusion in a Unix Makefile.
#[derive(Copy, Clone)]
pub struct EscapeForUnixMakefile<'a> {
    s: &'a str,
    kind: UnixMakefileEscapeKind,
}

impl<'a> fmt::Debug for EscapeForUnixMakefile<'a> {
    /// Same output as `Display`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}

impl<'a> fmt::Display for EscapeForUnixMakefile<'a> {
    /// Writes the escaped text; requests for helper variables are ignored here because
    /// `new` has already recorded them.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.do_write(f, fmt::Write::write_str, fmt::Write::write_char, |_, _| {
            Ok(())
        })
    }
}

impl<'a> EscapeForUnixMakefile<'a> {
    /// Walks the escaped form of `self.s`, reporting output through `write_str` /
    /// `write_char` and every Makefile variable definition the output relies on through
    /// `add_variable`. The first error returned by a callback stops the walk.
    ///
    /// # Panics
    ///
    /// Panics on characters that can't be escaped: control characters, and `;` for
    /// `NonRecipe`.
    fn do_write<S: ?Sized, E>(
        &self,
        state: &mut S,
        write_str: impl Fn(&mut S, &str) -> Result<(), E>,
        write_char: impl Fn(&mut S, char) -> Result<(), E>,
        add_variable: impl Fn(&mut S, &'static str) -> Result<(), E>,
    ) -> Result<(), E> {
        // Escaping shared by both recipe kinds: only `$` needs doubling.
        let mut emit_recipe_char = |c: char| match c {
            '$' => write_str(state, "$$"),
            '\0'..='\x1F' | '\x7F' => {
                panic!("can't escape a control character for Unix Makefile: {c:?}");
            }
            _ => write_char(state, c),
        };
        match self.kind {
            UnixMakefileEscapeKind::NonRecipe => {
                for c in self.s.chars() {
                    match c {
                        ';' => panic!("can't escape a semicolon (;) for Unix Makefile"),
                        '\0'..='\x1F' | '\x7F' => {
                            panic!("can't escape a control character for Unix Makefile: {c:?}");
                        }
                        '=' => {
                            // `=` is written through a variable defined elsewhere in the Makefile
                            add_variable(state, "EQUALS = =")?;
                            write_str(state, "$(EQUALS)")?;
                        }
                        '$' => write_str(state, "$$")?,
                        '\\' | ' ' | '#' | ':' | '%' | '*' | '?' | '[' | ']' | '~' => {
                            write_char(state, '\\')?;
                            write_char(state, c)?;
                        }
                        _ => write_char(state, c)?,
                    }
                }
                Ok(())
            }
            UnixMakefileEscapeKind::RecipeWithoutShellEscaping => {
                for c in self.s.chars() {
                    emit_recipe_char(c)?;
                }
                Ok(())
            }
            UnixMakefileEscapeKind::RecipeWithShellEscaping => {
                for c in EscapeForUnixShell::new(self.s) {
                    emit_recipe_char(c)?;
                }
                Ok(())
            }
        }
    }

    /// Creates the escaper and adds to `needed_variables` every variable definition that
    /// the escaped text will reference.
    ///
    /// # Panics
    ///
    /// Panics if `s` contains a character that can't be escaped for `kind`.
    pub fn new(
        s: &'a str,
        kind: UnixMakefileEscapeKind,
        needed_variables: &mut BTreeSet<&'static str>,
    ) -> Self {
        let escaped = Self { s, kind };
        // Dry run: discard all text and only collect the variables.
        let dry_run: Result<(), std::convert::Infallible> = escaped.do_write(
            needed_variables,
            |_, _| Ok(()),
            |_, _| Ok(()),
            |variables, variable| {
                variables.insert(variable);
                Ok(())
            },
        );
        match dry_run {
            Ok(()) => escaped,
            Err(never) => match never {},
        }
    }
}
impl JobGraph {
pub fn new() -> Self {
Self::default()
}
/// Returns the node for the item `name`, creating it if it doesn't exist yet.
///
/// A newly created node is recorded in `new_nodes`. When `new_source_job` is `Some`, it
/// is stored as the item's producer.
///
/// # Errors
///
/// Returns [`JobGraphError::MultipleJobsCreateSameOutput`] if `new_source_job` is `Some`
/// and the item already has a producer.
fn try_add_item_node(
    &mut self,
    name: JobItemName,
    new_source_job: Option<DynJob>,
    new_nodes: &mut HashSet<<JobGraphInner as GraphBase>::NodeId>,
) -> Result<<JobGraphInner as GraphBase>::NodeId, JobGraphError> {
    use hashbrown::hash_map::Entry;
    let occupied = match self.items.entry(name) {
        Entry::Vacant(vacant) => {
            // first time this item is seen: create its node
            let created_id = self.graph.add_node(JobGraphNode::Item {
                name,
                source_job: new_source_job,
            });
            new_nodes.insert(created_id);
            vacant.insert(created_id);
            return Ok(created_id);
        }
        Entry::Occupied(occupied) => occupied,
    };
    let existing_id = *occupied.get();
    let JobGraphNode::Item {
        name: _,
        source_job: producer,
    } = &mut self.graph[existing_id]
    else {
        unreachable!("known to be an item");
    };
    let Some(new_job) = new_source_job else {
        // the item is only being consumed, so there is nothing to record
        return Ok(existing_id);
    };
    match producer {
        Some(existing_job) => Err(JobGraphError::MultipleJobsCreateSameOutput {
            output_item: occupied.key().clone(),
            existing_job: existing_job.clone(),
            new_job,
        }),
        None => {
            *producer = Some(new_job);
            Ok(existing_id)
        }
    }
}
pub fn try_add_jobs<I: IntoIterator<Item = DynJob>>(
&mut self,
jobs: I,
) -> Result<(), JobGraphError> {
use hashbrown::hash_map::Entry;
let jobs = jobs.into_iter();
struct RemoveNewNodesOnError<'a> {
this: &'a mut JobGraph,
new_nodes: HashSet<<JobGraphInner as GraphBase>::NodeId>,
}
impl Drop for RemoveNewNodesOnError<'_> {
fn drop(&mut self) {
for node in self.new_nodes.drain() {
self.this.graph.remove_node(node);
}
}
}
let mut remove_new_nodes_on_error = RemoveNewNodesOnError {
this: self,
new_nodes: HashSet::with_capacity_and_hasher(jobs.size_hint().0, Default::default()),
};
let new_nodes = &mut remove_new_nodes_on_error.new_nodes;
let this = &mut *remove_new_nodes_on_error.this;
for job in jobs {
let Entry::Vacant(job_entry) = this.jobs.entry(job.clone()) else {
continue;
};
let job_node_id = this
.graph
.add_node(JobGraphNode::Job(job_entry.key().clone()));
new_nodes.insert(job_node_id);
job_entry.insert(job_node_id);
for name in job.outputs() {
let item_node_id = this.try_add_item_node(name, Some(job.clone()), new_nodes)?;
this.graph.add_edge(job_node_id, item_node_id, ());
}
for name in job.inputs() {
let item_node_id = this.try_add_item_node(name, None, new_nodes)?;
this.graph.add_edge(item_node_id, job_node_id, ());
}
}
match toposort(&this.graph, Some(&mut this.space)) {
Ok(v) => {
this.topological_order = v;
// no need to remove any of the new nodes on drop since we didn't encounter any errors
remove_new_nodes_on_error.new_nodes.clear();
Ok(())
}
Err(_) => {
// there's at least one cycle, find one!
let cycle = kosaraju_scc(&this.graph)
.into_iter()
.find_map(|scc| {
if scc.len() <= 1 {
// can't be a cycle since our graph is bipartite --
// jobs only connect to items, never jobs to jobs or items to items
None
} else {
Some(scc)
}
})
.expect("we know there's a cycle");
let cycle_set = HashSet::from_iter(cycle.iter().copied());
let job = cycle
.into_iter()
.find_map(|node_id| {
if let JobGraphNode::Job(job) = &this.graph[node_id] {
Some(job.clone())
} else {
None
}
})
.expect("a job must be part of the cycle");
let output = job
.outputs()
.into_iter()
.find(|output| cycle_set.contains(&this.items[output]))
.expect("an output must be part of the cycle");
Err(JobGraphError::CycleError { job, output })
}
}
}
/// Adds `jobs` to the graph like [`Self::try_add_jobs`].
///
/// # Panics
///
/// Panics with the error's message if `try_add_jobs` fails.
#[track_caller]
pub fn add_jobs<I: IntoIterator<Item = DynJob>>(&mut self, jobs: I) {
    if let Err(e) = self.try_add_jobs(jobs) {
        panic!("error: {e}");
    }
}
/// Renders the graph as a Unix Makefile, using `program_name_for_internal_jobs()` as the
/// only element of the internal program prefix.
pub fn to_unix_makefile(&self, extra_args: &[Interned<str>]) -> String {
    let internal_program_prefix = [program_name_for_internal_jobs()];
    self.to_unix_makefile_with_internal_program_prefix(&internal_program_prefix, extra_args)
}
/// Renders the graph as a Unix Makefile with one rule per job, in topological order.
///
/// Each rule lists the job's outputs as targets (`&:` is used instead of `:` when there
/// is more than one), its inputs as prerequisites, and the job's command line as the
/// recipe. `DynamicPaths` items are written using their source job's name and declared
/// `.PHONY`. Variable definitions required by the escaping are put at the top.
///
/// # Panics
///
/// Panics if a name or argument contains a character that can't be escaped.
pub fn to_unix_makefile_with_internal_program_prefix(
    &self,
    internal_program_prefix: &[Interned<str>],
    extra_args: &[Interned<str>],
) -> String {
    /// Appends `text` as a rule target, followed by a space.
    fn push_target(
        makefile: &mut String,
        text: &str,
        needed_variables: &mut BTreeSet<&'static str>,
    ) {
        write_str!(
            makefile,
            "{} ",
            EscapeForUnixMakefile::new(
                text,
                UnixMakefileEscapeKind::NonRecipe,
                needed_variables
            )
        );
    }
    /// Appends `text` as a prerequisite, preceded by a space.
    fn push_prerequisite(
        makefile: &mut String,
        text: &str,
        needed_variables: &mut BTreeSet<&'static str>,
    ) {
        write_str!(
            makefile,
            " {}",
            EscapeForUnixMakefile::new(
                text,
                UnixMakefileEscapeKind::NonRecipe,
                needed_variables
            )
        );
    }

    let mut makefile = String::new();
    let mut needed_variables = BTreeSet::new();
    let mut phony_targets = BTreeSet::new();
    let jobs_in_order = self
        .topological_order
        .iter()
        .filter_map(|&node_id| match &self.graph[node_id] {
            JobGraphNode::Job(job) => Some(job),
            JobGraphNode::Item { .. } => None,
        });
    for job in jobs_in_order {
        // zero or one target uses a plain rule, several targets use a grouped rule
        let rule_separator = match job.outputs().len() {
            0 | 1 => ":",
            _ => "&:",
        };
        for output in job.outputs() {
            match output {
                JobItemName::Path { path } => {
                    push_target(&mut makefile, &path, &mut needed_variables);
                }
                JobItemName::DynamicPaths { source_job_name } => {
                    push_target(&mut makefile, &source_job_name, &mut needed_variables);
                    phony_targets.insert(Interned::into_inner(source_job_name));
                }
            }
        }
        makefile.push_str(rule_separator);
        for input in job.inputs() {
            match input {
                JobItemName::Path { path } => {
                    push_prerequisite(&mut makefile, &path, &mut needed_variables);
                }
                JobItemName::DynamicPaths { source_job_name } => {
                    push_prerequisite(&mut makefile, &source_job_name, &mut needed_variables);
                    phony_targets.insert(Interned::into_inner(source_job_name));
                }
            }
        }
        makefile.push_str("\n\t");
        job.command_params_with_internal_program_prefix(internal_program_prefix, extra_args)
            .to_unix_shell_line(&mut makefile, |arg, sink| {
                write!(
                    sink,
                    "{}",
                    EscapeForUnixMakefile::new(
                        arg,
                        UnixMakefileEscapeKind::RecipeWithShellEscaping,
                        &mut needed_variables
                    )
                )
            })
            .expect("writing to String never fails");
        makefile.push_str("\n\n");
    }
    if !phony_targets.is_empty() {
        makefile.push_str("\n.PHONY:");
        for phony_target in phony_targets {
            push_prerequisite(&mut makefile, phony_target, &mut needed_variables);
        }
        makefile.push_str("\n");
    }
    if !needed_variables.is_empty() {
        // variable definitions go before the rules that use them
        let definitions = needed_variables
            .into_iter()
            .map(|v| format!("{v}\n"))
            .collect::<String>();
        makefile.insert_str(0, &definitions);
    }
    makefile
}
/// Renders the graph as a Unix shell script, using `program_name_for_internal_jobs()` as
/// the only element of the internal program prefix.
pub fn to_unix_shell_script(&self, extra_args: &[Interned<str>]) -> String {
    let internal_program_prefix = [program_name_for_internal_jobs()];
    self.to_unix_shell_script_with_internal_program_prefix(&internal_program_prefix, extra_args)
}
/// Renders the graph as a `/bin/sh` script that runs `set -ex` and then every job's
/// command line, one per line, in topological order.
pub fn to_unix_shell_script_with_internal_program_prefix(
    &self,
    internal_program_prefix: &[Interned<str>],
    extra_args: &[Interned<str>],
) -> String {
    let mut script = String::from(
        "#!/bin/sh\n\
        set -ex\n",
    );
    let jobs_in_order = self
        .topological_order
        .iter()
        .filter_map(|&node_id| match &self.graph[node_id] {
            JobGraphNode::Job(job) => Some(job),
            JobGraphNode::Item { .. } => None,
        });
    for job in jobs_in_order {
        job.command_params_with_internal_program_prefix(internal_program_prefix, extra_args)
            .to_unix_shell_line(&mut script, |arg, sink| {
                write!(sink, "{}", EscapeForUnixShell::new(&arg))
            })
            .expect("writing to String never fails");
        script.push_str("\n");
    }
    script
}
/// Runs every job in the graph, each on its own scoped thread, starting a job once all
/// of its input items have been produced by the jobs it depends on.
///
/// `AcquiredJob::acquire()` is called on the calling thread before each job's thread is
/// spawned.
///
/// # Errors
///
/// Returns the first error reported by a job or by `AcquiredJob::acquire()`; threads
/// that are still running are joined before this function returns.
///
/// # Panics
///
/// * Re-raises the panic of any job thread that panicked.
/// * Panics if a job returns output items whose names don't match `job.outputs()`.
/// * Panics if a job's thread can't be spawned.
/// * NOTE(review): a job with an input that no job in the graph produces never becomes
///   ready, so the final `item_name_to_waiting_jobs_map.is_empty()` assertion looks
///   like it would fail for such graphs -- confirm that callers never build one.
pub fn run(&self, params: &JobParams) -> eyre::Result<()> {
    // use scope to auto-join threads on errors
    thread::scope(|scope| {
        struct WaitingJobState {
            job_node_id: <JobGraphInner as GraphBase>::NodeId,
            job: DynJob,
            inputs: BTreeMap<JobItemName, OnceCell<JobItem>>,
        }
        let mut ready_jobs = VecDeque::new();
        let mut item_name_to_waiting_jobs_map = HashMap::<_, Vec<_>>::default();
        for &node_id in &self.topological_order {
            let JobGraphNode::Job(job) = &self.graph[node_id] else {
                continue;
            };
            let waiting_job = WaitingJobState {
                job_node_id: node_id,
                job: job.clone(),
                inputs: job
                    .inputs()
                    .iter()
                    .map(|&name| (name, OnceCell::new()))
                    .collect(),
            };
            if waiting_job.inputs.is_empty() {
                ready_jobs.push_back(waiting_job);
            } else {
                // one `Rc` per input: the job becomes ready when the last one is released
                let waiting_job = Rc::new(waiting_job);
                for &input_item in waiting_job.inputs.keys() {
                    item_name_to_waiting_jobs_map
                        .entry(input_item)
                        .or_default()
                        .push(waiting_job.clone());
                }
            }
        }
        struct RunningJob<'scope> {
            job: DynJob,
            thread: ScopedJoinHandle<'scope, eyre::Result<Vec<JobItem>>>,
        }
        let mut running_jobs = HashMap::default();
        let (finished_jobs_sender, finished_jobs_receiver) = mpsc::channel();
        loop {
            // When nothing is ready to start, the only way to make progress is for a
            // running job to finish, so block until one does instead of spinning on
            // `try_recv`. Every running job sends exactly one message (from `Drop`, so
            // also when it panics) and is removed from `running_jobs` once that message
            // is handled, so a non-empty `running_jobs` guarantees a message will arrive.
            let awaited_job = if ready_jobs.is_empty() && !running_jobs.is_empty() {
                finished_jobs_receiver.recv().ok()
            } else {
                None
            };
            // handle the awaited job (if any) and everything else that already finished
            for finished_job in awaited_job
                .into_iter()
                .chain(finished_jobs_receiver.try_iter())
            {
                let Some(RunningJob { job, thread }) = running_jobs.remove(&finished_job)
                else {
                    unreachable!();
                };
                let output_items = thread.join().map_err(panic::resume_unwind)??;
                assert!(
                    output_items.iter().map(JobItem::name).eq(job.outputs()),
                    "job's run() method returned the wrong output items:\n\
                    output items:\n\
                    {output_items:?}\n\
                    expected outputs:\n\
                    {:?}\n\
                    job:\n\
                    {job:?}",
                    job.outputs(),
                );
                for output_item in output_items {
                    for waiting_job in item_name_to_waiting_jobs_map
                        .remove(&output_item.name())
                        .unwrap_or_default()
                    {
                        let Ok(()) =
                            waiting_job.inputs[&output_item.name()].set(output_item.clone())
                        else {
                            unreachable!();
                        };
                        if let Some(waiting_job) = Rc::into_inner(waiting_job) {
                            ready_jobs.push_back(waiting_job);
                        }
                    }
                }
            }
            if let Some(WaitingJobState {
                job_node_id,
                job,
                inputs,
            }) = ready_jobs.pop_front()
            {
                struct RunningJobInThread<'a> {
                    job_node_id: <JobGraphInner as GraphBase>::NodeId,
                    job: DynJob,
                    inputs: Vec<JobItem>,
                    params: &'a JobParams,
                    acquired_job: AcquiredJob,
                    finished_jobs_sender: mpsc::Sender<<JobGraphInner as GraphBase>::NodeId>,
                }
                impl RunningJobInThread<'_> {
                    fn run(mut self) -> eyre::Result<Vec<JobItem>> {
                        self.job
                            .run(&self.inputs, self.params, &mut self.acquired_job)
                    }
                }
                impl Drop for RunningJobInThread<'_> {
                    fn drop(&mut self) {
                        // notify the scheduler even if the job failed or panicked
                        let _ = self.finished_jobs_sender.send(self.job_node_id);
                    }
                }
                let name = job.kind().name();
                let running_job_in_thread = RunningJobInThread {
                    job_node_id,
                    job: job.clone(),
                    inputs: Result::from_iter(job.inputs().iter().map(|input_name| {
                        inputs.get(input_name).and_then(|v| v.get().cloned()).wrap_err_with(|| {
                            eyre!("failed when trying to run job {name}: nothing provided the input item: {input_name:?}")
                        })
                    }))?,
                    params,
                    acquired_job: AcquiredJob::acquire()?,
                    finished_jobs_sender: finished_jobs_sender.clone(),
                };
                running_jobs.insert(
                    job_node_id,
                    RunningJob {
                        job,
                        thread: thread::Builder::new()
                            .name(format!("job:{name}"))
                            .spawn_scoped(scope, move || running_job_in_thread.run())
                            .expect("failed to spawn thread for job"),
                    },
                );
            }
            if running_jobs.is_empty() {
                assert!(item_name_to_waiting_jobs_map.is_empty());
                assert!(ready_jobs.is_empty());
                return Ok(());
            }
        }
    })
}
}
impl Extend<DynJob> for JobGraph {
#[track_caller]
fn extend<T: IntoIterator<Item = DynJob>>(&mut self, iter: T) {
self.add_jobs(iter);
}
}
impl FromIterator<DynJob> for JobGraph {
#[track_caller]
fn from_iter<T: IntoIterator<Item = DynJob>>(iter: T) -> Self {
let mut retval = Self::new();
retval.add_jobs(iter);
retval
}
}
impl Serialize for JobGraph {
    /// Serializes the graph as the sequence of its jobs in topological order; items and
    /// edges are not written.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut seq = serializer.serialize_seq(Some(self.jobs.len()))?;
        let jobs_in_order = self
            .topological_order
            .iter()
            .filter_map(|&node_id| match &self.graph[node_id] {
                JobGraphNode::Job(job) => Some(job),
                JobGraphNode::Item { .. } => None,
            });
        for job in jobs_in_order {
            seq.serialize_element(job)?;
        }
        seq.end()
    }
}
impl<'de> Deserialize<'de> for JobGraph {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let jobs = Vec::<DynJob>::deserialize(deserializer)?;
let mut retval = JobGraph::new();
retval.try_add_jobs(jobs).map_err(D::Error::custom)?;
Ok(retval)
}
}