fayalite/crates/fayalite/src/build/external.rs

1177 lines
38 KiB
Rust

// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use crate::{
build::{
ArgsWriter, CommandParams, GlobalParams, JobAndDependencies, JobAndKind,
JobArgsAndDependencies, JobDependencies, JobDependenciesHasBase, JobItem, JobItemName,
JobKind, JobKindAndArgs, JobParams, ToArgs, WriteArgs,
},
intern::{Intern, Interned},
util::{job_server::AcquiredJob, streaming_read_utf8::streaming_read_utf8},
};
use base64::{Engine, prelude::BASE64_URL_SAFE_NO_PAD};
use clap::builder::OsStringValueParser;
use eyre::{Context, ensure, eyre};
use serde::{
Deserialize, Deserializer, Serialize, Serializer,
de::{DeserializeOwned, Error},
};
use std::{
borrow::Cow,
collections::BTreeMap,
ffi::{OsStr, OsString},
fmt,
hash::{Hash, Hasher},
io::Write,
marker::PhantomData,
path::{Path, PathBuf},
process::ExitStatus,
sync::OnceLock,
};
/// Version tag stored in every external-job cache file.
///
/// `ExternalJobCaching::run_from_cache` only accepts a cache file whose
/// `version` equals [`Self::CURRENT`]; any other version is treated as a
/// cache miss.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ExternalJobCacheVersion {
/// Not used by this module; it used to be for `FormalCacheVersion`.
V1,
/// The version written by this module (see [`Self::CURRENT`]).
V2,
}
impl ExternalJobCacheVersion {
/// The version written into new cache files and required when reading them.
pub const CURRENT: Self = Self::V2;
}
/// Owned data that is either text or raw bytes.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
#[non_exhaustive]
pub enum MaybeUtf8 {
    /// Text contents.
    Utf8(String),
    /// Raw byte contents.
    Binary(Vec<u8>),
}

impl MaybeUtf8 {
    /// Returns the underlying bytes, whichever variant holds them.
    pub fn as_bytes(&self) -> &[u8] {
        match self {
            Self::Utf8(text) => text.as_bytes(),
            Self::Binary(bytes) => bytes.as_slice(),
        }
    }

    /// Views the bytes as an [`OsStr`].
    ///
    /// On Unix and WASI the bytes are used as-is. On every other platform the
    /// bytes must be valid UTF-8.
    ///
    /// # Panics
    ///
    /// Panics on platforms other than Unix/WASI when the bytes are not valid UTF-8.
    pub fn as_os_str(&self) -> &OsStr {
        let bytes = self.as_bytes();
        #[cfg(unix)]
        let os_str: &OsStr = std::os::unix::ffi::OsStrExt::from_bytes(bytes);
        #[cfg(target_os = "wasi")]
        let os_str: &OsStr = std::os::wasi::ffi::OsStrExt::from_bytes(bytes);
        // implementing WTF-8 is too much of a pain so don't have a special case for windows
        #[cfg(not(any(unix, target_os = "wasi")))]
        let os_str: &OsStr = match std::str::from_utf8(bytes) {
            Ok(text) => OsStr::new(text),
            Err(_) => {
                panic!("invalid UTF-8 conversion to OsStr is not implemented on this platform")
            }
        };
        os_str
    }

    /// Views the bytes as a [`Path`]; see [`Self::as_os_str`] for the panic condition.
    pub fn as_path(&self) -> &Path {
        let os_str = self.as_os_str();
        Path::new(os_str)
    }
}
/// Serialized form of `MaybeUtf8` (it is serialized under the name `MaybeUtf8`).
///
/// The hand-written `Serialize`/`Deserialize` impls for `MaybeUtf8` convert
/// to and from this type.
#[derive(Serialize, Deserialize)]
#[serde(rename = "MaybeUtf8")]
enum MaybeUtf8Serde<'a> {
/// Text, stored as-is.
Utf8(Cow<'a, str>),
/// Bytes encoded as a string with `BASE64_URL_SAFE_NO_PAD`.
Binary(String),
}
impl<'de> Deserialize<'de> for MaybeUtf8 {
    /// Deserializes through `MaybeUtf8Serde`.
    ///
    /// A `Binary` payload is base64-decoded (URL-safe alphabet, no padding) and
    /// then converted with `From<Vec<u8>>`, so decoded bytes that happen to be
    /// valid UTF-8 end up in the `Utf8` variant. A base64 decoding failure is
    /// reported as a custom deserialization error.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let decoded = match MaybeUtf8Serde::deserialize(deserializer)? {
            MaybeUtf8Serde::Utf8(text) => Self::Utf8(text.into_owned()),
            MaybeUtf8Serde::Binary(encoded) => {
                let bytes = BASE64_URL_SAFE_NO_PAD
                    .decode(encoded.as_str())
                    .map_err(D::Error::custom)?;
                Self::from(bytes)
            }
        };
        Ok(decoded)
    }
}
impl Serialize for MaybeUtf8 {
    /// Serializes through `MaybeUtf8Serde`: text is borrowed as-is, bytes are
    /// base64-encoded (URL-safe alphabet, no padding).
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let repr = match self {
            Self::Utf8(text) => MaybeUtf8Serde::Utf8(Cow::Borrowed(text.as_str())),
            Self::Binary(bytes) => MaybeUtf8Serde::Binary(BASE64_URL_SAFE_NO_PAD.encode(bytes)),
        };
        repr.serialize(serializer)
    }
}
impl From<Vec<u8>> for MaybeUtf8 {
fn from(value: Vec<u8>) -> Self {
match String::from_utf8(value) {
Ok(value) => Self::Utf8(value),
Err(e) => Self::Binary(e.into_bytes()),
}
}
}
impl From<String> for MaybeUtf8 {
fn from(value: String) -> Self {
Self::Utf8(value)
}
}
impl From<PathBuf> for MaybeUtf8 {
fn from(value: PathBuf) -> Self {
Self::from(value.into_os_string().into_encoded_bytes())
}
}
/// One entry of the serialized output-file list of a cache file: a file name
/// plus that file's contents. Serialized under the name `File`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Serialize, Deserialize)]
#[serde(rename = "File")]
pub struct ExternalJobCacheV2File<'a> {
/// The file's path, converted to `MaybeUtf8`.
pub name: MaybeUtf8,
/// The file's contents; a `Cow` so that serializing `ExternalJobCacheV2Files`
/// can borrow the contents instead of cloning them.
pub contents: Cow<'a, MaybeUtf8>,
}
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub struct ExternalJobCacheV2Files(pub BTreeMap<PathBuf, MaybeUtf8>);
impl Serialize for ExternalJobCacheV2Files {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.collect_seq(
self.0
.iter()
.map(|(name, contents)| ExternalJobCacheV2File {
name: name.clone().into(),
contents: Cow::Borrowed(contents),
}),
)
}
}
impl<'de> Deserialize<'de> for ExternalJobCacheV2Files {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
Ok(Self(
Vec::deserialize(deserializer)?
.into_iter()
.map(|ExternalJobCacheV2File { name, contents }| {
(name.as_path().to_path_buf(), contents.into_owned())
})
.collect(),
))
}
}
/// Contents of one cache JSON file (serialized under the name `ExternalJobCache`).
#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
#[serde(rename = "ExternalJobCache")]
pub struct ExternalJobCacheV2 {
/// Format version; `ExternalJobCaching::run_from_cache` requires it to be
/// `ExternalJobCacheVersion::CURRENT`.
pub version: ExternalJobCacheVersion,
/// Hash computed by `ExternalJobCaching::run` over the command line and the
/// input files (paths and contents); the entry is only reused when it matches.
pub inputs_hash: blake3::Hash,
/// The command's captured stdout and stderr output, replayed on a cache hit.
pub stdout_stderr: String,
/// `Ok` with the output files when the command succeeded, `Err` with the
/// formatted error message when it failed.
pub result: Result<ExternalJobCacheV2Files, String>,
}
impl ExternalJobCacheV2 {
    /// Reads and JSON-decodes the cache file at `cache_json_path`.
    ///
    /// # Errors
    ///
    /// Fails when the file can't be read as a string or can't be decoded.
    fn read_from_file(cache_json_path: Interned<Path>) -> eyre::Result<Self> {
        let json = std::fs::read_to_string(&*cache_json_path)
            .wrap_err_with(|| format!("can't read {cache_json_path:?}"))?;
        let cache = serde_json::from_str(&json)
            .wrap_err_with(|| format!("can't decode {cache_json_path:?}"))?;
        Ok(cache)
    }

    /// Writes `self` as pretty-printed JSON to `cache_json_path`.
    ///
    /// # Errors
    ///
    /// Fails when the file can't be written.
    fn write_to_file(&self, cache_json_path: Interned<Path>) -> eyre::Result<()> {
        let json = serde_json::to_string_pretty(self).expect("serialization can't fail");
        let written = std::fs::write(&*cache_json_path, json);
        written.wrap_err_with(|| format!("can't write {cache_json_path:?}"))
    }
}
/// Handle for running one external command with its results cached in a JSON file.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub struct ExternalJobCaching {
// Path of the JSON cache file that is read by `run_from_cache` and written by `run`.
cache_json_path: Interned<Path>,
// When true, `run_from_cache` reports a cache miss without reading the cache file.
run_even_if_cached: bool,
}
/// Wrapper around a `blake3::Hasher` that length-prefixes everything it hashes.
#[derive(Default)]
struct JobCacheHasher(blake3::Hasher);

impl JobCacheHasher {
    /// Hashes `size` as a little-endian `u64`.
    fn hash_size(&mut self, size: usize) {
        let size = u64::try_from(size).expect("size should fit in u64");
        self.0.update(&size.to_le_bytes());
    }

    /// Hashes the length of `bytes` followed by the bytes themselves.
    fn hash_sized_bytes(&mut self, bytes: &[u8]) {
        let len = bytes.len();
        self.hash_size(len);
        self.0.update(bytes);
    }

    /// Hashes the encoded bytes of `s`, length-prefixed.
    fn hash_sized_os_str(&mut self, s: &OsStr) {
        let encoded = s.as_encoded_bytes();
        self.hash_sized_bytes(encoded);
    }

    /// Hashes the number of items in `iter`, then calls `f` on every item in order.
    fn hash_iter<F: FnMut(&mut Self, I::Item), I: IntoIterator<IntoIter: ExactSizeIterator>>(
        &mut self,
        iter: I,
        mut f: F,
    ) {
        let items = iter.into_iter();
        self.hash_size(items.len());
        for item in items {
            f(self, item);
        }
    }

    /// Like [`Self::hash_iter`], but `f` may fail; stops at and returns the first error.
    fn try_hash_iter<
        F: FnMut(&mut Self, I::Item) -> Result<(), E>,
        E,
        I: IntoIterator<IntoIter: ExactSizeIterator>,
    >(
        &mut self,
        iter: I,
        mut f: F,
    ) -> Result<(), E> {
        let items = iter.into_iter();
        self.hash_size(items.len());
        for item in items {
            f(self, item)?;
        }
        Ok(())
    }
}
/// Creates the file at `path` with the bytes returned by `contents`, unless a
/// file already exists there.
///
/// The data is first written to a temporary file inside `containing_dir` and
/// then moved into place without overwriting, so readers never observe a
/// partially written file. `contents` is only called when `path` did not exist
/// at the time of the initial check.
///
/// If another process or thread creates `path` between the existence check and
/// the final move, the existing file is kept and this function still returns
/// `Ok(())`.
///
/// # Errors
///
/// Returns any I/O error from creating or writing the temporary file, or from
/// moving it into place for a reason other than `path` already existing.
fn write_file_atomically_no_clobber<F: FnOnce() -> C, C: AsRef<[u8]>>(
    path: impl AsRef<Path>,
    containing_dir: impl AsRef<Path>,
    contents: F,
) -> std::io::Result<()> {
    let path = path.as_ref();
    let containing_dir = containing_dir.as_ref();
    if matches!(std::fs::exists(path), Ok(true)) {
        return Ok(());
    }
    // use File::create_new rather than tempfile's code to get normal file permissions rather than mode 600 on Unix.
    let mut file = tempfile::Builder::new()
        .make_in(containing_dir, |path| std::fs::File::create_new(path))?;
    file.write_all(contents().as_ref())?; // write all in one operation to avoid a bunch of tiny writes
    match file.into_temp_path().persist_noclobber(path) {
        Ok(()) => Ok(()),
        // Someone else created `path` after the check above. Not clobbering it is
        // exactly what we want; dropping the error also drops the temporary path,
        // which removes the temporary file.
        Err(e) if e.error.kind() == std::io::ErrorKind::AlreadyExists => Ok(()),
        Err(e) => Err(e.into()),
    }
}
impl ExternalJobCaching {
/// Returns the job-cache directory for `output_dir`: the `.fayalite-job-cache`
/// entry directly inside it.
pub fn get_cache_dir_from_output_dir(output_dir: impl AsRef<Path>) -> PathBuf {
    let mut cache_dir = output_dir.as_ref().to_path_buf();
    cache_dir.push(".fayalite-job-cache");
    cache_dir
}
/// Creates `cache_dir` (and any missing parents) and makes sure it contains a
/// `CACHEDIR.TAG` file and a `.gitignore` that ignores everything.
///
/// Existing `CACHEDIR.TAG`/`.gitignore` files are left untouched.
/// `application_name` is only used in the text of the generated files.
///
/// # Errors
///
/// Returns any I/O error from creating the directory or the two files.
pub fn make_cache_dir(
    cache_dir: impl AsRef<Path>,
    application_name: &str,
) -> std::io::Result<()> {
    let cache_dir = cache_dir.as_ref();
    std::fs::create_dir_all(cache_dir)?;

    let tag_path = cache_dir.join("CACHEDIR.TAG");
    let tag_contents = || {
        format!(
            "Signature: 8a477f597d28d172789f06886806bc55\n\
            # This file is a cache directory tag created by {application_name}.\n\
            # For information about cache directory tags see https://bford.info/cachedir/\n"
        )
    };
    write_file_atomically_no_clobber(tag_path, cache_dir, tag_contents)?;

    let gitignore_path = cache_dir.join(".gitignore");
    let gitignore_contents = || {
        format!(
            "# This is a cache directory created by {application_name}.\n\
            # ignore all files\n\
            *\n"
        )
    };
    write_file_atomically_no_clobber(gitignore_path, cache_dir, gitignore_contents)?;
    Ok(())
}
pub fn new(
output_dir: impl AsRef<Path>,
application_name: &str,
json_file_stem: impl AsRef<OsStr>,
run_even_if_cached: bool,
) -> std::io::Result<Self> {
let cache_dir = Self::get_cache_dir_from_output_dir(output_dir);
Self::make_cache_dir(&cache_dir, application_name)?;
let mut cache_json_path = cache_dir;
cache_json_path.push(json_file_stem.as_ref());
cache_json_path.set_extension("json");
Ok(Self {
cache_json_path: Path::intern_owned(cache_json_path),
run_even_if_cached,
})
}
/// Prints captured command output to stdout, adding a trailing newline when
/// the text doesn't already end with one. Prints nothing for empty text.
fn write_stdout_stderr(stdout_stderr: &str) {
    // use print! so output goes to Rust test output capture
    match stdout_stderr {
        "" => {}
        text if text.ends_with('\n') => print!("{text}"),
        text => println!("{text}"),
    }
}
/// returns `Err(_)` if reading the cache failed, otherwise returns `Ok(_)` with the results from the cache
fn run_from_cache(
self,
inputs_hash: blake3::Hash,
output_file_paths: impl IntoIterator<Item = Interned<Path>>,
) -> Result<Result<(), String>, ()> {
if self.run_even_if_cached {
return Err(());
}
let Ok(ExternalJobCacheV2 {
version: ExternalJobCacheVersion::CURRENT,
inputs_hash: cached_inputs_hash,
stdout_stderr,
result,
}) = ExternalJobCacheV2::read_from_file(self.cache_json_path)
else {
return Err(());
};
if inputs_hash != cached_inputs_hash {
return Err(());
}
match result {
Ok(outputs) => {
for output_file_path in output_file_paths {
let Some(output_data) = outputs.0.get(&*output_file_path) else {
if let Ok(true) = std::fs::exists(&*output_file_path) {
// assume the existing file is the correct one
continue;
}
return Err(());
};
let Ok(()) = std::fs::write(&*output_file_path, output_data.as_bytes()) else {
return Err(());
};
}
Self::write_stdout_stderr(&stdout_stderr);
Ok(Ok(()))
}
Err(error) => {
Self::write_stdout_stderr(&stdout_stderr);
Ok(Err(error))
}
}
}
fn make_command(
command_line: Interned<[Interned<OsStr>]>,
) -> eyre::Result<std::process::Command> {
ensure!(!command_line.is_empty(), "command line must not be empty");
let mut cmd = std::process::Command::new(&*command_line[0]);
cmd.args(command_line[1..].iter().map(|arg| &**arg))
.stdin(std::process::Stdio::null());
Ok(cmd)
}
/// Runs the command described by `command_line`, reusing the cached result when possible.
///
/// Steps, in order:
/// 1. Hash the command line and every input file (path and contents); input
///    paths are sorted and deduplicated first.
/// 2. Ask `run_from_cache` for a cached result for that hash and return it on a hit.
/// 3. Otherwise build the command with stdout and stderr connected to one shared
///    pipe, pass it to `run_fn`, and meanwhile echo and capture the piped output.
/// 4. If the command failed in a way that might mean the user terminated it,
///    delete the cache file and return the error without caching it.
/// 5. Otherwise write a cache entry holding the captured output and either the
///    contents of all output files (success) or the formatted error (failure).
///
/// `run_fn` receives the prepared command and returns `Ok(Ok(()))` on success or
/// `Ok(Err(status))` when the command ran but failed; `exit_status_to_error`
/// turns such a status into the reported error.
///
/// # Errors
///
/// Returns an error when an input file can't be read, the pipe can't be created,
/// `command_line` is empty, `run_fn` fails, the command fails (freshly or
/// according to the cache), an output file can't be read after a successful
/// run, or the cache file can't be written.
pub fn run<F>(
self,
command_line: Interned<[Interned<OsStr>]>,
input_file_paths: impl IntoIterator<Item = Interned<Path>>,
output_file_paths: impl IntoIterator<Item = Interned<Path>> + Clone,
run_fn: F,
exit_status_to_error: impl FnOnce(ExitStatus) -> eyre::Report,
) -> eyre::Result<()>
where
F: FnOnce(std::process::Command) -> eyre::Result<Result<(), ExitStatus>>,
{
// Every hashed piece is length-prefixed (see `JobCacheHasher`).
let mut hasher = JobCacheHasher::default();
hasher.hash_iter(command_line.iter(), |hasher, arg| {
hasher.hash_sized_os_str(arg)
});
// Sort and deduplicate so the hash doesn't depend on the order or
// multiplicity in which the caller listed the input files.
let mut input_file_paths =
Vec::<&Path>::from_iter(input_file_paths.into_iter().map(Interned::into_inner));
input_file_paths.sort_unstable();
input_file_paths.dedup();
hasher.try_hash_iter(
&input_file_paths,
|hasher, input_file_path| -> eyre::Result<()> {
hasher.hash_sized_os_str(input_file_path.as_ref());
hasher.hash_sized_bytes(
&std::fs::read(input_file_path).wrap_err_with(|| {
format!("can't read job input file: {input_file_path:?}")
})?,
);
Ok(())
},
)?;
let inputs_hash = hasher.0.finalize();
// Cache hit: return the cached success or failure; cache miss: fall through and run.
match self.run_from_cache(inputs_hash, output_file_paths.clone()) {
Ok(result) => return result.map_err(|e| eyre!(e)),
Err(()) => {}
}
// One pipe whose write end is cloned: both stdout and stderr of the command
// write into it, so their output is captured interleaved.
let (pipe_reader, stdout, stderr) = std::io::pipe()
.and_then(|(r, w)| Ok((r, w.try_clone()?, w)))
.wrap_err_with(|| format!("when trying to create a pipe to run: {command_line:?}"))?;
let mut cmd = Self::make_command(command_line)?;
cmd.stdout(stdout).stderr(stderr);
let mut stdout_stderr = String::new();
let result = std::thread::scope(|scope| {
// Reader thread: echoes the command's output as it arrives and appends it
// to `stdout_stderr`. The scope joins it before `stdout_stderr` is used below.
std::thread::Builder::new()
.name(format!("stdout:{}", command_line[0].display()))
.spawn_scoped(scope, || {
let _ = streaming_read_utf8(std::io::BufReader::new(pipe_reader), |s| {
stdout_stderr.push_str(s);
// use print! so output goes to Rust test output capture
print!("{s}");
std::io::Result::Ok(())
});
// Finish the echoed output with a newline if the command didn't.
if !stdout_stderr.is_empty() && !stdout_stderr.ends_with('\n') {
println!();
}
})
.expect("spawn shouldn't fail");
// `cmd` (which owns the pipe's write ends) is moved into `run_fn`.
run_fn(cmd)
})?;
if let Err(exit_status) = result {
// check if the user may have terminated it or something, don't cache the failure
let user_maybe_terminated;
#[cfg(unix)]
{
// Killed by a signal, no exit code at all, or an exit code above 1.
user_maybe_terminated = std::os::unix::process::ExitStatusExt::signal(&exit_status)
.is_some()
|| exit_status.code().is_none_or(|code| code > 1);
}
#[cfg(not(unix))]
{
user_maybe_terminated = !exit_status.success();
}
if user_maybe_terminated {
// Best effort: a failure to remove the cache file is ignored.
let _ = std::fs::remove_file(self.cache_json_path);
return Err(exit_status_to_error(exit_status));
}
}
let result = result.map_err(exit_status_to_error);
// Record the outcome: output file contents on success, the error text on failure.
ExternalJobCacheV2 {
version: ExternalJobCacheVersion::CURRENT,
inputs_hash,
stdout_stderr,
result: match &result {
Ok(()) => Ok(ExternalJobCacheV2Files(Result::from_iter(
output_file_paths.into_iter().map(
|output_file_path: Interned<Path>| -> eyre::Result<_> {
let output_file_path = &*output_file_path;
Ok((
PathBuf::from(output_file_path),
MaybeUtf8::from(std::fs::read(output_file_path).wrap_err_with(
|| format!("can't read job output file: {output_file_path:?}"),
)?),
))
},
),
)?)),
Err(e) => Err(format!("{e:#}")),
},
}
.write_to_file(self.cache_json_path)?;
result
}
/// Runs the command with caching when `this` is `Some`, and without any
/// caching or output capture when it is `None`.
///
/// In the uncached case the command is built with `make_command`, passed to
/// `run_fn`, and a failing exit status is converted with `exit_status_to_error`.
/// See `run` for the cached case and the meaning of the other parameters.
pub fn run_maybe_cached<F>(
    this: Option<Self>,
    command_line: Interned<[Interned<OsStr>]>,
    input_file_paths: impl IntoIterator<Item = Interned<Path>>,
    output_file_paths: impl IntoIterator<Item = Interned<Path>> + Clone,
    run_fn: F,
    exit_status_to_error: impl FnOnce(ExitStatus) -> eyre::Report,
) -> eyre::Result<()>
where
    F: FnOnce(std::process::Command) -> eyre::Result<Result<(), ExitStatus>>,
{
    let Some(caching) = this else {
        let cmd = Self::make_command(command_line)?;
        return run_fn(cmd)?.map_err(exit_status_to_error);
    };
    caching.run(
        command_line,
        input_file_paths,
        output_file_paths,
        run_fn,
        exit_status_to_error,
    )
}
}
/// Zero-sized job kind for the external command `T`.
///
/// It carries no data, so all values of one `ExternalCommandJobKind<T>`
/// compare equal.
#[derive(Clone, Eq, Hash)]
pub struct ExternalCommandJobKind<T: ExternalCommand>(PhantomData<T>);

impl<T: ExternalCommand> ExternalCommandJobKind<T> {
    /// Creates the (only) value of this type.
    pub const fn new() -> Self {
        Self(PhantomData)
    }
}

impl<T: ExternalCommand> Default for ExternalCommandJobKind<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T: ExternalCommand> Copy for ExternalCommandJobKind<T> {}

impl<T: ExternalCommand> fmt::Debug for ExternalCommandJobKind<T> {
    /// Formats as `ExternalCommandJobKind<` + the type name of `T` + `>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("ExternalCommandJobKind<")?;
        f.write_str(std::any::type_name::<T>())?;
        f.write_str(">")
    }
}

impl<T: ExternalCommand> PartialEq for ExternalCommandJobKind<T> {
    /// Always equal: there is no data to compare.
    fn eq(&self, _other: &Self) -> bool {
        true
    }
}

impl<T: ExternalCommand> PartialOrd for ExternalCommandJobKind<T> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl<T: ExternalCommand> Ord for ExternalCommandJobKind<T> {
    /// Always `Equal`, consistent with `PartialEq`.
    fn cmp(&self, _other: &Self) -> std::cmp::Ordering {
        std::cmp::Ordering::Equal
    }
}
/// clap value parser that turns a program name given on the command line into
/// a program path by looking it up with `which::which` (see its
/// `TypedValueParser` impl). The wrapped `ExternalProgram` supplies the
/// argument name used in error messages.
#[derive(Copy, Clone)]
struct ExternalProgramPathValueParser(ExternalProgram);
/// Converts the result of a `which` lookup into an interned path.
///
/// On failure the `which` error is wrapped in a `ResolveProgramPathError`
/// together with `program_name` and the argument name produced by
/// `program_path_arg_name`; that closure is only called in the error case.
fn parse_which_result(
    which_result: which::Result<PathBuf>,
    program_name: impl Into<OsString>,
    program_path_arg_name: impl FnOnce() -> String,
) -> Result<Interned<Path>, ResolveProgramPathError> {
    match which_result {
        Ok(resolved) => Ok(resolved.intern_deref()),
        Err(inner) => Err(ResolveProgramPathError {
            inner,
            program_name: program_name.into(),
            program_path_arg_name: program_path_arg_name(),
        }),
    }
}
impl clap::builder::TypedValueParser for ExternalProgramPathValueParser {
type Value = Interned<Path>;
fn parse_ref(
&self,
cmd: &clap::Command,
arg: Option<&clap::Arg>,
value: &OsStr,
) -> clap::error::Result<Self::Value> {
let program_path_arg_name = self.0.program_path_arg_name;
OsStringValueParser::new()
.try_map(move |program_name| {
parse_which_result(which::which(&program_name), program_name, || {
program_path_arg_name.into()
})
})
.parse_ref(cmd, arg, value)
}
}
// Command-line arguments shared by every external-command job kind `T`.
// The clap argument group id comes from `T::args_group_id()`.
//
// NOTE(review): plain `//` comments are used here on purpose; `///` doc comments
// on a clap-derive struct or its fields can end up in the generated help text.
#[derive(Clone, PartialEq, Eq, Hash, Debug, clap::Args)]
#[group(id = T::args_group_id())]
#[non_exhaustive]
pub struct ExternalCommandArgs<T: ExternalCommand> {
// Path of the program to run; its argument is defined by `ExternalProgramPath`.
#[command(flatten)]
pub program_path: ExternalProgramPath<T::ExternalProgram>,
// Flag named by `T::run_even_if_cached_arg_name()`; when set, the job is run
// even if a matching cache entry exists.
#[arg(
name = Interned::into_inner(T::run_even_if_cached_arg_name()),
long = T::run_even_if_cached_arg_name(),
)]
pub run_even_if_cached: bool,
// Arguments specific to the command `T`.
#[command(flatten)]
pub additional_args: T::AdditionalArgs,
}
/// Error returned when a program name can't be resolved to a program path.
#[derive(Clone, Debug)]
pub struct ResolveProgramPathError {
    /// The underlying `which` error.
    inner: which::Error,
    /// The program name that was looked up.
    program_name: OsString,
    /// The name shown at the start of the error message.
    program_path_arg_name: String,
}

impl fmt::Display for ResolveProgramPathError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let program_path_arg_name = &self.program_path_arg_name;
        let program_name = &self.program_name;
        let inner = &self.inner;
        write!(
            f,
            "{program_path_arg_name}: failed to resolve {program_name:?} to a valid program: {inner}",
        )
    }
}

impl std::error::Error for ResolveProgramPathError {}
pub fn resolve_program_path(
program_name: Option<&OsStr>,
default_program_name: impl AsRef<OsStr>,
program_path_env_var_name: Option<&OsStr>,
) -> Result<Interned<Path>, ResolveProgramPathError> {
let default_program_name = default_program_name.as_ref();
let owned_program_name;
let program_name = if let Some(program_name) = program_name {
program_name
} else if let Some(v) = program_path_env_var_name.and_then(std::env::var_os) {
owned_program_name = v;
&owned_program_name
} else {
default_program_name
};
parse_which_result(which::which(program_name), program_name, || {
default_program_name.display().to_string()
})
}
impl<T: ExternalCommand> ExternalCommandArgs<T> {
    /// Creates arguments from a program path and the command-specific
    /// arguments, with `run_even_if_cached` turned off.
    pub fn new(
        program_path: ExternalProgramPath<T::ExternalProgram>,
        additional_args: T::AdditionalArgs,
    ) -> Self {
        let run_even_if_cached = false;
        Self {
            program_path,
            run_even_if_cached,
            additional_args,
        }
    }

    /// Like [`Self::new`], for a program path that is already resolved.
    pub fn with_resolved_program_path(
        program_path: Interned<Path>,
        additional_args: T::AdditionalArgs,
    ) -> Self {
        let program_path = ExternalProgramPath::with_resolved_program_path(program_path);
        Self::new(program_path, additional_args)
    }

    /// Like [`Self::new`], resolving the program path with
    /// `ExternalProgramPath::resolve_program_path` first.
    ///
    /// # Errors
    ///
    /// Fails when the program can't be resolved.
    pub fn resolve_program_path(
        program_name: Option<&OsStr>,
        additional_args: T::AdditionalArgs,
    ) -> Result<Self, ResolveProgramPathError> {
        let program_path =
            ExternalProgramPath::<T::ExternalProgram>::resolve_program_path(program_name)?;
        Ok(Self::new(program_path, additional_args))
    }
}
impl<T: ExternalCommand> ToArgs for ExternalCommandArgs<T> {
    /// Writes the program-path argument, then the `--<run-even-if-cached name>`
    /// flag when it is set, then the command-specific arguments.
    fn to_args(&self, args: &mut (impl WriteArgs + ?Sized)) {
        self.program_path.to_args(args);
        if self.run_even_if_cached {
            args.write_display_arg(format_args!("--{}", T::run_even_if_cached_arg_name()));
        }
        self.additional_args.to_args(args);
    }
}
/// Values derived from an `ExternalCommandJob`, computed once and then cached
/// in the job's `params_cache`.
#[derive(Copy, Clone)]
struct ExternalCommandJobParams {
    /// Full command line (program path first) and working directory.
    command_params: CommandParams,
    /// Job items the job consumes, as reported by `T::inputs`.
    inputs: Interned<[JobItemName]>,
    /// `output_paths`, each wrapped as a `JobItemName::Path`.
    outputs: Interned<[JobItemName]>,
    /// Output file paths, as reported by `T::output_paths`.
    output_paths: Interned<[Interned<Path>]>,
}

impl ExternalCommandJobParams {
    /// Computes all derived values for `job` by querying `T`.
    fn new<T: ExternalCommand>(job: &ExternalCommandJob<T>) -> Self {
        let output_paths = T::output_paths(job);
        // the command line starts with the program itself, followed by T's arguments
        let mut args_writer = ArgsWriter(vec![job.program_path.as_interned_os_str()]);
        T::command_line_args(job, &mut args_writer);
        let command_params = CommandParams {
            command_line: Intern::intern_owned(args_writer.0),
            current_dir: T::current_dir(job),
        };
        let inputs = T::inputs(job);
        let outputs: Interned<[JobItemName]> = output_paths
            .iter()
            .map(|&path| JobItemName::Path { path })
            .collect();
        Self {
            command_params,
            inputs,
            outputs,
            output_paths,
        }
    }
}
/// A job that runs the external command `T`.
///
/// Equality, hashing and `Debug` output (implemented by hand) ignore `params_cache`.
#[derive(Deserialize, Serialize)]
pub struct ExternalCommandJob<T: ExternalCommand> {
// Command-specific data produced by `T::args_to_jobs`.
additional_job_data: T::AdditionalJobData,
// Path of the program to run; it becomes the first element of the command line.
program_path: Interned<Path>,
// Output directory, taken from the base job; the job cache directory is created inside it.
output_dir: Interned<Path>,
// Whether to run the command even when a matching cache entry exists.
run_even_if_cached: bool,
// Lazily computed derived values (see `params`); not serialized.
#[serde(skip)]
params_cache: OnceLock<ExternalCommandJobParams>,
}
impl<T: ExternalCommand> Eq for ExternalCommandJob<T> {}

impl<T: ExternalCommand<AdditionalJobData: Clone>> Clone for ExternalCommandJob<T> {
    /// Clones every field, including the cached parameters.
    fn clone(&self) -> Self {
        Self {
            additional_job_data: self.additional_job_data.clone(),
            program_path: self.program_path,
            output_dir: self.output_dir,
            run_even_if_cached: self.run_even_if_cached,
            params_cache: self.params_cache.clone(),
        }
    }
}

impl<T: ExternalCommand> fmt::Debug for ExternalCommandJob<T> {
    /// Formats as `ExternalCommandJob<` + type name of `T` + `>` followed by
    /// the fields; `params_cache` is left out.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("ExternalCommandJob<")?;
        f.write_str(std::any::type_name::<T>())?;
        f.write_str(">")?;
        let mut fields = f.debug_struct("");
        fields.field("additional_job_data", &self.additional_job_data);
        fields.field("program_path", &self.program_path);
        fields.field("output_dir", &self.output_dir);
        fields.field("run_even_if_cached", &self.run_even_if_cached);
        fields.finish()
    }
}

impl<T: ExternalCommand> PartialEq for ExternalCommandJob<T> {
    /// Compares every field except `params_cache`.
    fn eq(&self, other: &Self) -> bool {
        self.additional_job_data == other.additional_job_data
            && self.program_path == other.program_path
            && self.output_dir == other.output_dir
            && self.run_even_if_cached == other.run_even_if_cached
    }
}

impl<T: ExternalCommand> Hash for ExternalCommandJob<T> {
    /// Hashes every field except `params_cache`, in declaration order.
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.additional_job_data.hash(state);
        self.program_path.hash(state);
        self.output_dir.hash(state);
        self.run_even_if_cached.hash(state);
    }
}
impl<T: ExternalCommand> ExternalCommandJob<T> {
pub fn additional_job_data(&self) -> &T::AdditionalJobData {
&self.additional_job_data
}
pub fn program_path(&self) -> Interned<Path> {
self.program_path
}
pub fn output_dir(&self) -> Interned<Path> {
self.output_dir
}
pub fn run_even_if_cached(&self) -> bool {
self.run_even_if_cached
}
fn params(&self) -> &ExternalCommandJobParams {
self.params_cache
.get_or_init(|| ExternalCommandJobParams::new(self))
}
pub fn command_params(&self) -> CommandParams {
self.params().command_params
}
pub fn inputs(&self) -> Interned<[JobItemName]> {
self.params().inputs
}
pub fn output_paths(&self) -> Interned<[Interned<Path>]> {
self.params().output_paths
}
pub fn outputs(&self) -> Interned<[JobItemName]> {
self.params().outputs
}
}
/// The resolved path of the external program described by `T`.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct ExternalProgramPath<T: ExternalProgramTrait> {
    program_path: Interned<Path>,
    _phantom: PhantomData<T>,
}

impl<T: ExternalProgramTrait> ExternalProgramPath<T> {
    /// Wraps a program path that has already been resolved.
    pub fn with_resolved_program_path(program_path: Interned<Path>) -> Self {
        let _phantom = PhantomData;
        Self {
            program_path,
            _phantom,
        }
    }

    /// Resolves the program with the free function `resolve_program_path`,
    /// using `T`'s default program name and environment variable name.
    ///
    /// # Errors
    ///
    /// Fails when the program can't be resolved.
    pub fn resolve_program_path(
        program_name: Option<&OsStr>,
    ) -> Result<Self, ResolveProgramPathError> {
        let program = ExternalProgram::new::<T>();
        let env_var_name = program.program_path_env_var_name.as_ref().map(OsStr::new);
        let program_path =
            resolve_program_path(program_name, program.default_program_name, env_var_name)?;
        Ok(Self::with_resolved_program_path(program_path))
    }

    /// The resolved program path.
    pub fn program_path(&self) -> Interned<Path> {
        self.program_path
    }
}
impl<T: ExternalProgramTrait> fmt::Debug for ExternalProgramPath<T> {
    /// Formats as `ExternalProgramPath<` + type name of `T` + `>` followed by
    /// the program path in tuple form.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("ExternalProgramPath<")?;
        f.write_str(std::any::type_name::<T>())?;
        f.write_str(">")?;
        let mut tuple = f.debug_tuple("");
        tuple.field(&self.program_path);
        tuple.finish()
    }
}
impl<T: ExternalProgramTrait> clap::FromArgMatches for ExternalProgramPath<T> {
fn from_arg_matches(matches: &clap::ArgMatches) -> Result<Self, clap::Error> {
let id = Interned::into_inner(ExternalProgram::new::<T>().program_path_arg_name);
// don't remove argument so later instances of Self can use it too
let program_path = *matches.get_one(id).expect("arg should always be present");
Ok(Self {
program_path,
_phantom: PhantomData,
})
}
fn update_from_arg_matches(&mut self, matches: &clap::ArgMatches) -> Result<(), clap::Error> {
*self = Self::from_arg_matches(matches)?;
Ok(())
}
}
impl<T: ExternalProgramTrait> clap::Args for ExternalProgramPath<T> {
// Adds the program-path argument for `T` to `cmd`, unless an argument with the
// same id is already there. In that case the existing argument is checked to be
// configured exactly like the one that would have been added (the assertions
// below panic otherwise) and `cmd` is returned unchanged.
fn augment_args(cmd: clap::Command) -> clap::Command {
let external_program @ ExternalProgram {
default_program_name,
program_path_arg_name,
program_path_arg_value_name,
program_path_env_var_name,
} = ExternalProgram::new::<T>();
// Look for an argument that was already registered under the same id.
let arg = cmd
.get_arguments()
.find(|arg| *arg.get_id().as_str() == *program_path_arg_name);
if let Some(arg) = arg {
// don't insert duplicate arguments.
// check that the previous argument actually matches this argument:
assert!(!arg.is_required_set());
assert!(matches!(arg.get_action(), clap::ArgAction::Set));
assert_eq!(arg.get_long(), Some(&*program_path_arg_name));
assert_eq!(
arg.get_value_names(),
Some(&[clap::builder::Str::from(program_path_arg_value_name)][..])
);
assert_eq!(
arg.get_env(),
program_path_env_var_name.as_ref().map(OsStr::new)
);
assert_eq!(
arg.get_default_values(),
&[OsStr::new(&default_program_name)]
);
assert_eq!(arg.get_value_hint(), clap::ValueHint::CommandName);
cmd
} else {
// Optional `--<arg name> <VALUE NAME>` option that can also come from the
// environment variable, defaults to the default program name, and is
// resolved to a path by `ExternalProgramPathValueParser`.
cmd.arg(
clap::Arg::new(Interned::into_inner(program_path_arg_name))
.required(false)
.value_parser(ExternalProgramPathValueParser(external_program))
.action(clap::ArgAction::Set)
.long(program_path_arg_name)
.value_name(program_path_arg_value_name)
.env(program_path_env_var_name.map(Interned::into_inner))
.default_value(default_program_name)
.value_hint(clap::ValueHint::CommandName),
)
}
}
// Updating uses the same argument definition as initial parsing.
fn augment_args_for_update(cmd: clap::Command) -> clap::Command {
Self::augment_args(cmd)
}
}
impl<T: ExternalProgramTrait> ToArgs for ExternalProgramPath<T> {
    /// Writes `--<arg name>=<program path>`, unless `args` already holds that
    /// option with exactly this path.
    fn to_args(&self, args: &mut (impl WriteArgs + ?Sized)) {
        let arg_name = ExternalProgram::new::<T>().program_path_arg_name;
        let program_path = &self.program_path;
        let already_written =
            args.get_long_option_eq(arg_name) == Some(program_path.as_os_str());
        if !already_written {
            args.write_long_option_eq(arg_name, program_path);
        }
    }
}
/// Runtime description of an external program: the values reported by an
/// `ExternalProgramTrait` implementation, gathered into one value.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
#[non_exhaustive]
pub struct ExternalProgram {
    default_program_name: Interned<str>,
    program_path_arg_name: Interned<str>,
    program_path_arg_value_name: Interned<str>,
    program_path_env_var_name: Option<Interned<str>>,
}

impl ExternalProgram {
    /// Collects the description of the program `T`.
    pub fn new<T: ExternalProgramTrait>() -> Self {
        let default_program_name = T::default_program_name();
        let program_path_arg_name = T::program_path_arg_name();
        let program_path_arg_value_name = T::program_path_arg_value_name();
        let program_path_env_var_name = T::program_path_env_var_name();
        Self {
            default_program_name,
            program_path_arg_name,
            program_path_arg_value_name,
            program_path_env_var_name,
        }
    }

    /// Program name used when none is given explicitly.
    pub fn default_program_name(&self) -> Interned<str> {
        let Self {
            default_program_name,
            ..
        } = *self;
        default_program_name
    }

    /// Name (and long option) of the command-line argument holding the program path.
    pub fn program_path_arg_name(&self) -> Interned<str> {
        let Self {
            program_path_arg_name,
            ..
        } = *self;
        program_path_arg_name
    }

    /// Value name of the program-path argument.
    pub fn program_path_arg_value_name(&self) -> Interned<str> {
        let Self {
            program_path_arg_value_name,
            ..
        } = *self;
        program_path_arg_value_name
    }

    /// Environment variable that can supply the program path, if any.
    pub fn program_path_env_var_name(&self) -> Option<Interned<str>> {
        let Self {
            program_path_env_var_name,
            ..
        } = *self;
        program_path_env_var_name
    }
}
impl<T: ExternalProgramTrait> From<T> for ExternalProgram {
    /// Builds the description from the type `T`; the value itself is not used.
    fn from(_value: T) -> Self {
        ExternalProgram::new::<T>()
    }
}

impl<T: ExternalProgramTrait> From<T> for Interned<ExternalProgram> {
    /// Builds the description from the type `T` and interns it; the value
    /// itself is not used.
    fn from(_value: T) -> Self {
        let program = ExternalProgram::new::<T>();
        program.intern_sized()
    }
}
/// Compile-time description of an external program.
///
/// Only [`Self::default_program_name`] is required; the other names are
/// derived from it unless overridden.
pub trait ExternalProgramTrait:
    'static + Send + Sync + Hash + Ord + fmt::Debug + Default + Copy
{
    /// Program name used when none is given explicitly.
    fn default_program_name() -> Interned<str>;

    /// Name of the command-line argument holding the program path.
    /// Defaults to [`Self::default_program_name`].
    fn program_path_arg_name() -> Interned<str> {
        Self::default_program_name()
    }

    /// Value name of the program-path argument.
    /// Defaults to the upper-cased argument name.
    fn program_path_arg_value_name() -> Interned<str> {
        let upper = Self::program_path_arg_name().to_uppercase();
        Intern::intern_owned(upper)
    }

    /// Environment variable that can supply the program path.
    /// Defaults to the upper-cased argument name with `-` replaced by `_`.
    fn program_path_env_var_name() -> Option<Interned<str>> {
        let upper = Self::program_path_arg_name().to_uppercase();
        let env_var_name = upper.replace('-', "_");
        Some(Intern::intern_owned(env_var_name))
    }
}
/// Description of one kind of external command job.
///
/// `ExternalCommandJobKind<Self>` implements `JobKind` on top of this trait: it
/// builds the command line, runs the program and caches the results.
pub trait ExternalCommand: 'static + Send + Sync + Hash + Eq + fmt::Debug + Sized + Clone {
/// Command-specific command-line arguments, flattened into `ExternalCommandArgs`.
type AdditionalArgs: ToArgs;
/// Command-specific data stored in every `ExternalCommandJob`.
type AdditionalJobData: 'static
+ Send
+ Sync
+ Hash
+ Eq
+ fmt::Debug
+ Serialize
+ DeserializeOwned;
// NOTE(review): not used anywhere in this file; presumably relates to locating
// the base job within `Dependencies` — confirm against implementors.
type BaseJobPosition;
/// The jobs this job depends on; the base job found through it supplies the
/// output directory and the default for `run_even_if_cached`.
type Dependencies: JobDependenciesHasBase;
/// The program this command runs.
type ExternalProgram: ExternalProgramTrait;
/// Returns the dependencies; used for `JobKind::dependencies`.
fn dependencies() -> Self::Dependencies;
/// Converts the parsed arguments (and those of the dependencies) into this
/// job's additional data plus the dependency jobs.
fn args_to_jobs(
args: JobArgsAndDependencies<ExternalCommandJobKind<Self>>,
params: &JobParams,
global_params: &GlobalParams,
) -> eyre::Result<(
Self::AdditionalJobData,
<Self::Dependencies as JobDependencies>::JobsAndKinds,
)>;
/// The job items `job` consumes.
fn inputs(job: &ExternalCommandJob<Self>) -> Interned<[JobItemName]>;
/// The paths of the files `job` produces; after a successful run their
/// contents are read back and stored in the cache.
fn output_paths(job: &ExternalCommandJob<Self>) -> Interned<[Interned<Path>]>;
/// Writes the command's arguments to `args`; the program path has already
/// been written as the first element.
fn command_line_args<W: ?Sized + WriteArgs>(job: &ExternalCommandJob<Self>, args: &mut W);
/// Working directory for the command; when `None` no working directory is set on it.
fn current_dir(job: &ExternalCommandJob<Self>) -> Option<Interned<Path>>;
/// Name of this job kind; also the default clap group id, the prefix of the
/// run-even-if-cached flag, and the stem of the cache file name.
fn job_kind_name() -> Interned<str>;
/// Id of the clap argument group of `ExternalCommandArgs<Self>`.
/// Defaults to [`Self::job_kind_name`].
fn args_group_id() -> clap::Id {
Interned::into_inner(Self::job_kind_name()).into()
}
/// Name of the flag that forces a run despite a cache hit.
/// Defaults to `<job kind name>-run-even-if-cached`.
fn run_even_if_cached_arg_name() -> Interned<str> {
Intern::intern_owned(format!("{}-run-even-if-cached", Self::job_kind_name()))
}
/// Value returned from `JobKind::subcommand_hidden` for this job kind.
/// Defaults to `false`.
fn subcommand_hidden() -> bool {
false
}
}
/// `JobKind` implementation shared by all external commands; everything
/// command-specific is delegated to `T`.
impl<T: ExternalCommand> JobKind for ExternalCommandJobKind<T> {
type Args = ExternalCommandArgs<T>;
type Job = ExternalCommandJob<T>;
type Dependencies = T::Dependencies;
fn dependencies(self) -> Self::Dependencies {
T::dependencies()
}
/// Builds the job from its arguments: the program path and the
/// run-even-if-cached flag come from the arguments, the additional data and the
/// dependency jobs from `T::args_to_jobs`, and the output directory from the base job.
fn args_to_jobs(
args: JobArgsAndDependencies<Self>,
params: &JobParams,
global_params: &GlobalParams,
) -> eyre::Result<JobAndDependencies<Self>> {
// Only values are copied out of `args.args` here (`additional_args` is not
// bound), so `args` can still be passed on as a whole below.
let JobKindAndArgs {
kind,
args:
ExternalCommandArgs {
program_path:
ExternalProgramPath {
program_path,
_phantom: _,
},
run_even_if_cached,
additional_args: _,
},
} = args.args;
let (additional_job_data, dependencies) = T::args_to_jobs(args, params, global_params)?;
let base_job = T::Dependencies::base_job(&dependencies);
let job = ExternalCommandJob {
additional_job_data,
program_path,
output_dir: base_job.output_dir(),
// forced when either the base job or this job's own flag asks for it
run_even_if_cached: base_job.run_even_if_cached() | run_even_if_cached,
params_cache: OnceLock::new(),
};
job.params(); // fill cache
Ok(JobAndDependencies {
job: JobAndKind { kind, job },
dependencies,
})
}
fn inputs(self, job: &Self::Job) -> Interned<[JobItemName]> {
job.inputs()
}
fn outputs(self, job: &Self::Job) -> Interned<[JobItemName]> {
job.outputs()
}
fn name(self) -> Interned<str> {
T::job_kind_name()
}
fn external_command_params(self, job: &Self::Job) -> Option<CommandParams> {
Some(job.command_params())
}
/// Runs the external command through `ExternalJobCaching` and returns one
/// `JobItem::Path` per output path.
///
/// Panics when the names of `inputs` don't match `job.inputs()`.
fn run(
self,
job: &Self::Job,
inputs: &[JobItem],
_params: &JobParams,
global_params: &GlobalParams,
acquired_job: &mut AcquiredJob,
) -> eyre::Result<Vec<JobItem>> {
assert!(
inputs.iter().map(JobItem::name).eq(job.inputs()),
"{}\ninputs:\n{inputs:?}\njob.inputs():\n{:?}",
std::any::type_name::<Self>(),
job.inputs(),
);
let CommandParams {
command_line,
current_dir,
} = job.command_params();
// The cache file is named after the job kind and lives in the cache directory
// inside the job's output directory.
ExternalJobCaching::new(
&job.output_dir,
&global_params.application_name(),
&T::job_kind_name(),
job.run_even_if_cached,
)?
.run(
command_line,
// Flatten the input items into the individual file paths to hash.
inputs
.iter()
.flat_map(|item| match item {
JobItem::Path { path } => std::slice::from_ref(path),
JobItem::DynamicPaths {
paths,
source_job_name: _,
} => paths,
})
.copied(),
job.output_paths(),
// Runs the prepared command via `acquired_job` and reports a non-success exit status.
|mut cmd| {
if let Some(current_dir) = current_dir {
cmd.current_dir(current_dir);
}
let status = acquired_job.run_command(cmd, |cmd| cmd.status())?;
if !status.success() {
Ok(Err(status))
} else {
Ok(Ok(()))
}
},
|status| eyre!("running {command_line:?} failed: {status}"),
)?;
Ok(job
.output_paths()
.iter()
.map(|&path| JobItem::Path { path })
.collect())
}
fn subcommand_hidden(self) -> bool {
T::subcommand_hidden()
}
fn external_program(self) -> Option<Interned<ExternalProgram>> {
Some(ExternalProgram::new::<T::ExternalProgram>().intern_sized())
}
}