From c2f8d6092219cc6f9f6e6f344882c9a1a9f1b18a Mon Sep 17 00:00:00 2001 From: Jonathan Strong Date: Tue, 7 Nov 2023 13:36:18 -0500 Subject: [PATCH] working rough cut using publish log csv and cargo publish command --- Cargo.lock | 164 +++++++++++------ Cargo.toml | 25 ++- script/get-publish-history.py | 72 ++++---- src/publish.rs | 323 ++++++++++++++++++++++++++++------ 4 files changed, 438 insertions(+), 146 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e6aa5b8..b27f715 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -41,12 +41,6 @@ dependencies = [ "alloc-no-stdlib", ] -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -247,23 +241,25 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.31" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1" dependencies = [ - "android-tzdata", "iana-time-zone", "js-sys", + "num-integer", "num-traits 0.2.17", + "serde", + "time 0.1.45", "wasm-bindgen", - "windows-targets", + "winapi", ] [[package]] name = "chrono-tz" -version = "0.8.4" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e23185c0e21df6ed832a12e2bda87c7d1def6842881fb634a8511ced741b0d76" +checksum = "29c39203181991a7dd4343b8005bd804e7a9a37afb8ac070e43771e8c820bbde" dependencies = [ "chrono", "chrono-tz-build", @@ -272,9 +268,9 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.2.1" +version = "0.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" +checksum = "6f509c3a87b33437b05e2458750a0700e5bdd6956176773e6c7d6dd15a283a0c" dependencies = [ "parse-zoneinfo", "phf", @@ -319,6 +315,15 @@ dependencies = [ "os_str_bytes", ] +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "cookie" version = "0.16.2" @@ -326,7 +331,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e859cd57d0710d9e06c381b550c06e76992472a8c6d527aecd2fc673dcc231fb" dependencies = [ "percent-encoding", - "time", + "time 0.3.30", "version_check", ] @@ -343,7 +348,7 @@ dependencies = [ "serde", "serde_derive", "serde_json", - "time", + "time 0.3.30", "url", ] @@ -424,6 +429,27 @@ dependencies = [ "typenum", ] +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa 1.0.9", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + [[package]] name = "dashmap" version = "5.5.3" @@ -550,12 +576,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - [[package]] name = "flate2" version = "1.0.28" @@ -849,12 +869,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "humansize" -version = "2.1.3" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6cb51c9a029ddc91b07a787f1d86b53ccfa49b0e86688c946ebe8d3555685dd7" -dependencies = [ - "libm", -] +checksum = "02296996cb8796d7c6e3bc2d9211b7802812d36999a51bb754123ead7d37d026" [[package]] name = "hyper" @@ -1036,12 +1053,6 @@ version = "0.2.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" -[[package]] -name = "libm" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" - [[package]] name = "linked-hash-map" version = "0.5.6" @@ -1172,6 +1183,16 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits 0.2.17", +] + [[package]] name = "num-traits" version = "0.1.43" @@ -1304,16 +1325,6 @@ dependencies = [ "sha2", ] -[[package]] -name = "petgraph" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" -dependencies = [ - "fixedbitset", - "indexmap 2.1.0", -] - [[package]] name = "phf" version = "0.11.2" @@ -1350,6 +1361,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" dependencies = [ "siphasher", + "uncased", ] [[package]] @@ -1447,7 +1459,7 @@ dependencies = [ "mach", "once_cell", "raw-cpuid", - "wasi 0.10.2+wasi-snapshot-preview1", + "wasi 0.10.0+wasi-snapshot-preview1", "web-sys", "winapi", ] @@ -1633,12 +1645,13 @@ dependencies = [ "cargo_metadata", "chrono", "clap", + "convert_case", + "csv", "dotenvy", "flate2", "futures", "governor", "num_cpus", - "petgraph", "pretty_toa", "rayon", "regex", @@ -1652,6 +1665,7 @@ dependencies = [ "tera", "tokio", "toml", + "toml_edit", "tracing", "tracing-subscriber", "url", @@ -2035,9 +2049,9 @@ dependencies = [ [[package]] name = "tera" -version = "1.19.1" +version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "970dff17c11e884a4a09bc76e3a17ef71e01bb13447a11e85226e254fe6d10b8" +checksum = "3df578c295f9ec044ff1c829daf31bb7581d5b3c2a7a3d87419afe1f2531438c" dependencies = [ "chrono", "chrono-tz", @@ -2100,6 +2114,17 @@ dependencies = [ "once_cell", ] +[[package]] +name = "time" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" +dependencies = [ + "libc", + "wasi 0.10.0+wasi-snapshot-preview1", + "winapi", +] + [[package]] name = "time" version = "0.3.30" @@ -2207,6 +2232,23 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_datetime" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" + +[[package]] +name = "toml_edit" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34d383cd00a163b4a5b85053df514d45bc330f6de7737edfe0a93311d1eaa03" +dependencies = [ + "indexmap 2.1.0", + "toml_datetime", + "winnow", +] + [[package]] name = "tower-service" version = "0.3.2" @@ -2352,6 +2394,15 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" +[[package]] +name = "uncased" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b9bc53168a4be7402ab86c3aad243a84dd7381d09be0eddc81280c1da95ca68" +dependencies = [ + "version_check", +] + [[package]] name = "unic-char-property" version = "0.9.0" @@ -2432,6 +2483,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + [[package]] name = "untrusted" version = "0.9.0" @@ -2483,9 +2540,9 @@ dependencies = [ [[package]] name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" +version = "0.10.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" [[package]] name = "wasi" @@ -2700,6 +2757,15 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "winnow" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829846f3e3db426d4cee4510841b71a8e58aa2a76b1132579487ae430ccd9c7b" +dependencies = [ + "memchr", +] + [[package]] name = "winreg" version = "0.50.0" diff --git a/Cargo.toml b/Cargo.toml index 895280d..564f10a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,7 @@ num_cpus = "1.3" governor = "0.4.2" pretty_toa = "1" tera = { version = "1", optional = true } -chrono = { version = "0.4", optional = true } +chrono = { version = "0.4.22, < 0.4.23", features = ["serde"], optional = true } regex = "1.6" url = { version = "2", features = ["serde"] } semver = { version = "1", features = ["serde"] } @@ -48,11 +48,24 @@ tar = "0.4.38" anyhow = "1" dotenvy = "0.15" flate2 = "1" -tempfile = "3.8.1" -rayon = "1.8" -cargo_metadata = "0.18" -petgraph = "0.6.4" +tempfile = { version = "3.8.1", optional = true } +rayon = { version = "1.8", optional = true } +cargo_metadata = { version = "0.18", optional = true } +#petgraph = { version = "0.6.4", optional = true } +csv = { version = "1", optional = true } +convert_case = { version = "0.6", optional = true } +toml_edit = { version = "0.21", optional = true } [features] -default = [] +default = ["publish"] docs = ["tera", "chrono"] +publish = [ + "csv", + "chrono", + "tempfile", + "rayon", + "cargo_metadata", +# "petgraph", + "convert_case", + "toml_edit", +] diff --git a/script/get-publish-history.py b/script/get-publish-history.py index 7c89bf1..a76fe97 100644 --- a/script/get-publish-history.py +++ b/script/get-publish-history.py @@ -72,8 +72,8 @@ def versions(path, branch='master'): 'time': commit.authored_datetime.strftime(DATE_TIME_FORMAT), #'size': diff_size(diff), #'type': diff_type(diff), - 'crate': d['name'], - 'vers': d['vers'], + 'crate_name': d['name'], + 'version': d['vers'], #'json': lastline, } @@ -84,35 +84,41 @@ def versions(path, branch='master'): yield row -def diff_size(diff): - """ - Computes the size of the diff by comparing the size of the blobs. - """ - if diff.b_blob is None and diff.deleted_file: - # This is a deletion, so return negative the size of the original. - return diff.a_blob.size * -1 - - if diff.a_blob is None and diff.new_file: - # This is a new file, so return the size of the new value. - return diff.b_blob.size - - # Otherwise just return the size a-b - return diff.a_blob.size - diff.b_blob.size +#def diff_size(diff): +# """ +# Computes the size of the diff by comparing the size of the blobs. +# """ +# if diff.b_blob is None and diff.deleted_file: +# # This is a deletion, so return negative the size of the original. +# return diff.a_blob.size * -1 +# +# if diff.a_blob is None and diff.new_file: +# # This is a new file, so return the size of the new value. +# return diff.b_blob.size +# +# # Otherwise just return the size a-b +# return diff.a_blob.size - diff.b_blob.size +# +# +#def diff_type(diff): +# """ +# Determines the type of the diff by looking at the diff flags. +# """ +# if diff.renamed: return 'R' +# if diff.deleted_file: return 'D' +# if diff.new_file: return 'A' +# return 'M' + +def main(path): + df = pd.DataFrame(versions(path)) + df['time'] = pd.to_datetime(df['time']) + df['unix_nanos'] = df['time'].astype('int') + df = df.sort_values(by='time').groupby(['crate_name', 'version']).last().reset_index() + buf = io.StringIO() + df.to_csv(buf, index=False) + print(buf.getvalue()) + +if __name__ == '__main__': + path = sys.argv[1] + main(path) - -def diff_type(diff): - """ - Determines the type of the diff by looking at the diff flags. - """ - if diff.renamed: return 'R' - if diff.deleted_file: return 'D' - if diff.new_file: return 'A' - return 'M' - -df = pd.DataFrame(versions('crate-index')) -df['time'] = pd.to_datetime(df['time']) -df['unix_nanos'] = df['time'].astype('int') -df = df.sort_values(by='time').groupby(['crate', 'vers']).last().reset_index() -buf = io.StringIO() -df.to_csv(buf, index=False) -print(buf.getvalue()) diff --git a/src/publish.rs b/src/publish.rs index 86e9c0c..6a845c1 100644 --- a/src/publish.rs +++ b/src/publish.rs @@ -2,7 +2,7 @@ use std::path::{Path, PathBuf}; use std::collections::{BTreeMap, HashMap}; -use std::io::prelude::*; +use std::io::{self, prelude::*}; use std::time::*; use serde::{Serialize, Deserialize}; @@ -17,9 +17,8 @@ use tokio::io::AsyncBufReadExt; use reqwest::header::AUTHORIZATION; use tempfile::TempDir; use rayon::prelude::*; -use petgraph::stable_graph::StableGraph; -use petgraph::visit::{Bfs, EdgeRef, Topo, Walker}; -use petgraph::graph::NodeIndex; +use chrono::prelude::*; +use convert_case::{Case, Casing}; #[derive(Parser, Debug)] #[clap(author, version, global_setting(clap::AppSettings::DeriveDisplayOrder))] @@ -50,8 +49,16 @@ struct Opt { pub struct DestinationRegistryConfig { #[serde(alias = "api")] pub api_url: Url, + /// Registry index url, i.e. the url provided to Cargo via configuration + /// to identify where to pull the index metadata from. + #[serde(alias = "index")] + pub index_url: String, #[serde(alias = "token")] pub auth_token: String, + /// The name the registry should have in the Cargo.toml files published to + /// the destination registry. This can be a rename (i.e. different than the + /// registry name provided in `SourceRegistryConfig`) or the same name. + pub registry_name: String, } #[derive(Debug, Clone, Deserialize)] @@ -61,6 +68,10 @@ pub struct SourceRegistryConfig { pub index_dir: PathBuf, #[serde(alias = "crate-files")] pub crate_files_dir: PathBuf, + /// Name used in Cargo.toml for dependencies from the registry. + pub registry_name: String, + /// Path of CSV file with log of when each crate version was published. + pub publish_history_csv: PathBuf, } #[derive(Deserialize, Debug, Clone)] @@ -125,6 +136,38 @@ impl Config { } } +#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] +pub struct PublishLogRow { + pub crate_name: String, + pub version: Version, + pub path: PathBuf, + pub commit: String, + pub author: String, + pub time: DateTime, + pub unix_nanos: u64, +} + +struct CsvSetup { + pub rdr: csv::Reader>, + pub headers: csv::ByteRecord, + pub row: csv::ByteRecord, +} + +fn csv_setup(path: &Path) -> Result { + verify_file_exists(path)?; + let file = std::fs::File::open(path)?; + let buf = std::io::BufReader::new(file); + let mut rdr = csv::Reader::from_reader(buf); + let headers = + rdr.byte_headers() + .map_err(|e| anyhow!("failed to parse csv headers: {}", e))? + .clone(); + let row = csv::ByteRecord::new(); + Ok(CsvSetup { rdr, headers, row }) +} + + + /// fields we need from Cargo.toml [package] section to combine with IndexMeta /// to form a PublishMeta. #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] @@ -590,7 +633,14 @@ struct VersionMeta { manifest: ManifestStub, readme: Option, tmp: TempDir, - meta: cargo_metadata::Metadata, + modified_manifest_toml: Option, + // meta: cargo_metadata::Metadata, +} + +impl VersionMeta { + pub fn source_dir(&self) -> PathBuf { + self.tmp.path().join(&format!("{}-{}", self.index_meta.name, self.index_meta.vers)) + } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -679,8 +729,8 @@ fn parse_manifests( let begin = Instant::now(); let out: HashMap> = crate_versions - // .into_par_iter() - .into_iter() + .into_par_iter() + // .into_iter() .map(|(crate_name, versions)| -> Result<(String, Vec), Error> { let begin = Instant::now(); let mut version_metas = Vec::new(); @@ -722,13 +772,13 @@ fn parse_manifests( let target_dir = tmp.path().join("target"); std::fs::create_dir(&target_dir)?; - let meta = cargo_metadata::MetadataCommand::new() - .manifest_path(tmp.path().join(&format!("{crate_name}-{version}/Cargo.toml"))) - //.env("CARGO_TARGET_DIR", &target_dir) - .other_options(vec!["-vv".to_string()]) - .verbose(true) - // .other_options(["--frozen"].into_iter().map(|x| x.to_owned()).collect::>()) - .exec()?; + // let meta = cargo_metadata::MetadataCommand::new() + // .manifest_path(tmp.path().join(&format!("{crate_name}-{version}/Cargo.toml"))) + // //.env("CARGO_TARGET_DIR", &target_dir) + // .other_options(vec!["-vv".to_string()]) + // .verbose(true) + // // .other_options(["--frozen"].into_iter().map(|x| x.to_owned()).collect::>()) + // .exec()?; version_metas.push(VersionMeta { index_meta, @@ -737,7 +787,8 @@ fn parse_manifests( manifest, readme, tmp, - meta, + modified_manifest_toml: None, + // meta, }); } debug!(%crate_name, "parsed {} manifests in {:?}", version_metas.len(), begin.elapsed()); @@ -757,48 +808,160 @@ fn parse_manifests( // } // } -// fn build_dependency_graph<'a>( -// crate_versions: &'a HashMap>, -// ) -> (StableGraph, ()>, HashMap, NodeIndex>) { -// let begin = Instant::now(); -// -// let mut graph = StableGraph::new(); -// let mut index: HashMap, NodeIndex> = Default::default(); -// -// macro_rules! get_ix { -// ($node:expr) => {{ -// let key_exists = ix.contains_key($node); -// if !key_exists { -// let ix = graph.add_node($node.clnoe()); -// index.insert(node.clone(), ix); -// ix -// } else { -// *index[$node] -// } -// }} -// } -// -// for (name, versions) in crate_versions.iter() { -// for version_meta in versions.iter() { -// let v = &version_meta.index_meta; -// let node = Node { name: name.as_str(), vers: v.vers.clone() }; -// let key_exists = ix.contains_key(&node); -// let ix = get_ix!(&node); -// for dep_node in v.deps.iter().filter_map(get_registry_dep) { -// let jx = get_ix!(dep_node); -// graph.add_edge(ix, js, ()); -// } -// } -// } -// -// info!( -// n_nodes = graph.node_count(), -// n_edges = graph.edge_count(), -// "built dependency graph for entire registry in {:?}", begin.elapsed(), -// ); -// -// (graph, index) -// } +fn edit_dep_registries( + dep_key: &str, + manifest: &mut toml_edit::Document, + src_registry_name: &str, + dst_registry_name: &str, +) -> Result<(), Error> { + let Some(deps) = manifest.get_mut(dep_key).and_then(|item| item.as_table_like_mut()) else { + trace!("missing key in manifest toml: {}", dep_key); + return Ok(()) + }; + + for (k, v) in deps.iter_mut() { + let Some(t) = v.as_table_like_mut() else { continue }; + if t.contains_key("registry-index") { + warn!(dep_name = ?k, "dep table contains registry-index key!"); + } + if let Some(registry_item) = t.get_mut("registry") { + if registry_item.as_str().unwrap_or("") == src_registry_name { + trace!(dep_name = ?k, %dep_key, ?src_registry_name, ?dst_registry_name, "modifying registry in Cargo.toml"); + *registry_item = toml_edit::value(dst_registry_name); + } + } + } + Ok(()) +} + +fn edit_publish_registry_if_present( + manifest: &mut toml_edit::Document, + src_registry_name: &str, + dst_registry_name: &str, +) -> Result<(), Error> { + let Some(package) = manifest.get_mut("package").and_then(|item| item.as_table_like_mut()) else { + anyhow::bail!("package key not found in manifest toml"); + }; + + let Some(publish_item) = package.get_mut("publish") else { + trace!("no 'publish' key in Cargo.toml package section"); + return Ok(()) + }; + + let Some(publish_array) = publish_item.as_array_mut() else { + anyhow::bail!("failed to cast publish item as array"); + }; + + let Some(i) = publish_array.iter().position(|x| x.as_str().map(|s| s == src_registry_name).unwrap_or(false)) else { + anyhow::bail!("publish key exists, but source registry name does not appear in it! (`{}`)", publish_array.to_string()); + }; + + let item_i = publish_array.get_mut(i).unwrap(); + *item_i = toml_edit::Value::from(dst_registry_name); + + Ok(()) +} + +fn prepare_source_dir_for_publish(config: &Config, meta: &mut VersionMeta) -> Result<(), Error> { + let source_dir = meta.source_dir(); + let mut modified_manifest = meta.manifest_files.cargo_toml_orig.parse::()?; + + edit_dep_registries("dependencies", &mut modified_manifest, &config.src.registry_name, &config.dst.registry_name)?; + edit_dep_registries("dev-dependencies", &mut modified_manifest, &config.src.registry_name, &config.dst.registry_name)?; + edit_dep_registries("build-dependencies", &mut modified_manifest, &config.src.registry_name, &config.dst.registry_name)?; + + edit_publish_registry_if_present(&mut modified_manifest, &config.src.registry_name, &config.dst.registry_name)?; + + // write modified manifest over Cargo.toml (leaves Cargo.toml.orig as is) + let modified_manifest_toml = modified_manifest.to_string(); + let cargo_toml_path = source_dir.join("Cargo.toml"); + std::fs::write(&cargo_toml_path, modified_manifest_toml.as_bytes())?; + debug!( + crate_name = %meta.index_meta.name, + vers = %meta.index_meta.vers, + path = ?cargo_toml_path, + "wrote modified manifest file", + ); + meta.modified_manifest_toml = Some(modified_manifest_toml); + + let cargo_toml_orig_path = source_dir.join("Cargo.toml.orig"); + if cargo_toml_orig_path.exists() { + std::fs::remove_file(&cargo_toml_orig_path)?; + trace!( + crate_name = %meta.index_meta.name, + vers = %meta.index_meta.vers, + path = ?cargo_toml_orig_path, + "removed Cargo.toml.orig file", + ); + } + + let cargo_lock_path = source_dir.join("Cargo.lock"); + if cargo_lock_path.exists() { + std::fs::remove_file(&cargo_lock_path)?; + trace!( + crate_name = %meta.index_meta.name, + vers = %meta.index_meta.vers, + path = ?cargo_lock_path, + "removed Cargo.lock file", + ); + } + Ok(()) +} + +fn prepare_source_dirs_for_publish(config: &Config, manifests: &mut HashMap>) -> Result<(), Error> { + let begin = Instant::now(); + manifests.par_iter_mut() + .map(|(name, versions)| -> Result<(), Error> { + for meta in versions.iter_mut() { + prepare_source_dir_for_publish(&config, meta) + .map_err(|err| { + error!(%name, vers = %meta.index_meta.vers, ?err, "prepare_source_dir_for_publish failed"); + err + })?; + } + Ok(()) + }).collect::, Error>>()?; + info!("modified Cargo.toml manifests in {:?}", begin.elapsed()); + Ok(()) +} + +fn cargo_publish_modified_source_dir(config: &Config, meta: &VersionMeta) -> Result<(), Error> { + let begin = Instant::now(); + info!(name = %meta.index_meta.name, vers = %meta.index_meta.vers, "publishing crate version"); + let index_env_key = format!("CARGO_REGISTRIES_{}_INDEX", config.dst.registry_name.to_case(Case::ScreamingSnake)); + let token_env_key = format!("CARGO_REGISTRIES_{}_TOKEN", config.dst.registry_name.to_case(Case::ScreamingSnake)); + + let source_dir = meta.source_dir(); + let manifest_path = source_dir.join("Cargo.toml"); + let manifest_path_str = manifest_path.display().to_string(); + + let mut args: Vec<&str> = vec!["publish"]; + args.extend_from_slice(&["--registry", &config.dst.registry_name][..]); + // args.extend_from_slice(&["--index", &config.dst.index_url][..]); + args.extend_from_slice(&["--token", &config.dst.auth_token][..]); + args.extend_from_slice(&["--manifest-path", manifest_path_str.as_str()][..]); + args.extend_from_slice(&["--no-verify", "--allow-dirty", "-vv"][..]); + + debug!(name = %meta.index_meta.name, vers = %meta.index_meta.vers, "executing `cargo {}`", args.join(" ")); + let output = std::process::Command::new("cargo") + .env(&index_env_key, &config.dst.index_url) + .env(&token_env_key, &config.dst.auth_token) + .args(&args) + .output()?; + + debug!(name = %meta.index_meta.name, vers = %meta.index_meta.vers, exit_status = ?output.status, "finished executing `cargo publish` command"); + + if !output.status.success() { + let stdout = std::str::from_utf8(&output.stdout).unwrap_or("utf8err"); + let stderr = std::str::from_utf8(&output.stderr).unwrap_or("utf8err"); + error!(exit_status = ?output.status, "cargo publish error!\nstdout:\n{}\nstderr:\n:{}\n\n", stdout, stderr); + debug!("cargo publish error - original Cargo.toml:\n***\n{}\n***", meta.manifest_files.cargo_toml_orig); + debug!("cargo publish error - modified Cargo.toml:\n***\n{}\n***", meta.modified_manifest_toml.as_ref().unwrap()); + } + + info!(name = %meta.index_meta.name, vers = %meta.index_meta.vers, "finished cargo publish in {:?}", begin.elapsed()); + Ok(()) +} async fn verify_dir_exists>(path: P) -> Result<(), Error> { match tokio::fs::metadata(path.as_ref()).await { @@ -828,6 +991,23 @@ fn verify_file_exists>(path: P) -> Result<(), Error> { } } +fn read_publish_log_csv(path: &Path) -> Result, Error> { + let begin = Instant::now(); + let CsvSetup { mut rdr, headers, mut row } = csv_setup(path)?; + let mut out = Vec::new(); + while rdr.read_byte_record(&mut row)? { + // only partially deserialized after this + let parsed: PublishLogRow = row.deserialize(Some(&headers)) + .map_err(|err| { + error!(?row, ?headers, ?err, "deserializing row failed"); + err + })?; + out.push(parsed); + } + info!(?path, "parsed publish log csv in {:?}", begin.elapsed()); + Ok(out) +} + fn main() -> Result<(), Error> { let begin = Instant::now(); @@ -843,14 +1023,41 @@ fn main() -> Result<(), Error> { rt.block_on(verify_dir_exists(&config.src.index_dir))?; rt.block_on(verify_dir_exists(&config.src.crate_files_dir))?; + verify_file_exists(&config.src.publish_history_csv)?; + if opt.validate { println!("{:#?}", config); return Ok(()) } + let mut publish_log = read_publish_log_csv(&config.src.publish_history_csv)?; + publish_log.sort_by_key(|x| x.unix_nanos); + assert!(!publish_log.is_empty()); + info!(n_rows = publish_log.len(), "parsed publish log csv"); + let krates = rt.block_on(get_index_metas(&config))?; - let manifests = parse_manifests(&config, krates)?; + let mut manifests = parse_manifests(&config, krates)?; + + prepare_source_dirs_for_publish(&config, &mut manifests)?; + + let mut by_name_vers: HashMap<(&str, &Version), &VersionMeta> = manifests.iter() + .flat_map(|(k, v)| { + v.iter().map(|m| ((k.as_str(), &m.index_meta.vers), m)) + }).collect(); + + + for row in publish_log.iter() { + let Some(meta) = by_name_vers.remove(&(row.crate_name.as_str(), &row.version)) else { + warn!(?row, "crate version in publish log not found in index versions"); + continue + }; + + if let Err(err) = cargo_publish_modified_source_dir(&config, meta) { + error!(?err, name = %meta.index_meta.name, vers = %meta.index_meta.vers, "failed to publish crate version"); + } + } + info!("finished publishing crates to destination registry"); // let (graph, ix) = build_dependency_graph(&manifests);