A command-line tool for crate registry backup/export https://shipyard.rs
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1069 lines
38 KiB

#![allow(unused_labels)]
use std::path::{Path, PathBuf};
use std::collections::{BTreeMap, HashMap};
use std::io::{self, prelude::*};
use std::time::*;
use serde::{Serialize, Deserialize};
use clap::Parser;
use tracing::{debug, error, info, trace, warn};
use tracing_subscriber::filter::EnvFilter;
use url::Url;
use anyhow::{anyhow, bail, Error, Context};
use semver::Version;
use futures::stream::StreamExt;
use tokio::io::AsyncBufReadExt;
use reqwest::header::AUTHORIZATION;
use tempfile::TempDir;
use rayon::prelude::*;
use chrono::prelude::*;
use convert_case::{Case, Casing};
#[derive(Parser, Debug)]
#[clap(author, version, global_setting(clap::AppSettings::DeriveDisplayOrder))]
struct Opt {
/// Config file with source directories and destination registry info
#[clap(short, long, value_name = "PATH")]
pub config_file: PathBuf,
/// Perform all the work of generating `cargo publish` payloads,
/// but don't send them to the destination registry server
#[clap(long)]
pub dry_run: bool,
/// Load config file, validate the settings, and display the final loaded content
/// to stdout, then exit
#[clap(long)]
pub validate: bool,
/// Use to limit which crates from the source registry are published to the
/// destination registry. Expects a regular expression which will be matched
/// against the names of crates. Only crates with names that match the regex
/// will be published. This field may also be specified at the top level of
/// the config file.
#[clap(long, value_name = "REGEX", alias = "filter")]
pub filter_crates: Option<String>,
}
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct DestinationRegistryConfig {
#[serde(alias = "api")]
pub api_url: Url,
/// Registry index url, i.e. the url provided to Cargo via configuration
/// to identify where to pull the index metadata from.
#[serde(alias = "index")]
pub index_url: String,
#[serde(alias = "token")]
pub auth_token: String,
/// The name the registry should have in the Cargo.toml files published to
/// the destination registry. This can be a rename (i.e. different than the
/// registry name provided in `SourceRegistryConfig`) or the same name.
pub registry_name: String,
}
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct SourceRegistryConfig {
#[serde(alias = "index")]
pub index_dir: PathBuf,
#[serde(alias = "crate-files")]
pub crate_files_dir: PathBuf,
/// Name used in Cargo.toml for dependencies from the registry.
pub registry_name: String,
/// Path of CSV file with log of when each crate version was published.
pub publish_history_csv: PathBuf,
}
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct HttpConfig {
/// Value of user-agent HTTP header
#[serde(default = "default_user_agent")]
pub user_agent: String,
}
const DEFAULT_USER_AGENT: &str = concat!("shipyard.rs-publish-tool/v", env!("CARGO_PKG_VERSION"));
fn default_user_agent() -> String {
DEFAULT_USER_AGENT.to_string()
}
impl Default for HttpConfig {
fn default() -> Self {
Self {
user_agent: default_user_agent(),
}
}
}
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Config {
/// Do everything except actually publish to the destination registry. Can also be
/// toggled using the --dry-run command line flag.
#[serde(default)]
pub dry_run: bool,
/// Local directories with source registry files
#[serde(alias = "source")]
pub src: SourceRegistryConfig,
/// Server information and authentication needed to publish to the
/// destination registry
#[serde(alias = "destination")]
pub dst: DestinationRegistryConfig,
/// Settings controlling the HTTP publish requests to the destination registry
#[serde(default)]
pub http: HttpConfig,
/// Use to limit which crates from the source registry are published to the
/// destination registry. Expects a regular expression which will be matched
/// against the names of crates. Only crates with names that match the regex
/// will be published.
#[serde(default, alias = "filter")]
pub filter_crates: Option<String>,
}
impl Config {
pub fn compile_filter(&self) -> Result<Option<regex::Regex>, Error> {
match self.filter_crates.as_ref() {
Some(regex) => {
let compiled = regex::Regex::new(regex).map_err(|e| {
error!(%regex, err = ?e, "regex failed to compile: {}", e);
e
})?;
Ok(Some(compiled))
}
None => Ok(None),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct PublishLogRow {
pub crate_name: String,
pub version: Version,
pub path: PathBuf,
pub commit: String,
pub author: String,
pub time: DateTime<Utc>,
pub unix_nanos: u64,
}
struct CsvSetup {
pub rdr: csv::Reader<io::BufReader<std::fs::File>>,
pub headers: csv::ByteRecord,
pub row: csv::ByteRecord,
}
fn csv_setup(path: &Path) -> Result<CsvSetup, Error> {
verify_file_exists(path)?;
let file = std::fs::File::open(path)?;
let buf = std::io::BufReader::new(file);
let mut rdr = csv::Reader::from_reader(buf);
let headers =
rdr.byte_headers()
.map_err(|e| anyhow!("failed to parse csv headers: {}", e))?
.clone();
let row = csv::ByteRecord::new();
Ok(CsvSetup { rdr, headers, row })
}
/// fields we need from Cargo.toml [package] section to combine with IndexMeta
/// to form a PublishMeta.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct PackageStub {
pub name: String,
pub version: Version,
#[serde(default)]
pub authors: Vec<String>,
pub description: Option<String>,
pub license: Option<String>,
pub license_file: Option<PathBuf>,
#[serde(default)]
pub categories: Vec<String>,
#[serde(default)]
pub keywords: Vec<String>,
pub readme: Option<PathBuf>,
pub repository: Option<String>,
pub homepage: Option<String>,
pub documentation: Option<String>,
pub links: Option<String>,
}
/// Example from post-cargo publish Cargo.toml
/// ```toml,ignore
/// [dependencies.docyard]
/// version = "0.31.0"
/// registry-index = "ssh://git@ssh.shipyard.rs/shipyard-rs/crate-index.git"
/// features = [
/// "auth",
/// "storage",
/// ]
/// default-features = false
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct GeneratedManifestDependency {
#[serde(rename = "registry-index")]
pub registry_index: Option<String>,
}
/// for parsing Cargo.toml to extract missing PublishMeta fields that do not appear
/// in IndexMeta
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct ManifestStub {
pub package: PackageStub,
// TODO: parse + modify the generated Cargo.toml (not Cargo.toml.original)
// to rewrite the `registry-index` fields.
//
// we will also need to recompute the cksum
}
/// full definition of cargo publish json
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct PublishMeta {
pub name: String,
#[serde(alias = "version")]
pub vers: semver::Version,
#[serde(alias = "dependencies")]
#[serde(default)]
pub deps: Vec<PublishDependency>,
#[serde(default)]
pub features: BTreeMap<String, Vec<String>>,
#[serde(default)]
pub authors: Vec<String>,
pub description: Option<String>,
pub documentation: Option<String>,
pub homepage: Option<String>,
pub readme: Option<String>,
pub readme_file: Option<PathBuf>,
#[serde(default)]
pub keywords: Vec<String>,
#[serde(default)]
pub categories: Vec<String>,
pub license: Option<String>,
pub license_file: Option<PathBuf>,
pub repository: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub links: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub badges: Option<BTreeMap<String, String>>,
/// from ancient cargo versions
#[serde(skip_serializing_if = "Option::is_none")]
pub features2: Option<BTreeMap<String, Vec<String>>>,
/// from ancient cargo versions
#[serde(skip_serializing_if = "Option::is_none")]
pub v: Option<u8>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct PublishDependency {
pub optional: bool,
pub default_features: bool,
pub name: String,
pub features: Vec<String>,
// cargo and crates-io have this as string
#[serde(alias = "req")]
pub version_req: semver::VersionReq,
pub target: Option<String>,
// crates-io has this as option
pub kind: DependencyKind,
#[serde(skip_serializing_if = "Option::is_none")]
pub registry: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub explicit_name_in_toml: Option<String>,
}
impl From<IndexDependency> for PublishDependency {
fn from(dep: IndexDependency) -> Self {
Self {
name: dep.name,
features: dep.features,
default_features: dep.default_features,
optional: dep.optional,
target: dep.target,
kind: dep.kind,
registry: dep.registry,
version_req: dep.req,
explicit_name_in_toml: dep.package,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct IndexMeta {
// same everything as publish metadata
pub name: String,
#[serde(alias = "version")]
pub vers: semver::Version,
#[serde(alias = "dependencies")]
pub features: BTreeMap<String, Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub links: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub badges: Option<BTreeMap<String, String>>,
// modified format/field names
pub deps: Vec<IndexDependency>,
// fields that don't appear in publish metadata
pub cksum: String,
pub yanked: bool,
// ancient fields, these were actually written
// on sanskrit on stone tablets
#[serde(skip_serializing_if = "Option::is_none")]
pub features2: Option<BTreeMap<String, Vec<String>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub v: Option<u8>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct IndexDependency {
/// corresponds to `explicit_name_in_toml` field in `publish::Dependency`
/// when a dep is renamed in Cargo.toml, otherwise same as `package`.
pub name: String,
/// corresponds to `name` in `publish::Dependency`
#[serde(skip_serializing_if = "Option::is_none")]
pub package: Option<String>,
/// in publish meta, this field is called `version_req`, and the index
/// format requires it to be renamed to `req`
#[serde(alias = "version_req")]
pub req: semver::VersionReq,
pub features: Vec<String>,
pub optional: bool,
pub default_features: bool,
pub target: Option<String>,
pub kind: DependencyKind,
#[serde(skip_serializing_if = "Option::is_none")]
pub registry: Option<String>,
}
/// Section in which this dependency was defined
#[derive(Copy, Clone, Serialize, Deserialize, Debug, PartialEq, PartialOrd, Ord, Eq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum DependencyKind {
/// Used at run time
Normal,
/// Used at build time, not available at run time
Build,
/// Not fetched and not used, except for when used direclty in a workspace
Dev,
}
impl PublishMeta {
pub fn new(
index_meta: IndexMeta,
manifest: ManifestStub,
readme: Option<String>,
) -> Self {
let ManifestStub { package } = manifest;
PublishMeta {
name: package.name,
vers: package.version,
deps: index_meta.deps.into_iter().map(From::from).collect(),
features: index_meta.features,
authors: package.authors,
description: package.description,
documentation: package.documentation,
homepage: package.homepage,
readme,
readme_file: package.readme,
keywords: package.keywords,
categories: package.categories,
license: package.license,
license_file: package.license_file,
repository: package.repository,
links: package.links,
badges: index_meta.badges,
features2: index_meta.features2,
v: index_meta.v,
}
}
}
fn serialize_publish_payload(
publish_meta_json: &[u8],
dot_crate_bytes: &[u8],
) -> Vec<u8> {
assert!(publish_meta_json.len() <= u32::MAX as usize);
assert!(dot_crate_bytes.len() <= u32::MAX as usize);
let mut out: Vec<u8> = Vec::with_capacity(
publish_meta_json.len()
+ dot_crate_bytes.len()
+ 8 // 2x u32 lengths
);
out.extend_from_slice(&(publish_meta_json.len() as u32).to_le_bytes()[..]);
out.extend_from_slice(publish_meta_json);
out.extend_from_slice(&(dot_crate_bytes.len() as u32).to_le_bytes()[..]);
out.extend_from_slice(dot_crate_bytes);
out
}
fn extract_manifest_files_from_tar<R: Read>(rdr: R) -> Result<ManifestFiles, Error> {
let mut archive = tar::Archive::new(rdr);
let mut cargo_toml = None;
let mut cargo_toml_orig = None;
let mut cargo_lock = None;
for entry in archive.entries()? {
let mut entry = entry?;
let path = entry.path()?;
if path.ends_with("Cargo.toml.orig") {
let mut data = String::new();
entry.read_to_string(&mut data)?;
cargo_toml_orig = Some(data);
} else if path.ends_with("Cargo.toml") {
let mut data = String::new();
entry.read_to_string(&mut data)?;
cargo_toml = Some(data);
} else if path.ends_with("Cargo.lock") {
let mut data = String::new();
entry.read_to_string(&mut data)?;
cargo_lock = Some(data);
}
if cargo_toml.is_some()
&& cargo_toml_orig.is_some()
&& cargo_lock.is_some()
{
break
}
}
if !(cargo_toml.is_some() && cargo_toml_orig.is_some())
{
anyhow::bail!("some required manifest files missing in .crate archive \
(cargo_toml={:?} cargo_toml_orig={:?} cargo_lock={:?})",
cargo_toml.is_some(),
cargo_toml_orig.is_some(),
cargo_lock.is_some(),
);
}
Ok(ManifestFiles {
cargo_toml: cargo_toml.unwrap(),
cargo_toml_orig: cargo_toml_orig.unwrap(),
cargo_lock,
})
}
fn extract_readme_from_tar<R: Read>(rdr: R, readme_path: &Path) -> Result<Option<String>, Error> {
let mut archive = tar::Archive::new(rdr);
for entry in archive.entries()? {
let mut entry = entry?;
let path = entry.path()?;
if path == readme_path || path.ends_with(readme_path) {
let mut out = String::new();
entry.read_to_string(&mut out)?;
return Ok(Some(out))
}
}
Ok(None)
}
fn setup_logger() {
let env_filter = EnvFilter::from_default_env();
let builder = tracing_subscriber::fmt()
.with_env_filter(env_filter)
.with_ansi(true);
builder.init();
}
fn load_config_file(opt: &Opt) -> Result<Config, Error> {
if !opt.config_file.exists() {
bail!("path does not exist: {:?}", opt.config_file);
}
let toml = std::fs::read_to_string(&opt.config_file)?;
let mut config: Config = toml::from_str(&toml)
.context("read config file, but unable to parse toml - check \
format against example config")?;
// augment using command line opts
config.filter_crates = config.filter_crates.or_else(|| opt.filter_crates.clone());
config.dry_run |= opt.dry_run;
Ok(config)
}
fn is_hidden(entry: &walkdir::DirEntry) -> bool {
entry
.file_name()
.to_str()
.map(|s| s.starts_with('.'))
.unwrap_or(false)
}
async fn get_index_metas(
config: &Config,
) -> Result<HashMap<String, Vec<IndexMeta>>, Error> {
let filter = config.compile_filter()?;
let mut n_excl = 0;
let files: Vec<(String, PathBuf)> = walkdir::WalkDir::new(&config.src.index_dir)
.max_depth(3)
.into_iter()
.filter_entry(|e| !is_hidden(e))
.filter_map(|res| match res {
Ok(entry) => {
if entry.file_type().is_file() && entry.depth() >= 2 && entry.depth() <= 3 {
let path = entry.into_path();
let crate_name: &str = path.file_name().and_then(|x| x.to_str()).unwrap_or("");
if let Some(filter) = filter.as_ref() {
if !filter.is_match(crate_name.as_ref()) {
trace!(%crate_name, "crate excluded by filter");
n_excl += 1;
return None;
}
}
debug!(?path, "found crate index metadata file to parse");
Some((crate_name.to_owned(), path))
} else {
None
}
}
Err(e) => {
warn!(error = ?e, "walkdir result is error");
None
}
})
.collect();
let n_files = files.len();
info!("found {} crate index metadata files to parse", n_files);
if n_excl > 0 {
warn!(
regex = %config.filter_crates.as_deref().unwrap_or(""),
n_files,
n_excl,
"regex filter (--filter-crates) excluded {} crates", n_excl,
);
}
let crate_versions: Vec<Result<(String, Vec<IndexMeta>), Error>> =
futures::stream::iter(files.into_iter().map(|(crate_name, path)| {
async move {
let file = tokio::fs::File::open(&path).await.map_err(|e| {
error!(err = ?e, ?path, "failed to open file");
e
})?;
let buf = tokio::io::BufReader::new(file);
let mut out = Vec::new();
let mut lines = buf.lines();
'lines: while let Some(line) = lines.next_line().await? {
let index_meta: IndexMeta = serde_json::from_str(&line)
.map_err(|e| {
error!(err = ?e, ?path, "failed to parse line");
e
})?;
out.push(index_meta);
}
debug!(crate_name = %out.first().map(|x| x.name.as_str()).unwrap_or("na"),
"parsed {} crate versions from metadata file", out.len()
);
Ok((crate_name, out))
}
}))
.buffer_unordered(num_cpus::get())
.collect()
.await;
let mut total_number_of_crate_versions = 0;
// map of crate-name => [IndexMeta] (one per published version)
let crate_versions: HashMap<String, Vec<IndexMeta>> = crate_versions
.into_iter()
.filter_map(|result| match result {
Ok((crate_name, xs)) => {
total_number_of_crate_versions += xs.len();
Some((crate_name, xs))
}
Err(e) => {
error!(err = ?e, "parsing metadata failed, skipping file");
None
}
})
.collect();
info!(
n_files,
n_excl,
n_crates = crate_versions.len(),
total_number_of_crate_versions,
"parsed {} crate version metadata entries from index",
total_number_of_crate_versions,
);
Ok(crate_versions)
}
#[derive(Debug, Clone, Deserialize, Eq, PartialEq, Default)]
pub struct PublishWarnings {
#[serde(default)]
pub invalid_categories: Vec<String>,
#[serde(default)]
pub invalid_badges: Vec<String>,
#[serde(default)]
pub other: Vec<String>,
}
#[derive(Debug, Clone, Deserialize, Eq, PartialEq, Default)]
pub struct PublishResponse {
#[serde(default)]
pub warnings: PublishWarnings,
}
struct ManifestFiles {
cargo_toml: String,
cargo_toml_orig: String,
cargo_lock: Option<String>,
}
struct VersionMeta {
index_meta: IndexMeta,
manifest_files: ManifestFiles,
dot_crate_path: PathBuf,
manifest: ManifestStub,
readme: Option<String>,
tmp: TempDir,
modified_manifest_toml: Option<String>,
// meta: cargo_metadata::Metadata,
}
impl VersionMeta {
pub fn source_dir(&self) -> PathBuf {
self.tmp.path().join(&format!("{}-{}", self.index_meta.name, self.index_meta.vers))
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct Node<'a> {
name: &'a str,
vers: Version,
}
// async fn process_crates(
// config: &Config,
// crate_versions: HashMap<String, Vec<IndexMeta>>,
// ) -> Result<(), Error> {
// let http_client = reqwest::Client::builder()
// .user_agent(&config.http.user_agent)
// .build()?;
//
// let publish_url = config.dst.api_url.join("/api/v1/crates/new")?;
// let publish_meta = PublishMeta::new(index_meta, manifest, readme);
//
// debug!("built publish meta using crate index json and the Cargo.toml manifest in the .crate targz archive:\n{:#?}\n", publish_meta);
//
// let publish_meta_json = serde_json::to_vec(&publish_meta)?;
// let payload = serialize_publish_payload(&publish_meta_json, &dot_crate_bytes);
// debug!(
// n_bytes = payload.len(),
// %crate_name,
// %version,
// "serialized publish payload",
// );
//
// if config.dry_run {
// debug!(
// %crate_name,
// %version,
// %publish_url,
// "skipping publish (--dry-run mode)",
// );
// continue;
// }
//
// let resp = http_client.put(publish_url.clone())
// .header(AUTHORIZATION, &config.dst.auth_token)
// .body(payload)
// .send()
// .await?;
//
// debug!(status = ?resp.status(), "rcvd server response to publish request");
//
// let resp: PublishResponse = resp
// .error_for_status()?
// .json()
// .await?;
// let PublishResponse { warnings } = resp;
//
// let mut any_warnings = false;
//
// for warning in warnings.invalid_categories.iter() {
// warn!(%crate_name, %version, "registry server invalid category warning: {}", warning);
// any_warnings = true;
// }
//
// for warning in warnings.invalid_badges.iter() {
// warn!(%crate_name, %version, "registry server invalid badge warning: {}", warning);
// any_warnings = true;
// }
//
// for warning in warnings.other.iter() {
// warn!(%crate_name, %version, "registry server 'other' warning: {}", warning);
// any_warnings = true;
// }
//
// trace!("server response body:\n{warnings:#?}");
//
// info!(
// %crate_name,
// %version,
// any_warnings,
// "published crate version in {:?}!",
// begin.elapsed(),
// );
fn parse_manifests(
config: &Config,
crate_versions: HashMap<String, Vec<IndexMeta>>,
) -> Result<HashMap<String, Vec<VersionMeta>>, Error> {
let begin = Instant::now();
let out: HashMap<String, Vec<VersionMeta>> = crate_versions
.into_par_iter()
// .into_iter()
.map(|(crate_name, versions)| -> Result<(String, Vec<VersionMeta>), Error> {
let begin = Instant::now();
let mut version_metas = Vec::new();
for index_meta in versions {
let version = index_meta.vers.clone();
trace!(%crate_name, %version, "processing crate version");
let dot_crate_path = config.src.crate_files_dir
.join(&format!("{}/{}/download", crate_name, index_meta.vers));
verify_file_exists(&dot_crate_path)?;
trace!(path = ?dot_crate_path, "reading .crate file");
let dot_crate_bytes = std::fs::read(&dot_crate_path)
.with_context(|| {
format!("failed to read .crate file for \
{crate_name} v{0} with path {dot_crate_path:?}",
index_meta.vers,
)
})?;
trace!("extracting Cargo.toml from .crate targz archive");
let decoder = flate2::read::GzDecoder::new(&dot_crate_bytes[..]);
let manifest_files = extract_manifest_files_from_tar(decoder)?;
if manifest_files.cargo_lock.is_none() {
debug!(%crate_name, %version, "Cargo.lock not present in .crate archive");
}
let manifest: ManifestStub = toml::from_str(&manifest_files.cargo_toml)?;
let mut readme: Option<String> = None;
if let Some(readme_path) = manifest.package.readme.as_ref() {
let decoder = flate2::read::GzDecoder::new(&dot_crate_bytes[..]);
if let Some(readme_content) = extract_readme_from_tar(decoder, readme_path)? {
trace!(length = readme_content.len(), "extracted readme file content from .crate targz archive");
readme = Some(readme_content);
}
}
let tmp = TempDir::new()?;
let decoder = flate2::read::GzDecoder::new(&dot_crate_bytes[..]);
tar::Archive::new(decoder).unpack(tmp.path())?;
trace!(tmpdir = ?tmp.path(), "unpacked .crate archive to temp dir");
let target_dir = tmp.path().join("target");
std::fs::create_dir(&target_dir)?;
// let meta = cargo_metadata::MetadataCommand::new()
// .manifest_path(tmp.path().join(&format!("{crate_name}-{version}/Cargo.toml")))
// //.env("CARGO_TARGET_DIR", &target_dir)
// .other_options(vec!["-vv".to_string()])
// .verbose(true)
// // .other_options(["--frozen"].into_iter().map(|x| x.to_owned()).collect::<Vec<_>>())
// .exec()?;
version_metas.push(VersionMeta {
index_meta,
manifest_files,
dot_crate_path,
manifest,
readme,
tmp,
modified_manifest_toml: None,
// meta,
});
}
debug!(%crate_name, "parsed {} manifests in {:?}", version_metas.len(), begin.elapsed());
Ok((crate_name, version_metas))
}).collect::<Result<_, Error>>()?;
info!("parsed crate version manifests in {:?}", begin.elapsed());
Ok(out)
}
// fn get_registry_dep<'a>(index_dep: &'a IndexDependency) -> Option<Node<'a>> {
// match index_dep.registry.as_ref() {
// None => Some(Node { name: index_dep.name.as_str(), vers: index_dep.vers.clone() }),
// Some(index) if index.contains("github.com/rust-lang/crates.io-index") => None,
// Some(other) => panic!("unexpected registry value: {}", other),
// }
// }
fn edit_dep_registries(
dep_key: &str,
manifest: &mut toml_edit::Document,
src_registry_name: &str,
dst_registry_name: &str,
) -> Result<(), Error> {
let Some(deps) = manifest.get_mut(dep_key).and_then(|item| item.as_table_like_mut()) else {
trace!("missing key in manifest toml: {}", dep_key);
return Ok(())
};
for (k, v) in deps.iter_mut() {
let Some(t) = v.as_table_like_mut() else { continue };
if t.contains_key("registry-index") {
warn!(dep_name = ?k, "dep table contains registry-index key!");
}
if let Some(registry_item) = t.get_mut("registry") {
if registry_item.as_str().unwrap_or("") == src_registry_name {
trace!(dep_name = ?k, %dep_key, ?src_registry_name, ?dst_registry_name, "modifying registry in Cargo.toml");
*registry_item = toml_edit::value(dst_registry_name);
}
}
}
Ok(())
}
fn edit_publish_registry_if_present(
manifest: &mut toml_edit::Document,
src_registry_name: &str,
dst_registry_name: &str,
) -> Result<(), Error> {
let Some(package) = manifest.get_mut("package").and_then(|item| item.as_table_like_mut()) else {
anyhow::bail!("package key not found in manifest toml");
};
let Some(publish_item) = package.get_mut("publish") else {
trace!("no 'publish' key in Cargo.toml package section");
return Ok(())
};
let Some(publish_array) = publish_item.as_array_mut() else {
anyhow::bail!("failed to cast publish item as array");
};
let Some(i) = publish_array.iter().position(|x| x.as_str().map(|s| s == src_registry_name).unwrap_or(false)) else {
anyhow::bail!("publish key exists, but source registry name does not appear in it! (`{}`)", publish_array.to_string());
};
let item_i = publish_array.get_mut(i).unwrap();
*item_i = toml_edit::Value::from(dst_registry_name);
Ok(())
}
fn prepare_source_dir_for_publish(config: &Config, meta: &mut VersionMeta) -> Result<(), Error> {
let source_dir = meta.source_dir();
let mut modified_manifest = meta.manifest_files.cargo_toml_orig.parse::<toml_edit::Document>()?;
edit_dep_registries("dependencies", &mut modified_manifest, &config.src.registry_name, &config.dst.registry_name)?;
edit_dep_registries("dev-dependencies", &mut modified_manifest, &config.src.registry_name, &config.dst.registry_name)?;
edit_dep_registries("build-dependencies", &mut modified_manifest, &config.src.registry_name, &config.dst.registry_name)?;
edit_publish_registry_if_present(&mut modified_manifest, &config.src.registry_name, &config.dst.registry_name)?;
// write modified manifest over Cargo.toml (leaves Cargo.toml.orig as is)
let modified_manifest_toml = modified_manifest.to_string();
let cargo_toml_path = source_dir.join("Cargo.toml");
std::fs::write(&cargo_toml_path, modified_manifest_toml.as_bytes())?;
debug!(
crate_name = %meta.index_meta.name,
vers = %meta.index_meta.vers,
path = ?cargo_toml_path,
"wrote modified manifest file",
);
meta.modified_manifest_toml = Some(modified_manifest_toml);
let cargo_toml_orig_path = source_dir.join("Cargo.toml.orig");
if cargo_toml_orig_path.exists() {
std::fs::remove_file(&cargo_toml_orig_path)?;
trace!(
crate_name = %meta.index_meta.name,
vers = %meta.index_meta.vers,
path = ?cargo_toml_orig_path,
"removed Cargo.toml.orig file",
);
}
let cargo_lock_path = source_dir.join("Cargo.lock");
if cargo_lock_path.exists() {
std::fs::remove_file(&cargo_lock_path)?;
trace!(
crate_name = %meta.index_meta.name,
vers = %meta.index_meta.vers,
path = ?cargo_lock_path,
"removed Cargo.lock file",
);
}
Ok(())
}
fn prepare_source_dirs_for_publish(config: &Config, manifests: &mut HashMap<String, Vec<VersionMeta>>) -> Result<(), Error> {
let begin = Instant::now();
manifests.par_iter_mut()
.map(|(name, versions)| -> Result<(), Error> {
for meta in versions.iter_mut() {
prepare_source_dir_for_publish(&config, meta)
.map_err(|err| {
error!(%name, vers = %meta.index_meta.vers, ?err, "prepare_source_dir_for_publish failed");
err
})?;
}
Ok(())
}).collect::<Result<Vec<()>, Error>>()?;
info!("modified Cargo.toml manifests in {:?}", begin.elapsed());
Ok(())
}
fn cargo_publish_modified_source_dir(config: &Config, meta: &VersionMeta) -> Result<(), Error> {
let begin = Instant::now();
info!(name = %meta.index_meta.name, vers = %meta.index_meta.vers, "publishing crate version");
let index_env_key = format!("CARGO_REGISTRIES_{}_INDEX", config.dst.registry_name.to_case(Case::ScreamingSnake));
let token_env_key = format!("CARGO_REGISTRIES_{}_TOKEN", config.dst.registry_name.to_case(Case::ScreamingSnake));
let source_dir = meta.source_dir();
let manifest_path = source_dir.join("Cargo.toml");
let manifest_path_str = manifest_path.display().to_string();
let mut args: Vec<&str> = vec!["publish"];
args.extend_from_slice(&["--registry", &config.dst.registry_name][..]);
// args.extend_from_slice(&["--index", &config.dst.index_url][..]);
args.extend_from_slice(&["--token", &config.dst.auth_token][..]);
args.extend_from_slice(&["--manifest-path", manifest_path_str.as_str()][..]);
args.extend_from_slice(&["--no-verify", "--allow-dirty", "-vv"][..]);
debug!(name = %meta.index_meta.name, vers = %meta.index_meta.vers, "executing `cargo {}`", args.join(" "));
let output = std::process::Command::new("cargo")
.env(&index_env_key, &config.dst.index_url)
.env(&token_env_key, &config.dst.auth_token)
.args(&args)
.output()?;
debug!(name = %meta.index_meta.name, vers = %meta.index_meta.vers, exit_status = ?output.status, "finished executing `cargo publish` command");
if !output.status.success() {
let stdout = std::str::from_utf8(&output.stdout).unwrap_or("utf8err");
let stderr = std::str::from_utf8(&output.stderr).unwrap_or("utf8err");
error!(exit_status = ?output.status, "cargo publish error!\nstdout:\n{}\nstderr:\n:{}\n\n", stdout, stderr);
debug!("cargo publish error - original Cargo.toml:\n***\n{}\n***", meta.manifest_files.cargo_toml_orig);
debug!("cargo publish error - modified Cargo.toml:\n***\n{}\n***", meta.modified_manifest_toml.as_ref().unwrap());
}
info!(name = %meta.index_meta.name, vers = %meta.index_meta.vers, "finished cargo publish in {:?}", begin.elapsed());
Ok(())
}
async fn verify_dir_exists<P: AsRef<std::path::Path>>(path: P) -> Result<(), Error> {
match tokio::fs::metadata(path.as_ref()).await {
Ok(meta) if meta.is_dir() => Ok(()),
Ok(meta) /* if ! meta.is_dir() */ => {
debug_assert!( ! meta.is_dir());
bail!("path exists, but is not a directory: {:?}", path.as_ref())
}
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
bail!("path does not exist: {}", path.as_ref().display());
}
Err(e) => Err(e.into()),
}
}
fn verify_file_exists<P: AsRef<std::path::Path>>(path: P) -> Result<(), Error> {
match std::fs::metadata(path.as_ref()) {
Ok(meta) if meta.is_file() => Ok(()),
Ok(meta) /* if ! meta.is_file() */ => {
debug_assert!( ! meta.is_file());
bail!("path exists, but is not a file: {:?}", path.as_ref())
}
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
bail!("path does not exist: {}", path.as_ref().display());
}
Err(e) => Err(e.into()),
}
}
fn read_publish_log_csv(path: &Path) -> Result<Vec<PublishLogRow>, Error> {
let begin = Instant::now();
let CsvSetup { mut rdr, headers, mut row } = csv_setup(path)?;
let mut out = Vec::new();
while rdr.read_byte_record(&mut row)? {
// only partially deserialized after this
let parsed: PublishLogRow = row.deserialize(Some(&headers))
.map_err(|err| {
error!(?row, ?headers, ?err, "deserializing row failed");
err
})?;
out.push(parsed);
}
info!(?path, "parsed publish log csv in {:?}", begin.elapsed());
Ok(out)
}
fn main() -> Result<(), Error> {
let begin = Instant::now();
dotenvy::dotenv().ok();
let opt = Opt::parse();
setup_logger();
let config = load_config_file(&opt)?;
let rt = tokio::runtime::Runtime::new()?;
rt.block_on(verify_dir_exists(&config.src.index_dir))?;
rt.block_on(verify_dir_exists(&config.src.crate_files_dir))?;
verify_file_exists(&config.src.publish_history_csv)?;
if opt.validate {
println!("{:#?}", config);
return Ok(())
}
let mut publish_log = read_publish_log_csv(&config.src.publish_history_csv)?;
publish_log.sort_by_key(|x| x.unix_nanos);
assert!(!publish_log.is_empty());
info!(n_rows = publish_log.len(), "parsed publish log csv");
let krates = rt.block_on(get_index_metas(&config))?;
let mut manifests = parse_manifests(&config, krates)?;
prepare_source_dirs_for_publish(&config, &mut manifests)?;
let mut by_name_vers: HashMap<(&str, &Version), &VersionMeta> = manifests.iter()
.flat_map(|(k, v)| {
v.iter().map(|m| ((k.as_str(), &m.index_meta.vers), m))
}).collect();
for row in publish_log.iter() {
let Some(meta) = by_name_vers.remove(&(row.crate_name.as_str(), &row.version)) else {
warn!(?row, "crate version in publish log not found in index versions");
continue
};
if let Err(err) = cargo_publish_modified_source_dir(&config, meta) {
error!(?err, name = %meta.index_meta.name, vers = %meta.index_meta.vers, "failed to publish crate version");
}
}
info!("finished publishing crates to destination registry");
// let (graph, ix) = build_dependency_graph(&manifests);
drop(manifests);
drop(rt);
info!("finished in {:?}", begin.elapsed());
Ok(())
}