A command-line tool for crate registry backup/export
https://shipyard.rs
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
862 lines
29 KiB
862 lines
29 KiB
#![allow(unused_labels)] |
|
|
|
use std::path::{Path, PathBuf}; |
|
use std::collections::{BTreeMap, HashMap}; |
|
use std::io::prelude::*; |
|
use std::time::*; |
|
|
|
use serde::{Serialize, Deserialize}; |
|
use clap::Parser; |
|
use tracing::{debug, error, info, trace, warn}; |
|
use tracing_subscriber::filter::EnvFilter; |
|
use url::Url; |
|
use anyhow::{anyhow, bail, Error, Context}; |
|
use semver::Version; |
|
use futures::stream::StreamExt; |
|
use tokio::io::AsyncBufReadExt; |
|
use reqwest::header::AUTHORIZATION; |
|
use tempfile::TempDir; |
|
use rayon::prelude::*; |
|
use petgraph::stable_graph::StableGraph; |
|
use petgraph::visit::{Bfs, EdgeRef, Topo, Walker}; |
|
use petgraph::graph::NodeIndex; |
|
|
|
#[derive(Parser, Debug)] |
|
#[clap(author, version, global_setting(clap::AppSettings::DeriveDisplayOrder))] |
|
struct Opt { |
|
/// Config file with source directories and destination registry info |
|
#[clap(short, long, value_name = "PATH")] |
|
pub config_file: PathBuf, |
|
/// Perform all the work of generating `cargo publish` payloads, |
|
/// but don't send them to the destination registry server |
|
#[clap(long)] |
|
pub dry_run: bool, |
|
/// Load config file, validate the settings, and display the final loaded content |
|
/// to stdout, then exit |
|
#[clap(long)] |
|
pub validate: bool, |
|
|
|
/// Use to limit which crates from the source registry are published to the |
|
/// destination registry. Expects a regular expression which will be matched |
|
/// against the names of crates. Only crates with names that match the regex |
|
/// will be published. This field may also be specified at the top level of |
|
/// the config file. |
|
#[clap(long, value_name = "REGEX", alias = "filter")] |
|
pub filter_crates: Option<String>, |
|
} |
|
|
|
#[derive(Debug, Clone, Deserialize)] |
|
#[serde(rename_all = "kebab-case")] |
|
pub struct DestinationRegistryConfig { |
|
#[serde(alias = "api")] |
|
pub api_url: Url, |
|
#[serde(alias = "token")] |
|
pub auth_token: String, |
|
} |
|
|
|
#[derive(Debug, Clone, Deserialize)] |
|
#[serde(rename_all = "kebab-case")] |
|
pub struct SourceRegistryConfig { |
|
#[serde(alias = "index")] |
|
pub index_dir: PathBuf, |
|
#[serde(alias = "crate-files")] |
|
pub crate_files_dir: PathBuf, |
|
} |
|
|
|
#[derive(Deserialize, Debug, Clone)] |
|
#[serde(rename_all = "kebab-case")] |
|
pub struct HttpConfig { |
|
/// Value of user-agent HTTP header |
|
#[serde(default = "default_user_agent")] |
|
pub user_agent: String, |
|
} |
|
|
|
const DEFAULT_USER_AGENT: &str = concat!("shipyard.rs-publish-tool/v", env!("CARGO_PKG_VERSION")); |
|
|
|
fn default_user_agent() -> String { |
|
DEFAULT_USER_AGENT.to_string() |
|
} |
|
|
|
impl Default for HttpConfig { |
|
fn default() -> Self { |
|
Self { |
|
user_agent: default_user_agent(), |
|
} |
|
} |
|
} |
|
|
|
#[derive(Debug, Clone, Deserialize)] |
|
#[serde(rename_all = "kebab-case")] |
|
pub struct Config { |
|
/// Do everything except actually publish to the destination registry. Can also be |
|
/// toggled using the --dry-run command line flag. |
|
#[serde(default)] |
|
pub dry_run: bool, |
|
/// Local directories with source registry files |
|
#[serde(alias = "source")] |
|
pub src: SourceRegistryConfig, |
|
/// Server information and authentication needed to publish to the |
|
/// destination registry |
|
#[serde(alias = "destination")] |
|
pub dst: DestinationRegistryConfig, |
|
/// Settings controlling the HTTP publish requests to the destination registry |
|
#[serde(default)] |
|
pub http: HttpConfig, |
|
/// Use to limit which crates from the source registry are published to the |
|
/// destination registry. Expects a regular expression which will be matched |
|
/// against the names of crates. Only crates with names that match the regex |
|
/// will be published. |
|
#[serde(default, alias = "filter")] |
|
pub filter_crates: Option<String>, |
|
} |
|
|
|
impl Config { |
|
pub fn compile_filter(&self) -> Result<Option<regex::Regex>, Error> { |
|
match self.filter_crates.as_ref() { |
|
Some(regex) => { |
|
let compiled = regex::Regex::new(regex).map_err(|e| { |
|
error!(%regex, err = ?e, "regex failed to compile: {}", e); |
|
e |
|
})?; |
|
Ok(Some(compiled)) |
|
} |
|
None => Ok(None), |
|
} |
|
} |
|
} |
|
|
|
/// fields we need from Cargo.toml [package] section to combine with IndexMeta |
|
/// to form a PublishMeta. |
|
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] |
|
pub struct PackageStub { |
|
pub name: String, |
|
pub version: Version, |
|
#[serde(default)] |
|
pub authors: Vec<String>, |
|
pub description: Option<String>, |
|
pub license: Option<String>, |
|
pub license_file: Option<PathBuf>, |
|
#[serde(default)] |
|
pub categories: Vec<String>, |
|
#[serde(default)] |
|
pub keywords: Vec<String>, |
|
pub readme: Option<PathBuf>, |
|
pub repository: Option<String>, |
|
pub homepage: Option<String>, |
|
pub documentation: Option<String>, |
|
pub links: Option<String>, |
|
} |
|
|
|
|
|
/// Example from post-cargo publish Cargo.toml |
|
/// ```toml,ignore |
|
/// [dependencies.docyard] |
|
/// version = "0.31.0" |
|
/// registry-index = "ssh://git@ssh.shipyard.rs/shipyard-rs/crate-index.git" |
|
/// features = [ |
|
/// "auth", |
|
/// "storage", |
|
/// ] |
|
/// default-features = false |
|
/// ``` |
|
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] |
|
pub struct GeneratedManifestDependency { |
|
#[serde(rename = "registry-index")] |
|
pub registry_index: Option<String>, |
|
} |
|
|
|
/// for parsing Cargo.toml to extract missing PublishMeta fields that do not appear |
|
/// in IndexMeta |
|
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] |
|
pub struct ManifestStub { |
|
pub package: PackageStub, |
|
|
|
// TODO: parse + modify the generated Cargo.toml (not Cargo.toml.original) |
|
// to rewrite the `registry-index` fields. |
|
// |
|
// we will also need to recompute the cksum |
|
} |
|
|
|
/// full definition of cargo publish json |
|
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] |
|
pub struct PublishMeta { |
|
pub name: String, |
|
#[serde(alias = "version")] |
|
pub vers: semver::Version, |
|
#[serde(alias = "dependencies")] |
|
#[serde(default)] |
|
pub deps: Vec<PublishDependency>, |
|
#[serde(default)] |
|
pub features: BTreeMap<String, Vec<String>>, |
|
#[serde(default)] |
|
pub authors: Vec<String>, |
|
pub description: Option<String>, |
|
pub documentation: Option<String>, |
|
pub homepage: Option<String>, |
|
pub readme: Option<String>, |
|
pub readme_file: Option<PathBuf>, |
|
#[serde(default)] |
|
pub keywords: Vec<String>, |
|
#[serde(default)] |
|
pub categories: Vec<String>, |
|
pub license: Option<String>, |
|
pub license_file: Option<PathBuf>, |
|
pub repository: Option<String>, |
|
#[serde(skip_serializing_if = "Option::is_none")] |
|
pub links: Option<String>, |
|
#[serde(skip_serializing_if = "Option::is_none")] |
|
pub badges: Option<BTreeMap<String, String>>, |
|
/// from ancient cargo versions |
|
#[serde(skip_serializing_if = "Option::is_none")] |
|
pub features2: Option<BTreeMap<String, Vec<String>>>, |
|
/// from ancient cargo versions |
|
#[serde(skip_serializing_if = "Option::is_none")] |
|
pub v: Option<u8>, |
|
} |
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] |
|
pub struct PublishDependency { |
|
pub optional: bool, |
|
pub default_features: bool, |
|
pub name: String, |
|
pub features: Vec<String>, |
|
// cargo and crates-io have this as string |
|
#[serde(alias = "req")] |
|
pub version_req: semver::VersionReq, |
|
pub target: Option<String>, |
|
// crates-io has this as option |
|
pub kind: DependencyKind, |
|
#[serde(skip_serializing_if = "Option::is_none")] |
|
pub registry: Option<String>, |
|
#[serde(skip_serializing_if = "Option::is_none")] |
|
pub explicit_name_in_toml: Option<String>, |
|
} |
|
|
|
impl From<IndexDependency> for PublishDependency { |
|
fn from(dep: IndexDependency) -> Self { |
|
Self { |
|
name: dep.name, |
|
features: dep.features, |
|
default_features: dep.default_features, |
|
optional: dep.optional, |
|
target: dep.target, |
|
kind: dep.kind, |
|
registry: dep.registry, |
|
version_req: dep.req, |
|
explicit_name_in_toml: dep.package, |
|
} |
|
} |
|
} |
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] |
|
pub struct IndexMeta { |
|
// same everything as publish metadata |
|
pub name: String, |
|
#[serde(alias = "version")] |
|
pub vers: semver::Version, |
|
#[serde(alias = "dependencies")] |
|
pub features: BTreeMap<String, Vec<String>>, |
|
#[serde(skip_serializing_if = "Option::is_none")] |
|
pub links: Option<String>, |
|
#[serde(skip_serializing_if = "Option::is_none")] |
|
pub badges: Option<BTreeMap<String, String>>, |
|
|
|
// modified format/field names |
|
pub deps: Vec<IndexDependency>, |
|
|
|
// fields that don't appear in publish metadata |
|
pub cksum: String, |
|
pub yanked: bool, |
|
|
|
// ancient fields, these were actually written |
|
// on sanskrit on stone tablets |
|
#[serde(skip_serializing_if = "Option::is_none")] |
|
pub features2: Option<BTreeMap<String, Vec<String>>>, |
|
#[serde(skip_serializing_if = "Option::is_none")] |
|
pub v: Option<u8>, |
|
} |
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] |
|
pub struct IndexDependency { |
|
/// corresponds to `explicit_name_in_toml` field in `publish::Dependency` |
|
/// when a dep is renamed in Cargo.toml, otherwise same as `package`. |
|
pub name: String, |
|
/// corresponds to `name` in `publish::Dependency` |
|
#[serde(skip_serializing_if = "Option::is_none")] |
|
pub package: Option<String>, |
|
/// in publish meta, this field is called `version_req`, and the index |
|
/// format requires it to be renamed to `req` |
|
#[serde(alias = "version_req")] |
|
pub req: semver::VersionReq, |
|
pub features: Vec<String>, |
|
pub optional: bool, |
|
pub default_features: bool, |
|
pub target: Option<String>, |
|
pub kind: DependencyKind, |
|
#[serde(skip_serializing_if = "Option::is_none")] |
|
pub registry: Option<String>, |
|
} |
|
|
|
/// Section in which this dependency was defined |
|
#[derive(Copy, Clone, Serialize, Deserialize, Debug, PartialEq, PartialOrd, Ord, Eq, Hash)] |
|
#[serde(rename_all = "lowercase")] |
|
pub enum DependencyKind { |
|
/// Used at run time |
|
Normal, |
|
/// Used at build time, not available at run time |
|
Build, |
|
/// Not fetched and not used, except for when used direclty in a workspace |
|
Dev, |
|
} |
|
|
|
impl PublishMeta { |
|
pub fn new( |
|
index_meta: IndexMeta, |
|
manifest: ManifestStub, |
|
readme: Option<String>, |
|
) -> Self { |
|
let ManifestStub { package } = manifest; |
|
PublishMeta { |
|
name: package.name, |
|
vers: package.version, |
|
deps: index_meta.deps.into_iter().map(From::from).collect(), |
|
features: index_meta.features, |
|
authors: package.authors, |
|
description: package.description, |
|
documentation: package.documentation, |
|
homepage: package.homepage, |
|
readme, |
|
readme_file: package.readme, |
|
keywords: package.keywords, |
|
categories: package.categories, |
|
license: package.license, |
|
license_file: package.license_file, |
|
repository: package.repository, |
|
links: package.links, |
|
badges: index_meta.badges, |
|
features2: index_meta.features2, |
|
v: index_meta.v, |
|
} |
|
} |
|
} |
|
|
|
/// Builds the binary body of a publish request.
///
/// Layout, in order:
///
/// 1. length of `publish_meta_json` as a little-endian `u32`
/// 2. the `publish_meta_json` bytes
/// 3. length of `dot_crate_bytes` as a little-endian `u32`
/// 4. the `dot_crate_bytes` bytes
///
/// # Panics
///
/// Panics if either input is longer than `u32::MAX` bytes, since its length
/// could not be represented in the 4-byte prefix.
fn serialize_publish_payload(
    publish_meta_json: &[u8],
    dot_crate_bytes: &[u8],
) -> Vec<u8> {
    // `TryFrom` is not in the prelude before the 2021 edition
    use std::convert::TryFrom;

    // checked conversions instead of assert + truncating `as` casts
    let json_len = u32::try_from(publish_meta_json.len())
        .expect("publish metadata json is too large: length must fit in a u32");
    let crate_len = u32::try_from(dot_crate_bytes.len())
        .expect(".crate file is too large: length must fit in a u32");

    let mut out: Vec<u8> = Vec::with_capacity(
        publish_meta_json.len()
        + dot_crate_bytes.len()
        + 8 // 2x u32 lengths
    );

    out.extend_from_slice(&json_len.to_le_bytes());
    out.extend_from_slice(publish_meta_json);
    out.extend_from_slice(&crate_len.to_le_bytes());
    out.extend_from_slice(dot_crate_bytes);

    out
}
|
|
|
fn extract_manifest_files_from_tar<R: Read>(rdr: R) -> Result<ManifestFiles, Error> { |
|
let mut archive = tar::Archive::new(rdr); |
|
|
|
let mut cargo_toml = None; |
|
let mut cargo_toml_orig = None; |
|
let mut cargo_lock = None; |
|
|
|
for entry in archive.entries()? { |
|
let mut entry = entry?; |
|
let path = entry.path()?; |
|
|
|
if path.ends_with("Cargo.toml.orig") { |
|
let mut data = String::new(); |
|
entry.read_to_string(&mut data)?; |
|
cargo_toml_orig = Some(data); |
|
} else if path.ends_with("Cargo.toml") { |
|
let mut data = String::new(); |
|
entry.read_to_string(&mut data)?; |
|
cargo_toml = Some(data); |
|
} else if path.ends_with("Cargo.lock") { |
|
let mut data = String::new(); |
|
entry.read_to_string(&mut data)?; |
|
cargo_lock = Some(data); |
|
} |
|
|
|
if cargo_toml.is_some() |
|
&& cargo_toml_orig.is_some() |
|
&& cargo_lock.is_some() |
|
{ |
|
break |
|
} |
|
} |
|
|
|
if !(cargo_toml.is_some() && cargo_toml_orig.is_some()) |
|
{ |
|
anyhow::bail!("some required manifest files missing in .crate archive \ |
|
(cargo_toml={:?} cargo_toml_orig={:?} cargo_lock={:?})", |
|
cargo_toml.is_some(), |
|
cargo_toml_orig.is_some(), |
|
cargo_lock.is_some(), |
|
); |
|
} |
|
|
|
Ok(ManifestFiles { |
|
cargo_toml: cargo_toml.unwrap(), |
|
cargo_toml_orig: cargo_toml_orig.unwrap(), |
|
cargo_lock, |
|
}) |
|
} |
|
|
|
fn extract_readme_from_tar<R: Read>(rdr: R, readme_path: &Path) -> Result<Option<String>, Error> { |
|
let mut archive = tar::Archive::new(rdr); |
|
for entry in archive.entries()? { |
|
let mut entry = entry?; |
|
let path = entry.path()?; |
|
if path == readme_path || path.ends_with(readme_path) { |
|
let mut out = String::new(); |
|
entry.read_to_string(&mut out)?; |
|
return Ok(Some(out)) |
|
} |
|
} |
|
Ok(None) |
|
} |
|
|
|
fn setup_logger() { |
|
let env_filter = EnvFilter::from_default_env(); |
|
let builder = tracing_subscriber::fmt() |
|
.with_env_filter(env_filter) |
|
.with_ansi(true); |
|
builder.init(); |
|
} |
|
|
|
fn load_config_file(opt: &Opt) -> Result<Config, Error> { |
|
if !opt.config_file.exists() { |
|
bail!("path does not exist: {:?}", opt.config_file); |
|
} |
|
let toml = std::fs::read_to_string(&opt.config_file)?; |
|
let mut config: Config = toml::from_str(&toml) |
|
.context("read config file, but unable to parse toml - check \ |
|
format against example config")?; |
|
// augment using command line opts |
|
config.filter_crates = config.filter_crates.or_else(|| opt.filter_crates.clone()); |
|
config.dry_run |= opt.dry_run; |
|
Ok(config) |
|
} |
|
|
|
fn is_hidden(entry: &walkdir::DirEntry) -> bool { |
|
entry |
|
.file_name() |
|
.to_str() |
|
.map(|s| s.starts_with('.')) |
|
.unwrap_or(false) |
|
} |
|
|
|
async fn get_index_metas( |
|
config: &Config, |
|
) -> Result<HashMap<String, Vec<IndexMeta>>, Error> { |
|
let filter = config.compile_filter()?; |
|
let mut n_excl = 0; |
|
|
|
let files: Vec<(String, PathBuf)> = walkdir::WalkDir::new(&config.src.index_dir) |
|
.max_depth(3) |
|
.into_iter() |
|
.filter_entry(|e| !is_hidden(e)) |
|
.filter_map(|res| match res { |
|
Ok(entry) => { |
|
if entry.file_type().is_file() && entry.depth() >= 2 && entry.depth() <= 3 { |
|
let path = entry.into_path(); |
|
let crate_name: &str = path.file_name().and_then(|x| x.to_str()).unwrap_or(""); |
|
if let Some(filter) = filter.as_ref() { |
|
if !filter.is_match(crate_name.as_ref()) { |
|
trace!(%crate_name, "crate excluded by filter"); |
|
n_excl += 1; |
|
return None; |
|
} |
|
} |
|
|
|
debug!(?path, "found crate index metadata file to parse"); |
|
Some((crate_name.to_owned(), path)) |
|
} else { |
|
None |
|
} |
|
} |
|
Err(e) => { |
|
warn!(error = ?e, "walkdir result is error"); |
|
None |
|
} |
|
}) |
|
.collect(); |
|
|
|
let n_files = files.len(); |
|
info!("found {} crate index metadata files to parse", n_files); |
|
|
|
if n_excl > 0 { |
|
warn!( |
|
regex = %config.filter_crates.as_deref().unwrap_or(""), |
|
n_files, |
|
n_excl, |
|
"regex filter (--filter-crates) excluded {} crates", n_excl, |
|
); |
|
} |
|
|
|
let crate_versions: Vec<Result<(String, Vec<IndexMeta>), Error>> = |
|
futures::stream::iter(files.into_iter().map(|(crate_name, path)| { |
|
async move { |
|
let file = tokio::fs::File::open(&path).await.map_err(|e| { |
|
error!(err = ?e, ?path, "failed to open file"); |
|
e |
|
})?; |
|
let buf = tokio::io::BufReader::new(file); |
|
let mut out = Vec::new(); |
|
let mut lines = buf.lines(); |
|
'lines: while let Some(line) = lines.next_line().await? { |
|
let index_meta: IndexMeta = serde_json::from_str(&line) |
|
.map_err(|e| { |
|
error!(err = ?e, ?path, "failed to parse line"); |
|
e |
|
})?; |
|
out.push(index_meta); |
|
} |
|
debug!(crate_name = %out.first().map(|x| x.name.as_str()).unwrap_or("na"), |
|
"parsed {} crate versions from metadata file", out.len() |
|
); |
|
|
|
Ok((crate_name, out)) |
|
} |
|
})) |
|
.buffer_unordered(num_cpus::get()) |
|
.collect() |
|
.await; |
|
|
|
let mut total_number_of_crate_versions = 0; |
|
|
|
// map of crate-name => [IndexMeta] (one per published version) |
|
let crate_versions: HashMap<String, Vec<IndexMeta>> = crate_versions |
|
.into_iter() |
|
.filter_map(|result| match result { |
|
Ok((crate_name, xs)) => { |
|
total_number_of_crate_versions += xs.len(); |
|
Some((crate_name, xs)) |
|
} |
|
Err(e) => { |
|
error!(err = ?e, "parsing metadata failed, skipping file"); |
|
None |
|
} |
|
}) |
|
.collect(); |
|
|
|
info!( |
|
n_files, |
|
n_excl, |
|
n_crates = crate_versions.len(), |
|
total_number_of_crate_versions, |
|
"parsed {} crate version metadata entries from index", |
|
total_number_of_crate_versions, |
|
); |
|
|
|
Ok(crate_versions) |
|
} |
|
|
|
#[derive(Debug, Clone, Deserialize, Eq, PartialEq, Default)] |
|
pub struct PublishWarnings { |
|
#[serde(default)] |
|
pub invalid_categories: Vec<String>, |
|
#[serde(default)] |
|
pub invalid_badges: Vec<String>, |
|
#[serde(default)] |
|
pub other: Vec<String>, |
|
} |
|
|
|
#[derive(Debug, Clone, Deserialize, Eq, PartialEq, Default)] |
|
pub struct PublishResponse { |
|
#[serde(default)] |
|
pub warnings: PublishWarnings, |
|
} |
|
|
|
/// Text of the manifest-related files pulled out of a `.crate` archive.
struct ManifestFiles {
    /// Content of `Cargo.toml`.
    cargo_toml: String,
    /// Content of `Cargo.toml.orig`.
    cargo_toml_orig: String,
    /// Content of `Cargo.lock`, when the archive contains one.
    cargo_lock: Option<String>,
}
|
|
|
struct VersionMeta { |
|
index_meta: IndexMeta, |
|
manifest_files: ManifestFiles, |
|
dot_crate_path: PathBuf, |
|
manifest: ManifestStub, |
|
readme: Option<String>, |
|
tmp: TempDir, |
|
meta: cargo_metadata::Metadata, |
|
} |
|
|
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)] |
|
struct Node<'a> { |
|
name: &'a str, |
|
vers: Version, |
|
} |
|
|
|
// async fn process_crates( |
|
// config: &Config, |
|
// crate_versions: HashMap<String, Vec<IndexMeta>>, |
|
// ) -> Result<(), Error> { |
|
// let http_client = reqwest::Client::builder() |
|
// .user_agent(&config.http.user_agent) |
|
// .build()?; |
|
// |
|
// let publish_url = config.dst.api_url.join("/api/v1/crates/new")?; |
|
// let publish_meta = PublishMeta::new(index_meta, manifest, readme); |
|
// |
|
// debug!("built publish meta using crate index json and the Cargo.toml manifest in the .crate targz archive:\n{:#?}\n", publish_meta); |
|
// |
|
// let publish_meta_json = serde_json::to_vec(&publish_meta)?; |
|
// let payload = serialize_publish_payload(&publish_meta_json, &dot_crate_bytes); |
|
// debug!( |
|
// n_bytes = payload.len(), |
|
// %crate_name, |
|
// %version, |
|
// "serialized publish payload", |
|
// ); |
|
// |
|
// if config.dry_run { |
|
// debug!( |
|
// %crate_name, |
|
// %version, |
|
// %publish_url, |
|
// "skipping publish (--dry-run mode)", |
|
// ); |
|
// continue; |
|
// } |
|
// |
|
// let resp = http_client.put(publish_url.clone()) |
|
// .header(AUTHORIZATION, &config.dst.auth_token) |
|
// .body(payload) |
|
// .send() |
|
// .await?; |
|
// |
|
// debug!(status = ?resp.status(), "rcvd server response to publish request"); |
|
// |
|
// let resp: PublishResponse = resp |
|
// .error_for_status()? |
|
// .json() |
|
// .await?; |
|
// let PublishResponse { warnings } = resp; |
|
// |
|
// let mut any_warnings = false; |
|
// |
|
// for warning in warnings.invalid_categories.iter() { |
|
// warn!(%crate_name, %version, "registry server invalid category warning: {}", warning); |
|
// any_warnings = true; |
|
// } |
|
// |
|
// for warning in warnings.invalid_badges.iter() { |
|
// warn!(%crate_name, %version, "registry server invalid badge warning: {}", warning); |
|
// any_warnings = true; |
|
// } |
|
// |
|
// for warning in warnings.other.iter() { |
|
// warn!(%crate_name, %version, "registry server 'other' warning: {}", warning); |
|
// any_warnings = true; |
|
// } |
|
// |
|
// trace!("server response body:\n{warnings:#?}"); |
|
// |
|
// info!( |
|
// %crate_name, |
|
// %version, |
|
// any_warnings, |
|
// "published crate version in {:?}!", |
|
// begin.elapsed(), |
|
// ); |
|
|
|
fn parse_manifests( |
|
config: &Config, |
|
crate_versions: HashMap<String, Vec<IndexMeta>>, |
|
) -> Result<HashMap<String, Vec<VersionMeta>>, Error> { |
|
let begin = Instant::now(); |
|
|
|
let out: HashMap<String, Vec<VersionMeta>> = crate_versions |
|
// .into_par_iter() |
|
.into_iter() |
|
.map(|(crate_name, versions)| -> Result<(String, Vec<VersionMeta>), Error> { |
|
let begin = Instant::now(); |
|
let mut version_metas = Vec::new(); |
|
for index_meta in versions { |
|
let version = index_meta.vers.clone(); |
|
trace!(%crate_name, %version, "processing crate version"); |
|
let dot_crate_path = config.src.crate_files_dir |
|
.join(&format!("{}/{}/download", crate_name, index_meta.vers)); |
|
verify_file_exists(&dot_crate_path)?; |
|
|
|
trace!(path = ?dot_crate_path, "reading .crate file"); |
|
let dot_crate_bytes = std::fs::read(&dot_crate_path) |
|
.with_context(|| { |
|
format!("failed to read .crate file for \ |
|
{crate_name} v{0} with path {dot_crate_path:?}", |
|
index_meta.vers, |
|
) |
|
})?; |
|
|
|
trace!("extracting Cargo.toml from .crate targz archive"); |
|
let decoder = flate2::read::GzDecoder::new(&dot_crate_bytes[..]); |
|
let manifest_files = extract_manifest_files_from_tar(decoder)?; |
|
if manifest_files.cargo_lock.is_none() { |
|
debug!(%crate_name, %version, "Cargo.lock not present in .crate archive"); |
|
} |
|
let manifest: ManifestStub = toml::from_str(&manifest_files.cargo_toml)?; |
|
let mut readme: Option<String> = None; |
|
if let Some(readme_path) = manifest.package.readme.as_ref() { |
|
let decoder = flate2::read::GzDecoder::new(&dot_crate_bytes[..]); |
|
if let Some(readme_content) = extract_readme_from_tar(decoder, readme_path)? { |
|
trace!(length = readme_content.len(), "extracted readme file content from .crate targz archive"); |
|
readme = Some(readme_content); |
|
} |
|
} |
|
let tmp = TempDir::new()?; |
|
let decoder = flate2::read::GzDecoder::new(&dot_crate_bytes[..]); |
|
tar::Archive::new(decoder).unpack(tmp.path())?; |
|
trace!(tmpdir = ?tmp.path(), "unpacked .crate archive to temp dir"); |
|
let target_dir = tmp.path().join("target"); |
|
std::fs::create_dir(&target_dir)?; |
|
|
|
let meta = cargo_metadata::MetadataCommand::new() |
|
.manifest_path(tmp.path().join(&format!("{crate_name}-{version}/Cargo.toml"))) |
|
//.env("CARGO_TARGET_DIR", &target_dir) |
|
.other_options(vec!["-vv".to_string()]) |
|
.verbose(true) |
|
// .other_options(["--frozen"].into_iter().map(|x| x.to_owned()).collect::<Vec<_>>()) |
|
.exec()?; |
|
|
|
version_metas.push(VersionMeta { |
|
index_meta, |
|
manifest_files, |
|
dot_crate_path, |
|
manifest, |
|
readme, |
|
tmp, |
|
meta, |
|
}); |
|
} |
|
debug!(%crate_name, "parsed {} manifests in {:?}", version_metas.len(), begin.elapsed()); |
|
Ok((crate_name, version_metas)) |
|
}).collect::<Result<_, Error>>()?; |
|
|
|
info!("parsed crate version manifests in {:?}", begin.elapsed()); |
|
|
|
Ok(out) |
|
} |
|
|
|
// fn get_registry_dep<'a>(index_dep: &'a IndexDependency) -> Option<Node<'a>> { |
|
// match index_dep.registry.as_ref() { |
|
// None => Some(Node { name: index_dep.name.as_str(), vers: index_dep.vers.clone() }), |
|
// Some(index) if index.contains("github.com/rust-lang/crates.io-index") => None, |
|
// Some(other) => panic!("unexpected registry value: {}", other), |
|
// } |
|
// } |
|
|
|
// fn build_dependency_graph<'a>( |
|
// crate_versions: &'a HashMap<String, Vec<VersionMeta>>, |
|
// ) -> (StableGraph<Node<'a>, ()>, HashMap<Node<'a>, NodeIndex<u32>>) { |
|
// let begin = Instant::now(); |
|
// |
|
// let mut graph = StableGraph::new(); |
|
// let mut index: HashMap<Node<'a>, NodeIndex<u32>> = Default::default(); |
|
// |
|
// macro_rules! get_ix { |
|
// ($node:expr) => {{ |
|
// let key_exists = ix.contains_key($node); |
|
// if !key_exists { |
|
// let ix = graph.add_node($node.clnoe()); |
|
// index.insert(node.clone(), ix); |
|
// ix |
|
// } else { |
|
// *index[$node] |
|
// } |
|
// }} |
|
// } |
|
// |
|
// for (name, versions) in crate_versions.iter() { |
|
// for version_meta in versions.iter() { |
|
// let v = &version_meta.index_meta; |
|
// let node = Node { name: name.as_str(), vers: v.vers.clone() }; |
|
// let key_exists = ix.contains_key(&node); |
|
// let ix = get_ix!(&node); |
|
// for dep_node in v.deps.iter().filter_map(get_registry_dep) { |
|
// let jx = get_ix!(dep_node); |
|
// graph.add_edge(ix, js, ()); |
|
// } |
|
// } |
|
// } |
|
// |
|
// info!( |
|
// n_nodes = graph.node_count(), |
|
// n_edges = graph.edge_count(), |
|
// "built dependency graph for entire registry in {:?}", begin.elapsed(), |
|
// ); |
|
// |
|
// (graph, index) |
|
// } |
|
|
|
async fn verify_dir_exists<P: AsRef<std::path::Path>>(path: P) -> Result<(), Error> { |
|
match tokio::fs::metadata(path.as_ref()).await { |
|
Ok(meta) if meta.is_dir() => Ok(()), |
|
Ok(meta) /* if ! meta.is_dir() */ => { |
|
debug_assert!( ! meta.is_dir()); |
|
bail!("path exists, but is not a directory: {:?}", path.as_ref()) |
|
} |
|
Err(e) if e.kind() == std::io::ErrorKind::NotFound => { |
|
bail!("path does not exist: {}", path.as_ref().display()); |
|
} |
|
Err(e) => Err(e.into()), |
|
} |
|
} |
|
|
|
fn verify_file_exists<P: AsRef<std::path::Path>>(path: P) -> Result<(), Error> { |
|
match std::fs::metadata(path.as_ref()) { |
|
Ok(meta) if meta.is_file() => Ok(()), |
|
Ok(meta) /* if ! meta.is_file() */ => { |
|
debug_assert!( ! meta.is_file()); |
|
bail!("path exists, but is not a file: {:?}", path.as_ref()) |
|
} |
|
Err(e) if e.kind() == std::io::ErrorKind::NotFound => { |
|
bail!("path does not exist: {}", path.as_ref().display()); |
|
} |
|
Err(e) => Err(e.into()), |
|
} |
|
} |
|
|
|
fn main() -> Result<(), Error> { |
|
let begin = Instant::now(); |
|
|
|
dotenvy::dotenv().ok(); |
|
|
|
let opt = Opt::parse(); |
|
|
|
setup_logger(); |
|
|
|
let config = load_config_file(&opt)?; |
|
|
|
let rt = tokio::runtime::Runtime::new()?; |
|
rt.block_on(verify_dir_exists(&config.src.index_dir))?; |
|
rt.block_on(verify_dir_exists(&config.src.crate_files_dir))?; |
|
|
|
if opt.validate { |
|
println!("{:#?}", config); |
|
return Ok(()) |
|
} |
|
|
|
let krates = rt.block_on(get_index_metas(&config))?; |
|
|
|
let manifests = parse_manifests(&config, krates)?; |
|
|
|
// let (graph, ix) = build_dependency_graph(&manifests); |
|
|
|
drop(manifests); |
|
drop(rt); |
|
|
|
info!("finished in {:?}", begin.elapsed()); |
|
Ok(()) |
|
}
|
|
|