Browse Source

working first iteration for evaluation

feat/mass-publish-tool
Jonathan Strong 1 year ago
parent
commit
b02731ecd6
  1. 2
      .gitignore
  2. 70
      Cargo.lock
  3. 10
      Cargo.toml
  4. 8
      justfile
  5. 24
      publish-config.toml.sample
  6. 501
      src/publish.rs

2
.gitignore vendored

@ -1,4 +1,6 @@
/target /target
*.swp *.swp
config.toml config.toml
publish-config.toml
/output /output
.env

70
Cargo.lock generated

@ -65,6 +65,12 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "anyhow"
version = "1.0.75"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
[[package]] [[package]]
name = "async-compression" name = "async-compression"
version = "0.4.4" version = "0.4.4"
@ -409,6 +415,12 @@ dependencies = [
"crypto-common", "crypto-common",
] ]
[[package]]
name = "dotenvy"
version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
[[package]] [[package]]
name = "dtoa" name = "dtoa"
version = "0.4.8" version = "0.4.8"
@ -436,6 +448,18 @@ dependencies = [
"syn 2.0.38", "syn 2.0.38",
] ]
[[package]]
name = "filetime"
version = "0.2.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4029edd3e734da6fe05b6cd7bd2960760a616bd2ddd0d59a0124746d6272af0"
dependencies = [
"cfg-if",
"libc",
"redox_syscall 0.3.5",
"windows-sys",
]
[[package]] [[package]]
name = "flate2" name = "flate2"
version = "1.0.28" version = "1.0.28"
@ -1094,7 +1118,7 @@ checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"libc", "libc",
"redox_syscall", "redox_syscall 0.4.1",
"smallvec", "smallvec",
"windows-targets", "windows-targets",
] ]
@ -1388,6 +1412,15 @@ dependencies = [
"rand_core 0.3.1", "rand_core 0.3.1",
] ]
[[package]]
name = "redox_syscall"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
dependencies = [
"bitflags",
]
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.4.1" version = "0.4.1"
@ -1445,16 +1478,21 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
name = "registry-backup" name = "registry-backup"
version = "0.4.1" version = "0.4.1"
dependencies = [ dependencies = [
"anyhow",
"chrono", "chrono",
"clap", "clap",
"dotenvy",
"flate2",
"futures", "futures",
"governor", "governor",
"num_cpus", "num_cpus",
"pretty_toa", "pretty_toa",
"regex", "regex",
"reqwest", "reqwest",
"semver",
"serde", "serde",
"serde_json", "serde_json",
"tar",
"tempdir", "tempdir",
"tera", "tera",
"tokio", "tokio",
@ -1613,6 +1651,15 @@ dependencies = [
"untrusted", "untrusted",
] ]
[[package]]
name = "semver"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.190" version = "1.0.190"
@ -1784,6 +1831,17 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "tar"
version = "0.4.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb"
dependencies = [
"filetime",
"libc",
"xattr",
]
[[package]] [[package]]
name = "tempdir" name = "tempdir"
version = "0.3.7" version = "0.3.7"
@ -2208,6 +2266,7 @@ dependencies = [
"form_urlencoded", "form_urlencoded",
"idna 0.4.0", "idna 0.4.0",
"percent-encoding", "percent-encoding",
"serde",
] ]
[[package]] [[package]]
@ -2469,3 +2528,12 @@ dependencies = [
"cfg-if", "cfg-if",
"windows-sys", "windows-sys",
] ]
[[package]]
name = "xattr"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4686009f71ff3e5c4dbcf1a282d0a44db3f021ba69350cd42086b3e5f1c6985"
dependencies = [
"libc",
]

10
Cargo.toml

@ -15,6 +15,10 @@ description = "CLI tool for backup/export of .crate files from a registry server
name = "registry-backup" name = "registry-backup"
path = "src/main.rs" path = "src/main.rs"
[[bin]]
name = "publish"
path = "src/publish.rs"
[[bin]] [[bin]]
name = "generate-readme" name = "generate-readme"
path = "src/generate-readme.rs" path = "src/generate-readme.rs"
@ -38,8 +42,12 @@ pretty_toa = "1"
tera = { version = "1", optional = true } tera = { version = "1", optional = true }
chrono = { version = "0.4", optional = true } chrono = { version = "0.4", optional = true }
regex = "1.6" regex = "1.6"
url = "2" url = { version = "2", features = ["serde"] }
semver = { version = "1", features = ["serde"] }
tar = "0.4.38"
anyhow = "1"
dotenvy = "0.15" dotenvy = "0.15"
flate2 = "1"
[features] [features]
default = [] default = []

8
justfile

@ -26,6 +26,14 @@ debug-build +args='':
release-build +args='': release-build +args='':
@just cargo build --bin registry-backup --release {{args}} @just cargo build --bin registry-backup --release {{args}}
# cargo build wrapper - builds publish tool in debug mode
debug-build-publish +args='':
@just cargo build --bin publish {{args}}
# cargo build --release wrapper - builds publish tool in release mode
release-build-publish +args='':
@just cargo build --bin publish --release {{args}}
# generate updated README.md # generate updated README.md
generate-readme: generate-readme:
just debug-build just debug-build

24
publish-config.toml.sample

@ -0,0 +1,24 @@
# optional field for providing a regex-based filter
# to limit which crates are published to the destination
# registry. only crates with names matching the regex will
# be published.
#
# filter-crates = ""
# do everything except actually publish to the destination registry
dry-run = false
[src]
# path of local dir where crate index repository has been cloned
index-dir = "path/to/cloned/crate-index/repo"
# path of dir where .crate files were downloaded to. use the
# registry-backup tool to quickly and easily download all of
# a registry's files
crate-files-dir = "path/to/crate/files"
[dst]
# the value of the `api` field in the destination registry's
# config.json file (part of the crate index)
api-url = "https://crates.shipyard.rs"
# auth token to use when publishing crate versions
auth-token = "xxx"

501
src/publish.rs

@ -1,35 +1,123 @@
#![allow(unused_labels)]
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::collections::BTreeMap; use std::collections::{BTreeMap, HashMap};
use std::borrow::Cow; use std::io::prelude::*;
use std::time::*;
use serde::Deserialize; use serde::{Serialize, Deserialize};
use clap::Parser; use clap::Parser;
use tracing::{debug, error, info, warn}; use tracing::{debug, error, info, trace, warn};
use tracing_subscriber::filter::EnvFilter; use tracing_subscriber::filter::EnvFilter;
use url::Url; use url::Url;
use anyhow::{anyhow, bail, Error}; use anyhow::{anyhow, bail, Error, Context};
use semver::Version;
use futures::stream::StreamExt;
use tokio::io::AsyncBufReadExt;
use reqwest::header::AUTHORIZATION;
// Command line options for the `publish` binary.
//
// NOTE(review): clap's derive presumably surfaces the `///` comments on the
// fields below as `--help` text, so treat their wording as user-facing output.
//
// `dry_run` and `filter_crates` are merged into the loaded `Config` by
// `load_config_file`; `validate` and `config_file` are only read from here.
#[derive(Parser, Debug)]
#[clap(author, version, global_setting(clap::AppSettings::DeriveDisplayOrder))]
struct Opt {
/// Config file with source directories and destination registry info
#[clap(short, long, value_name = "PATH")]
pub config_file: PathBuf,
/// Perform all the work of generating `cargo publish` payloads,
/// but don't send them to the destination registry server
#[clap(long)]
pub dry_run: bool,
/// Load config file, validate the settings, and display the final loaded content
/// to stdout, then exit
#[clap(long)]
pub validate: bool,
/// Use to limit which crates from the source registry are published to the
/// destination registry. Expects a regular expression which will be matched
/// against the names of crates. Only crates with names that match the regex
/// will be published. This field may also be specified at the top level of
/// the config file.
#[clap(long, value_name = "REGEX", alias = "filter")]
pub filter_crates: Option<String>,
}
#[derive(Debug, Clone, Deserialize)] #[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "kebab-case")] #[serde(rename_all = "kebab-case")]
pub struct DestinationRegistryConfig { pub struct DestinationRegistryConfig {
#[serde(alias = "api")]
pub api_url: Url, pub api_url: Url,
pub token: String, #[serde(alias = "token")]
pub auth_token: String,
} }
#[derive(Debug, Clone, Deserialize)] #[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "kebab-case")] #[serde(rename_all = "kebab-case")]
pub struct SourceRegistryConfig { pub struct SourceRegistryConfig {
#[serde(alias = "index")]
pub index_dir: PathBuf, pub index_dir: PathBuf,
#[serde(alias = "crate-files")]
pub crate_files_dir: PathBuf, pub crate_files_dir: PathBuf,
} }
/// Settings for the HTTP client used to send publish requests.
///
/// Deserialized with kebab-case keys (so the field below is `user-agent` in the
/// config file). Every field has a default, and `Config::http` itself is
/// `#[serde(default)]`, so the whole section may be omitted.
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct HttpConfig {
/// Value of user-agent HTTP header
#[serde(default = "default_user_agent")]
pub user_agent: String,
}
/// User-agent sent when the config file does not override it: a fixed tool name
/// followed by this crate's version (`CARGO_PKG_VERSION`, resolved at compile time).
const DEFAULT_USER_AGENT: &str = concat!("shipyard.rs-publish-tool/v", env!("CARGO_PKG_VERSION"));
/// Owned copy of [`DEFAULT_USER_AGENT`]; referenced by name from the
/// `#[serde(default = "...")]` attribute on `HttpConfig::user_agent`.
fn default_user_agent() -> String {
    String::from(DEFAULT_USER_AGENT)
}
impl Default for HttpConfig {
fn default() -> Self {
Self {
user_agent: default_user_agent(),
}
}
}
#[derive(Debug, Clone, Deserialize)] #[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "kebab-case")] #[serde(rename_all = "kebab-case")]
pub struct Config { pub struct Config {
/// Do everything except actually publish to the destination registry. Can also be
/// toggled using the --dry-run command line flag.
#[serde(default)]
pub dry_run: bool,
/// Local directories with source registry files
#[serde(alias = "source")] #[serde(alias = "source")]
pub src: SourceRegistryConfig, pub src: SourceRegistryConfig,
/// Server information and authentication needed to publish to the
/// destination registry
#[serde(alias = "destination")] #[serde(alias = "destination")]
pub dst: DestinationRegistryConfig, pub dst: DestinationRegistryConfig,
/// Settings controlling the HTTP publish requests to the destination registry
#[serde(default)]
pub http: HttpConfig,
/// Use to limit which crates from the source registry are published to the
/// destination registry. Expects a regular expression which will be matched
/// against the names of crates. Only crates with names that match the regex
/// will be published.
#[serde(default, alias = "filter")]
pub filter_crates: Option<String>,
}
impl Config {
/// Compile the optional `filter_crates` pattern.
///
/// Returns `Ok(None)` when no filter is configured, `Ok(Some(regex))` when the
/// pattern compiles, and the regex error (after logging it) otherwise.
pub fn compile_filter(&self) -> Result<Option<regex::Regex>, Error> {
    let compiled = self
        .filter_crates
        .as_ref()
        .map(|regex| {
            // the closure parameter is deliberately named `regex`: the tracing
            // shorthand `%regex` uses the variable name as the log field name
            regex::Regex::new(regex).map_err(|e| {
                error!(%regex, err = ?e, "regex failed to compile: {}", e);
                e
            })
        })
        .transpose()?;
    Ok(compiled)
}
} }
/// fields we need from Cargo.toml [package] section to combine with IndexMeta /// fields we need from Cargo.toml [package] section to combine with IndexMeta
@ -51,6 +139,7 @@ pub struct PackageStub {
pub repository: Option<String>, pub repository: Option<String>,
pub homepage: Option<String>, pub homepage: Option<String>,
pub documentation: Option<String>, pub documentation: Option<String>,
pub links: Option<String>,
} }
/// for parsing Cargo.toml to extract missing PublishMeta fields that do not appear /// for parsing Cargo.toml to extract missing PublishMeta fields that do not appear
@ -63,43 +152,34 @@ pub struct ManifestStub {
/// full definition of cargo publish json /// full definition of cargo publish json
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct PublishMeta { pub struct PublishMeta {
#[serde(borrow)]
pub name: String, pub name: String,
#[serde(alias = "version")] #[serde(alias = "version")]
pub vers: semver::Version, pub vers: semver::Version,
#[serde(alias = "dependencies")] #[serde(alias = "dependencies")]
#[serde(default)] #[serde(default)]
pub deps: Vec<PublishDependency>, pub deps: Vec<PublishDependency>,
#[serde(default, borrow)] #[serde(default)]
pub features: BTreeMap<String, Vec<String>>, pub features: BTreeMap<String, Vec<String>>,
#[serde(default, borrow)] #[serde(default)]
pub authors: Vec<String>, pub authors: Vec<String>,
#[serde(borrow)]
pub description: Option<String>, pub description: Option<String>,
#[serde(borrow)]
pub documentation: Option<String>, pub documentation: Option<String>,
#[serde(borrow)]
pub homepage: Option<String>, pub homepage: Option<String>,
#[serde(borrow)]
pub readme: Option<String>, pub readme: Option<String>,
#[serde(borrow)] pub readme_file: Option<PathBuf>,
pub readme_file: Option<String>, #[serde(default)]
#[serde(default, borrow)]
pub keywords: Vec<String>, pub keywords: Vec<String>,
#[serde(default, borrow)] #[serde(default)]
pub categories: Vec<String>, pub categories: Vec<String>,
#[serde(borrow)]
pub license: Option<String>, pub license: Option<String>,
#[serde(borrow)] pub license_file: Option<PathBuf>,
pub license_file: Option<String>,
#[serde(borrow)]
pub repository: Option<String>, pub repository: Option<String>,
#[serde(skip_serializing_if = "Option::is_none", borrow)] #[serde(skip_serializing_if = "Option::is_none")]
pub links: Option<String>, pub links: Option<String>,
#[serde(skip_serializing_if = "Option::is_none", borrow)] #[serde(skip_serializing_if = "Option::is_none")]
pub badges: Option<BTreeMap<String, String>>, pub badges: Option<BTreeMap<String, String>>,
/// from ancient cargo versions /// from ancient cargo versions
#[serde(skip_serializing_if = "Option::is_none", borrow)] #[serde(skip_serializing_if = "Option::is_none")]
pub features2: Option<BTreeMap<String, Vec<String>>>, pub features2: Option<BTreeMap<String, Vec<String>>>,
/// from ancient cargo versions /// from ancient cargo versions
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
@ -110,35 +190,47 @@ pub struct PublishMeta {
pub struct PublishDependency { pub struct PublishDependency {
pub optional: bool, pub optional: bool,
pub default_features: bool, pub default_features: bool,
#[serde(borrow)]
pub name: String, pub name: String,
#[serde(borrow)]
pub features: Vec<String>, pub features: Vec<String>,
// cargo and crates-io have this as string // cargo and crates-io have this as string
#[serde(alias = "req")] #[serde(alias = "req")]
pub version_req: semver::VersionReq, pub version_req: semver::VersionReq,
#[serde(borrow)]
pub target: Option<String>, pub target: Option<String>,
// crates-io has this as option // crates-io has this as option
pub kind: PublishDependencyKind, pub kind: DependencyKind,
#[serde(skip_serializing_if = "Option::is_none", borrow)] #[serde(skip_serializing_if = "Option::is_none")]
pub registry: Option<String>, pub registry: Option<String>,
#[serde(skip_serializing_if = "Option::is_none", borrow)] #[serde(skip_serializing_if = "Option::is_none")]
pub explicit_name_in_toml: Option<String>, pub explicit_name_in_toml: Option<String>,
} }
/// Convert a dependency entry as stored in the registry index into the shape
/// expected in the publish payload.
///
/// The two formats name a *renamed* dependency in opposite ways:
///
/// * index entry: `name` is the alias used in Cargo.toml, `package` is the
///   real crate name (only present when the dependency is renamed)
/// * publish payload: `name` is the real crate name, `explicit_name_in_toml`
///   is the alias
///
/// so the two fields have to be swapped when `package` is set, rather than
/// copied across positionally.
impl From<IndexDependency> for PublishDependency {
    fn from(dep: IndexDependency) -> Self {
        let (name, explicit_name_in_toml) = match dep.package {
            // renamed dependency: `package` is the real crate, `name` the alias
            Some(package) if package != dep.name => (package, Some(dep.name)),
            // not renamed (or `package` redundantly repeats the name)
            _ => (dep.name, None),
        };
        Self {
            name,
            features: dep.features,
            default_features: dep.default_features,
            optional: dep.optional,
            target: dep.target,
            kind: dep.kind,
            registry: dep.registry,
            version_req: dep.req,
            explicit_name_in_toml,
        }
    }
}
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct IndexMeta { pub struct IndexMeta {
// same everything as publish metadata // same everything as publish metadata
#[serde(borrow)]
pub name: String, pub name: String,
#[serde(alias = "version")] #[serde(alias = "version")]
pub vers: semver::Version, pub vers: semver::Version,
#[serde(alias = "dependencies", borrow)] #[serde(alias = "dependencies")]
pub features: BTreeMap<String, Vec<String>>, pub features: BTreeMap<String, Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none", borrow)] #[serde(skip_serializing_if = "Option::is_none")]
pub links: Option<String>, pub links: Option<String>,
#[serde(skip_serializing_if = "Option::is_none", borrow)] #[serde(skip_serializing_if = "Option::is_none")]
pub badges: Option<BTreeMap<String, String>>, pub badges: Option<BTreeMap<String, String>>,
// modified format/field names // modified format/field names
@ -150,7 +242,7 @@ pub struct IndexMeta {
// ancient fields, these were actually written // ancient fields, these were actually written
// on sanskrit on stone tablets // on sanskrit on stone tablets
#[serde(skip_serializing_if = "Option::is_none", borrow)] #[serde(skip_serializing_if = "Option::is_none")]
pub features2: Option<BTreeMap<String, Vec<String>>>, pub features2: Option<BTreeMap<String, Vec<String>>>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub v: Option<u8>, pub v: Option<u8>,
@ -160,23 +252,20 @@ pub struct IndexMeta {
pub struct IndexDependency { pub struct IndexDependency {
/// corresponds to `explicit_name_in_toml` field in `publish::Dependency` /// corresponds to `explicit_name_in_toml` field in `publish::Dependency`
/// when a dep is renamed in Cargo.toml, otherwise same as `package`. /// when a dep is renamed in Cargo.toml, otherwise same as `package`.
#[serde(borrow)]
pub name: String, pub name: String,
/// corresponds to `name` in `publish::Dependency` /// corresponds to `name` in `publish::Dependency`
#[serde(skip_serializing_if = "Option::is_none", borrow)] #[serde(skip_serializing_if = "Option::is_none")]
pub package: Option<String>, pub package: Option<String>,
/// in publish meta, this field is called `version_req`, and the index /// in publish meta, this field is called `version_req`, and the index
/// format requires it to be renamed to `req` /// format requires it to be renamed to `req`
#[serde(alias = "version_req")] #[serde(alias = "version_req")]
pub req: semver::VersionReq, pub req: semver::VersionReq,
#[serde(borrow)]
pub features: Vec<String>, pub features: Vec<String>,
pub optional: bool, pub optional: bool,
pub default_features: bool, pub default_features: bool,
#[serde(borrow)]
pub target: Option<String>, pub target: Option<String>,
pub kind: DependencyKind, pub kind: DependencyKind,
#[serde(skip_serializing_if = "Option::is_none", borrow)] #[serde(skip_serializing_if = "Option::is_none")]
pub registry: Option<String>, pub registry: Option<String>,
} }
@ -192,6 +281,58 @@ pub enum DependencyKind {
Dev, Dev,
} }
impl PublishMeta {
pub fn new(
index_meta: IndexMeta,
manifest: ManifestStub,
readme: Option<String>,
) -> Self {
let ManifestStub { package } = manifest;
PublishMeta {
name: package.name,
vers: package.version,
deps: index_meta.deps.into_iter().map(From::from).collect(),
features: index_meta.features,
authors: package.authors,
description: package.description,
documentation: package.documentation,
homepage: package.homepage,
readme,
readme_file: package.readme,
keywords: package.keywords,
categories: package.categories,
license: package.license,
license_file: package.license_file,
repository: package.repository,
links: package.links,
badges: index_meta.badges,
features2: index_meta.features2,
v: index_meta.v,
}
}
}
/// Frame the publish request body.
///
/// Layout of the returned buffer:
///
/// 1. length of `publish_meta_json` as a little-endian `u32`
/// 2. `publish_meta_json`
/// 3. length of `dot_crate_bytes` as a little-endian `u32`
/// 4. `dot_crate_bytes`
///
/// # Panics
///
/// Panics if either input is longer than `u32::MAX` bytes.
fn serialize_publish_payload(
    publish_meta_json: &[u8],
    dot_crate_bytes: &[u8],
) -> Vec<u8> {
    assert!(publish_meta_json.len() <= u32::MAX as usize);
    assert!(dot_crate_bytes.len() <= u32::MAX as usize);

    // the casts cannot truncate: both lengths were just checked against u32::MAX
    let json_len_prefix = (publish_meta_json.len() as u32).to_le_bytes();
    let crate_len_prefix = (dot_crate_bytes.len() as u32).to_le_bytes();

    [
        &json_len_prefix[..],
        publish_meta_json,
        &crate_len_prefix[..],
        dot_crate_bytes,
    ]
    .concat()
}
fn extract_manifest_from_tar<R: Read>(rdr: R) -> Result<Option<String>, Error> { fn extract_manifest_from_tar<R: Read>(rdr: R) -> Result<Option<String>, Error> {
let mut archive = tar::Archive::new(rdr); let mut archive = tar::Archive::new(rdr);
for entry in archive.entries()? { for entry in archive.entries()? {
@ -221,4 +362,284 @@ fn extract_readme_from_tar<R: Read>(rdr: R, readme_path: &Path) -> Result<Option
Ok(None) Ok(None)
} }
/// Install the global tracing subscriber: formatted output with ANSI colors,
/// with the filter built by `EnvFilter::from_default_env()`.
fn setup_logger() {
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::from_default_env())
        .with_ansi(true)
        .init();
}
fn load_config_file(opt: &Opt) -> Result<Config, Error> {
if !opt.config_file.exists() {
bail!("path does not exist: {:?}", opt.config_file);
}
let toml = std::fs::read_to_string(&opt.config_file)?;
let mut config: Config = toml::from_str(&toml)
.context("read config file, but unable to parse toml - check \
format against example config")?;
// augment using command line opts
config.filter_crates = config.filter_crates.or_else(|| opt.filter_crates.clone());
config.dry_run |= opt.dry_run;
Ok(config)
}
/// Whether a directory walk entry is a dot-file or dot-directory (for example
/// `.git`) that should be pruned from the walk.
///
/// The walk root itself (depth 0) is never considered hidden. Otherwise an
/// index directory given as `.` or as a dot-directory such as `.index` would
/// be rejected by `filter_entry` before anything beneath it is visited, and
/// no crates would be found at all.
///
/// Names that are not valid UTF-8 are treated as not hidden.
fn is_hidden(entry: &walkdir::DirEntry) -> bool {
    entry.depth() > 0
        && entry
            .file_name()
            .to_str()
            .map_or(false, |name| name.starts_with('.'))
}
/// Walk the cloned index directory (`config.src.index_dir`) and parse every
/// crate metadata file found there.
///
/// Returns a map of crate name => one `IndexMeta` per line of that crate's
/// index file. Crates whose name does not match the optional regex filter are
/// skipped. A file that cannot be opened or contains a line that fails to
/// parse is logged and left out of the result; it does not fail the call.
///
/// # Errors
///
/// Only fails if the configured filter regex does not compile.
async fn get_index_metas(
config: &Config,
) -> Result<HashMap<String, Vec<IndexMeta>>, Error> {
let filter = config.compile_filter()?;
// number of crates skipped because of the regex filter
let mut n_excl = 0;
// phase 1: collect (crate name, path) for each candidate index file
let files: Vec<(String, PathBuf)> = walkdir::WalkDir::new(&config.src.index_dir)
.max_depth(3)
.into_iter()
.filter_entry(|e| !is_hidden(e))
.filter_map(|res| match res {
Ok(entry) => {
// only regular files 2 or 3 levels below the index root are treated as
// crate metadata files (presumably matching the sharded index layout)
if entry.file_type().is_file() && entry.depth() >= 2 && entry.depth() <= 3 {
let path = entry.into_path();
// the file name is used as the crate name; a non-UTF-8 name becomes ""
let crate_name: &str = path.file_name().and_then(|x| x.to_str()).unwrap_or("");
if let Some(filter) = filter.as_ref() {
if !filter.is_match(crate_name.as_ref()) {
trace!(%crate_name, "crate excluded by filter");
n_excl += 1;
return None;
}
}
debug!(?path, "found crate index metadata file to parse");
Some((crate_name.to_owned(), path))
} else {
None
}
}
Err(e) => {
// walk errors are logged and the entry is skipped
warn!(error = ?e, "walkdir result is error");
None
}
})
.collect();
let n_files = files.len();
info!("found {} crate index metadata files to parse", n_files);
if n_excl > 0 {
warn!(
regex = %config.filter_crates.as_deref().unwrap_or(""),
n_files,
n_excl,
"regex filter (--filter-crates) excluded {} crates", n_excl,
);
}
// phase 2: read the files concurrently (up to `num_cpus::get()` at a time);
// each line of a file is parsed as one JSON-encoded `IndexMeta`
let crate_versions: Vec<Result<(String, Vec<IndexMeta>), Error>> =
futures::stream::iter(files.into_iter().map(|(crate_name, path)| {
async move {
let file = tokio::fs::File::open(&path).await.map_err(|e| {
error!(err = ?e, ?path, "failed to open file");
e
})?;
let buf = tokio::io::BufReader::new(file);
let mut out = Vec::new();
let mut lines = buf.lines();
'lines: while let Some(line) = lines.next_line().await? {
// the first line that fails to parse aborts this file with an error
let index_meta: IndexMeta = serde_json::from_str(&line)
.map_err(|e| {
error!(err = ?e, ?path, "failed to parse line");
e
})?;
out.push(index_meta);
}
debug!(crate_name = %out.first().map(|x| x.name.as_str()).unwrap_or("na"),
"parsed {} crate versions from metadata file", out.len()
);
Ok((crate_name, out))
}
}))
.buffer_unordered(num_cpus::get())
.collect()
.await;
let mut total_number_of_crate_versions = 0;
// map of crate-name => [IndexMeta] (one per published version)
// phase 3: keep the files that parsed, log and drop the ones that failed
let crate_versions: HashMap<String, Vec<IndexMeta>> = crate_versions
.into_iter()
.filter_map(|result| match result {
Ok((crate_name, xs)) => {
total_number_of_crate_versions += xs.len();
Some((crate_name, xs))
}
Err(e) => {
error!(err = ?e, "parsing metadata failed, skipping file");
None
}
})
.collect();
info!(
n_files,
n_excl,
n_crates = crate_versions.len(),
total_number_of_crate_versions,
"parsed {} crate version metadata entries from index",
total_number_of_crate_versions,
);
Ok(crate_versions)
}
/// Build a publish payload for every crate version in `crate_versions` and,
/// unless `config.dry_run` is set, PUT it to the destination registry.
///
/// Versions are handled one at a time, sequentially. For each version the
/// `.crate` file is read from
/// `<crate_files_dir>/<crate name>/<version>/download`, its Cargo.toml (and
/// readme, if the manifest names one) is extracted from the archive, and the
/// result is combined with the index metadata into the publish JSON.
///
/// # Errors
///
/// The first failure of any kind (missing `.crate` file, unreadable archive,
/// manifest parse error, HTTP error or non-success status) is returned
/// immediately; remaining crate versions are not processed.
///
/// NOTE(review): crates are visited in `HashMap` iteration order, which is
/// unspecified, so a crate is not guaranteed to be published after the crates
/// it depends on - confirm the destination registry tolerates that.
async fn process_crates(
config: &Config,
crate_versions: HashMap<String, Vec<IndexMeta>>,
) -> Result<(), Error> {
let http_client = reqwest::Client::builder()
.user_agent(&config.http.user_agent)
.build()?;
// NOTE(review): the joined path starts with `/`, so any path component of
// `api_url` is presumably replaced rather than extended - confirm intended
let publish_url = config.dst.api_url.join("/api/v1/crates/new")?;
for (crate_name, versions) in crate_versions {
for index_meta in versions {
// cloned up front because `index_meta` is moved into `PublishMeta::new` below
let version = index_meta.vers.clone();
debug!(%crate_name, %version, "processing crate version");
let dot_crate_path = config.src.crate_files_dir
.join(&format!("{}/{}/download", crate_name, index_meta.vers));
verify_file_exists(&dot_crate_path).await?;
debug!(path = ?dot_crate_path, "reading .crate file");
let dot_crate_bytes = tokio::fs::read(&dot_crate_path)
.await
.with_context(|| {
format!("failed to read .crate file for \
{crate_name} v{0} with path {dot_crate_path:?}",
index_meta.vers,
)
})?;
debug!("extracting Cargo.toml from .crate targz archive");
let decoder = flate2::read::GzDecoder::new(&dot_crate_bytes[..]);
let manifest_toml = extract_manifest_from_tar(decoder)?
.ok_or_else(|| anyhow!("Cargo.toml not found in .crate targz archive"))?;
let manifest: ManifestStub = toml::from_str(&manifest_toml)?;
let mut readme: Option<String> = None;
if let Some(readme_path) = manifest.package.readme.as_ref() {
// a fresh decoder over the same bytes: the archive is scanned a second
// time from the start to look for the readme
let decoder = flate2::read::GzDecoder::new(&dot_crate_bytes[..]);
if let Some(readme_content) = extract_readme_from_tar(decoder, readme_path)? {
debug!(length = readme_content.len(), "extracted readme file content from .crate targz archive");
readme = Some(readme_content);
}
}
let publish_meta = PublishMeta::new(index_meta, manifest, readme);
let publish_meta_json = serde_json::to_vec(&publish_meta)?;
// request body = length-prefixed JSON metadata followed by the
// length-prefixed .crate bytes (see `serialize_publish_payload`)
let payload = serialize_publish_payload(&publish_meta_json, &dot_crate_bytes);
debug!(
n_bytes = payload.len(),
%crate_name,
%version,
"serialized publish payload",
);
if config.dry_run {
debug!(
%crate_name,
%version,
%publish_url,
"skipping publish (--dry-run mode)",
);
continue;
}
// the auth token is sent verbatim as the Authorization header value
let resp = http_client.put(publish_url.clone())
.header(AUTHORIZATION, &config.dst.auth_token)
.body(payload)
.send()
.await?;
debug!(status = ?resp.status(), "rcvd server response to publish request");
// a non-success status becomes an error here, before the body is read
let resp_body: serde_json::Value = resp
.error_for_status()?
.json()
.await?;
debug!("server response body:\n{resp_body:#?}");
info!(
%crate_name,
%version,
"published crate version",
);
}
}
Ok(())
}
/// Check that `path` exists and is a directory.
///
/// # Errors
///
/// Returns an error if the path is missing, exists but is not a directory,
/// or its metadata cannot be read for any other reason.
async fn verify_dir_exists<P: AsRef<std::path::Path>>(path: P) -> Result<(), Error> {
    let target = path.as_ref();
    let meta = match tokio::fs::metadata(target).await {
        Ok(meta) => meta,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            bail!("path does not exist: {}", target.display())
        }
        Err(e) => return Err(e.into()),
    };
    if !meta.is_dir() {
        bail!("path exists, but is not a directory: {:?}", target);
    }
    Ok(())
}
/// Check that `path` exists and is a regular file.
///
/// # Errors
///
/// Returns an error if the path is missing, exists but is not a file, or its
/// metadata cannot be read for any other reason.
async fn verify_file_exists<P: AsRef<std::path::Path>>(path: P) -> Result<(), Error> {
    let target = path.as_ref();
    let meta = match tokio::fs::metadata(target).await {
        Ok(meta) => meta,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            bail!("path does not exist: {}", target.display())
        }
        Err(e) => return Err(e.into()),
    };
    if !meta.is_file() {
        bail!("path exists, but is not a file: {:?}", target);
    }
    Ok(())
}
/// Entry point: load `.env` and the config file, check the source
/// directories, then either print the validated config (`--validate`) or
/// parse the index and publish every crate version found.
fn main() -> Result<(), Error> {
    let begin = Instant::now();

    // a missing .env file is not an error
    dotenvy::dotenv().ok();

    let opt = Opt::parse();
    setup_logger();
    let config = load_config_file(&opt)?;

    let rt = tokio::runtime::Runtime::new()?;

    // both source directories must exist, even when only validating
    for dir in [&config.src.index_dir, &config.src.crate_files_dir] {
        rt.block_on(verify_dir_exists(dir))?;
    }

    if opt.validate {
        println!("{:#?}", config);
        return Ok(());
    }

    let krates = rt.block_on(get_index_metas(&config))?;
    rt.block_on(process_crates(&config, krates))?;

    info!("finished in {:?}", begin.elapsed());
    Ok(())
}

Loading…
Cancel
Save