From b02731ecd642bf2d59e0eb9baa96a5a705235f3f Mon Sep 17 00:00:00 2001 From: Jonathan Strong Date: Fri, 3 Nov 2023 04:21:29 -0400 Subject: [PATCH] working first iteration for evaluation --- .gitignore | 2 + Cargo.lock | 70 +++++- Cargo.toml | 10 +- justfile | 8 + publish-config.toml.sample | 24 ++ src/publish.rs | 501 ++++++++++++++++++++++++++++++++++--- 6 files changed, 573 insertions(+), 42 deletions(-) create mode 100644 publish-config.toml.sample diff --git a/.gitignore b/.gitignore index 63118b1..4b196c8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ /target *.swp config.toml +publish-config.toml /output +.env diff --git a/Cargo.lock b/Cargo.lock index eba849b..906e4b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -65,6 +65,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "anyhow" +version = "1.0.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" + [[package]] name = "async-compression" version = "0.4.4" @@ -409,6 +415,12 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + [[package]] name = "dtoa" version = "0.4.8" @@ -436,6 +448,18 @@ dependencies = [ "syn 2.0.38", ] +[[package]] +name = "filetime" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4029edd3e734da6fe05b6cd7bd2960760a616bd2ddd0d59a0124746d6272af0" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.3.5", + "windows-sys", +] + [[package]] name = "flate2" version = "1.0.28" @@ -1094,7 +1118,7 @@ checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.4.1", "smallvec", "windows-targets", ] @@ -1388,6 +1412,15 @@ dependencies = [ "rand_core 0.3.1", ] 
+[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags", +] + [[package]] name = "redox_syscall" version = "0.4.1" @@ -1445,16 +1478,21 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" name = "registry-backup" version = "0.4.1" dependencies = [ + "anyhow", "chrono", "clap", + "dotenvy", + "flate2", "futures", "governor", "num_cpus", "pretty_toa", "regex", "reqwest", + "semver", "serde", "serde_json", + "tar", "tempdir", "tera", "tokio", @@ -1613,6 +1651,15 @@ dependencies = [ "untrusted", ] +[[package]] +name = "semver" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" +dependencies = [ + "serde", +] + [[package]] name = "serde" version = "1.0.190" @@ -1784,6 +1831,17 @@ dependencies = [ "libc", ] +[[package]] +name = "tar" +version = "0.4.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tempdir" version = "0.3.7" @@ -2208,6 +2266,7 @@ dependencies = [ "form_urlencoded", "idna 0.4.0", "percent-encoding", + "serde", ] [[package]] @@ -2469,3 +2528,12 @@ dependencies = [ "cfg-if", "windows-sys", ] + +[[package]] +name = "xattr" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4686009f71ff3e5c4dbcf1a282d0a44db3f021ba69350cd42086b3e5f1c6985" +dependencies = [ + "libc", +] diff --git a/Cargo.toml b/Cargo.toml index ecf2f94..19f46b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,10 @@ description = "CLI tool for backup/export of .crate files from a registry server name = "registry-backup" path = "src/main.rs" +[[bin]] +name 
= "publish" +path = "src/publish.rs" + [[bin]] name = "generate-readme" path = "src/generate-readme.rs" @@ -38,8 +42,12 @@ pretty_toa = "1" tera = { version = "1", optional = true } chrono = { version = "0.4", optional = true } regex = "1.6" -url = "2" +url = { version = "2", features = ["serde"] } +semver = { version = "1", features = ["serde"] } +tar = "0.4.38" +anyhow = "1" dotenvy = "0.15" +flate2 = "1" [features] default = [] diff --git a/justfile b/justfile index ff4c163..197567e 100644 --- a/justfile +++ b/justfile @@ -26,6 +26,14 @@ debug-build +args='': release-build +args='': @just cargo build --bin registry-backup --release {{args}} +# cargo build wrapper - builds publish tool in debug mode +debug-build-publish +args='': + @just cargo build --bin publish {{args}} + +# cargo build --release wrapper - builds publish tool in release mode +release-build-publish +args='': + @just cargo build --bin publish --release {{args}} + # generate updated README.md generate-readme: just debug-build diff --git a/publish-config.toml.sample b/publish-config.toml.sample new file mode 100644 index 0000000..e4287fb --- /dev/null +++ b/publish-config.toml.sample @@ -0,0 +1,24 @@ +# optional field for providing a regex-based filter +# to limit which crates are published to the destination +# registry. only crates with names matching the regex will +# be published. +# +# filter-crates = "" + +# do everything except actually publish to the destination registry +dry-run = false + +[src] +# path of local dir where crate index repository has been cloned +index-dir = "path/to/cloned/crate-index/repo" +# path of dir where .crate files were downloaded to. 
use the +# registry-backup tool to quickly and easily download all of +# a registry's files +crate-files-dir = "path/to/crate/files" + +[dst] +# the value of the `api` field in the destination registry's +# config.json file (part of the crate index) +api-url = "https://crates.shipyard.rs" +# auth token to use when publishing crate versions +auth-token = "xxx" diff --git a/src/publish.rs b/src/publish.rs index b4da7c1..4a7cde9 100644 --- a/src/publish.rs +++ b/src/publish.rs @@ -1,35 +1,123 @@ +#![allow(unused_labels)] + use std::path::{Path, PathBuf}; -use std::collections::BTreeMap; -use std::borrow::Cow; +use std::collections::{BTreeMap, HashMap}; +use std::io::prelude::*; +use std::time::*; -use serde::Deserialize; +use serde::{Serialize, Deserialize}; use clap::Parser; -use tracing::{debug, error, info, warn}; +use tracing::{debug, error, info, trace, warn}; use tracing_subscriber::filter::EnvFilter; use url::Url; -use anyhow::{anyhow, bail, Error}; +use anyhow::{anyhow, bail, Error, Context}; +use semver::Version; +use futures::stream::StreamExt; +use tokio::io::AsyncBufReadExt; +use reqwest::header::AUTHORIZATION; + +#[derive(Parser, Debug)] +#[clap(author, version, global_setting(clap::AppSettings::DeriveDisplayOrder))] +struct Opt { + /// Config file with source directories and destination registry info + #[clap(short, long, value_name = "PATH")] + pub config_file: PathBuf, + /// Perform all the work of generating `cargo publish` payloads, + /// but don't send them to the destination registry server + #[clap(long)] + pub dry_run: bool, + /// Load config file, validate the settings, and display the final loaded content + /// to stdout, then exit + #[clap(long)] + pub validate: bool, + + /// Use to limit which crates from the source registry are published to the + /// destination registry. Expects a regular expression which will be matched + /// against the names of crates. Only crates with names that match the regex + /// will be published. 
This field may also be specified at the top level of + /// the config file. + #[clap(long, value_name = "REGEX", alias = "filter")] + pub filter_crates: Option, +} #[derive(Debug, Clone, Deserialize)] #[serde(rename_all = "kebab-case")] pub struct DestinationRegistryConfig { + #[serde(alias = "api")] pub api_url: Url, - pub token: String, + #[serde(alias = "token")] + pub auth_token: String, } #[derive(Debug, Clone, Deserialize)] #[serde(rename_all = "kebab-case")] pub struct SourceRegistryConfig { + #[serde(alias = "index")] pub index_dir: PathBuf, + #[serde(alias = "crate-files")] pub crate_files_dir: PathBuf, } +#[derive(Deserialize, Debug, Clone)] +#[serde(rename_all = "kebab-case")] +pub struct HttpConfig { + /// Value of user-agent HTTP header + #[serde(default = "default_user_agent")] + pub user_agent: String, +} + +const DEFAULT_USER_AGENT: &str = concat!("shipyard.rs-publish-tool/v", env!("CARGO_PKG_VERSION")); + +fn default_user_agent() -> String { + DEFAULT_USER_AGENT.to_string() +} + +impl Default for HttpConfig { + fn default() -> Self { + Self { + user_agent: default_user_agent(), + } + } +} + #[derive(Debug, Clone, Deserialize)] #[serde(rename_all = "kebab-case")] pub struct Config { + /// Do everything except actually publish to the destination registry. Can also be + /// toggled using the --dry-run command line flag. + #[serde(default)] + pub dry_run: bool, + /// Local directories with source registry files #[serde(alias = "source")] pub src: SourceRegistryConfig, + /// Server information and authentication needed to publish to the + /// destination registry #[serde(alias = "destination")] pub dst: DestinationRegistryConfig, + /// Settings controlling the HTTP publish requests to the destination registry + #[serde(default)] + pub http: HttpConfig, + /// Use to limit which crates from the source registry are published to the + /// destination registry. Expects a regular expression which will be matched + /// against the names of crates. 
Only crates with names that match the regex + /// will be published. + #[serde(default, alias = "filter")] + pub filter_crates: Option, +} + +impl Config { + pub fn compile_filter(&self) -> Result, Error> { + match self.filter_crates.as_ref() { + Some(regex) => { + let compiled = regex::Regex::new(regex).map_err(|e| { + error!(%regex, err = ?e, "regex failed to compile: {}", e); + e + })?; + Ok(Some(compiled)) + } + None => Ok(None), + } + } } /// fields we need from Cargo.toml [package] section to combine with IndexMeta @@ -51,6 +139,7 @@ pub struct PackageStub { pub repository: Option, pub homepage: Option, pub documentation: Option, + pub links: Option, } /// for parsing Cargo.toml to extract missing PublishMeta fields that do not appear @@ -63,43 +152,34 @@ pub struct ManifestStub { /// full definition of cargo publish json #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] pub struct PublishMeta { - #[serde(borrow)] pub name: String, #[serde(alias = "version")] pub vers: semver::Version, #[serde(alias = "dependencies")] #[serde(default)] pub deps: Vec, - #[serde(default, borrow)] + #[serde(default)] pub features: BTreeMap>, - #[serde(default, borrow)] + #[serde(default)] pub authors: Vec, - #[serde(borrow)] pub description: Option, - #[serde(borrow)] pub documentation: Option, - #[serde(borrow)] pub homepage: Option, - #[serde(borrow)] pub readme: Option, - #[serde(borrow)] - pub readme_file: Option, - #[serde(default, borrow)] + pub readme_file: Option, + #[serde(default)] pub keywords: Vec, - #[serde(default, borrow)] + #[serde(default)] pub categories: Vec, - #[serde(borrow)] pub license: Option, - #[serde(borrow)] - pub license_file: Option, - #[serde(borrow)] + pub license_file: Option, pub repository: Option, - #[serde(skip_serializing_if = "Option::is_none", borrow)] + #[serde(skip_serializing_if = "Option::is_none")] pub links: Option, - #[serde(skip_serializing_if = "Option::is_none", borrow)] + #[serde(skip_serializing_if = 
"Option::is_none")] pub badges: Option>, /// from ancient cargo versions - #[serde(skip_serializing_if = "Option::is_none", borrow)] + #[serde(skip_serializing_if = "Option::is_none")] pub features2: Option>>, /// from ancient cargo versions #[serde(skip_serializing_if = "Option::is_none")] @@ -110,35 +190,47 @@ pub struct PublishMeta { pub struct PublishDependency { pub optional: bool, pub default_features: bool, - #[serde(borrow)] pub name: String, - #[serde(borrow)] pub features: Vec, // cargo and crates-io have this as string #[serde(alias = "req")] pub version_req: semver::VersionReq, - #[serde(borrow)] pub target: Option, // crates-io has this as option - pub kind: PublishDependencyKind, - #[serde(skip_serializing_if = "Option::is_none", borrow)] + pub kind: DependencyKind, + #[serde(skip_serializing_if = "Option::is_none")] pub registry: Option, - #[serde(skip_serializing_if = "Option::is_none", borrow)] + #[serde(skip_serializing_if = "Option::is_none")] pub explicit_name_in_toml: Option, } +impl From for PublishDependency { + fn from(dep: IndexDependency) -> Self { + Self { + name: dep.name, + features: dep.features, + default_features: dep.default_features, + optional: dep.optional, + target: dep.target, + kind: dep.kind, + registry: dep.registry, + version_req: dep.req, + explicit_name_in_toml: dep.package, + } + } +} + #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] pub struct IndexMeta { // same everything as publish metadata - #[serde(borrow)] pub name: String, #[serde(alias = "version")] pub vers: semver::Version, - #[serde(alias = "dependencies", borrow)] + #[serde(alias = "dependencies")] pub features: BTreeMap>, - #[serde(skip_serializing_if = "Option::is_none", borrow)] + #[serde(skip_serializing_if = "Option::is_none")] pub links: Option, - #[serde(skip_serializing_if = "Option::is_none", borrow)] + #[serde(skip_serializing_if = "Option::is_none")] pub badges: Option>, // modified format/field names @@ -150,7 +242,7 @@ pub struct 
IndexMeta { // ancient fields, these were actually written // on sanskrit on stone tablets - #[serde(skip_serializing_if = "Option::is_none", borrow)] + #[serde(skip_serializing_if = "Option::is_none")] pub features2: Option>>, #[serde(skip_serializing_if = "Option::is_none")] pub v: Option, @@ -160,23 +252,20 @@ pub struct IndexMeta { pub struct IndexDependency { /// corresponds to `explicit_name_in_toml` field in `publish::Dependency` /// when a dep is renamed in Cargo.toml, otherwise same as `package`. - #[serde(borrow)] pub name: String, /// corresponds to `name` in `publish::Dependency` - #[serde(skip_serializing_if = "Option::is_none", borrow)] + #[serde(skip_serializing_if = "Option::is_none")] pub package: Option, /// in publish meta, this field is called `version_req`, and the index /// format requires it to be renamed to `req` #[serde(alias = "version_req")] pub req: semver::VersionReq, - #[serde(borrow)] pub features: Vec, pub optional: bool, pub default_features: bool, - #[serde(borrow)] pub target: Option, pub kind: DependencyKind, - #[serde(skip_serializing_if = "Option::is_none", borrow)] + #[serde(skip_serializing_if = "Option::is_none")] pub registry: Option, } @@ -192,6 +281,58 @@ pub enum DependencyKind { Dev, } +impl PublishMeta { + pub fn new( + index_meta: IndexMeta, + manifest: ManifestStub, + readme: Option, + ) -> Self { + let ManifestStub { package } = manifest; + PublishMeta { + name: package.name, + vers: package.version, + deps: index_meta.deps.into_iter().map(From::from).collect(), + features: index_meta.features, + authors: package.authors, + description: package.description, + documentation: package.documentation, + homepage: package.homepage, + readme, + readme_file: package.readme, + keywords: package.keywords, + categories: package.categories, + license: package.license, + license_file: package.license_file, + repository: package.repository, + links: package.links, + badges: index_meta.badges, + features2: index_meta.features2, + 
v: index_meta.v, + } + } +} + +fn serialize_publish_payload( + publish_meta_json: &[u8], + dot_crate_bytes: &[u8], +) -> Vec { + assert!(publish_meta_json.len() <= u32::MAX as usize); + assert!(dot_crate_bytes.len() <= u32::MAX as usize); + + let mut out: Vec = Vec::with_capacity( + publish_meta_json.len() + + dot_crate_bytes.len() + + 8 // 2x u32 lengths + ); + + out.extend_from_slice(&(publish_meta_json.len() as u32).to_le_bytes()[..]); + out.extend_from_slice(publish_meta_json); + out.extend_from_slice(&(dot_crate_bytes.len() as u32).to_le_bytes()[..]); + out.extend_from_slice(dot_crate_bytes); + + out +} + fn extract_manifest_from_tar(rdr: R) -> Result, Error> { let mut archive = tar::Archive::new(rdr); for entry in archive.entries()? { @@ -221,4 +362,284 @@ fn extract_readme_from_tar(rdr: R, readme_path: &Path) -> Result