Compare commits
23 Commits
master
...
v0.5.0-bet
| Author | SHA1 | Date |
|---|---|---|
|
|
c890c5f429 | 2 years ago |
|
|
4c2a9e5fc2 | 2 years ago |
|
|
7e9c5ec9bd | 2 years ago |
|
|
2ad520d230 | 2 years ago |
|
|
a9c8906096 | 2 years ago |
|
|
dd4eb957ad | 2 years ago |
|
|
b717fc952b | 2 years ago |
|
|
934559fd41 | 2 years ago |
|
|
de6c86115b | 2 years ago |
|
|
5d3ea67dfc | 2 years ago |
|
|
9545b350a4 | 2 years ago |
|
|
24bbd17e82 | 2 years ago |
|
|
1afd7b465f | 2 years ago |
|
|
4a12c2802c | 2 years ago |
|
|
c2f8d60922 | 2 years ago |
|
|
cc275bacb7 | 2 years ago |
|
|
fa147a8ee4 | 2 years ago |
|
|
7ae5ab55d5 | 2 years ago |
|
|
b02731ecd6 | 2 years ago |
|
|
8b57bf70b0 | 2 years ago |
|
|
315c27b25b | 2 years ago |
|
|
8bb8d218c7 | 2 years ago |
|
|
3cbe8997a0 | 2 years ago |
14 changed files with 2409 additions and 507 deletions
@ -1,4 +1,6 @@
|
||||
/target |
||||
*.swp |
||||
config.toml |
||||
publish-config.toml |
||||
/output |
||||
.env |
||||
|
||||
@ -1,17 +1,19 @@
|
||||
Available recipes: |
||||
cargo +args='' # cargo wrapper; executes a cargo command using the settings in justfile (RUSTFLAGS, etc.) |
||||
check +args='' # cargo check wrapper |
||||
debug-build +args='' # cargo build wrapper - builds registry-backup in debug mode |
||||
generate-readme # generate updated README.md |
||||
cargo +args='' # cargo wrapper; executes a cargo command using the settings in justfile (RUSTFLAGS, etc.) |
||||
check +args='' # cargo check wrapper |
||||
debug-build +args='' # cargo build wrapper - builds registry-backup in debug mode |
||||
debug-build-publish +args='' # cargo build wrapper - builds publish tool in debug mode |
||||
generate-readme # generate updated README.md |
||||
get-crate-version |
||||
install # cargo install registry-backup via git dep |
||||
pre-release # check, run tests, check non-error output for clippy, run rustfmt |
||||
release # release version (regenerate docs, git tag v0.0.0) |
||||
release-build +args='' # cargo build --release wrapper - builds registry-backup in release mode |
||||
release-prep # get everything all ready for release |
||||
show-build-env # diagnostic command for viewing value of build variables at runtime |
||||
test +args='' # cargo test wrapper |
||||
update-readme # re-generate README.md and overwrite existing file with output |
||||
update-readme-and-commit # re-generate, overwrite, stage, and commit |
||||
update-readme-and-stage # re-generate, overwrite, and stage changes |
||||
verify-clean-git # verify no uncommitted changes |
||||
install # cargo install registry-backup via git dep |
||||
pre-release # check, run tests, check non-error output for clippy, run rustfmt |
||||
release # release version (regenerate docs, git tag v0.0.0) |
||||
release-build +args='' # cargo build --release wrapper - builds registry-backup in release mode |
||||
release-build-publish +args='' # cargo build --release wrapper - builds publish tool in release mode |
||||
release-prep # get everything all ready for release |
||||
show-build-env # diagnostic command for viewing value of build variables at runtime |
||||
test +args='' # cargo test wrapper |
||||
update-readme # re-generate README.md and overwrite existing file with output |
||||
update-readme-and-commit # re-generate, overwrite, stage, and commit |
||||
update-readme-and-stage # re-generate, overwrite, and stage changes |
||||
verify-clean-git # verify no uncommitted changes |
||||
|
||||
@ -0,0 +1,19 @@
|
||||
registry-backup 0.5.0-beta.1 |
||||
Jonathan Strong <jstrong@shipyard.rs> |
||||
|
||||
USAGE: |
||||
publish [OPTIONS] --config-file <PATH> |
||||
|
||||
OPTIONS: |
||||
-c, --config-file <PATH> Config file with source directories and destination registry info |
||||
--dry-run Perform all the work of generating `cargo publish` payloads, but |
||||
don't send them to the destination registry server |
||||
--validate Load config file, validate the settings, and display the final |
||||
loaded content to stdout, then exit |
||||
--filter-crates <REGEX> Use to limit which crates from the source registry are published |
||||
to the destination registry. Expects a regular expression which |
||||
will be matched against the names of crates. Only crates with |
||||
names that match the regex will be published. This field may also |
||||
be specified at the top level of the config file |
||||
-h, --help Print help information |
||||
-V, --version Print version information |
||||
@ -0,0 +1,23 @@
|
||||
# optional field for providing a regex-based filter |
||||
# to limit which crates are published to the destination |
||||
# registry. only crates with names matching the regex will |
||||
# be published. |
||||
# |
||||
filter-crates = "^." |
||||
|
||||
# do everything except actually publish to the destination registry |
||||
dry-run = false |
||||
|
||||
# source registry config |
||||
[src] |
||||
index-dir = "path/to/crate-index/repo" # git clone of crate index repository |
||||
crate-files-dir = "path/to/crate/files" # i.e. files downloaded by registry-backup tool |
||||
publish-history-csv = "path/to/publish-log.csv" # see docs above |
||||
registry-name = "my-old-registry" # whatever label the source registry was given in Cargo.toml files |
||||
index-url = "https://github.com/my-org/crate-index.git" # index url, i.e. same as one provided in ~/.cargo/config.toml |
||||
|
||||
# destination registry config |
||||
[dst] |
||||
index-url = "ssh://git@ssh.shipyard.rs/my-new-registry/crate-index.git" # index url of new registry |
||||
registry-name = "my-new-registry" # can be same as old name or a different name |
||||
auth-token = "xxx" # auth token for publishing to the destination registry |
||||
@ -0,0 +1,92 @@
|
||||
# std |
||||
import os |
||||
import sys |
||||
import io |
||||
from pathlib import Path |
||||
import json |
||||
|
||||
# non-std |
||||
import git |
||||
import pandas as pd |
||||
|
||||
DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z" |
||||
EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" |
||||
|
||||
def versions(path, branch='master'):
    """
    Walk every commit on `branch` of the git repository at `path` and yield
    one row (a dict) for each crate index file touched by each commit.

    Each row holds the file path, the commit sha, the author email, the
    authored time (formatted with DATE_TIME_FORMAT) and the `name` / `vers`
    values parsed from the JSON on the last line of the file as of that commit.

    Files that cannot be read or parsed are reported on stderr and skipped.
    """

    # Open the repository; raises an error if `path` is not one.
    repo = git.Repo(path)

    for commit in repo.iter_commits(branch):
        # commit.stats.files is keyed by the path of every file the commit
        # changed; only the paths are needed here.
        for objpath in commit.stats.files:
            p = Path(objpath)
            # Index files sit two levels deep for one- and two-character
            # crate names (`1/<name>`, `2/<name>`) and three levels deep for
            # everything else, so both depths must be accepted.
            if len(p.parts) not in (2, 3):
                print(f'skipping path: wrong depth ({p.parts})', file=sys.stderr)
                continue

            try:
                obj = commit.tree / objpath
                # The most recently published version is the last line. An
                # empty file raises IndexError here and is skipped below.
                lastline = obj.data_stream.read().splitlines()[-1].decode('utf-8')
            except Exception as e:
                print(f'failed to load file {objpath} at commit {commit}: {e}', file=sys.stderr)
                continue

            lastline = lastline.strip()
            try:
                d = json.loads(lastline)
            except Exception as e:
                print(f'failed to parse json at commit {commit}: {e}', file=sys.stderr)
                continue

            yield {
                'path': os.path.join(path, objpath),
                'commit': commit.hexsha,
                'author': commit.author.email,
                'time': commit.authored_datetime.strftime(DATE_TIME_FORMAT),
                'crate_name': d['name'],
                'version': d['vers'],
            }
||||
|
||||
def main(path):
    """
    Build the publish history for the index repository at `path` and print it
    to stdout as CSV, keeping the latest row (by time) for each
    (crate_name, version) pair.
    """
    history = pd.DataFrame(versions(path))
    history['time'] = pd.to_datetime(history['time'], utc=True)
    latest = (
        history
        .sort_values(by='time')
        .groupby(['crate_name', 'version'])
        .last()
        .reset_index()
    )
    # to_csv() without a target returns the CSV text.
    print(latest.to_csv(index=False))
||||
|
||||
if __name__ == '__main__':
    # Show usage when no argument is given or help is requested anywhere
    # on the command line; otherwise treat the first argument as the repo path.
    help_requested = '-h' in sys.argv or '--help' in sys.argv
    if len(sys.argv) == 1 or help_requested:
        print("USAGE:\n python3 get-publish-history.py PATH\n", file=sys.stderr)
    else:
        path = sys.argv[1]
        main(path)
||||
|
||||
@ -0,0 +1,871 @@
|
||||
#![allow(unused_labels)] |
||||
|
||||
use std::collections::{BTreeMap, HashMap}; |
||||
use std::io::{self, prelude::*}; |
||||
use std::path::{Path, PathBuf}; |
||||
use std::time::*; |
||||
|
||||
use anyhow::{anyhow, bail, Context, Error}; |
||||
use chrono::prelude::*; |
||||
use clap::Parser; |
||||
use convert_case::{Case, Casing}; |
||||
use futures::stream::StreamExt; |
||||
use rayon::prelude::*; |
||||
use semver::Version; |
||||
use serde::{Deserialize, Serialize}; |
||||
use tempfile::TempDir; |
||||
use tokio::io::AsyncBufReadExt; |
||||
use tracing::{debug, error, info, trace, warn}; |
||||
use tracing_subscriber::filter::EnvFilter; |
||||
|
||||
#[derive(Parser, Debug)] |
||||
#[clap(author, version, global_setting(clap::AppSettings::DeriveDisplayOrder))] |
||||
struct Opt { |
||||
/// Config file with source directories and destination registry info
|
||||
#[clap(short, long, value_name = "PATH")] |
||||
pub config_file: PathBuf, |
||||
/// Perform all the work of generating `cargo publish` payloads,
|
||||
/// but don't send them to the destination registry server
|
||||
#[clap(long)] |
||||
pub dry_run: bool, |
||||
/// Load config file, validate the settings, and display the final loaded content
|
||||
/// to stdout, then exit
|
||||
#[clap(long)] |
||||
pub validate: bool, |
||||
|
||||
/// Use to limit which crates from the source registry are published to the
|
||||
/// destination registry. Expects a regular expression which will be matched
|
||||
/// against the names of crates. Only crates with names that match the regex
|
||||
/// will be published. This field may also be specified at the top level of
|
||||
/// the config file.
|
||||
#[clap(long, value_name = "REGEX", alias = "filter")] |
||||
pub filter_crates: Option<String>, |
||||
} |
||||
|
||||
#[derive(Debug, Clone, Deserialize)] |
||||
#[serde(rename_all = "kebab-case")] |
||||
struct DestinationRegistryConfig { |
||||
/// Registry index url, i.e. the url provided to Cargo via configuration
|
||||
/// to identify where to pull the index metadata from.
|
||||
#[serde(alias = "index")] |
||||
pub index_url: String, |
||||
#[serde(alias = "token")] |
||||
pub auth_token: String, |
||||
/// The name the registry should have in the Cargo.toml files published to
|
||||
/// the destination registry. This can be a rename (i.e. different than the
|
||||
/// registry name provided in `SourceRegistryConfig`) or the same name.
|
||||
pub registry_name: String, |
||||
} |
||||
|
||||
#[derive(Debug, Clone, Deserialize)] |
||||
#[serde(rename_all = "kebab-case")] |
||||
struct SourceRegistryConfig { |
||||
#[serde(alias = "index")] |
||||
pub index_dir: PathBuf, |
||||
#[serde(alias = "crate-files")] |
||||
pub crate_files_dir: PathBuf, |
||||
/// Name used in Cargo.toml for dependencies from the registry.
|
||||
pub registry_name: String, |
||||
/// Path of CSV file with log of when each crate version was published.
|
||||
pub publish_history_csv: PathBuf, |
||||
pub index_url: String, |
||||
} |
||||
|
||||
#[derive(Debug, Clone, Deserialize)] |
||||
#[serde(rename_all = "kebab-case")] |
||||
struct Config { |
||||
/// Do everything except actually publish to the destination registry. Can also be
|
||||
/// toggled using the --dry-run command line flag.
|
||||
#[serde(default)] |
||||
pub dry_run: bool, |
||||
/// Local directories with source registry files
|
||||
#[serde(alias = "source")] |
||||
pub src: SourceRegistryConfig, |
||||
/// Server information and authentication needed to publish to the
|
||||
/// destination registry
|
||||
#[serde(alias = "destination")] |
||||
pub dst: DestinationRegistryConfig, |
||||
/// Use to limit which crates from the source registry are published to the
|
||||
/// destination registry. Expects a regular expression which will be matched
|
||||
/// against the names of crates. Only crates with names that match the regex
|
||||
/// will be published.
|
||||
#[serde(default, alias = "filter")] |
||||
pub filter_crates: Option<String>, |
||||
} |
||||
|
||||
impl Config { |
||||
pub fn compile_filter(&self) -> Result<Option<regex::Regex>, Error> { |
||||
match self.filter_crates.as_ref() { |
||||
Some(regex) => { |
||||
let compiled = regex::Regex::new(regex).map_err(|e| { |
||||
error!(%regex, err = ?e, "regex failed to compile: {}", e); |
||||
e |
||||
})?; |
||||
Ok(Some(compiled)) |
||||
} |
||||
None => Ok(None), |
||||
} |
||||
} |
||||
} |
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] |
||||
struct PublishLogRow { |
||||
pub crate_name: String, |
||||
pub version: Version, |
||||
pub path: PathBuf, |
||||
pub commit: String, |
||||
pub author: String, |
||||
pub time: DateTime<Utc>, |
||||
// pub unix_nanos: u64,
|
||||
} |
||||
|
||||
struct CsvSetup { |
||||
pub rdr: csv::Reader<io::BufReader<std::fs::File>>, |
||||
pub headers: csv::ByteRecord, |
||||
pub row: csv::ByteRecord, |
||||
} |
||||
|
||||
fn csv_setup(path: &Path) -> Result<CsvSetup, Error> { |
||||
verify_file_exists(path)?; |
||||
let file = std::fs::File::open(path)?; |
||||
let buf = std::io::BufReader::new(file); |
||||
let mut rdr = csv::Reader::from_reader(buf); |
||||
let headers = rdr |
||||
.byte_headers() |
||||
.map_err(|e| anyhow!("failed to parse csv headers: {}", e))? |
||||
.clone(); |
||||
let row = csv::ByteRecord::new(); |
||||
Ok(CsvSetup { rdr, headers, row }) |
||||
} |
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] |
||||
struct PublishDependency { |
||||
pub optional: bool, |
||||
pub default_features: bool, |
||||
pub name: String, |
||||
pub features: Vec<String>, |
||||
// cargo and crates-io have this as string
|
||||
#[serde(alias = "req")] |
||||
pub version_req: semver::VersionReq, |
||||
pub target: Option<String>, |
||||
// crates-io has this as option
|
||||
pub kind: DependencyKind, |
||||
#[serde(skip_serializing_if = "Option::is_none")] |
||||
pub registry: Option<String>, |
||||
#[serde(skip_serializing_if = "Option::is_none")] |
||||
pub explicit_name_in_toml: Option<String>, |
||||
} |
||||
|
||||
impl From<IndexDependency> for PublishDependency { |
||||
fn from(dep: IndexDependency) -> Self { |
||||
Self { |
||||
name: dep.name, |
||||
features: dep.features, |
||||
default_features: dep.default_features, |
||||
optional: dep.optional, |
||||
target: dep.target, |
||||
kind: dep.kind, |
||||
registry: dep.registry, |
||||
version_req: dep.req, |
||||
explicit_name_in_toml: dep.package, |
||||
} |
||||
} |
||||
} |
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] |
||||
struct IndexMeta { |
||||
// same everything as publish metadata
|
||||
pub name: String, |
||||
#[serde(alias = "version")] |
||||
pub vers: semver::Version, |
||||
#[serde(alias = "dependencies")] |
||||
pub features: BTreeMap<String, Vec<String>>, |
||||
#[serde(skip_serializing_if = "Option::is_none")] |
||||
pub links: Option<String>, |
||||
#[serde(skip_serializing_if = "Option::is_none")] |
||||
pub badges: Option<BTreeMap<String, String>>, |
||||
|
||||
// modified format/field names
|
||||
pub deps: Vec<IndexDependency>, |
||||
|
||||
// fields that don't appear in publish metadata
|
||||
pub cksum: String, |
||||
pub yanked: bool, |
||||
|
||||
// ancient fields, these were actually written
|
||||
// on sanskrit on stone tablets
|
||||
#[serde(skip_serializing_if = "Option::is_none")] |
||||
pub features2: Option<BTreeMap<String, Vec<String>>>, |
||||
#[serde(skip_serializing_if = "Option::is_none")] |
||||
pub v: Option<u8>, |
||||
} |
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] |
||||
struct IndexDependency { |
||||
/// corresponds to `explicit_name_in_toml` field in `publish::Dependency`
|
||||
/// when a dep is renamed in Cargo.toml, otherwise same as `package`.
|
||||
pub name: String, |
||||
/// corresponds to `name` in `publish::Dependency`
|
||||
#[serde(skip_serializing_if = "Option::is_none")] |
||||
pub package: Option<String>, |
||||
/// in publish meta, this field is called `version_req`, and the index
|
||||
/// format requires it to be renamed to `req`
|
||||
#[serde(alias = "version_req")] |
||||
pub req: semver::VersionReq, |
||||
pub features: Vec<String>, |
||||
pub optional: bool, |
||||
pub default_features: bool, |
||||
pub target: Option<String>, |
||||
pub kind: DependencyKind, |
||||
#[serde(skip_serializing_if = "Option::is_none")] |
||||
pub registry: Option<String>, |
||||
} |
||||
|
||||
/// Section in which this dependency was defined
|
||||
#[derive(Copy, Clone, Serialize, Deserialize, Debug, PartialEq, PartialOrd, Ord, Eq, Hash)] |
||||
#[serde(rename_all = "lowercase")] |
||||
pub enum DependencyKind { |
||||
/// Used at run time
|
||||
Normal, |
||||
/// Used at build time, not available at run time
|
||||
Build, |
||||
/// Not fetched and not used, except for when used direclty in a workspace
|
||||
Dev, |
||||
} |
||||
|
||||
fn extract_manifest_files_from_tar<R: Read>(rdr: R) -> Result<ManifestFiles, Error> { |
||||
let mut archive = tar::Archive::new(rdr); |
||||
|
||||
let mut cargo_toml = None; |
||||
let mut cargo_toml_orig = None; |
||||
let mut cargo_lock = None; |
||||
|
||||
for entry in archive.entries()? { |
||||
let mut entry = entry?; |
||||
let path = entry.path()?; |
||||
|
||||
if path.ends_with("Cargo.toml.orig") { |
||||
let mut data = String::new(); |
||||
entry.read_to_string(&mut data)?; |
||||
cargo_toml_orig = Some(data); |
||||
} else if path.ends_with("Cargo.toml") { |
||||
let mut data = String::new(); |
||||
entry.read_to_string(&mut data)?; |
||||
cargo_toml = Some(data); |
||||
} else if path.ends_with("Cargo.lock") { |
||||
let mut data = String::new(); |
||||
entry.read_to_string(&mut data)?; |
||||
cargo_lock = Some(data); |
||||
} |
||||
|
||||
if cargo_toml.is_some() && cargo_toml_orig.is_some() && cargo_lock.is_some() { |
||||
break; |
||||
} |
||||
} |
||||
|
||||
if !(cargo_toml.is_some() && cargo_toml_orig.is_some()) { |
||||
anyhow::bail!( |
||||
"some required manifest files missing in .crate archive \ |
||||
(cargo_toml={:?} cargo_toml_orig={:?} cargo_lock={:?})", |
||||
cargo_toml.is_some(), |
||||
cargo_toml_orig.is_some(), |
||||
cargo_lock.is_some(), |
||||
); |
||||
} |
||||
|
||||
Ok(ManifestFiles { |
||||
cargo_toml: cargo_toml.unwrap(), |
||||
cargo_toml_orig: cargo_toml_orig.unwrap(), |
||||
cargo_lock, |
||||
}) |
||||
} |
||||
|
||||
fn setup_logger() { |
||||
let env_filter = EnvFilter::from_default_env(); |
||||
let builder = tracing_subscriber::fmt() |
||||
.with_env_filter(env_filter) |
||||
.with_ansi(true); |
||||
builder.init(); |
||||
} |
||||
|
||||
fn load_config_file(opt: &Opt) -> Result<Config, Error> { |
||||
if !opt.config_file.exists() { |
||||
bail!("path does not exist: {:?}", opt.config_file); |
||||
} |
||||
let toml = std::fs::read_to_string(&opt.config_file)?; |
||||
let mut config: Config = toml::from_str(&toml).context( |
||||
"read config file, but unable to parse toml - check \ |
||||
format against example config", |
||||
)?; |
||||
// augment using command line opts
|
||||
config.filter_crates = config.filter_crates.or_else(|| opt.filter_crates.clone()); |
||||
config.dry_run |= opt.dry_run; |
||||
Ok(config) |
||||
} |
||||
|
||||
fn is_hidden(entry: &walkdir::DirEntry) -> bool { |
||||
entry |
||||
.file_name() |
||||
.to_str() |
||||
.map(|s| s.starts_with('.')) |
||||
.unwrap_or(false) |
||||
} |
||||
|
||||
async fn get_index_metas(config: &Config) -> Result<HashMap<String, Vec<IndexMeta>>, Error> { |
||||
let filter = config.compile_filter()?; |
||||
let mut n_excl = 0; |
||||
|
||||
let files: Vec<(String, PathBuf)> = walkdir::WalkDir::new(&config.src.index_dir) |
||||
.max_depth(3) |
||||
.into_iter() |
||||
.filter_entry(|e| !is_hidden(e)) |
||||
.filter_map(|res| match res { |
||||
Ok(entry) => { |
||||
if entry.file_type().is_file() && entry.depth() >= 2 && entry.depth() <= 3 { |
||||
let path = entry.into_path(); |
||||
let crate_name: &str = path.file_name().and_then(|x| x.to_str()).unwrap_or(""); |
||||
if let Some(filter) = filter.as_ref() { |
||||
if !filter.is_match(crate_name.as_ref()) { |
||||
trace!(%crate_name, "crate excluded by filter"); |
||||
n_excl += 1; |
||||
return None; |
||||
} |
||||
} |
||||
|
||||
debug!(?path, "found crate index metadata file to parse"); |
||||
Some((crate_name.to_owned(), path)) |
||||
} else { |
||||
None |
||||
} |
||||
} |
||||
Err(e) => { |
||||
warn!(error = ?e, "walkdir result is error"); |
||||
None |
||||
} |
||||
}) |
||||
.collect(); |
||||
|
||||
let n_files = files.len(); |
||||
info!("found {} crate index metadata files to parse", n_files); |
||||
|
||||
if n_excl > 0 { |
||||
warn!( |
||||
regex = %config.filter_crates.as_deref().unwrap_or(""), |
||||
n_files, |
||||
n_excl, |
||||
"regex filter (--filter-crates) excluded {} crates", n_excl, |
||||
); |
||||
} |
||||
|
||||
let crate_versions: Vec<Result<(String, Vec<IndexMeta>), Error>> = |
||||
futures::stream::iter(files.into_iter().map(|(crate_name, path)| async move { |
||||
let file = tokio::fs::File::open(&path).await.map_err(|e| { |
||||
error!(err = ?e, ?path, "failed to open file"); |
||||
e |
||||
})?; |
||||
let buf = tokio::io::BufReader::new(file); |
||||
let mut out = Vec::new(); |
||||
let mut lines = buf.lines(); |
||||
'lines: while let Some(line) = lines.next_line().await? { |
||||
let index_meta: IndexMeta = serde_json::from_str(&line).map_err(|e| { |
||||
error!(err = ?e, ?path, "failed to parse line"); |
||||
e |
||||
})?; |
||||
out.push(index_meta); |
||||
} |
||||
debug!(crate_name = %out.first().map(|x| x.name.as_str()).unwrap_or("na"), |
||||
"parsed {} crate versions from metadata file", out.len() |
||||
); |
||||
|
||||
Ok((crate_name, out)) |
||||
})) |
||||
.buffer_unordered(num_cpus::get()) |
||||
.collect() |
||||
.await; |
||||
|
||||
let mut total_number_of_crate_versions = 0; |
||||
|
||||
// map of crate-name => [IndexMeta] (one per published version)
|
||||
let crate_versions: HashMap<String, Vec<IndexMeta>> = crate_versions |
||||
.into_iter() |
||||
.filter_map(|result| match result { |
||||
Ok((crate_name, xs)) => { |
||||
total_number_of_crate_versions += xs.len(); |
||||
Some((crate_name, xs)) |
||||
} |
||||
Err(e) => { |
||||
error!(err = ?e, "parsing metadata failed, skipping file"); |
||||
None |
||||
} |
||||
}) |
||||
.collect(); |
||||
|
||||
info!( |
||||
n_files, |
||||
n_excl, |
||||
n_crates = crate_versions.len(), |
||||
total_number_of_crate_versions, |
||||
"parsed {} crate version metadata entries from index", |
||||
total_number_of_crate_versions, |
||||
); |
||||
|
||||
Ok(crate_versions) |
||||
} |
||||
|
||||
#[derive(Debug, Clone, Deserialize, Eq, PartialEq, Default)] |
||||
struct PublishWarnings { |
||||
#[serde(default)] |
||||
pub invalid_categories: Vec<String>, |
||||
#[serde(default)] |
||||
pub invalid_badges: Vec<String>, |
||||
#[serde(default)] |
||||
pub other: Vec<String>, |
||||
} |
||||
|
||||
#[derive(Debug, Clone, Deserialize, Eq, PartialEq, Default)] |
||||
struct PublishResponse { |
||||
#[serde(default)] |
||||
pub warnings: PublishWarnings, |
||||
} |
||||
|
||||
/// Text of the manifest-related files pulled out of a `.crate` archive.
struct ManifestFiles {
    /// Contents of `Cargo.toml`.
    #[allow(dead_code)]
    cargo_toml: String,
    /// Contents of `Cargo.toml.orig`.
    cargo_toml_orig: String,
    /// Contents of `Cargo.lock`, when the archive has one.
    #[allow(dead_code)]
    cargo_lock: Option<String>,
}
||||
|
||||
struct VersionMeta { |
||||
index_meta: IndexMeta, |
||||
manifest_files: ManifestFiles, |
||||
tmp: TempDir, |
||||
modified_manifest_toml: Option<String>, |
||||
} |
||||
|
||||
impl VersionMeta { |
||||
pub fn source_dir(&self) -> PathBuf { |
||||
self.tmp |
||||
.path() |
||||
.join(format!("{}-{}", self.index_meta.name, self.index_meta.vers)) |
||||
} |
||||
} |
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)] |
||||
struct Node<'a> { |
||||
name: &'a str, |
||||
vers: Version, |
||||
} |
||||
|
||||
fn parse_one_manifest( |
||||
config: &Config, |
||||
crate_name: &str, |
||||
index_meta: IndexMeta, |
||||
) -> Result<VersionMeta, Error> { |
||||
let version = index_meta.vers.clone(); |
||||
trace!(%crate_name, %version, "processing crate version"); |
||||
let dot_crate_path = config |
||||
.src |
||||
.crate_files_dir |
||||
.join(format!("{}/{}/download", crate_name, index_meta.vers)); |
||||
verify_file_exists(&dot_crate_path)?; |
||||
|
||||
trace!(path = ?dot_crate_path, "reading .crate file"); |
||||
let dot_crate_bytes = std::fs::read(&dot_crate_path).with_context(|| { |
||||
format!( |
||||
"failed to read .crate file for \ |
||||
{crate_name} v{0} with path {dot_crate_path:?}", |
||||
index_meta.vers, |
||||
) |
||||
})?; |
||||
|
||||
trace!("extracting Cargo.toml from .crate targz archive"); |
||||
let decoder = flate2::read::GzDecoder::new(&dot_crate_bytes[..]); |
||||
let manifest_files = extract_manifest_files_from_tar(decoder).map_err(|err| { |
||||
error!(%crate_name, vers = %index_meta.vers, ?err, "failed to extract manifest files"); |
||||
err |
||||
})?; |
||||
|
||||
let tmp = TempDir::new()?; |
||||
let decoder = flate2::read::GzDecoder::new(&dot_crate_bytes[..]); |
||||
tar::Archive::new(decoder) |
||||
.unpack(tmp.path()) |
||||
.map_err(|err| { |
||||
error!(%crate_name, vers = %index_meta.vers, ?err, "failed to unpack to temp dir"); |
||||
err |
||||
})?; |
||||
|
||||
trace!(tmpdir = ?tmp.path(), "unpacked .crate archive to temp dir"); |
||||
let target_dir = tmp.path().join("target"); |
||||
std::fs::create_dir(target_dir)?; |
||||
|
||||
Ok(VersionMeta { |
||||
index_meta, |
||||
manifest_files, |
||||
tmp, |
||||
modified_manifest_toml: None, |
||||
}) |
||||
} |
||||
|
||||
fn parse_manifests( |
||||
config: &Config, |
||||
crate_versions: HashMap<String, Vec<IndexMeta>>, |
||||
) -> Result<HashMap<String, Vec<VersionMeta>>, Error> { |
||||
let begin = Instant::now(); |
||||
|
||||
let out: HashMap<String, Vec<VersionMeta>> = crate_versions |
||||
.into_par_iter() |
||||
.filter_map(|(crate_name, versions)| -> Option<(String, Vec<VersionMeta>)> { |
||||
let begin = Instant::now(); |
||||
debug!(%crate_name, "parsing manifests"); |
||||
let mut version_metas = Vec::new(); |
||||
let mut n_err = 0; |
||||
for index_meta in versions { |
||||
let version = index_meta.vers.clone(); |
||||
match parse_one_manifest(config, &crate_name, index_meta) { |
||||
Ok(meta) => { |
||||
version_metas.push(meta); |
||||
} |
||||
|
||||
Err(err) => { |
||||
error!(?err, %crate_name, %version, "failed to parse manifest; skipping"); |
||||
n_err += 1; |
||||
} |
||||
} |
||||
} |
||||
debug!(%crate_name, n_err, "parsed {} manifests in {:?}", version_metas.len(), begin.elapsed()); |
||||
if version_metas.is_empty() { |
||||
warn!(%crate_name, n_err, "parsed zero manifests successfully for crate!"); |
||||
None |
||||
} else { |
||||
Some((crate_name, version_metas)) |
||||
} |
||||
}).collect(); |
||||
|
||||
info!("parsed crate version manifests in {:?}", begin.elapsed()); |
||||
|
||||
Ok(out) |
||||
} |
||||
|
||||
/// edit registry deps to point to the destination registry.
|
||||
///
|
||||
/// NOTE: recursive traversing of the toml is needed to handle things
|
||||
/// like conditional deps blocks like:
|
||||
///
|
||||
/// ```toml,ignore
|
||||
/// [target.'cfg(not(target_env = "msvc"))'.dependencies]
|
||||
/// dep-one = { version = "0.1.0", registry = "old-registry" }
|
||||
/// ```
|
||||
fn edit_deps(manifest: &mut toml_edit::Document, config: &Config) { |
||||
use toml_edit::{visit_mut::VisitMut, TableLike}; |
||||
|
||||
struct DepsVisitor<'a>(&'a Config); |
||||
|
||||
impl<'a> VisitMut for DepsVisitor<'a> { |
||||
fn visit_table_like_mut(&mut self, dep: &mut dyn TableLike) { |
||||
let config = self.0; |
||||
let src_registry_name = config.src.registry_name.as_str(); |
||||
let dst_registry_name = config.dst.registry_name.as_str(); |
||||
let src_index_url = config.src.index_url.as_str(); |
||||
let dst_index_url = config.dst.index_url.as_str(); |
||||
let mut edited = false; |
||||
|
||||
if let Some(registry_item) = dep.get_mut("registry") { |
||||
if registry_item.as_str().unwrap_or("") == src_registry_name { |
||||
*registry_item = toml_edit::value(dst_registry_name); |
||||
edited = true; |
||||
} |
||||
} |
||||
|
||||
if let Some(registry_index_item) = dep.get_mut("registry-index") { |
||||
if registry_index_item.as_str().unwrap_or("") == src_index_url { |
||||
*registry_index_item = toml_edit::value(dst_index_url); |
||||
edited = true; |
||||
} |
||||
} |
||||
|
||||
if !edited { |
||||
for (_, v) in dep.iter_mut() { |
||||
if let Some(t) = v.as_table_like_mut() { |
||||
toml_edit::visit_mut::visit_table_like_mut(self, t); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
let mut visitor = DepsVisitor(config); |
||||
visitor.visit_document_mut(&mut *manifest); |
||||
} |
||||
|
||||
fn edit_publish_registry( |
||||
manifest: &mut toml_edit::Document, |
||||
src_registry_name: &str, |
||||
dst_registry_name: &str, |
||||
) -> Result<(), Error> { |
||||
let Some(package) = manifest |
||||
.get_mut("package") |
||||
.and_then(|item| item.as_table_like_mut()) |
||||
else { |
||||
anyhow::bail!("package key not found in manifest toml"); |
||||
}; |
||||
|
||||
let Some(publish_item) = package.get_mut("publish") else { |
||||
trace!("no 'publish' key in Cargo.toml package section"); |
||||
return Ok(()); |
||||
}; |
||||
|
||||
let Some(publish_array) = publish_item.as_array_mut() else { |
||||
anyhow::bail!("failed to cast publish item as array"); |
||||
}; |
||||
|
||||
let Some(i) = publish_array |
||||
.iter() |
||||
.position(|x| x.as_str().map(|s| s == src_registry_name).unwrap_or(false)) |
||||
else { |
||||
anyhow::bail!( |
||||
"publish key exists, but source registry name does not appear in it! (`{}`)", |
||||
publish_array.to_string() |
||||
); |
||||
}; |
||||
|
||||
let item_i = publish_array.get_mut(i).unwrap(); |
||||
*item_i = toml_edit::Value::from(dst_registry_name); |
||||
|
||||
Ok(()) |
||||
} |
||||
|
||||
fn prepare_source_dir_for_publish(config: &Config, meta: &mut VersionMeta) -> Result<(), Error> { |
||||
let source_dir = meta.source_dir(); |
||||
let mut modified_manifest = meta |
||||
.manifest_files |
||||
.cargo_toml_orig |
||||
.parse::<toml_edit::Document>()?; |
||||
|
||||
edit_deps(&mut modified_manifest, config); |
||||
edit_publish_registry( |
||||
&mut modified_manifest, |
||||
&config.src.registry_name, |
||||
&config.dst.registry_name, |
||||
)?; |
||||
|
||||
// write modified manifest over Cargo.toml (leaves Cargo.toml.orig as is)
|
||||
let modified_manifest_toml = modified_manifest.to_string(); |
||||
let cargo_toml_path = source_dir.join("Cargo.toml"); |
||||
std::fs::write(&cargo_toml_path, modified_manifest_toml.as_bytes())?; |
||||
debug!( |
||||
crate_name = %meta.index_meta.name, |
||||
vers = %meta.index_meta.vers, |
||||
path = ?cargo_toml_path, |
||||
"wrote modified manifest file", |
||||
); |
||||
meta.modified_manifest_toml = Some(modified_manifest_toml); |
||||
|
||||
let cargo_toml_orig_path = source_dir.join("Cargo.toml.orig"); |
||||
if cargo_toml_orig_path.exists() { |
||||
std::fs::remove_file(&cargo_toml_orig_path)?; |
||||
trace!( |
||||
crate_name = %meta.index_meta.name, |
||||
vers = %meta.index_meta.vers, |
||||
path = ?cargo_toml_orig_path, |
||||
"removed Cargo.toml.orig file", |
||||
); |
||||
} |
||||
|
||||
let cargo_lock_path = source_dir.join("Cargo.lock"); |
||||
if cargo_lock_path.exists() { |
||||
std::fs::remove_file(&cargo_lock_path)?; |
||||
trace!( |
||||
crate_name = %meta.index_meta.name, |
||||
vers = %meta.index_meta.vers, |
||||
path = ?cargo_lock_path, |
||||
"removed Cargo.lock file", |
||||
); |
||||
} |
||||
Ok(()) |
||||
} |
||||
|
||||
fn prepare_source_dirs_for_publish( |
||||
config: &Config, |
||||
manifests: &mut HashMap<String, Vec<VersionMeta>>, |
||||
) -> Result<(), Error> { |
||||
let begin = Instant::now(); |
||||
manifests.par_iter_mut() |
||||
.map(|(name, versions)| -> Result<(), Error> { |
||||
for meta in versions.iter_mut() { |
||||
prepare_source_dir_for_publish(config, meta) |
||||
.map_err(|err| { |
||||
error!(%name, vers = %meta.index_meta.vers, ?err, "prepare_source_dir_for_publish failed"); |
||||
err |
||||
})?; |
||||
} |
||||
Ok(()) |
||||
}).collect::<Result<Vec<()>, Error>>()?; |
||||
info!("modified Cargo.toml manifests in {:?}", begin.elapsed()); |
||||
Ok(()) |
||||
} |
||||
|
||||
fn cargo_publish_modified_source_dir(config: &Config, meta: &VersionMeta) -> Result<(), Error> { |
||||
let begin = Instant::now(); |
||||
info!(name = %meta.index_meta.name, vers = %meta.index_meta.vers, "publishing crate version"); |
||||
let index_env_key = format!( |
||||
"CARGO_REGISTRIES_{}_INDEX", |
||||
config.dst.registry_name.to_case(Case::ScreamingSnake) |
||||
); |
||||
let token_env_key = format!( |
||||
"CARGO_REGISTRIES_{}_TOKEN", |
||||
config.dst.registry_name.to_case(Case::ScreamingSnake) |
||||
); |
||||
|
||||
let source_dir = meta.source_dir(); |
||||
let manifest_path = source_dir.join("Cargo.toml"); |
||||
let manifest_path_str = manifest_path.display().to_string(); |
||||
|
||||
let mut args: Vec<&str> = vec!["publish"]; |
||||
args.extend_from_slice(&["--registry", &config.dst.registry_name][..]); |
||||
// args.extend_from_slice(&["--index", &config.dst.index_url][..]);
|
||||
args.extend_from_slice(&["--token", &config.dst.auth_token][..]); |
||||
args.extend_from_slice(&["--manifest-path", manifest_path_str.as_str()][..]); |
||||
args.extend_from_slice(&["--no-verify", "--allow-dirty", "-vv"][..]); |
||||
|
||||
debug!(name = %meta.index_meta.name, vers = %meta.index_meta.vers, "executing `cargo {}`", args.join(" ")); |
||||
let output = std::process::Command::new("cargo") |
||||
.env(&index_env_key, &config.dst.index_url) |
||||
.env(&token_env_key, &config.dst.auth_token) |
||||
.args(&args) |
||||
.output()?; |
||||
|
||||
debug!(name = %meta.index_meta.name, vers = %meta.index_meta.vers, exit_status = ?output.status, "finished executing `cargo publish` command"); |
||||
|
||||
if !output.status.success() { |
||||
let stdout = std::str::from_utf8(&output.stdout).unwrap_or("utf8err"); |
||||
let stderr = std::str::from_utf8(&output.stderr).unwrap_or("utf8err"); |
||||
error!(exit_status = ?output.status, "cargo publish error!\nstdout:\n{}\nstderr:\n:{}\n\n", stdout, stderr); |
||||
if !stderr.contains("already exists") { |
||||
debug!( |
||||
"cargo publish error - original Cargo.toml:\n***\n{}\n***", |
||||
meta.manifest_files.cargo_toml_orig |
||||
); |
||||
debug!( |
||||
"cargo publish error - modified Cargo.toml:\n***\n{}\n***", |
||||
meta.modified_manifest_toml.as_ref().unwrap() |
||||
); |
||||
} |
||||
} |
||||
|
||||
info!(name = %meta.index_meta.name, vers = %meta.index_meta.vers, "finished cargo publish in {:?}", begin.elapsed()); |
||||
Ok(()) |
||||
} |
||||
|
||||
async fn verify_dir_exists<P: AsRef<std::path::Path>>(path: P) -> Result<(), Error> { |
||||
match tokio::fs::metadata(path.as_ref()).await { |
||||
Ok(meta) if meta.is_dir() => Ok(()), |
||||
Ok(meta) /* if ! meta.is_dir() */ => { |
||||
debug_assert!( ! meta.is_dir()); |
||||
bail!("path exists, but is not a directory: {:?}", path.as_ref()) |
||||
} |
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => { |
||||
bail!("path does not exist: {}", path.as_ref().display()); |
||||
} |
||||
Err(e) => Err(e.into()), |
||||
} |
||||
} |
||||
|
||||
fn verify_file_exists<P: AsRef<std::path::Path>>(path: P) -> Result<(), Error> { |
||||
match std::fs::metadata(path.as_ref()) { |
||||
Ok(meta) if meta.is_file() => Ok(()), |
||||
Ok(meta) /* if ! meta.is_file() */ => { |
||||
debug_assert!( ! meta.is_file()); |
||||
bail!("path exists, but is not a file: {:?}", path.as_ref()) |
||||
} |
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => { |
||||
bail!("path does not exist: {}", path.as_ref().display()); |
||||
} |
||||
Err(e) => Err(e.into()), |
||||
} |
||||
} |
||||
|
||||
fn read_publish_log_csv(path: &Path) -> Result<Vec<PublishLogRow>, Error> { |
||||
let begin = Instant::now(); |
||||
let CsvSetup { |
||||
mut rdr, |
||||
headers, |
||||
mut row, |
||||
} = csv_setup(path)?; |
||||
let mut out = Vec::new(); |
||||
while rdr.read_byte_record(&mut row)? { |
||||
// only partially deserialized after this
|
||||
let parsed: PublishLogRow = row.deserialize(Some(&headers)).map_err(|err| { |
||||
error!(?row, ?headers, ?err, "deserializing row failed"); |
||||
err |
||||
})?; |
||||
out.push(parsed); |
||||
} |
||||
info!(?path, "parsed publish log csv in {:?}", begin.elapsed()); |
||||
Ok(out) |
||||
} |
||||
|
||||
fn main() -> Result<(), Error> { |
||||
let begin = Instant::now(); |
||||
|
||||
dotenvy::dotenv().ok(); |
||||
|
||||
let opt = Opt::parse(); |
||||
|
||||
setup_logger(); |
||||
|
||||
let config = load_config_file(&opt)?; |
||||
|
||||
let rt = tokio::runtime::Runtime::new()?; |
||||
rt.block_on(verify_dir_exists(&config.src.index_dir))?; |
||||
rt.block_on(verify_dir_exists(&config.src.crate_files_dir))?; |
||||
|
||||
verify_file_exists(&config.src.publish_history_csv)?; |
||||
|
||||
if opt.validate { |
||||
println!("{:#?}", config); |
||||
return Ok(()); |
||||
} |
||||
|
||||
let mut publish_log = read_publish_log_csv(&config.src.publish_history_csv)?; |
||||
publish_log.sort_by_key(|x| x.time); |
||||
assert!(!publish_log.is_empty()); |
||||
info!(n_rows = publish_log.len(), "parsed publish log csv"); |
||||
|
||||
if let Some(filter) = config.compile_filter()? { |
||||
publish_log.retain(|x| filter.is_match(&x.crate_name)); |
||||
info!(n_filtered_rows = publish_log.len(), "filtered publish log"); |
||||
} |
||||
|
||||
let krates = rt.block_on(get_index_metas(&config))?; |
||||
|
||||
let mut manifests = parse_manifests(&config, krates)?; |
||||
|
||||
prepare_source_dirs_for_publish(&config, &mut manifests)?; |
||||
|
||||
let mut by_name_vers: HashMap<(&str, &Version), &VersionMeta> = manifests |
||||
.iter() |
||||
.flat_map(|(k, v)| v.iter().map(|m| ((k.as_str(), &m.index_meta.vers), m))) |
||||
.collect(); |
||||
|
||||
for row in publish_log.iter() { |
||||
let Some(meta) = by_name_vers.remove(&(row.crate_name.as_str(), &row.version)) else { |
||||
warn!( |
||||
?row, |
||||
"crate version in publish log not found in index versions" |
||||
); |
||||
continue; |
||||
}; |
||||
|
||||
if let Err(err) = cargo_publish_modified_source_dir(&config, meta) { |
||||
error!(?err, name = %meta.index_meta.name, vers = %meta.index_meta.vers, "failed to publish crate version"); |
||||
} |
||||
} |
||||
info!("finished publishing crates to destination registry"); |
||||
|
||||
drop(manifests); |
||||
drop(rt); |
||||
|
||||
info!("finished in {:?}", begin.elapsed()); |
||||
Ok(()) |
||||
} |
||||
Loading…
Reference in new issue