Browse Source

implement url templates

in addition to url templates, this also includes special handling for crates.io:

crates.io index config.json dl: "https://crates.io/api/v1/crates"

a request to "https://crates.io/api/v1/crates/{crate}/{vers}/download" returns
a 302 redirect to "https://static.crates.io/crates/{crate}/{crate}-{vers}.crate".

rather than follow the 302, this detects if the dl value is crates.io (`is_crates_io` method)
and performs the redirect preemptively.

in discussion with the crates.io team on discord, @carol10cents indicated this would
avoid the need to throttle requests, as they would go straight to the cdn instead of
hitting the crates.io webserver.
master
Jonathan Strong 2 years ago
parent
commit
eb111aad33
  1. 3
      Cargo.lock
  2. 3
      Cargo.toml
  3. 116
      src/main.rs

3
Cargo.lock generated

@ -1323,7 +1323,7 @@ checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244"
[[package]] [[package]]
name = "registry-backup" name = "registry-backup"
version = "0.3.0" version = "0.4.0-rc.1"
dependencies = [ dependencies = [
"chrono", "chrono",
"clap", "clap",
@ -1341,6 +1341,7 @@ dependencies = [
"toml", "toml",
"tracing", "tracing",
"tracing-subscriber", "tracing-subscriber",
"url",
"walkdir", "walkdir",
] ]

3
Cargo.toml

@ -1,7 +1,7 @@
[package] [package]
name = "registry-backup" name = "registry-backup"
authors = ["Jonathan Strong <jstrong@shipyard.rs>"] authors = ["Jonathan Strong <jstrong@shipyard.rs>"]
version = "0.3.0" version = "0.4.0-rc.1"
edition = "2021" edition = "2021"
publish = ["shipyard-rs-public"] publish = ["shipyard-rs-public"]
readme = "README.md" readme = "README.md"
@ -38,6 +38,7 @@ pretty_toa = "1"
tera = { version = "1", optional = true } tera = { version = "1", optional = true }
chrono = { version = "0.4", optional = true } chrono = { version = "0.4", optional = true }
regex = "1.6" regex = "1.6"
url = "2"
[features] [features]
default = [] default = []

116
src/main.rs

@ -12,7 +12,7 @@ use clap::Parser;
use futures::stream::StreamExt; use futures::stream::StreamExt;
use governor::prelude::*; use governor::prelude::*;
use governor::{Quota, RateLimiter}; use governor::{Quota, RateLimiter};
use reqwest::header::{ACCEPT, AUTHORIZATION, CONTENT_TYPE}; use reqwest::header::AUTHORIZATION; // ACCEPT, CONTENT_TYPE};
use serde::Deserialize; use serde::Deserialize;
use tokio::io::AsyncBufReadExt; use tokio::io::AsyncBufReadExt;
use tracing::{debug, error, info, warn}; use tracing::{debug, error, info, warn};
@ -20,6 +20,9 @@ use tracing_subscriber::filter::EnvFilter;
type AnyError = Box<dyn std::error::Error>; type AnyError = Box<dyn std::error::Error>;
// const CRATESIO_INDEX: &str = "https://github.com/rust-lang/crates.io-index.git";
const CRATESIO_DL_URL: &str = "https://crates.io/api/v1/crates";
/// type representing the schema of the config.json file /// type representing the schema of the config.json file
/// placed at the root of the crate index repo. /// placed at the root of the crate index repo.
/// ///
@ -43,11 +46,62 @@ pub struct RegistryConfig {
pub auth_required: Option<bool>, pub auth_required: Option<bool>,
} }
impl RegistryConfig {
    /// Returns `true` when this registry's `dl` value is exactly the
    /// crates.io download endpoint ([`CRATESIO_DL_URL`]).
    pub fn is_crates_io(&self) -> bool {
        self.dl.as_str() == CRATESIO_DL_URL
    }

    /// Builds the download URL for one crate version.
    ///
    /// Three cases are handled, in this order:
    ///
    /// 1. `dl` is the crates.io endpoint: the static CDN URL is returned
    ///    directly rather than the `/api` URL that would answer with a
    ///    302 redirect.
    /// 2. `dl` contains at least one template placeholder (`{crate}`,
    ///    `{version}`, `{prefix}`, `{lowerprefix}`, `{sha256-checksum}`):
    ///    every placeholder is substituted.
    /// 3. Otherwise: `/{name}/{version}/download` is appended to `dl`.
    ///
    /// # Panics
    ///
    /// Propagates any panic from `relative_index_file_helper` (e.g. an empty
    /// `name`), but only when `dl` contains `{prefix}` or `{lowerprefix}`.
    pub fn get_dl_url(&self, name: &str, version: &str, cksum: &str) -> String {
        const PLACEHOLDERS: [&str; 5] = [
            "{crate}",
            "{version}",
            "{prefix}",
            "{lowerprefix}",
            "{sha256-checksum}",
        ];

        if self.is_crates_io() {
            // Skip the redirect from the /api endpoint and go straight to
            // the static CDN location.
            return format!(
                "https://static.crates.io/crates/{name}/{name}-{version}.crate",
                name = name,
                version = version,
            );
        }

        let is_template = PLACEHOLDERS.iter().any(|marker| self.dl.contains(marker));
        if !is_template {
            return format!(
                "{dl}/{name}/{version}/download",
                dl = self.dl,
                name = name,
                version = version,
            );
        }

        let mut url = self.dl.clone();

        // NOTE(review): the value substituted for `{prefix}` / `{lowerprefix}`
        // is the full relative index path, so it ends with the crate name
        // (e.g. `ca/rg/cargo`). Cargo's registry documentation describes
        // `{prefix}` as only the directory part (`ca/rg`) -- confirm which
        // form is intended before relying on these placeholders.
        if self.dl.contains("{prefix}") {
            let segments = relative_index_file_helper(name);
            url = url.replace("{prefix}", &segments.join("/"));
        }

        if self.dl.contains("{lowerprefix}") {
            let lowered = name.to_lowercase();
            let segments = relative_index_file_helper(&lowered);
            url = url.replace("{lowerprefix}", &segments.join("/"));
        }

        url.replace("{crate}", name)
            .replace("{version}", version)
            .replace("{sha256-checksum}", cksum)
    }
}
/// One version per line in the index metadata files. /// One version per line in the index metadata files.
#[derive(Debug, Clone, Deserialize)] #[derive(Debug, Clone, Deserialize)]
pub struct CrateVersion { pub struct CrateVersion {
pub name: String, pub name: String,
pub vers: String, pub vers: String,
pub cksum: String,
} }
/// Configuration for where to save the downloaded .crate files, and /// Configuration for where to save the downloaded .crate files, and
@ -175,8 +229,6 @@ pub struct Config {
const DEFAULT_OUTPUT_PATH: &str = "output"; const DEFAULT_OUTPUT_PATH: &str = "output";
const DEFAULT_USER_AGENT: &str = concat!("registry-backup/v", clap::crate_version!()); const DEFAULT_USER_AGENT: &str = concat!("registry-backup/v", clap::crate_version!());
//const CRATESIO_INDEX: &str = "https://github.com/rust-lang/crates.io-index.git";
fn default_output_path() -> PathBuf { fn default_output_path() -> PathBuf {
PathBuf::from(DEFAULT_OUTPUT_PATH) PathBuf::from(DEFAULT_OUTPUT_PATH)
} }
@ -487,6 +539,37 @@ async fn ensure_file_parent_dir_exists<P: AsRef<std::path::Path>>(path: P) -> Re
} }
} }
// relative_index_* fns taken from rust-lang/crates.io source code
/// Splits a crate name into the path segments of its index file.
///
/// The last segment is always `name` itself; the leading segments depend on
/// the byte length of `name`:
///
/// * 1 byte: `["1", name]`
/// * 2 bytes: `["2", name]`
/// * 3 bytes: `["3", <first byte>, name]`
/// * otherwise: `[<bytes 0..2>, <bytes 2..4>, name]`
///
/// No lowercase conversion is applied; callers lowercase beforehand if needed.
///
/// # Panics
///
/// Panics if `name` is empty, or if one of the slice offsets used above does
/// not fall on a UTF-8 character boundary.
fn relative_index_file_helper(name: &str) -> Vec<&str> {
    let byte_len = name.len();

    if byte_len == 1 {
        return vec!["1", name];
    }
    if byte_len == 2 {
        return vec!["2", name];
    }
    if byte_len == 3 {
        let first = &name[..1];
        return vec!["3", first, name];
    }

    let head = &name[0..2];
    let tail = &name[2..4];
    vec![head, tail, name]
}
// /// Returns the relative path to the crate index file that corresponds to
// /// the given crate name as a path (i.e. with platform-dependent folder separators).
// ///
// /// see <https://doc.rust-lang.org/cargo/reference/registries.html#index-format>
// fn relative_index_file(name: &str) -> PathBuf {
// let name = name.to_lowercase();
// Self::relative_index_file_helper(&name).iter().collect()
// }
//
// /// Returns the relative path to the crate index file that corresponds to
// /// the given crate name for usage in URLs (i.e. with `/` separator).
// ///
// /// see <https://doc.rust-lang.org/cargo/reference/registries.html#index-format>
// fn relative_index_file_for_url(name: &str) -> String {
// let name = name.to_lowercase();
// Self::relative_index_file_helper(&name).join("/")
// }
macro_rules! megabytes { macro_rules! megabytes {
($x:expr) => {{ ($x:expr) => {{
use pretty_toa::ThousandsSep; use pretty_toa::ThousandsSep;
@ -532,9 +615,9 @@ async fn download_versions(
async move { async move {
// TODO actually parse and use the format // TODO actually parse and use the format
let vers_path = format!("{}/{}/download", vers.name, vers.vers); let url =
let url = format!("{}/{}", registry_config.dl, vers_path); url::Url::parse(&registry_config.get_dl_url(&vers.name, &vers.vers, &vers.cksum))?;
let output_path = config.output.path.join(vers_path); let output_path = config.output.path.join(url.path());
if config.dry_run { if config.dry_run {
debug!(%url, "skipping download (--dry-run mode)"); debug!(%url, "skipping download (--dry-run mode)");
@ -542,11 +625,7 @@ async fn download_versions(
} }
debug!(?url, "downloading..."); debug!(?url, "downloading...");
let req = http_client let req = http_client.get(url);
.get(url)
//.header(CONTENT_TYPE, "application/json")
//.header(ACCEPT, "application/json")
;
let req = if let Some(token) = config.registry.auth_token.as_deref() { let req = if let Some(token) = config.registry.auth_token.as_deref() {
req.header(AUTHORIZATION, token) req.header(AUTHORIZATION, token)
@ -687,4 +766,19 @@ mod tests {
const TOML: &str = include_str!("../config.toml.sample"); const TOML: &str = include_str!("../config.toml.sample");
let _config: Config = toml::from_str(TOML).unwrap(); let _config: Config = toml::from_str(TOML).unwrap();
} }
/// Renders a `dl` template that uses every supported placeholder once and
/// checks the exact resulting string.
#[test]
fn sanity_check_url_template_rendering() {
    let template = "{prefix}__{lowerprefix}__{sha256-checksum}__{crate}__{version}.tar.gz";
    let registry = RegistryConfig {
        dl: template.to_string(),
        api: String::new(),
        allowed_registries: Vec::new(),
        auth_required: Some(true),
    };

    let rendered = registry.get_dl_url("iM-14yo-LoL", "0.69.42-rc.123", "c5b6fc73");
    let expected =
        "iM/-1/iM-14yo-LoL__im/-1/im-14yo-lol__c5b6fc73__iM-14yo-LoL__0.69.42-rc.123.tar.gz";

    assert_eq!(rendered, expected);
}
} }

Loading…
Cancel
Save