Skip to content

Commit 3f67acf

Browse files
committed
Auto merge of #3973 - nipunn1313:readme2, r=Turbo87
Support relative URLs from repo-subdirectory packages Extract `.cargo_vcs_info.json` from the tarball and use it to determine the path in vcs. Support for this was added to cargo with rust-lang/cargo#9866 Fixes #3484 (note that support for this in admin::render_readmes not yet exists - would come with #4095) Depends on #4097 (Because of community/community#4477 - github ends up rendering the diffs together. Go to the commits tab and just look at the most recent commits to review).
2 parents b07754a + 86d852f commit 3f67acf

File tree

5 files changed

+148
-21
lines changed

5 files changed

+148
-21
lines changed

cargo-registry-markdown/lib.rs

+25-12
Original file line numberDiff line numberDiff line change
@@ -245,18 +245,23 @@ static MARKDOWN_EXTENSIONS: [&str; 7] =
245245
/// use cargo_registry_markdown::text_to_html;
246246
///
247247
/// let text = "[Rust](https://rust-lang.org/) is an awesome *systems programming* language!";
248-
/// let rendered = text_to_html(text, "README.md", None);
248+
/// let rendered = text_to_html(text, "README.md", None, None);
249249
/// assert_eq!(rendered, "<p><a href=\"https://rust-lang.org/\" rel=\"nofollow noopener noreferrer\">Rust</a> is an awesome <em>systems programming</em> language!</p>\n");
250250
/// ```
251-
pub fn text_to_html(text: &str, path: &str, base_url: Option<&str>) -> String {
252-
let path = Path::new(path);
253-
let base_dir = path.parent().and_then(|p| p.to_str()).unwrap_or("");
254-
255-
if path.extension().is_none() {
251+
pub fn text_to_html(
252+
text: &str,
253+
readme_path_in_pkg: &str,
254+
base_url: Option<&str>,
255+
pkg_path_in_vcs: Option<&str>,
256+
) -> String {
257+
let path_in_vcs = Path::new(pkg_path_in_vcs.unwrap_or("")).join(readme_path_in_pkg);
258+
let base_dir = path_in_vcs.parent().and_then(|p| p.to_str()).unwrap_or("");
259+
260+
if path_in_vcs.extension().is_none() {
256261
return markdown_to_html(text, base_url, base_dir);
257262
}
258263

259-
if let Some(ext) = path.extension().and_then(|ext| ext.to_str()) {
264+
if let Some(ext) = path_in_vcs.extension().and_then(|ext| ext.to_str()) {
260265
if MARKDOWN_EXTENSIONS.contains(&ext.to_lowercase().as_str()) {
261266
return markdown_to_html(text, base_url, base_dir);
262267
}
@@ -477,30 +482,38 @@ mod tests {
477482
"s1/s2/readme.md",
478483
] {
479484
assert_eq!(
480-
text_to_html("*lobster*", f, None),
485+
text_to_html("*lobster*", f, None, None),
481486
"<p><em>lobster</em></p>\n"
482487
);
483488
}
484489

485490
assert_eq!(
486-
text_to_html("*[lobster](docs/lobster)*", "readme.md", Some("https://github.com/rust-lang/test")),
491+
text_to_html("*[lobster](docs/lobster)*", "readme.md", Some("https://github.com/rust-lang/test"), None),
487492
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
488493
);
489494
assert_eq!(
490-
text_to_html("*[lobster](docs/lobster)*", "s/readme.md", Some("https://github.com/rust-lang/test")),
495+
text_to_html("*[lobster](docs/lobster)*", "s/readme.md", Some("https://github.com/rust-lang/test"), None),
491496
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/s/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
492497
);
493498
assert_eq!(
494-
text_to_html("*[lobster](docs/lobster)*", "s1/s2/readme.md", Some("https://github.com/rust-lang/test")),
499+
text_to_html("*[lobster](docs/lobster)*", "s1/s2/readme.md", Some("https://github.com/rust-lang/test"), None),
495500
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/s1/s2/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
496501
);
502+
assert_eq!(
503+
text_to_html("*[lobster](docs/lobster)*", "s1/s2/readme.md", Some("https://github.com/rust-lang/test"), Some("path/in/vcs/")),
504+
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/path/in/vcs/s1/s2/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
505+
);
506+
assert_eq!(
507+
text_to_html("*[lobster](docs/lobster)*", "s1/s2/readme.md", Some("https://github.com/rust-lang/test"), Some("path/in/vcs")),
508+
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/path/in/vcs/s1/s2/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
509+
);
497510
}
498511

499512
#[test]
500513
fn text_to_html_renders_other_things() {
501514
for f in &["readme.exe", "readem.org", "blah.adoc"] {
502515
assert_eq!(
503-
text_to_html("<script>lobster</script>\n\nis my friend\n", f, None),
516+
text_to_html("<script>lobster</script>\n\nis my friend\n", f, None, None),
504517
"&lt;script&gt;lobster&lt;/script&gt;<br>\n<br>\nis my friend<br>\n"
505518
);
506519
}

src/admin/render_readmes.rs

+5
Original file line numberDiff line numberDiff line change
@@ -205,10 +205,15 @@ fn render_pkg_readme<R: Read>(mut archive: Archive<R>, pkg_name: &str) -> anyhow
205205
let contents = find_file_by_path(&mut entries, Path::new(&path))
206206
.with_context(|| format!("Failed to read {} file", readme_path))?;
207207

208+
// pkg_path_in_vcs Unsupported from admin::render_readmes. See #4095
209+
// Would need access to cargo_vcs_info
210+
let pkg_path_in_vcs = None;
211+
208212
text_to_html(
209213
&contents,
210214
&readme_path,
211215
manifest.package.repository.as_deref(),
216+
pkg_path_in_vcs,
212217
)
213218
};
214219
return Ok(rendered);

src/controllers/krate/publish.rs

+68-8
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use hex::ToHex;
55
use sha2::{Digest, Sha256};
66
use std::collections::HashMap;
77
use std::io::Read;
8+
use std::path::Path;
89
use std::sync::Arc;
910
use swirl::Job;
1011

@@ -18,7 +19,7 @@ use crate::worker;
1819

1920
use crate::schema::*;
2021
use crate::util::errors::{cargo_err, AppResult};
21-
use crate::util::{read_fill, read_le_u32, LimitErrorReader, Maximums};
22+
use crate::util::{read_fill, read_le_u32, CargoVcsInfo, LimitErrorReader, Maximums};
2223
use crate::views::{
2324
EncodableCrate, EncodableCrateDependency, EncodableCrateUpload, GoodCrate, PublishWarnings,
2425
};
@@ -195,7 +196,8 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
195196
LimitErrorReader::new(req.body(), maximums.max_upload_size).read_to_end(&mut tarball)?;
196197
let hex_cksum: String = Sha256::digest(&tarball).encode_hex();
197198
let pkg_name = format!("{}-{}", krate.name, vers);
198-
verify_tarball(&pkg_name, &tarball, maximums.max_unpack_size)?;
199+
let cargo_vcs_info = verify_tarball(&pkg_name, &tarball, maximums.max_unpack_size)?;
200+
let pkg_path_in_vcs = cargo_vcs_info.map(|info| info.path_in_vcs);
199201

200202
if let Some(readme) = new_crate.readme {
201203
worker::render_and_upload_readme(
@@ -205,6 +207,7 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
205207
.readme_file
206208
.unwrap_or_else(|| String::from("README.md")),
207209
repo,
210+
pkg_path_in_vcs,
208211
)
209212
.enqueue(&conn)?;
210213
}
@@ -379,7 +382,11 @@ pub fn add_dependencies(
379382
Ok(git_deps)
380383
}
381384

382-
fn verify_tarball(pkg_name: &str, tarball: &[u8], max_unpack: u64) -> AppResult<()> {
385+
fn verify_tarball(
386+
pkg_name: &str,
387+
tarball: &[u8],
388+
max_unpack: u64,
389+
) -> AppResult<Option<CargoVcsInfo>> {
383390
// All our data is currently encoded with gzip
384391
let decoder = GzDecoder::new(tarball);
385392

@@ -389,8 +396,12 @@ fn verify_tarball(pkg_name: &str, tarball: &[u8], max_unpack: u64) -> AppResult<
389396

390397
// Use this I/O object now to take a peek inside
391398
let mut archive = tar::Archive::new(decoder);
399+
400+
let vcs_info_path = Path::new(&pkg_name).join(".cargo_vcs_info.json");
401+
let mut vcs_info = None;
402+
392403
for entry in archive.entries()? {
393-
let entry = entry.map_err(|err| {
404+
let mut entry = entry.map_err(|err| {
394405
err.chain(cargo_err(
395406
"uploaded tarball is malformed or too large when decompressed",
396407
))
@@ -401,9 +412,15 @@ fn verify_tarball(pkg_name: &str, tarball: &[u8], max_unpack: u64) -> AppResult<
401412
// upload a tarball that contains both `foo-0.1.0/` source code as well
402413
// as `bar-0.1.0/` source code, and this could overwrite other crates in
403414
// the registry!
404-
if !entry.path()?.starts_with(&pkg_name) {
415+
let entry_path = entry.path()?;
416+
if !entry_path.starts_with(&pkg_name) {
405417
return Err(cargo_err("invalid tarball uploaded"));
406418
}
419+
if entry_path == vcs_info_path {
420+
let mut contents = String::new();
421+
entry.read_to_string(&mut contents)?;
422+
vcs_info = CargoVcsInfo::from_contents(&contents).ok();
423+
}
407424

408425
// Historical versions of the `tar` crate which Cargo uses internally
409426
// don't properly prevent hard links and symlinks from overwriting
@@ -415,7 +432,7 @@ fn verify_tarball(pkg_name: &str, tarball: &[u8], max_unpack: u64) -> AppResult<
415432
return Err(cargo_err("invalid tarball uploaded"));
416433
}
417434
}
418-
Ok(())
435+
Ok(vcs_info)
419436
}
420437

421438
#[cfg(test)]
@@ -435,14 +452,57 @@ mod tests {
435452
#[test]
436453
fn verify_tarball_test() {
437454
let mut pkg = tar::Builder::new(vec![]);
438-
add_file(&mut pkg, "foo-0.0.1/.cargo_vcs_info.json", br#"{}"#);
455+
add_file(&mut pkg, "foo-0.0.1/Cargo.toml", b"");
439456
let mut serialized_archive = vec![];
440457
GzEncoder::new(pkg.into_inner().unwrap().as_slice(), Default::default())
441458
.read_to_end(&mut serialized_archive)
442459
.unwrap();
443460

444461
let limit = 512 * 1024 * 1024;
445-
assert_ok!(verify_tarball("foo-0.0.1", &serialized_archive, limit));
462+
assert_eq!(
463+
verify_tarball("foo-0.0.1", &serialized_archive, limit).unwrap(),
464+
None
465+
);
446466
assert_err!(verify_tarball("bar-0.0.1", &serialized_archive, limit));
447467
}
468+
469+
#[test]
470+
fn verify_tarball_test_incomplete_vcs_info() {
471+
let mut pkg = tar::Builder::new(vec![]);
472+
add_file(&mut pkg, "foo-0.0.1/Cargo.toml", b"");
473+
add_file(
474+
&mut pkg,
475+
"foo-0.0.1/.cargo_vcs_info.json",
476+
br#"{"unknown": "field"}"#,
477+
);
478+
let mut serialized_archive = vec![];
479+
GzEncoder::new(pkg.into_inner().unwrap().as_slice(), Default::default())
480+
.read_to_end(&mut serialized_archive)
481+
.unwrap();
482+
let limit = 512 * 1024 * 1024;
483+
let vcs_info = verify_tarball("foo-0.0.1", &serialized_archive, limit)
484+
.unwrap()
485+
.unwrap();
486+
assert_eq!(vcs_info.path_in_vcs, "");
487+
}
488+
489+
#[test]
490+
fn verify_tarball_test_vcs_info() {
491+
let mut pkg = tar::Builder::new(vec![]);
492+
add_file(&mut pkg, "foo-0.0.1/Cargo.toml", b"");
493+
add_file(
494+
&mut pkg,
495+
"foo-0.0.1/.cargo_vcs_info.json",
496+
br#"{"path_in_vcs": "path/in/vcs"}"#,
497+
);
498+
let mut serialized_archive = vec![];
499+
GzEncoder::new(pkg.into_inner().unwrap().as_slice(), Default::default())
500+
.read_to_end(&mut serialized_archive)
501+
.unwrap();
502+
let limit = 512 * 1024 * 1024;
503+
let vcs_info = verify_tarball("foo-0.0.1", &serialized_archive, limit)
504+
.unwrap()
505+
.unwrap();
506+
assert_eq!(vcs_info.path_in_vcs, "path/in/vcs");
507+
}
448508
}

src/util.rs

+43
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,46 @@ impl Maximums {
5353
}
5454
}
5555
}
56+
57+
/// Represents relevant contents of .cargo_vcs_info.json file when uploaded from cargo
58+
/// or downloaded from crates.io
59+
#[derive(Debug, Deserialize, Eq, PartialEq)]
60+
pub struct CargoVcsInfo {
61+
/// Path to the package within repo (empty string if root). / not \
62+
#[serde(default)]
63+
pub path_in_vcs: String,
64+
}
65+
66+
impl CargoVcsInfo {
67+
pub fn from_contents(contents: &str) -> serde_json::Result<Self> {
68+
serde_json::from_str(contents)
69+
}
70+
}
71+
72+
#[cfg(test)]
73+
mod tests {
74+
use super::CargoVcsInfo;
75+
76+
#[test]
77+
fn test_cargo_vcs_info() {
78+
assert_eq!(CargoVcsInfo::from_contents("").ok(), None);
79+
assert_eq!(
80+
CargoVcsInfo::from_contents("{}").unwrap(),
81+
CargoVcsInfo {
82+
path_in_vcs: "".into()
83+
}
84+
);
85+
assert_eq!(
86+
CargoVcsInfo::from_contents(r#"{"path_in_vcs": "hi"}"#).unwrap(),
87+
CargoVcsInfo {
88+
path_in_vcs: "hi".into()
89+
}
90+
);
91+
assert_eq!(
92+
CargoVcsInfo::from_contents(r#"{"path_in_vcs": "hi", "future": "field"}"#).unwrap(),
93+
CargoVcsInfo {
94+
path_in_vcs: "hi".into()
95+
}
96+
);
97+
}
98+
}

src/worker/readmes.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,17 @@ pub fn render_and_upload_readme(
1414
text: String,
1515
readme_path: String,
1616
base_url: Option<String>,
17+
pkg_path_in_vcs: Option<String>,
1718
) -> Result<(), PerformError> {
1819
use crate::schema::*;
1920
use diesel::prelude::*;
2021

21-
let rendered = text_to_html(&text, &readme_path, base_url.as_deref());
22+
let rendered = text_to_html(
23+
&text,
24+
&readme_path,
25+
base_url.as_deref(),
26+
pkg_path_in_vcs.as_deref(),
27+
);
2228

2329
conn.transaction(|| {
2430
Version::record_readme_rendering(version_id, conn)?;

0 commit comments

Comments
 (0)