Update generate-copyright
This tool now scans for cargo dependencies and includes any important looking license files. We do this because cargo package metadata is not sufficient - the Apache-2.0 license says you have to include any NOTICE file, for example. And authors != copyright holders (cargo has the former, we must include the latter).
This commit is contained in:
parent
93ea767e29
commit
ba0d6c9739
@ -1408,6 +1408,8 @@ dependencies = [
|
||||
"anyhow",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -217,6 +217,8 @@ fn run(self, builder: &Builder<'_>) -> Self::Output {
|
||||
let mut cmd = builder.tool_cmd(Tool::GenerateCopyright);
|
||||
cmd.env("LICENSE_METADATA", &license_metadata);
|
||||
cmd.env("DEST", &dest);
|
||||
cmd.env("OUT_DIR", &builder.out);
|
||||
cmd.env("CARGO", &builder.initial_cargo);
|
||||
cmd.run(builder);
|
||||
|
||||
dest
|
||||
|
@ -2,6 +2,8 @@
|
||||
name = "collect-license-metadata"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
description = "Runs the reuse tool and caches the output, so rust toolchain devs don't need to have reuse installed"
|
||||
license = "MIT OR Apache-2.0"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.65"
|
||||
|
@ -8,6 +8,11 @@
|
||||
|
||||
use crate::licenses::LicensesInterner;
|
||||
|
||||
/// The entry point to the binary.
|
||||
///
|
||||
/// You should probably let `bootstrap` execute this program instead of running it directly.
|
||||
///
|
||||
/// Run `x.py run collect-license-metadata`
|
||||
fn main() -> Result<(), Error> {
|
||||
let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into();
|
||||
let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into();
|
||||
|
@ -2,6 +2,7 @@
|
||||
name = "generate-copyright"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
description = "Produces a manifest of all the copyrighted materials in the Rust Toolchain"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
@ -9,3 +10,5 @@ edition = "2021"
|
||||
anyhow = "1.0.65"
|
||||
serde = { version = "1.0.147", features = ["derive"] }
|
||||
serde_json = "1.0.85"
|
||||
thiserror = "1"
|
||||
tempfile = "3"
|
||||
|
196
src/tools/generate-copyright/src/cargo_metadata.rs
Normal file
196
src/tools/generate-copyright/src/cargo_metadata.rs
Normal file
@ -0,0 +1,196 @@
|
||||
//! Gets metadata about a workspace from Cargo
|
||||
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::path::Path;
|
||||
|
||||
/// Describes how this module can fail
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum Error {
|
||||
#[error("Failed to run cargo metadata: {0:?}")]
|
||||
LaunchingMetadata(#[from] std::io::Error),
|
||||
#[error("Failed get output from cargo metadata: {0:?}")]
|
||||
GettingMetadata(String),
|
||||
#[error("Failed parse JSON output from cargo metadata: {0:?}")]
|
||||
ParsingJson(#[from] serde_json::Error),
|
||||
#[error("Failed find expected JSON element {0} in output from cargo metadata")]
|
||||
MissingJsonElement(&'static str),
|
||||
#[error("Failed find expected JSON element {0} in output from cargo metadata for package {1}")]
|
||||
MissingJsonElementForPackage(String, String),
|
||||
#[error("Failed to run cargo vendor: {0:?}")]
|
||||
LaunchingVendor(std::io::Error),
|
||||
#[error("Failed to complete cargo vendor")]
|
||||
RunningVendor,
|
||||
}
|
||||
|
||||
/// A single package we depend on, together with its licensing information.
///
/// The derived ordering compares fields in declaration order, so values sort
/// by name first and then by version.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Dependency {
    /// Package name
    pub name: String,
    /// Package version number
    pub version: String,
    /// License the package is distributed under
    pub license: String,
    /// Authors listed in the package metadata
    pub authors: Vec<String>,
    /// Important files shipped with the package, mapped to their contents.
    ///
    /// This includes *COPYRIGHT*, *NOTICE*, *AUTHOR*, *LICENSE*, and *LICENCE* files, case-insensitive.
    pub notices: BTreeMap<OsString, String>,
}
|
||||
|
||||
/// Use `cargo` to get a list of dependencies and their license data.
|
||||
///
|
||||
/// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can
|
||||
/// grab the license files.
|
||||
///
|
||||
/// Any dependency with a path beginning with `root_path` is ignored, as we
|
||||
/// assume `reuse` has covered it already.
|
||||
pub fn get(
|
||||
cargo: &Path,
|
||||
dest: &Path,
|
||||
root_path: &Path,
|
||||
manifest_paths: &[&Path],
|
||||
) -> Result<BTreeSet<Dependency>, Error> {
|
||||
let mut temp_set = BTreeSet::new();
|
||||
// Look at the metadata for each manifest
|
||||
for manifest_path in manifest_paths {
|
||||
if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) {
|
||||
panic!("cargo_manifest::get requires a path to a Cargo.toml file");
|
||||
}
|
||||
let metadata_json = get_metadata_json(cargo, manifest_path)?;
|
||||
let packages = metadata_json["packages"]
|
||||
.as_array()
|
||||
.ok_or_else(|| Error::MissingJsonElement("packages array"))?;
|
||||
for package in packages {
|
||||
let package =
|
||||
package.as_object().ok_or_else(|| Error::MissingJsonElement("package object"))?;
|
||||
let manifest_path = package
|
||||
.get("manifest_path")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(Path::new)
|
||||
.ok_or_else(|| Error::MissingJsonElement("package.manifest_path"))?;
|
||||
if manifest_path.starts_with(&root_path) {
|
||||
// it's an in-tree dependency and reuse covers it
|
||||
continue;
|
||||
}
|
||||
// otherwise it's an out-of-tree dependency
|
||||
let get_string = |field_name: &str, package_name: &str| {
|
||||
package.get(field_name).and_then(|v| v.as_str()).ok_or_else(|| {
|
||||
Error::MissingJsonElementForPackage(
|
||||
format!("package.{field_name}"),
|
||||
package_name.to_owned(),
|
||||
)
|
||||
})
|
||||
};
|
||||
let name = get_string("name", "unknown")?;
|
||||
let license = get_string("license", name)?;
|
||||
let version = get_string("version", name)?;
|
||||
let authors_list = package
|
||||
.get("authors")
|
||||
.and_then(|v| v.as_array())
|
||||
.ok_or_else(|| Error::MissingJsonElement("package.authors"))?;
|
||||
let authors: Vec<String> =
|
||||
authors_list.iter().filter_map(|v| v.as_str()).map(|s| s.to_owned()).collect();
|
||||
temp_set.insert(Dependency {
|
||||
name: name.to_owned(),
|
||||
version: version.to_owned(),
|
||||
license: license.to_owned(),
|
||||
authors,
|
||||
notices: BTreeMap::new(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Now do a cargo-vendor and grab everything
|
||||
let vendor_path = dest.join("vendor");
|
||||
println!("Vendoring deps into {}...", vendor_path.display());
|
||||
run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;
|
||||
|
||||
// Now for each dependency we found, go and grab any important looking files
|
||||
let mut output = BTreeSet::new();
|
||||
for mut dep in temp_set {
|
||||
load_important_files(&mut dep, &vendor_path)?;
|
||||
output.insert(dep);
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// Get cargo-metdata for a package, as JSON
|
||||
fn get_metadata_json(cargo: &Path, manifest_path: &Path) -> Result<serde_json::Value, Error> {
|
||||
let metadata_output = std::process::Command::new(cargo)
|
||||
.arg("metadata")
|
||||
.arg("--format-version=1")
|
||||
.arg("--all-features")
|
||||
.arg("--manifest-path")
|
||||
.arg(manifest_path)
|
||||
.env("RUSTC_BOOTSTRAP", "1")
|
||||
.output()
|
||||
.map_err(|e| Error::LaunchingMetadata(e))?;
|
||||
if !metadata_output.status.success() {
|
||||
return Err(Error::GettingMetadata(
|
||||
String::from_utf8(metadata_output.stderr).expect("UTF-8 output from cargo"),
|
||||
));
|
||||
}
|
||||
let json = serde_json::from_slice(&metadata_output.stdout)?;
|
||||
Ok(json)
|
||||
}
|
||||
|
||||
/// Run cargo-vendor, fetching into the given dir
|
||||
fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Result<(), Error> {
|
||||
let mut vendor_command = std::process::Command::new(cargo);
|
||||
vendor_command.env("RUSTC_BOOTSTRAP", "1");
|
||||
vendor_command.arg("vendor");
|
||||
vendor_command.arg("--quiet");
|
||||
vendor_command.arg("--versioned-dirs");
|
||||
for manifest_path in manifest_paths {
|
||||
vendor_command.arg("-s");
|
||||
vendor_command.arg(manifest_path);
|
||||
}
|
||||
vendor_command.arg(dest);
|
||||
|
||||
let vendor_status = vendor_command.status().map_err(|e| Error::LaunchingVendor(e))?;
|
||||
|
||||
if !vendor_status.success() {
|
||||
return Err(Error::RunningVendor);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add important files off disk into this dependency.
|
||||
///
|
||||
/// Maybe one-day Cargo.toml will contain enough information that we don't need
|
||||
/// to do this manual scraping.
|
||||
fn load_important_files(dep: &mut Dependency, vendor_root: &Path) -> Result<(), Error> {
|
||||
let name_version = format!("{}-{}", dep.name, dep.version);
|
||||
println!("Scraping notices for {}...", name_version);
|
||||
let dep_vendor_path = vendor_root.join(name_version);
|
||||
for entry in std::fs::read_dir(dep_vendor_path)? {
|
||||
let entry = entry?;
|
||||
let metadata = entry.metadata()?;
|
||||
let path = entry.path();
|
||||
if let Some(filename) = path.file_name() {
|
||||
let lc_filename = filename.to_ascii_lowercase();
|
||||
let lc_filename_str = lc_filename.to_string_lossy();
|
||||
let mut keep = false;
|
||||
for m in ["copyright", "licence", "license", "author", "notice"] {
|
||||
if lc_filename_str.contains(m) {
|
||||
keep = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if keep {
|
||||
if metadata.is_dir() {
|
||||
// scoop up whole directory
|
||||
} else if metadata.is_file() {
|
||||
println!("Scraping {}", filename.to_string_lossy());
|
||||
dep.notices.insert(filename.to_owned(), std::fs::read_to_string(path)?);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
@ -1,54 +1,114 @@
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use anyhow::Error;
|
||||
|
||||
mod cargo_metadata;
|
||||
|
||||
/// The entry point to the binary.
|
||||
///
|
||||
/// You should probably let `bootstrap` execute this program instead of running it directly.
|
||||
///
|
||||
/// Run `x.py run generate-copyright`
|
||||
fn main() -> Result<(), Error> {
|
||||
let dest = env_path("DEST")?;
|
||||
let dest_file = env_path("DEST")?;
|
||||
let out_dir = env_path("OUT_DIR")?;
|
||||
let cargo = env_path("CARGO")?;
|
||||
let license_metadata = env_path("LICENSE_METADATA")?;
|
||||
|
||||
let metadata: Metadata = serde_json::from_slice(&std::fs::read(&license_metadata)?)?;
|
||||
let collected_tree_metadata: Metadata =
|
||||
serde_json::from_slice(&std::fs::read(&license_metadata)?)?;
|
||||
|
||||
let root_path = std::path::absolute(".")?;
|
||||
let workspace_paths = [
|
||||
Path::new("./Cargo.toml"),
|
||||
Path::new("./src/tools/cargo/Cargo.toml"),
|
||||
Path::new("./library/std/Cargo.toml"),
|
||||
];
|
||||
let collected_cargo_metadata =
|
||||
cargo_metadata::get(&cargo, &out_dir, &root_path, &workspace_paths)?;
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
render_recursive(&metadata.files, &mut buffer, 0)?;
|
||||
|
||||
std::fs::write(&dest, &buffer)?;
|
||||
writeln!(buffer, "# COPYRIGHT for Rust")?;
|
||||
writeln!(buffer)?;
|
||||
writeln!(
|
||||
buffer,
|
||||
"This file describes the copyright and licensing information for the source code within The Rust Project git tree, and the third-party dependencies used when building the Rust toolchain (including the Rust Standard Library)"
|
||||
)?;
|
||||
writeln!(buffer)?;
|
||||
writeln!(buffer, "## Table of Contents")?;
|
||||
writeln!(buffer)?;
|
||||
writeln!(buffer, "* [In-tree files](#in-tree-files)")?;
|
||||
writeln!(buffer, "* [Out-of-tree files](#out-of-tree-files)")?;
|
||||
// writeln!(buffer, "* [License Texts](#license-texts)")?;
|
||||
writeln!(buffer)?;
|
||||
|
||||
writeln!(buffer, "## In-tree files")?;
|
||||
writeln!(buffer)?;
|
||||
writeln!(
|
||||
buffer,
|
||||
"The following licenses cover the in-tree source files that were used in this release:"
|
||||
)?;
|
||||
writeln!(buffer)?;
|
||||
render_tree_recursive(&collected_tree_metadata.files, &mut buffer, 0)?;
|
||||
|
||||
writeln!(buffer)?;
|
||||
|
||||
writeln!(buffer, "## Out-of-tree files")?;
|
||||
writeln!(buffer)?;
|
||||
writeln!(
|
||||
buffer,
|
||||
"The following licenses cover the out-of-tree crates that were used in this release:"
|
||||
)?;
|
||||
writeln!(buffer)?;
|
||||
render_deps(collected_cargo_metadata.iter(), &mut buffer)?;
|
||||
|
||||
std::fs::write(&dest_file, &buffer)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn render_recursive(node: &Node, buffer: &mut Vec<u8>, depth: usize) -> Result<(), Error> {
|
||||
/// Recursively draw the tree of files/folders we found on disk and their licenses, as
|
||||
/// markdown, into the given Vec.
|
||||
fn render_tree_recursive(node: &Node, buffer: &mut Vec<u8>, depth: usize) -> Result<(), Error> {
|
||||
let prefix = std::iter::repeat("> ").take(depth + 1).collect::<String>();
|
||||
|
||||
match node {
|
||||
Node::Root { children } => {
|
||||
for child in children {
|
||||
render_recursive(child, buffer, depth)?;
|
||||
render_tree_recursive(child, buffer, depth)?;
|
||||
}
|
||||
}
|
||||
Node::Directory { name, children, license } => {
|
||||
render_license(&prefix, std::iter::once(name), license.as_ref(), buffer)?;
|
||||
render_tree_license(&prefix, std::iter::once(name), license.as_ref(), buffer)?;
|
||||
if !children.is_empty() {
|
||||
writeln!(buffer, "{prefix}")?;
|
||||
writeln!(buffer, "{prefix}*Exceptions:*")?;
|
||||
for child in children {
|
||||
writeln!(buffer, "{prefix}")?;
|
||||
render_recursive(child, buffer, depth + 1)?;
|
||||
render_tree_recursive(child, buffer, depth + 1)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Node::Group { files, directories, license } => {
|
||||
render_license(&prefix, directories.iter().chain(files.iter()), Some(license), buffer)?;
|
||||
render_tree_license(
|
||||
&prefix,
|
||||
directories.iter().chain(files.iter()),
|
||||
Some(license),
|
||||
buffer,
|
||||
)?;
|
||||
}
|
||||
Node::File { name, license } => {
|
||||
render_license(&prefix, std::iter::once(name), Some(license), buffer)?;
|
||||
render_tree_license(&prefix, std::iter::once(name), Some(license), buffer)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn render_license<'a>(
|
||||
/// Draw a series of sibling files/folders, as markdown, into the given Vec.
|
||||
fn render_tree_license<'a>(
|
||||
prefix: &str,
|
||||
names: impl Iterator<Item = &'a String>,
|
||||
license: Option<&License>,
|
||||
@ -67,11 +127,47 @@ fn render_license<'a>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Render a list of out-of-tree dependencies as markdown into the given Vec.
|
||||
fn render_deps<'a, 'b>(
|
||||
deps: impl Iterator<Item = &'a cargo_metadata::Dependency>,
|
||||
buffer: &'b mut Vec<u8>,
|
||||
) -> Result<(), Error> {
|
||||
for dep in deps {
|
||||
let authors_list = dep.authors.join(", ").replace("<", "\\<").replace(">", "\\>");
|
||||
let url = format!("https://crates.io/crates/{}/{}", dep.name, dep.version);
|
||||
writeln!(buffer)?;
|
||||
writeln!(
|
||||
buffer,
|
||||
"### [{name} {version}]({url})",
|
||||
name = dep.name,
|
||||
version = dep.version,
|
||||
url = url,
|
||||
)?;
|
||||
writeln!(buffer)?;
|
||||
writeln!(buffer, "* Authors: {}", authors_list)?;
|
||||
writeln!(buffer, "* License: {}", dep.license)?;
|
||||
for (name, contents) in &dep.notices {
|
||||
writeln!(buffer)?;
|
||||
writeln!(buffer, "#### {}", name.to_string_lossy())?;
|
||||
writeln!(buffer)?;
|
||||
writeln!(buffer, "<details><summary>Click to expand</summary>")?;
|
||||
writeln!(buffer)?;
|
||||
writeln!(buffer, "```")?;
|
||||
writeln!(buffer, "{}", contents)?;
|
||||
writeln!(buffer, "```")?;
|
||||
writeln!(buffer)?;
|
||||
writeln!(buffer, "</details>")?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
/// Describes a tree of metadata for our filesystem tree
|
||||
#[derive(serde::Deserialize)]
|
||||
struct Metadata {
|
||||
files: Node,
|
||||
}
|
||||
|
||||
/// Describes one node in our metadata tree
|
||||
#[derive(serde::Deserialize)]
|
||||
#[serde(rename_all = "kebab-case", tag = "type")]
|
||||
pub(crate) enum Node {
|
||||
@ -81,12 +177,14 @@ pub(crate) enum Node {
|
||||
Group { files: Vec<String>, directories: Vec<String>, license: License },
|
||||
}
|
||||
|
||||
/// A License has an SPDX license name and a list of copyright holders.
|
||||
#[derive(serde::Deserialize)]
|
||||
struct License {
|
||||
spdx: String,
|
||||
copyright: Vec<String>,
|
||||
}
|
||||
|
||||
/// Grab an environment variable as a PathBuf, or fail nicely.
|
||||
fn env_path(var: &str) -> Result<PathBuf, Error> {
|
||||
if let Some(var) = std::env::var_os(var) {
|
||||
Ok(var.into())
|
||||
|
Loading…
Reference in New Issue
Block a user