internal: Move grammar codegen into xtask

This commit is contained in:
Lukas Wirth 2024-03-19 10:40:36 +01:00
parent 232125be12
commit b38d5394bb
14 changed files with 448 additions and 405 deletions

View File

@ -32,4 +32,5 @@ jobs:
git config --global user.name "GitHub Action"
# Remove r-a crates from the workspaces so we don't auto-publish them as well
sed -i 's/ "crates\/\*"//' ./Cargo.toml
sed -i 's/ "xtask\/"//' ./Cargo.toml
cargo workspaces publish --yes --exact --from-git --no-git-commit --allow-dirty

8
Cargo.lock generated
View File

@ -1869,20 +1869,16 @@ dependencies = [
"itertools",
"once_cell",
"parser",
"proc-macro2",
"quote",
"ra-ap-rustc_lexer",
"rayon",
"rowan",
"rustc-hash",
"smol_str",
"sourcegen",
"stdx",
"test-utils",
"text-edit",
"tracing",
"triomphe",
"ungrammar",
]
[[package]]
@ -2438,8 +2434,12 @@ version = "0.1.0"
dependencies = [
"anyhow",
"flate2",
"itertools",
"proc-macro2",
"quote",
"stdx",
"time",
"ungrammar",
"write-json",
"xflags",
"xshell",

View File

@ -53,7 +53,7 @@ expect-test = "1.4.0"
tracing.workspace = true
tracing-subscriber.workspace = true
tracing-tree.workspace = true
project-model = { path = "../project-model" }
project-model.workspace = true
# local deps
test-utils.workspace = true

View File

@ -51,8 +51,12 @@ pub(crate) fn need_mut(ctx: &DiagnosticsContext<'_>, d: &hir::NeedMut) -> Option
// Diagnostic: unused-mut
//
// This diagnostic is triggered when a mutable variable isn't actually mutated.
pub(crate) fn unused_mut(ctx: &DiagnosticsContext<'_>, d: &hir::UnusedMut) -> Diagnostic {
pub(crate) fn unused_mut(ctx: &DiagnosticsContext<'_>, d: &hir::UnusedMut) -> Option<Diagnostic> {
let ast = d.local.primary_source(ctx.sema.db).syntax_ptr();
if ast.file_id.macro_file().is_some() {
// FIXME: Our infra can't handle `#[allow]` attributes from within macro expansions right now
return None;
}
let fixes = (|| {
let file_id = ast.file_id.file_id()?;
let mut edit_builder = TextEdit::builder();
@ -76,14 +80,16 @@ pub(crate) fn unused_mut(ctx: &DiagnosticsContext<'_>, d: &hir::UnusedMut) -> Di
)])
})();
let ast = d.local.primary_source(ctx.sema.db).syntax_ptr();
Diagnostic::new_with_syntax_node_ptr(
ctx,
DiagnosticCode::RustcLint("unused_mut"),
"variable does not need to be mutable",
ast,
Some(
Diagnostic::new_with_syntax_node_ptr(
ctx,
DiagnosticCode::RustcLint("unused_mut"),
"variable does not need to be mutable",
ast,
)
.experimental() // Not supporting `#[allow(unused_mut)]` in proc macros leads to false positive.
.with_fixes(fixes),
)
.experimental() // Not supporting `#[allow(unused_mut)]` in proc macros leads to false positive.
.with_fixes(fixes)
}
pub(super) fn token(parent: &SyntaxNode, kind: SyntaxKind) -> Option<SyntaxToken> {

View File

@ -387,7 +387,10 @@ pub fn diagnostics(
AnyDiagnostic::UnresolvedMethodCall(d) => handlers::unresolved_method::unresolved_method(&ctx, &d),
AnyDiagnostic::UnresolvedModule(d) => handlers::unresolved_module::unresolved_module(&ctx, &d),
AnyDiagnostic::UnresolvedProcMacro(d) => handlers::unresolved_proc_macro::unresolved_proc_macro(&ctx, &d, config.proc_macros_enabled, config.proc_attr_macros_enabled),
AnyDiagnostic::UnusedMut(d) => handlers::mutability_errors::unused_mut(&ctx, &d),
AnyDiagnostic::UnusedMut(d) => match handlers::mutability_errors::unused_mut(&ctx, &d) {
Some(it) => it,
None => continue,
},
AnyDiagnostic::UnusedVariable(d) => match handlers::unused_variables::unused_variables(&ctx, &d) {
Some(it) => it,
None => continue,

View File

@ -33,12 +33,8 @@ text-edit.workspace = true
[dev-dependencies]
rayon.workspace = true
expect-test = "1.4.0"
proc-macro2 = "1.0.47"
quote = "1.0.20"
ungrammar = "1.16.1"
test-utils.workspace = true
sourcegen.workspace = true
[features]
in-rust-tree = []

File diff suppressed because it is too large Load Diff

View File

@ -7,58 +7,16 @@ use crate::{
};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Whitespace {
pub struct Byte {
pub(crate) syntax: SyntaxToken,
}
impl std::fmt::Display for Whitespace {
impl std::fmt::Display for Byte {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.syntax, f)
}
}
impl AstToken for Whitespace {
fn can_cast(kind: SyntaxKind) -> bool { kind == WHITESPACE }
fn cast(syntax: SyntaxToken) -> Option<Self> {
if Self::can_cast(syntax.kind()) {
Some(Self { syntax })
} else {
None
}
}
fn syntax(&self) -> &SyntaxToken { &self.syntax }
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Comment {
pub(crate) syntax: SyntaxToken,
}
impl std::fmt::Display for Comment {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.syntax, f)
}
}
impl AstToken for Comment {
fn can_cast(kind: SyntaxKind) -> bool { kind == COMMENT }
fn cast(syntax: SyntaxToken) -> Option<Self> {
if Self::can_cast(syntax.kind()) {
Some(Self { syntax })
} else {
None
}
}
fn syntax(&self) -> &SyntaxToken { &self.syntax }
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct String {
pub(crate) syntax: SyntaxToken,
}
impl std::fmt::Display for String {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.syntax, f)
}
}
impl AstToken for String {
fn can_cast(kind: SyntaxKind) -> bool { kind == STRING }
impl AstToken for Byte {
fn can_cast(kind: SyntaxKind) -> bool { kind == BYTE }
fn cast(syntax: SyntaxToken) -> Option<Self> {
if Self::can_cast(syntax.kind()) {
Some(Self { syntax })
@ -112,16 +70,37 @@ impl AstToken for CString {
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IntNumber {
pub struct Char {
pub(crate) syntax: SyntaxToken,
}
impl std::fmt::Display for IntNumber {
impl std::fmt::Display for Char {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.syntax, f)
}
}
impl AstToken for IntNumber {
fn can_cast(kind: SyntaxKind) -> bool { kind == INT_NUMBER }
impl AstToken for Char {
fn can_cast(kind: SyntaxKind) -> bool { kind == CHAR }
fn cast(syntax: SyntaxToken) -> Option<Self> {
if Self::can_cast(syntax.kind()) {
Some(Self { syntax })
} else {
None
}
}
fn syntax(&self) -> &SyntaxToken { &self.syntax }
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Comment {
pub(crate) syntax: SyntaxToken,
}
impl std::fmt::Display for Comment {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.syntax, f)
}
}
impl AstToken for Comment {
fn can_cast(kind: SyntaxKind) -> bool { kind == COMMENT }
fn cast(syntax: SyntaxToken) -> Option<Self> {
if Self::can_cast(syntax.kind()) {
Some(Self { syntax })
@ -153,48 +132,6 @@ impl AstToken for FloatNumber {
fn syntax(&self) -> &SyntaxToken { &self.syntax }
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Char {
pub(crate) syntax: SyntaxToken,
}
impl std::fmt::Display for Char {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.syntax, f)
}
}
impl AstToken for Char {
fn can_cast(kind: SyntaxKind) -> bool { kind == CHAR }
fn cast(syntax: SyntaxToken) -> Option<Self> {
if Self::can_cast(syntax.kind()) {
Some(Self { syntax })
} else {
None
}
}
fn syntax(&self) -> &SyntaxToken { &self.syntax }
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Byte {
pub(crate) syntax: SyntaxToken,
}
impl std::fmt::Display for Byte {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.syntax, f)
}
}
impl AstToken for Byte {
fn can_cast(kind: SyntaxKind) -> bool { kind == BYTE }
fn cast(syntax: SyntaxToken) -> Option<Self> {
if Self::can_cast(syntax.kind()) {
Some(Self { syntax })
} else {
None
}
}
fn syntax(&self) -> &SyntaxToken { &self.syntax }
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Ident {
pub(crate) syntax: SyntaxToken,
@ -215,3 +152,66 @@ impl AstToken for Ident {
}
fn syntax(&self) -> &SyntaxToken { &self.syntax }
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IntNumber {
pub(crate) syntax: SyntaxToken,
}
impl std::fmt::Display for IntNumber {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.syntax, f)
}
}
impl AstToken for IntNumber {
fn can_cast(kind: SyntaxKind) -> bool { kind == INT_NUMBER }
fn cast(syntax: SyntaxToken) -> Option<Self> {
if Self::can_cast(syntax.kind()) {
Some(Self { syntax })
} else {
None
}
}
fn syntax(&self) -> &SyntaxToken { &self.syntax }
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct String {
pub(crate) syntax: SyntaxToken,
}
impl std::fmt::Display for String {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.syntax, f)
}
}
impl AstToken for String {
fn can_cast(kind: SyntaxKind) -> bool { kind == STRING }
fn cast(syntax: SyntaxToken) -> Option<Self> {
if Self::can_cast(syntax.kind()) {
Some(Self { syntax })
} else {
None
}
}
fn syntax(&self) -> &SyntaxToken { &self.syntax }
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Whitespace {
pub(crate) syntax: SyntaxToken,
}
impl std::fmt::Display for Whitespace {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.syntax, f)
}
}
impl AstToken for Whitespace {
fn can_cast(kind: SyntaxKind) -> bool { kind == WHITESPACE }
fn cast(syntax: SyntaxToken) -> Option<Self> {
if Self::can_cast(syntax.kind()) {
Some(Self { syntax })
} else {
None
}
}
fn syntax(&self) -> &SyntaxToken { &self.syntax }
}

View File

@ -1,8 +1,3 @@
#[cfg(not(feature = "in-rust-tree"))]
mod ast_src;
#[cfg(not(feature = "in-rust-tree"))]
mod sourcegen_ast;
use std::{
fs,
path::{Path, PathBuf},
@ -82,7 +77,25 @@ fn reparse_fuzz_tests() {
fn self_hosting_parsing() {
let crates_dir = project_root().join("crates");
let mut files = ::sourcegen::list_rust_files(&crates_dir);
let mut files = Vec::new();
let mut work = vec![crates_dir.to_path_buf()];
while let Some(dir) = work.pop() {
for entry in dir.read_dir().unwrap() {
let entry = entry.unwrap();
let file_type = entry.file_type().unwrap();
let path = entry.path();
let file_name = &path.file_name().unwrap_or_default().to_str().unwrap_or_default();
let is_hidden = file_name.starts_with('.');
if !is_hidden {
if file_type.is_dir() {
work.push(path);
} else if file_type.is_file() && file_name.ends_with(".rs") {
files.push(path);
}
}
}
}
files.retain(|path| {
// Keep only files that are not inside a `test_data` directory (e.g. crates/syntax/test_data)
!path.components().any(|component| component.as_os_str() == "test_data")

View File

@ -15,6 +15,10 @@ xflags = "0.3.0"
time = { version = "0.3", default-features = false }
zip = { version = "0.6", default-features = false, features = ["deflate", "time"] }
stdx.workspace = true
proc-macro2 = "1.0.47"
quote = "1.0.20"
ungrammar = "1.16.1"
itertools.workspace = true
# Avoid adding more dependencies to this crate
[lints]

View File

@ -9,6 +9,7 @@ use crate::{flags, project_root};
pub(crate) mod assists_doc_tests;
pub(crate) mod diagnostics_docs;
mod grammar;
mod lints;
impl flags::Codegen {
@ -20,6 +21,7 @@ impl flags::Codegen {
// `lints::generate(self.check)` is not run here: updating clones the rust repo, so don't
// run it unless explicitly asked for
}
flags::CodegenType::Grammar => grammar::generate(self.check),
flags::CodegenType::AssistsDocTests => assists_doc_tests::generate(self.check),
flags::CodegenType::DiagnosticsDocs => diagnostics_docs::generate(self.check),
flags::CodegenType::LintDefinitions => lints::generate(self.check),

View File

@ -3,37 +3,45 @@
//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
//! wrappers around `SyntaxNode` which implement `syntax::AstNode`.
use std::{collections::BTreeSet, fmt::Write};
#![allow(clippy::disallowed_types)]
use std::{
collections::{BTreeSet, HashSet},
fmt::Write,
fs,
};
use itertools::Itertools;
use proc_macro2::{Punct, Spacing};
use quote::{format_ident, quote};
use rustc_hash::FxHashSet;
use ungrammar::{Grammar, Rule};
use crate::tests::ast_src::{
AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc, KINDS_SRC,
use crate::{
codegen::{add_preamble, ensure_file_contents, reformat},
project_root,
};
#[test]
fn sourcegen_ast() {
let syntax_kinds = generate_syntax_kinds(KINDS_SRC);
let syntax_kinds_file =
sourcegen::project_root().join("crates/parser/src/syntax_kind/generated.rs");
sourcegen::ensure_file_contents(syntax_kinds_file.as_path(), &syntax_kinds);
mod ast_src;
use self::ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc, KINDS_SRC};
let grammar =
include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/rust.ungram")).parse().unwrap();
pub(crate) fn generate(check: bool) {
let syntax_kinds = generate_syntax_kinds(KINDS_SRC);
let syntax_kinds_file = project_root().join("crates/parser/src/syntax_kind/generated.rs");
ensure_file_contents(syntax_kinds_file.as_path(), &syntax_kinds, check);
let grammar = fs::read_to_string(project_root().join("crates/syntax/rust.ungram"))
.unwrap()
.parse()
.unwrap();
let ast = lower(&grammar);
let ast_tokens = generate_tokens(&ast);
let ast_tokens_file =
sourcegen::project_root().join("crates/syntax/src/ast/generated/tokens.rs");
sourcegen::ensure_file_contents(ast_tokens_file.as_path(), &ast_tokens);
let ast_tokens_file = project_root().join("crates/syntax/src/ast/generated/tokens.rs");
ensure_file_contents(ast_tokens_file.as_path(), &ast_tokens, check);
let ast_nodes = generate_nodes(KINDS_SRC, &ast);
let ast_nodes_file = sourcegen::project_root().join("crates/syntax/src/ast/generated/nodes.rs");
sourcegen::ensure_file_contents(ast_nodes_file.as_path(), &ast_nodes);
let ast_nodes_file = project_root().join("crates/syntax/src/ast/generated/nodes.rs");
ensure_file_contents(ast_nodes_file.as_path(), &ast_nodes, check);
}
fn generate_tokens(grammar: &AstSrc) -> String {
@ -60,9 +68,9 @@ fn generate_tokens(grammar: &AstSrc) -> String {
}
});
sourcegen::add_preamble(
add_preamble(
"sourcegen_ast",
sourcegen::reformat(
reformat(
quote! {
use crate::{SyntaxKind::{self, *}, SyntaxToken, ast::AstToken};
#(#tokens)*
@ -77,7 +85,6 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> String {
let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
.nodes
.iter()
.sorted_by_key(|it| it.name.clone())
.map(|node| {
let name = format_ident!("{}", node.name);
let kind = format_ident!("{}", to_upper_snake_case(&node.name));
@ -89,13 +96,12 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> String {
node.name != "ForExpr" && node.name != "WhileExpr"
|| trait_name.as_str() != "HasLoopBody"
})
.sorted()
.map(|trait_name| {
let trait_name = format_ident!("{}", trait_name);
quote!(impl ast::#trait_name for #name {})
});
let methods = node.fields.iter().sorted_by_key(|it| it.method_name()).map(|field| {
let methods = node.fields.iter().map(|field| {
let method_name = field.method_name();
let ty = field.ty();
@ -151,7 +157,6 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> String {
let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
.enums
.iter()
.sorted_by_key(|it| it.name.clone())
.map(|en| {
let variants: Vec<_> =
en.variants.iter().map(|var| format_ident!("{}", var)).sorted().collect();
@ -216,14 +221,13 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> String {
)
})
.unzip();
let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
.nodes
.iter()
.flat_map(|node| node.traits.iter().map(move |t| (t, node)))
.into_group_map()
.into_iter()
.sorted_by_key(|(k, _)| *k)
.sorted_by_key(|(name, _)| *name)
.map(|(trait_name, nodes)| {
let name = format_ident!("Any{}", trait_name);
let trait_name = format_ident!("{}", trait_name);
@ -270,19 +274,17 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> String {
let node_names = grammar.nodes.iter().map(|it| &it.name);
let display_impls =
enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).sorted().map(
|name| {
quote! {
impl std::fmt::Display for #name {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
}
enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| {
quote! {
impl std::fmt::Display for #name {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
}
}
},
);
}
});
let defined_nodes: FxHashSet<_> = node_names.collect();
let defined_nodes: HashSet<_> = node_names.collect();
for node in kinds
.nodes
@ -326,7 +328,7 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> String {
}
}
let res = sourcegen::add_preamble("sourcegen_ast", sourcegen::reformat(res));
let res = add_preamble("sourcegen_ast", reformat(res));
res.replace("#[derive", "\n#[derive")
}
@ -456,7 +458,7 @@ fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> String {
}
};
sourcegen::add_preamble("sourcegen_ast", sourcegen::reformat(ast.to_string()))
add_preamble("sourcegen_ast", reformat(ast.to_string()))
}
fn to_upper_snake_case(s: &str) -> String {
@ -606,6 +608,20 @@ fn lower(grammar: &Grammar) -> AstSrc {
extract_enums(&mut res);
extract_struct_traits(&mut res);
extract_enum_traits(&mut res);
res.nodes.sort_by_key(|it| it.name.clone());
res.enums.sort_by_key(|it| it.name.clone());
res.tokens.sort();
res.nodes.iter_mut().for_each(|it| {
it.traits.sort();
it.fields.sort_by_key(|it| match it {
Field::Token(name) => (true, name.clone()),
Field::Node { name, .. } => (false, name.clone()),
});
});
res.enums.iter_mut().for_each(|it| {
it.traits.sort();
it.variants.sort();
});
res
}

View File

@ -91,6 +91,7 @@ pub struct Codegen {
pub enum CodegenType {
#[default]
All,
Grammar,
AssistsDocTests,
DiagnosticsDocs,
LintDefinitions,
@ -101,6 +102,7 @@ impl FromStr for CodegenType {
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"all" => Ok(Self::All),
"grammar" => Ok(Self::Grammar),
"assists-doc-tests" => Ok(Self::AssistsDocTests),
"diagnostics-docs" => Ok(Self::DiagnosticsDocs),
"lints-definitions" => Ok(Self::LintDefinitions),