From 85c42fba1291f1cc41fb7bfec63117895b394fc5 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 4 Feb 2018 16:46:26 +0300 Subject: [PATCH] Support contextual tokens --- grammar.ron | 5 + src/parser/event.rs | 27 +++-- src/parser/grammar/items/mod.rs | 22 +++- src/parser/input.rs | 7 +- src/parser/parser.rs | 19 ++- src/syntax_kinds.rs | 110 +++++++++--------- .../parser/inline/0009_unsafe_auto_trait.rs | 1 + .../parser/inline/0009_unsafe_auto_trait.txt | 13 +++ .../parser/inline/0010_unsafe_default_impl.rs | 1 + .../inline/0010_unsafe_default_impl.txt | 13 +++ tools/src/bin/gen.rs | 9 +- 11 files changed, 155 insertions(+), 72 deletions(-) create mode 100644 tests/data/parser/inline/0009_unsafe_auto_trait.rs create mode 100644 tests/data/parser/inline/0009_unsafe_auto_trait.txt create mode 100644 tests/data/parser/inline/0010_unsafe_default_impl.rs create mode 100644 tests/data/parser/inline/0010_unsafe_default_impl.txt diff --git a/grammar.ron b/grammar.ron index c2fcc44f5be..e97ef0c2ca5 100644 --- a/grammar.ron +++ b/grammar.ron @@ -27,6 +27,11 @@ Grammar( "mut", "unsafe", ], + contextual_keywords: [ + "auto", + "default", + "union", + ], tokens: [ "ERROR", "IDENT", diff --git a/src/parser/event.rs b/src/parser/event.rs index fd6bdc0863c..64d751d639f 100644 --- a/src/parser/event.rs +++ b/src/parser/event.rs @@ -1,4 +1,4 @@ -use {File, FileBuilder, Sink, SyntaxKind, Token}; +use {File, FileBuilder, Sink, SyntaxKind, Token, TextUnit}; use syntax_kinds::TOMBSTONE; use super::is_insignificant; @@ -120,18 +120,25 @@ pub(super) fn to_file(text: String, tokens: &[Token], events: Vec) -> Fil builder.finish_internal() } &Event::Token { - kind: _, + kind, mut n_raw_tokens, - } => loop { - let token = tokens[idx]; - if !is_insignificant(token.kind) { - n_raw_tokens -= 1; + } => { + // FIXME: currently, we attach whitespace to some random node + // this should be done in a sensible manner instead + loop { + let token = tokens[idx]; + if !is_insignificant(token.kind) { + break; + } + builder.leaf(token.kind, token.len); + idx += 1 } - idx += 1; - builder.leaf(token.kind, token.len); - if n_raw_tokens == 0 { - break; + let mut len = TextUnit::new(0); + for _ in 0..n_raw_tokens { + len += tokens[idx].len; + idx += 1; } + builder.leaf(kind, len); }, &Event::Error { ref message } => builder.error().message(message.clone()).emit(), } diff --git a/src/parser/grammar/items/mod.rs b/src/parser/grammar/items/mod.rs index 3612802e17b..4afe2e41864 100644 --- a/src/parser/grammar/items/mod.rs +++ b/src/parser/grammar/items/mod.rs @@ -81,7 +81,6 @@ fn item(p: &mut Parser) { CONST_ITEM } }, - // TODO: auto trait // test unsafe_trait // unsafe trait T {} UNSAFE_KW if la == TRAIT_KW => { @@ -89,7 +88,16 @@ fn item(p: &mut Parser) { traits::trait_item(p); TRAIT_ITEM } - // TODO: default impl + + // test unsafe_auto_trait + // unsafe auto trait T {} + UNSAFE_KW if p.at_kw(1, "auto") && p.nth(2) == TRAIT_KW => { + p.bump(); + p.bump_remap(AUTO_KW); + traits::trait_item(p); + TRAIT_ITEM + } + // test unsafe_impl // unsafe impl Foo {} UNSAFE_KW if la == IMPL_KW => { @@ -97,6 +105,16 @@ fn item(p: &mut Parser) { traits::impl_item(p); IMPL_ITEM } + + // test unsafe_default_impl + // unsafe default impl Foo {} + UNSAFE_KW if p.at_kw(1, "default") && p.nth(2) == IMPL_KW => { + p.bump(); + p.bump_remap(DEFAULT_KW); + traits::impl_item(p); + IMPL_ITEM + } + MOD_KW => { mod_item(p); MOD_ITEM diff --git a/src/parser/input.rs b/src/parser/input.rs index 162b9ef5fb6..2ad62116685 100644 --- a/src/parser/input.rs +++ b/src/parser/input.rs @@ -46,9 +46,10 @@ pub fn text(&self, pos: InputPosition) -> &'t str { if !(idx < self.tokens.len()) { return ""; } - let start_offset = self.start_offsets[idx]; - let end_offset = self.tokens[idx].len; - let range = TextRange::from_to(start_offset, end_offset); + let range = TextRange::from_len( + self.start_offsets[idx], + self.tokens[idx].len + ); &self.text[range] } } diff --git a/src/parser/parser.rs b/src/parser/parser.rs index bb775c4a5bc..7e1b22ee595 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -1,6 +1,6 @@ use super::Event; use super::input::{InputPosition, ParserInput}; -use SyntaxKind::{self, EOF, TOMBSTONE}; +use SyntaxKind::{self, EOF, TOMBSTONE, IDENT}; pub(crate) struct Marker { pos: u32, @@ -145,14 +145,31 @@ pub(crate) fn bump(&mut self) { }); } + pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { + if self.current() == EOF { + // TODO: panic!? + return; + } + self.pos += 1; + self.event(Event::Token { + kind, + n_raw_tokens: 1, + }); + } + pub(crate) fn nth(&self, n: u32) -> SyntaxKind { self.inp.kind(self.pos + n) } + pub(crate) fn at_kw(&self, n: u32, t: &str) -> bool { + self.nth(n) == IDENT && self.inp.text(self.pos + n) == t + } + pub(crate) fn current(&self) -> SyntaxKind { self.nth(0) } + fn event(&mut self, event: Event) { self.events.push(event) } diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs index 22c61583155..27bc1cafa53 100644 --- a/src/syntax_kinds.rs +++ b/src/syntax_kinds.rs @@ -6,32 +6,6 @@ /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum SyntaxKind { - USE_KW, - FN_KW, - STRUCT_KW, - ENUM_KW, - TRAIT_KW, - IMPL_KW, - TRUE_KW, - FALSE_KW, - AS_KW, - EXTERN_KW, - CRATE_KW, - MOD_KW, - PUB_KW, - SELF_KW, - SUPER_KW, - IN_KW, - WHERE_KW, - FOR_KW, - LOOP_KW, - WHILE_KW, - IF_KW, - MATCH_KW, - CONST_KW, - STATIC_KW, - MUT_KW, - UNSAFE_KW, ERROR, IDENT, UNDERSCORE, @@ -83,6 +57,35 @@ pub enum SyntaxKind { COMMENT, DOC_COMMENT, SHEBANG, + USE_KW, + FN_KW, + STRUCT_KW, + ENUM_KW, + TRAIT_KW, + IMPL_KW, + TRUE_KW, + FALSE_KW, + AS_KW, + EXTERN_KW, + CRATE_KW, + MOD_KW, + PUB_KW, + SELF_KW, + SUPER_KW, + IN_KW, + WHERE_KW, + FOR_KW, + LOOP_KW, + WHILE_KW, + IF_KW, + MATCH_KW, + CONST_KW, + STATIC_KW, + MUT_KW, + UNSAFE_KW, + AUTO_KW, + DEFAULT_KW, + UNION_KW, FILE, STRUCT_ITEM, ENUM_ITEM, @@ -123,32 +126,6 @@ pub enum SyntaxKind { impl SyntaxKind { pub(crate) fn info(self) -> &'static SyntaxInfo { match self { - USE_KW => &SyntaxInfo { name: "USE_KW" }, - FN_KW => &SyntaxInfo { name: "FN_KW" }, - STRUCT_KW => &SyntaxInfo { name: "STRUCT_KW" }, - ENUM_KW => &SyntaxInfo { name: "ENUM_KW" }, - TRAIT_KW => &SyntaxInfo { name: "TRAIT_KW" }, - IMPL_KW => &SyntaxInfo { name: "IMPL_KW" }, - TRUE_KW => &SyntaxInfo { name: "TRUE_KW" }, - FALSE_KW => &SyntaxInfo { name: "FALSE_KW" }, - AS_KW => &SyntaxInfo { name: "AS_KW" }, - EXTERN_KW => &SyntaxInfo { name: "EXTERN_KW" }, - CRATE_KW => &SyntaxInfo { name: "CRATE_KW" }, - MOD_KW => &SyntaxInfo { name: "MOD_KW" }, - PUB_KW => &SyntaxInfo { name: "PUB_KW" }, - SELF_KW => &SyntaxInfo { name: "SELF_KW" }, - SUPER_KW => &SyntaxInfo { name: "SUPER_KW" }, - IN_KW => &SyntaxInfo { name: "IN_KW" }, - WHERE_KW => &SyntaxInfo { name: "WHERE_KW" }, - FOR_KW => &SyntaxInfo { name: "FOR_KW" }, - LOOP_KW => &SyntaxInfo { name: "LOOP_KW" }, - WHILE_KW => &SyntaxInfo { name: "WHILE_KW" }, - IF_KW => &SyntaxInfo { name: "IF_KW" }, - MATCH_KW => &SyntaxInfo { name: "MATCH_KW" }, - CONST_KW => &SyntaxInfo { name: "CONST_KW" }, - STATIC_KW => &SyntaxInfo { name: "STATIC_KW" }, - MUT_KW => &SyntaxInfo { name: "MUT_KW" }, - UNSAFE_KW => &SyntaxInfo { name: "UNSAFE_KW" }, ERROR => &SyntaxInfo { name: "ERROR" }, IDENT => &SyntaxInfo { name: "IDENT" }, UNDERSCORE => &SyntaxInfo { name: "UNDERSCORE" }, @@ -200,6 +177,35 @@ pub(crate) fn info(self) -> &'static SyntaxInfo { COMMENT => &SyntaxInfo { name: "COMMENT" }, DOC_COMMENT => &SyntaxInfo { name: "DOC_COMMENT" }, SHEBANG => &SyntaxInfo { name: "SHEBANG" }, + USE_KW => &SyntaxInfo { name: "USE_KW" }, + FN_KW => &SyntaxInfo { name: "FN_KW" }, + STRUCT_KW => &SyntaxInfo { name: "STRUCT_KW" }, + ENUM_KW => &SyntaxInfo { name: "ENUM_KW" }, + TRAIT_KW => &SyntaxInfo { name: "TRAIT_KW" }, + IMPL_KW => &SyntaxInfo { name: "IMPL_KW" }, + TRUE_KW => &SyntaxInfo { name: "TRUE_KW" }, + FALSE_KW => &SyntaxInfo { name: "FALSE_KW" }, + AS_KW => &SyntaxInfo { name: "AS_KW" }, + EXTERN_KW => &SyntaxInfo { name: "EXTERN_KW" }, + CRATE_KW => &SyntaxInfo { name: "CRATE_KW" }, + MOD_KW => &SyntaxInfo { name: "MOD_KW" }, + PUB_KW => &SyntaxInfo { name: "PUB_KW" }, + SELF_KW => &SyntaxInfo { name: "SELF_KW" }, + SUPER_KW => &SyntaxInfo { name: "SUPER_KW" }, + IN_KW => &SyntaxInfo { name: "IN_KW" }, + WHERE_KW => &SyntaxInfo { name: "WHERE_KW" }, + FOR_KW => &SyntaxInfo { name: "FOR_KW" }, + LOOP_KW => &SyntaxInfo { name: "LOOP_KW" }, + WHILE_KW => &SyntaxInfo { name: "WHILE_KW" }, + IF_KW => &SyntaxInfo { name: "IF_KW" }, + MATCH_KW => &SyntaxInfo { name: "MATCH_KW" }, + CONST_KW => &SyntaxInfo { name: "CONST_KW" }, + STATIC_KW => &SyntaxInfo { name: "STATIC_KW" }, + MUT_KW => &SyntaxInfo { name: "MUT_KW" }, + UNSAFE_KW => &SyntaxInfo { name: "UNSAFE_KW" }, + AUTO_KW => &SyntaxInfo { name: "AUTO_KW" }, + DEFAULT_KW => &SyntaxInfo { name: "DEFAULT_KW" }, + UNION_KW => &SyntaxInfo { name: "UNION_KW" }, FILE => &SyntaxInfo { name: "FILE" }, STRUCT_ITEM => &SyntaxInfo { name: "STRUCT_ITEM" }, ENUM_ITEM => &SyntaxInfo { name: "ENUM_ITEM" }, diff --git a/tests/data/parser/inline/0009_unsafe_auto_trait.rs b/tests/data/parser/inline/0009_unsafe_auto_trait.rs new file mode 100644 index 00000000000..03d29f3241d --- /dev/null +++ b/tests/data/parser/inline/0009_unsafe_auto_trait.rs @@ -0,0 +1 @@ +unsafe auto trait T {} diff --git a/tests/data/parser/inline/0009_unsafe_auto_trait.txt b/tests/data/parser/inline/0009_unsafe_auto_trait.txt new file mode 100644 index 00000000000..0a9a1e11747 --- /dev/null +++ b/tests/data/parser/inline/0009_unsafe_auto_trait.txt @@ -0,0 +1,13 @@ +FILE@[0; 23) + TRAIT_ITEM@[0; 23) + UNSAFE_KW@[0; 6) + WHITESPACE@[6; 7) + AUTO_KW@[7; 11) + WHITESPACE@[11; 12) + TRAIT_KW@[12; 17) + WHITESPACE@[17; 18) + IDENT@[18; 19) "T" + WHITESPACE@[19; 20) + L_CURLY@[20; 21) + R_CURLY@[21; 22) + WHITESPACE@[22; 23) diff --git a/tests/data/parser/inline/0010_unsafe_default_impl.rs b/tests/data/parser/inline/0010_unsafe_default_impl.rs new file mode 100644 index 00000000000..9cd6c57bd89 --- /dev/null +++ b/tests/data/parser/inline/0010_unsafe_default_impl.rs @@ -0,0 +1 @@ +unsafe default impl Foo {} diff --git a/tests/data/parser/inline/0010_unsafe_default_impl.txt b/tests/data/parser/inline/0010_unsafe_default_impl.txt new file mode 100644 index 00000000000..7450381cbc9 --- /dev/null +++ b/tests/data/parser/inline/0010_unsafe_default_impl.txt @@ -0,0 +1,13 @@ +FILE@[0; 27) + IMPL_ITEM@[0; 27) + UNSAFE_KW@[0; 6) + WHITESPACE@[6; 7) + DEFAULT_KW@[7; 14) + WHITESPACE@[14; 15) + IMPL_KW@[15; 19) + WHITESPACE@[19; 20) + IDENT@[20; 23) "Foo" + WHITESPACE@[23; 24) + L_CURLY@[24; 25) + R_CURLY@[25; 26) + WHITESPACE@[26; 27) diff --git a/tools/src/bin/gen.rs b/tools/src/bin/gen.rs index 17cdea7a10c..c71e6da7376 100644 --- a/tools/src/bin/gen.rs +++ b/tools/src/bin/gen.rs @@ -20,6 +20,7 @@ fn main() { #[derive(Deserialize)] struct Grammar { keywords: Vec, + contextual_keywords: Vec, tokens: Vec, nodes: Vec, } @@ -38,10 +39,9 @@ fn to_syntax_kinds(&self) -> String { acc.push_str("use tree::SyntaxInfo;\n"); acc.push_str("\n"); - let syntax_kinds: Vec = self.keywords - .iter() - .map(|kw| kw_token(kw)) - .chain(self.tokens.iter().cloned()) + let syntax_kinds: Vec =self.tokens.iter().cloned() + .chain(self.keywords.iter().map(|kw| kw_token(kw))) + .chain(self.contextual_keywords.iter().map(|kw| kw_token(kw))) .chain(self.nodes.iter().cloned()) .collect(); @@ -86,6 +86,7 @@ fn to_syntax_kinds(&self) -> String { // fn ident_to_keyword acc.push_str("pub(crate) fn ident_to_keyword(ident: &str) -> Option {\n"); acc.push_str(" match ident {\n"); + // NB: no contextual_keywords here! for kw in self.keywords.iter() { write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap(); }