Reorganize

This commit is contained in:
Aleksey Kladov 2018-07-29 15:16:07 +03:00
parent ad188d4c3d
commit 415c891d64
20 changed files with 209 additions and 196 deletions

View File

@ -1,7 +1,6 @@
use lexer::ptr::Ptr;
use SyntaxKind;
use syntax_kinds::*;
use SyntaxKind::{self, *};
pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool {
if ptr.next_is('!') && ptr.nnext_is('/') {

View File

@ -1,21 +1,32 @@
use {SyntaxKind, Token};
use syntax_kinds::*;
mod ptr;
use self::ptr::Ptr;
mod classes;
use self::classes::*;
mod numbers;
use self::numbers::scan_number;
mod strings;
use self::strings::{is_string_literal_start, scan_byte_char_or_string, scan_char, scan_raw_string,
scan_string};
mod comments;
use self::comments::{scan_comment, scan_shebang};
mod strings;
mod numbers;
mod classes;
use {
TextUnit,
SyntaxKind::{self, *},
};
use self::{
ptr::Ptr,
classes::*,
numbers::scan_number,
strings::{
is_string_literal_start, scan_byte_char_or_string, scan_char,
scan_raw_string, scan_string},
comments::{scan_comment, scan_shebang},
};
/// A single lexed token: its kind plus the length of the text it covers.
///
/// A token stores no text or offset of its own, only `kind` and `len`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token {
/// The kind of token.
pub kind: SyntaxKind,
/// The length of the token's text, as a `TextUnit`.
pub len: TextUnit,
}
/// Break a string up into its component tokens
pub fn tokenize(text: &str) -> Vec<Token> {
@ -29,6 +40,7 @@ pub fn tokenize(text: &str) -> Vec<Token> {
}
acc
}
/// Get the next token from a string
pub fn next_token(text: &str) -> Token {
assert!(!text.is_empty());
@ -109,7 +121,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
DOTDOT
}
_ => DOT,
}
};
}
':' => {
return match ptr.next() {
@ -118,7 +130,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
COLONCOLON
}
_ => COLON,
}
};
}
'=' => {
return match ptr.next() {
@ -131,7 +143,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
FAT_ARROW
}
_ => EQ,
}
};
}
'!' => {
return match ptr.next() {
@ -140,7 +152,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
NEQ
}
_ => EXCL,
}
};
}
'-' => {
return if ptr.next_is('>') {
@ -148,7 +160,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
THIN_ARROW
} else {
MINUS
}
};
}
// If the character is an ident start not followed by another single
@ -202,7 +214,7 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
return if c == '_' { UNDERSCORE } else { IDENT };
}
ptr.bump_while(is_ident_continue);
if let Some(kind) = ident_to_keyword(ptr.current_token_text()) {
if let Some(kind) = SyntaxKind::from_keyword(ptr.current_token_text()) {
return kind;
}
IDENT

View File

@ -1,8 +1,7 @@
use lexer::ptr::Ptr;
use lexer::classes::*;
use SyntaxKind;
use syntax_kinds::*;
use SyntaxKind::{self, *};
pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind {
if c == '0' {

View File

@ -1,5 +1,4 @@
use SyntaxKind;
use syntax_kinds::*;
use SyntaxKind::{self, *};
use lexer::ptr::Ptr;

View File

@ -19,27 +19,36 @@
extern crate unicode_xid;
extern crate text_unit;
mod tree;
mod lexer;
mod parser;
mod yellow;
mod syntax_kinds;
pub use {
text_unit::{TextRange, TextUnit},
syntax_kinds::SyntaxKind,
yellow::{SyntaxNode},
lexer::{tokenize, Token},
};
pub(crate) use {
yellow::SyntaxError
};
/// Parses `text` into a `SyntaxNode`.
///
/// The text is tokenized first; the text and its tokens are then handed to
/// `parser::parse`, which builds the result through `yellow::GreenBuilder`.
pub fn parse(text: String) -> SyntaxNode {
    // Tokenize before `text` is moved into the parser.
    let raw_tokens = tokenize(&text);
    parser::parse::<yellow::GreenBuilder>(text, &raw_tokens)
}
pub mod syntax_kinds;
pub use text_unit::{TextRange, TextUnit};
pub use tree::{SyntaxKind, Token};
pub(crate) use tree::{Sink, GreenBuilder};
pub use lexer::{next_token, tokenize};
pub use yellow::SyntaxNode;
pub(crate) use yellow::SError;
pub use parser::{parse_green};
/// Utilities for simple uses of the parser.
pub mod utils {
use std::fmt::Write;
use std::{
fmt::Write,
collections::BTreeSet
};
use {SyntaxNode};
use std::collections::BTreeSet;
use SError;
use {SyntaxNode, SyntaxError};
/// Parse a file and create a string representation of the resulting parse tree.
pub fn dump_tree_green(syntax: &SyntaxNode) -> String {
@ -48,7 +57,7 @@ pub fn dump_tree_green(syntax: &SyntaxNode) -> String {
go(syntax, &mut result, 0, &mut errors);
return result;
fn go(node: &SyntaxNode, buff: &mut String, level: usize, errors: &mut BTreeSet<SError>) {
fn go(node: &SyntaxNode, buff: &mut String, level: usize, errors: &mut BTreeSet<SyntaxError>) {
buff.push_str(&String::from(" ").repeat(level));
write!(buff, "{:?}\n", node).unwrap();
let my_errors: Vec<_> = errors.iter().filter(|e| e.offset == node.range().start())

View File

@ -1,8 +1,29 @@
//! This module provides a way to construct a `File`.
//! It is intended to be completely decoupled from the
//! parser, so that the tree representation and the
//! parser algorithm can evolve independently.
//!
//! The `Sink` trait is the bridge between the parser and the
//! tree builder: the parser produces a stream of events like
//! `start node`, `finish node`, and a `Sink` implementation
//! converts this stream to a real tree.
use {
Sink, SyntaxKind, Token,
syntax_kinds::TOMBSTONE,
TextUnit,
SyntaxKind::{self, TOMBSTONE},
lexer::Token,
};
use super::is_insignificant;
/// Receiver of tree-building calls.
///
/// `process` feeds tokens and parser events into an implementation of this
/// trait, and `finish` turns the accumulated state into the final tree.
pub(crate) trait Sink {
/// The value returned by `finish`.
type Tree;
/// Creates a sink for the given source text.
fn new(text: String) -> Self;
/// Adds a leaf of the given kind covering `len` units of text.
fn leaf(&mut self, kind: SyntaxKind, len: TextUnit);
/// Starts an internal node of the given kind.
fn start_internal(&mut self, kind: SyntaxKind);
/// Counterpart of `start_internal`; presumably closes the node it opened
/// (implementation not visible here -- confirm).
fn finish_internal(&mut self);
/// Reports an error with the given message.
fn error(&mut self, err: String);
/// Consumes the sink and returns the finished tree.
fn finish(self) -> Self::Tree;
}
/// `Parser` produces a flat list of `Event`s.
/// They are converted to a tree-structure in
@ -67,7 +88,7 @@ pub(crate) enum Event {
},
}
pub(super) fn process(builder: &mut Sink, tokens: &[Token], events: Vec<Event>) {
pub(super) fn process(builder: &mut impl Sink, tokens: &[Token], events: Vec<Event>) {
let mut idx = 0;
let mut holes = Vec::new();
@ -111,7 +132,7 @@ pub(super) fn process(builder: &mut Sink, tokens: &[Token], events: Vec<Event>)
&Event::Finish => {
while idx < tokens.len() {
let token = tokens[idx];
if is_insignificant(token.kind) {
if token.kind.is_trivia() {
idx += 1;
builder.leaf(token.kind, token.len);
} else {
@ -128,7 +149,7 @@ pub(super) fn process(builder: &mut Sink, tokens: &[Token], events: Vec<Event>)
// this should be done in a sensible manner instead
loop {
let token = tokens[idx];
if !is_insignificant(token.kind) {
if !token.kind.is_trivia() {
break;
}
builder.leaf(token.kind, token.len);

View File

@ -21,11 +21,6 @@
//! After adding a new inline-test, run `cargo collect-tests` to extract
//! it as a standalone text-fixture into `tests/data/parser/inline`, and
//! run `cargo test` once to create the "gold" value.
use parser::parser::Parser;
use parser::token_set::TokenSet;
use SyntaxKind;
use syntax_kinds::*;
mod items;
mod attributes;
mod expressions;
@ -34,6 +29,14 @@
mod paths;
mod type_params;
use {
SyntaxKind::{self, *},
parser::{
parser::Parser,
token_set::TokenSet
}
};
pub(crate) fn file(p: &mut Parser) {
let file = p.start();
p.eat(SHEBANG);

View File

@ -1,6 +1,8 @@
use {SyntaxKind, TextRange, TextUnit, Token};
use syntax_kinds::EOF;
use super::is_insignificant;
use {
SyntaxKind, TextRange, TextUnit,
SyntaxKind::EOF,
lexer::Token,
};
use std::ops::{Add, AddAssign};
@ -16,7 +18,7 @@ pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
let mut start_offsets = Vec::new();
let mut len = 0.into();
for &token in raw_tokens.iter() {
if !is_insignificant(token.kind) {
if !token.kind.is_trivia() {
tokens.push(token);
start_offsets.push(len);
}

View File

@ -5,18 +5,16 @@
mod event;
mod grammar;
use std::sync::Arc;
use {
Token,
yellow::SyntaxNode,
syntax_kinds::*
lexer::Token,
parser::event::{process}
};
use GreenBuilder;
use parser::event::process;
pub(crate) use self::event::Sink;
/// Parse a sequence of tokens into the representative node tree
pub fn parse_green(text: String, tokens: &[Token]) -> SyntaxNode {
pub(crate) fn parse<S: Sink>(text: String, tokens: &[Token]) -> S::Tree {
let events = {
let input = input::ParserInput::new(&text, tokens);
let parser_impl = parser::imp::ParserImpl::new(&input);
@ -24,15 +22,7 @@ pub fn parse_green(text: String, tokens: &[Token]) -> SyntaxNode {
grammar::file(&mut parser);
parser.0.into_events()
};
let mut builder = GreenBuilder::new(text);
process(&mut builder, tokens, events);
let (green, errors) = builder.finish();
SyntaxNode::new(Arc::new(green), errors)
}
fn is_insignificant(kind: SyntaxKind) -> bool {
match kind {
WHITESPACE | COMMENT => true,
_ => false,
}
let mut sink = S::new(text);
process(&mut sink, tokens, events);
sink.finish()
}

View File

@ -1,8 +1,7 @@
use parser::input::{InputPosition, ParserInput};
use parser::event::Event;
use SyntaxKind;
use syntax_kinds::{EOF, TOMBSTONE};
use SyntaxKind::{self, EOF, TOMBSTONE};
/// Implementation details of `Parser`, extracted
/// to a separate struct in order not to pollute

View File

@ -1,5 +1,4 @@
use SyntaxKind;
use syntax_kinds::ERROR;
use SyntaxKind::{self, ERROR};
pub(super) mod imp;
use self::imp::ParserImpl;

View File

@ -1,7 +1,7 @@
#![allow(bad_style, missing_docs, unreachable_pub)]
#![cfg_attr(rustfmt, rustfmt_skip)]
//! Generated from grammar.ron
use tree::SyntaxInfo;
use super::SyntaxInfo;
/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
@ -288,39 +288,39 @@ pub(crate) fn info(self) -> &'static SyntaxInfo {
EOF => &SyntaxInfo { name: "EOF" },
}
}
}
pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> {
match ident {
"use" => Some(USE_KW),
"fn" => Some(FN_KW),
"struct" => Some(STRUCT_KW),
"enum" => Some(ENUM_KW),
"trait" => Some(TRAIT_KW),
"impl" => Some(IMPL_KW),
"true" => Some(TRUE_KW),
"false" => Some(FALSE_KW),
"as" => Some(AS_KW),
"extern" => Some(EXTERN_KW),
"crate" => Some(CRATE_KW),
"mod" => Some(MOD_KW),
"pub" => Some(PUB_KW),
"self" => Some(SELF_KW),
"super" => Some(SUPER_KW),
"in" => Some(IN_KW),
"where" => Some(WHERE_KW),
"for" => Some(FOR_KW),
"loop" => Some(LOOP_KW),
"while" => Some(WHILE_KW),
"if" => Some(IF_KW),
"match" => Some(MATCH_KW),
"const" => Some(CONST_KW),
"static" => Some(STATIC_KW),
"mut" => Some(MUT_KW),
"unsafe" => Some(UNSAFE_KW),
"type" => Some(TYPE_KW),
"ref" => Some(REF_KW),
"let" => Some(LET_KW),
_ => None,
pub(crate) fn from_keyword(ident: &str) -> Option<SyntaxKind> {
match ident {
"use" => Some(USE_KW),
"fn" => Some(FN_KW),
"struct" => Some(STRUCT_KW),
"enum" => Some(ENUM_KW),
"trait" => Some(TRAIT_KW),
"impl" => Some(IMPL_KW),
"true" => Some(TRUE_KW),
"false" => Some(FALSE_KW),
"as" => Some(AS_KW),
"extern" => Some(EXTERN_KW),
"crate" => Some(CRATE_KW),
"mod" => Some(MOD_KW),
"pub" => Some(PUB_KW),
"self" => Some(SELF_KW),
"super" => Some(SUPER_KW),
"in" => Some(IN_KW),
"where" => Some(WHERE_KW),
"for" => Some(FOR_KW),
"loop" => Some(LOOP_KW),
"while" => Some(WHILE_KW),
"if" => Some(IF_KW),
"match" => Some(MATCH_KW),
"const" => Some(CONST_KW),
"static" => Some(STATIC_KW),
"mut" => Some(MUT_KW),
"unsafe" => Some(UNSAFE_KW),
"type" => Some(TYPE_KW),
"ref" => Some(REF_KW),
"let" => Some(LET_KW),
_ => None,
}
}
}

27
src/syntax_kinds/mod.rs Normal file
View File

@ -0,0 +1,27 @@
mod generated;
use std::fmt;
use ::{SyntaxKind::*};
pub use self::generated::SyntaxKind;
impl fmt::Debug for SyntaxKind {
    /// Writes the name stored in this kind's `SyntaxInfo` to the formatter.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(self.info().name)
    }
}
/// Static metadata attached to a `SyntaxKind`.
pub(crate) struct SyntaxInfo {
/// Name of the kind; this is the string written by `SyntaxKind`'s `Debug` impl.
pub name: &'static str,
}
impl SyntaxKind {
pub(crate) fn is_trivia(self: SyntaxKind) -> bool {
match self {
WHITESPACE | COMMENT | DOC_COMMENT => true,
_ => false,
}
}
}

View File

@ -1,27 +0,0 @@
mod file_builder;
use ::{TextUnit};
use std::{fmt};
pub(crate) use self::file_builder::{Sink, GreenBuilder};
pub use syntax_kinds::SyntaxKind;
impl fmt::Debug for SyntaxKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let name = self.info().name;
f.write_str(name)
}
}
pub(crate) struct SyntaxInfo {
pub name: &'static str,
}
/// A token of Rust source.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token {
/// The kind of token.
pub kind: SyntaxKind,
/// The length of the token.
pub len: TextUnit,
}

View File

@ -1,36 +1,26 @@
//! This module provides a way to construct a `File`.
//! It is intended to be completely decoupled from the
//! parser, so as to allow to evolve the tree representation
//! and the parser algorithm independently.
//!
//! The `Sink` trait is the bridge between the parser and the
//! tree builder: the parser produces a stream of events like
//! `start node`, `finish node`, and `FileBuilder` converts
//! this stream to a real tree.
use std::sync::Arc;
use {
SyntaxKind, TextRange, TextUnit,
yellow::GreenNode
yellow::{SyntaxNode, GreenNode, SyntaxError},
parser::Sink
};
use SError;
pub(crate) trait Sink {
fn leaf(&mut self, kind: SyntaxKind, len: TextUnit);
fn start_internal(&mut self, kind: SyntaxKind);
fn finish_internal(&mut self);
fn error(&mut self, err: String);
}
pub(crate) struct GreenBuilder {
text: String,
stack: Vec<GreenNode>,
pos: TextUnit,
root: Option<GreenNode>,
errors: Vec<SError>,
errors: Vec<SyntaxError>,
}
impl GreenBuilder {
pub(crate) fn new(text: String) -> GreenBuilder {
}
impl Sink for GreenBuilder {
type Tree = SyntaxNode;
fn new(text: String) -> Self {
GreenBuilder {
text,
stack: Vec::new(),
@ -40,12 +30,6 @@ pub(crate) fn new(text: String) -> GreenBuilder {
}
}
pub(crate) fn finish(self) -> (GreenNode, Vec<SError>) {
(self.root.unwrap(), self.errors)
}
}
impl Sink for GreenBuilder {
fn leaf(&mut self, kind: SyntaxKind, len: TextUnit) {
let range = TextRange::offset_len(self.pos, len);
self.pos += len;
@ -73,15 +57,12 @@ fn finish_internal(&mut self) {
}
fn error(&mut self, message: String) {
self.errors.push(SError { message, offset: self.pos })
self.errors.push(SyntaxError { message, offset: self.pos })
}
}
impl SyntaxKind {
fn is_trivia(self) -> bool {
match self {
SyntaxKind::WHITESPACE | SyntaxKind::DOC_COMMENT | SyntaxKind::COMMENT => true,
_ => false
}
fn finish(self) -> SyntaxNode {
SyntaxNode::new(Arc::new(self.root.unwrap()), self.errors)
}
}

View File

@ -1,6 +1,7 @@
mod green;
mod red;
mod syntax;
mod builder;
use std::{
sync::{Arc, Weak},
@ -9,7 +10,8 @@
pub(crate) use self::{
green::{GreenNode, TextLen},
red::RedNode,
syntax::SError,
syntax::SyntaxError,
builder::GreenBuilder,
};
pub use self::syntax::SyntaxNode;

View File

@ -4,7 +4,8 @@
};
use {
TextRange, TextUnit, SyntaxKind,
TextRange, TextUnit,
SyntaxKind::{self, *},
yellow::{Ptr, RedNode, GreenNode, TextLen},
};
@ -18,17 +19,17 @@ pub struct SyntaxNode {
#[derive(Clone)]
pub struct SyntaxRoot {
red: Arc<RedNode>,
pub(crate) errors: Arc<Vec<SError>>,
pub(crate) errors: Arc<Vec<SyntaxError>>,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub(crate) struct SError {
pub(crate) struct SyntaxError {
pub(crate) message: String,
pub(crate) offset: TextUnit,
}
impl SyntaxNode {
pub(crate) fn new(root: Arc<GreenNode>, errors: Vec<SError>) -> SyntaxNode {
pub(crate) fn new(root: Arc<GreenNode>, errors: Vec<SyntaxError>) -> SyntaxNode {
let root = Arc::new(RedNode::new_root(root));
let red = Ptr::new(&root);
let root = SyntaxRoot { red: root, errors: Arc::new(errors) };
@ -123,7 +124,6 @@ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
}
fn has_short_text(kind: SyntaxKind) -> bool {
use syntax_kinds::*;
match kind {
IDENT | LIFETIME => true,
_ => false,

View File

@ -1,15 +1,14 @@
extern crate libsyntax2;
extern crate testutils;
use libsyntax2::{tokenize, parse_green};
use libsyntax2::{parse};
use libsyntax2::utils::{dump_tree_green};
use testutils::dir_tests;
#[test]
fn parser_tests() {
dir_tests(&["parser/inline", "parser/ok", "parser/err"], |text| {
let tokens = tokenize(text);
let file = parse_green(text.to_string(), &tokens);
let file = parse(text.to_string());
dump_tree_green(&file)
})
}

View File

@ -36,7 +36,7 @@ fn to_syntax_kinds(&self) -> String {
acc.push_str("#![allow(bad_style, missing_docs, unreachable_pub)]\n");
acc.push_str("#![cfg_attr(rustfmt, rustfmt_skip)]\n");
acc.push_str("//! Generated from grammar.ron\n");
acc.push_str("use tree::SyntaxInfo;\n");
acc.push_str("use super::SyntaxInfo;\n");
acc.push_str("\n");
let syntax_kinds: Vec<String> = self.tokens
@ -82,19 +82,19 @@ fn to_syntax_kinds(&self) -> String {
acc.push_str(" EOF => &SyntaxInfo { name: \"EOF\" },\n");
acc.push_str(" }\n");
acc.push_str(" }\n");
acc.push_str("}\n");
acc.push_str("\n");
// fn ident_to_keyword
acc.push_str("pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> {\n");
acc.push_str(" match ident {\n");
// fn from_keyword
acc.push_str(" pub(crate) fn from_keyword(ident: &str) -> Option<SyntaxKind> {\n");
acc.push_str(" match ident {\n");
// NB: no contextual_keywords here!
for kw in self.keywords.iter() {
write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap();
write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap();
}
acc.push_str(" _ => None,\n");
acc.push_str(" _ => None,\n");
acc.push_str(" }\n");
acc.push_str(" }\n");
acc.push_str("}\n");
acc.push_str("\n");
acc
}
}
@ -104,7 +104,7 @@ fn grammar_file() -> PathBuf {
}
fn generated_file() -> PathBuf {
base_dir().join("src/syntax_kinds.rs")
base_dir().join("src/syntax_kinds/generated.rs")
}
fn scream(word: &str) -> String {

View File

@ -2,13 +2,12 @@
use std::io::Read;
use libsyntax2::{parse_green, tokenize};
use libsyntax2::{parse};
use libsyntax2::utils::dump_tree_green;
fn main() {
let text = read_input();
let tokens = tokenize(&text);
let file = parse_green(text, &tokens);
let file = parse(text);
let tree = dump_tree_green(&file);
println!("{}", tree);
}