Simplify string optimizations

This commit is contained in:
Aleksey Kladov 2018-08-09 02:26:22 +03:00
parent 7974c6b1a0
commit 08475a690c
4 changed files with 46 additions and 155 deletions

View File

@ -1,6 +1,7 @@
use std::{sync::Arc};
/// Largest text length, in bytes, that is copied into the inline buffer
/// instead of being handled by the whitespace or heap paths.
const INLINE_CAP: usize = 22;
/// Sentinel stored in the inline `len` field. It is one past `INLINE_CAP`,
/// so it can never be a real inline length; it marks a buffer whose first
/// two bytes hold a newline count and a space count rather than text.
const WS_TAG: u8 = INLINE_CAP as u8 + 1;
#[derive(Clone, Debug)]
pub(crate) enum SmolStr {
@ -17,18 +18,34 @@ impl SmolStr {
if len <= INLINE_CAP {
let mut buf = [0; INLINE_CAP];
buf[..len].copy_from_slice(text.as_bytes());
SmolStr::Inline { len: len as u8, buf }
} else {
SmolStr::Heap(
text.to_string().into_boxed_str().into()
)
return SmolStr::Inline { len: len as u8, buf };
}
let newlines = text.bytes().take_while(|&b| b == b'\n').count();
let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count();
if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES {
let mut buf = [0; INLINE_CAP];
buf[0] = newlines as u8;
buf[1] = spaces as u8;
return SmolStr::Inline { len: WS_TAG, buf };
}
SmolStr::Heap(
text.to_string().into_boxed_str().into()
)
}
pub fn as_str(&self) -> &str {
match self {
SmolStr::Heap(data) => &*data,
SmolStr::Inline { len, buf } => {
if *len == WS_TAG {
let newlines = buf[0] as usize;
let spaces = buf[1] as usize;
assert!(newlines <= N_NEWLINES && spaces <= N_SPACES);
return &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]
}
let len = *len as usize;
let buf = &buf[..len];
unsafe { ::std::str::from_utf8_unchecked(buf) }
@ -37,6 +54,12 @@ impl SmolStr {
}
}
/// Maximum number of leading newlines a whitespace-only string may have
/// and still be represented as a slice of `WS`.
const N_NEWLINES: usize = 32;
/// Maximum number of trailing spaces a whitespace-only string may have
/// and still be represented as a slice of `WS`.
const N_SPACES: usize = 128;
/// Shared whitespace table: exactly `N_NEWLINES` newlines followed by exactly
/// `N_SPACES` spaces.
///
/// Readers slice it as `WS[N_NEWLINES - newlines..N_NEWLINES + spaces]`, so
/// the literal must be full length; a shorter table makes that slice panic
/// for any count the bounds check otherwise accepts. It is assembled with
/// `concat!` from 8-character pieces so the lengths can be checked by eye.
const WS: &str = concat!(
    // 4 x 8 = 32 newlines
    "\n\n\n\n\n\n\n\n", "\n\n\n\n\n\n\n\n", "\n\n\n\n\n\n\n\n", "\n\n\n\n\n\n\n\n",
    // 16 x 8 = 128 spaces
    "        ", "        ", "        ", "        ",
    "        ", "        ", "        ", "        ",
    "        ", "        ", "        ", "        ",
    "        ", "        ", "        ", "        ",
);
#[cfg(test)]
mod tests {
use super::*;

View File

@ -504,93 +504,5 @@ impl SyntaxKind {
};
Some(tok)
}
/// Returns the fixed source text of this kind, or `None` when the kind is
/// not one of the punctuation, operator or keyword kinds listed below.
pub(crate) fn static_text(self) -> Option<&'static str> {
    match self {
        // Punctuation and operators.
        SEMI => Some(";"),
        COMMA => Some(","),
        L_PAREN => Some("("),
        R_PAREN => Some(")"),
        L_CURLY => Some("{"),
        R_CURLY => Some("}"),
        L_BRACK => Some("["),
        R_BRACK => Some("]"),
        L_ANGLE => Some("<"),
        R_ANGLE => Some(">"),
        AT => Some("@"),
        POUND => Some("#"),
        TILDE => Some("~"),
        QUESTION => Some("?"),
        DOLLAR => Some("$"),
        AMP => Some("&"),
        PIPE => Some("|"),
        PLUS => Some("+"),
        STAR => Some("*"),
        SLASH => Some("/"),
        CARET => Some("^"),
        PERCENT => Some("%"),
        DOT => Some("."),
        DOTDOT => Some(".."),
        DOTDOTDOT => Some("..."),
        DOTDOTEQ => Some("..="),
        COLON => Some(":"),
        COLONCOLON => Some("::"),
        EQ => Some("="),
        EQEQ => Some("=="),
        FAT_ARROW => Some("=>"),
        EXCL => Some("!"),
        NEQ => Some("!="),
        MINUS => Some("-"),
        THIN_ARROW => Some("->"),
        LTEQ => Some("<="),
        GTEQ => Some(">="),
        PLUSEQ => Some("+="),
        MINUSEQ => Some("-="),
        AMPAMP => Some("&&"),
        PIPEPIPE => Some("||"),
        SHL => Some("<<"),
        SHR => Some(">>"),
        SHLEQ => Some("<<="),
        SHREQ => Some(">>="),
        // Keywords.
        USE_KW => Some("use"),
        FN_KW => Some("fn"),
        STRUCT_KW => Some("struct"),
        ENUM_KW => Some("enum"),
        TRAIT_KW => Some("trait"),
        IMPL_KW => Some("impl"),
        TRUE_KW => Some("true"),
        FALSE_KW => Some("false"),
        AS_KW => Some("as"),
        EXTERN_KW => Some("extern"),
        CRATE_KW => Some("crate"),
        MOD_KW => Some("mod"),
        PUB_KW => Some("pub"),
        SELF_KW => Some("self"),
        SUPER_KW => Some("super"),
        IN_KW => Some("in"),
        WHERE_KW => Some("where"),
        FOR_KW => Some("for"),
        LOOP_KW => Some("loop"),
        WHILE_KW => Some("while"),
        IF_KW => Some("if"),
        ELSE_KW => Some("else"),
        MATCH_KW => Some("match"),
        CONST_KW => Some("const"),
        STATIC_KW => Some("static"),
        MUT_KW => Some("mut"),
        UNSAFE_KW => Some("unsafe"),
        TYPE_KW => Some("type"),
        REF_KW => Some("ref"),
        LET_KW => Some("let"),
        MOVE_KW => Some("move"),
        RETURN_KW => Some("return"),
        AUTO_KW => Some("auto"),
        DEFAULT_KW => Some("default"),
        UNION_KW => Some("union"),
        // Every other kind has no fixed text.
        _ => None,
    }
}
}

View File

@ -64,19 +64,6 @@ impl SyntaxKind {
let tok = match c {
{%- for t in single_byte_tokens %}
'{{t.0}}' => {{t.1}},
{%- endfor %}
_ => return None,
};
Some(tok)
}
pub(crate) fn static_text(self) -> Option<&'static str> {
let tok = match self {
{%- for t in concat(a=single_byte_tokens, b=multi_byte_tokens) %}
{{t.1}} => "{{t.0}}",
{%- endfor %}
{% for kw in concat(a=keywords, b=contextual_keywords) %}
{{kw | upper}}_KW => "{{kw}}",
{%- endfor %}
_ => return None,
};

View File

@ -1,8 +1,7 @@
use std::sync::Arc;
use {
SyntaxKind, TextUnit,
smol_str::SmolStr,
SyntaxKind::{self, *},
TextUnit,
};
#[derive(Clone, Debug)]
@ -91,59 +90,23 @@ impl GreenBranch {
}
#[derive(Clone, Debug)]
pub(crate) enum GreenLeaf {
Whitespace {
newlines: u8,
spaces: u8,
},
Token {
kind: SyntaxKind,
text: Option<SmolStr>,
},
pub(crate) struct GreenLeaf {
kind: SyntaxKind,
text: SmolStr,
}
impl GreenLeaf {
fn new(kind: SyntaxKind, text: &str) -> Self {
if kind == WHITESPACE {
let newlines = text.bytes().take_while(|&b| b == b'\n').count();
let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count();
if newlines + spaces == text.len() && newlines <= N_NEWLINES && spaces <= N_SPACES {
return GreenLeaf::Whitespace {
newlines: newlines as u8,
spaces: spaces as u8,
};
}
}
let text = match SyntaxKind::static_text(kind) {
Some(t) => {
debug_assert_eq!(t, text);
None
}
None => Some(SmolStr::new(text)),
};
GreenLeaf::Token { kind, text }
let text = SmolStr::new(text);
GreenLeaf { kind, text }
}
pub(crate) fn kind(&self) -> SyntaxKind {
match self {
GreenLeaf::Whitespace { .. } => WHITESPACE,
GreenLeaf::Token { kind, .. } => *kind,
}
self.kind
}
pub(crate) fn text(&self) -> &str {
match self {
&GreenLeaf::Whitespace { newlines, spaces } => {
let newlines = newlines as usize;
let spaces = spaces as usize;
assert!(newlines <= N_NEWLINES && spaces <= N_SPACES);
&WS[N_NEWLINES - newlines..N_NEWLINES + spaces]
}
GreenLeaf::Token { kind, text } => match text {
None => kind.static_text().unwrap(),
Some(t) => t.as_str(),
},
}
self.text.as_str()
}
pub(crate) fn text_len(&self) -> TextUnit {
@ -151,7 +114,13 @@ impl GreenLeaf {
}
}
/// Maximum number of leading newlines a whitespace leaf may have and still
/// be represented as a slice of `WS`.
const N_NEWLINES: usize = 16;
/// Maximum number of trailing spaces a whitespace leaf may have and still
/// be represented as a slice of `WS`.
const N_SPACES: usize = 64;
/// Shared whitespace table: exactly `N_NEWLINES` newlines followed by exactly
/// `N_SPACES` spaces.
///
/// Readers slice it as `WS[N_NEWLINES - newlines..N_NEWLINES + spaces]`, so
/// the literal must be full length; a shorter table makes that slice panic
/// for any count the bounds check otherwise accepts. It is assembled with
/// `concat!` from 8-character pieces so the lengths can be checked by eye.
const WS: &str = concat!(
    // 2 x 8 = 16 newlines
    "\n\n\n\n\n\n\n\n", "\n\n\n\n\n\n\n\n",
    // 8 x 8 = 64 spaces
    "        ", "        ", "        ", "        ",
    "        ", "        ", "        ", "        ",
);
/// Prints `size_of` for the tree types so their sizes can be inspected in
/// the test output. It makes no assertions.
#[test]
fn test_sizes() {
    use std::mem;

    let green_node = mem::size_of::<GreenNode>();
    let green_leaf = mem::size_of::<GreenLeaf>();
    let syntax_kind = mem::size_of::<SyntaxKind>();
    let smol_str = mem::size_of::<SmolStr>();

    println!("GreenNode = {}", green_node);
    println!("GreenLeaf = {}", green_leaf);
    println!("SyntaxKind = {}", syntax_kind);
    println!("SmolStr = {}", smol_str);
}