Simplify string optimizations
This commit is contained in:
parent
7974c6b1a0
commit
08475a690c
@ -1,6 +1,7 @@
|
||||
use std::{sync::Arc};
|
||||
|
||||
const INLINE_CAP: usize = 22;
|
||||
const WS_TAG: u8 = (INLINE_CAP + 1) as u8;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) enum SmolStr {
|
||||
@ -17,18 +18,34 @@ impl SmolStr {
|
||||
if len <= INLINE_CAP {
|
||||
let mut buf = [0; INLINE_CAP];
|
||||
buf[..len].copy_from_slice(text.as_bytes());
|
||||
SmolStr::Inline { len: len as u8, buf }
|
||||
} else {
|
||||
SmolStr::Heap(
|
||||
text.to_string().into_boxed_str().into()
|
||||
)
|
||||
return SmolStr::Inline { len: len as u8, buf };
|
||||
}
|
||||
|
||||
let newlines = text.bytes().take_while(|&b| b == b'\n').count();
|
||||
let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count();
|
||||
if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES {
|
||||
let mut buf = [0; INLINE_CAP];
|
||||
buf[0] = newlines as u8;
|
||||
buf[1] = spaces as u8;
|
||||
return SmolStr::Inline { len: WS_TAG, buf };
|
||||
}
|
||||
|
||||
SmolStr::Heap(
|
||||
text.to_string().into_boxed_str().into()
|
||||
)
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> &str {
|
||||
match self {
|
||||
SmolStr::Heap(data) => &*data,
|
||||
SmolStr::Inline { len, buf } => {
|
||||
if *len == WS_TAG {
|
||||
let newlines = buf[0] as usize;
|
||||
let spaces = buf[1] as usize;
|
||||
assert!(newlines <= N_NEWLINES && spaces <= N_SPACES);
|
||||
return &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]
|
||||
}
|
||||
|
||||
let len = *len as usize;
|
||||
let buf = &buf[..len];
|
||||
unsafe { ::std::str::from_utf8_unchecked(buf) }
|
||||
@ -37,6 +54,12 @@ impl SmolStr {
|
||||
}
|
||||
}
|
||||
|
||||
/// Largest run of leading `\n` bytes that the shared whitespace table covers.
const N_NEWLINES: usize = 32;
/// Largest run of `' '` bytes (following the newlines) that the table covers.
const N_SPACES: usize = 128;
/// Shared whitespace table: exactly `N_NEWLINES` newlines followed by exactly
/// `N_SPACES` spaces.
///
/// `as_str` returns `&WS[N_NEWLINES - newlines..N_NEWLINES + spaces]`, so the
/// table has to be `N_NEWLINES + N_SPACES` bytes long; a shorter literal makes
/// that slice panic for larger `spaces` values. The table is assembled from
/// fixed-width pieces (8 newlines / 16 spaces each) so the counts stay easy to
/// audit and cannot be silently collapsed into one unreadable run.
const WS: &str = concat!(
    // 4 x 8 newlines = N_NEWLINES
    "\n\n\n\n\n\n\n\n",
    "\n\n\n\n\n\n\n\n",
    "\n\n\n\n\n\n\n\n",
    "\n\n\n\n\n\n\n\n",
    // 8 x 16 spaces = N_SPACES
    "                ",
    "                ",
    "                ",
    "                ",
    "                ",
    "                ",
    "                ",
    "                ",
);
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
@ -504,93 +504,5 @@ impl SyntaxKind {
|
||||
};
|
||||
Some(tok)
|
||||
}
|
||||
|
||||
/// Returns the fixed source text of a token kind whose spelling never varies.
///
/// Punctuation, operator and keyword kinds map to their literal text; every
/// other kind falls through to the wildcard arm and yields `None`.
pub(crate) fn static_text(self) -> Option<&'static str> {
|
||||
let tok = match self {
|
||||
SEMI => ";",
|
||||
COMMA => ",",
|
||||
L_PAREN => "(",
|
||||
R_PAREN => ")",
|
||||
L_CURLY => "{",
|
||||
R_CURLY => "}",
|
||||
L_BRACK => "[",
|
||||
R_BRACK => "]",
|
||||
L_ANGLE => "<",
|
||||
R_ANGLE => ">",
|
||||
AT => "@",
|
||||
POUND => "#",
|
||||
TILDE => "~",
|
||||
QUESTION => "?",
|
||||
DOLLAR => "$",
|
||||
AMP => "&",
|
||||
PIPE => "|",
|
||||
PLUS => "+",
|
||||
STAR => "*",
|
||||
SLASH => "/",
|
||||
CARET => "^",
|
||||
PERCENT => "%",
|
||||
DOT => ".",
|
||||
DOTDOT => "..",
|
||||
DOTDOTDOT => "...",
|
||||
DOTDOTEQ => "..=",
|
||||
COLON => ":",
|
||||
COLONCOLON => "::",
|
||||
EQ => "=",
|
||||
EQEQ => "==",
|
||||
FAT_ARROW => "=>",
|
||||
EXCL => "!",
|
||||
NEQ => "!=",
|
||||
MINUS => "-",
|
||||
THIN_ARROW => "->",
|
||||
LTEQ => "<=",
|
||||
GTEQ => ">=",
|
||||
PLUSEQ => "+=",
|
||||
MINUSEQ => "-=",
|
||||
AMPAMP => "&&",
|
||||
PIPEPIPE => "||",
|
||||
SHL => "<<",
|
||||
SHR => ">>",
|
||||
SHLEQ => "<<=",
|
||||
SHREQ => ">>=",
|
||||

||||
// Keyword kinds: each `*_KW` maps to the keyword's lowercase spelling.
USE_KW => "use",
|
||||
FN_KW => "fn",
|
||||
STRUCT_KW => "struct",
|
||||
ENUM_KW => "enum",
|
||||
TRAIT_KW => "trait",
|
||||
IMPL_KW => "impl",
|
||||
TRUE_KW => "true",
|
||||
FALSE_KW => "false",
|
||||
AS_KW => "as",
|
||||
EXTERN_KW => "extern",
|
||||
CRATE_KW => "crate",
|
||||
MOD_KW => "mod",
|
||||
PUB_KW => "pub",
|
||||
SELF_KW => "self",
|
||||
SUPER_KW => "super",
|
||||
IN_KW => "in",
|
||||
WHERE_KW => "where",
|
||||
FOR_KW => "for",
|
||||
LOOP_KW => "loop",
|
||||
WHILE_KW => "while",
|
||||
IF_KW => "if",
|
||||
ELSE_KW => "else",
|
||||
MATCH_KW => "match",
|
||||
CONST_KW => "const",
|
||||
STATIC_KW => "static",
|
||||
MUT_KW => "mut",
|
||||
UNSAFE_KW => "unsafe",
|
||||
TYPE_KW => "type",
|
||||
REF_KW => "ref",
|
||||
LET_KW => "let",
|
||||
MOVE_KW => "move",
|
||||
RETURN_KW => "return",
|
||||
AUTO_KW => "auto",
|
||||
DEFAULT_KW => "default",
|
||||
UNION_KW => "union",
|
||||
// Any kind without a fixed spelling has no static text.
_ => return None,
|
||||
};
|
||||
Some(tok)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -64,19 +64,6 @@ impl SyntaxKind {
|
||||
let tok = match c {
|
||||
{%- for t in single_byte_tokens %}
|
||||
'{{t.0}}' => {{t.1}},
|
||||
{%- endfor %}
|
||||
_ => return None,
|
||||
};
|
||||
Some(tok)
|
||||
}
|
||||
|
||||
pub(crate) fn static_text(self) -> Option<&'static str> {
|
||||
let tok = match self {
|
||||
{%- for t in concat(a=single_byte_tokens, b=multi_byte_tokens) %}
|
||||
{{t.1}} => "{{t.0}}",
|
||||
{%- endfor %}
|
||||
{% for kw in concat(a=keywords, b=contextual_keywords) %}
|
||||
{{kw | upper}}_KW => "{{kw}}",
|
||||
{%- endfor %}
|
||||
_ => return None,
|
||||
};
|
||||
|
@ -1,8 +1,7 @@
|
||||
use std::sync::Arc;
|
||||
use {
|
||||
SyntaxKind, TextUnit,
|
||||
smol_str::SmolStr,
|
||||
SyntaxKind::{self, *},
|
||||
TextUnit,
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
@ -91,59 +90,23 @@ impl GreenBranch {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) enum GreenLeaf {
|
||||
Whitespace {
|
||||
newlines: u8,
|
||||
spaces: u8,
|
||||
},
|
||||
Token {
|
||||
kind: SyntaxKind,
|
||||
text: Option<SmolStr>,
|
||||
},
|
||||
pub(crate) struct GreenLeaf {
|
||||
kind: SyntaxKind,
|
||||
text: SmolStr,
|
||||
}
|
||||
|
||||
impl GreenLeaf {
|
||||
fn new(kind: SyntaxKind, text: &str) -> Self {
|
||||
if kind == WHITESPACE {
|
||||
let newlines = text.bytes().take_while(|&b| b == b'\n').count();
|
||||
let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count();
|
||||
if newlines + spaces == text.len() && newlines <= N_NEWLINES && spaces <= N_SPACES {
|
||||
return GreenLeaf::Whitespace {
|
||||
newlines: newlines as u8,
|
||||
spaces: spaces as u8,
|
||||
};
|
||||
}
|
||||
}
|
||||
let text = match SyntaxKind::static_text(kind) {
|
||||
Some(t) => {
|
||||
debug_assert_eq!(t, text);
|
||||
None
|
||||
}
|
||||
None => Some(SmolStr::new(text)),
|
||||
};
|
||||
GreenLeaf::Token { kind, text }
|
||||
let text = SmolStr::new(text);
|
||||
GreenLeaf { kind, text }
|
||||
}
|
||||
|
||||
pub(crate) fn kind(&self) -> SyntaxKind {
|
||||
match self {
|
||||
GreenLeaf::Whitespace { .. } => WHITESPACE,
|
||||
GreenLeaf::Token { kind, .. } => *kind,
|
||||
}
|
||||
self.kind
|
||||
}
|
||||
|
||||
pub(crate) fn text(&self) -> &str {
|
||||
match self {
|
||||
&GreenLeaf::Whitespace { newlines, spaces } => {
|
||||
let newlines = newlines as usize;
|
||||
let spaces = spaces as usize;
|
||||
assert!(newlines <= N_NEWLINES && spaces <= N_SPACES);
|
||||
&WS[N_NEWLINES - newlines..N_NEWLINES + spaces]
|
||||
}
|
||||
GreenLeaf::Token { kind, text } => match text {
|
||||
None => kind.static_text().unwrap(),
|
||||
Some(t) => t.as_str(),
|
||||
},
|
||||
}
|
||||
self.text.as_str()
|
||||
}
|
||||
|
||||
pub(crate) fn text_len(&self) -> TextUnit {
|
||||
@ -151,7 +114,13 @@ impl GreenLeaf {
|
||||
}
|
||||
}
|
||||
|
||||
/// Largest run of leading `\n` bytes that the shared whitespace table covers.
const N_NEWLINES: usize = 16;
/// Largest run of `' '` bytes (following the newlines) that the table covers.
const N_SPACES: usize = 64;
/// Shared whitespace table: exactly `N_NEWLINES` newlines followed by exactly
/// `N_SPACES` spaces.
///
/// `GreenLeaf::text` returns `&WS[N_NEWLINES - newlines..N_NEWLINES + spaces]`,
/// so the table has to be `N_NEWLINES + N_SPACES` bytes long; a shorter
/// literal makes that slice panic for larger `spaces` values. The table is
/// assembled from fixed-width pieces (8 newlines / 16 spaces each) so the
/// counts stay easy to audit.
const WS: &str = concat!(
    // 2 x 8 newlines = N_NEWLINES
    "\n\n\n\n\n\n\n\n",
    "\n\n\n\n\n\n\n\n",
    // 4 x 16 spaces = N_SPACES
    "                ",
    "                ",
    "                ",
    "                ",
);
|
||||
|
||||
// Diagnostic test: prints the in-memory size (in bytes) of the core tree
// types. It makes no assertions, so it cannot detect a size regression by
// itself; the test harness captures stdout by default, so run it with
// `cargo test -- --nocapture` to actually see the numbers.
#[test]
|
||||
fn test_sizes() {
|
||||
use std::mem::size_of;
|
||||
|
||||
println!("GreenNode = {}", size_of::<GreenNode>());
|
||||
println!("GreenLeaf = {}", size_of::<GreenLeaf>());
|
||||
println!("SyntaxKind = {}", size_of::<SyntaxKind>());
|
||||
println!("SmolStr = {}", size_of::<SmolStr>());
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user