Auto merge of #103812 - clubby789:improve-include-bytes, r=petrochenkov

Delay `include_bytes` to AST lowering

Hopefully addresses #65818.
This PR introduces a new `ExprKind::IncludedBytes` variant, which stores the bytes of a file included with `include_bytes!()`. We can then create a literal from the bytes during AST lowering, which means we don't need to escape the bytes into valid UTF-8, which is the cause of most of the overhead of embedding large binary blobs.
This commit is contained in:
bors 2022-11-12 14:30:34 +00:00
commit 8ef2485bd5
19 changed files with 78 additions and 15 deletions

View File

@ -1208,7 +1208,7 @@ impl Expr {
ExprKind::Tup(_) => ExprPrecedence::Tup, ExprKind::Tup(_) => ExprPrecedence::Tup,
ExprKind::Binary(op, ..) => ExprPrecedence::Binary(op.node), ExprKind::Binary(op, ..) => ExprPrecedence::Binary(op.node),
ExprKind::Unary(..) => ExprPrecedence::Unary, ExprKind::Unary(..) => ExprPrecedence::Unary,
ExprKind::Lit(_) => ExprPrecedence::Lit, ExprKind::Lit(_) | ExprKind::IncludedBytes(..) => ExprPrecedence::Lit,
ExprKind::Type(..) | ExprKind::Cast(..) => ExprPrecedence::Cast, ExprKind::Type(..) | ExprKind::Cast(..) => ExprPrecedence::Cast,
ExprKind::Let(..) => ExprPrecedence::Let, ExprKind::Let(..) => ExprPrecedence::Let,
ExprKind::If(..) => ExprPrecedence::If, ExprKind::If(..) => ExprPrecedence::If,
@ -1446,6 +1446,12 @@ pub enum ExprKind {
/// with an optional value to be returned. /// with an optional value to be returned.
Yeet(Option<P<Expr>>), Yeet(Option<P<Expr>>),
/// Bytes included via `include_bytes!`.
/// Added for optimization purposes, to avoid the need to escape
/// large binary blobs. Should always behave like [`ExprKind::Lit`]
/// with a `ByteStr` literal.
IncludedBytes(Lrc<[u8]>),
/// Placeholder for an expression that wasn't syntactically well formed in some way. /// Placeholder for an expression that wasn't syntactically well formed in some way.
Err, Err,
} }

View File

@ -1428,7 +1428,7 @@ pub fn noop_visit_expr<T: MutVisitor>(
} }
ExprKind::Try(expr) => vis.visit_expr(expr), ExprKind::Try(expr) => vis.visit_expr(expr),
ExprKind::TryBlock(body) => vis.visit_block(body), ExprKind::TryBlock(body) => vis.visit_block(body),
ExprKind::Lit(_) | ExprKind::Err => {} ExprKind::Lit(_) | ExprKind::IncludedBytes(..) | ExprKind::Err => {}
} }
vis.visit_id(id); vis.visit_id(id);
vis.visit_span(span); vis.visit_span(span);

View File

@ -2,6 +2,7 @@
use crate::ast::{self, Lit, LitKind}; use crate::ast::{self, Lit, LitKind};
use crate::token::{self, Token}; use crate::token::{self, Token};
use rustc_data_structures::sync::Lrc;
use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode}; use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::symbol::{kw, sym, Symbol};
use rustc_span::Span; use rustc_span::Span;
@ -231,6 +232,13 @@ impl Lit {
Lit { token_lit: kind.to_token_lit(), kind, span } Lit { token_lit: kind.to_token_lit(), kind, span }
} }
/// Builds a byte-string AST literal from the bytes loaded by `include_bytes!`.
///
/// The token form of the literal is derived from the bytes, which means they
/// have to be ASCII-escaped; expect this to be slow for very large blobs.
pub fn from_included_bytes(bytes: &Lrc<[u8]>, span: Span) -> Lit {
    // Cloning an `Lrc` only shares the existing buffer.
    let kind = LitKind::ByteStr(bytes.clone());
    Self::from_lit_kind(kind, span)
}
/// Losslessly convert an AST literal into a token. /// Losslessly convert an AST literal into a token.
pub fn to_token(&self) -> Token { pub fn to_token(&self) -> Token {
let kind = match self.token_lit.kind { let kind = match self.token_lit.kind {

View File

@ -901,7 +901,7 @@ pub fn walk_expr<'a, V: Visitor<'a>>(visitor: &mut V, expression: &'a Expr) {
} }
ExprKind::Try(ref subexpression) => visitor.visit_expr(subexpression), ExprKind::Try(ref subexpression) => visitor.visit_expr(subexpression),
ExprKind::TryBlock(ref body) => visitor.visit_block(body), ExprKind::TryBlock(ref body) => visitor.visit_block(body),
ExprKind::Lit(_) | ExprKind::Err => {} ExprKind::Lit(_) | ExprKind::IncludedBytes(..) | ExprKind::Err => {}
} }
visitor.visit_expr_post(expression) visitor.visit_expr_post(expression)

View File

@ -87,6 +87,10 @@ impl<'hir> LoweringContext<'_, 'hir> {
ExprKind::Lit(ref l) => { ExprKind::Lit(ref l) => {
hir::ExprKind::Lit(respan(self.lower_span(l.span), l.kind.clone())) hir::ExprKind::Lit(respan(self.lower_span(l.span), l.kind.clone()))
} }
ExprKind::IncludedBytes(ref bytes) => hir::ExprKind::Lit(respan(
self.lower_span(e.span),
LitKind::ByteStr(bytes.clone()),
)),
ExprKind::Cast(ref expr, ref ty) => { ExprKind::Cast(ref expr, ref ty) => {
let expr = self.lower_expr(expr); let expr = self.lower_expr(expr);
let ty = let ty =

View File

@ -323,7 +323,10 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
// ``` // ```
fn lower_expr_within_pat(&mut self, expr: &Expr, allow_paths: bool) -> &'hir hir::Expr<'hir> { fn lower_expr_within_pat(&mut self, expr: &Expr, allow_paths: bool) -> &'hir hir::Expr<'hir> {
match expr.kind { match expr.kind {
ExprKind::Lit(..) | ExprKind::ConstBlock(..) | ExprKind::Err => {} ExprKind::Lit(..)
| ExprKind::ConstBlock(..)
| ExprKind::IncludedBytes(..)
| ExprKind::Err => {}
ExprKind::Path(..) if allow_paths => {} ExprKind::Path(..) if allow_paths => {}
ExprKind::Unary(UnOp::Neg, ref inner) if matches!(inner.kind, ExprKind::Lit(_)) => {} ExprKind::Unary(UnOp::Neg, ref inner) if matches!(inner.kind, ExprKind::Lit(_)) => {}
_ => { _ => {

View File

@ -322,6 +322,10 @@ impl<'a> State<'a> {
ast::ExprKind::Lit(ref lit) => { ast::ExprKind::Lit(ref lit) => {
self.print_literal(lit); self.print_literal(lit);
} }
ast::ExprKind::IncludedBytes(ref bytes) => {
let lit = ast::Lit::from_included_bytes(bytes, expr.span);
self.print_literal(&lit)
}
ast::ExprKind::Cast(ref expr, ref ty) => { ast::ExprKind::Cast(ref expr, ref ty) => {
let prec = AssocOp::As.precedence() as i8; let prec = AssocOp::As.precedence() as i8;
self.print_expr_maybe_paren(expr, prec); self.print_expr_maybe_paren(expr, prec);

View File

@ -303,6 +303,7 @@ impl<'cx, 'a> Context<'cx, 'a> {
| ExprKind::Field(_, _) | ExprKind::Field(_, _)
| ExprKind::ForLoop(_, _, _, _) | ExprKind::ForLoop(_, _, _, _)
| ExprKind::If(_, _, _) | ExprKind::If(_, _, _)
| ExprKind::IncludedBytes(..)
| ExprKind::InlineAsm(_) | ExprKind::InlineAsm(_)
| ExprKind::Let(_, _, _) | ExprKind::Let(_, _, _)
| ExprKind::Lit(_) | ExprKind::Lit(_)

View File

@ -43,6 +43,9 @@ pub fn expand_concat(
has_errors = true; has_errors = true;
} }
}, },
ast::ExprKind::IncludedBytes(..) => {
cx.span_err(e.span, "cannot concatenate a byte string literal")
}
ast::ExprKind::Err => { ast::ExprKind::Err => {
has_errors = true; has_errors = true;
} }

View File

@ -108,6 +108,16 @@ fn handle_array_element(
None None
} }
}, },
ast::ExprKind::IncludedBytes(..) => {
if !*has_errors {
cx.struct_span_err(expr.span, "cannot concatenate doubly nested array")
.note("byte strings are treated as arrays of bytes")
.help("try flattening the array")
.emit();
}
*has_errors = true;
None
}
_ => { _ => {
missing_literals.push(expr.span); missing_literals.push(expr.span);
None None
@ -167,6 +177,9 @@ pub fn expand_concat_bytes(
has_errors = true; has_errors = true;
} }
}, },
ast::ExprKind::IncludedBytes(ref bytes) => {
accumulator.extend_from_slice(bytes);
}
ast::ExprKind::Err => { ast::ExprKind::Err => {
has_errors = true; has_errors = true;
} }

View File

@ -216,7 +216,10 @@ pub fn expand_include_bytes(
} }
}; };
match cx.source_map().load_binary_file(&file) { match cx.source_map().load_binary_file(&file) {
Ok(bytes) => base::MacEager::expr(cx.expr_byte_str(sp, bytes)), Ok(bytes) => {
let expr = cx.expr(sp, ast::ExprKind::IncludedBytes(bytes.into()));
base::MacEager::expr(expr)
}
Err(e) => { Err(e) => {
cx.span_err(sp, &format!("couldn't read {}: {}", file.display(), e)); cx.span_err(sp, &format!("couldn't read {}: {}", file.display(), e));
DummyResult::any(sp) DummyResult::any(sp)

View File

@ -525,6 +525,13 @@ impl server::TokenStream for Rustc<'_, '_> {
ast::ExprKind::Lit(l) => { ast::ExprKind::Lit(l) => {
Ok(tokenstream::TokenStream::token_alone(token::Literal(l.token_lit), l.span)) Ok(tokenstream::TokenStream::token_alone(token::Literal(l.token_lit), l.span))
} }
ast::ExprKind::IncludedBytes(bytes) => {
let lit = ast::Lit::from_included_bytes(bytes, expr.span);
Ok(tokenstream::TokenStream::token_alone(
token::TokenKind::Literal(lit.token_lit),
expr.span,
))
}
ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind { ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind {
ast::ExprKind::Lit(l) => match l.token_lit { ast::ExprKind::Lit(l) => match l.token_lit {
token::Lit { kind: token::Integer | token::Float, .. } => { token::Lit { kind: token::Integer | token::Float, .. } => {

View File

@ -631,7 +631,9 @@ impl<'a> Parser<'a> {
/// - A single-segment path. /// - A single-segment path.
pub(super) fn expr_is_valid_const_arg(&self, expr: &P<rustc_ast::Expr>) -> bool { pub(super) fn expr_is_valid_const_arg(&self, expr: &P<rustc_ast::Expr>) -> bool {
match &expr.kind { match &expr.kind {
ast::ExprKind::Block(_, _) | ast::ExprKind::Lit(_) => true, ast::ExprKind::Block(_, _)
| ast::ExprKind::Lit(_)
| ast::ExprKind::IncludedBytes(..) => true,
ast::ExprKind::Unary(ast::UnOp::Neg, expr) => { ast::ExprKind::Unary(ast::UnOp::Neg, expr) => {
matches!(expr.kind, ast::ExprKind::Lit(_)) matches!(expr.kind, ast::ExprKind::Lit(_))
} }

View File

@ -560,13 +560,14 @@ impl<'v> ast_visit::Visitor<'v> for StatCollector<'v> {
} }
fn visit_expr(&mut self, e: &'v ast::Expr) { fn visit_expr(&mut self, e: &'v ast::Expr) {
#[rustfmt::skip]
record_variants!( record_variants!(
(self, e, e.kind, Id::None, ast, Expr, ExprKind), (self, e, e.kind, Id::None, ast, Expr, ExprKind),
[ [
Box, Array, ConstBlock, Call, MethodCall, Tup, Binary, Unary, Lit, Cast, Type, Let, Box, Array, ConstBlock, Call, MethodCall, Tup, Binary, Unary, Lit, Cast, Type, Let,
If, While, ForLoop, Loop, Match, Closure, Block, Async, Await, TryBlock, Assign, If, While, ForLoop, Loop, Match, Closure, Block, Async, Await, TryBlock, Assign,
AssignOp, Field, Index, Range, Underscore, Path, AddrOf, Break, Continue, Ret, AssignOp, Field, Index, Range, Underscore, Path, AddrOf, Break, Continue, Ret,
InlineAsm, MacCall, Struct, Repeat, Paren, Try, Yield, Yeet, Err InlineAsm, MacCall, Struct, Repeat, Paren, Try, Yield, Yeet, IncludedBytes, Err
] ]
); );
ast_visit::walk_expr(self, e) ast_visit::walk_expr(self, e)

View File

@ -1,5 +1,5 @@
// aux-build:expand-expr.rs // aux-build:expand-expr.rs
#![feature(concat_bytes)]
extern crate expand_expr; extern crate expand_expr;
use expand_expr::{ use expand_expr::{
@ -23,6 +23,11 @@ expand_expr_is!(
concat!("contents: ", include_str!("auxiliary/included-file.txt")) concat!("contents: ", include_str!("auxiliary/included-file.txt"))
); );
expand_expr_is!(
b"contents: Included file contents\n",
concat_bytes!(b"contents: ", include_bytes!("auxiliary/included-file.txt"))
);
// Correct value is checked for multiple sources. // Correct value is checked for multiple sources.
check_expand_expr_file!(file!()); check_expand_expr_file!(file!());

View File

@ -1,29 +1,29 @@
error: expected one of `.`, `?`, or an operator, found `;` error: expected one of `.`, `?`, or an operator, found `;`
--> $DIR/expand-expr.rs:101:27 --> $DIR/expand-expr.rs:106:27
| |
LL | expand_expr_fail!("string"; hello); LL | expand_expr_fail!("string"; hello);
| ^ expected one of `.`, `?`, or an operator | ^ expected one of `.`, `?`, or an operator
error: expected expression, found `$` error: expected expression, found `$`
--> $DIR/expand-expr.rs:104:19 --> $DIR/expand-expr.rs:109:19
| |
LL | expand_expr_fail!($); LL | expand_expr_fail!($);
| ^ expected expression | ^ expected expression
error: expected expression, found `$` error: expected expression, found `$`
--> $DIR/expand-expr.rs:33:23 --> $DIR/expand-expr.rs:38:23
| |
LL | ($($t:tt)*) => { $($t)* }; LL | ($($t:tt)*) => { $($t)* };
| ^^^^ expected expression | ^^^^ expected expression
error: expected expression, found `$` error: expected expression, found `$`
--> $DIR/expand-expr.rs:106:28 --> $DIR/expand-expr.rs:111:28
| |
LL | expand_expr_fail!(echo_pm!($)); LL | expand_expr_fail!(echo_pm!($));
| ^ expected expression | ^ expected expression
error: macro expansion ignores token `hello` and any following error: macro expansion ignores token `hello` and any following
--> $DIR/expand-expr.rs:110:47 --> $DIR/expand-expr.rs:115:47
| |
LL | expand_expr_is!("string", echo_tts!("string"; hello)); LL | expand_expr_is!("string", echo_tts!("string"; hello));
| --------------------^^^^^-- help: you might be missing a semicolon here: `;` | --------------------^^^^^-- help: you might be missing a semicolon here: `;`
@ -33,7 +33,7 @@ LL | expand_expr_is!("string", echo_tts!("string"; hello));
= note: the usage of `echo_tts!` is likely invalid in expression context = note: the usage of `echo_tts!` is likely invalid in expression context
error: macro expansion ignores token `;` and any following error: macro expansion ignores token `;` and any following
--> $DIR/expand-expr.rs:111:44 --> $DIR/expand-expr.rs:116:44
| |
LL | expand_expr_is!("string", echo_pm!("string"; hello)); LL | expand_expr_is!("string", echo_pm!("string"; hello));
| -----------------^-------- help: you might be missing a semicolon here: `;` | -----------------^-------- help: you might be missing a semicolon here: `;`
@ -43,7 +43,7 @@ LL | expand_expr_is!("string", echo_pm!("string"; hello));
= note: the usage of `echo_pm!` is likely invalid in expression context = note: the usage of `echo_pm!` is likely invalid in expression context
error: recursion limit reached while expanding `recursive_expand!` error: recursion limit reached while expanding `recursive_expand!`
--> $DIR/expand-expr.rs:119:16 --> $DIR/expand-expr.rs:124:16
| |
LL | const _: u32 = recursive_expand!(); LL | const _: u32 = recursive_expand!();
| ^^^^^^^^^^^^^^^^^^^ | ^^^^^^^^^^^^^^^^^^^

View File

@ -207,6 +207,7 @@ impl<'a> Sugg<'a> {
| ast::ExprKind::InlineAsm(..) | ast::ExprKind::InlineAsm(..)
| ast::ExprKind::ConstBlock(..) | ast::ExprKind::ConstBlock(..)
| ast::ExprKind::Lit(..) | ast::ExprKind::Lit(..)
| ast::ExprKind::IncludedBytes(..)
| ast::ExprKind::Loop(..) | ast::ExprKind::Loop(..)
| ast::ExprKind::MacCall(..) | ast::ExprKind::MacCall(..)
| ast::ExprKind::MethodCall(..) | ast::ExprKind::MethodCall(..)

View File

@ -399,6 +399,7 @@ pub(crate) fn format_expr(
} }
} }
ast::ExprKind::Underscore => Some("_".to_owned()), ast::ExprKind::Underscore => Some("_".to_owned()),
ast::ExprKind::IncludedBytes(..) => unreachable!(),
ast::ExprKind::Err => None, ast::ExprKind::Err => None,
}; };

View File

@ -496,6 +496,7 @@ pub(crate) fn is_block_expr(context: &RewriteContext<'_>, expr: &ast::Expr, repr
| ast::ExprKind::Continue(..) | ast::ExprKind::Continue(..)
| ast::ExprKind::Err | ast::ExprKind::Err
| ast::ExprKind::Field(..) | ast::ExprKind::Field(..)
| ast::ExprKind::IncludedBytes(..)
| ast::ExprKind::InlineAsm(..) | ast::ExprKind::InlineAsm(..)
| ast::ExprKind::Let(..) | ast::ExprKind::Let(..)
| ast::ExprKind::Path(..) | ast::ExprKind::Path(..)