From b49e9fa794addc197e58743bdc120cb9740b73c0 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 4 Sep 2013 17:05:31 -0400 Subject: [PATCH 1/2] forbid cast as bool This is currently unsound since `bool` is represented as `i8`. It will become sound when `bool` is stored as `i8` but always used as `i1`. However, the current behaviour will always be identical to `x & 1 != 0`, so there's no need for it. It's also surprising, since `x != 0` is the expected behaviour. Closes #7311 --- src/libextra/ebml.rs | 2 +- src/librustc/middle/trans/base.rs | 2 +- src/librustc/middle/typeck/check/mod.rs | 4 ++++ src/test/compile-fail/cast-as-bool.rs | 12 ++++++++++++ src/test/run-pass/cast.rs | 1 - src/test/run-pass/supported-cast.rs | 15 --------------- 6 files changed, 18 insertions(+), 18 deletions(-) create mode 100644 src/test/compile-fail/cast-as-bool.rs diff --git a/src/libextra/ebml.rs b/src/libextra/ebml.rs index 3527a7b5e55..d8d54e20e97 100644 --- a/src/libextra/ebml.rs +++ b/src/libextra/ebml.rs @@ -410,7 +410,7 @@ pub mod reader { } fn read_bool(&mut self) -> bool { - doc_as_u8(self.next_doc(EsBool)) as bool + doc_as_u8(self.next_doc(EsBool)) != 0 } fn read_f64(&mut self) -> f64 { diff --git a/src/librustc/middle/trans/base.rs b/src/librustc/middle/trans/base.rs index 92aedbdef84..9b5ba00a34a 100644 --- a/src/librustc/middle/trans/base.rs +++ b/src/librustc/middle/trans/base.rs @@ -2219,7 +2219,7 @@ pub fn trans_item(ccx: @mut CrateContext, item: &ast::item) { } let v = ccx.const_values.get_copy(&item.id); unsafe { - if !(llvm::LLVMConstIntGetZExtValue(v) as bool) { + if !(llvm::LLVMConstIntGetZExtValue(v) != 0) { ccx.sess.span_fatal(expr.span, "static assertion failed"); } } diff --git a/src/librustc/middle/typeck/check/mod.rs b/src/librustc/middle/typeck/check/mod.rs index 9afcec7c340..0e335cf3f14 100644 --- a/src/librustc/middle/typeck/check/mod.rs +++ b/src/librustc/middle/typeck/check/mod.rs @@ -2696,6 +2696,7 @@ pub fn check_expr_with_unifier(fcx: @mut FnCtxt, 
}, t_e, None); } + let t1 = structurally_resolved_type(fcx, e.span, t_1); let te = structurally_resolved_type(fcx, e.span, t_e); let t_1_is_char = type_is_char(fcx, expr.span, t_1); @@ -2710,6 +2711,9 @@ pub fn check_expr_with_unifier(fcx: @mut FnCtxt, fmt!("only `u8` can be cast as `char`, not `%s`", actual) }, t_e, None); } + } else if ty::get(t1).sty == ty::ty_bool { + fcx.tcx().sess.span_err(expr.span, + "cannot cast as `bool`, compare with zero instead"); } else if type_is_region_ptr(fcx, expr.span, t_e) && type_is_unsafe_ptr(fcx, expr.span, t_1) { diff --git a/src/test/compile-fail/cast-as-bool.rs b/src/test/compile-fail/cast-as-bool.rs new file mode 100644 index 00000000000..6d68f56b2b1 --- /dev/null +++ b/src/test/compile-fail/cast-as-bool.rs @@ -0,0 +1,12 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +// error-pattern: cannot cast as `bool`, compare with zero instead +fn main() { let u = (5 as bool); } diff --git a/src/test/run-pass/cast.rs b/src/test/run-pass/cast.rs index 00d3155cd98..8cf7c2eec99 100644 --- a/src/test/run-pass/cast.rs +++ b/src/test/run-pass/cast.rs @@ -18,6 +18,5 @@ pub fn main() { assert_eq!(i as u8, 'Q' as u8); assert_eq!(i as u8 as i8, 'Q' as u8 as i8); assert_eq!(0x51u8 as char, 'Q'); - assert_eq!(true, 1 as bool); assert_eq!(0 as u32, false as u32); } diff --git a/src/test/run-pass/supported-cast.rs b/src/test/run-pass/supported-cast.rs index edadf282163..663f36ce673 100644 --- a/src/test/run-pass/supported-cast.rs +++ b/src/test/run-pass/supported-cast.rs @@ -26,7 +26,6 @@ pub fn main() { info!(1 as int); info!(1 as uint); info!(1 as float); - info!(1 as bool); info!(1 as *libc::FILE); info!(1 as i8); info!(1 as i16); @@ -42,7 +41,6 @@ pub fn main() { info!(1u as int); info!(1u as uint); info!(1u as float); - info!(1u as bool); info!(1u as *libc::FILE); info!(1u as i8); info!(1u as i16); @@ -58,7 +56,6 @@ pub fn main() { info!(1i8 as int); info!(1i8 as uint); info!(1i8 as float); - info!(1i8 as bool); info!(1i8 as *libc::FILE); info!(1i8 as i8); info!(1i8 as i16); @@ -74,7 +71,6 @@ pub fn main() { info!(1u8 as int); info!(1u8 as uint); info!(1u8 as float); - info!(1u8 as bool); info!(1u8 as *libc::FILE); info!(1u8 as i8); info!(1u8 as i16); @@ -90,7 +86,6 @@ pub fn main() { info!(1i16 as int); info!(1i16 as uint); info!(1i16 as float); - info!(1i16 as bool); info!(1i16 as *libc::FILE); info!(1i16 as i8); info!(1i16 as i16); @@ -106,7 +101,6 @@ pub fn main() { info!(1u16 as int); info!(1u16 as uint); info!(1u16 as float); - info!(1u16 as bool); info!(1u16 as *libc::FILE); info!(1u16 as i8); info!(1u16 as i16); @@ -122,7 +116,6 @@ pub fn main() { info!(1i32 as int); info!(1i32 as uint); info!(1i32 as float); - info!(1i32 as bool); info!(1i32 as *libc::FILE); info!(1i32 as i8); info!(1i32 as i16); @@ -138,7 +131,6 @@ pub fn main() { 
info!(1u32 as int); info!(1u32 as uint); info!(1u32 as float); - info!(1u32 as bool); info!(1u32 as *libc::FILE); info!(1u32 as i8); info!(1u32 as i16); @@ -154,7 +146,6 @@ pub fn main() { info!(1i64 as int); info!(1i64 as uint); info!(1i64 as float); - info!(1i64 as bool); info!(1i64 as *libc::FILE); info!(1i64 as i8); info!(1i64 as i16); @@ -170,7 +161,6 @@ pub fn main() { info!(1u64 as int); info!(1u64 as uint); info!(1u64 as float); - info!(1u64 as bool); info!(1u64 as *libc::FILE); info!(1u64 as i8); info!(1u64 as i16); @@ -186,7 +176,6 @@ pub fn main() { info!(1u64 as int); info!(1u64 as uint); info!(1u64 as float); - info!(1u64 as bool); info!(1u64 as *libc::FILE); info!(1u64 as i8); info!(1u64 as i16); @@ -202,7 +191,6 @@ pub fn main() { info!(true as int); info!(true as uint); info!(true as float); - info!(true as bool); info!(true as *libc::FILE); info!(true as i8); info!(true as i16); @@ -218,7 +206,6 @@ pub fn main() { info!(1. as int); info!(1. as uint); info!(1. as float); - info!(1. as bool); info!(1. as i8); info!(1. as i16); info!(1. as i32); @@ -233,7 +220,6 @@ pub fn main() { info!(1f32 as int); info!(1f32 as uint); info!(1f32 as float); - info!(1f32 as bool); info!(1f32 as i8); info!(1f32 as i16); info!(1f32 as i32); @@ -248,7 +234,6 @@ pub fn main() { info!(1f64 as int); info!(1f64 as uint); info!(1f64 as float); - info!(1f64 as bool); info!(1f64 as i8); info!(1f64 as i16); info!(1f64 as i32); From b153219556e20cb9f0e70c6a064cdfd10469ea32 Mon Sep 17 00:00:00 2001 From: blake2-ppc Date: Wed, 4 Sep 2013 02:36:55 +0200 Subject: [PATCH 2/2] std::str: Deny surrogates in is_utf8 Reject codepoints \uD800 to \uDFFF, which are the surrogates (reserved/unused codepoints that are invalid to encode into UTF-8). The surrogates are the only hole of invalid codepoints in the range from \u0 to \u10FFFF. 
--- src/libstd/str.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/libstd/str.rs b/src/libstd/str.rs index 87effda2540..704a9f05856 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -799,6 +799,8 @@ pub fn is_utf8(v: &[u8]) -> bool { // first C2 80 last DF BF // 3-byte encoding is for codepoints \u0800 to \uffff // first E0 A0 80 last EF BF BF + // excluding surrogates codepoints \ud800 to \udfff + // ED A0 80 to ED BF BF // 4-byte encoding is for codepoints \u10000 to \u10ffff // first F0 90 80 80 last F4 8F BF BF // @@ -812,8 +814,6 @@ pub fn is_utf8(v: &[u8]) -> bool { // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / // %xF4 %x80-8F 2( UTF8-tail ) // UTF8-tail = %x80-BF - // -- - // This code allows surrogate pairs: \uD800 to \uDFFF -> ED A0 80 to ED BF BF match w { 2 => if unsafe_get(v, i + 1) & 192u8 != TAG_CONT_U8 { return false @@ -822,7 +822,9 @@ pub fn is_utf8(v: &[u8]) -> bool { unsafe_get(v, i + 1), unsafe_get(v, i + 2) & 192u8) { (0xE0 , 0xA0 .. 0xBF, TAG_CONT_U8) => (), - (0xE1 .. 0xEF, 0x80 .. 0xBF, TAG_CONT_U8) => (), + (0xE1 .. 0xEC, 0x80 .. 0xBF, TAG_CONT_U8) => (), + (0xED , 0x80 .. 0x9F, TAG_CONT_U8) => (), + (0xEE .. 0xEF, 0x80 .. 
0xBF, TAG_CONT_U8) => (), _ => return false, }, _ => match (v_i, @@ -3012,6 +3014,7 @@ mod tests { #[test] fn test_is_utf8() { + // deny overlong encodings assert!(!is_utf8([0xc0, 0x80])); assert!(!is_utf8([0xc0, 0xae])); assert!(!is_utf8([0xe0, 0x80, 0x80])); @@ -3020,9 +3023,15 @@ mod tests { assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac])); assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80])); + // deny surrogates + assert!(!is_utf8([0xED, 0xA0, 0x80])); + assert!(!is_utf8([0xED, 0xBF, 0xBF])); + assert!(is_utf8([0xC2, 0x80])); assert!(is_utf8([0xDF, 0xBF])); assert!(is_utf8([0xE0, 0xA0, 0x80])); + assert!(is_utf8([0xED, 0x9F, 0xBF])); + assert!(is_utf8([0xEE, 0x80, 0x80])); assert!(is_utf8([0xEF, 0xBF, 0xBF])); assert!(is_utf8([0xF0, 0x90, 0x80, 0x80])); assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));