#!/usr/bin/env python
#
# Copyright 2011-2016 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 or the MIT license
# , at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

# This script uses the following Unicode tables:
# - Categories.txt

import os
import subprocess


def to_ranges(iter):
    """Merge a stream of integers into half-open ``(start, end)`` ranges.

    A value extends the current range only when it equals the range's
    current end; any other value starts a new range. A new range is also
    forced at 0x10000 and 0x20000, so no yielded range crosses either of
    those boundaries.

    Yields nothing for an empty input.
    """
    current = None
    for i in iter:
        if current is None or i != current[1] or i in (0x10000, 0x20000):
            if current is not None:
                yield tuple(current)
            current = [i, i + 1]
        else:
            current[1] += 1
    if current is not None:
        yield tuple(current)


def get_escaped(dictionary):
    """Yield, in ascending order, every code point that must be escaped.

    ``dictionary`` maps a code point to its category string; code points
    missing from it are treated as "Cn". A code point is yielded when its
    category is one of Cc, Cf, Cs, Co, Cn, Zl, Zp or Zs, with the single
    exception of the ASCII space character.
    """
    # Built once: the loop below runs 0x110000 times.
    escaped_categories = frozenset("Cc Cf Cs Co Cn Zl Zp Zs".split())
    space = ord(' ')
    for i in range(0x110000):
        if dictionary.get(i, "Cn") in escaped_categories and i != space:
            yield i


def get_file(f):
    """Open the file named by the last path component of URL ``f``.

    The file is looked up relative to the current working directory. If it
    does not exist there, it is first downloaded with ``curl -O``
    (raising ``subprocess.CalledProcessError`` if curl fails).
    """
    local_name = os.path.basename(f)
    try:
        return open(local_name)
    except FileNotFoundError:
        subprocess.run(["curl", "-O", f], check=True)
        return open(local_name)


def classify_ranges(ranges, cutoff=0x10000):
    """Sort half-open ``(start, end)`` ranges into the five output tables.

    Returns ``(singletons0, singletons1, normal0, normal1, extra)``:

    - ranges starting at or above ``2 * cutoff`` go to ``extra`` as
      ``(start, length)`` with the start left unmasked;
    - ranges of length 1 or 2 are expanded into individual values, stored
      in ``singletons1`` (with the ``cutoff`` bit cleared) when the start
      has the ``cutoff`` bit set, otherwise in ``singletons0``;
    - all remaining ranges are stored as ``(start, length)`` in ``normal1``
      (start with the ``cutoff`` bit cleared) or ``normal0`` by the same
      rule.
    """
    singletons0 = []
    singletons1 = []
    normal0 = []
    normal1 = []
    extra = []

    for a, b in ranges:
        length = b - a
        if a >= 2 * cutoff:
            # Must be ">=": a short range starting exactly at 2 * cutoff
            # would otherwise land in singletons0 with a value that does
            # not fit the u16 tables emitted by main().
            extra.append((a, length))
        elif length in (1, 2):
            if a & cutoff:
                singletons1.extend(i & ~cutoff for i in range(a, b))
            else:
                singletons0.extend(range(a, b))
        elif a & cutoff:
            normal1.append((a & ~cutoff, length))
        else:
            normal0.append((a, length))

    return singletons0, singletons1, normal0, normal1, extra


def main():
    """Read Categories.txt and print the generated Rust source to stdout."""
    # Close the table file as soon as it has been parsed.
    with get_file("http://www.unicode.org/notes/tn36/Categories.txt") as file:
        dictionary = {}
        for line in file:
            fields = line.split()
            if not fields:
                # Tolerate blank lines instead of failing with IndexError.
                continue
            dictionary[int(fields[0], 16)] = fields[1]

    CUTOFF = 0x10000
    singletons0, singletons1, normal0, normal1, extra = classify_ranges(
        to_ranges(get_escaped(dictionary)), CUTOFF)

    print("""\
// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 or the MIT license
// , at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// NOTE: The following code was generated by "src/etc/char_private.py",
// do not edit directly!

use slice::SliceExt;

fn check(x: u16, singletons: &[u16], normal: &[u16]) -> bool {
    for &s in singletons {
        if x == s {
            return false;
        } else if x < s {
            break;
        }
    }
    for w in normal.chunks(2) {
        let start = w[0];
        let len = w[1];
        let difference = (x as i32) - (start as i32);
        if 0 <= difference {
            if difference < len as i32 {
                return false;
            }
        } else {
            break;
        }
    }
    true
}

pub fn is_printable(x: char) -> bool {
    let x = x as u32;
    let lower = x as u16;
    if x < 0x10000 {
        check(lower, SINGLETONS0, NORMAL0)
    } else if x < 0x20000 {
        check(lower, SINGLETONS1, NORMAL1)
    } else {\
""")
    for a, b in extra:
        print("        if 0x{:x} <= x && x < 0x{:x} {{".format(a, a + b))
        print("            return false;")
        print("        }")
    print("""\
        true
    }
}\
""")
    print()
    print("const SINGLETONS0: &'static [u16] = &[")
    for s in singletons0:
        print("    0x{:x},".format(s))
    print("];")
    print("const SINGLETONS1: &'static [u16] = &[")
    for s in singletons1:
        print("    0x{:x},".format(s))
    print("];")
    print("const NORMAL0: &'static [u16] = &[")
    for a, b in normal0:
        print("    0x{:x}, 0x{:x},".format(a, b))
    print("];")
    print("const NORMAL1: &'static [u16] = &[")
    for a, b in normal1:
        print("    0x{:x}, 0x{:x},".format(a, b))
    print("];")


if __name__ == '__main__':
    main()