rust/src/etc/extract_grammar.py
#!/usr/bin/env python
#
# Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

# This script is for extracting the grammar from the Rust docs.
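#
# The script reads the documentation source(s) named on the command line
# (or piped on stdin) via `fileinput` and prints a yacc-style token list
# and set of productions to stdout. A typical invocation might look like
# the line below; the doc path and output name are illustrative
# assumptions, not anything this script checks:
#
#   ./extract_grammar.py doc/rust.md > grammar.txt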
import fileinput

collections = { "gram": [],
                "keyword": [],
                "reserved": [],
                "binop": [],
                "unop": [] }

in_coll = False
coll = ""

for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    if in_coll:
        if line.startswith("~~~~"):
            in_coll = False
        else:
            if coll in ["keyword", "reserved", "binop", "unop"]:
                for word in line.split():
                    if word not in collections[coll]:
                        collections[coll].append(word)
            else:
                collections[coll].append(line)
    else:
        if line.startswith("~~~~"):
            for cname in collections:
                if ("." + cname) in line:
                    coll = cname
                    in_coll = True
                    break
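
# For illustration only (the markup shape is inferred from the matching
# logic above, not quoted from the docs): a fenced block tagged with one
# of the collection names, e.g.
#
#   ~~~~ {.keyword}
#   fn let match ...
#   ~~~~
#
# has its words appended to collections["keyword"], while a block tagged
# {.gram} is collected line by line.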

# Define operator symbol-names here

tokens = ["non_star", "non_slash", "non_eol",
          "non_single_quote", "non_double_quote", "ident"]
symnames = {
    ".": "dot",
    "+": "plus",
    "-": "minus",
    "/": "slash",
    "*": "star",
    "%": "percent",
    "~": "tilde",
    "@": "at",
    "!": "not",
    "&": "and",
    "|": "or",
    "^": "xor",
    "<<": "lsl",
    ">>": "lsr",
    ">>>": "asr",
    "&&": "andand",
    "||": "oror",
    "<": "lt",
    "<=": "le",
    "==": "eqeq",
    ">=": "ge",
    ">": "gt",
    "=": "eq",
    "+=": "plusequal",
    "-=": "minusequal",
    "/=": "divequal",
    "*=": "starequal",
    "%=": "percentequal",
    "&=": "andequal",
    "|=": "orequal",
    "^=": "xorequal",
    ">>=": "lsrequal",
    ">>>=": "asrequal",
    "<<=": "lslequal",
    "::": "coloncolon",
    "->": "rightarrow",
    "<-": "leftarrow",
    "<->": "swaparrow",
    "//": "linecomment",
    "/*": "openblockcomment",
    "*/": "closeblockcomment",
    "macro_rules": "macro_rules",
    "=>": "eg",
    "..": "dotdot",
    ",": "comma"
}

lines = []

for line in collections["gram"]:
    line2 = ""
    for word in line.split():

        # replace strings with keyword-names or symbol-names from table
        if word.startswith("\""):
            word = word[1:-1]
            if word in symnames:
                word = symnames[word]
            else:
                for ch in word:
                    if not ch.isalpha():
                        raise Exception("non-alpha apparent keyword: "
                                        + word)
                if word not in tokens:
                    if (word in collections["keyword"] or
                            word in collections["reserved"]):
                        tokens.append(word)
                    else:
                        raise Exception("unknown keyword/reserved word: "
                                        + word)

        line2 += " " + word
    lines.append(line2)
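
# Illustrative only (the production below is assumed, not taken from the
# docs): given a grammar line such as
#
#   binop_expr : expr "+" expr ;
#
# the loop above rewrites the quoted terminal via symnames, producing
#
#    binop_expr : expr plus expr ;
#
# Each word is emitted with a leading space, which is why the rewritten
# line starts with one.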

for word in collections["keyword"] + collections["reserved"]:
    if word not in tokens:
        tokens.append(word)

# list(...) keeps the concatenation working under Python 3 as well, where
# dict.keys() returns a view rather than a list.
for sym in collections["unop"] + collections["binop"] + list(symnames.keys()):
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
print("%start parser, token;")
print("%%token %s ;" % ("\n\t, ".join(tokens)))
for coll in ["keyword", "reserved"]:
print("%s: %s ; " % (coll, "\n\t| ".join(collections[coll])));
for coll in ["binop", "unop"]:
print("%s: %s ; " % (coll, "\n\t| ".join([symnames[x]
for x in collections[coll]])));
print("\n".join(lines));