2014-05-12 13:35:08 -05:00
|
|
|
#!/usr/bin/env python
|
|
|
|
#
|
2014-02-02 04:47:02 -06:00
|
|
|
# Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
|
|
|
|
# file at the top-level directory of this distribution and at
|
|
|
|
# http://rust-lang.org/COPYRIGHT.
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
|
|
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
|
|
# option. This file may not be copied, modified, or distributed
|
|
|
|
# except according to those terms.
|
2012-01-12 21:10:30 -06:00
|
|
|
|
|
|
|
# This script is for extracting the grammar from the rust docs.
|
|
|
|
|
|
|
|
import fileinput
|
|
|
|
|
|
|
|
# Buckets for the grammar fragments harvested from the docs, keyed by the
# code-fence tag (e.g. "~~~~ {.keyword}") that introduces each of them.
collections = {name: [] for name in ("gram", "keyword", "reserved",
                                     "binop", "unop")}

# Scanner state: whether we are currently inside a tagged code fence,
# and which collection that fence feeds.
in_coll = False
coll = ""
|
|
|
|
|
|
|
|
# Scan the input docs.  Code fences are delimited by "~~~~"; an opening
# fence whose info string contains ".<name>" (e.g. ".binop") feeds the
# matching collection until the closing "~~~~".  Word-list collections
# are split into unique words; "gram" keeps whole lines verbatim.
for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    if not in_coll:
        # Outside a fence: look for an opening "~~~~ {.<collection>}".
        if line.startswith("~~~~"):
            for cname in collections:
                if ("." + cname) in line:
                    coll = cname
                    in_coll = True
                    break
        continue

    # Inside a fence: "~~~~" closes it; anything else is content.
    if line.startswith("~~~~"):
        in_coll = False
    elif coll == "gram":
        collections[coll].append(line)
    else:
        # Token collections hold unique whitespace-separated words.
        for word in line.split():
            if word not in collections[coll]:
                collections[coll].append(word)
|
|
|
|
|
|
|
|
# Define operator symbol-names here
|
|
|
|
|
|
|
|
# Seed token names referenced by the grammar that are neither keywords
# nor operator symbols; keyword and symbol names are appended later.
tokens = [
    "non_star",
    "non_slash",
    "non_eol",
    "non_single_quote",
    "non_double_quote",
    "ident",
]
|
|
|
|
|
|
|
|
# Map each operator / punctuation spelling used by the grammar to the
# identifier emitted for it.  NOTE: entry order matters — the token list
# is later extended by walking this table in order.
symnames = {
    ".": "dot",
    "+": "plus",
    "-": "minus",
    "/": "slash",
    "*": "star",
    "%": "percent",

    "~": "tilde",
    "@": "at",

    "!": "not",
    "&": "and",
    "|": "or",
    "^": "xor",

    # shifts
    "<<": "lsl",
    ">>": "lsr",
    ">>>": "asr",

    # short-circuit logical operators
    "&&": "andand",
    "||": "oror",

    # comparisons
    "<": "lt",
    "<=": "le",
    "==": "eqeq",
    ">=": "ge",
    ">": "gt",

    # assignment and compound assignment
    "=": "eq",
    "+=": "plusequal",
    "-=": "minusequal",
    "/=": "divequal",
    "*=": "starequal",
    "%=": "percentequal",
    "&=": "andequal",
    "|=": "orequal",
    "^=": "xorequal",
    ">>=": "lsrequal",
    ">>>=": "asrequal",
    "<<=": "lslequal",

    "::": "coloncolon",

    # arrows
    "->": "rightarrow",
    "<-": "leftarrow",
    "<->": "swaparrow",

    # comment delimiters
    "//": "linecomment",
    "/*": "openblockcomment",
    "*/": "closeblockcomment",

    "macro_rules": "macro_rules",
    "=>": "eg",
    "..": "dotdot",
    ",": "comma",
}
|
|
|
|
|
|
|
|
# Rewrite each grammar line, replacing quoted terminals with their token
# names.  Quoted symbols are looked up in symnames; any other quoted
# terminal must be a purely alphabetic keyword/reserved word, which is
# promoted to a token on first use.
lines = []
for line in collections["gram"]:
    out_words = []
    for word in line.split():
        if word.startswith("\""):
            word = word[1:-1]  # strip the surrounding quotes
            if word in symnames:
                word = symnames[word]
            else:
                # Not a symbol: it must be an all-alpha keyword.
                for ch in word:
                    if not ch.isalpha():
                        raise Exception("non-alpha apparent keyword: "
                                        + word)
                if word not in tokens:
                    if (word in collections["keyword"] or
                        word in collections["reserved"]):
                        tokens.append(word)
                    else:
                        raise Exception("unknown keyword/reserved word: "
                                        + word)
        out_words.append(word)
    # Matches the original " word word ..." accumulation (leading space).
    lines.append("".join(" " + w for w in out_words))
|
|
|
|
|
|
|
|
|
|
|
|
# Ensure every keyword and reserved word is a token, even when the
# grammar text never quoted it explicitly.
for kw in collections["keyword"] + collections["reserved"]:
    if kw not in tokens:
        tokens.append(kw)
|
|
|
|
|
|
|
|
# Every operator used as a unop/binop, plus every named symbol, also
# becomes a token.  list(symnames) is required on Python 3: dict.keys()
# returns a view object that cannot be concatenated to a list with "+".
for sym in collections["unop"] + collections["binop"] + list(symnames):
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
|
|
|
|
|
|
|
|
|
|
|
|
# Emit the grammar: start declaration, token declarations, the keyword
# and operator classes as alternations, then the rewritten rules.
print("%start parser, token;")
print("%%token %s ;" % ("\n\t, ".join(tokens)))
for coll in ("keyword", "reserved"):
    print("%s: %s ; " % (coll, "\n\t| ".join(collections[coll])))
for coll in ("binop", "unop"):
    names = [symnames[s] for s in collections[coll]]
    print("%s: %s ; " % (coll, "\n\t| ".join(names)))
print("\n".join(lines))
|