Diffstat (limited to 'cgi-bin/js_check')

 -rw-r--r--  cgi-bin/js_check/__init__.py           |    3
 -rw-r--r--  cgi-bin/js_check/__init__.pyc          |  bin 0 -> 156 bytes
 -rw-r--r--  cgi-bin/js_check/jsparser.py           | 1110
 -rw-r--r--  cgi-bin/js_check/jsparser.pyc          |  bin 0 -> 27289 bytes
 -rw-r--r--  cgi-bin/js_check/test_javascript.py    |   82
 -rw-r--r--  cgi-bin/js_check/test_javascript.py.bk |   85
 -rw-r--r--  cgi-bin/js_check/validate.py           |   91
 -rw-r--r--  cgi-bin/js_check/validate.pyc          |  bin 0 -> 2770 bytes
 -rw-r--r--  cgi-bin/js_check/verify.pyc            |  bin 0 -> 2479 bytes

9 files changed, 1371 insertions(+), 0 deletions(-)
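The commit below vendors the PyNarcissus JavaScript parser (jsparser.py) and adds two checkers built on it: test_javascript.py, a CGI handler that reads a 'script' form field, and validate.py, which runs the same walk from the command line or as an import. As a quick orientation, here is a minimal usage sketch of the vendored parser API; it is not part of the patch, and the sample JavaScript string is invented for illustration.

# Minimal sketch of the vendored parser API -- not part of the patch.
# The JavaScript snippet is made up; jsparser.parse() and the Node
# attributes (type, lineno) come from jsparser.py in the diff below.
import jsparser

tree = jsparser.parse('var total = price + tax;', 'example.js')
print tree.type                      # SCRIPT
for node in tree:
    print node.type, node.lineno     # e.g. VAR 1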
diff --git a/cgi-bin/js_check/__init__.py b/cgi-bin/js_check/__init__.py
new file mode 100644
index 0000000..e59d5d9
--- /dev/null
+++ b/cgi-bin/js_check/__init__.py
@@ -0,0 +1,3 @@
+#!/usr/bin/python2
+__all__ = ["test_javascript", "verify"]
+
diff --git a/cgi-bin/js_check/__init__.pyc b/cgi-bin/js_check/__init__.pyc
Binary files differ
new file mode 100644
index 0000000..5d23962
--- /dev/null
+++ b/cgi-bin/js_check/__init__.pyc
diff --git a/cgi-bin/js_check/jsparser.py b/cgi-bin/js_check/jsparser.py
new file mode 100644
index 0000000..12e3a30
--- /dev/null
+++ b/cgi-bin/js_check/jsparser.py
@@ -0,0 +1,1110 @@
+#!/usr/bin/python2.5
+
+
+"""
+    PyNarcissus
+
+    A lexical scanner and parser. JS implemented in JS, ported to Python.
+"""
+
+__author__ = "JT Olds"
+__author_email__ = "jtolds@xnet5.com"
+__date__ = "2009-03-24"
+__all__ = ["ParseError", "parse", "tokens"]
+
+import re, sys, types
+
+class Object: pass
+class Error_(Exception): pass
+class ParseError(Error_): pass
+
+tokens = dict(enumerate((
+        # End of source.
+        "END",
+
+        # Operators and punctuators. Some pair-wise order matters, e.g. (+, -)
+        # and (UNARY_PLUS, UNARY_MINUS).
+        "\n", ";",
+        ",",
+        "=",
+        "?", ":", "CONDITIONAL",
+        "||",
+        "&&",
+        "|",
+        "^",
+        "&",
+        "==", "!=", "===", "!==",
+        "<", "<=", ">=", ">",
+        "<<", ">>", ">>>",
+        "+", "-",
+        "*", "/", "%",
+        "!", "~", "UNARY_PLUS", "UNARY_MINUS",
+        "++", "--",
+        ".",
+        "[", "]",
+        "{", "}",
+        "(", ")",
+
+        # Nonterminal tree node type codes.
+        "SCRIPT", "BLOCK", "LABEL", "FOR_IN", "CALL", "NEW_WITH_ARGS", "INDEX",
+        "ARRAY_INIT", "OBJECT_INIT", "PROPERTY_INIT", "GETTER", "SETTER",
+        "GROUP", "LIST",
+
+        # Terminals.
+        "IDENTIFIER", "NUMBER", "STRING", "REGEXP",
+
+        # Keywords.
+        "break",
+        "case", "catch", "const", "continue",
+        "debugger", "default", "delete", "do",
+        "else", "enum",
+        "false", "finally", "for", "function",
+        "if", "in", "instanceof",
+        "new", "null",
+        "return",
+        "switch",
+        "this", "throw", "true", "try", "typeof",
+        "var", "void",
+        "while", "with")))
+
+# Operator and punctuator mapping from token to tree node type name.
+# NB: superstring tokens (e.g., ++) must come before their substring token
+# counterparts (+ in the example), so that the opRegExp regular expression
+# synthesized from this list makes the longest possible match.
+opTypeNames = [
+        ('\n',   "NEWLINE"),
+        (';',    "SEMICOLON"),
+        (',',    "COMMA"),
+        ('?',    "HOOK"),
+        (':',    "COLON"),
+        ('||',   "OR"),
+        ('&&',   "AND"),
+        ('|',    "BITWISE_OR"),
+        ('^',    "BITWISE_XOR"),
+        ('&',    "BITWISE_AND"),
+        ('===',  "STRICT_EQ"),
+        ('==',   "EQ"),
+        ('=',    "ASSIGN"),
+        ('!==',  "STRICT_NE"),
+        ('!=',   "NE"),
+        ('<<',   "LSH"),
+        ('<=',   "LE"),
+        ('<',    "LT"),
+        ('>>>',  "URSH"),
+        ('>>',   "RSH"),
+        ('>=',   "GE"),
+        ('>',    "GT"),
+        ('++',   "INCREMENT"),
+        ('--',   "DECREMENT"),
+        ('+',    "PLUS"),
+        ('-',    "MINUS"),
+        ('*',    "MUL"),
+        ('/',    "DIV"),
+        ('%',    "MOD"),
+        ('!',    "NOT"),
+        ('~',    "BITWISE_NOT"),
+        ('.',    "DOT"),
+        ('[',    "LEFT_BRACKET"),
+        (']',    "RIGHT_BRACKET"),
+        ('{',    "LEFT_CURLY"),
+        ('}',    "RIGHT_CURLY"),
+        ('(',    "LEFT_PAREN"),
+        (')',    "RIGHT_PAREN"),
+    ]
+
+keywords = {}
+
+# Define const END, etc., based on the token names. Also map name to index.
+for i, t in tokens.copy().iteritems():
+    if re.match(r'^[a-z]', t):
+        const_name = t.upper()
+        keywords[t] = i
+    elif re.match(r'^\W', t):
+        const_name = dict(opTypeNames)[t]
+    else:
+        const_name = t
+    globals()[const_name] = i
+    tokens[t] = i
+
+assignOps = {}
+
+# Map assignment operators to their indexes in the tokens array.
+for i, t in enumerate(['|', '^', '&', '<<', '>>', '>>>', '+', '-', '*', '/', '%']): + assignOps[t] = tokens[t] + assignOps[i] = t + +# Build a regexp that recognizes operators and punctuators (except newline). +opRegExpSrc = "^" +for i, j in opTypeNames: + if i == "\n": continue + if opRegExpSrc != "^": opRegExpSrc += "|^" + opRegExpSrc += re.sub(r'[?|^&(){}\[\]+\-*\/\.]', lambda x: "\\%s" % x.group(0), i) +opRegExp = re.compile(opRegExpSrc) + +# Convert opTypeNames to an actual dictionary now that we don't care about ordering +opTypeNames = dict(opTypeNames) + +# A regexp to match floating point literals (but not integer literals). +fpRegExp = re.compile(r'^\d+\.\d*(?:[eE][-+]?\d+)?|^\d+(?:\.\d*)?[eE][-+]?\d+|^\.\d+(?:[eE][-+]?\d+)?') + +# A regexp to match regexp literals. +reRegExp = re.compile(r'^\/((?:\\.|\[(?:\\.|[^\]])*\]|[^\/])+)\/([gimy]*)') + +class SyntaxError_(ParseError): + def __init__(self, message, filename, lineno): + ParseError.__init__(self, "Syntax error: %s\n%s:%s" % + (message, filename, lineno)) + +class Tokenizer(object): + def __init__(self, s, f, l): + self.cursor = 0 + self.source = str(s) + self.tokens = {} + self.tokenIndex = 0 + self.lookahead = 0 + self.scanNewlines = False + self.scanOperand = True + self.filename = f + self.lineno = l + + input_ = property(lambda self: self.source[self.cursor:]) + done = property(lambda self: self.peek() == END) + token = property(lambda self: self.tokens.get(self.tokenIndex)) + + def match(self, tt): + return self.get() == tt or self.unget() + + def mustMatch(self, tt): + if not self.match(tt): + raise self.newSyntaxError("Missing " + tokens.get(tt).lower()) + return self.token + + def peek(self): + if self.lookahead: + next = self.tokens.get((self.tokenIndex + self.lookahead) & 3) + if self.scanNewlines and (getattr(next, "lineno", None) != + getattr(self, "lineno", None)): + tt = NEWLINE + else: + tt = getattr(next, "type_", None) + else: + tt = self.get() + self.unget() + return tt + + def peekOnSameLine(self): + self.scanNewlines = True + tt = self.peek() + self.scanNewlines = False + return tt + + def get(self): + while self.lookahead: + self.lookahead -= 1 + self.tokenIndex = (self.tokenIndex + 1) & 3 + token = self.tokens.get(self.tokenIndex) + if getattr(token, "type_", None) != NEWLINE or self.scanNewlines: + return getattr(token, "type_", None) + + while True: + input__ = self.input_ + if self.scanNewlines: + match = re.match(r'^[ \t]+', input__) + else: + match = re.match(r'^\s+', input__) + if match: + spaces = match.group(0) + self.cursor += len(spaces) + newlines = re.findall(r'\n', spaces) + if newlines: + self.lineno += len(newlines) + input__ = self.input_ + + match = re.match(r'^\/(?:\*(?:.|\n)*?\*\/|\/.*)', input__) + if not match: + break + comment = match.group(0) + self.cursor += len(comment) + newlines = re.findall(r'\n', comment) + if newlines: + self.lineno += len(newlines) + + self.tokenIndex = (self.tokenIndex + 1) & 3 + token = self.tokens.get(self.tokenIndex) + if not token: + token = Object() + self.tokens[self.tokenIndex] = token + + if not input__: + token.type_ = END + return END + + def matchInput(): + match = fpRegExp.match(input__) + if match: + token.type_ = NUMBER + token.value = float(match.group(0)) + return match.group(0) + + match = re.match(r'^0[xX][\da-fA-F]+|^0[0-7]*|^\d+', input__) + if match: + token.type_ = NUMBER + token.value = eval(match.group(0)) + return match.group(0) + + match = re.match(r'^[$_\w]+', input__) # FIXME no ES3 unicode + if match: + id_ = match.group(0) + 
token.type_ = keywords.get(id_, IDENTIFIER) + token.value = id_ + return match.group(0) + + match = re.match(r'^"(?:\\.|[^"])*"|^\'(?:\\.|[^\'])*\'', input__) + if match: + token.type_ = STRING + token.value = eval(match.group(0)) + return match.group(0) + + if self.scanOperand: + match = reRegExp.match(input__) + if match: + token.type_ = REGEXP + token.value = {"regexp": match.group(1), + "modifiers": match.group(2)} + return match.group(0) + + match = opRegExp.match(input__) + if match: + op = match.group(0) + if assignOps.has_key(op) and input__[len(op)] == '=': + token.type_ = ASSIGN + token.assignOp = globals()[opTypeNames[op]] + token.value = op + return match.group(0) + "=" + token.type_ = globals()[opTypeNames[op]] + if self.scanOperand and (token.type_ in (PLUS, MINUS)): + token.type_ += UNARY_PLUS - PLUS + token.assignOp = None + token.value = op + return match.group(0) + + if self.scanNewlines: + match = re.match(r'^\n', input__) + if match: + token.type_ = NEWLINE + return match.group(0) + + raise self.newSyntaxError("Illegal token") + + token.start = self.cursor + self.cursor += len(matchInput()) + token.end = self.cursor + token.lineno = self.lineno + return getattr(token, "type_", None) + + def unget(self): + self.lookahead += 1 + if self.lookahead == 4: raise "PANIC: too much lookahead!" + self.tokenIndex = (self.tokenIndex - 1) & 3 + + def newSyntaxError(self, m): + return SyntaxError_(m, self.filename, self.lineno) + +class CompilerContext(object): + def __init__(self, inFunction): + self.inFunction = inFunction + self.stmtStack = [] + self.funDecls = [] + self.varDecls = [] + self.bracketLevel = 0 + self.curlyLevel = 0 + self.parenLevel = 0 + self.hookLevel = 0 + self.ecmaStrictMode = False + self.inForLoopInit = False + +def Script(t, x): + n = Statements(t, x) + n.type_ = SCRIPT + n.funDecls = x.funDecls + n.varDecls = x.varDecls + return n + +class Node(list): + + def __init__(self, t, type_=None, args=[]): + list.__init__(self) + + token = t.token + if token: + if type_: + self.type_ = type_ + else: + self.type_ = getattr(token, "type_", None) + self.value = token.value + self.lineno = token.lineno + self.start = token.start + self.end = token.end + else: + self.type_ = type_ + self.lineno = t.lineno + self.tokenizer = t + + for arg in args: + self.append(arg) + + type = property(lambda self: tokenstr(self.type_)) + + # Always use push to add operands to an expression, to update start and end. 
+ def append(self, kid, numbers=[]): + if kid: + if hasattr(self, "start") and kid.start < self.start: + self.start = kid.start + if hasattr(self, "end") and self.end < kid.end: + self.end = kid.end + return list.append(self, kid) + + indentLevel = 0 + + def __str__(self): + a = list((str(i), v) for i, v in enumerate(self)) + for attr in dir(self): + if attr[0] == "_": continue + elif attr == "tokenizer": + a.append((attr, "[object Object]")) + elif attr in ("append", "count", "extend", "getSource", "index", + "insert", "pop", "remove", "reverse", "sort", "type_", + "target", "filename", "indentLevel", "type"): + continue + else: + a.append((attr, getattr(self, attr))) + if len(self): a.append(("length", len(self))) + a.sort(lambda a, b: cmp(a[0], b[0])) + INDENTATION = " " + Node.indentLevel += 1 + n = Node.indentLevel + s = "{\n%stype: %s" % ((INDENTATION * n), tokenstr(self.type_)) + for i, value in a: + s += ",\n%s%s: " % ((INDENTATION * n), i) + if i == "value" and self.type_ == REGEXP: + s += "/%s/%s" % (value["regexp"], value["modifiers"]) + elif value is None: + s += "null" + elif value is False: + s += "false" + elif value is True: + s += "true" + elif type(value) == list: + s += ','.join((str(x) for x in value)) + else: + s += str(value) + Node.indentLevel -= 1 + n = Node.indentLevel + s += "\n%s}" % (INDENTATION * n) + return s + __repr__ = __str__ + + def getSource(self): + if getattr(self, "start", None) is not None: + if getattr(self, "end", None) is not None: + return self.tokenizer.source[self.start:self.end] + return self.tokenizer.source[self.start:] + if getattr(self, "end", None) is not None: + return self.tokenizer.source[:self.end] + return self.tokenizer.source[:] + + filename = property(lambda self: self.tokenizer.filename) + + def __nonzero__(self): return True + +# Statement stack and nested statement handler. +def nest(t, x, node, func, end=None): + x.stmtStack.append(node) + n = func(t, x) + x.stmtStack.pop() + if end: t.mustMatch(end) + return n + +def tokenstr(tt): + t = tokens[tt] + if re.match(r'^\W', t): + return opTypeNames[t] + return t.upper() + +def Statements(t, x): + n = Node(t, BLOCK) + x.stmtStack.append(n) + while not t.done and t.peek() != RIGHT_CURLY: + n.append(Statement(t, x)) + x.stmtStack.pop() + return n + +def Block(t, x): + t.mustMatch(LEFT_CURLY) + n = Statements(t, x) + t.mustMatch(RIGHT_CURLY) + return n + +DECLARED_FORM = 0 +EXPRESSED_FORM = 1 +STATEMENT_FORM = 2 + +def Statement(t, x): + tt = t.get() + + # Cases for statements ending in a right curly return early, avoiding the + # common semicolon insertion magic after this switch. 
+ if tt == FUNCTION: + if len(x.stmtStack) > 1: + type_ = STATEMENT_FORM + else: + type_ = DECLARED_FORM + return FunctionDefinition(t, x, True, type_) + + elif tt == LEFT_CURLY: + n = Statements(t, x) + t.mustMatch(RIGHT_CURLY) + return n + + elif tt == IF: + n = Node(t) + n.condition = ParenExpression(t, x) + x.stmtStack.append(n) + n.thenPart = Statement(t, x) + if t.match(ELSE): + n.elsePart = Statement(t, x) + else: + n.elsePart = None + x.stmtStack.pop() + return n + + elif tt == SWITCH: + n = Node(t) + t.mustMatch(LEFT_PAREN) + n.discriminant = Expression(t, x) + t.mustMatch(RIGHT_PAREN) + n.cases = [] + n.defaultIndex = -1 + x.stmtStack.append(n) + t.mustMatch(LEFT_CURLY) + while True: + tt = t.get() + if tt == RIGHT_CURLY: break + + if tt in (DEFAULT, CASE): + if tt == DEFAULT and n.defaultIndex >= 0: + raise t.newSyntaxError("More than one switch default") + n2 = Node(t) + if tt == DEFAULT: + n.defaultIndex = len(n.cases) + else: + n2.caseLabel = Expression(t, x, COLON) + else: + raise t.newSyntaxError("Invalid switch case") + t.mustMatch(COLON) + n2.statements = Node(t, BLOCK) + while True: + tt = t.peek() + if(tt == CASE or tt == DEFAULT or tt == RIGHT_CURLY): break + n2.statements.append(Statement(t, x)) + n.cases.append(n2) + x.stmtStack.pop() + return n + + elif tt == FOR: + n = Node(t) + n2 = None + n.isLoop = True + t.mustMatch(LEFT_PAREN) + tt = t.peek() + if tt != SEMICOLON: + x.inForLoopInit = True + if tt == VAR or tt == CONST: + t.get() + n2 = Variables(t, x) + else: + n2 = Expression(t, x) + x.inForLoopInit = False + + if n2 and t.match(IN): + n.type_ = FOR_IN + if n2.type_ == VAR: + if len(n2) != 1: + raise SyntaxError("Invalid for..in left-hand side", + t.filename, n2.lineno) + + # NB: n2[0].type_ == INDENTIFIER and n2[0].value == n2[0].name + n.iterator = n2[0] + n.varDecl = n2 + else: + n.iterator = n2 + n.varDecl = None + n.object = Expression(t, x) + else: + if n2: + n.setup = n2 + else: + n.setup = None + t.mustMatch(SEMICOLON) + if t.peek() == SEMICOLON: + n.condition = None + else: + n.condition = Expression(t, x) + t.mustMatch(SEMICOLON) + if t.peek() == RIGHT_PAREN: + n.update = None + else: + n.update = Expression(t, x) + t.mustMatch(RIGHT_PAREN) + n.body = nest(t, x, n, Statement) + return n + + elif tt == WHILE: + n = Node(t) + n.isLoop = True + n.condition = ParenExpression(t, x) + n.body = nest(t, x, n, Statement) + return n + + elif tt == DO: + n = Node(t) + n.isLoop = True + n.body = nest(t, x, n, Statement, WHILE) + n.condition = ParenExpression(t, x) + if not x.ecmaStrictMode: + # <script language="JavaScript"> (without version hints) may need + # automatic semicolon insertion without a newline after do-while. + # See http://bugzilla.mozilla.org/show_bug.cgi?id=238945. 
+ t.match(SEMICOLON) + return n + + elif tt in (BREAK, CONTINUE): + n = Node(t) + if t.peekOnSameLine() == IDENTIFIER: + t.get() + n.label = t.token.value + ss = x.stmtStack + i = len(ss) + label = getattr(n, "label", None) + if label: + while True: + i -= 1 + if i < 0: + raise t.newSyntaxError("Label not found") + if getattr(ss[i], "label", None) == label: break + else: + while True: + i -= 1 + if i < 0: + if tt == BREAK: + raise t.newSyntaxError("Invalid break") + else: + raise t.newSyntaxError("Invalid continue") + if (getattr(ss[i], "isLoop", None) or (tt == BREAK and + ss[i].type_ == SWITCH)): + break + n.target = ss[i] + + elif tt == TRY: + n = Node(t) + n.tryBlock = Block(t, x) + n.catchClauses = [] + while t.match(CATCH): + n2 = Node(t) + t.mustMatch(LEFT_PAREN) + n2.varName = t.mustMatch(IDENTIFIER).value + if t.match(IF): + if x.ecmaStrictMode: + raise t.newSyntaxError("Illegal catch guard") + if n.catchClauses and not n.catchClauses[-1].guard: + raise t.newSyntaxError("Gaurded catch after unguarded") + n2.guard = Expression(t, x) + else: + n2.guard = None + t.mustMatch(RIGHT_PAREN) + n2.block = Block(t, x) + n.catchClauses.append(n2) + if t.match(FINALLY): + n.finallyBlock = Block(t, x) + if not n.catchClauses and not getattr(n, "finallyBlock", None): + raise t.newSyntaxError("Invalid try statement") + return n + + elif tt in (CATCH, FINALLY): + raise t.newSyntaxError(tokens[tt] + " without preceding try") + + elif tt == THROW: + n = Node(t) + n.exception = Expression(t, x) + + elif tt == RETURN: + if not x.inFunction: + raise t.newSyntaxError("Invalid return") + n = Node(t) + tt = t.peekOnSameLine() + if tt not in (END, NEWLINE, SEMICOLON, RIGHT_CURLY): + n.value = Expression(t, x) + + elif tt == WITH: + n = Node(t) + n.object = ParenExpression(t, x) + n.body = nest(t, x, n, Statement) + return n + + elif tt in (VAR, CONST): + n = Variables(t, x) + + elif tt == DEBUGGER: + n = Node(t) + + elif tt in (NEWLINE, SEMICOLON): + n = Node(t, SEMICOLON) + n.expression = None + return n + + else: + if tt == IDENTIFIER: + t.scanOperand = False + tt = t.peek() + t.scanOperand = True + if tt == COLON: + label = t.token.value + ss = x.stmtStack + i = len(ss) - 1 + while i >= 0: + if getattr(ss[i], "label", None) == label: + raise t.newSyntaxError("Duplicate label") + i -= 1 + t.get() + n = Node(t, LABEL) + n.label = label + n.statement = nest(t, x, n, Statement) + return n + + n = Node(t, SEMICOLON) + t.unget() + n.expression = Expression(t, x) + n.end = n.expression.end + + if t.lineno == t.token.lineno: + tt = t.peekOnSameLine() + if tt not in (END, NEWLINE, SEMICOLON, RIGHT_CURLY): + raise t.newSyntaxError("Missing ; before statement") + t.match(SEMICOLON) + return n + +def FunctionDefinition(t, x, requireName, functionForm): + f = Node(t) + if f.type_ != FUNCTION: + if f.value == "get": + f.type_ = GETTER + else: + f.type_ = SETTER + if t.match(IDENTIFIER): + f.name = t.token.value + elif requireName: + raise t.newSyntaxError("Missing function identifier") + + t.mustMatch(LEFT_PAREN) + f.params = [] + while True: + tt = t.get() + if tt == RIGHT_PAREN: break + if tt != IDENTIFIER: + raise t.newSyntaxError("Missing formal parameter") + f.params.append(t.token.value) + if t.peek() != RIGHT_PAREN: + t.mustMatch(COMMA) + + t.mustMatch(LEFT_CURLY) + x2 = CompilerContext(True) + f.body = Script(t, x2) + t.mustMatch(RIGHT_CURLY) + f.end = t.token.end + + f.functionForm = functionForm + if functionForm == DECLARED_FORM: + x.funDecls.append(f) + return f + +def Variables(t, x): + n = Node(t) + 
while True: + t.mustMatch(IDENTIFIER) + n2 = Node(t) + n2.name = n2.value + if t.match(ASSIGN): + if t.token.assignOp: + raise t.newSyntaxError("Invalid variable initialization") + n2.initializer = Expression(t, x, COMMA) + n2.readOnly = not not (n.type_ == CONST) + n.append(n2) + x.varDecls.append(n2) + if not t.match(COMMA): break + return n + +def ParenExpression(t, x): + t.mustMatch(LEFT_PAREN) + n = Expression(t, x) + t.mustMatch(RIGHT_PAREN) + return n + +opPrecedence = { + "SEMICOLON": 0, + "COMMA": 1, + "ASSIGN": 2, "HOOK": 2, "COLON": 2, + # The above all have to have the same precedence, see bug 330975. + "OR": 4, + "AND": 5, + "BITWISE_OR": 6, + "BITWISE_XOR": 7, + "BITWISE_AND": 8, + "EQ": 9, "NE": 9, "STRICT_EQ": 9, "STRICT_NE": 9, + "LT": 10, "LE": 10, "GE": 10, "GT": 10, "IN": 10, "INSTANCEOF": 10, + "LSH": 11, "RSH": 11, "URSH": 11, + "PLUS": 12, "MINUS": 12, + "MUL": 13, "DIV": 13, "MOD": 13, + "DELETE": 14, "VOID": 14, "TYPEOF": 14, + # "PRE_INCREMENT": 14, "PRE_DECREMENT": 14, + "NOT": 14, "BITWISE_NOT": 14, "UNARY_PLUS": 14, "UNARY_MINUS": 14, + "INCREMENT": 15, "DECREMENT": 15, # postfix + "NEW": 16, + "DOT": 17 +} + +# Map operator type code to precedence +for i in opPrecedence.copy(): + opPrecedence[globals()[i]] = opPrecedence[i] + +opArity = { + "COMMA": -2, + "ASSIGN": 2, + "HOOK": 3, + "OR": 2, + "AND": 2, + "BITWISE_OR": 2, + "BITWISE_XOR": 2, + "BITWISE_AND": 2, + "EQ": 2, "NE": 2, "STRICT_EQ": 2, "STRICT_NE": 2, + "LT": 2, "LE": 2, "GE": 2, "GT": 2, "IN": 2, "INSTANCEOF": 2, + "LSH": 2, "RSH": 2, "URSH": 2, + "PLUS": 2, "MINUS": 2, + "MUL": 2, "DIV": 2, "MOD": 2, + "DELETE": 1, "VOID": 1, "TYPEOF": 1, + # "PRE_INCREMENT": 1, "PRE_DECREMENT": 1, + "NOT": 1, "BITWISE_NOT": 1, "UNARY_PLUS": 1, "UNARY_MINUS": 1, + "INCREMENT": 1, "DECREMENT": 1, # postfix + "NEW": 1, "NEW_WITH_ARGS": 2, "DOT": 2, "INDEX": 2, "CALL": 2, + "ARRAY_INIT": 1, "OBJECT_INIT": 1, "GROUP": 1 +} + +# Map operator type code to arity. +for i in opArity.copy(): + opArity[globals()[i]] = opArity[i] + +def Expression(t, x, stop=None): + operators = [] + operands = [] + bl = x.bracketLevel + cl = x.curlyLevel + pl = x.parenLevel + hl = x.hookLevel + + def reduce_(): + n = operators.pop() + op = n.type_ + arity = opArity[op] + if arity == -2: + # Flatten left-associative trees. + left = (len(operands) >= 2 and operands[-2]) + if left.type_ == op: + right = operands.pop() + left.append(right) + return left + arity = 2 + + # Always use append to add operands to n, to update start and end. + a = operands[-arity:] + del operands[-arity:] + for operand in a: + n.append(operand) + + # Include closing bracket or postfix operator in [start,end). + if n.end < t.token.end: + n.end = t.token.end + + operands.append(n) + return n + + class BreakOutOfLoops(Exception): pass + try: + while True: + tt = t.get() + if tt == END: break + if (tt == stop and x.bracketLevel == bl and x.curlyLevel == cl and + x.parenLevel == pl and x.hookLevel == hl): + # Stop only if tt matches the optional stop parameter, and that + # token is not quoted by some kind of bracket. + break + if tt == SEMICOLON: + # NB: cannot be empty, Statement handled that. 
+ raise BreakOutOfLoops + + elif tt in (ASSIGN, HOOK, COLON): + if t.scanOperand: + raise BreakOutOfLoops + while ((operators and opPrecedence.get(operators[-1].type_, + None) > opPrecedence.get(tt)) or (tt == COLON and + operators and operators[-1].type_ == ASSIGN)): + reduce_() + if tt == COLON: + if operators: + n = operators[-1] + if not operators or n.type_ != HOOK: + raise t.newSyntaxError("Invalid label") + x.hookLevel -= 1 + else: + operators.append(Node(t)) + if tt == ASSIGN: + operands[-1].assignOp = t.token.assignOp + else: + x.hookLevel += 1 + + t.scanOperand = True + + elif tt in (IN, COMMA, OR, AND, BITWISE_OR, BITWISE_XOR, + BITWISE_AND, EQ, NE, STRICT_EQ, STRICT_NE, LT, LE, GE, GT, + INSTANCEOF, LSH, RSH, URSH, PLUS, MINUS, MUL, DIV, MOD, + DOT): + # We're treating comma as left-associative so reduce can fold + # left-heavy COMMA trees into a single array. + if tt == IN: + # An in operator should not be parsed if we're parsing the + # head of a for (...) loop, unless it is in the then part of + # a conditional expression, or parenthesized somehow. + if (x.inForLoopInit and not x.hookLevel and not + x.bracketLevel and not x.curlyLevel and + not x.parenLevel): + raise BreakOutOfLoops + if t.scanOperand: + raise BreakOutOfLoops + while (operators and opPrecedence.get(operators[-1].type_) + >= opPrecedence.get(tt)): + reduce_() + if tt == DOT: + t.mustMatch(IDENTIFIER) + operands.append(Node(t, DOT, [operands.pop(), Node(t)])) + else: + operators.append(Node(t)) + t.scanOperand = True + + elif tt in (DELETE, VOID, TYPEOF, NOT, BITWISE_NOT, UNARY_PLUS, + UNARY_MINUS, NEW): + if not t.scanOperand: + raise BreakOutOfLoops + operators.append(Node(t)) + + elif tt in (INCREMENT, DECREMENT): + if t.scanOperand: + operators.append(Node(t)) # prefix increment or decrement + else: + # Don't cross a line boundary for postfix {in,de}crement. + if (t.tokens.get((t.tokenIndex + t.lookahead - 1) + & 3).lineno != t.lineno): + raise BreakOutOfLoops + + # Use >, not >=, so postfix has higher precedence than + # prefix. + while (operators and opPrecedence.get(operators[-1].type_, + None) > opPrecedence.get(tt)): + reduce_() + n = Node(t, tt, [operands.pop()]) + n.postfix = True + operands.append(n) + + elif tt == FUNCTION: + if not t.scanOperand: + raise BreakOutOfLoops + operands.append(FunctionDefinition(t, x, False, EXPRESSED_FORM)) + t.scanOperand = False + + elif tt in (NULL, THIS, TRUE, FALSE, IDENTIFIER, NUMBER, STRING, + REGEXP): + if not t.scanOperand: + raise BreakOutOfLoops + operands.append(Node(t)) + t.scanOperand = False + + elif tt == LEFT_BRACKET: + if t.scanOperand: + # Array initializer. Parse using recursive descent, as the + # sub-grammer here is not an operator grammar. + n = Node(t, ARRAY_INIT) + while True: + tt = t.peek() + if tt == RIGHT_BRACKET: break + if tt == COMMA: + t.get() + n.append(None) + continue + n.append(Expression(t, x, COMMA)) + if not t.match(COMMA): + break + t.mustMatch(RIGHT_BRACKET) + operands.append(n) + t.scanOperand = False + else: + operators.append(Node(t, INDEX)) + t.scanOperand = True + x.bracketLevel += 1 + + elif tt == RIGHT_BRACKET: + if t.scanOperand or x.bracketLevel == bl: + raise BreakOutOfLoops + while reduce_().type_ != INDEX: + continue + x.bracketLevel -= 1 + + elif tt == LEFT_CURLY: + if not t.scanOperand: + raise BreakOutOfLoops + # Object initializer. As for array initializers (see above), + # parse using recursive descent. 
+ x.curlyLevel += 1 + n = Node(t, OBJECT_INIT) + + class BreakOutOfObjectInit(Exception): pass + try: + if not t.match(RIGHT_CURLY): + while True: + tt = t.get() + if ((t.token.value == "get" or + t.token.value == "set") and + t.peek == IDENTIFIER): + if x.ecmaStrictMode: + raise t.newSyntaxError("Illegal property " + "accessor") + n.append(FunctionDefinition(t, x, True, + EXPRESSED_FORM)) + else: + if tt in (IDENTIFIER, NUMBER, STRING): + id_ = Node(t) + elif tt == RIGHT_CURLY: + if x.ecmaStrictMode: + raise t.newSyntaxError("Illegal " + "trailing ,") + raise BreakOutOfObjectInit + else: + raise t.newSyntaxError("Invalid property " + "name") + t.mustMatch(COLON) + n.append(Node(t, PROPERTY_INIT, [id_, + Expression(t, x, COMMA)])) + if not t.match(COMMA): break + t.mustMatch(RIGHT_CURLY) + except BreakOutOfObjectInit, e: pass + operands.append(n) + t.scanOperand = False + x.curlyLevel -= 1 + + elif tt == RIGHT_CURLY: + if not t.scanOperand and x.curlyLevel != cl: + raise ParseError("PANIC: right curly botch") + raise BreakOutOfLoops + + elif tt == LEFT_PAREN: + if t.scanOperand: + operators.append(Node(t, GROUP)) + x.parenLevel += 1 + else: + while (operators and + opPrecedence.get(operators[-1].type_) > + opPrecedence[NEW]): + reduce_() + + # Handle () now, to regularize the n-ary case for n > 0. + # We must set scanOperand in case there are arguments and + # the first one is a regexp or unary+/-. + if operators: + n = operators[-1] + else: + n = Object() + n.type_ = None + t.scanOperand = True + if t.match(RIGHT_PAREN): + if n.type_ == NEW: + operators.pop() + n.append(operands.pop()) + else: + n = Node(t, CALL, [operands.pop(), Node(t, LIST)]) + operands.append(n) + t.scanOperand = False + else: + if n.type_ == NEW: + n.type_ = NEW_WITH_ARGS + else: + operators.append(Node(t, CALL)) + x.parenLevel += 1 + + elif tt == RIGHT_PAREN: + if t.scanOperand or x.parenLevel == pl: + raise BreakOutOfLoops + while True: + tt = reduce_().type_ + if tt in (GROUP, CALL, NEW_WITH_ARGS): + break + if tt != GROUP: + if operands: + n = operands[-1] + if n[1].type_ != COMMA: + n[1] = Node(t, LIST, [n[1]]) + else: + n[1].type_ = LIST + else: + raise ParseError, "Unexpected amount of operands" + x.parenLevel -= 1 + + # Automatic semicolon insertion means we may scan across a newline + # and into the beginning of another statement. If so, break out of + # the while loop and let the t.scanOperand logic handle errors. 
+            else:
+                raise BreakOutOfLoops
+    except BreakOutOfLoops, e: pass
+
+    if x.hookLevel != hl:
+        raise t.newSyntaxError("Missing : after ?")
+    if x.parenLevel != pl:
+        raise t.newSyntaxError("Missing ) in parenthetical")
+    if x.bracketLevel != bl:
+        raise t.newSyntaxError("Missing ] in index expression")
+    if t.scanOperand:
+        raise t.newSyntaxError("Missing operand")
+
+    t.scanOperand = True
+    t.unget()
+    while operators:
+        reduce_()
+    return operands.pop()
+
+def parse(source, filename=None, starting_line_number=1):
+    """Parse some Javascript
+
+    Args:
+        source: the Javascript source, as a string
+        filename: the filename to include in messages
+        starting_line_number: the line number of the first line of the
+            passed in source, for output messages
+    Returns:
+        the parsed source code data structure
+    Raises:
+        ParseError
+    """
+    t = Tokenizer(source, filename, starting_line_number)
+    x = CompilerContext(False)
+    n = Script(t, x)
+    if not t.done:
+        raise t.newSyntaxError("Syntax error")
+    return n
+
+if __name__ == "__main__":
+    print parse(file(sys.argv[1]).read(),sys.argv[1])
diff --git a/cgi-bin/js_check/jsparser.pyc b/cgi-bin/js_check/jsparser.pyc
Binary files differ
new file mode 100644
index 0000000..42b15b8
--- /dev/null
+++ b/cgi-bin/js_check/jsparser.pyc
diff --git a/cgi-bin/js_check/test_javascript.py b/cgi-bin/js_check/test_javascript.py
new file mode 100644
index 0000000..6b00e03
--- /dev/null
+++ b/cgi-bin/js_check/test_javascript.py
@@ -0,0 +1,82 @@
+#!/usr/bin/python2
+import jsparser, re, sys, types, getopt
+import cgi
+import cgitb
+print 'Content-type: text/html\n\n'
+cgitb.enable()
+RESERVED_WORDLIST = ['document',
+                     'window',
+                     'alert',
+                     'console',
+                     '$',
+                     'jQuery',
+                     'xmlhttp',
+                     'eval',
+                     'XMLHttpRequest',
+                     'String',
+                     'this']
+form = cgi.FieldStorage()
+opt_v = False
+if 'script' not in form:
+    print '<h1>script not found</h1>'
+    sys.exit(1)
+else:
+    print 'loading %d bytes' % len(form['script'].value)
+test = jsparser.parse(form['script'].value)
+ITERATION_BLOCKS = ['expression',
+                    'body',
+                    'block',
+                    'initializer',
+                    'condition',
+                    'thenPart',
+                    'elsePart',
+                    'tryBlock',
+                    'catchClauses',
+                    'varDecls']
+
+
+def security_checks(v):
+    if opt_v:
+        print 'this is the type: %s at line number %s' % (v.type, v.lineno)
+    if opt_v:
+        print 'this is the value: %s at line number %s' % (v.value, v.lineno)
+
+    if v.type == 'IDENTIFIER' and v.value in RESERVED_WORDLIST:
+        print '(MY) ERROR reserved word "%s" used in assignment at line number %s' % (v.value, v.lineno)
+        sys.exit(1)
+    if v.type == 'STRING':
+        print '(MY) ERROR illegal type "%s" used at line number %s' % (v.type, v.lineno)
+        sys.exit(1)
+    if v.type == 'PLUS':
+        for a,b in enumerate(v):
+            if b.type == 'ARRAY_INIT':
+                print '(MY) ERROR illegal use of arrays to cast strings at %s' % v.lineno
+                sys.exit(1)
+
+
+def traverse(tree, rec_level = 1):
+    if opt_v:
+        print 'test_script(tree,%d)' % rec_level
+    security_checks(tree)
+    for i, v in enumerate(tree):
+        test_script(v, rec_level + 1)
+    for block in ITERATION_BLOCKS:
+        if hasattr(tree, block):
+            b = getattr(tree, block)
+            try:
+                if opt_v:
+                    print ' this is the expression type: %s ' % getattr(b, 'type')
+            except Exception as e:
+                sys.stderr.write('this was the error %s ' % e)
+                continue
+            try:
+                for i, v in enumerate(b):
+                    test_script(v, rec_level + 1)
+            except Exception as e:
+                sys.stderr.write('this was the error %s ' % e)
+                continue
+    if opt_v:
+        print 'test_script(tree,%d) end' % rec_level
+
+
+traverse(test)
diff --git a/cgi-bin/js_check/test_javascript.py.bk b/cgi-bin/js_check/test_javascript.py.bk
new file mode 100644
index 0000000..31ecaa6
--- /dev/null
+++ b/cgi-bin/js_check/test_javascript.py.bk
@@ -0,0 +1,85 @@
+#!/usr/bin/python2
+import jsparser, re, sys, types, getopt
+import cgi
+import cgitb
+print 'Content-type: text/html\n\n'
+cgitb.enable()
+RESERVED_WORDLIST = ['document',
+                     'window',
+                     'alert',
+                     'console',
+                     '$',
+                     'jQuery',
+                     'xmlhttp',
+                     'eval',
+                     'XMLHttpRequest',
+                     'String',
+                     'this']
+form = cgi.FieldStorage()
+opt_v = False
+if 'script' not in form:
+    print '<h1>script not found</h1>'
+    sys.exit(1)
+else:
+    print 'loading %d bytes' % len(form['script'].value)
+test = jsparser.parse(form['script'].value)
+ITERATION_BLOCKS = ['expression',
+                    'body',
+                    'block',
+                    'initializer',
+                    'condition',
+                    'thenPart',
+                    'elsePart',
+                    'tryBlock',
+                    'catchClauses',
+                    'varDecls']
+
+def test_script(tree, rec_level = 1):
+    if opt_v:
+        print 'test_script(tree,%d)' % rec_level
+    for i, v in enumerate(tree):
+        if v.type == 'IDENTIFIER' and v.value in RESERVED_WORDLIST:
+            print '(MY) ERROR reserved word "%s" used in assignment at line number %s' % (v.value, v.lineno)
+            sys.exit(1)
+        if v.type == 'STRING':
+            print '(MY) ERROR illegal type "%s" used at line number %s' % (v.type, v.lineno)
+            sys.exit(1)
+        if opt_v:
+            print 'this is the type: %s at line number %s' % (v.type, v.lineno)
+        if opt_v:
+            print 'this is the value: %s at line number %s' % (v.value, v.lineno)
+        test_script(v, rec_level + 1)
+
+    for block in ITERATION_BLOCKS:
+        if hasattr(tree, block):
+            b = getattr(tree, block)
+            try:
+                if opt_v:
+                    print ' this is the expression type: %s ' % getattr(b, 'type')
+            except Exception as e:
+                sys.stderr.write('this was the error %s ' % e)
+                continue
+
+            try:
+                for i, v in enumerate(b):
+                    if b.type == 'PLUS' and v.type == 'ARRAY_INIT':
+                        print '(MY) ERROR illegal use of arrays to cast strings at %s' % v.lineno
+                        sys.exit(1)
+                    if opt_v:
+                        print 'this is the (inner iteration) type: %s, at line number %s, rec_lev: %d' % (v.type, v.lineno, rec_level)
+                    if v.type == 'STRING':
+                        print '(MY) ERROR illegal type "%s" used at line number %s' % (v.type, v.lineno)
+                        sys.exit(1)
+                    if opt_v:
+                        print 'this is the (inner iteration) value: %s' % v.value
+                    test_script(v, rec_level + 1)
+
+            except Exception as e:
+                sys.stderr.write('this was the error %s ' % e)
+                continue
+
+    if opt_v:
+        print 'test_script(tree,%d) end' % rec_level
+
+
+test_script(test)
diff --git a/cgi-bin/js_check/validate.py b/cgi-bin/js_check/validate.py
new file mode 100644
index 0000000..cd392b5
--- /dev/null
+++ b/cgi-bin/js_check/validate.py
@@ -0,0 +1,91 @@
+#!/usr/bin/python2
+import jsparser, re, sys, types, getopt
+import cgi
+import cgitb
+
+RESERVED_WORDLIST = ['document',
+                     'window',
+                     'document'
+                     'alert',
+                     'console',
+                     '$',
+                     'jQuery',
+                     'xmlhttp',
+                     'eval',
+                     'XMLHttpRequest',
+                     'String',
+                     'this']
+opt_v = False
+ITERATION_BLOCKS = ['expression',
+                    'body',
+                    'block',
+                    'initializer',
+                    'condition',
+                    'thenPart',
+                    'elsePart',
+                    'tryBlock',
+                    'catchClauses',
+                    'varDecls']
+
+def test_script(tree, rec_level = 1):
+    if opt_v:
+        sys.stderr.write( 'test_script(tree,%d)' % rec_level);
+    for i, v in enumerate(tree):
+        if v.type == 'IDENTIFIER' and v.value in RESERVED_WORDLIST:
+            return 'BAD SCRIPT ERROR reserved word "%s" used in assignment at line number %s\n' % (v.value, v.lineno)
+        if v.type == 'STRING':
+            return 'BAD SCRIPT ERROR illegal type "%s" used at line number %s\n' % (v.type, v.lineno)
+        if opt_v:
+            sys.stderr.write( 'this is the type: %s at line number %s\n' % (v.type, v.lineno));
+        if opt_v:
+            sys.stderr.write( 'this is the value: %s at line number %s\n' % (v.value, v.lineno));
+        inner_test = test_script(v, rec_level + 1)
+        if inner_test:
+            return inner_test
+
+    for block in ITERATION_BLOCKS:
+        if hasattr(tree, block):
+            b = getattr(tree, block)
+            try:
+                if opt_v:
+                    sys.stderr.write( ' this is the expression type: %s \n' % getattr(b, 'type'));
+            except Exception as e:
+                sys.stderr.write('python execution error %s \n' % e)
+                continue
+
+            try:
+                for i, v in enumerate(b):
+                    if b.type == 'PLUS' and v.type == 'ARRAY_INIT':
+                        return 'BAD SCRIPT ERROR illegal use of arrays to cast strings at %s\n' % v.lineno
+                    if opt_v:
+                        sys.stderr.write( 'this is the (inner iteration) type: %s, at line number %s, rec_lev: %d' % (v.type, v.lineno, rec_level));
+                    if v.type == 'STRING':
+                        return 'BAD SCRIPT ERROR illegal type "%s" used at line number %s\n' % (v.type, v.lineno)
+                    if opt_v:
+                        sys.stderr.write( 'this is the (inner iteration) value: %s\n' % v.value);
+                    inner_test = test_script(v, rec_level + 1)
+                    if inner_test:
+                        return inner_test
+
+            except Exception as e:
+                sys.stderr.write('this was the error %s\n' % e)
+                continue
+
+    if opt_v:
+        sys.stderr.write( 'test_script(tree,%d) end\n' % rec_level);
+
+    return False
+
+if __name__ == "__main__":
+    f = open(sys.argv[1], 'r')
+    data = f.read()
+    f.close()
+    test = jsparser.parse(data)
+
+    test = test_script(test)
+    if test:
+        print( test)
+        sys.exit(1)
+    else:
+        sys.exit(0)
+
diff --git a/cgi-bin/js_check/validate.pyc b/cgi-bin/js_check/validate.pyc
Binary files differ
new file mode 100644
index 0000000..0905b8f
--- /dev/null
+++ b/cgi-bin/js_check/validate.pyc
diff --git a/cgi-bin/js_check/verify.pyc b/cgi-bin/js_check/verify.pyc
Binary files differ
new file mode 100644
index 0000000..c4c6370
--- /dev/null
+++ b/cgi-bin/js_check/verify.pyc
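For reference, this is a hedged sketch of reusing validate.py's checker from other Python code, mirroring its own __main__ block; it is not part of the patch, and the file path is hypothetical.

# Sketch of calling validate.test_script() the way validate.py's __main__
# does; 'uploads/widget.js' is a hypothetical path, not from the patch.
import jsparser, validate

source = open('uploads/widget.js').read()
problem = validate.test_script(jsparser.parse(source))
if problem:
    print problem        # a "BAD SCRIPT ERROR ..." message string
else:
    print 'script passed the reserved-word and string-literal checks'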
