The Role of the Parser General Types of Parsers Context-Free Grammars (CFGs) CFG Example Grammar-Related Terms Limitations of Regular Languages RLs versus CFGs in Practice Writing a Grammar Limitations of CFG Why Use Context-Free Grammars? Using Ambiguous Grammars Operator-Precedence Parsing Top-Down Parsing LL(1) Predictive Parsing Example Pascal Statement Parser Bottom-Up Parsing LR Parsing LR Parsing Algorithm Classic Problems in LR Parsing Parser Generators Error Recovery
stmt --> IF '(' expr ')' stmt
| IF '(' expr ')' stmt ELSE stmt
EG TRUE AND FALSE OR (TRUE OR FALSE)
B --> B OR B
B --> B AND B
B --> TRUE | FALSE
B --> '(' B ')'
B --> O
O --> O OR A | A
A --> A AND T | T
T --> TRUE | FALSE | '(' O ')'
B *=> B B *=> B OR B B *=> TRUE OR FALSE
B +=> TRUE OR B B +=> TRUE OR FALSE
B --> B OR B B --> B AND B B --> TRUE | FALSE
EG TRUE AND FALSE is a sentence TRUE AND OR FALSE is not a sentence
A --> a | b | c | ... | z | A | B | C | ... | Z D --> 0 | 1 | 2 | ... | 9 T --> AT | DT | EPSILON I --> AT
[a-zA-Z][a-zA-Z0-9]*
stmt --> IF cond THEN stmt | IF cond THEN stmt ELSE stmt
IF a THEN IF b THEN c ELSE d
A --> A ALPHA | BETA
A --> BETA A' A' --> ALPHA A' | EPSILON
B --> B OR A | A can be rewritten as: B --> A B' B' --> OR A B' | EPSILON
stmt --> IF expr THEN stmt ELSE stmt | IF expr THEN stmt can be rewritten as: stmt --> IF expr THEN stmt else_option else_option --> ELSE stmt | EPSILON
L = {wcw | w is in (a|b)*}
B --> B OR B | B AND B | TRUE | FALSE solved by defining AND to have higher precedence than OR
WHILE (next token is not EOF) {
IF (token is TRUE or "FALSE)
push (handle_stack, mk_node (token));
ELSE IF (f[top (op_stack)] < g[token])
push (op_stack, mk_node (token));
ELSE {
WHILE (f[top (op_stack)] > g[token])
push (handle_stack,
mk_node (pop (op_stack),
pop (handle_stack), pop (handle_stack)));
IF (top (op_stack) == DELIMIT_TOK
&& token == DELIMIT_TOK)
RETURN pop (handle_stack);
ELSE IF (top (op_stack) == LPAREN_TOK
&& token == RPAREN_TOK) {
pop (op_stack);
continue; /* Jump over push operation below. */
}
push (op_stack, mk_node (token));
}
}
A --> ALPHA1 | ALPHA2 | ... | ALPHAn
stmt --> if_stmt | while_stmt | block_stmt if_stmt --> IF expr THEN stmt ELSE stmt while_stmt --> WHILE stmt DO stmt block_stmt --> BEGIN stmt_list END stmt_list --> stmt_list stmt | EPSILON
EXTERN token lookahead;
INT match (token t) {
IF (lookahead == t)
lookahead = yylex (); /* gets next token from input */
ELSE
yy_error ("token %d expected", t);
}
VOID stmt () {
SWITCH (lookahead) {
CASE IF_TOKEN:
if_stmt (); break;
CASE WHILE_TOKEN:
while_stmt (); break;
CASE BEGIN_TOKEN:
block_stmt (); break;
DEFAULT:
yy_error ("Unknown statement");
}
}
VOID if_stmt () {
match (IF_TOKEN);
expr ();
match (THEN_TOKEN);
stmt ();
IF (lookahead == ELSE_TOKEN) {
match (ELSE_TOKEN);
stmt ();
}
}
VOID while_stmt () {
match (WHILE_TOKEN);
expr ();
match (DO_TOKEN);
stmt ();
}
void block_stmt () {
match (BEGIN_TOKEN);
WHILE (lookahead != END_TOKEN) {
stmt ();
match (';');
}
match (END_TOKEN);
}
set ip to point to first symbol of w$
FOR (;;) {
let s be the state on top of the stack and
a the symbol pointed to by ip
IF ( action[ s, a] == SHIFT s') {
push a then s' on top of the stack
advance ip to the next input symbol
}
ELSE IF ( action[ s, a] == REDUCE A --> BETA) {
pop 2 TIMES |BETA| symbols off the stack
let s' be the state now on top of the stack
push A then goto[ s', A] on top of the stack
output the production A --> BETA
}
ELSE IF ( action[ s, a] == ACCEPT) RETURN;
ELSE handle_error ();
}
declarations %% translation rules %% auxiliary routines
%{
/* Example using a stratified grammar. */
%}
%token TRUE, FALSE, OR, AND
%start line
%%
line : bool_expr '\n' ;
bool_expr: or_expr ;
or_expr: or_expr OR and_expr | and_expr ;
and_expr : and_expr AND token | token ;
token : '(' or_expr ')' | FALSE | TRUE ;
%%
int yylex () { /* Lexical analyzer can go here. */ }
%{
/* Example using Bison's precedence declarations. */
%}
%token TRUE, FALSE
%left OR
%left AND
%start line
%%
line : bool_expr '\n' ;
bool_expr: bool_expr OR bool_expr | bool_expr AND bool_expr
| '(' bool_expr ')' | FALSE | TRUE ;
%%
/* Lexical analyzer can also go in a separate file. */