char *my_string = "Hello Bozo!"; has six lexemes: "char" "*" "my_string" "=" ""Hello Bozo!"" ";"
TYPE token IS ('*', identifier, assign, string_literal, ';');
IDENTIFIER --> a letter possibly followed by letters and digits
IF ... THEN BEGIN ... END
IF if THEN then; ELSE else; is legal and semantically meaningful!
#include "lisp.h" /* defines s_expression, MAKE_ for various atoms,
the variable atom_val of type s_expression, and the token
values atom_token, '(', ')', and '.'. topch() returns the
next character in the input without reading it. */
/* Tokens are plain ints, so yylex() can return a punctuation character
   itself as its token value. */
typedef int token;
/* True when c may appear in a literal atom: a digit, an upper- or lower-case
   letter, or one of the operator characters + - * /.
   IN and MEMBER are not defined in this file; presumably IN(c,lo,hi) is an
   inclusive range test and MEMBER(c,s) tests membership of c in the string s
   -- confirm against lisp.h.
   Fix: the original omitted the || between the two letter-range tests. */
#define is_let_dig_or_opr(c) (IN(c,'0','9') || IN(c,'A','Z') || IN(c,'a','z') || MEMBER(c, "+-*/"))
/* Buffer that receives the text of the lexeme being scanned (fixed 100 bytes). */
char yytext[100];
/* Hand-written lexer: reads characters with getchar() and returns the next
   token.  '(' ')' and '.' are returned as themselves; strings, characters and
   integers set atom_val and return atom_token.  Blanks, tabs, newlines and
   commas are skipped, as are "--" comments.
   NOTE(review): the visible text of this function stops inside the default
   case (at the "do {"); the rest of the body is not shown here. */
token yylex() {
int ch, d;
char *p;
start_of_lexeme:
switch (ch = getchar()) {
case '(': case ')': case '.':
/* single-character tokens: the character is its own token value */
return ch;
case '"':
/* string literal: copy characters into yytext up to the closing quote */
p = yytext;
nested_quote:
/* NOTE(review): this loop stops only on '"'; it checks neither the size of
   yytext nor end of input -- confirm inputs are trusted/bounded. */
while ((*p++ = getchar()) != '"')
;
/* A quote immediately followed by another quote is an embedded quote: the
   first one stays in the buffer, the second is consumed, and copying resumes. */
if (topch() == '"') {
getchar();
goto nested_quote;
}
*(p-1) = 0; /* remove the last quote */
atom_val = MAKE_STRING(yytext);
return atom_token;
case '\'':
/* character literal: the character after the quote becomes the atom;
   a closing quote is consumed if present */
atom_val = MAKE_CHARACTER(getchar());
if (topch() == '\'') getchar();
return atom_token;
/* NOTE(review): the "..." below appears to be shorthand for the cases '1'
   through '8'; it is not valid C as written. */
case '0': ... case '9':
/* unsigned integer: ch is the first digit; presumably to_int maps a digit
   character to its numeric value -- confirm */
d = to_int(ch);
while (isdigit(topch())) {
/* NOTE(review): multiplies by 10 here, but the negative-number branch below
   multiplies by IBASE -- confirm which is intended. */
d *= 10;
d += to_int(getchar());
}
atom_val = MAKE_INTEGER(d);
return atom_token;
case '\n': case '\t': case ' ': case ',':
/* separators carry no token: scan again */
goto start_of_lexeme;
case '-': /* skip comments "-- this is a comment\n" */
if (topch() == '-') {
/* NOTE(review): the loop below stops only on '\n'; end of input is not
   checked. */
while (getchar() != '\n')
;
goto start_of_lexeme;
}
else if (isdigit(topch())) { /* a negative number */
/* ch is the '-' sign, so accumulation starts from zero and every digit is
   read inside the loop */
d = 0;
do {
d *= IBASE;
d += to_int(getchar());
} while (isdigit(topch()));
atom_val = MAKE_INTEGER(-d);
return atom_token;
}
/* else an operator, so drop into next case */
default: /* a literal atom, includes operators and other names */
/* start collecting the atom's text at the beginning of yytext */
p = yytext;
do {
Pascal, Ada, C, French, Chinese
if x = "fu" and y = "bar", then xy = "fubar"
s^{0} = EPSILON, for i > 0, s^{i} = s^{i-1}s
D* - the set of all strings of digits including the empty string
D+ - the set of all nonempty strings of digits (one or more digits)
d_{1} --> r_{1}
...
d_{n} --> r_{n}
LETTER --> [A-Za-z] DIGIT --> [0-9] IDENTIFIER --> LETTER ( LETTER | DIGIT ) *
definitions %% pattern/action pairs %% optional auxiliary routines
%{
#include <stdlib.h> /* atoi */
%}
/* Lex specification for the Lisp reader's tokens.
   - '(' ')' and '.' are returned as themselves.
   - Character and integer literals and literal atoms store their value in
     atom_val and return atom_token.
   - "--" comments run to the end of the line and produce no token.
   Fixes relative to the earlier version of this specification:
   - the optional sign is written [-+], because an unescaped + in a lex
     pattern is the repetition operator, not a literal plus sign;
   - the integer action converts the lexeme with atoi() instead of handing
     the text itself to MAKE_INTEGER;
   - the actions assign to atom_val using C's = operator;
   - the comment rule precedes the atom rule, since lex breaks equal-length
     matches in favour of the earlier rule and a line such as "--x" would
     otherwise be scanned as an atom. */
DIGIT [0-9]
LETTER [a-zA-Z]
OPERATOR [-+*/]
%%
[().] {return yytext[0];}
\'.\' {atom_val = MAKE_CHARACTER(yytext[1]); return atom_token;}
[-+]?{DIGIT}+ {atom_val = MAKE_INTEGER(atoi(yytext)); return atom_token;}
--.* { /* nothing - just strip out and ignore comments */ }
({LETTER}|{OPERATOR})({LETTER}|{DIGIT}|{OPERATOR})* {
    atom_val = INTERN(yytext); return atom_token;}
-- Transition lookup in the compressed (base/next/check/default) table.
--   s : the current state
--   i : the current input symbol
-- The entry at base(s)+i belongs to s only when check records s as its
-- owner; in that case next holds the transition.  Otherwise the lookup is
-- repeated for the default state of s with the same input symbol.
-- Fixes: the body referred to an undeclared name "a" instead of the
-- parameter i, the recursive call passed a single argument, and the check
-- entry was not compared with s.
-- NOTE(review): declared as a function because it returns a value; the
-- result type state is assumed -- confirm against the declaration of next.
FUNCTION nextstate(s: state; i: input) RETURN state IS
BEGIN
   IF check(base(s)+i) = s THEN
      RETURN next(base(s)+i);
   ELSE
      RETURN nextstate(default(s), i);
   END IF;
END nextstate;