-
Notifications
You must be signed in to change notification settings - Fork 16
/
lex.c
312 lines (265 loc) · 5.43 KB
/
lex.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include "compiler.h"
/*
* Read and match against the token stream. Need to move from stdio
* eventually
*/
/* Sentinel held in last_token while no token has been pushed back */
#define NO_TOKEN 0xFFFF /* An unused value */
/*
* Simple block buffer read. We use 128 byte records so we can make
* this work in CP/M. For CP/M we'll also need to write some kind of
* 'end of file' token
*/
/* Input state: one 128 byte record, the read cursor within it and the
   number of bytes in the record that have not been handed out yet */
static unsigned char inbuf[128];
static unsigned char *inptr;
static int inlen;

/* Read the next block. Hopefully this is the only routine we need to
   swap for CP/M etc.
   Resets the read cursor to the start of inbuf and returns whatever
   read() returned: the byte count, 0 at end of input, or -1 on error. */
static int in_record(void)
{
	inptr = inbuf;
	return read(0, inbuf, 128);
}

/*
 * Return the next input byte (0-255), refilling the record buffer when
 * it is empty. Returns EOF at end of input and also when the read fails.
 *
 * The count must be tested with "> 0" rather than for non-zero: a failed
 * read() hands back -1, and treating that as "data available" would
 * return stale buffer contents and run inptr off the end of inbuf on
 * every later call.
 */
static int in_byte(void)
{
	if (inlen == 0)
		inlen = in_record();
	if (inlen > 0) {
		inlen--;
		return *inptr++;
	}
	/* End of input or error: leave the buffer empty so that the next
	   call tries the read again */
	inlen = 0;
	return EOF;
}
/* One 128 byte output record being assembled */
static unsigned char outbuf[128];
/* Where the next byte added to the record will be stored */
static unsigned char *outptr = outbuf;
/* Current offset within outbuf; also the byte count out_write() writes */
static unsigned int outlen;
/* Record number of outbuf; it is written at byte offset outrecord * 128 */
static unsigned int outrecord = 0;
/*
 * Write the bytes collected in outbuf to the position of the current
 * record on descriptor 1 and empty the buffer. The record number is not
 * advanced here. A failed seek or a short write is reported via fatal().
 */
void out_write(void)
{
	long where = outrecord * 128L;

	if (lseek(1, where, SEEK_SET) < 0)
		fatal("seek error");
	/* Nothing buffered means nothing to write, but the seek above is
	   still performed */
	if (outlen != 0) {
		if (write(1, outbuf, outlen) != outlen)
			fatal("write error");
	}
	outptr = outbuf;
	outlen = 0;
}
/* Again try and isolate the block I/O into two tiny routines.
   Write out the current record and step on to the following one. */
void out_flush(void)
{
	out_write();
	outrecord += 1;
}
/* Load the given record from the output file into outbuf and make it the
   current record. We use this in situations where we need to rewind and
   update headers. Seek and read failures are reported via fatal(). */
static void out_record_read(unsigned record)
{
	long where = record * 128L;

	if (lseek(1, where, SEEK_SET) < 0)
		fatal("seek error");
	/* Only an error return is rejected; a short read is accepted */
	if (read(1, outbuf, 128) < 0)
		fatal("read error");
	outrecord = record;
}
/*
 * Report the current record/offset as one value: the record number in
 * the bits above bit 7 and the offset within the record buffer in the
 * low 8 bits. The result is in the form out_seek() takes.
 */
unsigned long out_tell(void)
{
	/* Widen before shifting. outrecord is an unsigned int, so where int
	   is 16 bits the shift would otherwise throw away the top 8 bits of
	   the record number before the result becomes an unsigned long */
	return ((unsigned long)outrecord << 8) | outlen;
}
/* Go to a record/offset previously reported by out_tell(). Whatever is
   buffered is written out first, then the target record is loaded so
   that bytes added afterwards land at the requested offset within it. */
void out_seek(unsigned long pos)
{
	unsigned offset = pos & 0xFF;
	unsigned record = pos >> 8;

	out_write();
	out_record_read(record);
	outlen = offset;
	outptr = outbuf + offset;
}
/* Add one byte at the current output position, first flushing the record
   buffer if it is already full */
void out_byte(unsigned char c)
{
	if (outlen == 128)
		out_flush();
	*outptr = c;
	outptr++;
	outlen++;
}
/*
 * Add len bytes starting at pv at the current output position. The data
 * is copied in pieces no larger than the space left in the 128 byte
 * record buffer, flushing each time the buffer is found full.
 */
void out_block(void *pv, unsigned len)
{
	unsigned char *src = pv;
	register unsigned chunk;

	for (; len != 0; len -= chunk, src += chunk) {
		/* Flush any full record */
		if (outlen == 128)
			out_flush();
		/* Take as much as fits, but no more than is left to copy */
		chunk = 128 - outlen;
		if (chunk > len)
			chunk = len;
		memcpy(outptr, src, chunk);
		outptr += chunk;
		outlen += chunk;
	}
}
/* File name taken from the most recent T_LINE record that carried one
   (at most 32 characters plus the terminating NUL) */
char filename[33];
/* Line number taken from the most recent T_LINE record */
unsigned line_num;
/* Value read along with the last T_INTVAL, T_UINTVAL, T_LONGVAL,
   T_ULONGVAL or T_FLOATVAL token */
unsigned long token_value;
/* The current token */
unsigned token;
/* Token saved by push_token(), or NO_TOKEN when nothing is pushed back */
unsigned last_token = NO_TOKEN;
/*
 * Fetch a byte that must be present because it is part of the token
 * being read. Running out of input here means the stream is damaged, so
 * it is reported with error() and the program exits with status 1.
 */
unsigned tokbyte(void)
{
	int b = in_byte();

	if (b == EOF) {
		error("corrupt stream");
		exit(1);
	}
	return (unsigned)b;
}
/*
 * Advance to the next token, leaving it in 'token'.
 *
 * A token pushed back with push_token() is returned first. Otherwise a
 * 16 bit token code is read, low byte first; running out of input in
 * either byte yields T_EOF. T_LINE records are consumed here: they update
 * line_num (and filename when bit 15 of the line number is set) and the
 * token that follows is returned instead. Constant tokens are followed by
 * a 32 bit value, low byte first, which is left in token_value.
 */
void next_token(void)
{
	register int c;

	/* Handle pushed back tokens */
	if (last_token != NO_TOKEN) {
		token = last_token;
		last_token = NO_TOKEN;
		return;
	}
	c = in_byte();
	if (c == EOF) {
		token = T_EOF;
		return;
	}
	token = c;
	c = in_byte();
	if (c == EOF) {
		token = T_EOF;
		return;
	}
	/* Shift as unsigned: c can be up to 0xFF and shifting that left by
	   8 overflows a signed 16 bit int */
	token |= ((unsigned)c << 8);
	if (token == T_LINE) {
		register char *p = filename;

		line_num = tokbyte();
		line_num |= tokbyte() << 8;
		/* Top bit set means a NUL terminated file name follows */
		if (line_num & 0x8000) {
			line_num &= 0x7FFF;
			/* At most 32 bytes are stored, which leaves room in
			   filename[33] for the terminator written below */
			for (c = 0; c < 32; c++) {
				*p = tokbyte();
				if (*p == 0)
					break;
				p++;
			}
			*p = 0;
		}
		next_token();
		return;
	}
	if (token == T_INTVAL || token == T_LONGVAL || token == T_UINTVAL
	    || token == T_ULONGVAL || token == T_FLOATVAL) {
		token_value = tokbyte();
		token_value |= tokbyte() << 8;
		/* tokbyte() returns an unsigned int, so the upper two bytes
		   must be widened before shifting or they are lost where
		   int is 16 bits */
		token_value |= (unsigned long)tokbyte() << 16;
		token_value |= (unsigned long)tokbyte() << 24;
	}
}
/*
 * Make t the current token and hold the token that was current so that
 * the next call to next_token() returns it. You can only push back one
 * token and it must not have attached data. This works out fine because
 * we only ever need to push back a name when processing labels.
 */
void push_token(unsigned t)
{
	unsigned displaced = token;

	token = t;
	last_token = displaced;
}
/*
 * Error recovery: skip tokens up to a semicolon or the end of the input,
 * then step over that token as well, so that a single mistake does not
 * produce a wall of errors.
 */
void junk(void)
{
	for (;;) {
		if (token == T_EOF || token == T_SEMICOLON)
			break;
		next_token();
	}
	next_token();
}
/*
 * If the current token is t, consume it and return 1. Otherwise leave it
 * in place and return 0. This lets us write things like
 *
 * if (match(T_STAR)) { ... }
 */
unsigned match(unsigned t)
{
	if (token != t)
		return 0;
	next_token();
	return 1;
}
/* Consume a semicolon. If the current token is something else, report
   the error and skip ahead with junk() */
void need_semicolon(void)
{
	if (match(T_SEMICOLON))
		return;
	error("missing semicolon");
	junk();
}
/* Consume token t, or report it as expected and consume nothing.
   This can only be used if the token is a single character token. That
   turns out to be sufficient for C so there is no need for anything
   fancy here */
void require(unsigned t)
{
	if (match(t))
		return;
	errorc(t, "expected");
}
/* If the current token is T_SYMBOL or above, consume it and return it.
   Anything below T_SYMBOL is left in place and 0 is returned */
unsigned symname(void)
{
	unsigned sym = token;

	if (sym < T_SYMBOL)
		return 0;
	next_token();
	return sym;
}
/*
* This is ugly and we need to review how we handle it
*/
/* Byte pair copy_string() writes once per padding position; presumably the
   quoted (0xFF prefixed) encoding of a zero byte - TODO confirm */
static unsigned char pad_zero[2] = { 0xFF, 0xFE };
/*
 * Copy the encoded string that follows in the token stream to the output,
 * wrapped in header()/footer() calls for H_STRING and 'label'.
 *
 * label   passed to header() and footer()
 * maxlen  limit on characters copied; input beyond it is read and dropped
 * pad     if non-zero, pad_zero is written for each position short of maxlen
 * lit     passed through to header()
 *
 * Returns the number of characters copied when pad is zero. When pad is
 * non-zero the value returned (and given to footer()) is maxlen + 1.
 */
unsigned copy_string(unsigned label, unsigned maxlen, unsigned pad, unsigned lit)
{
	register unsigned c;
	register unsigned l = 0;

	header(H_STRING, label, lit);
	/* Copy the encoded string as is, up to its zero terminator */
	for (;;) {
		c = tokbyte();
		if (c == 0)
			break;
		/* Past the limit: keep reading, but drop the byte */
		if (l >= maxlen)
			continue;
		out_byte(c);
		/* Quoted FFFF FFFE pairs count as one byte */
		if (c == 0xFF)
			out_byte(tokbyte());
		l++;
	}
	/* Now write any padding bytes */
	if (pad) {
		while (l < maxlen) {
			out_block(&pad_zero, 2);
			l++;
		}
		/* The count ends one past maxlen on this path */
		l++;
	}
	/* Write the end marker */
	out_byte(0);
	footer(H_STRING, label, l);
	next_token();
	if (token != T_STRING_END)
		error("bad token stream");
	next_token();
	return l;
}
/* Last label number handed out by quoted_string(), which increments it on
   every call */
unsigned label_tag;
/*
 * If the current token is T_STRING, copy the string to the output under a
 * newly numbered label and return that label, storing the string length
 * through len when len is not a null pointer. Returns 0 if the current
 * token is not a string. A label number is used up in either case.
 */
unsigned quoted_string(int *len)
{
	unsigned label = ++label_tag;
	unsigned count;

	if (token != T_STRING)
		return 0;
	/* No length limit, no padding, marked as a literal */
	count = copy_string(label, ~0, 0, 1);
	if (len)
		*len = count;
	return label;
}