1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364 | --
-- Copyright (C) 2014-2022, AdaCore
-- SPDX-License-Identifier: Apache-2.0
--
with Ada.Characters.Handling; use Ada.Characters.Handling;
with Ada.Exceptions; use Ada.Exceptions;
with Ada.IO_Exceptions;
with GNATCOLL.Mmap;
with Langkit_Support.Errors; use Langkit_Support.Errors;
package body Libadalang.PP_Lexer is
subtype Symbol_Set is Character
   with Static_Predicate =>
      Symbol_Set in '_' | '0' .. '9' | 'A' .. 'Z' | 'a' .. 'z';
--  Characters accepted in a symbol: ASCII letters, decimal digits and the
--  underscore.

subtype Chars_Sequence_Set is Character
   with Static_Predicate => Chars_Sequence_Set in '.' | Symbol_Set;
--  Characters accepted in a chars sequence: everything in Symbol_Set plus
--  the dot.
----------
-- Read --
----------
function Read (Filename : String) return String_Access is
   --  Load the whole content of Filename into a newly allocated string and
   --  return it. The result is indexed from 1.
   --
   --  Raise File_Read_Error (carrying the original exception message) when
   --  opening the file raises Ada.IO_Exceptions.Name_Error.

   package MM renames GNATCOLL.Mmap;

   Source_File : MM.Mapped_File;
begin
   --  Only the "open" step has its Name_Error translated; any other
   --  exception propagates unchanged.

   begin
      Source_File := MM.Open_Read (Filename);
   exception
      when Failure : Ada.IO_Exceptions.Name_Error =>
         raise File_Read_Error with Exception_Message (Failure);
   end;

   declare
      Mapping  : MM.Mapped_Region := MM.Read (Source_File);
      Contents : constant String_Access :=
        new String (1 .. MM.Last (Mapping));
   begin
      --  Copy the mapped bytes into our own buffer so that the mapping and
      --  the file can be released before returning.

      Contents.all := MM.Data (Mapping).all (Contents.all'Range);
      MM.Free (Mapping);
      MM.Close (Source_File);
      return Contents;
   end;
end Read;
-------------------------
-- Is_Valid_Identifier --
-------------------------
function Is_Valid_Identifier (Text : String) return Boolean is
   --  Return whether Text is made only of ASCII letters, digits and
   --  underscores, is not empty, does not start with a digit or an
   --  underscore, does not end with an underscore and has no two
   --  consecutive underscores.

   Previous : Character := '_';
   --  Character processed just before the current one. Starting with an
   --  underscore makes a leading underscore look like a doubled one, so it
   --  gets rejected by the loop below.
begin
   if Text'Length = 0 then
      return False;
   end if;

   if Text (Text'First) in '0' .. '9' or else Text (Text'Last) = '_' then
      return False;
   end if;

   for C of Text loop
      case C is
         when '_' =>
            if Previous = '_' then
               return False;
            end if;

         when 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' =>
            null;

         when others =>
            return False;
      end case;
      Previous := C;
   end loop;

   return True;
end Is_Valid_Identifier;
------------------
-- Create_Lexer --
------------------
function Create_Lexer (Diagnostic_Filename, Filename : String) return Lexer
is
   --  Build a lexer over the content of Filename. Diagnostic_Filename is
   --  stored as the lexer's Filename component; scanning starts at the
   --  first character of the buffer, at line 1, column 1.
begin
   return New_Lexer : Lexer do
      New_Lexer.Buffer    := Read (Filename);
      New_Lexer.Next_Char := New_Lexer.Buffer.all'First;
      New_Lexer.Next_Sloc := (Line => 1, Column => 1);
      New_Lexer.Filename  := US.To_Unbounded_String (Diagnostic_Filename);
   end return;
end Create_Lexer;
----------
-- Next --
----------
procedure Next (Self : in out Lexer; Token : out Token_Type) is

   --  Scan the next token from Self's buffer and describe it in Token.
   --
   --  Token.First and Token.Sloc are set to the buffer index and source
   --  location at which the token starts; Token.Last is updated each time
   --  a character is consumed, so it designates the last character of the
   --  token. Token.Kind is one of EOF, EOL, Switch, Assign, Star,
   --  String_Literal or Chars_Sequence.
   --
   --  Spaces, carriage returns and "--" comments are skipped. Syntax_Error
   --  is raised on invalid input; its message is formatted as
   --  "<filename>:<sloc>: <message>".

   type Peeked_Char is record
      EOF   : Boolean;
      Value : Character;
   end record;
   --  Result of looking at the next character without consuming it. Value
   --  is ASCII.NUL when EOF is True.

   function Peek return Peeked_Char;
   --  Return the next character to scan, or the EOF marker if the whole
   --  buffer was consumed. The lexer's state is left untouched.

   procedure Next_Char;
   --  Consume one character: record its index in Token.Last and update the
   --  lexer's position and source location. Must not be called at EOF.

   procedure Scan_String_Literal;
   --  Consume the remainder of a string literal, assuming the opening
   --  double quote was already consumed. Raise Syntax_Error if a line feed
   --  or the end of the buffer is found before the closing double quote.

   procedure Error (Message : String) with No_Return;
   --  Raise Syntax_Error with Message, prefixed by the lexer's filename and
   --  its current source location.

   PC : Peeked_Char;

   ----------
   -- Peek --
   ----------

   function Peek return Peeked_Char is
   begin
      if Self.Next_Char > Self.Buffer.all'Last then
         return (EOF => True, Value => ASCII.NUL);
      else
         return (EOF => False, Value => Self.Buffer.all (Self.Next_Char));
      end if;
   end Peek;

   ---------------
   -- Next_Char --
   ---------------

   procedure Next_Char is
   begin
      pragma Assert (Self.Next_Char <= Self.Buffer.all'Last);
      Token.Last := Self.Next_Char;

      --  A line feed moves to the first column of the next line; any other
      --  character moves one column to the right.

      if Self.Buffer.all (Self.Next_Char) = ASCII.LF then
         Self.Next_Sloc.Line := Self.Next_Sloc.Line + 1;
         Self.Next_Sloc.Column := 1;
      else
         Self.Next_Sloc.Column := Self.Next_Sloc.Column + 1;
      end if;
      Self.Next_Char := Self.Next_Char + 1;
   end Next_Char;

   -------------------------
   -- Scan_String_Literal --
   -------------------------

   procedure Scan_String_Literal is
   begin
      loop
         PC := Peek;

         --  Check for the end of the buffer *before* consuming anything:
         --  Next_Char must never run at EOF (it would fail its assertion
         --  or index the buffer out of bounds instead of reporting a
         --  proper syntax error).

         if PC.EOF then
            Error ("unterminated string literal");
         end if;

         Next_Char;
         if PC.Value = ASCII.LF then
            --  The line feed is consumed before the error is raised, so
            --  the reported location is the start of the following line.

            Error ("unterminated string literal");

         elsif PC.Value = '"' then
            PC := Peek;
            if not PC.EOF and then PC.Value = '"' then
               --  Two consecutive double quotes stand for one quote
               --  character inside the literal: consume the second one
               --  and keep scanning.

               Next_Char;
            else
               --  This was the closing double quote

               return;
            end if;
         end if;
      end loop;
   end Scan_String_Literal;

   -----------
   -- Error --
   -----------

   procedure Error (Message : String) is
   begin
      raise Syntax_Error with
        US.To_String (Self.Filename)
        & ":" & Image (Self.Next_Sloc)
        & ": " & Message;
   end Error;

begin
   --  Each iteration tries to scan one token. Iterations that only skip
   --  blanks or a comment fall through the case statement and start over.

   loop
      Token.First := Self.Next_Char;
      Token.Sloc := Self.Next_Sloc;

      PC := Peek;
      if PC.EOF then
         Token.Kind := EOF;
         return;
      end if;
      Next_Char;

      case PC.Value is
         when ASCII.LF =>
            Token.Kind := EOL;
            return;

         when ' ' | ASCII.CR =>
            null;

         when '-' =>
            PC := Peek;
            if PC.EOF then
               Error ("stray dash");

            elsif PC.Value = '-' then
               --  Comment: skip everything up to (but not including) the
               --  next line feed, or up to the end of the buffer.

               while not PC.EOF and then PC.Value /= ASCII.LF loop
                  Next_Char;
                  PC := Peek;
               end loop;

            --  Past this point, we can only be parsing a gnatprep switch

            elsif PC.Value not in 'a' .. 'z' | 'A' .. 'Z' then
               Error ("stray dash");

            else
               Token.Kind := Switch;

               --  First take all alphanumeric characters possible (switch
               --  name plus potentially a symbol name).

               loop
                  PC := Peek;
                  if not PC.EOF and then PC.Value = '=' then
                     Next_Char;
                     exit;
                  elsif PC.EOF or else PC.Value not in Symbol_Set then
                     return;
                  end if;
                  Next_Char;
               end loop;

               --  We just got a "=" after the switch name: we expect either
               --  nothing, the equivalent of a chars sequence, or a string
               --  literal.

               PC := Peek;
               if PC.EOF or else PC.Value = ASCII.LF then
                  null;

               elsif PC.Value in Chars_Sequence_Set then
                  while not PC.EOF and then PC.Value in Chars_Sequence_Set
                  loop
                     Next_Char;
                     PC := Peek;
                  end loop;

               elsif PC.Value = '"' then
                  Next_Char;
                  Scan_String_Literal;
               end if;
               return;
            end if;

         when ':' =>
            PC := Peek;
            if PC.EOF then
               Error ("stray colon");
            elsif PC.Value = '=' then
               Token.Kind := Assign;
               Next_Char;
               return;
            else
               Error ("invalid token");
            end if;

         when '*' =>
            Token.Kind := Star;
            return;

         when '"' =>
            Token.Kind := String_Literal;
            Scan_String_Literal;
            return;

         when Chars_Sequence_Set =>
            Token.Kind := Chars_Sequence;
            loop
               PC := Peek;
               if PC.EOF or else PC.Value not in Chars_Sequence_Set then
                  return;
               end if;
               Next_Char;
            end loop;

         when others =>
            Error ("invalid token");
      end case;
   end loop;
end Next;
-------------------------
-- Is_Valid_Identifier --
-------------------------
function Is_Valid_Identifier
  (Self : Lexer; Token : Token_Type) return Boolean
is
   --  Return whether the source text covered by Token in Self's buffer is
   --  accepted by the string-based Is_Valid_Identifier.

   Text : String renames Self.Buffer.all (Token.First .. Token.Last);
begin
   return Is_Valid_Identifier (Text);
end Is_Valid_Identifier;
--------------
-- Get_Text --
--------------
function Get_Text
  (Self : Lexer; Token : Token_Type) return US.Unbounded_String
is
   --  Return the source text covered by Token in Self's buffer, unchanged

   Text : String renames Self.Buffer.all (Token.First .. Token.Last);
begin
   return US.To_Unbounded_String (Text);
end Get_Text;
------------------------
-- Get_Text_Lowercase --
------------------------
function Get_Text_Lowercase
  (Self : Lexer; Token : Token_Type) return US.Unbounded_String
is
   --  Return the source text covered by Token in Self's buffer, converted
   --  to lower case with Ada.Characters.Handling.To_Lower.

   Text : String renames Self.Buffer.all (Token.First .. Token.Last);
begin
   return US.To_Unbounded_String (To_Lower (Text));
end Get_Text_Lowercase;
--------------------
-- Denoted_String --
--------------------
function Denoted_String
  (Self : Lexer; Token : Token_Type) return US.Unbounded_String
is
   --  Return the string value denoted by the string literal Token: the
   --  text between the enclosing double quotes, with each doubled double
   --  quote turned into a single one.

   Literal : String renames Self.Buffer.all (Token.First .. Token.Last);
   Value   : US.Unbounded_String;
   Cursor  : Positive := Literal'First + 1;
   --  Index of the next character to process; starts right after the
   --  opening double quote.
begin
   pragma Assert (Token.Kind = String_Literal);
   pragma Assert (Literal'Length >= 2);
   pragma Assert
     (Literal (Literal'First) = '"' and then Literal (Literal'Last) = '"');

   loop
      --  Stop on the closing double quote

      exit when Cursor >= Literal'Last;

      declare
         C : constant Character := Literal (Cursor);
      begin
         US.Append (Value, C);

         --  A double quote inside the literal is always followed by a
         --  second one: emit it once and skip its twin.

         if C = '"' then
            Cursor := Cursor + 2;
         else
            Cursor := Cursor + 1;
         end if;
      end;
   end loop;

   return Value;
end Denoted_String;
--------------
-- Finalize --
--------------
overriding procedure Finalize (Self : in out Lexer) is
--  Release the text buffer held by Self by passing it to Free.
--  NOTE(review): presumably invoked automatically when a Lexer object is
--  finalized (controlled type); confirm against the package spec.
begin
Free (Self.Buffer);
end Finalize;
end Libadalang.PP_Lexer;
|