1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149 | -- -*- Mode: Ada -*-
-- Filename : tokenize.ads
-- Description : Ruby-like split
-- Author : Finta Tartaruga
-- Created On : Tue Sep 11 22:05:53 2007
-- Last Modified By: R. Bernardini
-- Last Modified On: November 14, 2007
-- Update Count : 1
-- Status : <TESTED>
--
-- This package provides a function Split which divides its input
-- string in smaller strings, separated by a "separator" (much as the
-- split function in Perl, Ruby, and so on...). Function Split returns
-- a Token_List (defined by this package) whose elements can be accessed
-- by the function Element.
--
-- Function Split can accept a third Boolean value Collate_Separator.
-- If Collate_Separator is true, consecutive istances of the separator are
-- considered as a single one. If Collate_Separator is False, for every
-- pair of consecutive separator characters an empty string will be returned.
-- Moreover, if Collate_Separator is True, any separator at the beginning of
-- the string is ignored. Separators at the end are always ignored.
--
-- The default value of Collate_Separator is true if the separator
-- is the space, false otherwise.
--
-- Examples:
--
-- Split("Hello there") returns "Hello" and "there"
-- Split("Hello there", ' ', False) returns "Hello", "" and "there"
-- Split("Hello::there", ':') returns "Hello", "" and "there"
-- Split("Hello::there", ':', True) returns "Hello" and "there"
--
with Ada.Strings.Unbounded;
use Ada.Strings.Unbounded;
with Ada.Strings.Maps;
package Tokenize with SPARK_Mode => On is
type Token_Array is array (Positive range <>) of Unbounded_String;
--
-- Collation is considering successive instances of separators as
-- one and/or ignoring separators at the beginning or the end of
-- the string. For example, if collation is applied and the separator
-- is ';' the string
--
-- ";foo;;bar;pooh;"
--
-- will be splitted to
--
-- "foo", "bar" and "pooh"
--
-- while if collation is not applied the result will be
--
-- "", "foo", "", "bar", "pooh" and ""
--
-- A variable of type Collation_Option controls which kind of
-- collation is applied
--
type Collation_Option is private;
None : constant Collation_Option; -- No collation
Head : constant Collation_Option; -- Ignore separators at the beginning
Tail : constant Collation_Option; -- Ignore separators at the end
Middle : constant Collation_Option; -- Collate separators in the middle
Full : constant Collation_Option; -- Full collation
-- Function to combine different collation options. For example,
--
-- Head and Tail
--
-- ignores separators at the beginning and at the end, but does not
-- do any collation in the middle, so that the splitting of
-- ";foo;;bar;pooh;" would give
--
-- "foo" "" "bar" and "pooh"
function "and" (X, Y : Collation_Option) return Collation_Option;
function Split (To_Be_Splitted : String;
Separator : Ada.Strings.Maps.Character_Set;
Collation : Collation_Option) return Token_Array
with
Pre =>
To_Be_Splitted'Last < Integer'Last - 2,
Post =>
(if To_Be_Splitted = "" then Split'Result'Length = 0),
Annotate => (Gnatprove, Terminating);
--
-- Split string To_Be_Splitted in substring separated by any character in
-- Separator. If Collate_Separator is true consider consecutive
-- istances of Separator as a single one.
--
function Split (To_Be_Splitted : String;
Separator : Ada.Strings.Maps.Character_Set;
Collate_Separator : Boolean) return Token_Array
with
Pre =>
To_Be_Splitted'Last < Integer'Last - 2,
Post =>
(if To_Be_Splitted = "" then Split'Result'Length = 0),
Annotate => (Gnatprove, Terminating);
--
-- Synctactic sugar when only a single separator (and not a set) is
-- used.
--
function Split (To_Be_Splitted : String;
Separator : Character;
Collate_Separator : Boolean) return Token_Array
is (Split (To_Be_Splitted => To_Be_Splitted,
Separator => Ada.Strings.Maps.To_Set (Separator),
Collate_Separator => Collate_Separator))
with Pre =>
To_Be_Splitted'Last < Integer'Last - 2,
Post =>
(if To_Be_Splitted = "" then Split'Result'Length = 0);
--
-- Synctatic sugar with Separator defaulting to the space, with
-- Collate_Separator True if Separator is the space, false otherwise
--
function Split (To_Be_Splitted : String;
Separator : Character := ' ')
return Token_Array
is (Split (To_Be_Splitted, Separator, Separator = ' '))
with Pre => To_Be_Splitted'Last < Integer'Last - 2;
Empty_Array : constant Token_Array (1 .. 0) :=
(others => Null_Unbounded_String);
private
type Basic_Collation_Option is (Ignore_Head, Collate_Middle, Ignore_Tail);
type Collation_Option is array (Basic_Collation_Option) of Boolean;
None : constant Collation_Option := (others => False);
Head : constant Collation_Option := (Ignore_Head => True, others => False);
Tail : constant Collation_Option := (Ignore_Tail => True, others => False);
Middle : constant Collation_Option := (Collate_Middle => True, others => False);
Full : constant Collation_Option := (others => True);
overriding function "and" (X, Y : Collation_Option) return Collation_Option
is (X or Y);
end Tokenize;
|