vss_24.0.0_b4d0be7c/source/text/implementation/vss-strings-converters-decoders-shiftjis.adb

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
--
--  Copyright (C) 2022, AdaCore
--
--  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
--

with Interfaces;

with VSS.Implementation.String_Handlers;

with VSS.Strings.Converters.Decoders.Index_JIS0208;

package body VSS.Strings.Converters.Decoders.ShiftJIS is

   ------------
   -- Decode --
   ------------

   overriding procedure Decode
     (Self        : in out ShiftJIS_Decoder;
      Source      : Ada.Streams.Stream_Element_Array;
      End_Of_Data : Boolean;
      Target      : out VSS.Implementation.Strings.String_Data)
   is
      use type Ada.Streams.Stream_Element;
      use type Ada.Streams.Stream_Element_Offset;
      use type VSS.Unicode.Code_Point;

      Index  : Ada.Streams.Stream_Element_Offset := Source'First;
      Lead   : Ada.Streams.Stream_Element        := Self.Lead;
      Byte   : Ada.Streams.Stream_Element;
      Offset : VSS.Implementation.Strings.Cursor_Offset := (0, 0, 0);

   begin
      if Self.Error and Self.Flags (Stop_On_Error) then
         --  Error was encountered in "stop on error" mode, return immidiately.

         return;
      end if;

      loop
         if Index > Source'Last then
            if Lead /= 0 and (Self.Flags (Stateless) or End_Of_Data) then
               Lead := 0;

               Self.Error := True;

               if not Self.Flags (Stop_On_Error) then
                  VSS.Implementation.Strings.Handler (Target).Append
                    (Target, Replacement_Character, Offset);
               end if;
            end if;

            exit;
         end if;

         Byte := Source (Index);

         if Lead /= 0 then
            declare
               --  use type Ada.Streams.Stream_Element;
               use type Interfaces.Unsigned_32;

               Pointer     : Interfaces.Unsigned_32     := 0;
               Byte_Offset : constant Ada.Streams.Stream_Element :=
                 (if Byte < 16#7F# then 16#40# else 16#41#);
               Lead_Offset : constant Ada.Streams.Stream_Element :=
                 (if Lead < 16#A0# then 16#81# else 16#C1#);
               Code        : VSS.Unicode.Code_Point     := 0;

            begin
               if Byte in 16#40# .. 16#7E# | 16#80# .. 16#FC# then
                  Pointer :=
                    Interfaces.Unsigned_32 (Lead - Lead_Offset) * 188
                      + Interfaces.Unsigned_32 (Byte - Byte_Offset);

                  if Pointer in 8_836 .. 10_715 then
                     Code :=
                       VSS.Unicode.Code_Point (16#E000# - 8_836 + Pointer);

                  else
                     Code := Index_JIS0208.Table (Pointer);
                  end if;
               end if;

               Lead := 0;

               if Code /= 0 then
                  VSS.Implementation.Strings.Handler (Target).Append
                    (Target, Code, Offset);

               else
                  if Byte in ASCII_Byte_Range then
                     Index := Index - 1;
                  end if;

                  Self.Error := True;

                  if Self.Flags (Stop_On_Error) then
                     exit;

                  else
                     VSS.Implementation.Strings.Handler (Target).Append
                       (Target, Replacement_Character, Offset);
                  end if;
               end if;
            end;

         elsif Byte in ASCII_Byte_Range | 16#80# then
            --  Encoding Standard (Jul 2022) maps 0x5C and 0x7E bytes to
            --  Unicode code point with same value. However, WPT assumes
            --  mapping of 0x5C to U+00A5 and 0x7E to U+203E.

            case Byte is
               when 16#5C# =>
                  VSS.Implementation.Strings.Handler (Target).Append
                    (Target, 16#A5#, Offset);

               when 16#7E# =>
                  VSS.Implementation.Strings.Handler (Target).Append
                    (Target, 16#203E#, Offset);

               when others =>
                  VSS.Implementation.Strings.Handler (Target).Append
                    (Target, VSS.Unicode.Code_Point (Byte), Offset);
            end case;

         elsif Byte in 16#A1# .. 16#DF# then
            VSS.Implementation.Strings.Handler (Target).Append
              (Target,
               16#FF61# + VSS.Unicode.Code_Point (Byte - 16#A1#),
               Offset);

         elsif Byte in 16#81# .. 16#9F# | 16#E0# .. 16#FC# then
            Lead := Byte;

         else
            Self.Error := True;

            if Self.Flags (Stop_On_Error) then
               exit;

            else
               VSS.Implementation.Strings.Handler (Target).Append
                 (Target, Replacement_Character, Offset);
            end if;
         end if;

         Index := Index + 1;
      end loop;

      Self.Lead := Lead;
   end Decode;

   -------------------
   -- Error_Message --
   -------------------

   overriding function Error_Message
     (Self : ShiftJIS_Decoder) return VSS.Strings.Virtual_String is
   begin
      if Self.Error then
         return "Iff-formed sequence";

      else
         return VSS.Strings.Empty_Virtual_String;
      end if;
   end Error_Message;

   -------------
   -- Factory --
   -------------

   function Factory
     (Flags : Converter_Flags)
      return VSS.Strings.Converters.Decoders.Decoder_Access is
   begin
      return Result : constant
        VSS.Strings.Converters.Decoders.Decoder_Access :=
          new ShiftJIS_Decoder
      do
         declare
            Self : ShiftJIS_Decoder renames ShiftJIS_Decoder (Result.all);

         begin
            Self.Flags := Flags;
            Self.Reset_State;
         end;
      end return;
   end Factory;

   ---------------
   -- Has_Error --
   ---------------

   overriding function Has_Error (Self : ShiftJIS_Decoder) return Boolean is
   begin
      return Self.Error;
   end Has_Error;

   -----------------
   -- Reset_State --
   -----------------

   overriding procedure Reset_State (Self : in out ShiftJIS_Decoder) is
   begin
      Self.Lead  := 0;
      Self.Error := False;
   end Reset_State;

end VSS.Strings.Converters.Decoders.ShiftJIS;