with Ada.Assertions; use Ada.Assertions; with Ada.Directories; with Ada.Strings.Wide_Wide_Unbounded; with Ada.Text_IO; use Ada.Text_IO; with Interfaces; use Interfaces; with Liblktlang_Support.Text; use Liblktlang_Support.Text; with Liblktlang.Analysis; use Liblktlang.Analysis; with Liblktlang.Prelude; with Liblktlang.Public_Converters; use Liblktlang.Public_Converters; package body Liblktlang.Implementation.Extensions is package WWS renames Ada.Strings.Wide_Wide_Unbounded; function Common_Denoted_String (Node : Bare_Lkt_Node) return Internal_Decoded_String_Value; -- Common implementation for the ``p_denoted_string`` property of all -- string/pattern literal nodes. procedure Read_Denoted_Char (Buffer : Text_Type; For_Char_Lit : Boolean; Cursor : in out Positive; Cursor_Sloc : in out Source_Location; Result : out Internal_Decoded_Char_Value); -- Read the next denoted character starting at ``Buffer (Cursor)``. -- -- The location of the character at ``Buffer (Cursor)`` must be passed to -- ``Cursor_Sloc``, which is updated to follow the evolution of ``Cursor``. -- -- Upon return, ``Cursor`` points to the first item in ``Buffer`` for the -- next character to read (or to the closing single/double quote if the -- character read was the last one) and ``Result`` is set to the character -- that was just read, or to an error message if reading one character was -- unsuccessful. --------------------------- -- Common_Denoted_String -- --------------------------- function Common_Denoted_String (Node : Bare_Lkt_Node) return Internal_Decoded_String_Value is Tab_Stop : constant Positive := Node.Unit.Context.Tab_Stop; N_Text : constant Text_Type := Text (Node); pragma Assert (N_Text (N_Text'Last) = '"'); Cursor : Natural := N_Text'First + 1; Cursor_Sloc : Source_Location := Start_Sloc (Sloc_Range (Node)); Result : Text_Type (1 .. N_Text'Length); Result_Last : Natural := Result'First - 1; Char_Value : Internal_Decoded_Char_Value; begin -- Make sure that the slice starts at the first denoted character in the -- presence of string literal prefix. if N_Text (N_Text'First) /= '"' then pragma Assert (N_Text (Cursor) = '"'); Cursor := Cursor + 1; end if; -- Update Cursor_Sloc so that it reflects the location of N_Text's item -- at index Cursor. Cursor_Sloc.Column := Cursor_Sloc.Column + Column_Count (N_Text (N_Text'First .. Cursor - 1), Tab_Stop); while Cursor /= N_Text'Last loop Result_Last := Result_Last + 1; Read_Denoted_Char (N_Text, False, Cursor, Cursor_Sloc, Char_Value); if Char_Value.Has_Error then return (Value => Empty_String, Has_Error => True, Error_Sloc => Char_Value.Error_Sloc, Error_Message => Char_Value.Error_Message); end if; Result (Result_Last) := Char_Value.Value; end loop; return (Value => Create_String (Result (Result'First .. Result_Last)), Has_Error => False, Error_Sloc => No_Source_Location, Error_Message => Empty_String); end Common_Denoted_String; ----------------------- -- Read_Denoted_Char -- ----------------------- procedure Read_Denoted_Char (Buffer : Text_Type; For_Char_Lit : Boolean; Cursor : in out Positive; Cursor_Sloc : in out Source_Location; Result : out Internal_Decoded_Char_Value) is begin Result := (Value => ' ', Has_Error => False, Error_Sloc => Cursor_Sloc, Error_Message => Empty_String); if Buffer (Cursor) = '\' then Cursor := Cursor + 1; Cursor_Sloc.Column := Cursor_Sloc.Column + 1; declare function Read_Digits (N : Positive) return Character_Type; -- Read N hexadecimal digits (encoding a codepoint number) and -- return the corresponding character. function Short_Escape_Sequence (Codepoint : Character) return Character_Type; -- Helper to read a short escape sequence: return ``Codepoint`` -- converted to ``Character_Type`` and increment ``Cursor``, to -- make it go from the short escape sequence character ("n" in -- "\n", for instance) to the next item in ``Buffer``. ----------------- -- Read_Digits -- ----------------- function Read_Digits (N : Positive) return Character_Type is Codepoint : Unsigned_32 := 0; Digit_Char : Character_Type; Digit_Value : Unsigned_32; begin for I in 1 .. N loop Cursor := Cursor + 1; Cursor_Sloc.Column := Cursor_Sloc.Column + 1; Digit_Char := Buffer (Cursor); case Digit_Char is when '0' .. '9' => Digit_Value := Character_Type'Pos (Digit_Char) - Character_Type'Pos ('0'); when 'a' .. 'f' => Digit_Value := Character_Type'Pos (Digit_Char) - Character_Type'Pos ('a') + 10; when 'A' .. 'F' => Digit_Value := Character_Type'Pos (Digit_Char) - Character_Type'Pos ('A') + 10; when others => Result.Has_Error := True; Result.Error_Message := Create_String ("invalid escape sequence"); return ' '; end case; Codepoint := 16 * Codepoint + Digit_Value; end loop; -- Move past the last digit of the escape sequence Cursor := Cursor + 1; Cursor_Sloc.Column := Cursor_Sloc.Column + 1; return Character_Type'Val (Codepoint); end Read_Digits; --------------------------- -- Short_Escape_Sequence -- --------------------------- function Short_Escape_Sequence (Codepoint : Character) return Character_Type is begin Cursor := Cursor + 1; Cursor_Sloc.Column := Cursor_Sloc.Column + 1; return Character_Type'Val (Character'Pos (Codepoint)); end Short_Escape_Sequence; begin case Buffer (Cursor) is -- Escape sequences for codepoint numbers when 'x' => Result.Value := Read_Digits (2); when 'u' => Result.Value := Read_Digits (4); when 'U' => Result.Value := Read_Digits (8); -- Short escape sequences when '0' => Result.Value := Short_Escape_Sequence (ASCII.NUL); when 'a' => Result.Value := Short_Escape_Sequence (ASCII.BEL); when 'b' => Result.Value := Short_Escape_Sequence (ASCII.BS); when 't' => Result.Value := Short_Escape_Sequence (ASCII.HT); when 'n' => Result.Value := Short_Escape_Sequence (ASCII.LF); when 'v' => Result.Value := Short_Escape_Sequence (ASCII.VT); when 'f' => Result.Value := Short_Escape_Sequence (ASCII.FF); when 'r' => Result.Value := Short_Escape_Sequence (ASCII.CR); when '\' => Result.Value := Short_Escape_Sequence ('\'); when '"' => if For_Char_Lit then Result.Has_Error := True; Result.Error_Message := Create_String ("invalid escape sequence"); else Result.Value := Short_Escape_Sequence ('"'); end if; when ''' => if For_Char_Lit then Result.Value := Short_Escape_Sequence ('''); else Result.Has_Error := True; Result.Error_Message := Create_String ("invalid escape sequence"); end if; when others => Result.Has_Error := True; Result.Error_Message := Create_String ("invalid escape sequence"); end case; if Result.Has_Error then return; end if; end; else Result.Value := Buffer (Cursor); Cursor_Sloc.Column := Cursor_Sloc.Column + Column_Count (Buffer (Cursor .. Cursor)); Cursor := Cursor + 1; end if; end Read_Denoted_Char; ---------------------------------- -- Langkit_Root_P_Fetch_Prelude -- ---------------------------------- function Langkit_Root_P_Fetch_Prelude (Node : Bare_Langkit_Root) return Boolean is Ctx : constant Analysis_Context := Wrap_Context (Node.Unit.Context); Prelude : Analysis_Unit; begin Prelude := Ctx.Get_From_File ("__prelude"); if Prelude.Root = No_Lkt_Node then Prelude := Ctx.Get_From_Buffer ("__prelude", "ascii", Liblktlang.Prelude.Content); -- Check if we have syntactic or semantic errors in the prelude. If -- we do, raise an assertion error. if Prelude.Diagnostics'Length > 0 then for Diagnostic of Prelude.Diagnostics loop Put_Line (To_Pretty_String (Diagnostic)); end loop; raise Assertion_Error with "Errors in prelude"; end if; declare Sem_Results : constant Tree_Semantic_Result := Prelude.Root.As_Langkit_Root.P_Check_Semantic; begin if Analysis.Has_Error (Sem_Results) then for R of Analysis.Results (Sem_Results) loop if Analysis.Error_Message (R) /= "" then Put_Line (Image (Analysis.Node (R).Full_Sloc_Image & Error_Message (R))); end if; end loop; raise Assertion_Error with "Errors in prelude"; end if; end; Populate_Lexical_Env (Prelude); return True; else return False; end if; end Langkit_Root_P_Fetch_Prelude; ------------------------ -- Ref_Id_Short_Image -- ------------------------ function Ref_Id_Short_Image (Node : Bare_Ref_Id) return Text_Type is begin return "<" & To_Text (Kind_Name (Node)) & " """ & Text (Node) & """ " & To_Text (Ada.Directories.Simple_Name (Get_Filename (Unit (Node)))) & ":" & To_Text (Image (Sloc_Range (Node))) & ">"; end Ref_Id_Short_Image; ---------------------- -- Decl_Short_Image -- ---------------------- function Decl_Short_Image (Node : Bare_Decl) return Text_Type is Full_Name_Acc : String_Type := Dispatcher_Decl_P_Full_Name (Node); Full_Name : constant Text_Type := Full_Name_Acc.Content; File_Name : constant Text_Type := To_Text (Ada.Directories.Simple_Name (Get_Filename (Unit (Node)))); begin Dec_Ref (Full_Name_Acc); if File_Name = "__prelude" then return "<" & To_Text (Kind_Name (Node)) & " prelude: """ & Full_Name & """>"; else return "<" & To_Text (Kind_Name (Node)) & " """ & Full_Name & """ " & File_Name -- Don't show the sloc for function types, because it will be the -- root node's sloc, and thus will always change when we add stuff -- to the file, which is not helpful nor practical for tests. & (if Node.Kind = Lkt_Function_Type then "" else ":" & To_Text (Image (Sloc_Range (Node)))) & ">"; end if; end Decl_Short_Image; ----------------------- -- Id_P_Is_Type_Name -- ----------------------- function Id_P_Is_Type_Name (Node : Bare_Id) return Boolean is N_Text : constant Text_Type := Text (Node); begin -- Here is not the time to perform casing validation. Type names are -- supposed to have lower case, so we can just check if the first -- character has upper case. return N_Text'Length > 0 and then N_Text (N_Text'First) in 'A' .. 'Z'; end Id_P_Is_Type_Name; --------------------------------------- -- Lkt_Node_P_Env_From_Vals_Internal -- --------------------------------------- function Lkt_Node_P_Env_From_Vals_Internal (Node : Bare_Lkt_Node; Vals : Internal_Env_Kv_Array_Access) return Lexical_Env is Ret : constant Lexical_Env := Create_Static_Lexical_Env (Null_Lexical_Env, Node, Node.Unit.Context.Symbols); begin for El of Vals.Items loop AST_Envs.Add (Ret, Thin (El.Key), El.Value); end loop; return Ret; end Lkt_Node_P_Env_From_Vals_Internal; ----------------------------------------------- -- Lkt_Node_P_Internal_Fetch_Referenced_Unit -- ----------------------------------------------- function Lkt_Node_P_Internal_Fetch_Referenced_Unit (Node : Bare_Lkt_Node; Name : String_Type) return Internal_Unit is I : constant Internal_Unit := Get_From_Provider (Context => Node.Unit.Context, Name => Name.Content, Kind => Unit_Body, Charset => "ascii", Reparse => False); begin Populate_Lexical_Env (Wrap_Unit (I)); return I; end Lkt_Node_P_Internal_Fetch_Referenced_Unit; ------------------------------------------------- -- Single_Line_String_Lit_P_Is_Prefixed_String -- ------------------------------------------------- function Single_Line_String_Lit_P_Is_Prefixed_String (Node : Bare_Single_Line_String_Lit) return Boolean is Tok_Kind : constant Token_Kind := Kind (Data (Token (Node, Node.Token_Start_Index))); begin return Tok_Kind = Lkt_P_String; end Single_Line_String_Lit_P_Is_Prefixed_String; ------------------------------------- -- Single_Line_String_Lit_P_Prefix -- ------------------------------------- function Single_Line_String_Lit_P_Prefix (Node : Bare_Single_Line_String_Lit) return Character_Type is N_Text : constant Text_Type := Text (Node); begin return (if Single_Line_String_Lit_P_Is_Prefixed_String (Node) then N_Text (N_Text'First) else Character_Type'Val (0)); end Single_Line_String_Lit_P_Prefix; ------------------------------ -- Char_Lit_P_Denoted_Value -- ------------------------------ function Char_Lit_P_Denoted_Value (Node : Bare_Char_Lit) return Internal_Decoded_Char_Value is N_Text : constant Text_Type := Text (Node); pragma Assert (N_Text (N_Text'First) = '''); pragma Assert (N_Text (N_Text'Last) = '''); Cursor : Positive := N_Text'First + 1; Cursor_Sloc : Source_Location := Start_Sloc (Sloc_Range (Node)); Result : Internal_Decoded_Char_Value; begin -- Before reading the denoted character, update Cursor_Sloc so that it -- corresponds to the character right after the opening single quote. Cursor_Sloc.Column := Cursor_Sloc.Column + 1; Read_Denoted_Char (N_Text, True, Cursor, Cursor_Sloc, Result); -- Ensure that reading one character has moved the cursor to the closing -- quote. If it is not the case, there are too many characters in this -- literal. if not Result.Has_Error and then Cursor /= N_Text'Last then Result.Has_Error := True; Result.Error_Sloc := Cursor_Sloc; Result.Error_Message := Create_String ("exactly one character expected"); end if; return Result; end Char_Lit_P_Denoted_Value; -------------------------------------- -- Block_String_Lit_P_Denoted_Value -- -------------------------------------- function Block_String_Lit_P_Denoted_Value (Node : Bare_Block_String_Lit) return Internal_Decoded_String_Value is Result : Unbounded_Text_Type; List : constant Bare_Lkt_Node_Base_List := Node.Block_String_Lit_F_Lines; begin for I in 1 .. List.Count loop declare Item : constant Bare_Block_String_Line := List.Nodes.all (I); N_Text : constant Text_Type := Text (Item); Cursor : Positive := N_Text'First; Cursor_Sloc : Source_Location := Start_Sloc (Sloc_Range (Item)); Char_Value : Internal_Decoded_Char_Value; begin -- There are two legal cases: -- -- * '|"', which designates an empty line -- * '|" ...', which desigantes a non-empty line: the space after -- '|"' is mandatory. Decode escape sequences in the line -- content. -- -- The only thing guaranteed here (by the lexer) is that N_Next -- starts with ``|"``: the rest is to be checked here, as semantic -- checks. pragma Assert (N_Text'Length >= 2); pragma Assert (N_Text (N_Text'First .. N_Text'First + 1) = "|"""); Cursor := Cursor + 2; Cursor_Sloc.Column := Cursor_Sloc.Column + 2; if N_Text'Length > 2 then if N_Text (Cursor) /= ' ' then return (Value => Empty_String, Has_Error => True, Error_Sloc => Cursor_Sloc, Error_Message => Create_String ("space missing")); elsif N_Text'Length = 3 then return (Value => Empty_String, Has_Error => True, Error_Sloc => Cursor_Sloc, Error_Message => Create_String ("empty line must not end with a space")); end if; Cursor := Cursor + 1; Cursor_Sloc.Column := Cursor_Sloc.Column + 1; while Cursor <= N_Text'Last loop Read_Denoted_Char (N_Text, False, Cursor, Cursor_Sloc, Char_Value); if Char_Value.Has_Error then return (Value => Empty_String, Has_Error => True, Error_Sloc => Char_Value.Error_Sloc, Error_Message => Char_Value.Error_Message); end if; WWS.Append (Result, Char_Value.Value); end loop; end if; WWS.Append (Result, Chars.LF); end; end loop; return (Value => Create_String (To_Text (Result)), Has_Error => False, Error_Sloc => No_Source_Location, Error_Message => Empty_String); end Block_String_Lit_P_Denoted_Value; -------------------------------------------- -- Single_Line_String_Lit_P_Denoted_Value -- -------------------------------------------- function Single_Line_String_Lit_P_Denoted_Value (Node : Bare_Single_Line_String_Lit) return Internal_Decoded_String_Value is begin return Common_Denoted_String (Node); end Single_Line_String_Lit_P_Denoted_Value; ------------------------------- -- Token_Lit_P_Denoted_Value -- ------------------------------- function Token_Lit_P_Denoted_Value (Node : Bare_Token_Lit) return Internal_Decoded_String_Value is begin return Common_Denoted_String (Node); end Token_Lit_P_Denoted_Value; --------------------------------------- -- Token_Pattern_Lit_P_Denoted_Value -- --------------------------------------- function Token_Pattern_Lit_P_Denoted_Value (Node : Bare_Token_Pattern_Lit) return Internal_Decoded_String_Value is begin return Common_Denoted_String (Node); end Token_Pattern_Lit_P_Denoted_Value; end Liblktlang.Implementation.Extensions;