{
  Copyright 2009-2017 Michalis Kamburelis.

  This file is part of "Castle Game Engine".

  "Castle Game Engine" is free software; see the file COPYING.txt,
  included in this distribution, for details about the copyright.

  "Castle Game Engine" is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

  ----------------------------------------------------------------------------
}

{ DDS loading and saving. }

{ ----------------------------------------------------------------------------
  Constants and types for DDS file handling.

  This is based on MS docs on
  http://msdn.microsoft.com/en-us/library/bb943991(VS.85).aspx
  and gimp-dds/gimp-dds-2.0.7/dds.h,
  from gimp-dds plugin source code
  from http://nifelheim.dyndns.org/~cocidius/dds/.

  (gimp-dds is GNU GPL >= 2, and our engine is on LGPL >= 2,
  but I think it's Ok --- I did't copy code, just constant values
  known from DDS spec, and converted to Pascal). }

const
  DDSD_CAPS                   = $00000001;
  DDSD_HEIGHT                 = $00000002;
  DDSD_WIDTH                  = $00000004;
  DDSD_PITCH                  = $00000008;
  DDSD_PIXELFORMAT            = $00001000;
  DDSD_MIPMAPCOUNT            = $00020000;
  DDSD_LINEARSIZE             = $00080000;
  DDSD_DEPTH                  = $00800000;

  DDPF_ALPHAPIXELS            = $00000001;
  DDPF_ALPHA                  = $00000002;
  DDPF_FOURCC                 = $00000004;
  DDPF_PALETTEINDEXED8        = $00000020;
  DDPF_RGB                    = $00000040;
  DDPF_LUMINANCE              = $00020000;

  DDSCAPS_COMPLEX             = $00000008;
  DDSCAPS_TEXTURE             = $00001000;
  DDSCAPS_MIPMAP              = $00400000;

  DDSCAPS2_CUBEMAP            = $00000200;
  DDSCAPS2_CUBEMAP_POSITIVEX  = $00000400;
  DDSCAPS2_CUBEMAP_NEGATIVEX  = $00000800;
  DDSCAPS2_CUBEMAP_POSITIVEY  = $00001000;
  DDSCAPS2_CUBEMAP_NEGATIVEY  = $00002000;
  DDSCAPS2_CUBEMAP_POSITIVEZ  = $00004000;
  DDSCAPS2_CUBEMAP_NEGATIVEZ  = $00008000;
  DDSCAPS2_VOLUME             = $00200000;

  { Special FourCC constants, indicating float textures,
    from OGRE OgreDDSCodec.cpp. }
  D3DFMT_R16F            = 111;
  D3DFMT_G16R16F         = 112;
  D3DFMT_A16B16G16R16F   = 113;
  D3DFMT_R32F            = 114;
  D3DFMT_G32R32F         = 115;
  D3DFMT_A32B32G32R32F   = 116;

  { Constants for DxgiFormat, see
    https://msdn.microsoft.com/en-us/library/windows/desktop/bb173059%28v=vs.85%29.aspx }
  DXGI_FORMAT_UNKNOWN                     = 0;
  DXGI_FORMAT_R32G32B32A32_TYPELESS       = 1;
  DXGI_FORMAT_R32G32B32A32_FLOAT          = 2;
  DXGI_FORMAT_R32G32B32A32_UINT           = 3;
  DXGI_FORMAT_R32G32B32A32_SINT           = 4;
  DXGI_FORMAT_R32G32B32_TYPELESS          = 5;
  DXGI_FORMAT_R32G32B32_FLOAT             = 6;
  DXGI_FORMAT_R32G32B32_UINT              = 7;
  DXGI_FORMAT_R32G32B32_SINT              = 8;
  DXGI_FORMAT_R16G16B16A16_TYPELESS       = 9;
  DXGI_FORMAT_R16G16B16A16_FLOAT          = 10;
  DXGI_FORMAT_R16G16B16A16_UNORM          = 11;
  DXGI_FORMAT_R16G16B16A16_UINT           = 12;
  DXGI_FORMAT_R16G16B16A16_SNORM          = 13;
  DXGI_FORMAT_R16G16B16A16_SINT           = 14;
  DXGI_FORMAT_R32G32_TYPELESS             = 15;
  DXGI_FORMAT_R32G32_FLOAT                = 16;
  DXGI_FORMAT_R32G32_UINT                 = 17;
  DXGI_FORMAT_R32G32_SINT                 = 18;
  DXGI_FORMAT_R32G8X24_TYPELESS           = 19;
  DXGI_FORMAT_D32_FLOAT_S8X24_UINT        = 20;
  DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS    = 21;
  DXGI_FORMAT_X32_TYPELESS_G8X24_UINT     = 22;
  DXGI_FORMAT_R10G10B10A2_TYPELESS        = 23;
  DXGI_FORMAT_R10G10B10A2_UNORM           = 24;
  DXGI_FORMAT_R10G10B10A2_UINT            = 25;
  DXGI_FORMAT_R11G11B10_FLOAT             = 26;
  DXGI_FORMAT_R8G8B8A8_TYPELESS           = 27;
  DXGI_FORMAT_R8G8B8A8_UNORM              = 28;
  DXGI_FORMAT_R8G8B8A8_UNORM_SRGB         = 29;
  DXGI_FORMAT_R8G8B8A8_UINT               = 30;
  DXGI_FORMAT_R8G8B8A8_SNORM              = 31;
  DXGI_FORMAT_R8G8B8A8_SINT               = 32;
  DXGI_FORMAT_R16G16_TYPELESS             = 33;
  DXGI_FORMAT_R16G16_FLOAT                = 34;
  DXGI_FORMAT_R16G16_UNORM                = 35;
  DXGI_FORMAT_R16G16_UINT                 = 36;
  DXGI_FORMAT_R16G16_SNORM                = 37;
  DXGI_FORMAT_R16G16_SINT                 = 38;
  DXGI_FORMAT_R32_TYPELESS                = 39;
  DXGI_FORMAT_D32_FLOAT                   = 40;
  DXGI_FORMAT_R32_FLOAT                   = 41;
  DXGI_FORMAT_R32_UINT                    = 42;
  DXGI_FORMAT_R32_SINT                    = 43;
  DXGI_FORMAT_R24G8_TYPELESS              = 44;
  DXGI_FORMAT_D24_UNORM_S8_UINT           = 45;
  DXGI_FORMAT_R24_UNORM_X8_TYPELESS       = 46;
  DXGI_FORMAT_X24_TYPELESS_G8_UINT        = 47;
  DXGI_FORMAT_R8G8_TYPELESS               = 48;
  DXGI_FORMAT_R8G8_UNORM                  = 49;
  DXGI_FORMAT_R8G8_UINT                   = 50;
  DXGI_FORMAT_R8G8_SNORM                  = 51;
  DXGI_FORMAT_R8G8_SINT                   = 52;
  DXGI_FORMAT_R16_TYPELESS                = 53;
  DXGI_FORMAT_R16_FLOAT                   = 54;
  DXGI_FORMAT_D16_UNORM                   = 55;
  DXGI_FORMAT_R16_UNORM                   = 56;
  DXGI_FORMAT_R16_UINT                    = 57;
  DXGI_FORMAT_R16_SNORM                   = 58;
  DXGI_FORMAT_R16_SINT                    = 59;
  DXGI_FORMAT_R8_TYPELESS                 = 60;
  DXGI_FORMAT_R8_UNORM                    = 61;
  DXGI_FORMAT_R8_UINT                     = 62;
  DXGI_FORMAT_R8_SNORM                    = 63;
  DXGI_FORMAT_R8_SINT                     = 64;
  DXGI_FORMAT_A8_UNORM                    = 65;
  DXGI_FORMAT_R1_UNORM                    = 66;
  DXGI_FORMAT_R9G9B9E5_SHAREDEXP          = 67;
  DXGI_FORMAT_R8G8_B8G8_UNORM             = 68;
  DXGI_FORMAT_G8R8_G8B8_UNORM             = 69;
  DXGI_FORMAT_BC1_TYPELESS                = 70;
  DXGI_FORMAT_BC1_UNORM                   = 71;
  DXGI_FORMAT_BC1_UNORM_SRGB              = 72;
  DXGI_FORMAT_BC2_TYPELESS                = 73;
  DXGI_FORMAT_BC2_UNORM                   = 74;
  DXGI_FORMAT_BC2_UNORM_SRGB              = 75;
  DXGI_FORMAT_BC3_TYPELESS                = 76;
  DXGI_FORMAT_BC3_UNORM                   = 77;
  DXGI_FORMAT_BC3_UNORM_SRGB              = 78;
  DXGI_FORMAT_BC4_TYPELESS                = 79;
  DXGI_FORMAT_BC4_UNORM                   = 80;
  DXGI_FORMAT_BC4_SNORM                   = 81;
  DXGI_FORMAT_BC5_TYPELESS                = 82;
  DXGI_FORMAT_BC5_UNORM                   = 83;
  DXGI_FORMAT_BC5_SNORM                   = 84;
  DXGI_FORMAT_B5G6R5_UNORM                = 85;
  DXGI_FORMAT_B5G5R5A1_UNORM              = 86;
  DXGI_FORMAT_B8G8R8A8_UNORM              = 87;
  DXGI_FORMAT_B8G8R8X8_UNORM              = 88;
  DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM  = 89;
  DXGI_FORMAT_B8G8R8A8_TYPELESS           = 90;
  DXGI_FORMAT_B8G8R8A8_UNORM_SRGB         = 91;
  DXGI_FORMAT_B8G8R8X8_TYPELESS           = 92;
  DXGI_FORMAT_B8G8R8X8_UNORM_SRGB         = 93;
  DXGI_FORMAT_BC6H_TYPELESS               = 94;
  DXGI_FORMAT_BC6H_UF16                   = 95;
  DXGI_FORMAT_BC6H_SF16                   = 96;
  DXGI_FORMAT_BC7_TYPELESS                = 97;
  DXGI_FORMAT_BC7_UNORM                   = 98;
  DXGI_FORMAT_BC7_UNORM_SRGB              = 99;
  DXGI_FORMAT_AYUV                        = 100;
  DXGI_FORMAT_Y410                        = 101;
  DXGI_FORMAT_Y416                        = 102;
  DXGI_FORMAT_NV12                        = 103;
  DXGI_FORMAT_P010                        = 104;
  DXGI_FORMAT_P016                        = 105;
  DXGI_FORMAT_420_OPAQUE                  = 106;
  DXGI_FORMAT_YUY2                        = 107;
  DXGI_FORMAT_Y210                        = 108;
  DXGI_FORMAT_Y216                        = 109;
  DXGI_FORMAT_NV11                        = 110;
  DXGI_FORMAT_AI44                        = 111;
  DXGI_FORMAT_IA44                        = 112;
  DXGI_FORMAT_P8                          = 113;
  DXGI_FORMAT_A8P8                        = 114;
  DXGI_FORMAT_B4G4R4A4_UNORM              = 115;

type
  TDDSPixelFormat = packed record
    Size: LongWord;
    Flags: LongWord;
    case Integer of
      0: ( FourCC: array [0 .. 3] of char;
           RGBBitCount: LongWord;
           RBitMask: LongWord;
           GBitMask: LongWord;
           BBitMask: LongWord;
           ABitMask: LongWord; );
      1: ( { Alternative FourCC view, as LongWord, always in little endian memory
             order (process with LEtoN to compare with ints). }
           FourCCIntLE: LongWord; )

  end;
  PDDSPixelFormat = ^TDDSPixelFormat;

  { Corresponds to DDS_HEADER (Direct3D 10),
    http://msdn.microsoft.com/en-us/library/bb943982(VS.85).aspx. }
  TDDSHeader = packed record
    Size: LongWord;
    Flags: LongWord;
    Height: LongWord;
    Width: LongWord;
    PitchOrLinearSize: LongWord;
    Depth: LongWord;
    MipMapCount: LongWord;
    Reserved: array [0 .. 10] of LongWord;
    PixelFormat: TDDSPixelFormat;
    Caps1: LongWord;
    Caps2: LongWord;
    ReservedCaps: array [0 .. 1] of LongWord;
    Reserved2: LongWord;
  end;

  { DDS header extension to handle resource arrays,
    DXGI pixel formats that don't map to the legacy Microsoft DirectDraw
    pixel format structures, and additional metadata.
    From https://msdn.microsoft.com/en-us/library/windows/desktop/bb943983%28v=vs.85%29.aspx }
  TDDSHeaderDxt10 = packed record
    DxgiFormat: LongWord;
    ResourceDimension: LongWord;
    MiscFlag: LongWord;
    ArraySize: LongWord;
    MiscFlags2: LongWord;
  end;

{ TDDSRowReader -------------------------------------------------------------- }

type
  TRGBAChannel = 0..3;

  TChannelInfo = record
    Mask: LongWord;
    { Shift (to the right, by ShiftR) to fit into Byte }
    ShiftToByte: Integer;
    { Shift (to the right, by ShiftR) to have the least significant bit at
      1st position. }
    ShiftToLeast: Integer;
    { How long is the sequence of 1 bits inside the mask. }
    MaskOnes: Cardinal;
  end;

  { Reads DDS image rows, loading them to temporary memory.

    After reading row (by ReadRow) you can uncompress it by
    using RGBA (returns value as 8-bit Byte, for Red, Green, Blue, Alpha
    according to Channel parameter)
    and NextPixel repeatedly.

    It's callers responsibility to make sure that you use
    RGBA only with channels for which masks are non-zero.

    This is not the most efficient reader (it reads rows to temporary memory,
    for starters), but it works for all covered uncompressed non-palette
    DDS pixel formats. For special pixel formats, we use specialized
    optimized readers in ReadOptimized_*. }
  TDDSRowReader = class
  private
    FPixelFormat: PDDSPixelFormat;
    FWidth, FRowBytePadding: Cardinal;
    Row: Pointer;
    Pixel: Pointer;
    PixelValue: LongWord;
    RowByteSize, PixelByteSize: Cardinal;
    Channels: array [TRGBAChannel] of TChannelInfo;
    procedure CalculatePixelValue;
  public
    constructor Create(const PixelFormat: PDDSPixelFormat;
      const Width, RowBytePadding: Cardinal);
    destructor Destroy; override;

    procedure ReadRow(Stream: TStream);
    procedure NextPixel;
    function RGBA(Channel: TRGBAChannel): Byte;
  end;

{ Shift bits to the right if Value is positive.
  Shift to the left if negative. }
function ShiftR(const Value: LongWord; const Shift: Integer): LongWord;
begin
  if Shift >= 0 then
    Result := Value shr Shift else
    Result := Value shl (-Shift);
end;

constructor TDDSRowReader.Create(const PixelFormat: PDDSPixelFormat;
  const Width, RowBytePadding: Cardinal);

  { Calculate TChannelInfo. }
  procedure CalculateChannelInfo(const Mask: LongWord; var Info: TChannelInfo);
  const
    High1 = LongWord(1) shl 31;
  var
    LeadingZeros, Ones: Integer;
    M: LongWord;
  begin
    Info.Mask := Mask;

    if Mask = 0 then
    begin
      Info.ShiftToByte := 0;
      Info.ShiftToLeast := 0;
      Info.MaskOnes := 0;
      Exit;
    end;

    M := Mask;

    LeadingZeros := 0;
    while M and High1 = 0 do
    begin
      Inc(LeadingZeros);
      M := M shl 1;
    end;

    Ones := 0;
    while M <> 0 do
    begin
      Inc(Ones);
      M := M shl 1;
    end;
    Info.MaskOnes := Ones;

    { So the mask in binary starts with LeadingZeros of 0,
      then some 1. After Ones digits, the mask is zero.

      (In other words, we have a sequence of Ones bits inside the Mask
      that start and end with digit 1. We could assume here that
      this sequence is just full of 1 (nothing produces DDS
      files with other values, that would be pretty strange).
      But that's not actually necessary, all we need is to know
      the position of the most and least significant 1 digit,
      and then the trivial equation below will work Ok. }

    Info.ShiftToByte := 24 - LeadingZeros;
    Info.ShiftToLeast := 32 - LeadingZeros - Ones;

    { Assert that after shifting, all bits above Byte are clear
      and the most significant bit of color is in the most significant bit
      of byte. }
    Assert(ShiftR(Mask, Info.ShiftToByte) and $FFFFFF80 = $80);
  end;

begin
  inherited Create;
  FPixelFormat := PixelFormat;
  FWidth := Width;
  FRowBytePadding := RowBytePadding;

  { We already checked in TCompositeImage.LoadFromStream that RGBitCount divides
    by 8 and is not zero. }
  PixelByteSize := FPixelFormat^.RGBBitCount div 8;

  if PixelByteSize > SizeOf(PixelValue) then
    raise EInvalidDDS.CreateFmt('Unsupported DDS pixel format: more than 32 bits per pixel (RGBitCount is %d). Please report with sample image',
      [FPixelFormat^.RGBBitCount]);

  RowByteSize := PixelByteSize * Width;
  Row := GetMem(RowByteSize);

  CalculateChannelInfo(FPixelFormat^.RBitMask, Channels[0]);
  CalculateChannelInfo(FPixelFormat^.GBitMask, Channels[1]);
  CalculateChannelInfo(FPixelFormat^.BBitMask, Channels[2]);
  CalculateChannelInfo(FPixelFormat^.ABitMask, Channels[3]);
end;

destructor TDDSRowReader.Destroy;
begin
  FreeMem(Row);
  inherited;
end;

procedure TDDSRowReader.CalculatePixelValue;
begin
  PixelValue := 0;

  { The tricky memory part: move the meaningful bytes of current pixel
    (under Pixel^) to appropriate part of PixelValue, such that
    PixelValue can be directly and'ed with DDS masks (in ChannelMask[]).
    The copied bytes are the least significant part of LongWord value
    (DDS masks are specified like that). }

  Move(Pixel^, Pointer(PtrUInt(PtrUInt(@PixelValue)
    {$ifdef ENDIAN_BIG} + SizeOf(PixelValue) - PixelByteSize {$endif} ))^,
    PixelByteSize);
end;

procedure TDDSRowReader.ReadRow(Stream: TStream);
begin
  Stream.ReadBuffer(Row^, RowByteSize);

  if FRowBytePadding <> 0 then
    Stream.Seek(FRowBytePadding, soFromCurrent);

  Pixel := Row;
  CalculatePixelValue;
end;

procedure TDDSRowReader.NextPixel;
begin
  PtrUInt(Pixel) := PtrUInt(Pixel) + PixelByteSize;
  CalculatePixelValue;
end;

function TDDSRowReader.RGBA(Channel: TRGBAChannel): Byte;
begin
  with Channels[Channel] do
  begin
    Result := ShiftR(PixelValue and Mask, ShiftToByte);

    { What to with bits that are not set by PixelValue?
      That is, when the mask is less than 8 bits?

      It's important that pure white be preserved as pure white,
      and pure black as pure black. This is especially important for alpha
      channel (otherwise opaque image will suddenly turn into
      partially transparent, e.g. if you load image with 2 bits for alpha,
      and fill the remaining 6 bits with 0, then alpha = 11 (binary)
      will be turned into 11000000 (no longer completely opaque).)

      So it seems most sensible to fill the remaining bits with the contents
      of least-significant color bit. If this bit is zero, we actually
      have to do nothing, but when it's one --- we need to insert 1 bits
      into Result.

      This is done only when MaskOnes < 8, to optimize. }
    if (MaskOnes < 8) and
       (ShiftR(PixelValue, ShiftToLeast) and 1 <> 0) then
    begin
      { So the least-significant color bit is 1. }
      Result := Result or (ShiftR(not Mask, ShiftToByte) and $FF);
    end;
  end;
end;

{ TDDSHandler ----------------------------------------------------------------- }

type
  TDDSHandler = class(TCompositeFormatHandler)
  strict private
    { A couple of optimized routines for image formats that
      closely match corresponding Images unit memory format are below.
      Optimized routines already know the Result memory format (image class)
      and file format (Header.PixelFormat), they don't have to check them.

      Names for optimized routines follow the naming of DDS ms docs.
      This means that they are actually inverted in memory,
      since DDS bit masks should be treated as little-endian.
      For example for RGB image, RBitMask = $ff0000 means that red is
      the 3rd (not 1st) byte...)

      That's why DDS format RGB8 must be inverted when writing to TRGBImage.
      The format matching exactly TRGBImage is actually named BGR8 in DDS.

      The optimized routines are much faster as they don't use TDDSRowReader,
      so they don't need a temporary row (they load directly into output
      image memory), they don't need any mask/shift operations for each channel
      of each pixel (as the pixel format already matches what is needed,
      eventual swaps BGR<->RGB are fast).
      Tests shown that optimized versions are 4-7 times faster than
      if TDDSRowReader would be used to read the same images.
    }

    class procedure ReadOptimized_G8(const Stream: TStream;
      const Image: TCastleImage; const RowBytePadding: Integer); static;
    class procedure ReadOptimized_AG8(const Stream: TStream;
      const Image: TCastleImage; const RowBytePadding: Integer); static;
    class procedure ReadOptimized_RGB8(const Stream: TStream;
      const Image: TCastleImage; const RowBytePadding: Integer); static;
    class procedure ReadOptimized_BGR8(const Stream: TStream;
      const Image: TCastleImage; const RowBytePadding: Integer); static;
    class procedure ReadOptimized_ARGB8(const Stream: TStream;
      const Image: TCastleImage; const RowBytePadding: Integer); static;
    class procedure ReadOptimized_ABGR8(const Stream: TStream;
      const Image: TCastleImage; const RowBytePadding: Integer); static;
  public
    procedure LoadFromStream(const Stream: TStream; const URL: string); override;
    procedure SaveToStream(const Stream: TStream); override;
  end;

class procedure TDDSHandler.ReadOptimized_G8(const Stream: TStream;
  const Image: TCastleImage; const RowBytePadding: Integer);
var
  Y, Z: Integer;
begin
  for Z := 0 to Image.Depth - 1 do
    for Y := Image.Height - 1 downto 0 do
    begin
      Stream.ReadBuffer(Image.RowPtr(Y, Z)^, Image.PixelSize * Image.Width);
      if RowBytePadding <> 0 then
        Stream.Seek(RowBytePadding, soFromCurrent);
    end;
end;

class procedure TDDSHandler.ReadOptimized_AG8(const Stream: TStream;
  const Image: TCastleImage; const RowBytePadding: Integer);
var
  Y, Z: Integer;
begin
  for Z := 0 to Image.Depth - 1 do
    for Y := Image.Height - 1 downto 0 do
    begin
      Stream.ReadBuffer(Image.RowPtr(Y, Z)^, Image.PixelSize * Image.Width);
      if RowBytePadding <> 0 then
        Stream.Seek(RowBytePadding, soFromCurrent);
    end;
end;

class procedure TDDSHandler.ReadOptimized_RGB8(const Stream: TStream;
  const Image: TCastleImage; const RowBytePadding: Integer);
var
  X, Y, Z: Integer;
  Row: PVector3Byte;
begin
  for Z := 0 to Image.Depth - 1 do
    for Y := Image.Height - 1 downto 0 do
    begin
      Row := Image.RowPtr(Y, Z);
      Stream.ReadBuffer(Row^, Image.PixelSize * Image.Width);

      { Now invert red and blue. (Since all masks are little-endian,
        RBitMask = $FF0000 means that red is the 3rd (not 1st) byte...) }
      for X := 0 to Image.Width - 1 do
      begin
        SwapValues(Row^.Data[2], Row^.Data[0]);
        Inc(Row);
      end;

      if RowBytePadding <> 0 then
        Stream.Seek(RowBytePadding, soFromCurrent);
    end;
end;

class procedure TDDSHandler.ReadOptimized_BGR8(const Stream: TStream;
  const Image: TCastleImage; const RowBytePadding: Integer);
var
  Y, Z: Integer;
begin
  for Z := 0 to Image.Depth - 1 do
    for Y := Image.Height - 1 downto 0 do
    begin
      Stream.ReadBuffer(Image.RowPtr(Y, Z)^, Image.PixelSize * Image.Width);
      if RowBytePadding <> 0 then
        Stream.Seek(RowBytePadding, soFromCurrent);
    end;
end;

class procedure TDDSHandler.ReadOptimized_ARGB8(const Stream: TStream;
  const Image: TCastleImage; const RowBytePadding: Integer);
var
  X, Y, Z: Integer;
  Row: PVector4Byte;
begin
  for Z := 0 to Image.Depth - 1 do
    for Y := Image.Height - 1 downto 0 do
    begin
      Row := Image.RowPtr(Y, Z);
      Stream.ReadBuffer(Row^, Image.PixelSize * Image.Width);

      { Now invert ARGB to ABGR. So swap red<->blue, alpha and green are Ok. }
      for X := 0 to Image.Width - 1 do
      begin
        SwapValues(Row^.Data[2], Row^.Data[0]);
        Inc(Row);
      end;

      if RowBytePadding <> 0 then
        Stream.Seek(RowBytePadding, soFromCurrent);
    end;
end;

class procedure TDDSHandler.ReadOptimized_ABGR8(const Stream: TStream;
  const Image: TCastleImage; const RowBytePadding: Integer);
var
  Y, Z: Integer;
begin
  for Z := 0 to Image.Depth - 1 do
    for Y := Image.Height - 1 downto 0 do
    begin
      Stream.ReadBuffer(Image.RowPtr(Y, Z)^, Image.PixelSize * Image.Width);
      if RowBytePadding <> 0 then
        Stream.Seek(RowBytePadding, soFromCurrent);
    end;
end;

procedure TDDSHandler.LoadFromStream(const Stream: TStream; const URL: string);

  procedure CheckWarn(const Check: boolean; const Message: string);
  begin
    if not Check then
      WritelnWarning('DDS image', Message);
  end;

var
  Header: TDDSHeader;

  { Reading header initializes many instance fields:
    FWidth, FHeight,
    FMipmaps, FMipmapsCount,
    FCompositeType, FCubeMapSides, FDepth }
  procedure ReadHeader;
  var
    Magic: array [0 .. 3] of char;
  begin
    Stream.ReadBuffer(Magic, SizeOf(Magic));
    Check(Magic = 'DDS ', 'DDS file beginning (magic number) invalid, maybe this is not really a DDS file');

    Stream.ReadBuffer(Header, SizeOf(Header));

    { We could do this code *always*, not only on big-endian machines.
      But it does nothing on little-endian machines, so it's a waste of time
      there, so only do it in -dDEBUG mode. }
    {$if defined(ENDIAN_BIG) or defined(DEBUG)}
    Header.Size                    := LEToN(Header.Size                   );
    Header.Flags                   := LEToN(Header.Flags                  );
    Header.Height                  := LEToN(Header.Height                 );
    Header.Width                   := LEToN(Header.Width                  );
    Header.PitchOrLinearSize       := LEToN(Header.PitchOrLinearSize      );
    Header.Depth                   := LEToN(Header.Depth                  );
    Header.MipMapCount             := LEToN(Header.MipMapCount            );
    Header.PixelFormat.Size        := LEToN(Header.PixelFormat.Size       );
    Header.PixelFormat.Flags       := LEToN(Header.PixelFormat.Flags      );
    Header.PixelFormat.RGBBitCount := LEToN(Header.PixelFormat.RGBBitCount);
    Header.PixelFormat.RBitMask    := LEToN(Header.PixelFormat.RBitMask   );
    Header.PixelFormat.GBitMask    := LEToN(Header.PixelFormat.GBitMask   );
    Header.PixelFormat.BBitMask    := LEToN(Header.PixelFormat.BBitMask   );
    Header.PixelFormat.ABitMask    := LEToN(Header.PixelFormat.ABitMask   );
    Header.Caps1                   := LEToN(Header.Caps1                  );
    Header.Caps2                   := LEToN(Header.Caps2                  );
    {$endif}

    Assert(SizeOf(Header) = 124); { this is actually constant, as it's part of file format spec... }
    Check(Header.Size = SizeOf(Header), 'DDS header size incorrect');
    Check(Header.Flags and DDSD_CAPS <> 0, 'Missing DDSD_CAPS');
    Check(Header.Flags and DDSD_HEIGHT <> 0, 'Missing DDSD_HEIGHT');
    Check(Header.Flags and DDSD_WIDTH <> 0, 'Missing DDSD_WIDTH');
    Check(Header.Flags and DDSD_PIXELFORMAT <> 0, 'Missing DDSD_PIXELFORMAT');

    Height := Header.Height;
    Width := Header.Width;

    { calculate FMipmaps, FMipmapsCount }
    Composite.FMipmaps :=  Header.Caps1 and DDSCAPS_MIPMAP <> 0;
    if Composite.FMipmaps then
    begin
      Check(Header.Flags and DDSD_MIPMAPCOUNT <> 0, 'Missing DDSD_MIPMAPCOUNT, but caps indicate that this DDS image has mipmaps');
      Check(Header.Caps1 and DDSCAPS_COMPLEX <> 0, 'Missing DDSCAPS_COMPLEX, but caps indicate that this DDS image has mipmaps');
      Composite.FMipmapsCount := Header.MipMapCount;
      Check(Composite.MipmapsCount > 0, 'Specified mipmaps, but mipmap count is zero');
      { ATI Compresonator always sets this flag, even though it does not create mipmaps,
        and source image has not power-of-two sizes. So turn off FMipmaps flag. }
      if Composite.FMipmapsCount = 1 then
        Composite.FMipmaps := false;
    end else
      Composite.FMipmapsCount := 1;

    Check(Header.Flags and DDSCAPS_TEXTURE <> 0, 'Missing DDSCAPS_TEXTURE');

    { May be changed later if volume texture }
    Depth := 1;

    { calculate CompositeType }
    Check( (Header.Caps2 and DDSCAPS2_VOLUME = 0) or
           (Header.Caps2 and DDSCAPS2_CUBEMAP = 0),
      'DDS capabilities indicate CUBEMAP and VOLUME texture at the same time');
    if Header.Caps2 and DDSCAPS2_CUBEMAP <> 0 then
    begin
      Composite.FCompositeType := ctCubeMap;

      { calculate FCubeMapSides }
      Composite.FCubeMapSides := [];
      if Header.Caps2 and DDSCAPS2_CUBEMAP_POSITIVEX <> 0 then Include(Composite.FCubeMapSides, dcsPositiveX);
      if Header.Caps2 and DDSCAPS2_CUBEMAP_NEGATIVEX <> 0 then Include(Composite.FCubeMapSides, dcsNegativeX);
      if Header.Caps2 and DDSCAPS2_CUBEMAP_POSITIVEY <> 0 then Include(Composite.FCubeMapSides, dcsPositiveY);
      if Header.Caps2 and DDSCAPS2_CUBEMAP_NEGATIVEY <> 0 then Include(Composite.FCubeMapSides, dcsNegativeY);
      if Header.Caps2 and DDSCAPS2_CUBEMAP_POSITIVEZ <> 0 then Include(Composite.FCubeMapSides, dcsPositiveZ);
      if Header.Caps2 and DDSCAPS2_CUBEMAP_NEGATIVEZ <> 0 then Include(Composite.FCubeMapSides, dcsNegativeZ);
      Check(Composite.FCubeMapSides <> [], 'DDS file is a cube map, but all six sides of the cube are missing');
    end else
    if Header.Caps2 and DDSCAPS2_VOLUME <> 0 then
    begin
      Composite.FCompositeType := ctVolume;

      { calculate FDepth }
      Check(Header.Flags and DDSD_DEPTH <> 0, 'Missing DDSD_DEPTH, but DDS is a VOLUME texture');
      Depth := Header.Depth;
    end else
      Composite.FCompositeType := ctTexture;
    if Composite.FCompositeType <> ctTexture then
      { This invalid situation happens on
        http://regedit.gamedev.pl/Download/Rozne/Tekstury%20narzedziowe/NoiseVolume.dds
        (from
        http://regedit.gamedev.pl/download.php5?x=Rozne%2FTekstury+narzedziowe) }
      CheckWarn(Header.Caps1 and DDSCAPS_COMPLEX <> 0, 'Missing DDSCAPS_COMPLEX, but caps indicate that this DDS image has cube map or volume');

    Check(Header.PixelFormat.Size = SizeOf(Header.PixelFormat), 'Incorrect size of DDS pixel format record');
  end;

var
  HeaderDxt10: TDDSHeaderDxt10;

  procedure ReadHeaderDxt10;
  begin
    if (Header.PixelFormat.Flags and DDPF_FOURCC <> 0) and
       (Header.PixelFormat.FourCC = 'DX10') then
    begin
      Stream.ReadBuffer(HeaderDxt10, SizeOf(HeaderDxt10));

      { We could do this code *always*, not only on big-endian machines.
        But it does nothing on little-endian machines, so it's a waste of time
        there, so only do it in -dDEBUG mode. }
      {$if defined(ENDIAN_BIG) or defined(DEBUG)}
      HeaderDxt10.DxgiFormat        := LEToN(HeaderDxt10.DxgiFormat       );
      HeaderDxt10.ResourceDimension := LEToN(HeaderDxt10.ResourceDimension);
      HeaderDxt10.MiscFlag          := LEToN(HeaderDxt10.MiscFlag         );
      HeaderDxt10.ArraySize         := LEToN(HeaderDxt10.ArraySize        );
      HeaderDxt10.MiscFlags2        := LEToN(HeaderDxt10.MiscFlags2       );
      {$endif}
    end else
      FillChar(HeaderDxt10, SizeOf(HeaderDxt10), #0);
  end;

  { Read actual image data, initializing FImages contents }
  procedure ReadImages;

    { Read a single, normal 2D (or 3D) image from DDS file. }
    function ReadImage(const Width, Height, Depth, MipmapLevel: Cardinal): TEncodedImage;

      { Calculate RowBytePadding for an uncompressed image on this mipmap level. }
      function GetRowBytePadding(const PixelSize: Integer): Integer;
      begin
        { Header.PitchOrLinearSize for uncompressed texture may indicate
          row length (pitch) in bytes. This is useful to indicate padding of lines.
          I understand that otherwise I should assume lines are not padded? }
        if (Header.Flags and DDSD_PITCH <> 0) and
           (Header.PitchOrLinearSize <> 0) and
           (MipmapLevel = 0) then
          Result := Max(Header.PitchOrLinearSize - PixelSize * Width, 0)
        else
          Result := 0;
      end;

    type
      TUncompressedType = (
        utRGB_AlphaPossible,
        utGrayscale_AlphaPossible,
        utPureAlpha );

      procedure ReadUncompressed(const UncompressedType: TUncompressedType);
      var
        RowBytePadding: Integer;
        { Within ReadUncompressed, Result is always of TCastleImage class }
        Res: TCastleImage absolute Result;

        procedure ReadToGrayscale;
        var
          Reader: TDDSRowReader;
          X, Y, Z: Integer;
          G: PByte;
        begin
          Reader := TDDSRowReader.Create(@Header.PixelFormat, Width, RowBytePadding);
          try
            for Z := 0 to Depth - 1 do
              for Y := Height - 1 downto 0 do
              begin
                Reader.ReadRow(Stream);
                G := Res.RowPtr(Y, Z);
                for X := 0 to Width - 1 do
                begin
                  G^ := Reader.RGBA(0);
                  Reader.NextPixel;
                  Inc(G);
                end;
              end;
          finally FreeAndNil(Reader) end;
        end;

        procedure ReadToGrayscaleAlpha;
        var
          Reader: TDDSRowReader;
          X, Y, Z: Integer;
          GA: PVector2Byte;
        begin
          Reader := TDDSRowReader.Create(@Header.PixelFormat, Width, RowBytePadding);
          try
            for Z := 0 to Depth - 1 do
              for Y := Height - 1 downto 0 do
              begin
                Reader.ReadRow(Stream);
                GA := Res.RowPtr(Y, Z);
                for X := 0 to Width - 1 do
                begin
                  GA^.Data[0] := Reader.RGBA(0);
                  GA^.Data[1] := Reader.RGBA(3);
                  Reader.NextPixel;
                  Inc(GA);
                end;
              end;
          finally FreeAndNil(Reader) end;
        end;

        procedure ReadToRGB;
        var
          Reader: TDDSRowReader;
          X, Y, Z: Integer;
          RGB: PVector3Byte;
        begin
          Reader := TDDSRowReader.Create(@Header.PixelFormat, Width, RowBytePadding);
          try
            for Z := 0 to Depth - 1 do
              for Y := Height - 1 downto 0 do
              begin
                Reader.ReadRow(Stream);
                RGB := Res.RowPtr(Y, Z);
                for X := 0 to Width - 1 do
                begin
                  RGB^.Data[0] := Reader.RGBA(0);
                  RGB^.Data[1] := Reader.RGBA(1);
                  RGB^.Data[2] := Reader.RGBA(2);
                  Reader.NextPixel;
                  Inc(RGB);
                end;
              end;
          finally FreeAndNil(Reader) end;
        end;

        procedure ReadToRGBAlpha;
        var
          Reader: TDDSRowReader;
          X, Y, Z: Integer;
          RGBA: PVector4Byte;
        begin
          Reader := TDDSRowReader.Create(@Header.PixelFormat, Width, RowBytePadding);
          try
            for Z := 0 to Depth - 1 do
              for Y := Height - 1 downto 0 do
              begin
                Reader.ReadRow(Stream);
                RGBA := Res.RowPtr(Y, Z);
                for X := 0 to Width - 1 do
                begin
                  RGBA^.Data[0] := Reader.RGBA(0);
                  RGBA^.Data[1] := Reader.RGBA(1);
                  RGBA^.Data[2] := Reader.RGBA(2);
                  RGBA^.Data[3] := Reader.RGBA(3);
                  Reader.NextPixel;
                  Inc(RGBA);
                end;
              end;
          finally FreeAndNil(Reader) end;
        end;

        { Read alpha-only images (may be produced by GIMP-DDS) }
        procedure ReadToGrayscaleAlphaPure;
        var
          Reader: TDDSRowReader;
          X, Y, Z: Integer;
          GA: PVector2Byte;
        begin
          Reader := TDDSRowReader.Create(@Header.PixelFormat, Width, RowBytePadding);
          try
            for Z := 0 to Depth - 1 do
              for Y := Height - 1 downto 0 do
              begin
                Reader.ReadRow(Stream);
                GA := Res.RowPtr(Y, Z);
                for X := 0 to Width - 1 do
                begin
                  GA^.Data[0] := 255;
                  GA^.Data[1] := Reader.RGBA(3);
                  Reader.NextPixel;
                  Inc(GA);
                end;
              end;
          finally FreeAndNil(Reader) end;
        end;

      begin { ReadUncompressed }
        Check(Header.PixelFormat.RGBBitCount mod 8 = 0, 'Invalid DDS pixel format: only RGBBitCount being multiple of 8 is supported. Please report with sample image');
        Check(Header.PixelFormat.RGBBitCount > 0, 'Invalid DDS pixel format: RGBBitCount must be non-zero');

        RowBytePadding := GetRowBytePadding(Header.PixelFormat.RGBBitCount div 8);

        case UncompressedType of
          utRGB_AlphaPossible:
            if Header.PixelFormat.Flags and DDPF_ALPHAPIXELS = 0 then
            begin
              Result := TRGBImage.Create(Width, Height, Depth);

              if (Header.PixelFormat.RBitMask = $ff0000) and
                 (Header.PixelFormat.GBitMask = $00ff00) and
                 (Header.PixelFormat.BBitMask = $0000ff) then
                ReadOptimized_RGB8(Stream, Res, RowBytePadding) else
              if (Header.PixelFormat.RBitMask = $0000ff) and
                 (Header.PixelFormat.GBitMask = $00ff00) and
                 (Header.PixelFormat.BBitMask = $ff0000) then
                ReadOptimized_BGR8(Stream, Res, RowBytePadding) else
                ReadToRGB;
            end else
            begin
              Check(Header.PixelFormat.ABitMask <> 0, 'Invalid DDS pixel format: alpha channel flag specified (DDPF_ALPHAPIXELS), but alpha mask is zero');

              Result := TRGBAlphaImage.Create(Width, Height, Depth);

              if (Header.PixelFormat.RGBBitCount = 32) and
                 (Header.PixelFormat.ABitMask = $ff000000) and
                 (Header.PixelFormat.RBitMask = $00ff0000) and
                 (Header.PixelFormat.GBitMask = $0000ff00) and
                 (Header.PixelFormat.BBitMask = $000000ff) then
                ReadOptimized_ARGB8(Stream, Res, RowBytePadding) else
              if (Header.PixelFormat.RGBBitCount = 32) and
                 (Header.PixelFormat.ABitMask = $ff000000) and
                 (Header.PixelFormat.RBitMask = $000000ff) and
                 (Header.PixelFormat.GBitMask = $0000ff00) and
                 (Header.PixelFormat.BBitMask = $00ff0000) then
                ReadOptimized_ABGR8(Stream, Res, RowBytePadding) else
                ReadToRGBAlpha;
            end;
          utGrayscale_AlphaPossible:
            if Header.PixelFormat.Flags and DDPF_ALPHAPIXELS = 0 then
            begin
              Result := TGrayscaleImage.Create(Width, Height, Depth);

              if (Header.PixelFormat.RGBBitCount = 8) and
                 (Header.PixelFormat.RBitMask = $ff) then
                ReadOptimized_G8(Stream, Res, RowBytePadding) else
                ReadToGrayscale;
            end else
            begin
              Check(Header.PixelFormat.ABitMask <> 0, 'Invalid DDS pixel format: alpha channel flag specified (DDPF_ALPHAPIXELS), but alpha mask is zero');

              Result := TGrayscaleAlphaImage.Create(Width, Height, Depth);

              if (Header.PixelFormat.RGBBitCount = 16) and
                 (Header.PixelFormat.ABitMask = $ff00) and
                 (Header.PixelFormat.RBitMask = $00ff) then
                ReadOptimized_AG8(Stream, Res, RowBytePadding) else
                ReadToGrayscaleAlpha;
            end;
          utPureAlpha:
            begin
              Check(Header.PixelFormat.ABitMask <> 0, 'Invalid DDS pixel format: pure alpha expected, but alpha mask is zero');

              Result := TGrayscaleAlphaImage.Create(Width, Height, Depth);
              ReadToGrayscaleAlphaPure;
            end;
          else raise EInternalError.Create('UncompressedType?');
        end;
      end { ReadUncompressed };

      procedure ReadCompressed(Compression: TTextureCompression);
      var
        { Within ReadUncompressed, Result is always of TGPUCompressedImage class }
        Res: TGPUCompressedImage absolute Result;
      begin
        Result := TGPUCompressedImage.Create(Width, Height, Depth,
          Compression);
        try
          { check Header.PitchOrLinearSize vs Result.Size }
          if (Header.Flags and DDSD_LINEARSIZE <> 0) and
            { It seems there are textures with DDSD_LINEARSIZE set but
              PitchOrLinearSize 0, and we should ignore
              PitchOrLinearSize then (e.g. ~/images/dds_tests/greek_imperial_swordsman.tga.dds
              on chantal). Same for PitchOrLinearSize = -1
              (e.g. UberPack-1/Torque3D/levels/lonerock_island/
              inside UberPack-1 on opengameart.org). }
            (Header.PitchOrLinearSize <> 0) and
            (Header.PitchOrLinearSize <> High(LongWord)) and
            { check this only on base level }
            (MipmapLevel = 0) and
            (Header.PitchOrLinearSize <> Result.Size) and
            ( (Compression in [tcDxt1_RGB, tcDxt1_RGBA, tcDxt3, tcDxt5]) or
              { it seems that pvrtc includes mipmaps sizes too? }
              ((Compression in [
                tcPvrtc1_2bpp_RGB,
                tcPvrtc1_4bpp_RGB,
                tcPvrtc1_2bpp_RGBA,
                tcPvrtc1_4bpp_RGBA,
                tcPvrtc2_4bpp,
                tcPvrtc2_2bpp]) and not Composite.FMipmaps)
            ) then
            WritelnWarning('DDS', Format('Incorrect size for GPU compressed texture (DDS says %d, calculated should be %d, compression is %s)',
              [Header.PitchOrLinearSize, Result.Size,
               TextureCompressionInfo[Compression].Name]));

          Stream.ReadBuffer(Res.RawPixels^, Res.Size);

          Res.FlipVertical;
        { on unhandled error, make sure to free result }
        except FreeAndNil(Result); raise; end;
      end;

    begin
      { There are five mutually exclusive types of DDS pixel format:
        - uncompressed non-palette (DDPF_RGB, optional DDPF_ALPHAPIXELS) or
        - uncompressed palette (DDPF_PALETTEINDEXED8) or
        - compressed (DDPF_FOURCC) or
        - uncompressed grayscale (DDPF_LUMINANCE, optional DDPF_ALPHAPIXELS,
          GIMP-DDS can write such images).
        - neighter of the above: only DDPF_ALPHAPIXELS (pure alpha,
          GIMP-DDS can write such images; actually, GIMP-DDS omits even
          DDPF_ALPHAPIXELS).
      }

      Result := nil;
      try
        if Header.PixelFormat.Flags and DDPF_RGB <> 0 then
        begin
          Check(Header.PixelFormat.Flags and DDPF_FOURCC = 0, 'Invalid DDS pixel format: both uncompressed (DDPF_RGB) and compressed (DDPF_FOURCC) flags specified');
          Check(Header.PixelFormat.Flags and DDPF_PALETTEINDEXED8 = 0, 'Invalid DDS pixel format: both non-palette (DDPF_RGB) and palette (DDPF_PALETTEINDEXED8) flags specified');
          ReadUncompressed(utRGB_AlphaPossible);
        end else
        if Header.PixelFormat.Flags and DDPF_PALETTEINDEXED8 <> 0 then
        begin
          Assert(Header.PixelFormat.Flags and DDPF_RGB = 0);
          Check(Header.PixelFormat.Flags and DDPF_FOURCC = 0, 'Invalid DDS pixel format: both uncompressed (palette, DDPF_PALETTEINDEXED8) and compressed (DDPF_FOURCC) flags specified');

          raise EInvalidDDS.Create('TODO: Unsupported pixel format for DDS: palette images not supported now, please report with sample image');
        end else
        if Header.PixelFormat.Flags and DDPF_FOURCC <> 0 then
        begin
          Assert(Header.PixelFormat.Flags and DDPF_RGB = 0);
          Assert(Header.PixelFormat.Flags and DDPF_PALETTEINDEXED8 = 0);

          if Header.PixelFormat.FourCC = 'DXT1' then
            { There's no way to recognize from DDS file header whether it uses
              or not some transparent pixels. (DDPF_ALPHAPIXELS is never
              specified for compressed formats.)
              Potentially, every DXT1 image may have some transparent pixels,
              so use tcDxt1_RGBA. }
            ReadCompressed(tcDxt1_RGBA) else
          if Header.PixelFormat.FourCC = 'DXT3' then
            ReadCompressed(tcDxt3) else
          if Header.PixelFormat.FourCC = 'DXT5' then
            ReadCompressed(tcDxt5) else
          if Header.PixelFormat.FourCC = 'ATC ' then
            ReadCompressed(tcATITC_RGB) else
          if Header.PixelFormat.FourCC = 'ATCA' then
            ReadCompressed(tcATITC_RGBA_ExplicitAlpha) else
          if Header.PixelFormat.FourCC = 'ATCI' then
            ReadCompressed(tcATITC_RGBA_InterpolatedAlpha) else
          if Header.PixelFormat.FourCC = 'PTC2' then
          begin
            { No way to detect tcPvrtc1_2bpp_RGB? }
            ReadCompressed(tcPvrtc1_2bpp_RGBA);
          end else
          if Header.PixelFormat.FourCC = 'PTC4' then
          begin
            { No way to detect tcPvrtc1_4bpp_RGB? }
            ReadCompressed(tcPvrtc1_4bpp_RGBA);
          end else
          if Header.PixelFormat.FourCC = 'DX10' then
          begin
            case HeaderDxt10.DxgiFormat of
              DXGI_FORMAT_R8G8B8A8_UNORM:
                begin
                  Result := TRGBAlphaImage.Create(Width, Height, Depth);
                  ReadOptimized_ARGB8(Stream, TCastleImage(Result), GetRowBytePadding(4));
                end;
              DXGI_FORMAT_UNKNOWN:
                begin
                  if Composite.AutomaticCompression then
                    ReadCompressed(Composite.AutomaticCompressionType)
                  else
                    raise EInvalidDDS.Create('Unknown texture compression format in DDS: FourCC is DX10 and DxgiFormat is zero (unknown). You need to assign TCompositeImage.AutomaticCompression and TCompositeImage.AutomaticCompressionType in the engine to override this.');
                end;
              else
                raise EInvalidDDS.CreateFmt('Unsupported texture compression format in DDS: FourCC is DX10 and DxgiFormat is %d. Please report this (as Castle Game Engine issue), with the affected DDS file, we can possibly fix this DxgiFormat handling.',
                  [HeaderDxt10.DxgiFormat]);
            end;
          end else
          if (LEToN(Header.PixelFormat.FourCCIntLE) = D3DFMT_R16F) or
             (LEToN(Header.PixelFormat.FourCCIntLE) = D3DFMT_G16R16F) or
             (LEToN(Header.PixelFormat.FourCCIntLE) = D3DFMT_A16B16G16R16F) or
             (LEToN(Header.PixelFormat.FourCCIntLE) = D3DFMT_R32F) or
             (LEToN(Header.PixelFormat.FourCCIntLE) = D3DFMT_G32R32F) or
             (LEToN(Header.PixelFormat.FourCCIntLE) = D3DFMT_A32B32G32R32F) then
            raise EInvalidDDS.Create('Unsupported texture compression for DDS: FourCC indicates float texture, not supported yet') else
            raise EInvalidDDS.CreateFmt('Unsupported texture compression for DDS: FourCC is "%s%s%s%s"',
              [ SReadableForm(Header.PixelFormat.FourCC[0]),
                SReadableForm(Header.PixelFormat.FourCC[1]),
                SReadableForm(Header.PixelFormat.FourCC[2]),
                SReadableForm(Header.PixelFormat.FourCC[3]) ]);
        end else
        if Header.PixelFormat.Flags and DDPF_LUMINANCE <> 0 then
        begin
          { Uncompressed grayscale image (possibly with alpha). }
          Check(Header.PixelFormat.RBitMask <> 0,
            'Invalid DDS pixel format: luminance image (DDPF_LUMINANCE flag) specified, but Red mask is zero');
          Check(
            ( (Header.PixelFormat.RBitMask = Header.PixelFormat.GBitMask) and
              (Header.PixelFormat.RBitMask = Header.PixelFormat.BBitMask) ) or
            ( (Header.PixelFormat.GBitMask = 0) and
              (Header.PixelFormat.BBitMask = 0) ),
            'Invalid DDS pixel format: luminance image (DDPF_LUMINANCE flag) specified, so Red = Green = Blue masks should be equal or Green = Blue = should be zero, but they are not');
          ReadUncompressed(utGrayscale_AlphaPossible);
        end else
        begin
          { GIMP-DDS plugin doesn't set DDPF_ALPHAPIXELS, but this is wrong IMO,
            so I warn about it. }
          CheckWarn(Header.PixelFormat.Flags and DDPF_ALPHAPIXELS <> 0, 'Invalid DDS pixel format: no flag specified, even not DDPF_ALPHAPIXELS. We will assume this is alpha-only image, as GIMP-DDS plugin can write such files');
          Check(
            (Header.PixelFormat.RBitMask = 0) and
            (Header.PixelFormat.GBitMask = 0) and
            (Header.PixelFormat.BBitMask = 0),
            'Invalid DDS pixel format: pure alpha image (only DDPF_ALPHAPIXELS flag) specified, but Red / Green / Blue makss are not zero');
          ReadUncompressed(utPureAlpha);
        end;
      except FreeAndNil(Result); raise end;

      // useful to see this URL in logs, TextureMemoryProfiler dumps etc.
      Result.URL := URL + '[subimage]';
    end { ReadImage };

  var
    W, H, D: Cardinal;
    I: Integer;
    Side: TCompositeImage.TDDSCubeMapSide;
  begin
    { Check that Width/Height are power of two, this is needed to make
      sure images reading code is sensible (otherwise, there's no way
      to tell what are the sizes of following images). }
    if Composite.FMipmaps then
    begin
      if (not IsPowerOf2(Width)) or
         (not IsPowerOf2(Height)) then
      begin
        { nvcompress can generate such DDS files, if you input a non-power-of-two image }
        WritelnWarning('DDS image has mipmaps, but width or height are not a power of 2: %d x %d. We will ignore these mipmaps, since we cannot determine their size', [Width, Height]);
        Composite.FMipmaps := false;
      end;
    end;

    case Composite.CompositeType of
      ctTexture:
        begin
          if Composite.Mipmaps then
          begin
            Images.Count := Composite.FMipmapsCount;
            W := Width;
            H := Height;
            for I := 0 to Composite.FMipmapsCount - 1 do
            begin
              Images[I] := ReadImage(W, H, 1, I);
              W := Max(1, W div 2);
              H := Max(1, H div 2);
            end;
          end else
          begin
            Images.Count := 1;
            Images[0] := ReadImage(Width, Height, 1, 0);
          end;
        end;
      ctCubeMap:
        begin
          for Side := Low(Side) to High(Side) do
            if Side in Composite.FCubeMapSides then
            begin
              if Composite.Mipmaps then
              begin
                W := Width;
                H := Height;
                for I := 0 to Composite.FMipmapsCount - 1 do
                begin
                  Images.Add(ReadImage(W, H, 1, I));
                  W := Max(1, W div 2);
                  H := Max(1, H div 2);
                end;
              end else
              begin
                Images.Add(ReadImage(Width, Height, 1, 0));
              end;
            end;
        end;
      ctVolume:
        begin
          if Composite.Mipmaps then
          begin
            W := Width;
            H := Height;
            D := Depth;
            for I := 0 to Composite.FMipmapsCount - 1 do
            begin
              Images.Add(ReadImage(W, H, D, I));
              W := Max(1, W div 2);
              H := Max(1, H div 2);
              D := Max(1, D div 2);
            end;
          end else
          begin
            Images.Add(ReadImage(Width, Height, Depth, 0));
          end;
        end;
      else raise EInternalError.Create('CompositeType?');
    end;
  end { ReadImages };

begin
  try
    ReadHeader;
    ReadHeaderDxt10;
    ReadImages;
  except
    { EReadError is raised by Stream.ReadBuffer when it can't read
      specified number of bytes }
    on E: EReadError do raise EInvalidDDS.Create('Read error: ' + E.Message);
    on E: ECheckFailed do raise EInvalidDDS.Create('Wrong DDS file: ' + E.Message);
  end;
end;

procedure TDDSHandler.SaveToStream(const Stream: TStream);

  procedure WriteHeader;
  const
    Magic: array [0 .. 3] of char = 'DDS ';
  var
    Header: TDDSHeader;
  begin
    Stream.WriteBuffer(Magic, SizeOf(Magic));

    { initially fill Header with zeros, to avoid writing memory garbage
      (which would be potential security risk) to file. }
    FillChar(Header, SizeOf(Header), 0);

    Header.Size := SizeOf(Header);
    Header.Flags := DDSD_CAPS or DDSD_HEIGHT or DDSD_WIDTH or DDSD_PIXELFORMAT
      or DDSCAPS_TEXTURE;
    Header.Height := Height;
    Header.Width := Width;
    Header.Caps1 := 0; { for starters }

    if Composite.Mipmaps then
    begin
      Header.Caps1 := Header.Caps1 or DDSCAPS_COMPLEX or DDSCAPS_MIPMAP;
      Header.Flags := Header.Flags or DDSD_MIPMAPCOUNT;
      Header.MipMapCount := Composite.MipmapsCount;
    end;

    case Composite.CompositeType of
      ctTexture: ;
      ctCubeMap:
        begin
          Header.Caps1 := Header.Caps1 or DDSCAPS_COMPLEX;
          Header.Caps2 := Header.Caps2 or DDSCAPS2_CUBEMAP;

          if dcsPositiveX in Composite.FCubeMapSides then Header.Caps2 := Header.Caps2 or DDSCAPS2_CUBEMAP_POSITIVEX;
          if dcsNegativeX in Composite.FCubeMapSides then Header.Caps2 := Header.Caps2 or DDSCAPS2_CUBEMAP_NEGATIVEX;
          if dcsPositiveY in Composite.FCubeMapSides then Header.Caps2 := Header.Caps2 or DDSCAPS2_CUBEMAP_POSITIVEY;
          if dcsNegativeY in Composite.FCubeMapSides then Header.Caps2 := Header.Caps2 or DDSCAPS2_CUBEMAP_NEGATIVEY;
          if dcsPositiveZ in Composite.FCubeMapSides then Header.Caps2 := Header.Caps2 or DDSCAPS2_CUBEMAP_POSITIVEZ;
          if dcsNegativeZ in Composite.FCubeMapSides then Header.Caps2 := Header.Caps2 or DDSCAPS2_CUBEMAP_NEGATIVEZ;
        end;
      ctVolume:
        begin
          Header.Caps1 := Header.Caps1 or DDSCAPS_COMPLEX;
          Header.Caps2 := Header.Caps2 or DDSCAPS2_VOLUME;
          Header.Flags := Header.Flags or DDSD_DEPTH;
          Header.Depth := Depth;
        end;
      else raise EInternalError.Create('CompositeType');
    end;

    if Images[0] is TCastleImage then
    begin
      { For uncompressed image, PitchOrLinearSize is row length }
      Header.PitchOrLinearSize := Composite.Width * TCastleImage(Images[0]).PixelSize;
      Header.Flags := Header.Flags or DDSD_PITCH;
    end else
    if Images[0] is TGPUCompressedImage then
    begin
      { For compressed image, PitchOrLinearSize is image length }
      Header.PitchOrLinearSize := TGPUCompressedImage(Images[0]).Size;
      Header.Flags := Header.Flags or DDSD_LINEARSIZE;
    end else
      raise Exception.CreateFmt('Cannot save image class %s to DDS file', [Images[0].ClassName]);

    Header.PixelFormat.Size := SizeOf(Header.PixelFormat);
    Header.PixelFormat.Flags := 0; { for starters }

    if Images[0] is TGrayscaleImage then
    begin
      Header.PixelFormat.Flags := DDPF_LUMINANCE;
      Header.PixelFormat.RGBBitCount := 8;
      Header.PixelFormat.RBitMask := $ff;
      Header.PixelFormat.GBitMask := $ff;
      Header.PixelFormat.BBitMask := $ff;
      Header.PixelFormat.ABitMask := 0;
    end else
    if Images[0] is TGrayscaleAlphaImage then
    begin
      Header.PixelFormat.Flags := DDPF_LUMINANCE or DDPF_ALPHAPIXELS;
      Header.PixelFormat.RGBBitCount := 16;
      Header.PixelFormat.RBitMask := $00ff;
      Header.PixelFormat.GBitMask := $00ff;
      Header.PixelFormat.BBitMask := $00ff;
      Header.PixelFormat.ABitMask := $ff00;
    end else
    if Images[0] is TRGBImage then
    begin
      Header.PixelFormat.Flags := DDPF_RGB;
      Header.PixelFormat.RGBBitCount := 24;
      Header.PixelFormat.RBitMask := $000000ff;
      Header.PixelFormat.GBitMask := $0000ff00;
      Header.PixelFormat.BBitMask := $00ff0000;
      Header.PixelFormat.ABitMask := 0;
    end else
    if Images[0] is TRGBAlphaImage then
    begin
      Header.PixelFormat.Flags := DDPF_RGB or DDPF_ALPHAPIXELS;
      Header.PixelFormat.RGBBitCount := 32;
      Header.PixelFormat.RBitMask := $000000ff;
      Header.PixelFormat.GBitMask := $0000ff00;
      Header.PixelFormat.BBitMask := $00ff0000;
      Header.PixelFormat.ABitMask := $ff000000;
    end else
    if Images[0] is TGPUCompressedImage then
    begin
      Header.PixelFormat.Flags := DDPF_FOURCC;
      case TGPUCompressedImage(Images[0]).Compression of
        tcDxt1_RGB,
        tcDxt1_RGBA : Header.PixelFormat.FourCC := 'DXT1';
        tcDxt3:       Header.PixelFormat.FourCC := 'DXT3';
        tcDxt5:       Header.PixelFormat.FourCC := 'DXT5';
        tcPvrtc1_2bpp_RGB,
        tcPvrtc1_2bpp_RGBA: Header.PixelFormat.FourCC := 'PTC2';
        tcPvrtc1_4bpp_RGB,
        tcPvrtc1_4bpp_RGBA: Header.PixelFormat.FourCC := 'PTC4';
        tcATITC_RGB                   : Header.PixelFormat.FourCC := 'ATC ';
        tcATITC_RGBA_ExplicitAlpha    : Header.PixelFormat.FourCC := 'ATCA';
        tcATITC_RGBA_InterpolatedAlpha: Header.PixelFormat.FourCC := 'ATCI';
        else raise EImageSaveError.CreateFmt('When saving DDS: Cannot save to DDS with compression %s',
          [TextureCompressionInfo[TGPUCompressedImage(Images[0]).Compression].Name]);
      end;
    end else
      raise Exception.CreateFmt('Unable to save image class %s to DDS image',
        [Images[0].ClassName]);

    { We could do this code *always*, not only on big-endian machines.
      But it does nothing on little-endian machines, so it's a waste of time
      there, so only do it in -dDEBUG mode. }
    {$if defined(ENDIAN_BIG) or defined(DEBUG)}
    Header.Size                    := NToLE(Header.Size                   );
    Header.Flags                   := NToLE(Header.Flags                  );
    Header.Height                  := NToLE(Header.Height                 );
    Header.Width                   := NToLE(Header.Width                  );
    Header.PitchOrLinearSize       := NToLE(Header.PitchOrLinearSize      );
    Header.Depth                   := NToLE(Header.Depth                  );
    Header.MipMapCount             := NToLE(Header.MipMapCount            );
    Header.PixelFormat.Size        := NToLE(Header.PixelFormat.Size       );
    Header.PixelFormat.Flags       := NToLE(Header.PixelFormat.Flags      );
    Header.PixelFormat.RGBBitCount := NToLE(Header.PixelFormat.RGBBitCount);
    Header.PixelFormat.RBitMask    := NToLE(Header.PixelFormat.RBitMask   );
    Header.PixelFormat.GBitMask    := NToLE(Header.PixelFormat.GBitMask   );
    Header.PixelFormat.BBitMask    := NToLE(Header.PixelFormat.BBitMask   );
    Header.PixelFormat.ABitMask    := NToLE(Header.PixelFormat.ABitMask   );
    Header.Caps1                   := NToLE(Header.Caps1                  );
    Header.Caps2                   := NToLE(Header.Caps2                  );
    {$endif}

    Stream.WriteBuffer(Header, SizeOf(Header));
  end;

  procedure WriteImages;

    procedure WriteUncompressedImage(Image: TCastleImage);
    var
      Z, Y: Integer;
    begin
      for Z := 0 to Image.Depth - 1 do
        { We have to invert rows order when saving to DDS }
        for Y := Image.Height - 1 downto 0 do
          Stream.WriteBuffer(Image.RowPtr(Y, Z)^, Image.Width * Image.PixelSize);
    end;

    procedure WriteCompressedImage(Image: TGPUCompressedImage);
    var
      Temp: TGPUCompressedImage;
    begin
      Temp := Image.MakeCopy;
      try
        { invert rows when saving to DDS }
        Temp.FlipVertical;
        Stream.WriteBuffer(Temp.RawPixels^, Temp.Size);
      finally FreeAndNil(Temp) end;
    end;

  var
    I: Integer;
  begin
    for I := 0 to Images.Count - 1 do
      if Images[I] is TCastleImage then
        WriteUncompressedImage(TCastleImage(Images[I])) else
      begin
        Assert(Images[I] is TGPUCompressedImage);
        WriteCompressedImage(TGPUCompressedImage(Images[I]));
      end;
  end;

begin
  WriteHeader;
  WriteImages;
end;
