 | 
|
|
|
{ }
{ Unicode codecs v3.12 }
{ }
{ This unit is copyright © 2002-2004 by }
{ David J Butler and Dieter Köhler. }
{ }
{ This unit is part of Delphi Fundamentals. }
{ Its original file name is cUnicodeCodecs.pas }
{ The latest version is available from the Fundamentals home page }
{ http://fundementals.sourceforge.net/ }
{ A forum is available on SourceForge for general discussion }
{ http://sourceforge.net/forum/forum.php?forum_id=2117 }
{ }
{ This unit is also part of the Open XML Utility Library. }
{ http://www.philo.de/xml/ }
{ }
{ }
{ LICENSE }
{ }
{ The contents of this file are subject to the Mozilla Public License Version }
{ 1.1 (the "License"); you may not use this file except in compliance with }
{ the License. You may obtain a copy of the License at }
{ "http://www.mozilla.org/MPL/" }
{ }
{ Software distributed under the License is distributed on an "AS IS" basis, }
{ WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for }
{ the specific language governing rights and limitations under the License. }
{ }
{ The Original Code is "cUnicodeCodecs.pas". }
{ }
{ The Initial Developers of the Original Code are David J Butler (Pretoria, }
{ South Africa, "http://fundementals.sourceforge.net/") and Dieter Köhler }
{ (Heidelberg, Germany, "http://www.philo.de/"). Portions created by the }
{ Initial Developers are Copyright (C) 2002-2004 David J Butler and }
{ Dieter Köhler. All Rights Reserved. }
{ }
{ Alternatively, the contents of this file may be used under the terms of the }
{ GNU General Public License Version 2 or later (the "GPL"), in which case the }
{ provisions of the GPL are applicable instead of those above. If you wish to }
{ allow use of your version of this file only under the terms of the GPL, and }
{ not to allow others to use your version of this file under the terms of the }
{ MPL, indicate your decision by deleting the provisions above and replace }
{ them with the notice and other provisions required by the GPL. If you do not }
{ delete the provisions above, a recipient may use your version of this file }
{ under the terms of any one of the MPL or the GPL. }
{ }
{ }
{ DESCRIPTION }
{ }
{ Codecs (encoders/decoders) for Unicode text. }
{ }
{ To decode or encode Unicode text, use one of the EncodingToUTF16 or }
{ UTF16ToEncoding functions. }
{ }
{ For example, to convert an ISO-8859-1 string into an Unicode string: }
{ }
{ WideStr := EncodingToUTF16(TISO8859_1Codec, 'ISO-8859-1 String'); }
{ }
{ or alternatively, using an alias: }
{ }
{ WideStr := EncodingToUTF16('iso-8859-1', 'ISO-8859-1 String'); }
{ WideStr := EncodingToUTF16('latin1', 'ISO-8859-1 String'); }
{ }
{ }
{ REVISION HISTORY }
{ }
{ 17/04/2002 0.01 Initial version. ISO8859, Mac, Win1250-1252, UTF. }
{ 20/04/2002 0.02 EBCDIC-US. }
{ 28/10/2002 3.03 Refactored. }
{ 29/10/2002 3.04 UTF-8 string functions. }
{ 04/11/2002 3.05 Test cases. Fixed bug in UTF-8 encoding function. }
{ 23/05/2003 3.06 Detection routines. }
{ 28/09/2003 3.07 Renamed ASCII to USASCII for clarity. }
{ 30/10/2003 3.08 Moved character mappings to unit cUnicodeMaps. }
{ 10/01/2004 3.09 Moved generic functions to cUnicodeChar and cUnicode. }
{ Revision of codec classes. }
{ 15/03/2004 3.10 Moved character mappings into codec classes. }
{ UCS2 codec. }
{ 11/04/2004 3.11 Improved Read/Write functions. }
{ 19/04/2004 3.12 Small revisions. }
{ }
{$INCLUDE cDefines.inc}
unit cUnicodeCodecs;
interface
uses
{ Delphi }
SysUtils;
const
UnitName = 'cUnicodeCodecs';
UnitVersion = '3.12';
UnitCopyright = 'Copyright (c) 2002-2004 David J Butler and Dieter Köhler';
{ }
{ UCS-4 definitions }
{ }
{$IFDEF DELPHI5}
type
UCS4Char = LongWord;
PUCS4Char = ^UCS4Char;
{$ENDIF}
const
UCS4_STRING_TERMINATOR = $9C;
UCS4_LF = $0A;
UCS4_CR = $0D;
{ }
{ US-ASCII string functions }
{ }
function IsUSASCIIString(const S: AnsiString): Boolean;
function IsUSASCIIWideBuf(const Buf: PWideChar; const Len: Integer): Boolean;
function IsUSASCIIWideString(const S: WideString): Boolean;
{ }
{ Long string conversion functions }
{ }
procedure LongToWide(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer);
function LongStringToWideString(const S: AnsiString): WideString;
procedure WideToLong(const Buf: Pointer; const Len: Integer;
const DestBuf: Pointer);
function WideToLongString(const P: PWideChar; const Len: Integer): AnsiString;
function WideStringToLongString(const S: WideString): AnsiString;
{ }
{ UTF-8 character conversion functions }
{ }
type
TUTF8Error = (
UTF8ErrorNone,
UTF8ErrorInvalidEncoding,
UTF8ErrorIncompleteEncoding,
UTF8ErrorInvalidBuffer,
UTF8ErrorOutOfRange );
function UTF8ToUCS4Char(const P: PChar; const Size: Integer;
out SeqSize: Integer; out Ch: UCS4Char): TUTF8Error;
function UTF8ToWideChar(const P: PChar; const Size: Integer;
out SeqSize: Integer; out Ch: WideChar): TUTF8Error;
procedure UCS4CharToUTF8(const Ch: UCS4Char; const Dest: Pointer;
const DestSize: Integer; out SeqSize: Integer);
procedure WideCharToUTF8(const Ch: WideChar; const Dest: Pointer;
const DestSize: Integer; out SeqSize: Integer);
{ }
{ UTF-16 character conversion functions }
{ }
procedure UCS4CharToUTF16BE(const Ch: UCS4Char; const Dest: Pointer;
const DestSize: Integer; out SeqSize: Integer);
procedure UCS4CharToUTF16LE(const Ch: UCS4Char; const Dest: Pointer;
const DestSize: Integer; out SeqSize: Integer);
{ }
{ UTF-8 string functions }
{ }
const
UTF8BOMSize = 3;
function DetectUTF8BOM(const P: PChar; const Size: Integer): Boolean;
function UTF8CharSize(const P: PChar; const Size: Integer): Integer;
function UTF8BufLength(const P: PChar; const Size: Integer): Integer;
function UTF8StringLength(const S: String): Integer;
function UTF8StringToWideString(const S: String): WideString;
function UTF8StringToLongString(const S: String): String;
function UCS4CharToUTF8CharSize(const Ch: UCS4Char): Integer;
function WideBufToUTF8Size(const Buf: PWideChar; const Len: Integer): Integer;
function WideStringToUTF8Size(const S: WideString): Integer;
function WideBufToUTF8String(const Buf: PWideChar; const Len: Integer): String;
function WideStringToUTF8String(const S: WideString): String;
function LongBufToUTF8Size(const Buf: PChar; const Len: Integer): Integer;
function LongStringToUTF8Size(const S: String): Integer;
function LongStringToUTF8String(const S: String): String;
function UCS4CharToUTF8String(const Ch: UCS4Char): String;
function ISO8859_1StringToUTF8String(const S: String): String;
{ }
{ UTF-16 functions }
{ }
const
UTF16BOMSize = 2;
function DetectUTF16BEBOM(const P: PChar; const Size: Integer): Boolean;
function DetectUTF16LEBOM(const P: PChar; const Size: Integer): Boolean;
function DetectUTF16BOM(const P: PChar; const Size: Integer;
out SwapEndian: Boolean): Boolean;
function SwapUTF16Endian(const P: WideChar): WideChar;
{ }
{ TCustomUnicodeCodec }
{ Base class for Unicode Codec implementations. }
{ }
type
TCodecErrorAction = (
eaException, // Raise an exception (default)
eaStop, // Stop encoding/decoding
eaIgnore, // Ignore error and continue
eaSkip, // Skip character and continue
eaReplace); // Replace invalid character and continue
TCodecReadLFOption = (
lrPass, // No normalization takes place (default)
lrNormalize); // Line breaks are adjusted to Linux-style breaks with a
// single LINE FEED, i.e. a sequence of CARRIAGE RETURN
// ($0D) + LINE FEED ($0A) or a single CARRIAGE RETURN is
// normalized to a single LINE FEED ($0A)
TCodecWriteLFOption = (
lwLF, // Transcode LINE FEED into LINE FEED (default)
lwCR, // Transcode LINE FEED into CARRIAGE RETURN
lwCRLF); // Transcode LINE FEED into CARRIAGE RETURN + LINE FEED
TCodecReadEvent = procedure (Sender: TObject; var Buf; Count: Longint;
var Ok: Boolean) of object;
TCodecWriteEvent = procedure (Sender: TObject; const Buf; Count: Longint)
of object;
TCustomUnicodeCodec = class
private
FErrorAction : TCodecErrorAction;
FDecodeReplaceChar : WideChar;
FReadLFOption : TCodecReadLFOption;
FWriteLFOption : TCodecWriteLFOption;
FOnRead : TCodecReadEvent;
FOnWrite : TCodecWriteEvent;
FReadAhead : Boolean; // Flag used for LF input normalization
FReadAheadBuffer : UCS4Char; // Buffer storage LF input normalization
FReadAheadByteCount : Integer; // Buffer storage LF input normalization
protected
procedure ResetReadAhead;
procedure SetDecodeReplaceChar(const Value: WideChar);
procedure SetErrorAction(const Value: TCodecErrorAction);
procedure SetReadLFOption(const Value: TCodecReadLFOption); virtual;
procedure SetWriteLFOption(const Value: TCodecWriteLFOption); virtual;
procedure SetOnRead(const Value: TCodecReadEvent);
function ReadBuffer(var Buf; Count: Integer): Boolean;
procedure WriteBuffer(const Buf; Count: Integer);
procedure InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer); virtual; abstract;
// Implementation guidelines for derived classes:
// ReadUCS4Char calls (if necessary repeatedly and sometimes
// ahead) the InternalReadUCS4Char function, which must call
// ReadBuffer to request buffer values. ReadBuffer calls the
// OnRead event to request buffer values, similar to the Delphi
// VCL TStream.Read function.
// It must raise an EConvertError exception if the byte values
// returned by the OnRead event contain code that cannot be
// converted to a UCS-4 character or if the result value falls
// into the reserved surrogate area [$D800..$DFFF].
// If ReadBuffer returns False, the UCS-4 character $9C
// (STRING TERMINATOR) must be returned.
// LINE FEED characters ($A) are transformed according to the
// value of ReadLFOption property by the ReadUCS4Char function.
procedure InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer); virtual; abstract;
// Implementation guideline for derived classes:
// WriteUCS4Char calls (if necessary repeatedly) the
// InternalWriteUCS4Char procedure, which must call WriteBuffer
// to write buffer values. WriteBuffer calls the OnWrite event
// to send the buffer values, similar to the Delphi VCL
// TStream.Write function.
// It must raise an EConvertError exception if the specified
// UCS-4 character cannot be converted into the target encoding.
// If no OnWrite event handler is assigned calling WriteUCS4Char
// simply has no effect.
// LINE FEED characters ($A) are transformed according to the
// value of the WriteLFOption property by the WriteUCS4Char
// procedure.
public
constructor Create; virtual;
constructor CreateEx(const AErrorAction: TCodecErrorAction = eaException;
const ADecodeReplaceChar: WideChar = WideChar(#$FFFD);
const AReadLFOption: TCodecReadLFOption = lrPass;
const AWriteLFOption: TCodecWriteLFOption = lwCRLF);
procedure Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer); virtual; abstract;
function Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String; virtual; abstract;
procedure DecodeStr(const Buf: Pointer; const BufSize: Integer;
var Dest: WideString);
function EncodeStr(const S: WideString): String;
procedure ReadUCS4Char(out C: UCS4Char; out ByteCount: Integer);
procedure WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer); virtual;
// Implementation guideline for derived clases:
// WriteUCS4Char can be overridden to implement more efficient
// handling of LINE FEED character transformations.
property ErrorAction: TCodecErrorAction read FErrorAction write SetErrorAction default eaException;
property DecodeReplaceChar: WideChar read FDecodeReplaceChar write SetDecodeReplaceChar default #$FFFD;
property ReadLFOption: TCodecReadLFOption read FReadLFOption write SetReadLFOption default lrPass;
property WriteLFOption: TCodecWriteLFOption read FWriteLFOption write SetWriteLFOption default lwLF;
property OnRead: TCodecReadEvent read FOnRead write SetOnRead;
property OnWrite: TCodecWriteEvent read FOnWrite write FOnWrite;
end;
TUnicodeCodecClass = class of TCustomUnicodeCodec;
EUnicodeCodecException = class(Exception)
ProcessedBytes : Integer;
end;
{ }
{ Unicode Codec alias functions }
{ }
function GetCodecClassByAlias(const CodecAlias: String): TUnicodeCodecClass;
function GetEncodingName(const CodecClass: TUnicodeCodecClass): String;
{$IFDEF OS_MSWIN}
{ }
{ Windows system encoding functions }
{ }
function GetSystemEncodingName: String; {$IFDEF DELPHI6_UP}platform;{$ENDIF}
function GetSystemEncodingCodecClass: TUnicodeCodecClass; {$IFDEF DELPHI6_UP}platform;{$ENDIF}
{$ENDIF}
{ }
{ Encoding detection }
{ }
function DetectUTFEncoding(const Buf: Pointer; const BufSize: Integer;
var BOMSize: Integer): TUnicodeCodecClass;
{ }
{ Encoding conversion functions }
{ }
function EncodingToUTF16(const CodecClass: TUnicodeCodecClass;
const Buf: Pointer; const BufSize: Integer): WideString; overload;
function EncodingToUTF16(const CodecClass: TUnicodeCodecClass;
const S: String): WideString; overload;
function EncodingToUTF16(const CodecAlias: String;
const Buf: Pointer; const BufSize: Integer): WideString; overload;
function EncodingToUTF16(const CodecAlias, S: String): WideString; overload;
function UTF16ToEncoding(const CodecClass: TUnicodeCodecClass;
const S: WideString): String; overload;
function UTF16ToEncoding(const CodecAlias: String;
const S: WideString): String; overload;
{ }
{ TUTF8Codec }
{ Unicode Codec implementation for UTF-8. }
{ }
type
TUTF8Codec = class(TCustomUnicodeCodec)
protected
procedure InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer); override;
procedure InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer); override;
public
procedure Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer); override;
function Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String; override;
procedure WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer); override;
end;
{ }
{ TUTF16BECodec }
{ Unicode Codec implementation for UTF-16BE. }
{ }
type
TUTF16BECodec = class(TCustomUnicodeCodec)
protected
procedure InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer); override;
procedure InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer); override;
public
procedure Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer); override;
function Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String; override;
procedure WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer); override;
end;
{ }
{ TUTF16LECodec }
{ Unicode Codec implementation for UTF-16LE. }
{ }
type
TUTF16LECodec = class(TCustomUnicodeCodec)
protected
procedure InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer); override;
procedure InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer); override;
public
procedure Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer); override;
function Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String; override;
procedure WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer); override;
end;
{ }
{ TUCS4BECodec }
{ Unicode Codec implementation for ISO 10646 UCS-4BE. }
{ }
type
TUCS4BECodec = class(TCustomUnicodeCodec)
protected
procedure InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer); override;
procedure InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer); override;
public
procedure Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer); override;
function Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String; override;
procedure WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer); override;
end;
{ }
{ TUCS4LECodec }
{ Unicode Codec implementation for ISO 10646 UCS-4BE. }
{ }
type
TUCS4LECodec = class(TCustomUnicodeCodec)
protected
procedure InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer); override;
procedure InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer); override;
public
procedure Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer); override;
function Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String; override;
procedure WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer); override;
end;
{ }
{ TUCS4_2143Codec }
{ Unicode Codec implementation for ISO 10646 UCS-4BE. }
{ }
type
TUCS4_2143Codec = class(TCustomUnicodeCodec)
protected
procedure InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer); override;
procedure InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer); override;
public
procedure Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer); override;
function Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String; override;
procedure WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer); override;
end;
{ }
{ TUCS4_3412Codec }
{ Unicode Codec implementation for ISO 10646 UCS-4BE. }
{ }
type
TUCS4_3412Codec = class(TCustomUnicodeCodec)
protected
procedure InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer); override;
procedure InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer); override;
public
procedure Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer); override;
function Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String; override;
procedure WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer); override;
end;
{ }
{ TUCS2Codec }
{ Unicode Codec implementation for ISO 10646 UCS-2. }
{ }
type
TUCS2Codec = class(TCustomUnicodeCodec)
protected
procedure InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer); override;
procedure InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer); override;
public
procedure Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer); override;
function Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String; override;
procedure WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer); override;
end;
{ }
{ TCustomSingleByteCodec }
{ Base class for single-byte encodings. }
{ }
type
TCustomSingleByteCodec = class(TCustomUnicodeCodec)
protected
FEncodeReplaceChar : AnsiChar;
procedure InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer); override;
procedure InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer); override;
public
constructor Create; override;
constructor CreateEx(const ErrorAction: TCodecErrorAction = eaException;
const DecodeReplaceChar: WideChar = WideChar(#$FFFD);
const EncodeReplaceChar: AnsiChar = AnsiChar(#32));
property EncodeReplaceChar: AnsiChar read FEncodeReplaceChar write FEncodeReplaceChar;
procedure Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer); override;
function Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String; override;
function DecodeChar(const P: AnsiChar): WideChar; virtual; abstract;
function EncodeChar(const Ch: WideChar): AnsiChar; virtual; abstract;
function DecodeUCS4Char(const P: AnsiChar): UCS4Char; virtual;
function EncodeUCS4Char(const Ch: UCS4Char): AnsiChar; virtual;
end;
TUnicodeSingleByteCodecClass = class of TCustomSingleByteCodec;
{ }
{ ISO-8859 }
{ }
type
TISO8859_1Codec = class(TCustomSingleByteCodec)
public
procedure Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer); override;
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TISO8859_2Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TISO8859_3Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TISO8859_4Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TISO8859_5Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TISO8859_6Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TISO8859_7Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TISO8859_8Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TISO8859_9Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TISO8859_10Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TISO8859_13Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TISO8859_14Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TISO8859_15Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
{ }
{ Windows }
{ }
type
TWindows37Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows437Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows500Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows708Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows737Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows775Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows850Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows852Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows855Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows857Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows858Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows861Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows862Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows863Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows864Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows865Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows866Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows869Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows870Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows874Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows875Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1026Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1047Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1140Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1141Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1142Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1143Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1144Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1145Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1146Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1147Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1148Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1149Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1250Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1251Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1252Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1253Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1254Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1255Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1256Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1257Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TWindows1258Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
{ }
{ IBM }
{ }
type
TIBM037Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM038Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM256Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM273Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM274Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM275Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM277Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM278Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM280Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM281Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM284Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM285Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM290Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM297Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM420Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM423Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM424Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM437Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM500Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM850Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM851Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM852Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM855Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM857Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM860Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM861Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM862Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM863Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM864Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM865Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM866Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM868Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM869Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM870Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM871Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM874Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM875Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM880Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM904Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM905Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM918Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM1004Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM1026Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TIBM1047Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
{ }
{ Macintosh }
{ }
type
TMacLatin2Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TMacRomanCodec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TMacCyrillicCodec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TMacGreekCodec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TMacIcelandicCodec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TMacTurkishCodec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
{ }
{ International }
{ }
type
TUSASCIICodec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TEBCDIC_USCodec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TKOI8_RCodec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TJIS_X0201Codec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
TNextStepCodec = class(TCustomSingleByteCodec)
public
function DecodeChar(const P: AnsiChar): WideChar; override;
function EncodeChar(const Ch: WideChar): AnsiChar; override;
end;
implementation
{$IFNDEF LINUX}
uses
{ Delphi }
Windows;
{$ENDIF}
resourcestring
SCannotConvert = 'Unicode code point $%x has no equivalent in %s';
SCannotConvertUCS4 = 'Cannot convert $%8.8X to %s';
SHighSurrogateNotFound = 'High surrogate not found';
SInvalidCodePoint = '$%x is not a valid %s code point';
SInvalidEncoding = 'Invalid %s encoding';
SLongStringConvertError = 'Long string conversion error';
SLowSurrogateNotFound = 'Low surrogate not found';
SSurrogateNotAllowed = 'Surrogate value $%x found in %s. Values between $D800 and $DFFF are reserved for use with UTF-16';
SEncodingOutOfRange = '%s encoding out of range';
SUTF8Error = 'UTF-8 error %d';
{ }
{ Type definitions }
{ }
{$IFNDEF DELPHI6_UP}
type
PByte = ^Byte;
PWord = ^Word;
PLongWord = ^LongWord;
{$ENDIF}
{ }
{ US-ASCII String functions }
{ }
function IsUSASCIIString(const S: AnsiString): Boolean;
var I : Integer;
P : PAnsiChar;
begin
P := Pointer(S);
For I := 1 to Length(S) do
if Ord(P^) >= $80 then
begin
Result := False;
exit;
end
else
Inc(P);
Result := True;
end;
function IsUSASCIIWideBuf(const Buf: PWideChar; const Len: Integer): Boolean;
var I : Integer;
P : PWideChar;
begin
P := Buf;
For I := 1 to Len do
if Ord(P^) >= $80 then
begin
Result := False;
exit;
end
else
Inc(P);
Result := True;
end;
function IsUSASCIIWideString(const S: WideString): Boolean;
begin
Result := IsUSASCIIWideBuf(Pointer(S), Length(S));
end;
{ }
{ Long string functions }
{ }
procedure LongToWide(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer);
var I : Integer;
P : Pointer;
Q : Pointer;
V : LongWord;
begin
if BufSize <= 0 then
exit;
P := Buf;
Q := DestBuf;
For I := 1 to BufSize div 4 do
begin
// convert 4 characters per iteration
V := PLongWord(P)^;
Inc(PLongWord(P));
PLongWord(Q)^ := (V and $FF) or ((V and $FF00) shl 8);
Inc(PLongWord(Q));
V := V shr 16;
PLongWord(Q)^ := (V and $FF) or ((V and $FF00) shl 8);
Inc(PLongWord(Q));
end;
// convert remaining (<4)
For I := 1 to BufSize mod 4 do
begin
PWord(Q)^ := PByte(P)^;
Inc(PByte(P));
Inc(PWord(Q));
end;
end;
function LongStringToWideString(const S: AnsiString): WideString;
var L : Integer;
begin
L := Length(S);
SetLength(Result, L);
if L = 0 then
exit;
LongToWide(Pointer(S), L, Pointer(Result));
end;
procedure WideToLong(const Buf: Pointer; const Len: Integer;
const DestBuf: Pointer);
var I : Integer;
S : PWideChar;
Q : PAnsiChar;
V : LongWord;
W : Word;
begin
if Len <= 0 then
exit;
S := Buf;
Q := DestBuf;
For I := 1 to Len div 2 do
begin
// convert 2 characters per iteration
V := PLongWord(S)^;
if V and $FF00FF00 <> 0 then
raise EConvertError.Create(SLongStringConvertError);
Q^ := AnsiChar(V);
Inc(Q);
Q^ := AnsiChar(V shr 16);
Inc(Q);
Inc(S, 2);
end;
// convert remaining character
if Len mod 2 = 1 then
begin
W := Ord(S^);
if W > $FF then
raise EConvertError.Create(SLongStringConvertError);
Q^ := AnsiChar(W);
end;
end;
function WideToLongString(const P: PWideChar; const Len: Integer): AnsiString;
var I : Integer;
S : PWideChar;
Q : PAnsiChar;
V : WideChar;
begin
if Len <= 0 then
begin
Result := '';
exit;
end;
SetLength(Result, Len);
S := P;
Q := Pointer(Result);
For I := 1 to Len do
begin
V := S^;
if Ord(V) > $FF then
raise EConvertError.Create(SLongStringConvertError);
Q^ := AnsiChar(Byte(V));
Inc(S);
Inc(Q);
end;
end;
function WideStringToLongString(const S: WideString): AnsiString;
begin
Result := WideToLongString(Pointer(S), Length(S));
end;
{ }
{ UTF-8 character conversion functions }
{ }
{ UTF8ToUCS4Char returns UTF8ErrorNone if a valid UTF-8 sequence was decoded }
{ (and Ch contains the decoded UCS4 character and SeqSize contains the size }
{ of the UTF-8 sequence). If an incomplete UTF-8 sequence is encountered, the }
{ function returns UTF8ErrorIncompleteEncoding and SeqSize > Size. If an }
{ invalid UTF-8 sequence is encountered, the function returns }
{ UTF8ErrorInvalidEncoding and SeqSize (<= Size) is the size of the }
{ invalid sequence, and Ch may be the intended character. }
function UTF8ToUCS4Char(const P: PChar; const Size: Integer;
out SeqSize: Integer; out Ch: UCS4Char): TUTF8Error;
var C, D : Byte;
V : LongWord;
I : Integer;
begin
if not Assigned(P) or (Size <= 0) then
begin
SeqSize := 0;
Ch := 0;
Result := UTF8ErrorInvalidBuffer;
exit;
end;
C := Ord(P^);
if C < $80 then
begin
SeqSize := 1;
Ch := C;
Result := UTF8ErrorNone;
exit;
end;
// multi-byte characters always start with 11xxxxxx ($C0)
// following bytes always start with 10xxxxxx ($80)
if C and $C0 = $80 then
begin
SeqSize := 1;
Ch := C;
Result := UTF8ErrorInvalidEncoding;
exit;
end;
if C and $20 = 0 then // 2-byte sequence
begin
SeqSize := 2;
V := C and $1F;
end else
if C and $10 = 0 then // 3-byte sequence
begin
SeqSize := 3;
V := C and $0F;
end else
if C and $08 = 0 then // 4-byte sequence (max needed for Unicode $0-$1FFFFF)
begin
SeqSize := 4;
V := C and $07;
end else
begin
SeqSize := 1;
Ch := C;
Result := UTF8ErrorInvalidEncoding;
exit;
end;
if Size < SeqSize then // incomplete
begin
Ch := C;
Result := UTF8ErrorIncompleteEncoding;
exit;
end;
For I := 1 to SeqSize - 1 do
begin
D := Ord(P[I]);
if D and $C0 <> $80 then // following byte must start with 10xxxxxx
begin
SeqSize := 1;
Ch := C;
Result := UTF8ErrorInvalidEncoding;
exit;
end;
V := (V shl 6) or (D and $3F); // decode 6 bits
end;
Ch := V;
Result := UTF8ErrorNone;
end;
function UTF8ToWideChar(const P: PChar; const Size: Integer;
out SeqSize: Integer; out Ch: WideChar): TUTF8Error;
var Ch4 : UCS4Char;
begin
Result := UTF8ToUCS4Char(P, Size, SeqSize, Ch4);
if Ch4 > $FFFF then
begin
Result := UTF8ErrorOutOfRange;
Ch := #$0000;
end else
Ch := WideChar(Ch4);
end;
{ UCS4CharToUTF8 transforms the UCS4 char Ch to UTF-8 encoding. SeqSize }
{ returns the number of bytes needed to transform Ch. Up to DestSize }
{ bytes of the UTF-8 encoding will be placed in Dest. }
procedure UCS4CharToUTF8(const Ch: UCS4Char; const Dest: Pointer;
const DestSize: Integer; out SeqSize: Integer);
var P : PByte;
begin
P := Dest;
if Ch < $80 then // US-ASCII (1-byte sequence)
begin
SeqSize := 1;
if not Assigned(P) or (DestSize <= 0) then
exit;
P^ := Byte(Ch);
end else
if Ch < $800 then // 2-byte sequence
begin
SeqSize := 2;
if not Assigned(P) or (DestSize <= 0) then
exit;
P^ := $C0 or Byte(Ch shr 6);
if DestSize = 1 then
exit;
Inc(P);
P^ := $80 or (Ch and $3F);
end else
if Ch < $10000 then // 3-byte sequence
begin
SeqSize := 3;
if not Assigned(P) or (DestSize <= 0) then
exit;
P^ := $E0 or Byte(Ch shr 12);
if DestSize = 1 then
exit;
Inc(P);
P^ := $80 or ((Ch shr 6) and $3F);
if DestSize = 2 then
exit;
Inc(P);
P^ := $80 or (Ch and $3F);
end else
if Ch < $200000 then // 4-byte sequence
begin
SeqSize := 4;
if not Assigned(P) or (DestSize <= 0) then
exit;
P^ := $F0 or Byte(Ch shr 18);
if DestSize = 1 then
exit;
Inc(P);
P^ := $80 or ((Ch shr 12) and $3F);
if DestSize = 2 then
exit;
Inc(P);
P^ := $80 or ((Ch shr 6) and $3F);
if DestSize = 3 then
exit;
Inc(P);
P^ := $80 or (Ch and $3F);
end
else
raise EConvertError.CreateFmt(SInvalidCodePoint, [Ord(Ch), 'Unicode']);
end;
procedure WideCharToUTF8(const Ch: WideChar; const Dest: Pointer;
const DestSize: Integer; out SeqSize: Integer);
begin
UCS4CharToUTF8(Ord(Ch), Dest, DestSize, SeqSize);
end;
{ }
{ UTF-16 character conversion functions }
{ }
{ UCS4CharToUTF16BE transforms the UCS4 char Ch to UTF-16BE encoding. SeqSize }
{ returns the number of bytes needed to transform Ch. Up to DestSize }
{ bytes of the UTF-16BE encoding will be placed in Dest. }
procedure UCS4CharToUTF16BE(const Ch: UCS4Char; const Dest: Pointer;
const DestSize: Integer; out SeqSize: Integer);
var P : PByte;
HighSurrogate, LowSurrogate : Word;
begin
P := Dest;
Case Ch of
$00000080..$0000D7FF, $0000E000..$0000FFFD :
begin
SeqSize := 2;
if not Assigned(P) or (DestSize <= 0) then
exit;
P^ := Hi(Ch);
if DestSize <= 1 then
exit;
Inc(P);
P^ := Lo(Ch);
end;
$0000D800..$0000DFFF, $0000FFFE,$0000FFFF :
raise EConvertError.CreateFmt(SInvalidCodePoint, [Ch, 'UCS-4']);
$00010000..$0010FFFF :
begin
SeqSize := 4;
if not Assigned(P) or (DestSize <= 0) then
exit;
HighSurrogate := $D7C0 + (Ch shr 10);
P^ := Hi(HighSurrogate);
if DestSize <= 1 then
exit;
Inc(P);
P^ := Lo(HighSurrogate);
if DestSize <= 2 then
exit;
LowSurrogate := $DC00 xor (Ch and $3FF);
Inc(P);
P^ := Hi(LowSurrogate);
if DestSize <= 3 then
exit;
Inc(P);
P^ := Lo(LowSurrogate);
end;
else // out of UTF-16 range
raise EConvertError.CreateFmt(SCannotConvertUCS4, [Ch, 'UTF-16BE']);
end;
end;
{ UCS4CharToUTF16LE transforms the UCS4 char Ch to UTF-16LE encoding. SeqSize }
{ returns the number of bytes needed to transform Ch. Up to DestSize }
{ bytes of the UTF-16LE encoding will be placed in Dest. }
procedure UCS4CharToUTF16LE(const Ch: UCS4Char; const Dest: Pointer;
const DestSize: Integer; out SeqSize: Integer);
var P : PByte;
HighSurrogate, LowSurrogate : Word;
begin
P := Dest;
Case Ch of
$00000080..$0000D7FF, $0000E000..$0000FFFD :
begin
SeqSize := 2;
if not Assigned(P) or (DestSize <= 0) then
exit;
P^ := Lo(Ch);
if DestSize <= 1 then
exit;
Inc(P);
P^ := Hi(Ch);
end;
$0000D800..$0000DFFF, $0000FFFE, $0000FFFF :
raise EConvertError.CreateFmt(SInvalidCodePoint, [Ch, 'UCS-4']);
$00010000..$0010FFFF:
begin
SeqSize := 4;
if not Assigned(P) or (DestSize <= 0) then
exit;
HighSurrogate := $D7C0 + (Ch shr 10);
P^ := Lo(HighSurrogate);
if DestSize <= 1 then
exit;
Inc(P);
P^ := Hi(HighSurrogate);
if DestSize <= 2 then
exit;
LowSurrogate := $DC00 xor (Ch and $3FF);
Inc(P);
P^ := Lo(LowSurrogate);
if DestSize <= 3 then
exit;
Inc(P);
P^ := Hi(LowSurrogate);
end;
else // out of UTF-16 range
raise EConvertError.CreateFmt(SCannotConvertUCS4, [Ch, 'UTF-16LE']);
end;
end;
{ }
{ UTF-8 string functions }
{ }
function DetectUTF8BOM(const P: PChar; const Size: Integer): Boolean;
var Q : PChar;
begin
Result := False;
if Assigned(P) and (Size >= 3) and (P^ = #$EF) then
begin
Q := P;
Inc(Q);
if Q^ = #$BB then
begin
Inc(Q);
if Q^ = #$BF then
Result := True;
end;
end;
end;
function UTF8CharSize(const P: PChar; const Size: Integer): Integer;
var C : Byte;
I : Integer;
Q : PChar;
begin
if not Assigned(P) or (Size <= 0) then
begin
Result := 0;
exit;
end;
C := Ord(P^);
if C < $80 then // 1-byte (US-ASCII value)
Result := 1 else
if C and $C0 = $80 then // invalid encoding
Result := 1 else
begin
// multi-byte character
if C and $20 = 0 then
Result := 2 else
if C and $10 = 0 then
Result := 3 else
if C and $08 = 0 then
Result := 4 else
begin
Result := 1; // invalid encoding
exit;
end;
if Size < Result then // incomplete encoding
exit;
Q := P;
Inc(Q);
For I := 1 to Result - 1 do
if Ord(Q^) and $C0 <> $80 then
begin
Result := 1; // invalid encoding
exit;
end else
Inc(Q);
end;
end;
function UTF8BufLength(const P: PChar; const Size: Integer): Integer;
var Q : PChar;
L, C : Integer;
begin
Q := P;
L := Size;
Result := 0;
While L > 0 do
begin
C := UTF8CharSize(Q, L);
Dec(L, C);
Inc(Q, C);
Inc(Result);
end;
end;
function UTF8StringLength(const S: String): Integer;
begin
Result := UTF8BufLength(Pointer(S), Length(S));
end;
function UCS4CharToUTF8CharSize(const Ch: UCS4Char): Integer;
begin
if Ch < $80 then
Result := 1 else
if Ch < $800 then
Result := 2 else
if Ch < $10000 then
Result := 3 else
if Ch < $200000 then
Result := 4
else
raise EConvertError.CreateFmt(SInvalidCodePoint, [Ord(Ch), 'Unicode']);
end;
function WideBufToUTF8Size(const Buf: PWideChar; const Len: Integer): Integer;
var P : PWideChar;
I : Integer;
C : UCS4Char;
begin
P := Buf;
Result := 0;
For I := 1 to Len do
begin
C := UCS4Char(P^);
Inc(Result);
if C >= $80 then
if C >= $800 then
Inc(Result, 2) else
Inc(Result);
Inc(P);
end;
end;
function LongBufToUTF8Size(const Buf: PChar; const Len: Integer): Integer;
var P : PChar;
I : Integer;
begin
P := Buf;
Result := 0;
For I := 1 to Len do
begin
Inc(Result);
if Ord(P^) >= $80 then
Inc(Result);
Inc(P);
end;
end;
function WideStringToUTF8Size(const S: WideString): Integer;
begin
Result := WideBufToUTF8Size(Pointer(S), Length(S));
end;
function LongStringToUTF8Size(const S: String): Integer;
begin
Result := LongBufToUTF8Size(Pointer(S), Length(S));
end;
function UTF8StringToWideString(const S: String): WideString;
var P : PChar;
Q : PWideChar;
L, M, I : Integer;
C : WideChar;
begin
L := Length(S);
if L = 0 then
begin
Result := '';
exit;
end;
if IsUSASCIIString(S) then // optimize for US-ASCII strings
begin
Result := LongStringToWideString(S);
exit;
end;
// Decode UTF-8
P := Pointer(S);
SetLength(Result, L); // maximum size
Q := Pointer(Result);
M := 0;
Repeat
UTF8ToWideChar(P, L, I, C);
Assert(I > 0, 'I > 0');
Q^ := C;
Inc(Q);
Inc(M);
Inc(P, I);
Dec(L, I);
Until L <= 0;
SetLength(Result, M); // actual size
end;
function UTF8StringToLongString(const S: String): String;
var P : PChar;
Q : PChar;
L, M, I : Integer;
C : WideChar;
begin
L := Length(S);
if L = 0 then
begin
Result := '';
exit;
end;
if IsUSASCIIString(S) then // optimize for US-ASCII strings
begin
Result := S;
exit;
end;
// Decode UTF-8
P := Pointer(S);
SetLength(Result, L); // maximum size
Q := Pointer(Result);
M := 0;
Repeat
UTF8ToWideChar(P, L, I, C);
Assert(I > 0, 'I > 0');
if Ord(C) > $FF then
raise EConvertError.Create(SLongStringConvertError);
Q^ := Char(Ord(C));
Inc(Q);
Inc(M);
Inc(P, I);
Dec(L, I);
Until L <= 0;
SetLength(Result, M); // actual size
end;
function WideBufToUTF8String(const Buf: PWideChar; const Len: Integer): String;
var P : PWideChar;
Q : PChar;
I, M,
N, J : Integer;
begin
if Len = 0 then
begin
Result := '';
exit;
end;
N := WideBufToUTF8Size(Buf, Len);
if N = Len then // optimize for US-ASCII strings
begin
Result := WideToLongString(Buf, Len);
exit;
end;
SetLength(Result, N);
P := Buf;
Q := Pointer(Result);
M := 0;
For I := 1 to Len do
begin
UCS4CharToUTF8(UCS4Char(P^), Q, N, J);
Inc(P);
Inc(Q, J);
Dec(N, J);
Inc(M, J);
end;
SetLength(Result, M); // actual size
end;
function LongStringToUTF8String(const S: String): String;
var P : PChar;
Q : PChar;
I, M, N : Integer;
J, L : Integer;
begin
P := Pointer(S);
L := Length(S);
if L = 0 then
begin
Result := '';
exit;
end;
N := LongBufToUTF8Size(P, L);
if N = L then // optimize for US-ASCII strings
begin
Result := S;
exit;
end;
SetLength(Result, N);
Q := Pointer(Result);
M := 0;
For I := 1 to L do
begin
UCS4CharToUTF8(UCS4Char(Ord(P^)), Q, N, J);
Inc(P);
Inc(Q, J);
Dec(N, J);
Inc(M, J);
end;
SetLength(Result, M); // actual size
end;
function WideStringToUTF8String(const S: WideString): String;
begin
Result := WideBufToUTF8String(Pointer(S), Length(S));
end;
const
MaxUTF8SequenceSize = 4;
function UCS4CharToUTF8String(const Ch: UCS4Char): String;
var Buf : Array[0..MaxUTF8SequenceSize - 1] of Byte;
Size, I : Integer;
P, Q : PChar;
begin
Size := 0;
UCS4CharToUTF8(Ch, @Buf, Sizeof(Buf), Size);
SetLength(Result, Size);
if Size > 0 then
begin
P := Pointer(Result);
Q := @Buf;
For I := 0 to Size - 1 do
begin
P^ := Q^;
Inc(P);
Inc(Q);
end;
end;
end;
function ISO8859_1StringToUTF8String(const S: String): String;
var P, Q : PChar;
L, I,
M, J : Integer;
begin
L := Length(S);
if L = 0 then
begin
Result := '';
exit;
end;
// Calculate size
M := L;
P := Pointer(S);
For I := 1 to L do
begin
if Ord(P^) >= $80 then
Inc(M); // 2 bytes required for #$80-#$FF
Inc(P);
end;
// Check if conversion is required
if M = L then
begin
// All characters are US-ASCII, return reference to same string
Result := S;
exit;
end;
// Convert
SetLength(Result, M);
Q := Pointer(Result);
P := Pointer(S);
For I := 1 to L do
begin
WideCharToUTF8(WideChar(P^), Q, M, J);
Inc(P);
Inc(Q, J);
Dec(M, J);
end;
end;
{ }
{ UTF-16 functions }
{ }
function DetectUTF16BEBOM(const P: PChar; const Size: Integer): Boolean;
begin
Result := Assigned(P) and (Size >= Sizeof(WideChar)) and
(PWideChar(P)^ = WideChar($FEFF));
end;
function DetectUTF16LEBOM(const P: PChar; const Size: Integer): Boolean;
begin
Result := Assigned(P) and (Size >= Sizeof(WideChar)) and
(PWideChar(P)^ = WideChar($FFFE));
end;
{ DetectUTF16Encoding returns True if the encoding was confirmed to be UTF-16. }
{ SwapEndian is True if it was detected that the UTF-16 data is in reverse }
{ endian from that used by the cpu. }
function DetectUTF16BOM(const P: PChar; const Size: Integer;
out SwapEndian: Boolean): Boolean;
begin
if not Assigned(P) or (Size < Sizeof(WideChar)) then
begin
SwapEndian := False;
Result := False;
end else
if PWideChar(P)^ = WideChar($FEFF) then
begin
SwapEndian := False;
Result := True;
end else
if PWideChar(P)^ = WideChar($FFFE) then
begin
SwapEndian := True;
Result := True;
end
else
begin
SwapEndian := False;
Result := False;
end;
end;
function SwapUTF16Endian(const P: WideChar): WideChar;
begin
Result := WideChar(((Ord(P) and $FF) shl 8) or (Ord(P) shr 8));
end;
{ }
{ Helper Functions }
{ }
type
AnsiCharMap = Array[AnsiChar] of WideChar;
function CharFromMap(const Ch: WideChar; const Map: AnsiCharMap;
const Encoding: String): AnsiChar;
var I : AnsiChar;
P : PWideChar;
begin
if Ch = #$FFFF then
raise EConvertError.CreateFmt(SCannotConvert, [Ord(Ch), Encoding]);
P := @Map;
for I := #$00 to #$FF do
if P^ <> Ch then
Inc(P)
else
begin
Result := I;
exit;
end;
raise EConvertError.CreateFmt(SCannotConvert, [Ord(Ch), Encoding]);
end;
type
AnsiCharHighMap = Array[#$80..#$FF] of WideChar;
function CharFromHighMap(const Ch: WideChar; const Map: AnsiCharHighMap;
const Encoding: String): AnsiChar;
var I : AnsiChar;
P : PWideChar;
begin
if Ord(Ch) < $80 then
begin
Result := AnsiChar(Ch);
exit;
end;
if Ch = #$FFFF then
raise EConvertError.CreateFmt(SCannotConvert, [Ord(Ch), Encoding]);
P := @Map;
for I := #$80 to #$FF do
if P^ <> Ch then
Inc(P)
else
begin
Result := I;
exit;
end;
raise EConvertError.CreateFmt(SCannotConvert, [Ord(Ch), Encoding]);
end;
type
AnsiCharISOMap = Array[#$A0..#$FF] of WideChar;
function CharFromISOMap(const Ch: WideChar; const Map: AnsiCharISOMap;
const Encoding: String): AnsiChar;
var I : AnsiChar;
P : PWideChar;
begin
if Ord(Ch) < $A0 then
begin
Result := AnsiChar(Ch);
exit;
end;
if Ch = #$FFFF then
raise EConvertError.CreateFmt(SCannotConvert, [Ord(Ch), Encoding]);
P := @Map;
for I := #$A0 to #$FF do
if P^ <> Ch then
Inc(P)
else
begin
Result := I;
exit;
end;
raise EConvertError.CreateFmt(SCannotConvert, [Ord(Ch), Encoding]);
end;
{ }
{ Unicode Codec aliases }
{ }
const
USASCIIAliases = 17;
USASCIIAlias : Array[0..USASCIIAliases - 1] of String = (
'ASCII', 'US-ASCII', 'us',
'ANSI_X3.4-1968', 'ANSI_X3.4-1986', 'iso-ir-6',
'ISO_646.irv:1991', 'ISO_646.irv', 'ISO_646',
'ISO-646', 'ISO646', 'ISO646-US',
'IBM367', 'cp367', 'csASCII', 'IBM891', 'IBM903');
{ }
{ ISO-8859-1 - Latin 1 }
{ Western Europe and Americas: Afrikaans, Basque, Catalan, Danish, Dutch, }
{ English, Faeroese, Finnish, French, Galician, German, Icelandic, Irish, }
{ Italian, Norwegian, Portuguese, Spanish and Swedish. }
{ Default for HTTP Protocol }
{ }
const
ISO8859_1Aliases = 8;
ISO8859_1Alias : Array[0..ISO8859_1Aliases - 1] of String = (
'ISO-8859-1', 'ISO_8859-1:1987', 'ISO_8859-1',
'iso-ir-100', 'latin1', 'l1', 'IBM819', 'cp819');
{ }
{ ISO-8859-2 Latin 2 }
{ Latin-written Slavic and Central European languages: Czech, German, }
{ Hungarian, Polish, Romanian, Croatian, Slovak, Slovene. }
{ }
const
ISO8859_2Aliases = 6;
ISO8859_2Alias : Array[0..ISO8859_2Aliases - 1] of String = (
'ISO-8859-2', 'ISO_8859-2:1987', 'ISO_8859-2',
'iso-ir-101', 'latin2', 'l2');
{ }
{ ISO-8859-3 - Latin 3 }
{ Esperanto, Galician, Maltese, and Turkish. }
{ }
const
ISO8859_3Aliases = 6;
ISO8859_3Alias : Array[0..ISO8859_3Aliases - 1] of String = (
'ISO-8859-3', 'ISO_8859-3:1988', 'ISO_8859-3',
'iso-ir-109', 'latin3', 'l3');
{ }
{ ISO-8859-4 - Latin 4 }
{ Scandinavia/Baltic (mostly covered by 8859-1 also): Estonian, Latvian, and }
{ Lithuanian. It is an incomplete predecessor of Latin 6. }
{ }
const
ISO8859_4Aliases = 6;
ISO8859_4Alias : Array[0..ISO8859_4Aliases - 1] of String = (
'ISO-8859-4', 'ISO_8859-4:1988', 'ISO_8859-4',
'iso-ir-110', 'latin4', 'l4');
{ }
{ ISO-8859-5 - Cyrillic }
{ Bulgarian, Byelorussian, Macedonian, Russian, Serbian and Ukrainian. }
{ }
const
ISO8859_5Aliases = 5;
ISO8859_5Alias : Array[0..ISO8859_5Aliases - 1] of String = (
'ISO-8859-5', 'ISO_8859-5:1988', 'ISO_8859-5',
'iso-ir-144', 'cyrillic');
{ }
{ ISO-8859-6 - Arabic }
{ Non-accented Arabic. }
{ }
const
ISO8859_6Aliases = 7;
ISO8859_6Alias : Array[0..ISO8859_6Aliases - 1] of String = (
'ISO-8859-6', 'ISO_8859-6:1987', 'ISO_8859-6',
'iso-ir-127', 'ECMA-114', 'ASMO-708', 'arabic');
{ }
{ ISO-8859-7 - Modern Greek }
{ Greek. }
{ }
const
ISO8859_7Aliases = 8;
ISO8859_7Alias : Array[0..ISO8859_7Aliases - 1] of String = (
'ISO-8859-7', 'ISO_8859-7:1987', 'ISO_8859-7',
'iso-ir-126', 'ELOT_928', 'ECMA-118', 'greek', 'greek8');
{ }
{ ISO-8859-8 - Hebrew }
{ Non-accented Hebrew. }
{ }
const
ISO8859_8Aliases = 5;
ISO8859_8Alias : Array[0..ISO8859_8Aliases - 1] of String = (
'ISO-8859-8', 'ISO_8859-8:1988', 'ISO_8859-8',
'iso-ir-138', 'hebrew');
{ }
{ ISO-8859-9 - Latin 5 }
{ Same as 8859-1 except for Turkish instead of Icelandic }
{ }
const
ISO8859_9Aliases = 6;
ISO8859_9Alias : Array[0..ISO8859_9Aliases - 1] of String = (
'ISO-8859-9', 'ISO_8859-9:1989', 'ISO_8859-9',
'iso-ir-148', 'latin5', 'l5');
{ }
{ ISO-8859-10 - Latin 6 }
{ Latin6, for Lappish/Nordic/Eskimo languages: Adds the last Inuit }
{ (Greenlandic) and Sami (Lappish) letters that were missing in Latin 4 to }
{ cover the entire Nordic area. }
{ }
const
ISO8859_10Aliases = 6;
ISO8859_10Alias : Array[0..ISO8859_10Aliases - 1] of String = (
'ISO-8859-10', 'ISO_8859-10:1992', 'ISO_8859-10',
'iso-ir-157', 'latin6', 'l6');
{ }
{ ISO-8859-13 - Latin 7 }
{ }
const
ISO8859_13Aliases = 4;
ISO8859_13Alias : Array[0..ISO8859_13Aliases - 1] of String = (
'ISO-8859-13', 'ISO_8859-13', 'latin7', 'l7');
{ }
{ ISO-8859-14 - Latin 8 }
{ }
const
ISO8859_14Aliases = 7;
ISO8859_14Alias : Array[0..ISO8859_14Aliases - 1] of String = (
'ISO-8859-14', 'ISO_8859-14:1998', 'ISO_8859-14',
'iso-ir-199', 'latin8', 'l8', 'iso-celtic');
{ }
{ ISO-8859-15 - Latin 9 }
{ }
const
ISO8859_15Aliases = 6;
ISO8859_15Alias : Array[0..ISO8859_15Aliases - 1] of String = (
'ISO-8859-15', 'ISO_8859-15',
'latin9', 'l9', 'latin0', 'l0');
{ }
{ KOI8-R }
{ }
const
KOI8_RAliases = 1;
KOI8_RAlias : Array[0..KOI8_RAliases - 1] of String = (
'KOI8-R');
{ }
{ Mac Latin-2 }
{ }
const
MacLatin2Aliases = 3;
MacLatin2Alias : Array[0..MacLatin2Aliases - 1] of String = (
'MacLatin2', 'Mac', 'Macintosh');
{ }
{ Mac Roman }
{ }
const
MacRomanAliases = 1;
MacRomanAlias : Array[0..MacRomanAliases - 1] of String = (
'MacRoman');
{ }
{ Mac Cyrillic }
{ }
const
MacCyrillicAliases = 1;
MacCyrillicAlias : Array[0..MacCyrillicAliases - 1] of String = (
'MacCyrillic');
{ }
{ CP437 - DOSLatinUS }
{ Original IBM PC encoding }
{ }
const
CP437Aliases = 3;
CP437Alias : Array[0..CP437Aliases - 1] of String = (
'IBM437', 'cp437', 'DOSLatinUS');
{ }
{ Windows-1250 }
{ }
const
Win1250Aliases = 3;
Win1250Alias : Array[0..Win1250Aliases - 1] of String = (
'windows-1250', 'cp1250', 'WinLatin2');
{ }
{ Windows-1251 }
{ }
const
Win1251Aliases = 3;
Win1251Alias : Array[0..Win1251Aliases - 1] of String = (
'windows-1251', 'cp1251', 'WinCyrillic');
{ }
{ Windows-1252 }
{ }
const
Win1252Aliases = 3;
Win1252Alias : Array[0..Win1252Aliases - 1] of String = (
'windows-1252', 'cp1252', 'WinLatin1');
{ }
{ EBCDIC-US }
{ }
const
EBCDIC_USAliases = 2;
EBCDIC_USAlias : Array[0..EBCDIC_USAliases - 1] of String = (
'ebcdic-us', 'ebcdic');
{ }
{ UTF-8 }
{ }
const
UTF8Aliases = 2;
UTF8Alias : Array[0..UTF8Aliases - 1] of String = (
'UTF-8', 'utf8');
{ }
{ UTF-16BE }
{ }
const
UTF16BEAliases = 3;
UTF16BEAlias : Array[0..UTF16BEAliases - 1] of String = (
'UTF-16BE', 'UTF-16', 'utf16');
{ }
{ UTF-16LE }
{ }
const
UTF16LEAliases = 2;
UTF16LEAlias : Array[0..UTF16LEAliases - 1] of String = (
'UTF-16LE', 'utf16le');
{ }
{ Unicode Codec alias table }
{ }
type
UnicodeCodecAliasInfo = record
Table : Pointer;
Count : Integer;
Codec : TUnicodeCodecClass;
end;
const
UnicodeCodecAliasEntries = 26;
UnicodeCodecAliasList : Array[0..UnicodeCodecAliasEntries - 1] of UnicodeCodecAliasInfo =
((Table:@USASCIIAlias; Count: USASCIIAliases; Codec: TUSASCIICodec),
(Table:@ISO8859_1Alias; Count: ISO8859_1Aliases; Codec: TISO8859_1Codec),
(Table:@ISO8859_2Alias; Count: ISO8859_2Aliases; Codec: TISO8859_2Codec),
(Table:@ISO8859_3Alias; Count: ISO8859_3Aliases; Codec: TISO8859_3Codec),
(Table:@ISO8859_4Alias; Count: ISO8859_4Aliases; Codec: TISO8859_4Codec),
(Table:@ISO8859_5Alias; Count: ISO8859_5Aliases; Codec: TISO8859_5Codec),
(Table:@ISO8859_6Alias; Count: ISO8859_6Aliases; Codec: TISO8859_6Codec),
(Table:@ISO8859_7Alias; Count: ISO8859_7Aliases; Codec: TISO8859_7Codec),
(Table:@ISO8859_8Alias; Count: ISO8859_8Aliases; Codec: TISO8859_8Codec),
(Table:@ISO8859_9Alias; Count: ISO8859_9Aliases; Codec: TISO8859_9Codec),
(Table:@ISO8859_10Alias; Count: ISO8859_10Aliases; Codec: TISO8859_10Codec),
(Table:@ISO8859_13Alias; Count: ISO8859_13Aliases; Codec: TISO8859_13Codec),
(Table:@ISO8859_14Alias; Count: ISO8859_14Aliases; Codec: TISO8859_14Codec),
(Table:@ISO8859_15Alias; Count: ISO8859_15Aliases; Codec: TISO8859_15Codec),
(Table:@KOI8_RAlias; Count: KOI8_RAliases; Codec: TKOI8_RCodec),
(Table:@MacLatin2Alias; Count: MacLatin2Aliases; Codec: TMacLatin2Codec),
(Table:@MacRomanAlias; Count: MacRomanAliases; Codec: TMacRomanCodec),
(Table:@MacCyrillicAlias; Count: MacCyrillicAliases; Codec: TMacCyrillicCodec),
(Table:@CP437Alias; Count: CP437Aliases; Codec: TIBM037Codec),
(Table:@Win1250Alias; Count: Win1250Aliases; Codec: TWindows1250Codec),
(Table:@Win1251Alias; Count: Win1251Aliases; Codec: TWindows1251Codec),
(Table:@Win1252Alias; Count: Win1252Aliases; Codec: TWindows1252Codec),
(Table:@EBCDIC_USAlias; Count: EBCDIC_USAliases; Codec: TEBCDIC_USCodec),
(Table:@UTF8Alias; Count: UTF8Aliases; Codec: TUTF8Codec),
(Table:@UTF16BEAlias; Count: UTF16BEAliases; Codec: TUTF16BECodec),
(Table:@UTF16LEAlias; Count: UTF16LEAliases; Codec: TUTF16LECodec)
);
{ }
{ Unicode Codec alias functions }
{ }
function GetCodecClassByAlias(const CodecAlias: String): TUnicodeCodecClass;
var I, J : Integer;
P : PString;
begin
For I := 0 to UnicodeCodecAliasEntries - 1 do
begin
P := UnicodeCodecAliasList[I].Table;
For J := 0 to UnicodeCodecAliasList[I].Count - 1 do
begin
if AnsiCompareText(CodecAlias, P^) = 0 then
begin
Result := UnicodeCodecAliasList[I].Codec;
exit;
end;
Inc(P);
end;
end;
Result := nil;
end;
function GetEncodingName(const CodecClass: TUnicodeCodecClass): String;
var I : Integer;
begin
For I := 0 to UnicodeCodecAliasEntries - 1 do
if UnicodeCodecAliasList[I].Codec = CodecClass then
begin
Result := PString(UnicodeCodecAliasList[I].Table)^;
exit;
end;
Result := '';
end;
{$IFDEF OS_MSWIN}
{ }
{ MSWindows system encoding functions }
{ }
function GetSystemEncodingName: String;
begin
// GetACP returns the current ANSI code-page identifier for the system,
// or a default identifier if no code page is current.
Case GetACP of
874 : Result := 'cp874'; // Thai
932 : Result := 'cp932'; // Japan
936 : Result := 'cp936'; // Chinese (PRC, Singapore)
949 : Result := 'cp949'; // Korean
950 : Result := 'cp950'; // Chinese (Taiwan, Hong Kong)
1200 : Result := 'ISO-10646-UCS-2'; // Unicode (BMP of ISO 10646)
1250 : Result := 'windows-1250'; // Windows 3.1 Eastern European
1251 : Result := 'windows-1251'; // Windows 3.1 Cyrillic
1252 : Result := 'windows-1252'; // Windows 3.1 Latin 1 (US, Western Europe)
1253 : Result := 'windows-1253'; // Windows 3.1 Greek
1254 : Result := 'windows-1254'; // Windows 3.1 Turkish
1255 : Result := 'windows-1255'; // Hebrew
1256 : Result := 'windows-1256'; // Arabic
1257 : Result := 'windows-1257'; // Baltic
else
Result := '';
end;
end;
function GetSystemEncodingCodecClass: TUnicodeCodecClass;
begin
Case GetACP of
874 : Result := TWindows874Codec; // Thai
932 : Result := nil; // Japan -- Not supported
936 : Result := nil; // Chinese (PRC, Singapore) -- Not supported
949 : Result := nil; // Korean -- Not supported
950 : Result := nil; // Chinese (Taiwan, Hong Kong) -- Not supported
1200 : Result := nil; // Unicode (BMP of ISO 10646) -- Not supported
1250 : Result := TWindows1250Codec; // Windows 3.1 Eastern European
1251 : Result := TWindows1251Codec; // Windows 3.1 Cyrillic
1252 : Result := TWindows1252Codec; // Windows 3.1 Latin 1 (US, Western Europe)
1253 : Result := TWindows1253Codec; // Windows 3.1 Greek
1254 : Result := TWindows1254Codec; // Windows 3.1 Turkish
1255 : Result := TWindows1255Codec; // Hebrew
1256 : Result := TWindows1256Codec; // Arabic
1257 : Result := TWindows1257Codec; // Baltic
else
Result := nil;
end;
end;
{$ENDIF}
{ }
{ Encoding detection }
{ }
function DetectUTFEncoding(const Buf: Pointer; const BufSize: Integer;
var BOMSize: Integer): TUnicodeCodecClass;
var R : Boolean;
begin
if DetectUTF16BOM(Buf, BufSize, R) then
begin
BOMSize := UTF16BOMSize;
if R then
Result := TUTF16LECodec
else
Result := TUTF16BECodec
end
else
if DetectUTF8BOM(Buf, BufSize) then
begin
BOMSize := UTF8BOMSize;
Result := TUTF8Codec;
end
else
begin
BOMSize := 0;
Result := nil;
end;
end;
{ }
{ Unicode conversion functions }
{ }
function EncodingToUTF16(const CodecClass: TUnicodeCodecClass;
const Buf: Pointer; const BufSize: Integer): WideString;
var C : TCustomUnicodeCodec;
begin
if not Assigned(CodecClass) then
begin
Result := '';
exit;
end;
C := CodecClass.Create;
try
C.DecodeStr(Buf, BufSize, Result);
finally
C.Free;
end;
end;
function EncodingToUTF16(const CodecClass: TUnicodeCodecClass;
const S: String): WideString;
var C : TCustomUnicodeCodec;
begin
if not Assigned(CodecClass) then
begin
Result := '';
exit;
end;
C := CodecClass.Create;
try
C.DecodeStr(PChar(S), Length(S), Result);
finally
C.Free;
end;
end;
function EncodingToUTF16(const CodecAlias: String;
const Buf: Pointer; const BufSize: Integer): WideString;
begin
Result := EncodingToUTF16(GetCodecClassByAlias(CodecAlias),
Buf, BufSize);
end;
function EncodingToUTF16(const CodecAlias, S: String): WideString;
begin
Result := EncodingToUTF16(GetCodecClassByAlias(CodecAlias), S);
end;
function UTF16ToEncoding(const CodecClass: TUnicodeCodecClass;
const S: WideString): String;
var C : TCustomUnicodeCodec;
I : Integer;
begin
if not Assigned(CodecClass) then
begin
Result := '';
exit;
end;
C := CodecClass.Create;
try
Result := C.Encode(Pointer(S), Length(S), I);
finally
C.Free;
end;
end;
function UTF16ToEncoding(const CodecAlias: String; const S: WideString): String;
begin
Result := UTF16ToEncoding(GetCodecClassByAlias(CodecAlias), S);
end;
{ }
{ EUnicodeCodecException helper functions }
{ }
procedure RaiseUnicodeCodecException(const Msg: String;
const ProcessedBytes: Integer); overload;
var E : EUnicodeCodecException;
begin
E := EUnicodeCodecException.Create(Msg);
E.ProcessedBytes := ProcessedBytes;
raise E;
end;
procedure RaiseUnicodeCodecException(const Msg: string; const Args: array of const;
const ProcessedBytes: Integer); overload;
var E : EUnicodeCodecException;
begin
E := EUnicodeCodecException.CreateFmt(Msg, Args);
E.ProcessedBytes := ProcessedBytes;
end;
{ }
{ TCustomUnicodeCodec }
{ }
constructor TCustomUnicodeCodec.Create;
begin
inherited Create;
FDecodeReplaceChar := WideChar(#$FFFD);
FErrorAction := eaException;
FReadLFOption := lrPass;
FWriteLFOption := lwLF;
ResetReadAhead;
end;
constructor TCustomUnicodeCodec.CreateEx(const AErrorAction: TCodecErrorAction;
const ADecodeReplaceChar: WideChar; const AReadLFOption: TCodecReadLFOption;
const AWriteLFOption: TCodecWriteLFOption);
begin
inherited Create;
FErrorAction := AErrorAction;
FDecodeReplaceChar := ADecodeReplaceChar;
FReadLFOption := AReadLFOption;
FWriteLFOption := AWriteLFOption;
ResetReadAhead;
end;
procedure TCustomUnicodeCodec.ResetReadAhead;
begin
FReadAhead := False;
FReadAheadBuffer := 0;
end;
procedure TCustomUnicodeCodec.SetDecodeReplaceChar(const Value: WideChar);
begin
FDecodeReplaceChar := Value;
end;
procedure TCustomUnicodeCodec.SetErrorAction(const Value: TCodecErrorAction);
begin
FErrorAction := Value;
end;
procedure TCustomUnicodeCodec.SetReadLFOption(const Value: TCodecReadLFOption);
begin
FReadLFOption := Value;
end;
procedure TCustomUnicodeCodec.SetWriteLFOption(const Value: TCodecWriteLFOption);
begin
FWriteLFOption := Value;
end;
procedure TCustomUnicodeCodec.SetOnRead(const Value: TCodecReadEvent);
begin
if @Value <> @FOnRead then
begin
ResetReadAhead;
FOnRead := Value;
end;
end;
procedure TCustomUnicodeCodec.DecodeStr(const Buf: Pointer; const BufSize: Integer;
var Dest: WideString);
var P : PChar;
Q : PWideChar;
L, M : Integer;
I, J : Integer;
begin
P := Buf;
L := BufSize;
if not Assigned(P) or (L <= 0) then
begin
Dest := '';
exit;
end;
SetLength(Dest, BufSize);
M := 0;
Repeat
Q := Pointer(Dest);
Inc(Q, M);
Decode(P, L, Q, BufSize * Sizeof(WideChar), I, J);
Dec(L, I);
Inc(P, I);
Inc(M, J);
if (J < BufSize) or (L <= 0) then
break;
SetLength(Dest, M + BufSize);
Until False;
if Length(Dest) <> M then
SetLength(Dest, M);
end;
function TCustomUnicodeCodec.EncodeStr(const S: WideString): String;
var I : Integer;
begin
Result := Encode(Pointer(S), Length(S), I);
end;
function TCustomUnicodeCodec.ReadBuffer(var Buf; Count: Integer): Boolean;
begin
Result := False;
if Assigned(FOnRead) then
FOnRead(self, Buf, Count, Result);
end;
procedure TCustomUnicodeCodec.WriteBuffer(const Buf; Count: Integer);
begin
if Assigned(FOnWrite) then
FOnWrite(self, Buf, Count);
end;
procedure TCustomUnicodeCodec.ReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer);
begin
// Get UCS4 character from read-ahead buffer or from InternalReadUCS4Char
if FReadAhead then
begin
C := FReadAheadBuffer;
ByteCount := FReadAheadByteCount;
FReadAhead := False;
end
else
InternalReadUCS4Char(C, ByteCount);
// Adjust line breaks to Linux-style breaks with a single LINE FEED character
if (C = UCS4_CR) and (ReadLFOption = lrNormalize) then
begin
InternalReadUCS4Char(FReadAheadBuffer, FReadAheadByteCount);
if FReadAheadBuffer = UCS4_LF then
Inc(ByteCount, FReadAheadByteCount)
else
FReadAhead := True;
C := UCS4_LF;
end;
end;
procedure TCustomUnicodeCodec.WriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer);
var ByteCount2 : Integer;
begin
if C = UCS4_LF then
// Transform LINE FEED character
Case WriteLFOption of
lwLF : InternalWriteUCS4Char(UCS4_LF, ByteCount);
lwCR : InternalWriteUCS4Char(UCS4_CR, ByteCount);
lwCRLF :
begin
InternalWriteUCS4Char(UCS4_CR, ByteCount);
InternalWriteUCS4Char(UCS4_LF, ByteCount2);
Inc(ByteCount, ByteCount2);
end;
end
else
InternalWriteUCS4Char(C, ByteCount);
end;
{ }
{ TCustomSingleByteCodec }
{ }
constructor TCustomSingleByteCodec.Create;
begin
inherited Create;
FEncodeReplaceChar := AnsiChar(#32);
end;
constructor TCustomSingleByteCodec.CreateEx(const ErrorAction: TCodecErrorAction;
const DecodeReplaceChar: WideChar; const EncodeReplaceChar: AnsiChar);
begin
inherited CreateEx(ErrorAction, DecodeReplaceChar);
FEncodeReplaceChar := EncodeReplaceChar;
end;
procedure TCustomSingleByteCodec.Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer);
var P : PChar;
Q : PWideChar;
I, L, C : Integer;
begin
P := Buf;
Q := DestBuf;
C := DestSize div Sizeof(WideChar);
if not Assigned(P) or (BufSize <= 0) or not Assigned(Q) or (C <= 0) then
begin
ProcessedBytes := 0;
DestLength := 0;
exit;
end;
L := 0;
For I := 1 to BufSize do
try
if L >= C then
break;
Q^ := DecodeChar(P^);
Inc(P);
Inc(Q);
Inc(L);
except
on E : Exception do
Case FErrorAction of
eaException :
RaiseUnicodeCodecException(E.Message, P - Buf);
eaStop :
break;
eaSkip :
Inc(P);
eaIgnore :
begin
Q^ := WideChar(P^);
Inc(P);
Inc(Q);
Inc(L);
end;
eaReplace :
begin
Q^ := FDecodeReplaceChar;
Inc(P);
Inc(Q);
Inc(L);
end;
end;
end;
DestLength := L;
ProcessedBytes := P - Buf;
end;
function TCustomSingleByteCodec.DecodeUCS4Char(const P: AnsiChar): UCS4Char;
begin
Result := Ord(DecodeChar(P));
end;
function TCustomSingleByteCodec.Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String;
var P : PChar;
Q : PWideChar;
I, L, M : Integer;
begin
Q := S;
if not Assigned(Q) or (Length <= 0) then
begin
ProcessedChars := 0;
Result := '';
exit;
end;
SetLength(Result, Length);
L := 0;
M := 0;
P := Pointer(Result);
For I := 1 to Length do
try
P^ := EncodeChar(Q^);
Inc(P);
Inc(Q);
Inc(L);
Inc(M);
except
on E : Exception do
Case FErrorAction of
eaException :
RaiseUnicodeCodecException(E.Message, L);
eaStop :
break;
eaSkip :
begin
Inc(Q);
Inc(L);
end;
eaIgnore :
begin
P^ := Char(Q^);
Inc(P);
Inc(Q);
Inc(L);
Inc(M);
end;
eaReplace :
begin
P^ := FEncodeReplaceChar;
Inc(P);
Inc(Q);
Inc(L);
Inc(M);
end;
end;
end;
if Length <> M then
SetLength(Result, M);
ProcessedChars := L;
end;
function TCustomSingleByteCodec.EncodeUCS4Char(const Ch: UCS4Char): AnsiChar;
begin
if Ch < $10000 then
Result := EncodeChar(WideChar(Ch))
else
raise EConvertError.CreateFmt(SInvalidCodePoint, [Ch, '']);
end;
procedure TCustomSingleByteCodec.InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer);
var B : AnsiChar;
begin
if ReadBuffer(B, 1) then
begin
C := Ord(DecodeChar(B));
ByteCount := 1;
end
else
begin
C := UCS4_STRING_TERMINATOR;
ByteCount := 0;
end;
end;
procedure TCustomSingleByteCodec.InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer);
var E : Char;
begin
E := EncodeUCS4Char(C);
WriteBuffer(E, 1);
ByteCount := 1;
end;
{ }
{ UTF-8 }
{ }
procedure TUTF8Codec.Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer);
var P : PChar;
Q : PWideChar;
L, I : Integer;
M, N : Integer;
R : TUTF8Error;
C : WideChar;
begin
P := Buf;
L := BufSize;
Q := DestBuf;
N := DestSize div Sizeof(WideChar);
if not Assigned(P) or (L <= 0) or not Assigned(Q) or (N <= 0) then
begin
ProcessedBytes := 0;
DestLength := 0;
exit;
end;
M := 0;
Repeat
if M >= N then
break;
try
R := UTF8ToWideChar(P, L, I, C);
Case R of
UTF8ErrorNone :
begin
Q^ := C;
Inc(Q);
Inc(M);
Inc(P, I);
Dec(L, I);
end;
UTF8ErrorInvalidEncoding :
raise EConvertError.CreateFmt(SInvalidEncoding, ['UTF-8']);
UTF8ErrorIncompleteEncoding :
begin
ProcessedBytes := BufSize - L;
DestLength := M;
exit;
end;
UTF8ErrorOutOfRange :
raise EConvertError.CreateFmt(SEncodingOutOfRange, ['UTF-8']);
else
raise EConvertError.CreateFmt(SUTF8Error, [Ord(R)]);
end;
except
on E : Exception do
Case FErrorAction of
eaException :
RaiseUnicodeCodecException(E.Message, BufSize - L);
eaStop :
break;
eaSkip :
begin
Inc(P, I);
Dec(L, I);
end;
eaIgnore :
begin
Q^ := C;
Inc(Q);
Inc(M);
Inc(P, I);
Dec(L, I);
end;
eaReplace :
begin
Q^ := FDecodeReplaceChar;
Inc(Q);
Inc(M);
Inc(P, I);
Dec(L, I);
end;
end;
end;
Until L <= 0;
ProcessedBytes := BufSize - L;
DestLength := M;
end;
function TUTF8Codec.Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String;
var P : PWideChar;
Q : PChar;
I, L,
M, J : Integer;
begin
P := S;
if not Assigned(P) or (Length <= 0) then
begin
ProcessedChars := 0;
Result := '';
exit;
end;
L := Length * 3;
SetLength(Result, L);
Q := Pointer(Result);
M := 0;
For I := 1 to Length do
begin
WideCharToUTF8(P^, Q, L, J);
Inc(P);
Inc(Q, J);
Dec(L, J);
Inc(M, J);
end;
SetLength(Result, M);
ProcessedChars := Length;
end;
procedure TUTF8Codec.InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer);
const
MaxCode: array[1..6] of LongWord =
($7F, $7FF, $FFFF, $1FFFFF, $3FFFFFF, $7FFFFFFF);
var B, First, Mask: Byte;
begin
if not ReadBuffer(B, 1) then
begin
C := UCS4_STRING_TERMINATOR;
ByteCount := 0;
exit;
end;
C := B;
ByteCount := 1;
if C >= $80 then
begin // UTF-8 sequence
First := B;
Mask := $40;
if (B and $C0 <> $C0) then
raise EConvertError.CreateFmt(SInvalidEncoding, ['UTF-8']);
while (Mask and First <> 0) do
begin
if not ReadBuffer(B, 1) then
raise EConvertError.CreateFmt(SInvalidEncoding, ['UTF-8']);
if (B and $C0) <> $80 then
raise EConvertError.CreateFmt(SInvalidEncoding, ['UTF-8']);
C := (C shl 6) or (B and $3F); // Add bits to C
Inc(ByteCount); // Increase sequence length
Mask := Mask shr 1; // Adjust Mask
end;
if ByteCount > 6 then // No 0 bit in sequence header 'First'
raise EConvertError.CreateFmt(SInvalidEncoding, ['UTF-8']);
C := C and MaxCode[ByteCount]; // dispose of header bits
// Check for invalid sequence as suggested by RFC2279
if ((ByteCount > 1) and (C <= MaxCode[ByteCount - 1])) then
raise EConvertError.CreateFmt(SInvalidEncoding, ['UTF-8']);
end;
end;
procedure TUTF8Codec.InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer);
var Buffer : Array[0..3] of Byte;
begin
UCS4CharToUTF8(C, @Buffer, 4, ByteCount);
WriteBuffer(Buffer, ByteCount);
end;
procedure TUTF8Codec.WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer);
const
UTF8_LF : Byte = $0A;
UTF8_CR : Byte = $0D;
UTF8_CRLF : Array[0..1] of Byte = ($0D, $0A);
begin
if C = UCS4_LF then
Case WriteLFOption of
lwLF:
begin
WriteBuffer(UTF8_LF, 1);
ByteCount := 1;
end;
lwCR:
begin
WriteBuffer(UTF8_CR, 1);
ByteCount := 1;
end;
lwCRLF:
begin
WriteBuffer(UTF8_CRLF, 2);
ByteCount := 2;
end;
end
else
InternalWriteUCS4Char(C, ByteCount);
end;
{ }
{ UTF-16BE }
{ }
procedure TUTF16BECodec.Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer);
var
L, M : Integer;
P, Q : PWideChar;
begin
L := BufSize;
if L > DestSize then
L := DestSize;
if L <= 1 then
begin
ProcessedBytes := 0;
DestLength := 0;
exit;
end;
Dec(L, L mod Sizeof(WideChar));
M := L div Sizeof(WideChar);
P := Buf;
Q := DestBuf;
Move(P^, Q^, L);
DestLength := M;
ProcessedBytes := L;
end;
function TUTF16BECodec.Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String;
var L : Integer;
begin
if Length <= 0 then
begin
ProcessedChars := 0;
Result := '';
exit;
end;
L := Length * 2;
SetLength(Result, L);
Move(S^, Pointer(Result)^, L);
ProcessedChars := Length;
end;
procedure TUTF16BECodec.InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer);
var LowSurrogate: Array[0..1] of Byte; // We do not use Word, because the byte order of a Word is CPU dependant
begin
C := 0;
// C must be initialized, because the ReadBuffer(C, 2) call below does
// not fill the whole variable!
if not ReadBuffer(C, 2) then
begin
C := UCS4_STRING_TERMINATOR;
ByteCount := 0;
Exit;
end;
C := Swap(C); // UCS4Chars are stored in Little Endian mode; so we need to swap the bytes.
Case C of
$D800..$DBFF: // High surrogate of Unicode character [$10000..$10FFFF]
begin
if not ReadBuffer(LowSurrogate, 2) then
raise EConvertError.CreateFmt(SInvalidEncoding, ['UTF-16BE']);
Case LowSurrogate[0] of
$DC..$DF:
begin
C := ((C - $D7C0) shl 10) + ((LowSurrogate[0] xor $DC) shl 8) + LowSurrogate[1];
ByteCount := 4;
end;
else
raise EConvertError.CreateFmt(SInvalidEncoding, ['UTF-16BE']);
end;
end;
$DC00..$DFFF: // Low surrogate of Unicode character [$10000..$10FFFF]
raise EConvertError.CreateFmt(SInvalidEncoding, ['UTF-16BE']);
else
ByteCount := 2;
end;
end;
procedure TUTF16BECodec.InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer);
var Buffer : Array[0..3] of Byte;
begin
UCS4CharToUTF16BE(C, @Buffer, 4, ByteCount);
WriteBuffer(Buffer[0], ByteCount);
end;
procedure TUTF16BECodec.WriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer);
const
UTF16BE_LF : Array[0..1] of Byte = ($00, $0A);
UTF16BE_CR : Array[0..1] of Byte = ($00, $0D);
UTF16BE_CRLF : Array[0..3] of Byte = ($00, $0D, $00, $0A);
begin
if C = UCS4_LF then
Case WriteLFOption of
lwLF:
begin
WriteBuffer(UTF16BE_LF, 2);
ByteCount := 2;
end;
lwCR:
begin
WriteBuffer(UTF16BE_CR, 2);
ByteCount := 2;
end;
lwCRLF:
begin
WriteBuffer(UTF16BE_CRLF, 4);
ByteCount := 4;
end;
end
else
InternalWriteUCS4Char(C, ByteCount);
end;
{ }
{ UTF-16LE }
{ }
procedure TUTF16LECodec.Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer; out ProcessedBytes, DestLength: Integer);
var I, L, M : Integer;
P, Q : PWideChar;
begin
L := BufSize;
if L > DestSize then
L := DestSize;
if L <= 1 then
begin
ProcessedBytes := 0;
DestLength := 0;
exit;
end;
Dec(L, L mod Sizeof(WideChar));
M := L div Sizeof(WideChar);
P := Buf;
Q := DestBuf;
For I := 1 to M do
begin
Q^ := SwapUTF16Endian(P^);
Inc(P);
Inc(Q);
end;
DestLength := M;
ProcessedBytes := L;
end;
function TUTF16LECodec.Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String;
var I, L : Integer;
P, Q : PWideChar;
begin
if Length <= 0 then
begin
ProcessedChars := 0;
Result := '';
exit;
end;
L := Length * 2;
SetLength(Result, L);
P := S;
Q := Pointer(Result);
For I := 1 to Length do
begin
Q^ := SwapUTF16Endian(P^);
Inc(P);
Inc(Q);
end;
ProcessedChars := Length;
end;
procedure TUTF16LECodec.InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer);
var LowSurrogate : Array[0..1] of Byte; // We do not use Word, because the byte order of a Word is CPU dependant
begin
C := 0;
// C must be initialized, because the ReadBuffer(C, 2) call below does
// not fill the whole variable!
if not ReadBuffer(C, 2) then
begin
C := UCS4_STRING_TERMINATOR;
ByteCount := 0;
Exit;
end;
Case C of // UCS4Chars are stored in Little Endian mode; so we just can go on with it.
$D800..$DBFF: // High surrogate of Unicode character [$10000..$10FFFF]
begin
if not ReadBuffer(LowSurrogate, 2) then
raise EConvertError.CreateFmt(SInvalidEncoding, ['UTF-16LE']);
Case LowSurrogate[1] of
$DC..$DF:
begin
C := ((C - $D7C0) shl 10) + ((LowSurrogate[1] xor $DC) shl 8) + LowSurrogate[0];
ByteCount := 4;
end;
else
raise EConvertError.CreateFmt(SInvalidEncoding, ['UTF-16LE']);
end;
end;
$DC00..$DFFF: // Low surrogate of Unicode character [$10000..$10FFFF]
raise EConvertError.CreateFmt(SInvalidEncoding, ['UTF-16LE']);
else
ByteCount := 2;
end;
end;
procedure TUTF16LECodec.InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer);
var Buffer : Array[0..3] of Byte;
begin
UCS4CharToUTF16LE(C, @Buffer, 4, ByteCount);
WriteBuffer(Buffer[0], ByteCount);
end;
procedure TUTF16LECodec.WriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer);
const
UTF16LE_LF : Array[0..1] of Byte = ($0A, $00);
UTF16LE_CR : Array[0..1] of Byte = ($0D, $00);
UTF16LE_CRLF : Array[0..3] of Byte = ($0D, $00, $0A, $00);
begin
if C = UCS4_LF then
Case WriteLFOption of
lwLF:
begin
WriteBuffer(UTF16LE_LF, 2);
ByteCount := 2;
end;
lwCR:
begin
WriteBuffer(UTF16LE_CR, 2);
ByteCount := 2;
end;
lwCRLF:
begin
WriteBuffer(UTF16LE_CRLF, 4);
ByteCount := 4;
end;
end
else
InternalWriteUCS4Char(C, ByteCount);
end;
{ }
{ TUCS4BECodec }
{ }
procedure TUCS4BECodec.Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer);
var Ch4 : UCS4Char;
N, P, Q : PChar;
L, C : Integer;
begin
P := Buf;
Q := DestBuf;
C := DestSize div 2;
if not Assigned(P) or (BufSize <= 0) or not Assigned(Q) or (C <= 0) then
begin
ProcessedBytes := 0;
DestLength := 0;
exit;
end;
L := 0;
N := P + BufSize - 4;
While P <= N do
begin
Ch4 := Ord(P^) * $1000000 +
Ord((P + 1)^) * $10000 +
Ord((P + 2)^) * $100 +
Ord((P + 3)^);
if ((Ch4 >= $D800) and (Ch4 < $E000)) or (Ch4 > $10FFFF) then
Case FErrorAction of
eaException :
if Ch4 > $10FFFF then
RaiseUnicodeCodecException(SCannotConvertUCS4, [Ch4, 'UTF-16'], P - Buf)
else
RaiseUnicodeCodecException(SSurrogateNotAllowed, [Ch4, 'UCS-4'], P - Buf);
eaStop :
break;
eaSkip :
Inc(P, 4);
eaIgnore :
begin
if L + 1 >= C then
break;
Q^ := (P + 1)^; // Nevertheless change Big Endian to Little Endian ...
(Q + 1)^ := P^;
(Q + 2)^ := (P + 3)^;
(Q + 3)^ := (P + 2)^;
Inc(Q, 4);
Inc(P, 4);
Inc(L, 2);
end;
eaReplace :
begin
if L >= C then
break;
Q^ := Char(Lo(Ord(FDecodeReplaceChar)));
(Q + 1)^ := Char(Hi(Ord(FDecodeReplaceChar)));
Inc(Q, 2);
Inc(P, 4);
Inc(L);
end;
end
else
begin
if Ch4 > $FFFF then
begin
if L + 1 >= C then
break;
Q^ := Char((Ord((P + 1)^) shl 6) + (Ord((P + 2)^) shr 2));
(Q + 1)^ := Char($D8 + (Ord((P + 1)^) shr 2));
(Q + 2)^ := (P + 3)^;
(Q + 3)^ := Char($DC + (3 and Ord((P + 2)^)));
Inc(Q, 4);
Inc(P, 4);
Inc(L, 2);
end
else
begin
if L >= C then
break;
Q^ := (P + 3)^;
(Q + 1)^ := (P + 2)^;
Inc(Q, 2);
Inc(P, 4);
Inc(L);
end;
end;
end;
DestLength := L;
ProcessedBytes := P - Buf;
end;
function TUCS4BECodec.Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String;
var P, N : PWideChar;
Q : PChar;
M : Integer;
HighSurrogate : Word;
LowSurrogate : Word;
begin
P := S;
if not Assigned(P) or (Length <= 0) then
begin
ProcessedChars := 0;
Result := '';
exit;
end;
SetLength(Result, Length * 4);
Q := Pointer(Result);
M := 0;
N := P + Length;
While P < N do
Case Ord(P^) of
$D800..$DBFF: // High surrogate of Unicode character [$10000..$10FFFF]
begin
if P = N - 1 then // End of WideString?
raise EConvertError.Create(SLowSurrogateNotFound);
HighSurrogate := Ord(P^);
Inc(P);
Inc(M, 2);
LowSurrogate := Ord(P^);
Case LowSurrogate of // Low Surrogate following?
$DC00..$DF00:
begin
Q^ := Char(0);
(Q+1)^ := Char((HighSurrogate - $D7C0) shr 6);
(Q+2)^ := Char(((HighSurrogate and $3F) shl 2) + ((LowSurrogate - $DC00) shr 8));
(Q+3)^ := Char(Lo(LowSurrogate));
Inc(P);
Inc(Q, 4);
Inc(M, 4);
end;
else
raise EConvertError.Create(SLowSurrogateNotFound);
end;
end;
$DC00..$DFFF: // Low surrogate of Unicode character [$10000..$10FFFF]
raise EConvertError.Create(SHighSurrogateNotFound);
else
Q^ := Char(0);
(Q+1)^ := Char(0);
(Q+2)^ := Char(Hi(Ord(P^)));
(Q+3)^ := Char(Lo(Ord(P^)));
Inc(P);
Inc(Q, 4);
Inc(M, 4);
end;
SetLength(Result, M);
ProcessedChars := Length;
end;
procedure TUCS4BECodec.InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer);
var B : Array[0..3] of Byte;
begin
if ReadBuffer(B, 4) then
begin
C := B[0] * $1000000 + B[1] * $10000 + B[2] * $100 + B[3];
Case C of
$D800..$DFFF: // Do not accept surrogates
raise EConvertError.CreateFmt(SSurrogateNotAllowed, [C, 'UCS-4BE']);
end;
ByteCount := 4;
end
else
begin
C := UCS4_STRING_TERMINATOR;
ByteCount := 0;
end;
end;
procedure TUCS4BECodec.InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer);
var Buffer : Array[0..3] of Byte;
begin
Buffer[0] := C shr 24;
Buffer[1] := (C and $FF0000) shr 16;
Buffer[2] := (C and $FF00) shr 8;
Buffer[3] := C and $FF;
WriteBuffer(Buffer, 4);
ByteCount := 4;
end;
procedure TUCS4BECodec.WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer);
const
UCS4BE_LF : Array[0..3] of Byte = ($0A, $00, $00, $00);
UCS4BE_CR : Array[0..3] of Byte = ($0D, $00, $00, $00);
UCS4BE_CRLF : Array[0..7] of Byte = ($0D, $00, $00, $00, $0A, $00, $00, $00);
begin
if C = UCS4_LF then
Case WriteLFOption of
lwLF:
begin
WriteBuffer(UCS4BE_LF, 4);
ByteCount := 4;
end;
lwCR:
begin
WriteBuffer(UCS4BE_CR, 4);
ByteCount := 4;
end;
lwCRLF:
begin
WriteBuffer(UCS4BE_CRLF, 8);
ByteCount := 8;
end;
end
else
InternalWriteUCS4Char(C, ByteCount);
end;
{ }
{ TUCS4LECodec }
{ }
procedure TUCS4LECodec.Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer);
var Ch4 : UCS4Char;
N, P, Q : PChar;
L, C : Integer;
begin
P := Buf;
Q := DestBuf;
C := DestSize div 2;
if not Assigned(P) or (BufSize <= 0) or not Assigned(Q) or (C <= 0) then
begin
ProcessedBytes := 0;
DestLength := 0;
exit;
end;
L := 0;
N := P + BufSize - 4;
While P <= N do
begin
Ch4 := Ord((P + 3)^) * $1000000 +
Ord((P + 2)^) * $10000 +
Ord((P + 1)^) * $100 +
Ord(P^);
if ((Ch4 >= $D800) and (Ch4 < $E000)) or (Ch4 > $10FFFF) then
Case FErrorAction of
eaException :
if Ch4 > $10FFFF then
RaiseUnicodeCodecException(SCannotConvertUCS4, [Ch4, 'UTF-16'], P - Buf)
else
RaiseUnicodeCodecException(SSurrogateNotAllowed, [Ch4, 'UCS-4'], P - Buf);
eaStop :
break;
eaSkip :
Inc(P, 4);
eaIgnore :
begin
if L + 1 >= C then
break;
Q^ := P^;
(Q + 1)^ := (P + 1)^;
(Q + 2)^ := (P + 2)^;
(Q + 3)^ := (P + 3)^;
Inc(Q, 4);
Inc(P, 4);
Inc(L, 2);
end;
eaReplace :
begin
if L >= C then
break;
Q^ := Char(Lo(Ord(FDecodeReplaceChar)));
(Q + 1)^ := Char(Hi(Ord(FDecodeReplaceChar)));
Inc(Q, 2);
Inc(P, 4);
Inc(L);
end;
end
else
if Ch4 > $FFFF then
begin
if L + 1 >= C then
break;
Q^ := Char((Ord((P + 2)^) shl 6) + (Ord((P + 1)^) shr 2));
(Q + 1)^ := Char($D8 + (Ord((P + 2)^) shr 2));
(Q + 2)^ := P^;
(Q + 3)^ := Char($DC + (3 and Ord((P + 1)^)));
Inc(Q, 4);
Inc(P, 4);
Inc(L, 2);
end
else
begin
if L >= C then
break;
Q^ := P^;
(Q + 1)^ := (P + 1)^;
Inc(Q, 2);
Inc(P, 4);
Inc(L);
end;
end;
DestLength := L;
ProcessedBytes := P - Buf;
end;
function TUCS4LECodec.Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String;
var P, N : PWideChar;
Q : PChar;
M : Integer;
HighSurrogate : Word;
LowSurrogate : Word;
begin
P := S;
if not Assigned(P) or (Length <= 0) then
begin
ProcessedChars := 0;
Result := '';
exit;
end;
SetLength(Result, Length * 4);
Q := Pointer(Result);
M := 0;
N := P + Length;
While P < N do
Case Ord(P^) of
$D800..$DBFF: // High surrogate of Unicode character [$10000..$10FFFF]
begin
if P = N - 1 then // End of WideString?
raise EConvertError.Create(SLowSurrogateNotFound);
HighSurrogate := Ord(P^);
Inc(P);
Inc(M, 2);
LowSurrogate := Ord(P^);
Case LowSurrogate of // Low Surrogate following?
$DC00..$DF00:
begin
Q^ := Char(Lo(LowSurrogate));
(Q + 1)^ := Char(((HighSurrogate and $3F) shl 2) + ((LowSurrogate - $DC00) shr 8));
(Q + 2)^ := Char((HighSurrogate - $D7C0) shr 6);
(Q + 3)^ := Char(0);
Inc(P);
Inc(Q, 4);
Inc(M, 4);
end;
else
raise EConvertError.Create(SLowSurrogateNotFound);
end;
end;
$DC00..$DFFF: // Low surrogate of Unicode character [$10000..$10FFFF]
raise EConvertError.Create(SHighSurrogateNotFound);
else
Q^ := Char(0);
(Q + 1)^ := Char(0);
(Q + 2)^ := Char(Hi(Ord(P^)));
(Q + 3)^ := Char(Lo(Ord(P^)));
Inc(P);
Inc(Q, 4);
Inc(M, 4);
end;
SetLength(Result, M);
ProcessedChars := Length;
end;
procedure TUCS4LECodec.InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer);
var B : Array[0..3] of Byte;
begin
if ReadBuffer(B, 4) then
begin
C := B[3] * $1000000 + B[2] * $10000 + B[1] * $100 + B[0];
Case C of
$D800..$DFFF: // Do not accept surrogates
raise EConvertError.CreateFmt(SSurrogateNotAllowed, [C, 'UCS-4LE']);
end;
ByteCount := 4;
end
else
begin
C := UCS4_STRING_TERMINATOR;
ByteCount := 0;
end;
end;
procedure TUCS4LECodec.InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer);
var Buffer : Array[0..3] of Byte;
begin
Buffer[0] := C and $FF;
Buffer[1] := (C and $FF00) shr 8;
Buffer[2] := (C and $FF0000) shr 16;
Buffer[3] := C shr 24;
WriteBuffer(Buffer, 4);
ByteCount := 4;
end;
procedure TUCS4LECodec.WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer);
const
UCS4LE_LF : Array[0..3] of Byte = ($00, $00, $00, $0A);
UCS4LE_CR : Array[0..3] of Byte = ($00, $00, $00, $0D);
UCS4LE_CRLF : Array[0..7] of Byte = ($00, $00, $00, $0D, $00, $00, $00, $0A);
begin
if C = UCS4_LF then
Case WriteLFOption of
lwLF:
begin
WriteBuffer(UCS4LE_LF, 4);
ByteCount := 4;
end;
lwCR:
begin
WriteBuffer(UCS4LE_CR, 4);
ByteCount := 4;
end;
lwCRLF:
begin
WriteBuffer(UCS4LE_CRLF, 8);
ByteCount := 8;
end;
end
else
InternalWriteUCS4Char(C, ByteCount);
end;
{ }
{ TUCS4_2143Codec }
{ }
procedure TUCS4_2143Codec.Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer);
var Ch4 : UCS4Char;
N, P, Q : PChar;
L, C : Integer;
begin
P := Buf;
Q := DestBuf;
C := DestSize div 2;
if not Assigned(P) or (BufSize <= 0) or not Assigned(Q) or (C <= 0) then
begin
ProcessedBytes := 0;
DestLength := 0;
exit;
end;
L := 0;
N := P + BufSize - 4;
While P <= N do
begin
Ch4 := Ord((P + 1)^) * $1000000 +
Ord(P^) * $10000 +
Ord((P + 3)^) * $100 +
Ord((P + 2)^);
if ((Ch4 >= $D800) and (Ch4 < $E000)) or (Ch4 > $10FFFF) then
Case FErrorAction of
eaException :
if Ch4 > $10FFFF
then RaiseUnicodeCodecException(SCannotConvertUCS4, [Ch4, 'UTF-16'], P - Buf)
else RaiseUnicodeCodecException(SSurrogateNotAllowed, [Ch4, 'UCS-4'], P - Buf);
eaStop :
break;
eaSkip :
Inc(P, 4);
eaIgnore :
begin
if L + 1 >= C then
break;
Q^ := P^;
(Q + 1)^ := (P + 1)^;
(Q + 2)^ := (P + 2)^;
(Q + 3)^ := (P + 3)^;
Inc(Q, 4);
Inc(P, 4);
Inc(L, 2);
end;
eaReplace :
begin
if L >= C then
break;
Q^ := Char(Lo(Ord(FDecodeReplaceChar)));
(Q + 1)^ := Char(Hi(Ord(FDecodeReplaceChar)));
Inc(Q, 2);
Inc(P, 4);
Inc(L);
end;
end
else
if Ch4 > $FFFF then
begin
if L + 1 >= C then
break;
Q^ := Char((Ord(P^) shl 6) + (Ord((P + 3)^) shr 2));
(Q + 1)^ := Char($D8 + (Ord(P^) shr 2));
(Q + 2)^ := (P + 2)^;
(Q + 3)^ := Char($DC + (3 and Ord((P + 3)^)));
Inc(Q, 4);
Inc(P, 4);
Inc(L, 2);
end
else
begin
if L >= C then
break;
Q^ := (P + 2)^;
(Q + 1)^ := (P + 3)^;
Inc(Q, 2);
Inc(P, 4);
Inc(L);
end;
end;
DestLength := L;
ProcessedBytes := P - Buf;
end;
function TUCS4_2143Codec.Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String;
var P, N : PWideChar;
Q : PChar;
M : Integer;
HighSurrogate : Word;
LowSurrogate : Word;
begin
P := S;
if not Assigned(P) or (Length <= 0) then
begin
ProcessedChars := 0;
Result := '';
exit;
end;
SetLength(Result, Length * 4);
Q := Pointer(Result);
M := 0;
N := P + Length;
While P < N do
begin
Case Ord(P^) of
$D800..$DBFF: // High surrogate of Unicode character [$10000..$10FFFF]
begin
if P = N - 1 then // End of WideString?
raise EConvertError.Create(SLowSurrogateNotFound);
HighSurrogate := Ord(P^);
Inc(P);
Inc(M, 2);
LowSurrogate := Ord(P^);
Case LowSurrogate of // Low Surrogate following?
$DC00..$DF00:
begin
Q^ := Char((HighSurrogate - $D7C0) shr 6);
(Q + 1)^ := Char(0);
(Q + 2)^ := Char(Lo(LowSurrogate));
(Q + 3)^ := Char(((HighSurrogate and $3F) shl 2) + ((LowSurrogate - $DC00) shr 8));
Inc(P);
Inc(Q, 4);
Inc(M, 4);
end;
else
raise EConvertError.Create(SLowSurrogateNotFound);
end;
end;
$DC00..$DFFF: // Low surrogate of Unicode character [$10000..$10FFFF]
raise EConvertError.Create(SHighSurrogateNotFound);
else
Q^ := Char(0);
(Q + 1)^ := Char(0);
(Q + 2)^ := Char(Lo(Ord(P^)));
(Q + 3)^ := Char(Hi(Ord(P^)));
Inc(P);
Inc(Q, 4);
Inc(M, 4);
end;
end;
SetLength(Result, M);
ProcessedChars := Length;
end;
procedure TUCS4_2143Codec.InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer);
var B : Array[0..3] of Byte;
begin
if ReadBuffer(B, 4) then
begin
C := B[1] * $1000000 + B[0] * $10000 + B[3] * $100 + B[2];
Case C of
$D800..$DFFF: // Do not accept surrogates
raise EConvertError.CreateFmt(SSurrogateNotAllowed, [C, 'UCS-4']);
end;
end
else
C := UCS4_STRING_TERMINATOR;
ByteCount := 4;
end;
procedure TUCS4_2143Codec.InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer);
var Buffer : Array[0..3] of Byte;
begin
Buffer[0] := (C and $FF0000) shr 16;
Buffer[1] := C shr 24;
Buffer[2] := C and $FF;
Buffer[3] := (C and $FF00) shr 8;
WriteBuffer(Buffer, 4);
ByteCount := 4;
end;
procedure TUCS4_2143Codec.WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer);
const
UCS4_2143_LF : Array[0..3] of Byte = ($00, $0A, $00, $00);
UCS4_2143_CR : Array[0..3] of Byte = ($00, $0D, $00, $00);
UCS4_2143_CRLF : Array[0..7] of Byte = ($00, $0D, $00, $00, $00, $0A, $00, $00);
begin
if C = UCS4_LF then
Case WriteLFOption of
lwLF:
begin
WriteBuffer(UCS4_2143_LF, 4);
ByteCount := 4;
end;
lwCR:
begin
WriteBuffer(UCS4_2143_CR, 4);
ByteCount := 4;
end;
lwCRLF:
begin
WriteBuffer(UCS4_2143_CRLF, 8);
ByteCount := 8;
end;
end
else
InternalWriteUCS4Char(C, ByteCount);
end;
{ }
{ TUCS4_3412Codec }
{ }
procedure TUCS4_3412Codec.Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer);
var Ch4 : UCS4Char;
N, P, Q : PChar;
L, C : Integer;
begin
P := Buf;
Q := DestBuf;
C := DestSize div 2;
if not Assigned(P) or (BufSize <= 0) or not Assigned(Q) or (C <= 0) then
begin
ProcessedBytes := 0;
DestLength := 0;
exit;
end;
L := 0;
N := P + BufSize - 4;
While P <= N do
begin
Ch4 := Ord((P + 2)^) * $1000000 +
Ord((P + 3)^) * $10000 +
Ord(P^) * $100 +
Ord((P + 1)^);
if ((Ch4 >= $D800) and (Ch4 < $E000)) or (Ch4 > $10FFFF) then
Case FErrorAction of
eaException :
if Ch4 > $10FFFF then
RaiseUnicodeCodecException(SCannotConvertUCS4, [Ch4, 'UTF-16'], P - Buf)
else
RaiseUnicodeCodecException(SSurrogateNotAllowed, [Ch4, 'UCS-4'], P - Buf);
eaStop :
break;
eaSkip :
Inc(P, 4);
eaIgnore :
begin
if L + 1 >= C then
break;
Q^ := (P + 1)^; // Nevertheless change Big Endian to Little Endian ...
(Q + 1)^ := P^;
(Q + 2)^ := (P + 3)^;
(Q + 3)^ := (P + 2)^;
Inc(Q, 4);
Inc(P, 4);
Inc(L, 2);
end;
eaReplace :
begin
if L >= C then
break;
Q^ := Char(Lo(Ord(FDecodeReplaceChar)));
(Q + 1)^ := Char(Hi(Ord(FDecodeReplaceChar)));
Inc(Q, 2);
Inc(P, 4);
Inc(L);
end;
end
else
if Ch4 > $FFFF then
begin
if L + 1 >= C then
break;
Q^ := Char((Ord((P + 3)^) shl 6) + (Ord(P^) shr 2));
(Q + 1)^ := Char($D8 + (Ord((P + 3)^) shr 2));
(Q + 2)^ := (P + 1)^;
(Q + 3)^ := Char($DC + (3 and Ord(P^)));
Inc(Q, 4);
Inc(P, 4);
Inc(L, 2);
end
else
begin
if L >= C then
break;
Q^ := (P + 1)^;
(Q + 1)^ := P^;
Inc(Q, 2);
Inc(P, 4);
Inc(L);
end;
end;
DestLength := L;
ProcessedBytes := P - Buf;
end;
function TUCS4_3412Codec.Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String;
var P, N : PWideChar;
Q : PChar;
M : Integer;
HighSurrogate : Word;
LowSurrogate : Word;
begin
P := S;
if not Assigned(P) or (Length <= 0) then
begin
ProcessedChars := 0;
Result := '';
exit;
end;
SetLength(Result, Length * 4);
Q := Pointer(Result);
M := 0;
N := P + Length;
While P < N do
Case Ord(P^) of
$D800..$DBFF: // High surrogate of Unicode character [$10000..$10FFFF]
begin
if P = N - 1 then // End of WideString?
raise EConvertError.Create(SLowSurrogateNotFound);
HighSurrogate := Ord(P^);
Inc(P);
Inc(M, 2);
LowSurrogate := Ord(P^);
Case LowSurrogate of // Low Surrogate following?
$DC00..$DF00:
begin
Q^ := Char(((HighSurrogate and $3F) shl 2) + ((LowSurrogate - $DC00) shr 8));
(Q + 1)^ := Char(Lo(LowSurrogate));
(Q + 2)^ := Char(0);
(Q + 3)^ := Char((HighSurrogate - $D7C0) shr 6);
Inc(P);
Inc(Q, 4);
Inc(M, 4);
end;
else
raise EConvertError.Create(SLowSurrogateNotFound);
end;
end;
$DC00..$DFFF: // Low surrogate of Unicode character [$10000..$10FFFF]
raise EConvertError.Create(SHighSurrogateNotFound);
else
Q^ := Char(Hi(Ord(P^)));
(Q + 1)^ := Char(Lo(Ord(P^)));
(Q + 2)^ := Char(0);
(Q + 3)^ := Char(0);
Inc(P);
Inc(Q, 4);
Inc(M, 4);
end;
SetLength(Result, M);
ProcessedChars := Length;
end;
procedure TUCS4_3412Codec.InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer);
var B : Array[0..3] of Byte;
begin
if ReadBuffer(B, 4) then
begin
C := B[2] * $1000000 + B[3] * $10000 + B[0] * $100 + B[1];
Case C of
$D800..$DFFF: // Do not accept surrogates
raise EConvertError.CreateFmt(SSurrogateNotAllowed, [C, 'UCS-4']);
end;
ByteCount := 4;
end
else
begin
C := UCS4_STRING_TERMINATOR;
ByteCount := 0;
end;
end;
procedure TUCS4_3412Codec.InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer);
var Buffer : Array[0..3] of Byte;
begin
Buffer[0] := (C and $FF00) shr 8;
Buffer[1] := C and $FF;
Buffer[2] := C shr 24;
Buffer[3] := (C and $FF0000) shr 16;
WriteBuffer(Buffer, 4);
ByteCount := 4;
end;
procedure TUCS4_3412Codec.WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer);
const
UCS4_3412_LF : Array[0..3] of Byte = ($00, $00, $0A, $00);
UCS4_3412_CR : Array[0..3] of Byte = ($00, $00, $0D, $00);
UCS4_3412_CRLF : Array[0..7] of Byte = ($00, $00, $0D, $00, $00, $00, $0A, $00);
begin
if C = UCS4_LF then
Case WriteLFOption of
lwLF:
begin
WriteBuffer(UCS4_3412_LF, 4);
ByteCount := 4;
end;
lwCR:
begin
WriteBuffer(UCS4_3412_CR, 4);
ByteCount := 4;
end;
lwCRLF:
begin
WriteBuffer(UCS4_3412_CRLF, 8);
ByteCount := 8;
end;
end
else
InternalWriteUCS4Char(C, ByteCount);
end;
{ }
{ TUCS2Codec }
{ }
procedure TUCS2Codec.Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer);
var P : PWideChar;
Q : PWideChar;
I, L, C : Integer;
begin
P := Buf;
Q := DestBuf;
C := DestSize div Sizeof(WideChar);
if not Assigned(P) or (BufSize <= 0) or not Assigned(Q) or (C <= 0) then
begin
ProcessedBytes := 0;
DestLength := 0;
exit;
end;
L := 0;
For I := 1 to BufSize do
if (Ord(P^) >= $D800) and (Ord(P^) < $E000) then // Do not accept surrogates
Case FErrorAction of
eaException :
RaiseUnicodeCodecException(SSurrogateNotAllowed, [Ord(P^), 'UCS-2'], P - Buf);
eaStop :
break;
eaSkip :
Inc(P);
eaIgnore :
begin
Q^ := WideChar(P^);
Inc(P);
Inc(Q);
Inc(L);
end;
eaReplace :
begin
Q^ := FDecodeReplaceChar;
Inc(P);
Inc(Q);
Inc(L);
end;
end
else
begin
if L >= C then
break;
Q^ := P^;
Inc(P);
Inc(Q);
Inc(L);
end;
DestLength := L;
ProcessedBytes := P - Buf;
end;
function TUCS2Codec.Encode(const S: PWideChar; const Length: Integer;
out ProcessedChars: Integer): String;
var P : PWideChar;
Q : PWideChar;
I, L, M : Integer;
begin
Q := S;
if not Assigned(Q) or (Length <= 0) then
begin
ProcessedChars := 0;
Result := '';
exit;
end;
SetLength(Result, Length*2);
L := 0;
M := 0;
P := Pointer(Result);
For I := 1 to Length do
if (Ord(P^) >= $D800) and (Ord(P^) < $E000) then // Do not accept surrogates
Case FErrorAction of
eaException :
RaiseUnicodeCodecException(SSurrogateNotAllowed, [Ord(P^), 'UCS-2'], L * 2);
eaStop :
break;
eaSkip :
begin
Inc(Q);
Inc(L);
end;
eaIgnore :
begin
P^ := Q^;
Inc(P);
Inc(Q);
Inc(L);
Inc(M);
end;
eaReplace :
begin
P^ := FDecodeReplaceChar;
Inc(P);
Inc(Q);
Inc(L);
Inc(M);
end;
end
else
begin
P^ := Q^;
Inc(P);
Inc(Q);
Inc(L);
Inc(M);
end;
if Length <> M then
SetLength(Result, M * 2);
ProcessedChars := L;
end;
procedure TUCS2Codec.InternalReadUCS4Char(out C: UCS4Char;
out ByteCount: Integer);
begin
C := 0;
// C must be initialized, because the ReadBuffer(C, 2) call below does
// not fill the whole variable!
if not ReadBuffer(C, 2) then
begin
C := UCS4_STRING_TERMINATOR;
ByteCount := 0;
exit;
end;
C := Swap(C); // UCS4Chars are stored in Little Endian mode; so we need to swap the bytes.
ByteCount := 2;
Case C of
$D800..$DFFF: // Do not accept surrogates
raise EConvertError.CreateFmt(SSurrogateNotAllowed, [C, 'UCS-2']);
end;
end;
procedure TUCS2Codec.InternalWriteUCS4Char(const C: UCS4Char;
out ByteCount: Integer);
var HighByte, LowByte: Byte;
begin
if C > $FFFF then
raise EConvertError.CreateFmt(SEncodingOutOfRange, ['UCS-2']);
HighByte := Hi(C);
LowByte := Lo(C);
WriteBuffer(HighByte, 1);
WriteBuffer(LowByte, 1);
ByteCount := 2;
end;
procedure TUCS2Codec.WriteUCS4Char(const C: UCS4Char; out ByteCount: Integer);
const
UCS2_LF : Array[0..1] of Byte = ($00, $0A);
UCS2_CR : Array[0..1] of Byte = ($00, $0D);
UCS2_CRLF : Array[0..3] of Byte = ($00, $0D, $00, $0A);
begin
if C = UCS4_LF then
Case WriteLFOption of
lwLF:
begin
WriteBuffer(UCS2_LF, 2);
ByteCount := 2;
end;
lwCR:
begin
WriteBuffer(UCS2_CR, 2);
ByteCount := 2;
end;
lwCRLF:
begin
WriteBuffer(UCS2_CRLF, 4);
ByteCount := 4;
end;
end
else
InternalWriteUCS4Char(C, ByteCount);
end;
{ }
{ ISO-8859-1 - Latin 1 }
{ }
function TISO8859_1Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := WideChar(P);
end;
function TISO8859_1Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
if Ord(Ch) >= $100 then
raise EConvertError.CreateFmt(SCannotConvert, [Ord(Ch), 'ISO-8859-1']);
Result := AnsiChar(Ch);
end;
procedure TISO8859_1Codec.Decode(const Buf: Pointer; const BufSize: Integer;
const DestBuf: Pointer; const DestSize: Integer;
out ProcessedBytes, DestLength: Integer);
var L, C: Integer;
begin
L := BufSize;
C := DestSize div Sizeof(WideChar);
if C < L then
L := C;
if L < 0 then
L := 0;
ProcessedBytes := L;
DestLength := L;
LongToWide(Buf, L, DestBuf);
end;
{ }
{ ISO-8859-2 Latin 2 }
{ }
const
ISO8859_2Map : AnsiCharISOMap = (
#$00A0, #$0104, #$02D8, #$0141, #$00A4, #$013D, #$015A, #$00A7,
#$00A8, #$0160, #$015E, #$0164, #$0179, #$00AD, #$017D, #$017B,
#$00B0, #$0105, #$02DB, #$0142, #$00B4, #$013E, #$015B, #$02C7,
#$00B8, #$0161, #$015F, #$0165, #$017A, #$02DD, #$017E, #$017C,
#$0154, #$00C1, #$00C2, #$0102, #$00C4, #$0139, #$0106, #$00C7,
#$010C, #$00C9, #$0118, #$00CB, #$011A, #$00CD, #$00CE, #$010E,
#$0110, #$0143, #$0147, #$00D3, #$00D4, #$0150, #$00D6, #$00D7,
#$0158, #$016E, #$00DA, #$0170, #$00DC, #$00DD, #$0162, #$00DF,
#$0155, #$00E1, #$00E2, #$0103, #$00E4, #$013A, #$0107, #$00E7,
#$010D, #$00E9, #$0119, #$00EB, #$011B, #$00ED, #$00EE, #$010F,
#$0111, #$0144, #$0148, #$00F3, #$00F4, #$0151, #$00F6, #$00F7,
#$0159, #$016F, #$00FA, #$0171, #$00FC, #$00FD, #$0163, #$02D9);
function TISO8859_2Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
if Ord(P) < $A0 then
Result := WideChar(P)
else
Result := ISO8859_2Map[P];
end;
function TISO8859_2Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromISOMap(Ch, ISO8859_2Map, 'ISO-8859-2');
end;
{ }
{ ISO-8859-3 - Latin 3 }
{ }
const
ISO8859_3Map : AnsiCharISOMap = (
#$00A0, #$0126, #$02D8, #$00A3, #$00A4, #$FFFF, #$0124, #$00A7,
#$00A8, #$0130, #$015E, #$011E, #$0134, #$00AD, #$FFFF, #$017B,
#$00B0, #$0127, #$00B2, #$00B3, #$00B4, #$00B5, #$0125, #$00B7,
#$00B8, #$0131, #$015F, #$011F, #$0135, #$00BD, #$FFFF, #$017C,
#$00C0, #$00C1, #$00C2, #$FFFF, #$00C4, #$010A, #$0108, #$00C7,
#$00C8, #$00C9, #$00CA, #$00CB, #$00CC, #$00CD, #$00CE, #$00CF,
#$FFFF, #$00D1, #$00D2, #$00D3, #$00D4, #$0120, #$00D6, #$00D7,
#$011C, #$00D9, #$00DA, #$00DB, #$00DC, #$016C, #$015C, #$00DF,
#$00E0, #$00E1, #$00E2, #$FFFF, #$00E4, #$010B, #$0109, #$00E7,
#$00E8, #$00E9, #$00EA, #$00EB, #$00EC, #$00ED, #$00EE, #$00EF,
#$FFFF, #$00F1, #$00F2, #$00F3, #$00F4, #$0121, #$00F6, #$00F7,
#$011D, #$00F9, #$00FA, #$00FB, #$00FC, #$016D, #$015D, #$02D9);
function TISO8859_3Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
if Ord(P) < $A0 then
Result := WideChar(P)
else
Result := ISO8859_3Map[P];
end;
function TISO8859_3Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromISOMap(Ch, ISO8859_3Map, 'ISO-8859-3');
end;
{ }
{ ISO-8859-4 - Latin 4 }
{ }
const
ISO8859_4Map : AnsiCharISOMap = (
#$00A0, #$0104, #$0138, #$0156, #$00A4, #$0128, #$013B, #$00A7,
#$00A8, #$0160, #$0112, #$0122, #$0166, #$00AD, #$017D, #$00AF,
#$00B0, #$0105, #$02DB, #$0157, #$00B4, #$0129, #$013C, #$02C7,
#$00B8, #$0161, #$0113, #$0123, #$0167, #$014A, #$017E, #$014B,
#$0100, #$00C1, #$00C2, #$00C3, #$00C4, #$00C5, #$00C6, #$012E,
#$010C, #$00C9, #$0118, #$00CB, #$0116, #$00CD, #$00CE, #$012A,
#$0110, #$0145, #$014C, #$0136, #$00D4, #$00D5, #$00D6, #$00D7,
#$00D8, #$0172, #$00DA, #$00DB, #$00DC, #$0168, #$016A, #$00DF,
#$0101, #$00E1, #$00E2, #$00E3, #$00E4, #$00E5, #$00E6, #$012F,
#$010D, #$00E9, #$0119, #$00EB, #$0117, #$00ED, #$00EE, #$012B,
#$0111, #$0146, #$014D, #$0137, #$00F4, #$00F5, #$00F6, #$00F7,
#$00F8, #$0173, #$00FA, #$00FB, #$00FC, #$0169, #$016B, #$02D9);
function TISO8859_4Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
if Ord(P) < $A0 then
Result := WideChar(P)
else
Result := ISO8859_4Map[P];
end;
function TISO8859_4Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromISOMap(Ch, ISO8859_4Map, 'ISO-8859-4');
end;
{ }
{ ISO-8859-5 - Cyrillic }
{ }
function TISO8859_5Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Case Ord(P) of
$00..$A0, $AD : Result := WideChar(P);
$F0 : Result := #$2116;
$FD : Result := #$00A7;
else
Result := WideChar(Ord(P) + $0360);
end;
end;
function TISO8859_5Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
if Ord(Ch) <= $A0 then
Result := AnsiChar(Ch)
else
Case Ch of
#$2116 : Result := #$F0;
#$00A7 : Result := #$FD;
#$00AD : Result := #$AD;
#$0401..#$045F :
Case Ch of
#$0450, #$045D, #$040D :
raise EConvertError.CreateFmt(SCannotConvert, [Ord(Ch), 'ISO-8859-5']);
else
Result := AnsiChar(Ord(Ch) - $0360);
end;
else
raise EConvertError.CreateFmt(SCannotConvert, [Ord(Ch), 'ISO-8859-5']);
end;
end;
{ }
{ ISO-8859-6 - Arabic }
{ }
function TISO8859_6Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Case Ord(P) of
$00..$A0, $A4, $AD : Result := WideChar(P);
$AC, $BB, $BF, $C1..$DA, $E0..$F2 : Result := WideChar(Ord(P) + $0580);
else
Result := #$FFFF;
end;
end;
function TISO8859_6Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
if Ord(Ch) <= $A0 then
Result := AnsiChar(Ch)
else
Case Ch of
#$00A4 : Result := #$A4;
#$00AD : Result := #$AD;
#$062C, #$063B, #$063F, #$0641..#$065A, #$0660..#$0672 :
Result := AnsiChar(Ord(Ch) - $0580);
else
raise EConvertError.CreateFmt(SCannotConvert, [Ord(Ch), 'ISO-8859-6']);
end;
end;
{ }
{ ISO-8859-7 - Modern Greek }
{ }
function TISO8859_7Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Case Ord(P) of
$00..$A0, $A6..$A9, $AB..$AD, $B0..$B3, $B7, $BB, $BD :
Result := WideChar(P);
$A1 : Result := #$2018;
$A2 : Result := #$2019;
$AF : Result := #$2015;
$D2, $FF : Result := #$FFFF;
else
Result := WideChar(Ord(P) + $02D0);
end;
end;
function TISO8859_7Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
if Ord(Ch) <= $A0 then
Result := AnsiChar(Ch)
else
Case Ch of
#$00A6..#$00A9, #$00AB..#$00AD, #$00B0..#$00B3, #$00B7, #$00BB, #$00BD :
Result := AnsiChar(Ch);
#$0373..#$03CE : Result := AnsiChar(Ord(Ch) - $02D0);
#$2018 : Result := #$A1;
#$2019 : Result := #$A2;
#$2015 : Result := #$AF;
else
raise EConvertError.CreateFmt(SCannotConvert, [Ord(Ch), 'ISO-8859-7']);
end;
end;
{ }
{ ISO-8859-8 - Hebrew }
{ }
function TISO8859_8Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Case Ord(P) of
$00..$A0, $A2..$A9, $AB..$AE, $B0..$B9, $BB..$BE :
Result := WideChar(P);
$AA : Result := #$00D7;
$AF : Result := #$203E;
$BA : Result := #$00F7;
$DF : Result := #$2017;
$E0..$FA : Result := WideChar(Ord(P) + $04E0);
else
Result := #$FFFF;
end;
end;
function TISO8859_8Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
if Ord(Ch) <= $A0 then
Result := AnsiChar(Ch)
else
Case Ch of
#$00A2..#$00A9, #$00AB..#$00AE, #$00B0..#$00B9, #$00BB..#$00BE :
Result := AnsiChar(Ord(Ch));
#$00D7 : Result := #$AA;
#$203E : Result := #$AF;
#$00F7 : Result := #$BA;
#$2017 : Result := #$DF;
#$05C0..#$05DA : Result := AnsiChar(Ord(Ch) - $04E0);
else
raise EConvertError.CreateFmt(SCannotConvert, [Ord(Ch), 'ISO-8859-8']);
end;
end;
{ }
{ ISO-8859-9 - Latin 5 }
{ }
function TISO8859_9Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Case Ord(P) of
$D0 : Result := #$011E;
$DD : Result := #$0130;
$DE : Result := #$015E;
$F0 : Result := #$011F;
$FD : Result := #$0131;
$FE : Result := #$015F;
else
Result := WideChar(P);
end;
end;
function TISO8859_9Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Case Ch of
#$011E : Result := #$D0;
#$0130 : Result := #$DD;
#$015E : Result := #$DE;
#$011F : Result := #$F0;
#$0131 : Result := #$FD;
#$015F : Result := #$FE;
else
if Ord(Ch) <= $00FF then
Result := AnsiChar(Ch)
else
raise EConvertError.CreateFmt(SCannotConvert, [Ord(Ch), 'ISO-8859-9']);
end;
end;
{ }
{ ISO-8859-10 - Latin 6 }
{ }
const
ISO8859_10Map : AnsiCharISOMap = (
#$00A0, #$0104, #$0112, #$0122, #$012A, #$0128, #$0136, #$00A7,
#$013B, #$0110, #$0160, #$0166, #$017D, #$00AD, #$016A, #$014A,
#$00B0, #$0105, #$0113, #$0123, #$012B, #$0129, #$0137, #$00B7,
#$013C, #$0111, #$0161, #$0167, #$017E, #$2014, #$016B, #$014B,
#$0100, #$00C1, #$00C2, #$00C3, #$00C4, #$00C5, #$00C6, #$012E,
#$010C, #$00C9, #$0118, #$00CB, #$0116, #$00CD, #$00CE, #$00CF,
#$00D0, #$0145, #$014C, #$00D3, #$00D4, #$00D5, #$00D6, #$0168,
#$00D8, #$0172, #$00DA, #$00DB, #$00DC, #$00DD, #$00DE, #$00DF,
#$0101, #$00E1, #$00E2, #$00E3, #$00E4, #$00E5, #$00E6, #$012F,
#$010D, #$00E9, #$0119, #$00EB, #$0117, #$00ED, #$00EE, #$00EF,
#$00F0, #$0146, #$014D, #$00F3, #$00F4, #$00F5, #$00F6, #$0169,
#$00F8, #$0173, #$00FA, #$00FB, #$00FC, #$00FD, #$00FE, #$0138);
function TISO8859_10Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
if Ord(P) < $A0 then
Result := WideChar(P)
else
Result := ISO8859_10Map[P];
end;
function TISO8859_10Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromISOMap(Ch, ISO8859_10Map, 'ISO-8859-10');
end;
{ }
{ ISO-8859-13 - Latin 7 }
{ }
const
ISO8859_13Map : AnsiCharISOMap = (
#$00A0, #$201D, #$00A2, #$00A3, #$00A4, #$201E, #$00A6, #$00A7,
#$00D8, #$00A9, #$0156, #$00AB, #$00AC, #$00AD, #$00AE, #$00C6,
#$00B0, #$00B1, #$00B2, #$00B3, #$201C, #$00B5, #$00B6, #$00B7,
#$00F8, #$00B9, #$0157, #$00BB, #$00BC, #$00BD, #$00BE, #$00E6,
#$0104, #$012E, #$0100, #$0106, #$00C4, #$00C5, #$0118, #$0112,
#$010C, #$00C9, #$0179, #$0116, #$0122, #$0136, #$012A, #$013B,
#$0160, #$0143, #$0145, #$00D3, #$014C, #$00D5, #$00D6, #$00D7,
#$0172, #$0141, #$015A, #$016A, #$00DC, #$017B, #$017D, #$00DF,
#$0105, #$012F, #$0101, #$0107, #$00E4, #$00E5, #$0119, #$0113,
#$010D, #$00E9, #$017A, #$0117, #$0123, #$0137, #$012B, #$013C,
#$0161, #$0144, #$0146, #$00F3, #$014D, #$00F5, #$00F6, #$00F7,
#$0173, #$0142, #$015B, #$016B, #$00FC, #$017B, #$017E, #$2019);
function TISO8859_13Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
if Ord(P) < $A0 then
Result := WideChar(P)
else
Result := ISO8859_13Map[P];
end;
function TISO8859_13Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromISOMap(Ch, ISO8859_13Map, 'ISO-8859-13');
end;
{ }
{ ISO-8859-14 - Latin 8 }
{ }
const
ISO8859_14Map : AnsiCharISOMap = (
#$00A0, #$1E02, #$1E03, #$00A3, #$010A, #$010B, #$1E0A, #$00A7,
#$1E80, #$00A9, #$1E82, #$1E0B, #$1EF2, #$00AD, #$00AE, #$0178,
#$1E1E, #$1E1F, #$0120, #$0121, #$1E40, #$1E41, #$00B6, #$1E56,
#$1E81, #$1E57, #$1E83, #$1E60, #$1EF3, #$1E84, #$1E85, #$1E61,
#$00C0, #$00C1, #$00C2, #$00C3, #$00C4, #$00C5, #$00C6, #$00C7,
#$00C8, #$00C9, #$00CA, #$00CB, #$00CC, #$00CD, #$00CE, #$00CF,
#$0174, #$00D1, #$00D2, #$00D3, #$00D4, #$00D5, #$00D6, #$1E6A,
#$00D8, #$00D9, #$00DA, #$00DB, #$00DC, #$00DD, #$0176, #$00DF,
#$00E0, #$00E1, #$00E2, #$00E3, #$00E4, #$00E5, #$00E6, #$00E7,
#$00E8, #$00E9, #$00EA, #$00EB, #$00EC, #$00ED, #$00EE, #$00EF,
#$0175, #$00F1, #$00F2, #$00F3, #$00F4, #$00F5, #$00F6, #$1E6B,
#$00F8, #$00F9, #$00FA, #$00FB, #$00FC, #$00FD, #$0177, #$00FF);
function TISO8859_14Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
if Ord(P) < $A0 then
Result := WideChar(P)
else
Result := ISO8859_14Map[P];
end;
function TISO8859_14Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromISOMap(Ch, ISO8859_14Map, 'ISO-8859-14');
end;
{ }
{ ISO-8859-15 - Latin 9 }
{ }
function TISO8859_15Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Case Ord(P) of
$A4 : Result := #$20AC;
$A6 : Result := #$00A6;
$A8 : Result := #$0161;
$B4 : Result := #$017D;
$B8 : Result := #$017E;
$BC : Result := #$0152;
$BD : Result := #$0153;
$BE : Result := #$0178;
else
Result := WideChar(P);
end;
end;
function TISO8859_15Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Case Ch of
#$20AC : Result := #$A4;
#$00A6 : Result := #$A6;
#$0161 : Result := #$A8;
#$017D : Result := #$B4;
#$017E : Result := #$B8;
#$0152 : Result := #$BC;
#$0153 : Result := #$BD;
#$0178 : Result := #$BE;
else
if Ord(Ch) <= $00FF then
Result := AnsiChar(Ch)
else
raise EConvertError.CreateFmt(SCannotConvert, [Ord(Ch), 'ISO-8859-15']);
end;
end;
{ }
{ CP37 }
{ Map shared by IBM037 and Windows-37. }
{ }
const
CP37Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$00A0, #$00E2, #$00E4, #$00E0, #$00E1, #$00E3, #$00E5,
#$00E7, #$00F1, #$00A2, #$002E, #$003C, #$0028, #$002B, #$007C,
#$0026, #$00E9, #$00EA, #$00EB, #$00E8, #$00ED, #$00EE, #$00EF,
#$00EC, #$00DF, #$0021, #$0024, #$002A, #$0029, #$003B, #$00AC,
#$002D, #$002F, #$00C2, #$00C4, #$00C0, #$00C1, #$00C3, #$00C5,
#$00C7, #$00D1, #$00A6, #$002C, #$0025, #$005F, #$003E, #$003F,
#$00F8, #$00C9, #$00CA, #$00CB, #$00C8, #$00CD, #$00CE, #$00CF,
#$00CC, #$0060, #$003A, #$0023, #$0040, #$0027, #$003D, #$0022,
#$00D8, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$00AB, #$00BB, #$00F0, #$00FD, #$00FE, #$00B1,
#$00B0, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$00AA, #$00BA, #$00E6, #$00B8, #$00C6, #$00A4,
#$00B5, #$007E, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$00A1, #$00BF, #$00D0, #$00DD, #$00DE, #$00AE,
#$005E, #$00A3, #$00A5, #$00B7, #$00A9, #$00A7, #$00B6, #$00BC,
#$00BD, #$00BE, #$005B, #$005D, #$00AF, #$00A8, #$00B4, #$00D7,
#$007B, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$00AD, #$00F4, #$00F6, #$00F2, #$00F3, #$00F5,
#$007D, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$00B9, #$00FB, #$00FC, #$00F9, #$00FA, #$00FF,
#$005C, #$00F7, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$00B2, #$00D4, #$00D6, #$00D2, #$00D3, #$00D5,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$00B3, #$00DB, #$00DC, #$00D9, #$00DA, #$009F);
{ }
{ Windows-37 }
{ }
function TWindows37Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := CP37Map[P];
end;
function TWindows37Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, CP37Map, 'Windows-37');
end;
{ }
{ IBM037 }
{ }
function TIBM037Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := CP37Map[P];
end;
function TIBM037Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, CP37Map, 'IBM037');
end;
{ }
{ IBM038 }
{ }
const
IBM038Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$005B, #$002E, #$003C, #$0028, #$002B, #$0021,
#$0026, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$005D, #$0024, #$002A, #$0029, #$003B, #$005E,
#$002D, #$002F, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$00A6, #$002C, #$0025, #$005F, #$003E, #$003F,
#$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$0060, #$003A, #$0023, #$0040, #$0027, #$003D, #$0022,
#$FFFF, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$007E, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$007B, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$007D, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$005C, #$FFFF, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$009F);
function TIBM038Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM038Map[P];
end;
function TIBM038Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM038Map, 'IBM038');
end;
{ }
{ IBM256 }
{ }
const
IBM256Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$00A0, #$00E2, #$00E4, #$00E0, #$00E1, #$00E3, #$00E5,
#$00E7, #$00F1, #$005B, #$002E, #$003C, #$0028, #$002B, #$0021,
#$0026, #$00E9, #$00EA, #$00EB, #$00E8, #$00ED, #$00EE, #$00EF,
#$00EC, #$00DF, #$005D, #$0024, #$002A, #$0029, #$003B, #$005E,
#$002D, #$002F, #$00C2, #$00C4, #$00C0, #$00C1, #$00C3, #$00C5,
#$00C7, #$00D1, #$00A6, #$002C, #$0025, #$005F, #$003E, #$003F,
#$00F8, #$00C9, #$00CA, #$00CB, #$00C8, #$00CD, #$00CE, #$00CF,
#$00CC, #$0060, #$003A, #$0023, #$0040, #$0027, #$003D, #$0022,
#$00D8, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$00AB, #$00BB, #$00F0, #$00FD, #$00FE, #$00B1,
#$00B0, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$00AA, #$00BA, #$00E6, #$00B8, #$00C6, #$00A4,
#$00B5, #$007E, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$00A1, #$00BF, #$00D0, #$00DD, #$00DE, #$00AE,
#$00A2, #$00A3, #$00A5, #$20A7, #$0192, #$00A7, #$00B6, #$00BC,
#$00BD, #$00BE, #$00AC, #$007C, #$203E, #$00A8, #$00B4, #$2017,
#$007B, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$00AD, #$00F4, #$00F6, #$00F2, #$00F3, #$00F5,
#$007D, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$00B9, #$00FB, #$00FC, #$00F9, #$00FA, #$00FF,
#$005C, #$2003, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$00B2, #$00D4, #$00D6, #$00D2, #$00D3, #$00D5,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$00B3, #$00DB, #$00DC, #$00D9, #$00DA, #$009F);
function TIBM256Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM256Map[P];
end;
function TIBM256Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM256Map, 'IBM256');
end;
{ }
{ IBM273 }
{ }
const
IBM273Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$00A0, #$00E2, #$007B, #$00E0, #$00E1, #$00E3, #$00E5,
#$00E7, #$00F1, #$00C4, #$002E, #$003C, #$0028, #$002B, #$0021,
#$0026, #$00E9, #$00EA, #$00EB, #$00E8, #$00ED, #$00EE, #$00EF,
#$00EC, #$007E, #$00DC, #$0024, #$002A, #$0029, #$003B, #$005E,
#$002D, #$002F, #$00C2, #$005B, #$00C0, #$00C1, #$00C3, #$00C5,
#$00C7, #$00D1, #$00F6, #$002C, #$0025, #$005F, #$003E, #$003F,
#$00F8, #$00C9, #$00CA, #$00CB, #$00C8, #$00CD, #$00CE, #$00CF,
#$00CC, #$0060, #$003A, #$0023, #$00A7, #$0027, #$003D, #$0022,
#$00D8, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$00AB, #$00BB, #$00F0, #$00FD, #$00FE, #$00B1,
#$00B0, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$00AA, #$00BA, #$00E6, #$00B8, #$00C6, #$00A4,
#$00B5, #$00DF, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$00A1, #$00BF, #$00D0, #$00DD, #$00DE, #$00AE,
#$00A2, #$00A3, #$00A5, #$00B7, #$00A9, #$0040, #$00B6, #$00BC,
#$00BD, #$00BE, #$00AC, #$007C, #$203E, #$00A8, #$00B4, #$00D7,
#$00E4, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$00AD, #$00F4, #$00A6, #$00F2, #$00F3, #$00F5,
#$00FC, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$00B9, #$00FB, #$007D, #$00F9, #$00FA, #$00FF,
#$00D6, #$00F7, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$00B2, #$00D4, #$005C, #$00D2, #$00D3, #$00D5,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$00B3, #$00DB, #$005D, #$00D9, #$00DA, #$009F);
function TIBM273Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM273Map[P];
end;
function TIBM273Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM273Map, 'IBM273');
end;
{ }
{ IBM274 }
{ }
const
IBM274Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$005B, #$002E, #$003C, #$0028, #$002B, #$0021,
#$0026, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$005D, #$0024, #$002A, #$0029, #$003B, #$005E,
#$002D, #$002F, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$00F9, #$002C, #$0025, #$005F, #$003E, #$003F,
#$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$0060, #$003A, #$0023, #$00E0, #$0027, #$003D, #$0022,
#$FFFF, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$00A8, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$00E9, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$00E8, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$00E7, #$FFFF, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$009F);
function TIBM274Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM274Map[P];
end;
function TIBM274Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM274Map, 'IBM274');
end;
{ }
{ IBM275 }
{ }
const
IBM275Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$00C9, #$002E, #$003C, #$0028, #$002B, #$0021,
#$0026, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$0024, #$00C7, #$002A, #$0029, #$003B, #$005E,
#$002D, #$002F, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$00E7, #$002C, #$0025, #$005F, #$003E, #$003F,
#$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$00E3, #$003A, #$00D5, #$00C3, #$0027, #$003D, #$0022,
#$FFFF, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$007E, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$00F5, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$00E9, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$005C, #$FFFF, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$009F);
function TIBM275Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM275Map[P];
end;
function TIBM275Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM275Map, 'IBM275');
end;
{ }
{ IBM277 }
{ }
const
IBM277Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$00A0, #$00E2, #$00E4, #$00E0, #$00E1, #$00E3, #$007D,
#$00E7, #$00F1, #$0023, #$002E, #$003C, #$0028, #$002B, #$0021,
#$0026, #$00E9, #$00EA, #$00EB, #$00E8, #$00ED, #$00EE, #$00EF,
#$00EC, #$00DF, #$00A4, #$00C5, #$002A, #$0029, #$003B, #$005E,
#$002D, #$002F, #$00C2, #$00C4, #$00C0, #$00C1, #$00C3, #$0024,
#$00C7, #$00D1, #$00F8, #$002C, #$0025, #$005F, #$003E, #$003F,
#$00A6, #$00C9, #$00CA, #$00CB, #$00C8, #$00CD, #$00CE, #$00CF,
#$00CC, #$0060, #$003A, #$00C6, #$00D8, #$0027, #$003D, #$0022,
#$0040, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$00AB, #$00BB, #$00F0, #$00FD, #$00FE, #$00B1,
#$00B0, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$00AA, #$00BA, #$007B, #$00B8, #$005B, #$005D,
#$00B5, #$00FC, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$00A1, #$00BF, #$00D0, #$00DD, #$00DE, #$00AE,
#$00A2, #$00A3, #$00A5, #$00B7, #$00A9, #$00A7, #$00B6, #$00BC,
#$00BD, #$00BE, #$00AC, #$007C, #$203E, #$00A8, #$00B4, #$00D7,
#$00E6, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$00AD, #$00F4, #$00F6, #$00F2, #$00F3, #$00F5,
#$00E5, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$00B9, #$00FB, #$007E, #$00F9, #$00FA, #$00FF,
#$005C, #$00F7, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$00B2, #$00D4, #$00D6, #$00D2, #$00D3, #$00D5,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$00B3, #$00DB, #$00DC, #$00D9, #$00DA, #$009F);
function TIBM277Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM277Map[P];
end;
function TIBM277Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM277Map, 'IBM277');
end;
{ }
{ IBM278 }
{ }
const
IBM278Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$00A0, #$00E2, #$007B, #$00E0, #$00E1, #$00E3, #$007D,
#$00E7, #$00F1, #$00A7, #$002E, #$003C, #$0028, #$002B, #$0021,
#$0026, #$0060, #$00EA, #$00EB, #$00E8, #$00ED, #$00EE, #$00EF,
#$00EC, #$00DF, #$00A4, #$00C5, #$002A, #$0029, #$003B, #$005E,
#$002D, #$002F, #$00C2, #$0023, #$00C0, #$00C1, #$00C3, #$0024,
#$00C7, #$00D1, #$00F6, #$002C, #$0025, #$005F, #$003E, #$003F,
#$00F8, #$00C9, #$00CA, #$00CB, #$00C8, #$00CD, #$00CE, #$00CF,
#$00CC, #$00E9, #$003A, #$00C4, #$00D6, #$0027, #$003D, #$0022,
#$00D8, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$00AB, #$00BB, #$00F0, #$00FD, #$00FE, #$00B1,
#$00B0, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$00AA, #$00BA, #$00E6, #$00B8, #$00C6, #$005D,
#$00B5, #$00FC, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$00A1, #$00BF, #$00D0, #$00DD, #$00DE, #$00AE,
#$00A2, #$00A3, #$00A5, #$00B7, #$00A9, #$005B, #$00B6, #$00BC,
#$00BD, #$00BE, #$00AC, #$007C, #$203E, #$00A8, #$00B4, #$00D7,
#$00E4, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$00AD, #$00F4, #$00A6, #$00F2, #$00F3, #$00F5,
#$00E5, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$00B9, #$00FB, #$007E, #$00F9, #$00FA, #$00FF,
#$005C, #$00F7, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$00B2, #$00D4, #$0040, #$00D2, #$00D3, #$00D5,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$00B3, #$00DB, #$00DC, #$00D9, #$00DA, #$009F);
function TIBM278Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM278Map[P];
end;
function TIBM278Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM278Map, 'IBM278');
end;
{ }
{ IBM280 }
{ }
const
IBM280Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$00A0, #$00E2, #$00E4, #$007B, #$00E1, #$00E3, #$00E5,
#$005C, #$00F1, #$00B0, #$002E, #$003C, #$0028, #$002B, #$0021,
#$0026, #$005D, #$00EA, #$00EB, #$007D, #$00ED, #$00EE, #$00EF,
#$007E, #$00DF, #$00E9, #$0024, #$002A, #$0029, #$003B, #$005E,
#$002D, #$002F, #$00C2, #$00C4, #$00C0, #$00C1, #$00C3, #$00C5,
#$00C7, #$00D1, #$00F2, #$002C, #$0025, #$005F, #$003E, #$003F,
#$00F8, #$00C9, #$00CA, #$00CB, #$00C8, #$00CD, #$00CE, #$00CF,
#$00CC, #$00F9, #$003A, #$00A3, #$00A7, #$0027, #$003D, #$0022,
#$00D8, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$00AB, #$00BB, #$00F0, #$00FD, #$00FE, #$00B1,
#$005B, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$00AA, #$00BA, #$00E6, #$00B8, #$00C6, #$00A4,
#$00B5, #$00EC, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$00A1, #$00BF, #$00D0, #$00DD, #$00DE, #$00AE,
#$00A2, #$0023, #$00A5, #$00B7, #$00A9, #$0040, #$00B6, #$00BC,
#$00BD, #$00BE, #$00AC, #$007C, #$203E, #$00A8, #$00B4, #$00D7,
#$00E0, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$00AD, #$00F4, #$00F6, #$00A6, #$00F3, #$00F5,
#$00E8, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$00B9, #$00FB, #$00FC, #$0060, #$00FA, #$00FF,
#$00E7, #$00F7, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$00B2, #$00D4, #$00D6, #$00D2, #$00D3, #$00D5,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$00B3, #$00DB, #$00DC, #$00D9, #$00DA, #$009F);
function TIBM280Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM280Map[P];
end;
function TIBM280Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM280Map, 'IBM280');
end;
{ }
{ IBM281 }
{ Similar to IBM038. }
{ }
function TIBM281Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Case Ord(P) of
$4A : Result := #$00A3;
$4F : Result := #$007C;
$5A : Result := #$0021;
$5B : Result := #$00A5;
$5F : Result := #$00AC;
$A1 : Result := #$203E;
$E0 : Result := #$0024;
else
Result := IBM038Map[P];
end;
end;
function TIBM281Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Case Ord(Ch) of
$00A3 : Result := #$4A;
$007C : Result := #$4F;
$0021 : Result := #$5A;
$00A5 : Result := #$5B;
$00AC : Result := #$5F;
$203E : Result := #$A1;
$0024 : Result := #$E0;
else
Result := CharFromMap(Ch, IBM038Map, 'IBM281');
end;
end;
{ }
{ IBM284 }
{ }
const
IBM284Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$00A0, #$00E2, #$00E4, #$00E0, #$00E1, #$00E3, #$00E5,
#$00E7, #$00A6, #$005B, #$002E, #$003C, #$0028, #$002B, #$007C,
#$0026, #$00E9, #$00EA, #$00EB, #$00E8, #$00ED, #$00EE, #$00EF,
#$00EC, #$00DF, #$005D, #$0024, #$002A, #$0029, #$003B, #$00AC,
#$002D, #$002F, #$00C2, #$00C4, #$00C0, #$00C1, #$00C3, #$00C5,
#$00C7, #$0023, #$00F1, #$002C, #$0025, #$005F, #$003E, #$003F,
#$00F8, #$00C9, #$00CA, #$00CB, #$00C8, #$00CD, #$00CE, #$00CF,
#$00CC, #$0060, #$003A, #$00D1, #$0040, #$0027, #$003D, #$0022,
#$00D8, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$00AB, #$00BB, #$00F0, #$00FD, #$00FE, #$00B1,
#$00B0, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$00AA, #$00BA, #$00E6, #$00B8, #$00C6, #$00A4,
#$00B5, #$00A8, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$00A1, #$00BF, #$00D0, #$00DD, #$00DE, #$00AE,
#$00A2, #$00A3, #$00A5, #$00B7, #$00A9, #$00A7, #$00B6, #$00BC,
#$00BD, #$00BE, #$005E, #$0021, #$203E, #$007E, #$00B4, #$00D7,
#$007B, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$00AD, #$00F4, #$00F6, #$00F2, #$00F3, #$00F5,
#$007D, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$00B9, #$00FB, #$00FC, #$00F9, #$00FA, #$00FF,
#$005C, #$00F7, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$00B2, #$00D4, #$00D6, #$00D2, #$00D3, #$00D5,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$00B3, #$00DB, #$00DC, #$00D9, #$00DA, #$009F);
function TIBM284Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM284Map[P];
end;
function TIBM284Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM284Map, 'IBM284');
end;
{ }
{ IBM285 }
{ }
const
IBM285Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$00A0, #$00E2, #$00E4, #$00E0, #$00E1, #$00E3, #$00E5,
#$00E7, #$00F1, #$0024, #$002E, #$003C, #$0028, #$002B, #$007C,
#$0026, #$00E9, #$00EA, #$00EB, #$00E8, #$00ED, #$00EE, #$00EF,
#$00EC, #$00DF, #$0021, #$00A3, #$002A, #$0029, #$003B, #$00AC,
#$002D, #$002F, #$00C2, #$00C4, #$00C0, #$00C1, #$00C3, #$00C5,
#$00C7, #$00D1, #$00A6, #$002C, #$0025, #$005F, #$003E, #$003F,
#$00F8, #$00C9, #$00CA, #$00CB, #$00C8, #$00CD, #$00CE, #$00CF,
#$00CC, #$0060, #$003A, #$0023, #$0040, #$0027, #$003D, #$0022,
#$00D8, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$00AB, #$00BB, #$00F0, #$00FD, #$00FE, #$00B1,
#$00B0, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$00AA, #$00BA, #$00E6, #$00B8, #$00C6, #$00A4,
#$00B5, #$203E, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$00A1, #$00BF, #$00D0, #$00DD, #$00DE, #$00AE,
#$00A2, #$005B, #$00A5, #$00B7, #$00A9, #$00A7, #$00B6, #$00BC,
#$00BD, #$00BE, #$005E, #$005D, #$007E, #$00A8, #$00B4, #$00D7,
#$007B, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$00AD, #$00F4, #$00F6, #$00F2, #$00F3, #$00F5,
#$007D, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$00B9, #$00FB, #$00FC, #$00F9, #$00FA, #$00FF,
#$005C, #$00F7, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$00B2, #$00D4, #$00D6, #$00D2, #$00D3, #$00D5,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$00B3, #$00DB, #$00DC, #$00D9, #$00DA, #$009F);
function TIBM285Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM285Map[P];
end;
function TIBM285Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM285Map, 'IBM285');
end;
{ }
{ IBM290 }
{ }
const
IBM290Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$3002, #$300C, #$300D, #$3001, #$30FB, #$30F2, #$30A1,
#$30A3, #$30A5, #$00A3, #$002E, #$003C, #$0028, #$002B, #$007C,
#$0026, #$30A7, #$30A9, #$30E3, #$30E5, #$30E7, #$30C3, #$FFFF,
#$30FC, #$FFFF, #$0021, #$00A5, #$002A, #$0029, #$003B, #$00AC,
#$002D, #$002F, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$00A6, #$002C, #$0025, #$005F, #$003E, #$003F,
#$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$0060, #$003A, #$0023, #$0040, #$0027, #$003D, #$0022,
#$FFFF, #$30A2, #$30A4, #$30A6, #$30A8, #$30AA, #$30AB, #$30AD,
#$30AF, #$30B1, #$30B3, #$FFFF, #$30B5, #$30B7, #$30B9, #$30BB,
#$30BD, #$30BF, #$30C1, #$30C4, #$30C6, #$30C8, #$30CA, #$30CB,
#$30CC, #$30CD, #$30CE, #$FFFF, #$FFFF, #$30CF, #$30D2, #$30D5,
#$FFFF, #$203E, #$30D8, #$30DB, #$30DE, #$30DF, #$30E0, #$30E1,
#$30E2, #$30E4, #$30E6, #$FFFF, #$30E8, #$30E9, #$30EA, #$30EB,
#$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$FFFF, #$30EC, #$30ED, #$30EF, #$30F3, #$309B, #$309C,
#$FFFF, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$FFFF, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$0024, #$FFFF, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$009F);
function TIBM290Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM290Map[P];
end;
function TIBM290Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM290Map, 'IBM290');
end;
{ }
{ IBM297 }
{ }
const
IBM297Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$00A0, #$00E2, #$00E4, #$0040, #$00E1, #$00E3, #$00E5,
#$005C, #$00F1, #$00B0, #$002E, #$003C, #$0028, #$002B, #$0021,
#$0026, #$007B, #$00EA, #$00EB, #$007D, #$00ED, #$00EE, #$00EF,
#$00EC, #$00DF, #$00A7, #$0024, #$002A, #$0029, #$003B, #$005E,
#$002D, #$002F, #$00C2, #$00C4, #$00C0, #$00C1, #$00C3, #$00C5,
#$00C7, #$00D1, #$00F9, #$002C, #$0025, #$005F, #$003E, #$003F,
#$00F8, #$00C9, #$00CA, #$00CB, #$00C8, #$00CD, #$00CE, #$00CF,
#$00CC, #$00B5, #$003A, #$00A3, #$00E0, #$0027, #$003D, #$0022,
#$00D8, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$00AB, #$00BB, #$00F0, #$00FD, #$00FE, #$00B1,
#$005B, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$00AA, #$00BA, #$00E6, #$00B8, #$00C6, #$00A4,
#$0060, #$00A8, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$00A1, #$00BF, #$00D0, #$00DD, #$00DE, #$00AE,
#$00A2, #$0023, #$00A5, #$00B7, #$00A9, #$005D, #$00B6, #$00BC,
#$00BD, #$00BE, #$00AC, #$007C, #$203E, #$007E, #$00B4, #$00D7,
#$00E9, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$00AD, #$00F4, #$00F6, #$00F2, #$00F3, #$00F5,
#$00E8, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$00B9, #$00FB, #$00FC, #$00A6, #$00FA, #$00FF,
#$00E7, #$00F7, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$00B2, #$00D4, #$00D6, #$00D2, #$00D3, #$00D5,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$00B3, #$00DB, #$00DC, #$00D9, #$00DA, #$009F);
function TIBM297Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM297Map[P];
end;
function TIBM297Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM297Map, 'IBM297');
end;
{ }
{ IBM420 }
{ }
const
IBM420Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$00A0, #$0651, #$FE7D, #$0640, #$FFFF, #$0621, #$0622,
#$FE82, #$0623, #$00A2, #$002E, #$003C, #$0028, #$002B, #$007C,
#$0026, #$FE84, #$0624, #$FFFF, #$FFFF, #$0626, #$0627, #$FE8E,
#$0628, #$FE91, #$0021, #$0024, #$002A, #$0029, #$003B, #$00AC,
#$002D, #$002F, #$0629, #$062A, #$FE97, #$062B, #$FE9B, #$062C,
#$FE9F, #$062D, #$00A6, #$002C, #$0025, #$005F, #$003E, #$003F,
#$FEA3, #$062E, #$FEA7, #$062F, #$0630, #$0631, #$0632, #$0633,
#$FEB3, #$060C, #$003A, #$0023, #$0040, #$0027, #$003D, #$0022,
#$0634, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$FEB7, #$0635, #$FEBB, #$0636, #$FEBF, #$0637,
#$0638, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$0639, #$FECA, #$FECB, #$FECC, #$063A, #$FECE,
#$FECF, #$00F7, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$FED0, #$0641, #$FED3, #$0642, #$FED7, #$0643,
#$FEDB, #$0644, #$FEF5, #$FEF6, #$FEF7, #$FEF8, #$FFFF, #$FFFF,
#$FEFB, #$FEFC, #$FEDF, #$0645, #$FEE3, #$0646, #$FEE7, #$0647,
#$061B, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$00AD, #$FEEB, #$FFFF, #$FEEC, #$FFFF, #$0648,
#$061F, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$0649, #$FEF0, #$064A, #$FEF2, #$FEF3, #$0660,
#$00D7, #$FFFF, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$0661, #$0662, #$FFFF, #$0663, #$0664, #$0665,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$FFFF, #$0666, #$0667, #$0668, #$0669, #$009F);
function TIBM420Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM420Map[P];
end;
function TIBM420Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM420Map, 'IBM420');
end;
{ }
{ IBM423 }
{ }
const
IBM423Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$0391, #$0392, #$0393, #$0394, #$0395, #$0396, #$0397,
#$0398, #$0399, #$005B, #$002E, #$003C, #$0028, #$002B, #$0021,
#$0026, #$039A, #$039B, #$039C, #$039D, #$039E, #$039F, #$03A0,
#$03A1, #$03A3, #$005D, #$0024, #$002A, #$0029, #$003B, #$005E,
#$002D, #$002F, #$03A4, #$03A5, #$03A6, #$03A7, #$03A8, #$03A9,
#$FFFF, #$FFFF, #$FFFF, #$002C, #$0025, #$005F, #$003E, #$003F,
#$FFFF, #$0386, #$0388, #$0389, #$FFFF, #$038A, #$038C, #$038E,
#$038F, #$0060, #$003A, #$00A3, #$00A7, #$0027, #$003D, #$0022,
#$00C4, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$03B1, #$03B2, #$03B3, #$03B4, #$03B5, #$03B6,
#$00D6, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$03B7, #$03B8, #$03B9, #$03BA, #$03BB, #$03BC,
#$00DC, #$00A8, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$03BD, #$03BE, #$03BF, #$03C0, #$03C1, #$03C2,
#$FFFF, #$03AC, #$03AD, #$03AE, #$03CA, #$03AF, #$03CC, #$03CD,
#$03CB, #$03CE, #$03C3, #$03C4, #$03C5, #$03C6, #$03C7, #$03C8,
#$00B8, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$FFFF, #$03C9, #$00C2, #$00E0, #$00E4, #$00EA,
#$00B4, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$00B1, #$00E9, #$00E8, #$00EB, #$00EE, #$00EF,
#$00B0, #$FFFF, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$00BD, #$00F6, #$00F4, #$00FB, #$00F9, #$00FC,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$00FF, #$00E7, #$00C7, #$FFFF, #$FFFF, #$009F);
function TIBM423Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM423Map[P];
end;
function TIBM423Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM423Map, 'IBM423');
end;
{ }
{ IBM424 }
{ }
const
IBM424Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$05D0, #$05D1, #$05D2, #$05D3, #$05D4, #$05D5, #$05D6,
#$05D7, #$05D8, #$00A2, #$002E, #$003C, #$0028, #$002B, #$007C,
#$0026, #$05D9, #$05DA, #$05DB, #$05DC, #$05DD, #$05DE, #$05DF,
#$05E0, #$05E1, #$0021, #$0024, #$002A, #$0029, #$003B, #$00AC,
#$002D, #$002F, #$05E2, #$05E3, #$05E4, #$05E5, #$05E6, #$05E7,
#$05E8, #$05E9, #$00A6, #$002C, #$0025, #$005F, #$003E, #$003F,
#$FFFF, #$05EA, #$FFFF, #$FFFF, #$00A0, #$FFFF, #$FFFF, #$FFFF,
#$21D4, #$0060, #$003A, #$0023, #$0040, #$0027, #$003D, #$0022,
#$FFFF, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$00AB, #$00BB, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$00B0, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$FFFF, #$FFFF, #$FFFF, #$00B8, #$FFFF, #$00A4,
#$00B5, #$007E, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$00AE,
#$005E, #$00A3, #$00A5, #$00B7, #$00A9, #$00A7, #$00B6, #$00BC,
#$00BD, #$00BE, #$005B, #$005D, #$203E, #$00A8, #$00B4, #$00D7,
#$007B, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$00AD, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$007D, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$00B9, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$005C, #$00F7, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$00B2, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$FFFF,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$00B3, #$FFFF, #$FFFF, #$FFFF, #$FFFF, #$009F);
function TIBM424Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := IBM424Map[P];
end;
function TIBM424Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, IBM424Map, 'IBM424');
end;
{ }
{ CP437 }
{ Map shared by IBM437 and Windows-437. }
{ }
const
CP437Map : AnsiCharHighMap = (
#$00C7, #$00FC, #$00E9, #$00E2, #$00E4, #$00E0, #$00E5, #$00E7,
#$00EA, #$00EB, #$00E8, #$00EF, #$00EE, #$00EC, #$00C4, #$00C5,
#$00C9, #$00E6, #$00C6, #$00F4, #$00F6, #$00F2, #$00FB, #$00F9,
#$00FF, #$00D6, #$00DC, #$00A2, #$00A3, #$00A5, #$20A7, #$0192,
#$00E1, #$00ED, #$00F3, #$00FA, #$00F1, #$00D1, #$00AA, #$00BA,
#$00BF, #$2310, #$00AC, #$00BD, #$00BC, #$00A1, #$00AB, #$00BB,
#$2591, #$2592, #$2593, #$2502, #$2524, #$2561, #$2562, #$2556,
#$2555, #$2563, #$2551, #$2557, #$255D, #$255C, #$255B, #$2510,
#$2514, #$2534, #$252C, #$251C, #$2500, #$253C, #$255E, #$255F,
#$255A, #$2554, #$2569, #$2566, #$2560, #$2550, #$256C, #$2567,
#$2568, #$2564, #$2565, #$2559, #$2558, #$2552, #$2553, #$256B,
#$256A, #$2518, #$250C, #$2588, #$2584, #$258C, #$2590, #$2580,
#$03B1, #$00DF, #$0393, #$03C0, #$03A3, #$03C3, #$00B5, #$03C4,
#$03A6, #$0398, #$03A9, #$03B4, #$221E, #$03C6, #$03B5, #$2229,
#$2261, #$00B1, #$2265, #$2264, #$2320, #$2321, #$00F7, #$2248,
#$00B0, #$2219, #$00B7, #$221A, #$207F, #$00B2, #$25A0, #$00A0);
{ }
{ Windows-437 }
{ }
function TWindows437Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
if Ord(P) < $80 then
Result := WideChar(P)
else
Result := CP437Map[P];
end;
function TWindows437Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromHighMap(Ch, CP437Map, 'Windows-437');
end;
{ }
{ IBM437 }
{ }
function TIBM437Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
if Ord(P) < $80 then
Result := WideChar(P)
else
Result := CP437Map[P];
end;
function TIBM437Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromHighMap(Ch, CP437Map, 'IBM437');
end;
{ }
{ CP500 }
{ Map shared by IBM500 and Windows-500. }
{ }
const
CP500Map : AnsiCharMap = (
#$0000, #$0001, #$0002, #$0003, #$009C, #$0009, #$0086, #$007F,
#$0097, #$008D, #$008E, #$000B, #$000C, #$000D, #$000E, #$000F,
#$0010, #$0011, #$0012, #$0013, #$009D, #$0085, #$0008, #$0087,
#$0018, #$0019, #$0092, #$008F, #$001C, #$001D, #$001E, #$001F,
#$0080, #$0081, #$0082, #$0083, #$0084, #$000A, #$0017, #$001B,
#$0088, #$0089, #$008A, #$008B, #$008C, #$0005, #$0006, #$0007,
#$0090, #$0091, #$0016, #$0093, #$0094, #$0095, #$0096, #$0004,
#$0098, #$0099, #$009A, #$009B, #$0014, #$0015, #$009E, #$001A,
#$0020, #$00A0, #$00E2, #$00E4, #$00E0, #$00E1, #$00E3, #$00E5,
#$00E7, #$00F1, #$005B, #$002E, #$003C, #$0028, #$002B, #$0021,
#$0026, #$00E9, #$00EA, #$00EB, #$00E8, #$00ED, #$00EE, #$00EF,
#$00EC, #$00DF, #$005D, #$0024, #$002A, #$0029, #$003B, #$005E,
#$002D, #$002F, #$00C2, #$00C4, #$00C0, #$00C1, #$00C3, #$00C5,
#$00C7, #$00D1, #$00A6, #$002C, #$0025, #$005F, #$003E, #$003F,
#$00F8, #$00C9, #$00CA, #$00CB, #$00C8, #$00CD, #$00CE, #$00CF,
#$00CC, #$0060, #$003A, #$0023, #$0040, #$0027, #$003D, #$0022,
#$00D8, #$0061, #$0062, #$0063, #$0064, #$0065, #$0066, #$0067,
#$0068, #$0069, #$00AB, #$00BB, #$00F0, #$00FD, #$00FE, #$00B1,
#$00B0, #$006A, #$006B, #$006C, #$006D, #$006E, #$006F, #$0070,
#$0071, #$0072, #$00AA, #$00BA, #$00E6, #$00B8, #$00C6, #$00A4,
#$00B5, #$007E, #$0073, #$0074, #$0075, #$0076, #$0077, #$0078,
#$0079, #$007A, #$00A1, #$00BF, #$00D0, #$00DD, #$00DE, #$00AE,
#$00A2, #$00A3, #$00A5, #$00B7, #$00A9, #$00A7, #$00B6, #$00BC,
#$00BD, #$00BE, #$00AC, #$007C, #$00AF, #$00A8, #$00B4, #$00D7,
#$007B, #$0041, #$0042, #$0043, #$0044, #$0045, #$0046, #$0047,
#$0048, #$0049, #$00AD, #$00F4, #$00F6, #$00F2, #$00F3, #$00F5,
#$007D, #$004A, #$004B, #$004C, #$004D, #$004E, #$004F, #$0050,
#$0051, #$0052, #$00B9, #$00FB, #$00FC, #$00F9, #$00FA, #$00FF,
#$005C, #$00F7, #$0053, #$0054, #$0055, #$0056, #$0057, #$0058,
#$0059, #$005A, #$00B2, #$00D4, #$00D6, #$00D2, #$00D3, #$00D5,
#$0030, #$0031, #$0032, #$0033, #$0034, #$0035, #$0036, #$0037,
#$0038, #$0039, #$00B3, #$00DB, #$00DC, #$00D9, #$00DA, #$009F);
{ }
{ Windows-500 }
{ }
function TWindows500Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := CP500Map[P];
end;
function TWindows500Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, CP500Map, 'Windows-500');
end;
{ }
{ IBM500 }
{ }
function TIBM500Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
Result := CP500Map[P];
end;
function TIBM500Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromMap(Ch, CP500Map, 'IBM500');
end;
{ }
{ Windows-708 }
{ }
const
Windows708Map : AnsiCharHighMap = (
#$2502, #$2524, #$00E9, #$00E2, #$2561, #$00E0, #$2562, #$00E7,
#$00EA, #$00EB, #$00E8, #$00EF, #$00EE, #$2556, #$2555, #$2563,
#$2551, #$2557, #$255D, #$00F4, #$255C, #$255B, #$00FB, #$00F9,
#$2510, #$2514, #$009A, #$009B, #$009C, #$009D, #$009E, #$009F,
#$F8C1, #$2534, #$252C, #$251C, #$00A4, #$2500, #$253C, #$255E,
#$255F, #$255A, #$2554, #$2569, #$060C, #$2566, #$00AB, #$00BB,
#$2591, #$2592, #$2593, #$2560, #$2550, #$256C, #$2567, #$2568,
#$2564, #$2565, #$2559, #$061B, #$2558, #$2552, #$2553, #$061F,
#$256B, #$0621, #$0622, #$0623, #$0624, #$0625, #$0626, #$0627,
#$0628, #$0629, #$062A, #$062B, #$062C, #$062D, #$062E, #$062F,
#$0630, #$0631, #$0632, #$0633, #$0634, #$0635, #$0636, #$0637,
#$0638, #$0639, #$063A, #$2588, #$2584, #$258C, #$2590, #$2580,
#$0640, #$0641, #$0642, #$0643, #$0644, #$0645, #$0646, #$0647,
#$0648, #$0649, #$064A, #$064B, #$064C, #$064D, #$064E, #$064F,
#$0650, #$0651, #$0652, #$F8C2, #$F8C3, #$F8C4, #$F8C5, #$F8C6,
#$F8C7, #$256A, #$2518, #$250C, #$00B5, #$00A3, #$25A0, #$00A0);
function TWindows708Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
if Ord(P) < $80 then
Result := WideChar(P)
else
Result := Windows708Map[P];
end;
function TWindows708Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromHighMap(Ch, Windows708Map, 'Windows-708');
end;
{ }
{ Windows-737 }
{ }
const
Windows737Map : AnsiCharHighMap = (
#$0391, #$0392, #$0393, #$0394, #$0395, #$0396, #$0397, #$0398,
#$0399, #$039A, #$039B, #$039C, #$039D, #$039E, #$039F, #$03A0,
#$03A1, #$03A3, #$03A4, #$03A5, #$03A6, #$03A7, #$03A8, #$03A9,
#$03B1, #$03B2, #$03B3, #$03B4, #$03B5, #$03B6, #$03B7, #$03B8,
#$03B9, #$03BA, #$03BB, #$03BC, #$03BD, #$03BE, #$03BF, #$03C0,
#$03C1, #$03C3, #$03C2, #$03C4, #$03C5, #$03C6, #$03C7, #$03C8,
#$2591, #$2592, #$2593, #$2502, #$2524, #$2561, #$2562, #$2556,
#$2555, #$2563, #$2551, #$2557, #$255D, #$255C, #$255B, #$2510,
#$2514, #$2534, #$252C, #$251C, #$2500, #$253C, #$255E, #$255F,
#$255A, #$2554, #$2569, #$2566, #$2560, #$2550, #$256C, #$2567,
#$2568, #$2564, #$2565, #$2559, #$2558, #$2552, #$2553, #$256B,
#$256A, #$2518, #$250C, #$2588, #$2584, #$258C, #$2590, #$2580,
#$03C9, #$03AC, #$03AD, #$03AE, #$03CA, #$03AF, #$03CC, #$03CD,
#$03CB, #$03CE, #$0386, #$0388, #$0389, #$038A, #$038C, #$038E,
#$038F, #$00B1, #$2265, #$2264, #$03AA, #$03AB, #$00F7, #$2248,
#$00B0, #$2219, #$00B7, #$221A, #$207F, #$00B2, #$25A0, #$00A0);
function TWindows737Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
if Ord(P) < $80 then
Result := WideChar(P)
else
Result := Windows737Map[P];
end;
function TWindows737Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromHighMap(Ch, Windows737Map, 'Windows-737');
end;
{ }
{ Windows-775 }
{ }
const
Windows775Map : AnsiCharHighMap = (
#$0106, #$00FC, #$00E9, #$0101, #$00E4, #$0123, #$00E5, #$0107,
#$0142, #$0113, #$0156, #$0157, #$012B, #$0179, #$00C4, #$00C5,
#$00C9, #$00E6, #$00C6, #$014D, #$00F6, #$0122, #$00A2, #$015A,
#$015B, #$00D6, #$00DC, #$00F8, #$00A3, #$00D8, #$00D7, #$00A4,
#$0100, #$012A, #$00F3, #$017B, #$017C, #$017A, #$201D, #$00A6,
#$00A9, #$00AE, #$00AC, #$00BD, #$00BC, #$0141, #$00AB, #$00BB,
#$2591, #$2592, #$2593, #$2502, #$2524, #$0104, #$010C, #$0118,
#$0116, #$2563, #$2551, #$2557, #$255D, #$012E, #$0160, #$2510,
#$2514, #$2534, #$252C, #$251C, #$2500, #$253C, #$0172, #$016A,
#$255A, #$2554, #$2569, #$2566, #$2560, #$2550, #$256C, #$017D,
#$0105, #$010D, #$0119, #$0117, #$012F, #$0161, #$0173, #$016B,
#$017E, #$2518, #$250C, #$2588, #$2584, #$258C, #$2590, #$2580,
#$00D3, #$00DF, #$014C, #$0143, #$00F5, #$00D5, #$00B5, #$0144,
#$0136, #$0137, #$013B, #$013C, #$0146, #$0112, #$0145, #$2019,
#$00AD, #$00B1, #$201C, #$00BE, #$00B6, #$00A7, #$00F7, #$201E,
#$00B0, #$2219, #$00B7, #$00B9, #$00B3, #$00B2, #$25A0, #$00A0);
function TWindows775Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
if Ord(P) < $80 then
Result := WideChar(P)
else
Result := Windows775Map[P];
end;
function TWindows775Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromHighMap(Ch, Windows775Map, 'Windows-775');
end;
{ }
{ CP850 }
{ Map shared by IBM850 and Windows-850. }
{ }
const
CP850Map : AnsiCharHighMap = (
#$00C7, #$00FC, #$00E9, #$00E2, #$00E4, #$00E0, #$00E5, #$00E7,
#$00EA, #$00EB, #$00E8, #$00EF, #$00EE, #$00EC, #$00C4, #$00C5,
#$00C9, #$00E6, #$00C6, #$00F4, #$00F6, #$00F2, #$00FB, #$00F9,
#$00FF, #$00D6, #$00DC, #$00F8, #$00A3, #$00D8, #$00D7, #$0192,
#$00E1, #$00ED, #$00F3, #$00FA, #$00F1, #$00D1, #$00AA, #$00BA,
#$00BF, #$00AE, #$00AC, #$00BD, #$00BC, #$00A1, #$00AB, #$00BB,
#$2591, #$2592, #$2593, #$2502, #$2524, #$00C1, #$00C2, #$00C0,
#$00A9, #$2563, #$2551, #$2557, #$255D, #$00A2, #$00A5, #$2510,
#$2514, #$2534, #$252C, #$251C, #$2500, #$253C, #$00E3, #$00C3,
#$255A, #$2554, #$2569, #$2566, #$2560, #$2550, #$256C, #$00A4,
#$00F0, #$00D0, #$00CA, #$00CB, #$00C8, #$0131, #$00CD, #$00CE,
#$00CF, #$2518, #$250C, #$2588, #$2584, #$00A6, #$00CC, #$2580,
#$00D3, #$00DF, #$00D4, #$00D2, #$00F5, #$00D5, #$00B5, #$00FE,
#$00DE, #$00DA, #$00DB, #$00D9, #$00FD, #$00DD, #$00AF, #$00B4,
#$00AD, #$00B1, #$2017, #$00BE, #$00B6, #$00A7, #$00F7, #$00B8,
#$00B0, #$00A8, #$00B7, #$00B9, #$00B3, #$00B2, #$25A0, #$00A0);
{ }
{ Windows-850 }
{ }
function TWindows850Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
if Ord(P) < $80 then
Result := WideChar(P)
else
Result := CP850Map[P];
end;
function TWindows850Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromHighMap(Ch, CP850Map, 'Windows-850');
end;
{ }
{ IBM850 }
{ }
function TIBM850Codec.DecodeChar(const P: AnsiChar): WideChar;
begin
if Ord(P) < $80 then
Result := WideChar(P)
else
Result := CP850Map[P];
end;
function TIBM850Codec.EncodeChar(const Ch: WideChar): AnsiChar;
begin
Result := CharFromHighMap(Ch, CP850Map, 'IBM850');
end;
{ }
{ IBM851 }
{ }
const
IBM851Map : AnsiCharHighMap = (
#$00C7, #$00FC, #$00E9, #$00E2, #$00E4, #$00E0, #$0386, #$00E7,
#$00EA, #$00EB, #$00E8, #$00EF, #$00EE, #$0388, #$00C4, #$0389,
#$038A, #$FFFF, #$038C, #$00F4, #$00F6, #$038E, #$00FB, #$00F9,
#$038F, #$00D6, #$00DC, #$03AC, #$00A3, #$03AD, #$03AE, #$03AF,
#$03CA, #$0390, #$03CC, #$03CD, #$0391, #$0392, #$0393, #$0394,
#$0395, #$0396, #$0397, #$00BD, #$0398, #$0399, #$00AB, #$00BB,
#$2591, #$2592, #$2593, #$2502, #$2524, #$039A, #$039B, #$039D,
#$039C, #$2563, #$2551, #$2557, #$255D, #$039E, #$039F, #$2510,
<