-- class EL_ISO_8859_6_ZCODEC
-- Codec for ISO_8859_6 automatically generated from decoder.c in VTD-XML source
note
description: "Codec for ISO_8859_6 automatically generated from decoder.c in VTD-XML source"
author: "Finnian Reilly"
copyright: "Copyright (c) 2001-2022 Finnian Reilly"
contact: "finnian at eiffel hyphen loop dot com"
license: "MIT license (See: en.wikipedia.org/wiki/MIT_License)"
date: "2024-08-08 19:01:05 GMT (Thursday 8th August 2024)"
revision: "1"
class
EL_ISO_8859_6_ZCODEC
inherit
EL_ZCODEC
create
make
feature {NONE} -- Initialization
initialize_latin_sets
		-- Assign `latin_set_1' and `latin_set_2', the lookup tables read by `latin_character'.
		-- Each table is built by `latin_set_from_array' from a contiguous run of ISO_8859_6 codes.
	do
			-- Codes 193..218 (0xC1..0xDA): the run `latin_character' indexes with `uc.code - 1569'
			-- for 'ء'..'غ'; `new_unicode_table' assigns the same slots in the same order.
		latin_set_1 := latin_set_from_array (<<
			193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205,
			206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218
		>>)
			-- Codes 224..242 (0xE0..0xF2): the run `latin_character' indexes with `uc.code - 1600'
			-- for 'ـ'..'ْ'.
		latin_set_2 := latin_set_from_array (<<
			224, 225, 226, 227, 228, 229, 230, 231, 232, 233,
			234, 235, 236, 237, 238, 239, 240, 241, 242
		>>)
	end
feature -- Conversion
as_upper (code: NATURAL): NATURAL
		-- `code' lowered by 32 when it lies in 97..122 or 251..254;
		-- otherwise `code' unchanged
	do
		inspect code
			when 97..122, 251..254 then
				Result := code - 32
		else
			Result := code
		end
	end
as_lower (code: NATURAL): NATURAL
		-- `code' raised by 32 when it lies in 65..90 or 219..222;
		-- otherwise `code' unchanged
	do
		inspect code
			when 65..90, 219..222 then
				Result := code + 32
		else
			Result := code
		end
	end
to_upper_offset (code: NATURAL): INTEGER
		-- Signed amount to add to `code' to reach its upper-case code:
		-- -32 for codes in 97..122 and 251..254, zero for all others
	do
		inspect code
			when 97..122, 251..254 then
				Result := -32
		else
			Result := 0
		end
	end
to_lower_offset (code: NATURAL): INTEGER
		-- Amount to add to `code' to reach its lower-case code:
		-- 32 for codes in 65..90 and 219..222, zero for all others
	do
		inspect code
			when 65..90, 219..222 then
				Result := 32
		else
			Result := 0
		end
	end
unicode_case_change_substitute (code: NATURAL): CHARACTER_32
		-- Unicode character holding the opposite-case form of `code' for the codes listed below
		-- (per the generator's note: codes that have no latin case change),
		-- or else the null character '%U'
	do
		inspect code
			when 181 then Result := 'Μ' -- µ -> Μ
			when 192 then Result := 'à' -- À -> à
			when 243 then Result := 'Ó' -- ó -> Ó
			when 244 then Result := 'Ô' -- ô -> Ô
			when 245 then Result := 'Õ' -- õ -> Õ
			when 246 then Result := 'Ö' -- ö -> Ö
			when 248 then Result := 'Ø' -- ø -> Ø
			when 249 then Result := 'Ù' -- ù -> Ù
			when 250 then Result := 'Ú' -- ú -> Ú
			when 255 then Result := 'Ÿ' -- ÿ -> Ÿ
		else
				-- no substitute: `Result' keeps its default value '%U'
		end
	end
latin_character (uc: CHARACTER_32): CHARACTER
		-- ISO_8859_6 character for Unicode character `uc'.
		-- Result is '%U' when no branch below matches; per the generator's note that means the
		-- translation is the same as ISO-8859-1 or else `uc' is not in ISO_8859_6.
	do
		inspect uc
				-- Isolated punctuation with fixed codes
			when '،' then
				Result := '%/172/'
			when '؛' then
				Result := '%/187/'
			when '؟' then
				Result := '%/191/'

				-- Contiguous runs: position within the run selects the entry of the
				-- table filled by `initialize_latin_sets'
			when 'ء'..'غ' then
				Result := latin_set_1 [uc.code - 1569]
			when 'ـ'..'ْ' then
				Result := latin_set_2 [uc.code - 1600]
		else
				-- `Result' keeps its default value '%U'
		end
	end
feature -- Character query
in_latin_1_disjoint_set (c: CHARACTER): BOOLEAN
		-- Is `c' the `Substitute' character, or one of the characters enumerated below
		-- (the set the original note calls the "disjoint set of latin-1")?
	do
		inspect c
			when
				Substitute,
				'¬', '»', '¿',
				'Á'..'Ú',
				'à'..'ò'
			then
				Result := True
		else
			Result := False
		end
	end
is_alpha (code: NATURAL): BOOLEAN
		-- Is `code' one of the codes this codec classifies as alphabetic?
	do
		inspect code
			when
				65..90, 97..122,
				181, 192,
				219..223,
				243..246, 248..255
			then
				Result := True
		else
			Result := False
		end
	end
is_lower (code: NATURAL): BOOLEAN
		-- Is `code' classified as lower case?
		-- True for the ranges 97..122 and 251..254, and also for the codes the generator
		-- marks as available in a single case only:
		-- 181, 192, 223, 243..246, 248..250 and 255
	do
		inspect code
			when 97..122, 181, 192, 223, 243..246, 248..255 then
				Result := True
		else
			Result := False
		end
	end
is_upper (code: NATURAL): BOOLEAN
		-- Is `code' classified as upper case, i.e. within 65..90 or 219..222?
	do
		inspect code
			when 65..90, 219..222 then
				Result := True
		else
			Result := False
		end
	end
feature {NONE} -- Implementation
new_unicode_table: SPECIAL [CHARACTER_32]
		-- Unicode value indexed by ISO_8859_6 character values.
		-- Starts from the table returned by `single_byte_unicode_chars' and overwrites the
		-- slots listed below.
		-- NOTE(review): entries are written straight into the object returned by
		-- `single_byte_unicode_chars'; presumably that is a fresh table on each call - confirm.
	do
		Result := single_byte_unicode_chars
			-- Invisible characters are written as numeric escapes: a literal no-break space
			-- cannot be told apart from a plain space in source text, and a literal soft hyphen
			-- is easily dropped by editors, leaving an invalid empty character constant.
		Result [0xA0] := '%/160/' -- U+00A0 NO-BREAK SPACE
		Result [0xA4] := '¤' -- U+00A4 CURRENCY SIGN
		Result [0xAC] := '،' -- U+060C ARABIC COMMA
		Result [0xAD] := '%/173/' -- U+00AD SOFT HYPHEN
		Result [0xBB] := '؛' -- U+061B ARABIC SEMICOLON
		Result [0xBF] := '؟' -- U+061F ARABIC QUESTION MARK

			-- 0xC1..0xDA: Arabic letters U+0621..U+063A
		Result [0xC1] := 'ء' -- U+0621
		Result [0xC2] := 'آ' -- U+0622
		Result [0xC3] := 'أ' -- U+0623
		Result [0xC4] := 'ؤ' -- U+0624
		Result [0xC5] := 'إ' -- U+0625
		Result [0xC6] := 'ئ' -- U+0626
		Result [0xC7] := 'ا' -- U+0627
		Result [0xC8] := 'ب' -- U+0628
		Result [0xC9] := 'ة' -- U+0629
		Result [0xCA] := 'ت' -- U+062A
		Result [0xCB] := 'ث' -- U+062B
		Result [0xCC] := 'ج' -- U+062C
		Result [0xCD] := 'ح' -- U+062D
		Result [0xCE] := 'خ' -- U+062E
		Result [0xCF] := 'د' -- U+062F
		Result [0xD0] := 'ذ' -- U+0630
		Result [0xD1] := 'ر' -- U+0631
		Result [0xD2] := 'ز' -- U+0632
		Result [0xD3] := 'س' -- U+0633
		Result [0xD4] := 'ش' -- U+0634
		Result [0xD5] := 'ص' -- U+0635
		Result [0xD6] := 'ض' -- U+0636
		Result [0xD7] := 'ط' -- U+0637
		Result [0xD8] := 'ظ' -- U+0638
		Result [0xD9] := 'ع' -- U+0639
		Result [0xDA] := 'غ' -- U+063A

			-- 0xE0..0xF2: tatweel, letters and combining marks U+0640..U+0652
		Result [0xE0] := 'ـ' -- U+0640
		Result [0xE1] := 'ف' -- U+0641
		Result [0xE2] := 'ق' -- U+0642
		Result [0xE3] := 'ك' -- U+0643
		Result [0xE4] := 'ل' -- U+0644
		Result [0xE5] := 'م' -- U+0645
		Result [0xE6] := 'ن' -- U+0646
		Result [0xE7] := 'ه' -- U+0647
		Result [0xE8] := 'و' -- U+0648
		Result [0xE9] := 'ى' -- U+0649
		Result [0xEA] := 'ي' -- U+064A
		Result [0xEB] := 'ً' -- U+064B
		Result [0xEC] := 'ٌ' -- U+064C
		Result [0xED] := 'ٍ' -- U+064D
		Result [0xEE] := 'َ' -- U+064E
		Result [0xEF] := 'ُ' -- U+064F
		Result [0xF0] := 'ِ' -- U+0650
		Result [0xF1] := 'ّ' -- U+0651
		Result [0xF2] := 'ْ' -- U+0652
	end
feature {NONE} -- Internal attributes
latin_set_1, latin_set_2: SPECIAL [CHARACTER]
		-- Lookup tables assigned by `initialize_latin_sets' and indexed by `latin_character':
		-- `latin_set_1' serves 'ء'..'غ' and `latin_set_2' serves 'ـ'..'ْ'
end