class EL_ISO_8859_8_ZCODEC
Codec for ISO_8859_8 automatically generated from decoder.c in VTD-XML source
note
description: "Codec for ISO_8859_8 automatically generated from decoder.c in VTD-XML source"
author: "Finnian Reilly"
copyright: "Copyright (c) 2001-2022 Finnian Reilly"
contact: "finnian at eiffel hyphen loop dot com"
license: "MIT license (See: en.wikipedia.org/wiki/MIT_License)"
date: "2024-08-08 19:01:05 GMT (Thursday 8th August 2024)"
revision: "1"
class
EL_ISO_8859_8_ZCODEC
inherit
EL_ZCODEC
create
make
feature {NONE} -- Initialization
initialize_latin_sets
		-- Build `latin_set_1' from the 27 consecutive codes 224 .. 250, the codes that
		-- `new_unicode_table' assigns to the Hebrew letters U+05D0 .. U+05EA
	do
		latin_set_1 := latin_set_from_array (<<
			224, 225, 226, 227, 228, 229, 230, 231, 232, -- U+05D0 alef .. U+05D8 tet
			233, 234, 235, 236, 237, 238, 239, 240, 241, -- U+05D9 yod .. U+05E1 samekh
			242, 243, 244, 245, 246, 247, 248, 249, 250 -- U+05E2 ayin .. U+05EA tav
		>>)
	end
feature -- Conversion
as_upper (code: NATURAL): NATURAL
		-- `code' reduced by 32 if it lies in 97 .. 122 or 251 .. 254,
		-- otherwise `code' unchanged
	local
		case_gap: NATURAL
	do
		case_gap := 32
		inspect code
			when 97..122, 251..254 then
				Result := code - case_gap
		else
			Result := code
		end
	end
as_lower (code: NATURAL): NATURAL
		-- `code' increased by 32 if it lies in 65 .. 90 or 219 .. 222,
		-- otherwise `code' unchanged
	local
		case_gap: NATURAL
	do
		case_gap := 32
		inspect code
			when 65..90, 219..222 then
				Result := code + case_gap
		else
			Result := code
		end
	end
to_upper_offset (code: NATURAL): INTEGER
		-- Amount to add to `code' to reach its upper case form:
		-- -32 for codes in 97 .. 122 or 251 .. 254, zero for every other code
	do
		inspect code
			when 97..122, 251..254 then
				Result := -32
		else
				-- `Result' keeps its default value of zero
		end
	end
to_lower_offset (code: NATURAL): INTEGER
		-- Amount to add to `code' to reach its lower case form:
		-- 32 for codes in 65 .. 90 or 219 .. 222, zero for every other code
	do
		inspect code
			when 65..90 then
				Result := 32
			when 219..222 then
				Result := 32
		else
				-- `Result' keeps its default value of zero
		end
	end
unicode_case_change_substitute (code: NATURAL): CHARACTER_32
		-- Unicode case change character if `code' does not have a latin case change,
		-- or else the null character
	local
		latin_1_case_gap: NATURAL
	do
		latin_1_case_gap := 32
		inspect code
			when 181 then
					-- µ -> Μ
				Result := 'Μ'
			when 192..214, 216..218 then
					-- À .. Ö -> à .. ö and Ø .. Ú -> ø .. ú:
					-- each substitute is the Unicode character 32 above `code'
				Result := (code + latin_1_case_gap).to_character_32
			when 255 then
					-- ÿ -> Ÿ
				Result := 'Ÿ'
		else
				-- `Result' keeps its default value, the null character
		end
	end
latin_character (uc: CHARACTER_32): CHARACTER
		-- Translation of Unicode `uc' to its ISO_8859_8 character.
		-- '%U' if the translation is the same as ISO-8859-1 or else `uc' is not in ISO_8859_8
	do
		inspect uc
			when '×' then
				Result := '%/170/'
			when '‾' then
				Result := '%/175/'
			when '÷' then
				Result := '%/186/'
			when '‗' then
				Result := '%/223/'
			when 'א'..'ת' then
					-- 1488 is the code of the first letter in the range (U+05D0),
					-- so the range indexes `latin_set_1' starting from zero
				Result := latin_set_1 [uc.code - 1488]
		else
				-- `Result' keeps its default value '%U'
		end
	end
feature -- Character query
in_latin_1_disjoint_set (c: CHARACTER): BOOLEAN
		-- `True' if `c' is either the `Substitute' character or one of the characters
		-- 'ª', '¯', 'º' or 'ß' .. 'ú' making up the disjoint set of latin-1
	do
		inspect c
			when Substitute then
				Result := True
			when 'ª', '¯', 'º' then
				Result := True
			when 'ß'..'ú' then
				Result := True
		else
				-- `Result' keeps its default value `False'
		end
	end
is_alpha (code: NATURAL): BOOLEAN
		-- `True' if `code' is one of 65 .. 90, 97 .. 122, 181, 192 .. 214, 216 .. 222 or 251 .. 255
		-- NOTE(review): the codes 224 .. 250, which `new_unicode_table' maps to Hebrew letters,
		-- are not reported as alphabetic here; confirm that this is intended
	do
		inspect code
			when 65..90, 97..122 then
				Result := True
			when 181, 192..214, 216..222, 251..255 then
				Result := True
		else
				-- `Result' keeps its default value `False'
		end
	end
is_lower (code: NATURAL): BOOLEAN
		-- `True' if `code' is in 97 .. 122 or 251 .. 254, or is one of the codes
		-- which are only available in a single case
	do
		inspect code
			when 97..122, 251..254 then
				Result := True
			when 181, 192..214, 216..218, 255 then
					-- Characters which are only available in a single case
				Result := True
		else
				-- `Result' keeps its default value `False'
		end
	end
is_upper (code: NATURAL): BOOLEAN
		-- `True' if `code' is in 65 .. 90 or 219 .. 222
	do
		inspect code
			when 65..90 then
				Result := True
			when 219..222 then
				Result := True
		else
				-- `Result' keeps its default value `False'
		end
	end
feature {NONE} -- Implementation
new_unicode_table: SPECIAL [CHARACTER_32]
		-- Unicode value indexed by ISO_8859_8 character values.
		-- Starts from `single_byte_unicode_chars' and then assigns the entries
		-- 0xAA .. 0xBE, 0xDF and 0xE0 .. 0xFA; all other entries are left as supplied
	do
		Result := single_byte_unicode_chars

			-- 0xAA .. 0xBE: signs and punctuation
		Result [0xAA] := '×'
		Result [0xAB] := '«'
		Result [0xAC] := '¬'
			-- SOFT HYPHEN (U+00AD) is written as a character code because the character
			-- itself is invisible inside a manifest literal
		Result [0xAD] := '%/173/'
		Result [0xAE] := '®'
		Result [0xAF] := '‾'
		Result [0xB0] := '°'
		Result [0xB1] := '±'
		Result [0xB2] := '²'
		Result [0xB3] := '³'
		Result [0xB4] := '´'
		Result [0xB5] := 'µ'
		Result [0xB6] := '¶'
		Result [0xB7] := '·'
		Result [0xB8] := '¸'
		Result [0xB9] := '¹'
		Result [0xBA] := '÷'
		Result [0xBB] := '»'
		Result [0xBC] := '¼'
		Result [0xBD] := '½'
		Result [0xBE] := '¾'

			-- 0xDF: double low line (U+2017)
		Result [0xDF] := '‗'

			-- 0xE0 .. 0xFA: Hebrew letters U+05D0 .. U+05EA
		Result [0xE0] := 'א'
		Result [0xE1] := 'ב'
		Result [0xE2] := 'ג'
		Result [0xE3] := 'ד'
		Result [0xE4] := 'ה'
		Result [0xE5] := 'ו'
		Result [0xE6] := 'ז'
		Result [0xE7] := 'ח'
		Result [0xE8] := 'ט'
		Result [0xE9] := 'י'
		Result [0xEA] := 'ך'
		Result [0xEB] := 'כ'
		Result [0xEC] := 'ל'
		Result [0xED] := 'ם'
		Result [0xEE] := 'מ'
		Result [0xEF] := 'ן'
		Result [0xF0] := 'נ'
		Result [0xF1] := 'ס'
		Result [0xF2] := 'ע'
		Result [0xF3] := 'ף'
		Result [0xF4] := 'פ'
		Result [0xF5] := 'ץ'
		Result [0xF6] := 'צ'
		Result [0xF7] := 'ק'
		Result [0xF8] := 'ר'
		Result [0xF9] := 'ש'
		Result [0xFA] := 'ת'
	end
feature {NONE} -- Internal attributes
latin_set_1: SPECIAL [CHARACTER]
		-- Assigned by `initialize_latin_sets' from the codes 224 .. 250.
		-- Read by `latin_character' at index `uc.code - 1488' for `uc' in 'א' .. 'ת'
end