class EL_WINDOWS_1251_ZCODEC
Codec for WINDOWS_1251 automatically generated from decoder.c in VTD-XML source
note
description: "Codec for WINDOWS_1251 automatically generated from decoder.c in VTD-XML source"
author: "Finnian Reilly"
copyright: "Copyright (c) 2001-2022 Finnian Reilly"
contact: "finnian at eiffel hyphen loop dot com"
license: "MIT license (See: en.wikipedia.org/wiki/MIT_License)"
date: "2024-08-08 19:01:05 GMT (Thursday 8th August 2024)"
revision: "1"
class
EL_WINDOWS_1251_ZCODEC
inherit
EL_ZCODEC
create
make
feature {NONE} -- Initialization
initialize_latin_sets
		-- Assign the lookup tables `latin_set_1' .. `latin_set_10' read by `latin_character'.
		-- Every table lists WINDOWS_1251 codes in the order of the Unicode characters
		-- named in the trailing comment of each row.
	do
			-- 'Ў' .. 'я'
		latin_set_1 := latin_set_from_array (<<
			161, 143, -- Ў Џ
			192, 193, 194, 195, 196, 197, 198, 199, -- А Б В Г Д Е Ж З
			200, 201, 202, 203, 204, 205, 206, 207, -- И Й К Л М Н О П
			208, 209, 210, 211, 212, 213, 214, 215, -- Р С Т У Ф Х Ц Ч
			216, 217, 218, 219, 220, 221, 222, 223, -- Ш Щ Ъ Ы Ь Э Ю Я
			224, 225, 226, 227, 228, 229, 230, 231, -- а б в г д е ж з
			232, 233, 234, 235, 236, 237, 238, 239, -- и й к л м н о п
			240, 241, 242, 243, 244, 245, 246, 247, -- р с т у ф х ц ч
			248, 249, 250, 251, 252, 253, 254, 255 -- ш щ ъ ы ь э ю я
		>>)
			-- 'ё' .. 'ќ'
		latin_set_2 := latin_set_from_array (<<
			184, 144, 131, 186, 190, 179, -- ё ђ ѓ є ѕ і
			191, 188, 154, 156, 158, 157 -- ї ј љ њ ћ ќ
		>>)
			-- 'Ё' .. 'Ќ'
		latin_set_3 := latin_set_from_array (<<
			168, 128, 129, 170, 189, 178, -- Ё Ђ Ѓ Є Ѕ І
			175, 163, 138, 140, 142, 141 -- Ї Ј Љ Њ Ћ Ќ
		>>)
			-- Punctuation ranges
		latin_set_4 := latin_set_from_array (<<134, 135, 149>>) -- † ‡ •
		latin_set_5 := latin_set_from_array (<<145, 146, 130>>) -- ‘ ’ ‚
		latin_set_6 := latin_set_from_array (<<147, 148, 132>>) -- “ ” „
		latin_set_7 := latin_set_from_array (<<150, 151>>) -- – —
		latin_set_8 := latin_set_from_array (<<139, 155>>) -- ‹ ›
			-- Remaining Cyrillic pairs
		latin_set_9 := latin_set_from_array (<<162, 159>>) -- ў џ
		latin_set_10 := latin_set_from_array (<<165, 180>>) -- Ґ ґ
	end
feature -- Conversion
as_upper (code: NATURAL): NATURAL
		-- Upper case counterpart of WINDOWS_1251 `code'.
		-- `code' is returned unchanged when it is not one of the lower case codes listed below.
	local
		distance: NATURAL
	do
		inspect code
			when 162, 179, 190 then
					-- ў, і, ѕ sit directly after their capitals
				distance := 1
			when 131 then
					-- ѓ
				distance := 2
			when 180 then
					-- ґ
				distance := 15
			when 144, 154, 156..159, 184, 186, 191 then
					-- ђ, љ, њ, ќ, ћ, џ, ё, є, ї
				distance := 16
			when 188 then
					-- ј
				distance := 25
			when 97..122, 224..255 then
					-- a..z and а..я
				distance := 32
			else
					-- no mapping: `distance' keeps its default of zero
		end
		Result := code - distance
	end
as_lower (code: NATURAL): NATURAL
		-- Lower case counterpart of WINDOWS_1251 `code'.
		-- `code' is returned unchanged when it is not one of the upper case codes listed below.
	local
		distance: NATURAL
	do
		inspect code
			when 161, 178, 189 then
					-- Ў, І, Ѕ sit directly before their small letters
				distance := 1
			when 129 then
					-- Ѓ
				distance := 2
			when 165 then
					-- Ґ
				distance := 15
			when 128, 138, 140..143, 168, 170, 175 then
					-- Ђ, Љ, Њ, Ќ, Ћ, Џ, Ё, Є, Ї
				distance := 16
			when 163 then
					-- Ј
				distance := 25
			when 65..90, 192..223 then
					-- A..Z and А..Я
				distance := 32
			else
					-- no mapping: `distance' keeps its default of zero
		end
		Result := code + distance
	end
to_upper_offset (code: NATURAL): INTEGER
		-- Signed amount to add to `code' to reach its upper case counterpart:
		-- negative for the lower case codes listed below, zero for every other code
	do
		inspect code
			when 162, 179, 190 then
				Result := -1
			when 131 then
				Result := -2
			when 180 then
				Result := -15
			when 144, 154, 156..159, 184, 186, 191 then
				Result := -16
			when 188 then
				Result := -25
			when 97..122, 224..255 then
				Result := -32
			else
					-- `Result' stays zero
		end
	end
to_lower_offset (code: NATURAL): INTEGER
		-- Amount to add to `code' to reach its lower case counterpart:
		-- positive for the upper case codes listed below, zero for every other code
	do
		inspect code
			when 161, 178, 189 then
					-- Ў, І, Ѕ
				Result := 1
			when 129 then
					-- Ѓ
				Result := 2
			when 165 then
					-- Ґ
				Result := 15
			when 128, 138, 140..143, 168, 170, 175 then
					-- Ђ, Љ, Њ, Ќ, Ћ, Џ, Ё, Є, Ї
				Result := 16
			when 163 then
					-- Ј
				Result := 25
			when 65..90, 192..223 then
					-- A..Z and А..Я
				Result := 32
			else
					-- `Result' stays zero
		end
	end
unicode_case_change_substitute (code: NATURAL): CHARACTER_32
		-- Unicode case change character for `code' when it has no latin case change,
		-- or else the Null character
	do
		if code = 181 then
				-- µ -> Μ
			Result := 'Μ'
		end
	end
latin_character (uc: CHARACTER_32): CHARACTER
		-- Unicode to latin translation of `uc'.
		-- Returns '%U' if translation is the same as ISO-8859-1 or else not in WINDOWS_1251
	local
		unicode: INTEGER
	do
		unicode := uc.code
		inspect uc
				-- Cyrillic ranges: index is the distance from the first character of the range
			when 'Ё'..'Ќ' then
				Result := latin_set_3 [unicode - 1025]
			when 'Ў'..'я' then
				Result := latin_set_1 [unicode - 1038]
			when 'ё'..'ќ' then
				Result := latin_set_2 [unicode - 1105]
			when 'ў'..'џ' then
				Result := latin_set_9 [unicode - 1118]
			when 'Ґ'..'ґ' then
				Result := latin_set_10 [unicode - 1168]

				-- Punctuation ranges
			when '–'..'—' then
				Result := latin_set_7 [unicode - 8211]
			when '‘'..'‚' then
				Result := latin_set_5 [unicode - 8216]
			when '“'..'„' then
				Result := latin_set_6 [unicode - 8220]
			when '†'..'•' then
				Result := latin_set_4 [unicode - 8224]
			when '‹'..'›' then
				Result := latin_set_8 [unicode - 8249]

				-- Isolated characters
			when '…' then
				Result := '%/133/'
			when '€' then
				Result := '%/136/'
			when '‰' then
				Result := '%/137/'
			when '™' then
				Result := '%/153/'
			when '№' then
				Result := '%/185/'
			else
					-- `Result' stays '%U'
		end
	end
feature -- Character query
in_latin_1_disjoint_set (c: CHARACTER): BOOLEAN
		-- `True' if `c' is either the Substitute character or a member of disjoint set of latin-1
	do
		inspect c
			when Substitute then
				Result := True
			when '%/0128/'..'%/0151/', '%/0153/'..'%/0159/' then
					-- code 152 is deliberately absent from both ranges
				Result := True
			when '%/0161/'..'£', '¥', '¨', 'ª', '¯' then
				Result := True
			when '²'..'´', '¸'..'º', '¼'..'ÿ' then
				Result := True
			else
					-- `Result' stays False
		end
	end
is_alpha (code: NATURAL): BOOLEAN
		-- `True' if WINDOWS_1251 `code' is one of the alphabetic codes listed below
	do
		inspect code
			when 65..90, 97..122 then
					-- A..Z, a..z
				Result := True
			when 128..129, 131, 138, 140..144, 154, 156..159 then
				Result := True
			when 161..163, 165, 168, 170, 175, 178..181, 184, 186 then
				Result := True
			when 188..255 then
					-- ј, Ѕ, ѕ, ї followed by А..я
				Result := True
			else
					-- `Result' stays False
		end
	end
is_lower (code: NATURAL): BOOLEAN
		-- `True' if WINDOWS_1251 `code' is one of the lower case codes listed below
	do
		inspect code
			when 97..122, 131, 144, 154, 156..159, 162, 179..180, 184, 186, 188, 190..191, 224..255 then
					-- same codes that `as_upper' maps to a capital
				Result := True
			when 181 then
					-- Characters which are only available in a single case
				Result := True
			else
					-- `Result' stays False
		end
	end
is_upper (code: NATURAL): BOOLEAN
		-- `True' if WINDOWS_1251 `code' is one of the upper case codes listed below
	do
		inspect code
			when 65..90, 128..129, 138, 140..143, 161, 163, 165, 168, 170, 175, 178, 189, 192..223 then
					-- same codes that `as_lower' maps to a small letter
				Result := True
			else
					-- `Result' stays False
		end
	end
feature {NONE} -- Implementation
new_unicode_table: SPECIAL [CHARACTER_32]
		-- Unicode value indexed by WINDOWS_1251 character values.
		-- Starts from `single_byte_unicode_chars' and overwrites entries 0x80 .. 0xFF.
		-- Entry 0x98 is not assigned here and keeps the value supplied by
		-- `single_byte_unicode_chars'.
	do
		Result := single_byte_unicode_chars
		Result [0x80] := 'Ђ' -- CYRILLIC CAPITAL LETTER DJE
		Result [0x81] := 'Ѓ' -- CYRILLIC CAPITAL LETTER GJE
		Result [0x82] := '‚' -- SINGLE LOW-9 QUOTATION MARK
		Result [0x83] := 'ѓ' -- CYRILLIC SMALL LETTER GJE
		Result [0x84] := '„' -- DOUBLE LOW-9 QUOTATION MARK
		Result [0x85] := '…' -- HORIZONTAL ELLIPSIS
		Result [0x86] := '†' -- DAGGER
		Result [0x87] := '‡' -- DOUBLE DAGGER
		Result [0x88] := '€' -- EURO SIGN
		Result [0x89] := '‰' -- PER MILLE SIGN
		Result [0x8A] := 'Љ' -- CYRILLIC CAPITAL LETTER LJE
		Result [0x8B] := '‹' -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK
		Result [0x8C] := 'Њ' -- CYRILLIC CAPITAL LETTER NJE
		Result [0x8D] := 'Ќ' -- CYRILLIC CAPITAL LETTER KJE
		Result [0x8E] := 'Ћ' -- CYRILLIC CAPITAL LETTER TSHE
		Result [0x8F] := 'Џ' -- CYRILLIC CAPITAL LETTER DZHE
		Result [0x90] := 'ђ' -- CYRILLIC SMALL LETTER DJE
		Result [0x91] := '‘' -- LEFT SINGLE QUOTATION MARK
		Result [0x92] := '’' -- RIGHT SINGLE QUOTATION MARK
		Result [0x93] := '“' -- LEFT DOUBLE QUOTATION MARK
		Result [0x94] := '”' -- RIGHT DOUBLE QUOTATION MARK
		Result [0x95] := '•' -- BULLET
		Result [0x96] := '–' -- EN DASH
		Result [0x97] := '—' -- EM DASH
		Result [0x99] := '™' -- TRADE MARK SIGN
		Result [0x9A] := 'љ' -- CYRILLIC SMALL LETTER LJE
		Result [0x9B] := '›' -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
		Result [0x9C] := 'њ' -- CYRILLIC SMALL LETTER NJE
		Result [0x9D] := 'ќ' -- CYRILLIC SMALL LETTER KJE
		Result [0x9E] := 'ћ' -- CYRILLIC SMALL LETTER TSHE
		Result [0x9F] := 'џ' -- CYRILLIC SMALL LETTER DZHE
			-- Written as a numeric escape: a literal no-break space cannot be told
			-- apart from an ordinary space when reading the source
		Result [0xA0] := '%/160/' -- NO-BREAK SPACE
		Result [0xA1] := 'Ў' -- CYRILLIC CAPITAL LETTER SHORT U
		Result [0xA2] := 'ў' -- CYRILLIC SMALL LETTER SHORT U
		Result [0xA3] := 'Ј' -- CYRILLIC CAPITAL LETTER JE
		Result [0xA4] := '¤' -- CURRENCY SIGN
		Result [0xA5] := 'Ґ' -- CYRILLIC CAPITAL LETTER GHE WITH UPTURN
		Result [0xA6] := '¦' -- BROKEN BAR
		Result [0xA7] := '§' -- SECTION SIGN
		Result [0xA8] := 'Ё' -- CYRILLIC CAPITAL LETTER IO
		Result [0xA9] := '©' -- COPYRIGHT SIGN
		Result [0xAA] := 'Є' -- CYRILLIC CAPITAL LETTER UKRAINIAN IE
		Result [0xAB] := '«' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
		Result [0xAC] := '¬' -- NOT SIGN
			-- Written as a numeric escape: a literal soft hyphen is invisible and
			-- leaves what looks like an empty character manifest
		Result [0xAD] := '%/173/' -- SOFT HYPHEN
		Result [0xAE] := '®' -- REGISTERED SIGN
		Result [0xAF] := 'Ї' -- CYRILLIC CAPITAL LETTER YI
		Result [0xB0] := '°' -- DEGREE SIGN
		Result [0xB1] := '±' -- PLUS-MINUS SIGN
		Result [0xB2] := 'І' -- CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
		Result [0xB3] := 'і' -- CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
		Result [0xB4] := 'ґ' -- CYRILLIC SMALL LETTER GHE WITH UPTURN
		Result [0xB5] := 'µ' -- MICRO SIGN
		Result [0xB6] := '¶' -- PILCROW SIGN
		Result [0xB7] := '·' -- MIDDLE DOT
		Result [0xB8] := 'ё' -- CYRILLIC SMALL LETTER IO
		Result [0xB9] := '№' -- NUMERO SIGN
		Result [0xBA] := 'є' -- CYRILLIC SMALL LETTER UKRAINIAN IE
		Result [0xBB] := '»' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
		Result [0xBC] := 'ј' -- CYRILLIC SMALL LETTER JE
		Result [0xBD] := 'Ѕ' -- CYRILLIC CAPITAL LETTER DZE
		Result [0xBE] := 'ѕ' -- CYRILLIC SMALL LETTER DZE
		Result [0xBF] := 'ї' -- CYRILLIC SMALL LETTER YI
		Result [0xC0] := 'А' -- CYRILLIC CAPITAL LETTER A
		Result [0xC1] := 'Б' -- CYRILLIC CAPITAL LETTER BE
		Result [0xC2] := 'В' -- CYRILLIC CAPITAL LETTER VE
		Result [0xC3] := 'Г' -- CYRILLIC CAPITAL LETTER GHE
		Result [0xC4] := 'Д' -- CYRILLIC CAPITAL LETTER DE
		Result [0xC5] := 'Е' -- CYRILLIC CAPITAL LETTER IE
		Result [0xC6] := 'Ж' -- CYRILLIC CAPITAL LETTER ZHE
		Result [0xC7] := 'З' -- CYRILLIC CAPITAL LETTER ZE
		Result [0xC8] := 'И' -- CYRILLIC CAPITAL LETTER I
		Result [0xC9] := 'Й' -- CYRILLIC CAPITAL LETTER SHORT I
		Result [0xCA] := 'К' -- CYRILLIC CAPITAL LETTER KA
		Result [0xCB] := 'Л' -- CYRILLIC CAPITAL LETTER EL
		Result [0xCC] := 'М' -- CYRILLIC CAPITAL LETTER EM
		Result [0xCD] := 'Н' -- CYRILLIC CAPITAL LETTER EN
		Result [0xCE] := 'О' -- CYRILLIC CAPITAL LETTER O
		Result [0xCF] := 'П' -- CYRILLIC CAPITAL LETTER PE
		Result [0xD0] := 'Р' -- CYRILLIC CAPITAL LETTER ER
		Result [0xD1] := 'С' -- CYRILLIC CAPITAL LETTER ES
		Result [0xD2] := 'Т' -- CYRILLIC CAPITAL LETTER TE
		Result [0xD3] := 'У' -- CYRILLIC CAPITAL LETTER U
		Result [0xD4] := 'Ф' -- CYRILLIC CAPITAL LETTER EF
		Result [0xD5] := 'Х' -- CYRILLIC CAPITAL LETTER HA
		Result [0xD6] := 'Ц' -- CYRILLIC CAPITAL LETTER TSE
		Result [0xD7] := 'Ч' -- CYRILLIC CAPITAL LETTER CHE
		Result [0xD8] := 'Ш' -- CYRILLIC CAPITAL LETTER SHA
		Result [0xD9] := 'Щ' -- CYRILLIC CAPITAL LETTER SHCHA
		Result [0xDA] := 'Ъ' -- CYRILLIC CAPITAL LETTER HARD SIGN
		Result [0xDB] := 'Ы' -- CYRILLIC CAPITAL LETTER YERU
		Result [0xDC] := 'Ь' -- CYRILLIC CAPITAL LETTER SOFT SIGN
		Result [0xDD] := 'Э' -- CYRILLIC CAPITAL LETTER E
		Result [0xDE] := 'Ю' -- CYRILLIC CAPITAL LETTER YU
		Result [0xDF] := 'Я' -- CYRILLIC CAPITAL LETTER YA
		Result [0xE0] := 'а' -- CYRILLIC SMALL LETTER A
		Result [0xE1] := 'б' -- CYRILLIC SMALL LETTER BE
		Result [0xE2] := 'в' -- CYRILLIC SMALL LETTER VE
		Result [0xE3] := 'г' -- CYRILLIC SMALL LETTER GHE
		Result [0xE4] := 'д' -- CYRILLIC SMALL LETTER DE
		Result [0xE5] := 'е' -- CYRILLIC SMALL LETTER IE
		Result [0xE6] := 'ж' -- CYRILLIC SMALL LETTER ZHE
		Result [0xE7] := 'з' -- CYRILLIC SMALL LETTER ZE
		Result [0xE8] := 'и' -- CYRILLIC SMALL LETTER I
		Result [0xE9] := 'й' -- CYRILLIC SMALL LETTER SHORT I
		Result [0xEA] := 'к' -- CYRILLIC SMALL LETTER KA
		Result [0xEB] := 'л' -- CYRILLIC SMALL LETTER EL
		Result [0xEC] := 'м' -- CYRILLIC SMALL LETTER EM
		Result [0xED] := 'н' -- CYRILLIC SMALL LETTER EN
		Result [0xEE] := 'о' -- CYRILLIC SMALL LETTER O
		Result [0xEF] := 'п' -- CYRILLIC SMALL LETTER PE
		Result [0xF0] := 'р' -- CYRILLIC SMALL LETTER ER
		Result [0xF1] := 'с' -- CYRILLIC SMALL LETTER ES
		Result [0xF2] := 'т' -- CYRILLIC SMALL LETTER TE
		Result [0xF3] := 'у' -- CYRILLIC SMALL LETTER U
		Result [0xF4] := 'ф' -- CYRILLIC SMALL LETTER EF
		Result [0xF5] := 'х' -- CYRILLIC SMALL LETTER HA
		Result [0xF6] := 'ц' -- CYRILLIC SMALL LETTER TSE
		Result [0xF7] := 'ч' -- CYRILLIC SMALL LETTER CHE
		Result [0xF8] := 'ш' -- CYRILLIC SMALL LETTER SHA
		Result [0xF9] := 'щ' -- CYRILLIC SMALL LETTER SHCHA
		Result [0xFA] := 'ъ' -- CYRILLIC SMALL LETTER HARD SIGN
		Result [0xFB] := 'ы' -- CYRILLIC SMALL LETTER YERU
		Result [0xFC] := 'ь' -- CYRILLIC SMALL LETTER SOFT SIGN
		Result [0xFD] := 'э' -- CYRILLIC SMALL LETTER E
		Result [0xFE] := 'ю' -- CYRILLIC SMALL LETTER YU
		Result [0xFF] := 'я' -- CYRILLIC SMALL LETTER YA
	end
feature {NONE} -- Internal attributes
-- Lookup tables assigned in `initialize_latin_sets' and read by `latin_character'.
-- Each one serves a single Unicode range; the index is `uc.code' minus the base shown.
latin_set_1: SPECIAL [CHARACTER]
-- range 'Ў'..'я', base 1038
latin_set_2: SPECIAL [CHARACTER]
-- range 'ё'..'ќ', base 1105
latin_set_3: SPECIAL [CHARACTER]
-- range 'Ё'..'Ќ', base 1025
latin_set_4: SPECIAL [CHARACTER]
-- range '†'..'•', base 8224
latin_set_5: SPECIAL [CHARACTER]
-- range '‘'..'‚', base 8216
latin_set_6: SPECIAL [CHARACTER]
-- range '“'..'„', base 8220
latin_set_7: SPECIAL [CHARACTER]
-- range '–'..'—', base 8211
latin_set_8: SPECIAL [CHARACTER]
-- range '‹'..'›', base 8249
latin_set_9: SPECIAL [CHARACTER]
-- range 'ў'..'џ', base 1118
latin_set_10: SPECIAL [CHARACTER]
-- range 'Ґ'..'ґ', base 1168
end