class EL_ISO_8859_15_ZCODEC

(source code)

description

Codec for ISO_8859_15 automatically generated from decoder.c in VTD-XML source

note
	description: "Codec for ISO_8859_15 automatically generated from decoder.c in VTD-XML source"

	author: "Finnian Reilly"
	copyright: "Copyright (c) 2001-2022 Finnian Reilly"
	contact: "finnian at eiffel hyphen loop dot com"

	license: "MIT license (See: en.wikipedia.org/wiki/MIT_License)"
	date: "2024-08-08 19:01:05 GMT (Thursday 8th August 2024)"
	revision: "1"

class
	EL_ISO_8859_15_ZCODEC

inherit
	EL_ZCODEC

create
	make

feature {NONE} -- Initialization

	initialize_latin_sets
		do
			latin_set_1 := latin_set_from_array (<<
				180, -- 'Ž'
				184  -- 'ž'
			>>)
			latin_set_2 := latin_set_from_array (<<
				166, -- 'Š'
				168  -- 'š'
			>>)
			latin_set_3 := latin_set_from_array (<<
				188, -- 'Œ'
				189  -- 'œ'
			>>)
		end

feature -- Conversion

	as_upper (code: NATURAL): NATURAL
		local
			offset: NATURAL
		do
			inspect code
				when 97..122, 224..246, 248..254 then
					offset := 32
				when 168 then
					offset := 2
				when 184 then
					offset := 4
				when 189 then
					offset := 1
				when 255 then
					offset := 65

			else end
			Result := code - offset
		end

	as_lower (code: NATURAL): NATURAL
		local
			offset: NATURAL
		do
			inspect code
				when 65..90, 192..214, 216..222 then
					offset := 32
				when 166 then
					offset := 2
				when 180 then
					offset := 4
				when 188 then
					offset := 1
				when 190 then
					offset := 65

			else end
			Result := code + offset
		end

	to_upper_offset (code: NATURAL): INTEGER
		do
			inspect code
				when 97..122, 224..246, 248..254 then
					Result := 32
				when 168 then
					Result := 2
				when 184 then
					Result := 4
				when 189 then
					Result := 1
				when 255 then
					Result := 65
			else end
			Result := Result.opposite
		end

	to_lower_offset (code: NATURAL): INTEGER
		do
			inspect code
				when 65..90, 192..214, 216..222 then
					Result := 32
				when 166 then
					Result := 2
				when 180 then
					Result := 4
				when 188 then
					Result := 1
				when 190 then
					Result := 65

			else end
		end

	unicode_case_change_substitute (code: NATURAL): CHARACTER_32
		-- Returns Unicode case change character if c does not have a latin case change
		-- or else the Null character
		do
			inspect code
				-- µ -> Μ
				when 181 then
					Result := 'Μ'
			else end
		end

	latin_character (uc: CHARACTER_32): CHARACTER
			-- unicode to latin translation
			-- Returns '%U' if translation is the same as ISO-8859-1 or else not in ISO_8859_15
		do
			inspect uc
				when 'Ž'..'ž' then
					Result := latin_set_1 [uc.code - 381]
				when 'Š'..'š' then
					Result := latin_set_2 [uc.code - 352]
				when 'Œ'..'œ' then
					Result := latin_set_3 [uc.code - 338]
				when 'Ÿ' then
					Result := '%/190/'
				when '€' then
					Result := '%/164/'
			else end
		end

feature -- Character query

	in_latin_1_disjoint_set (c: CHARACTER): BOOLEAN
		-- `True' if `c' is either the Substitute character or a member of disjoint set of latin-1
		do
			inspect c
				when Substitute, '¤', '¦', '¨', '´', '¸', '¼'..'¾' then
					Result := True
			else
			end
		end

	is_alpha (code: NATURAL): BOOLEAN
		do
			inspect code 
				when 65..90, 97..122, 166, 168, 180..181, 184, 188..190, 192..214, 216..246, 248..255 then
					Result := True
			else
			end
		end

	is_lower (code: NATURAL): BOOLEAN
		do
			inspect code 
				when 97..122, 224..246, 248..254, 168, 184, 189, 255 then
					Result := True

				-- Characters which are only available in a single case
				when 181, 223 then
					Result := True

			else
			end
		end


	is_upper (code: NATURAL): BOOLEAN
		do
			inspect code 
				when 65..90, 192..214, 216..222, 166, 180, 188, 190 then
					Result := True
			else
			end
		end

feature {NONE} -- Implementation

	new_unicode_table: SPECIAL [CHARACTER_32]
			-- Unicode value indexed by ISO_8859_15 character values
		do
			Result := single_byte_unicode_chars
			Result [0xA4] := '€' -- 
			Result [0xA6] := 'Š' -- 
			Result [0xA8] := 'š' -- 
			Result [0xB4] := 'Ž' -- 
			Result [0xB8] := 'ž' -- 
			Result [0xBC] := 'Œ' -- 
			Result [0xBD] := 'œ' -- 
			Result [0xBE] := 'Ÿ' -- 
		end

feature {NONE} -- Internal attributes

	latin_set_1: SPECIAL [CHARACTER]

	latin_set_2: SPECIAL [CHARACTER]

	latin_set_3: SPECIAL [CHARACTER]

end