class TEXT_DATA_TEST_SET

(source code)

description

General string and character experiments

note
	description: "General string and character experiments"

	author: "Finnian Reilly"
	copyright: "Copyright (c) 2001-2022 Finnian Reilly"
	contact: "finnian at eiffel hyphen loop dot com"

	license: "MIT license (See: en.wikipedia.org/wiki/MIT_License)"
	date: "2024-08-27 7:53:23 GMT (Tuesday 27th August 2024)"
	revision: "41"

class
	TEXT_DATA_TEST_SET

inherit
	EL_EQA_TEST_SET

	EL_MODULE_EIFFEL; EL_MODULE_USER_INPUT

	EL_SHARED_ZCODEC_FACTORY

	EL_ENCODING_TYPE

	EL_STRING_HANDLER

create
	make

feature {NONE} -- Initialization

	make
		-- initialize `test_table'
		do
			make_named (<<
				["zstring_memory_on_boundary", agent test_zstring_memory_on_boundary]
			>>)
		end

feature -- Tests

	test_zstring_memory_on_boundary
		-- TEXT_DATA_TEST_SET.test_zstring_memory_on_boundary
		do
			assert ("size = 48", Eiffel.physical_size (create {EL_ZSTRING}.make_empty) = 48)
		-- Adding an INTEGER_32 attribute to ZSTRING will make size = 64 which is
		-- a 16 byte increase equivalent to 2 x INTEGER_32. So 48 is aligned with some boundary.
		end

feature -- Basic operations

	alternative_once_naming
		do
			lio.put_line (Mime_type_template)
			lio.put_line (Text_charset_template)
		end

	audio_info_parsing
		-- TEXT_DATA_TEST_SET.
		local
			s: ZSTRING; parts: EL_ZSTRING_LIST
		do
			s := "Stream #0.0(und): Audio: aac, 44100 Hz, stereo, fltp, 253 kb/s"
			create parts.make_adjusted_split (s, ',', {EL_SIDE}.Left)
			across parts as part loop
				lio.put_index_labeled_string (part.cursor_index, Void, part.item)
				lio.put_new_line
			end
		end

	check_if_euro_is_space
		local
			euro: CHARACTER_32
		do
			euro := '€'
			lio.put_labeled_string ("euro.is_space", euro.is_space.out)
			lio.put_new_line
			lio.put_labeled_string ("euro.is_space", C_properties.is_space (euro).out)
			lio.put_new_line
		end

	check_if_type_name_unique
		local
			name_1, name_2: STRING
			type_id: INTEGER
		do
			type_id := ({like Current}).type_id
			name_1 := Eiffel.type_name_of_type (type_id)
			name_2 := Eiffel.type_name_of_type (type_id)
			if name_1 = name_2 then
				lio.put_line ("Unique")
			else
				lio.put_line ("Not unique")
			end
		end

	escaping_text
		do
			lio.put_string_field ("&aa&bb&", escaped_text ("&aa&bb&").as_string_8)
		end

	find_highest_common_character
		-- Find highest character in commone with all Latin and Windows ZSTRING codecs
		-- The answer is 127
		local
			i: INTEGER; c: CHARACTER; uc: CHARACTER_32
			done: BOOLEAN
		do
			from i := 1 until done or else i > 255 loop
				c := i.to_character_8; uc := i.to_character_32
				if same_for_all_codecs (c, uc) then
					lio.put_integer_field ("Same for all codecs", i)
				else
					lio.put_integer_field ("NOT the same", i)
					done := True
				end
				lio.put_new_line
				i := i + 1
			end
		end

	fuzzy_match
		local
			a: STRING
		do
			a := "image/jpeg"
			across << "jpg", "jpeg", "png" >> as ext loop
				lio.put_labeled_string ("Matched " + ext.item, a.fuzzy_index (ext.item, 1, 1).to_boolean.out )
				lio.put_new_line
			end
--			Matched jpg: True
--			Matched jpeg: True
--			Matched png: True
		end

	hexadecimal_to_natural_64
		local
			hex: EL_HEXADECIMAL_CONVERTER
		do
			lio.put_string (hex.to_natural_64 ("0x00000A987").out)
			lio.put_new_line
		end

	index_of_empty
		local
			line: ZSTRING
		do
			create line.make_empty
			assert ("is 0", line.index_of (' ', 1) = 0)
		end

	input_capital_a_umlaut
		local
			line: ZSTRING
		do
			line := User_input.line ("Enter a Ä character (ALT 0196)")
			lio.put_new_line
			assert ("is Ä", line [1] = 'Ä')
		end

	input_unicode_character
		local
			str: ZSTRING; euro: ZSTRING
		do
			lio.put_line ("Enter a EURO symbol")
			io.read_line
			lio.put_new_line
			create str.make_from_utf_8 (io.last_string)
			lio.put_labeled_string ("Euro", str)
			lio.put_new_line
			create euro.make_filled ((0x20AC).to_character_32, 1)
			assert ("Console.encoded (euro) ~ io.last_string", Console.encoded (euro, False) ~ io.last_string)
			assert ("Console.decoded (io.last_string) ~ euro", Console.decoded (io.last_string) ~ euro)
		end

	reading_character_32_as_natural_8
		local
			chars: SPECIAL [CHARACTER_32]; ptr: MANAGED_POINTER
			i: INTEGER
		do
			create chars.make_filled (' ', 2)
			create ptr.share_from_pointer (chars.base_address, chars.count * 4)
			from i := 0 until i = ptr.count loop
				ptr.put_natural_8 (i.to_natural_8, i)
				i := i + 1
			end
			from i := 0 until i = ptr.count loop
				lio.put_index_labeled_string (i, Void, ptr.read_natural_8 (i).out)
				lio.put_new_line
				i := i + 1
			end
		end

	replace_delimited_substring_general
		local
			email: ZSTRING
		do
			across << "freilly8@gmail.com", "finnian@gmail.com", "finnian-buyer@eiffel-loop.com" >> as address loop
				email := address.item
				lio.put_string (email)
				email.replace_delimited_substring_general ("finnian", "@eiffel", "", False, 1)
				lio.put_string (" -> "); lio.put_string (email)
				lio.put_new_line
			end
		end

	right_adjust
		do
			if attached ("abc%R") as str then
				str.right_adjust
				lio.put_integer_field ("str.count", str.count)
				lio.put_new_line
			end
		end

	split_empty_count
		do
			lio.put_integer_field ("count", ("").split (',').count)
			lio.put_new_line
		end

	string_to_integer_conversion
		local
			str: ZSTRING
		do
			str := ""
			lio.put_string ("str.is_integer: ")
			lio.put_boolean (str.is_integer)
		end

	substitute_template_with_string_8
		local
			type: STRING
		do
			type := "html"
			lio.put_string_field ("Content", Mime_type_template #$ [type, "UTF-8"])
			lio.put_new_line
			lio.put_string_field ("Content", Mime_type_template #$ [type, "UTF-8"])
		end

	substring_beyond_bounds
		local
			name: STRING
		do
			name := "Muller"
			lio.put_string_field ("name", name.substring (1, name.count + 1))
			lio.put_new_line
		end

	test_has_repeated_hexadecimal_digit
		do
			lio.put_boolean (has_repeated_hexadecimal_digit (0xAAAAAAAAAAAAAAAA)); lio.put_new_line
			lio.put_boolean (has_repeated_hexadecimal_digit (0x1AAAAAAAAAAAAAAA)); lio.put_new_line
			lio.put_boolean (has_repeated_hexadecimal_digit (0xAAAAAAAAAAAAAAA1)); lio.put_new_line
		end

	url_string
		local
			str: EL_URI_PATH_ELEMENT_STRING_8
		do
			create str.make_empty
			str.append_general ("freilly8@gmail.com")
		end

feature {NONE} -- Implementation

	escaped_text (s: READABLE_STRING_GENERAL): READABLE_STRING_GENERAL
			-- `text' with doubled ampersands.
		local
			n, l_count: INTEGER; l_amp_code: NATURAL_32; l_string_32: STRING_32
		do
			l_amp_code := ('&').code.as_natural_32
			l_count := s.count
			n := s.index_of_code (l_amp_code, 1)

			if n > 0 then
					-- There is an ampersand present in `s'.
					-- Replace all occurrences of "&" with "&&".
					--| Cannot be replaced with `{STRING_32}.replace_substring_all' because
					--| we only want it to happen once, not forever.
				from
					create l_string_32.make (l_count + 1)
					l_string_32.append_string_general (s)
				until
					n > l_count
				loop
					n := l_string_32.index_of_code (l_amp_code, n)
					if n > 0 then
						l_string_32.insert_character ('&', n)
							-- Increase count local by one as a character has been inserted.
						l_count := l_count + 1
						n := n + 2
					else
						n := l_count + 1
					end
				end
				Result := l_string_32
			else
				Result := s
			end
		ensure
			ampersand_occurrences_doubled: Result.as_string_32.occurrences ('&') =
				(old s.twin.as_string_32).occurrences ('&') * 2
		end

	has_repeated_hexadecimal_digit (n: NATURAL_64): BOOLEAN
		local
			first, hex_digit: NATURAL_64
			i: INTEGER
		do
			first := n & 0xF
			hex_digit := first
			from i := 1 until hex_digit /= first or i > 15 loop
				hex_digit := n.bit_shift_right (i * 4) & 0xF
				i := i + 1
			end
			Result := i = 16 and then hex_digit = first
		end

	same_for_all_codecs (c: CHARACTER; uc: CHARACTER_32): BOOLEAN
		local
			lower, upper, i: NATURAL; codec: EL_ZCODEC
		do
			Result := True
			across << 1 |..| 11, 13 |..| 14, 1250 |..| 1258 >> as interval until not Result loop
				lower := interval.item.lower.to_natural_32
				upper := interval.item.upper.to_natural_32
				from i := lower until not Result or else i > upper loop
					if i > 1000 then
						codec := Codec_factory.codec_by (Windows | i)
					else
						codec := Codec_factory.codec_by (Latin | i)
					end
					if codec.unicode_table [c.code] /= uc then
						Result := False
					end
					i := i + 1
				end
			end
		end

feature {NONE} -- Constants

	C_properties: CHARACTER_PROPERTY
			-- Property for Unicode characters.
		once
			create Result.make
		end

	Mime_type_template, Text_charset_template: ZSTRING
		once
			Result := "text/%S; charset=%S"
		end

end