el_readable_string_x

description

Routines to supplement handling of strings conforming to READABLE_STRING_8 and READABLE_STRING_32

note
	description: "[
		Routines to supplement handling of strings conforming to
		${READABLE_STRING_8} and ${READABLE_STRING_32}
	]"

	author: "Finnian Reilly"
	copyright: "Copyright (c) 2001-2022 Finnian Reilly"
	contact: "finnian at eiffel hyphen loop dot com"

	license: "MIT license (See: en.wikipedia.org/wiki/MIT_License)"
	date: "2024-10-06 10:34:39 GMT (Sunday 6th October 2024)"
	revision: "49"

deferred class
	EL_READABLE_STRING_X_ROUTINES [
		READABLE_STRING_X -> READABLE_STRING_GENERAL, C -> COMPARABLE -- CHARACTER_X
	]

inherit
	ANY

	EL_READABLE_STRING_GENERAL_ROUTINES_IMP

	EL_STRING_GENERAL_ROUTINES
		rename
			to_unicode_general as to_unicode
		export
			{ANY} as_zstring, ZSTRING, to_unicode
		end

	EL_STRING_BIT_COUNTABLE [READABLE_STRING_X]

	EL_STRING_8_CONSTANTS

	EL_SIDE_ROUTINES
		rename
			valid_sides as valid_adjustments
		export
			{ANY} valid_adjustments
		end

	EL_ZSTRING_CONSTANTS
		rename
			String_searcher as ZString_searcher
		end

feature -- Access

	occurrence_intervals (
		target: READABLE_STRING_X; pattern: READABLE_STRING_GENERAL; keep_ref: BOOLEAN
	): EL_OCCURRENCE_INTERVALS
		-- intervals filled by `fill_intervals' for `pattern' in `target'
		-- The intervals are collected in the emptied once object `Once_occurence_intervals'.
		-- If `keep_ref' is `True' a twin is returned that the caller may keep a reference to,
		-- otherwise the result of `emptied' itself is returned (presumably the shared once object)
		local
			shared_intervals: EL_OCCURRENCE_INTERVALS
		do
			shared_intervals := Once_occurence_intervals.emptied
			fill_intervals (shared_intervals, target, pattern)
			if keep_ref then
				Result := shared_intervals.twin
			else
				Result := shared_intervals
			end
		end

	split_intervals (target: READABLE_STRING_X; separator: READABLE_STRING_GENERAL; keep_ref: BOOLEAN): EL_SPLIT_INTERVALS
		-- intervals filled by `fill_intervals' for `separator' in `target'
		-- The intervals are collected in the emptied once object `Once_split_intervals'.
		-- If `keep_ref' is `True' a twin is returned that the caller may keep a reference to,
		-- otherwise the result of `emptied' itself is returned (presumably the shared once object)
		local
			shared_intervals: EL_SPLIT_INTERVALS
		do
			shared_intervals := Once_split_intervals.emptied
			fill_intervals (shared_intervals, target, separator)
			if keep_ref then
				Result := shared_intervals.twin
			else
				Result := shared_intervals
			end
		end

	selected (n: INTEGER; n_set: READABLE_INDEXABLE [INTEGER]; name_list: READABLE_STRING_X): READABLE_STRING_X
		-- item of comma separated `name_list' that has the same relative position as
		-- the first occurrence of `n' has in `n_set', with one leading space removed.
		-- Empty string if `name_list' is empty or `n' is not found in `n_set'
		require
			name_count_matches: n_set.upper - n_set.lower = name_list.occurrences (',')
		local
			index, i, start_index, end_index: INTEGER; found: BOOLEAN
		do
			-- interval (1, 0) yields the empty string when nothing is selected
			start_index := 1
			if name_list.count > 0 then
				-- locate position of `n' in `n_set'
				from index := n_set.lower until index > n_set.upper or found loop
					if n_set [index] = n then
						found := True
					else
						index := index + 1
					end
				end
				if found and then attached split_on_character as split_list then
					split_list.set_target (name_list); split_list.set_separator (',')
					found := False
					i := n_set.lower
					-- advance `i' in step with the split items until it reaches `index'
					across split_list as list until found loop
						if i = index then
							start_index := list.item_lower; end_index := list.item_upper
							-- an empty item has `start_index > end_index' and for a trailing empty
							-- item `start_index' is not a valid index, so test before reading
							if start_index <= end_index and then name_list [start_index] = ' ' then
								start_index := start_index + 1
							end
							found := True
						else
							i := i + 1
						end
					end
				end
			end
			Result := name_list.substring (start_index, end_index)
		end

	to_utf_8 (str: READABLE_STRING_X): STRING
		-- new string created with capacity `utf_8_byte_count' of the cursor for `str'
		-- and filled by the cursor routine `append_to_utf_8'
		local
			l_cursor: EL_STRING_ITERATION_CURSOR
		do
			l_cursor := cursor (str)
			create Result.make (l_cursor.utf_8_byte_count)
			l_cursor.append_to_utf_8 (Result)
		end

feature -- Measurement

	between_interval (str: READABLE_STRING_X; left, right: CHARACTER_32): INTEGER_64
		-- compact interval of the substring that lies between the first occurrence of `left'
		-- and the last occurrence of `right' in `str'. Zero if either character is not found.
		-- NOTE(review): no check is made that `right' occurs after `left' - confirm callers expect this
		local
			left_index, right_index: INTEGER; ir: EL_INTERVAL_ROUTINES
		do
			left_index := index_of (str, left, 1)
			if left_index > 0 then
				-- search backwards from the end of `str'
				right_index := last_index_of (str, right, str.count)
			end
			if left_index > 0 and right_index > 0 then
				Result := ir.compact (left_index + 1, right_index - 1)
			end
		end

	word_count (str: READABLE_STRING_X; exclude_variable_references: BOOLEAN): INTEGER
		-- count of whitespace separated substrings of `str' that contain at least one
		-- alphabetical character (see `has_alpha').
		-- If `exclude_variable_references' is `True', subtract the count of substrings
		-- that are variable references as defined by `is_variable_reference'
		local
			i, upper, word_index: INTEGER; c32: EL_CHARACTER_32_ROUTINES
		do
			upper := str.count
			from i := 1 until i > upper loop
				-- skip any whitespace preceding the next word
				from until i > upper or else not c32.is_space (str [i]) loop
					i := i + 1
				end
				if i <= upper then
					word_index := i
					-- advance to the first whitespace after the word or to the end of `str'
					-- (a word of one character at the end of `str' is found here too)
					from until i > upper or else c32.is_space (str [i]) loop
						i := i + 1
					end
					-- parentheses are required: `implies' has a lower precedence than `and then'
					if attached new_shared_substring (str, word_index, i - 1) as word
						and then has_alpha (word)
						and then (exclude_variable_references implies not is_variable_reference (word))
					then
						Result := Result + 1
					end
				end
			end
		end

feature -- Lists

	delimited_list (text: READABLE_STRING_X; delimiter: READABLE_STRING_GENERAL): EL_ARRAYED_LIST [READABLE_STRING_X]
		-- list of the substrings of `text' defined by `split_intervals' for the `delimiter' string
		local
			intervals: EL_SPLIT_INTERVALS
		do
			-- `False': no twin is needed as the intervals are only used to build the list
			intervals := split_intervals (text, delimiter, False)
			Result := substring_list (text, intervals)
		end

	to_csv_list (text: READABLE_STRING_X): like to_list
		-- list of the comma separated substrings of `text'
		-- obtained from `to_list' with the adjustment `{EL_SIDE}.Left'
		do
			Result := to_list (text, ',', {EL_SIDE}.Left)
		end

	to_list (text: READABLE_STRING_X; uc: CHARACTER_32; adjustments: INTEGER): EL_ARRAYED_LIST [READABLE_STRING_X]
		-- list of substrings of `text' separated by character `uc'. The `adjustments' code is
		-- passed on to the interval filling routine and must be one of:
		-- `Both', `Left', `None', `Right' from class `EL_SIDE'.
		require
			valid_adjustments: valid_adjustments (adjustments)
		local
			intervals: EL_SPLIT_INTERVALS
		do
			-- reuse the once interval object rather than creating a new one
			intervals := Once_split_intervals.emptied
			intervals.fill (text, uc, adjustments)
			Result := substring_list (text, intervals)
		end

feature -- Character query

	ends_with_character (s: READABLE_STRING_X; c: C): BOOLEAN
		-- expected to be `True' if the last character of `s' is `c'
		-- (`matches_wildcard' relies on it to detect a trailing asterisk)
		deferred
		end

	has_alpha (str: READABLE_STRING_X): BOOLEAN
		-- `True' if `is_i_th_alpha' is true for at least one character position in `str'
		local
			i, upper: INTEGER
		do
			upper := str.count
			-- `i' is the last position examined, search stops at the first match
			from until Result or else i = upper loop
				i := i + 1
				Result := is_i_th_alpha (str, i)
			end
		end

	has_double_quotes (s: READABLE_STRING_X): BOOLEAN
		-- result of `has_quotes' for quote type 2, which applies `has_enclosing'
		-- to `s' using the double quote character for both ends
		do
			Result := has_quotes (s, 2)
		end

	has_enclosing (s: READABLE_STRING_X; c_first, c_last: CHARACTER_32): BOOLEAN
		-- presumably `True' if `s' begins with `c_first' and ends with `c_last',
		-- to be confirmed in the descendant implementation.
		-- `has_quotes' relies on it to detect strings enclosed in quotes
		deferred
		end

	has_quotes (s: READABLE_STRING_X; type: INTEGER): BOOLEAN
		-- result of `has_enclosing' for `s' using the quote character selected by `type':
		-- 1 for the single quote and 2 for the double quote
		require
			double_or_single: 1 <= type and type <= 2
		do
			inspect type
				when 1 then
					Result := has_enclosing (s, '%'', '%'')
			else
				Result := has_enclosing (s, '"', '"')
			end
		end

	has_single_quotes (s: READABLE_STRING_X): BOOLEAN
		-- result of `has_quotes' for quote type 1, which applies `has_enclosing'
		-- to `s' using the single quote character for both ends
		do
			Result := has_quotes (s, 1)
		end

	starts_with_character (s: READABLE_STRING_X; c: C): BOOLEAN
		-- expected to be `True' if the first character of `s' is `c'
		-- (`matches_wildcard' relies on it to detect a leading asterisk)
		deferred
		end

feature -- Status query

	has_member (str: READABLE_STRING_X; set: EL_SET [C]): BOOLEAN
		-- `True' if at least one character in `str' is a member of `set'
		-- (implemented in descendants for the character type `C')
		deferred
		end

	is_canonically_spaced (s: READABLE_STRING_X): BOOLEAN
		-- `True' if every run of whitespace in `s' is exactly one space character (ASCII 32).
		-- Also `True' for a string with no whitespace at all
		local
			uc_i: CHARACTER_32; i, upper, space_count: INTEGER
			c32: EL_CHARACTER_32_ROUTINES
		do
			upper := s.count
			Result := True
			from i := 1 until not Result or else i > upper loop
				uc_i := s [i]
				if c32.is_space (uc_i) then
					-- `space_count' is the length so far of the current whitespace run
					space_count := space_count + 1
					-- fails on a second whitespace character or a lone one that is not ASCII 32
					Result := space_count = 1 and then uc_i = ' '
				else
					space_count := 0
				end
				i := i + 1
			end
		end

	is_eiffel (s: READABLE_STRING_X): BOOLEAN
		-- `True' if `s' is an Eiffel identifier, as determined by `is_eiffel' of the cursor for `s'
		do
			Result := cursor (s).is_eiffel
		end

	is_eiffel_lower (s: READABLE_STRING_X): BOOLEAN
		-- `True' if `s' is a lower-case Eiffel identifier,
		-- as determined by `is_eiffel_lower' of the cursor for `s'
		do
			Result := cursor (s).is_eiffel_lower
		end

	is_eiffel_upper (s: READABLE_STRING_X): BOOLEAN
		-- `True' if `s' is an upper-case Eiffel identifier,
		-- as determined by `is_eiffel_upper' of the cursor for `s'
		do
			Result := cursor (s).is_eiffel_upper
		end

	is_identifier_boundary (str: READABLE_STRING_X; lower, upper: INTEGER): BOOLEAN
		-- `True' if the substring at indices `lower' to `upper' is not adjoined on either side
		-- by an identifier character as defined by `is_i_th_identifier'.
		-- A side at the start or end of `str' always counts as a boundary
		require
			valid_lower: lower >= 1
			valid_upper: upper <= str.count
		do
			if upper + 1 <= str.count and then is_i_th_identifier (str, upper + 1) then
				-- identifier continues to the right

			elseif lower - 1 >= 1 and then is_i_th_identifier (str, lower - 1) then
				-- identifier continues to the left
			else
				Result := True
			end
		end

	is_subset_of (str: READABLE_STRING_X; set: EL_SET [C]): BOOLEAN
		-- `True' if set of all characters in `str' is a subset of `set'
		-- (implemented in descendants for the character type `C')
		deferred
		end

	is_variable_name (str: READABLE_STRING_X): BOOLEAN
		-- `True' if `str' consists of a `$' character, followed by an alphabetical character,
		-- followed by any number of characters that are alphanumeric or an underscore
		local
			i, upper: INTEGER
		do
			upper := str.count
			if upper > 1 and then str [1] = '$' and then is_i_th_alpha (str, 2) then
				Result := True
				-- remaining characters from the 3rd position onwards
				from i := 3 until not Result or i > upper loop
					Result := is_i_th_alpha_numeric (str, i) or else str [i] = '_'
					i := i + 1
				end
			end
		end

	is_variable_reference (str: READABLE_STRING_X): BOOLEAN
		-- `True' if `str' has one of two variable reference forms
		--
		-- 1. $<name>
		-- 2. ${<name>}
		--
		-- where <name> starts with an alphabetical character and is followed by
		-- any number of characters that are alphanumeric or an underscore
		local
			lower, upper, i: INTEGER
		do
			upper := str.count
			if upper >= 2 and then str [1] = '$' then
				if str [2] /= '{' or else upper <= 3 then
					-- like: $name
					lower := 2

				elseif str [upper] = '}' then
					-- like: ${name}
					lower := 3; upper := upper - 1
				end
				-- `lower' remains zero for an opening brace that has no closing brace
				if str.valid_index (lower) then
					Result := is_i_th_alpha (str, lower)
					from i := lower until i > upper or not Result loop
						Result := is_i_th_alpha_numeric (str, i) or else str [i] = '_'
						i := i + 1
					end
				end
			end
		end

feature -- Comparison

	caseless_ends_with (big, small: READABLE_STRING_X): BOOLEAN
		-- `True' if `small' occurs at the end of `big' regardless of case.
		-- Always `True' for an empty `small'
		local
			count_difference: INTEGER
		do
			count_difference := big.count - small.count
			if small.is_empty then
				Result := True

			elseif count_difference >= 0 then
				-- compare at the position where `small' would start as an ending of `big'
				Result := occurs_caseless_at (big, small, count_difference + 1)
			end
		end

	matches_wildcard (s, wildcard: READABLE_STRING_X): BOOLEAN
		-- `True' if string `s' matches the `wildcard' search term which may have an asterisk
		-- either to the left, to the right or on both sides:
		--
		--    "*"     matches any string
		--    "*abc*" matches if `s' has the substring "abc"
		--    "abc*"  matches if `s' starts with "abc"
		--    "*abc"  matches if `s' ends with "abc"
		--    "abc"   matches if `s' is the same string
		local
			any_ending, any_start: BOOLEAN; start_index, end_index: INTEGER
			search_string: READABLE_STRING_X
		do
			start_index := 1; end_index := wildcard.count
			inspect wildcard.count
				when 0 then
				when 1 then
					if wildcard [1].code = {ASCII}.Star then
						any_ending := True; any_start := True
					end
			else
				-- narrow the interval to exclude a trailing and/or leading asterisk
				if ends_with_character (wildcard, asterisk) then
					end_index := end_index - 1
					any_ending := True
				end
				if starts_with_character (wildcard, asterisk) then
					start_index := start_index + 1
					any_start := True
				end
			end
			if end_index - start_index + 1 = wildcard.count then
				-- nothing was trimmed so there is no need for a substring
				search_string := wildcard
			else
				search_string := new_shared_substring (wildcard, start_index, end_index)
			end
			if any_ending and any_start then
				if wildcard.count = 1 then
					-- a lone asterisk
					Result := True
				else
					Result := s.has_substring (search_string)
				end

			elseif any_ending then
				Result := starts_with (s, search_string)

			elseif any_start then
				Result := ends_with (s, search_string)
			else
				Result := same_string (s, wildcard)
			end
		end

	occurs_at (big, small: READABLE_STRING_X; index: INTEGER): BOOLEAN
		-- `True' if `small' string occurs in `big' string at `index'
		-- (implemented in descendants)
		deferred
		end

	occurs_caseless_at (big, small: READABLE_STRING_X; index: INTEGER): BOOLEAN
		-- `True' if `small' string occurs in `big' string at `index' regardless of case
		-- Callers in this class only call it with an `index' for which `small' fits within `big'
		deferred
		end

	same_caseless (a, b: READABLE_STRING_X): BOOLEAN
		-- `True' if `a' and `b' have the same count and the same characters regardless of case
		do
			Result := a.count = b.count and then occurs_caseless_at (a, b, 1)
		end

feature -- Substring

	adjusted (str: READABLE_STRING_X): READABLE_STRING_X
		-- `str' without the leading and trailing characters counted by the cursor queries
		-- `leading_white_count' and `trailing_white_count'.
		-- Result is `str' itself if there is nothing to remove, otherwise a substring
		local
			start_index, end_index: INTEGER
		do
			start_index := 1
			end_index := str.count - cursor (str).trailing_white_count
			-- a zero `end_index' means nothing remains, so the leading count is not needed
			if end_index /= 0 then
				start_index := start_index + cursor (str).leading_white_count
			end
			if start_index = 1 and then end_index = str.count then
				Result := str
			else
				Result := str.substring (start_index, end_index)
			end
		end

	sandwiched_parts (str: READABLE_STRING_X; separator: CHARACTER_32; head_count, tail_count: INTEGER): READABLE_STRING_X
		-- substring of `str' spanning the parts of the list split by `separator' that remain
		-- after `head_count' parts are removed from the head and `tail_count' parts from the tail.
		-- Result is `str' itself if `head_count + tail_count' is not greater than zero,
		-- and an empty string if the first or last remaining part cannot be located
		local
			start_index, end_index, first_cursor_index, last_cursor_index: INTEGER
		do
			if head_count + tail_count <= 0 then
				Result := str

			elseif attached split_on_character as split_list then
				split_list.set_target (str); split_list.set_separator (separator)

				first_cursor_index := head_count + 1
				last_cursor_index := split_list.count - tail_count

				-- note start of first remaining part and end of last remaining part
				across split_list as list loop
					if list.cursor_index = first_cursor_index then
						start_index := list.item_lower
					end
					if list.cursor_index = last_cursor_index then
						end_index := list.item_upper
					end
				end
				if start_index <= 0 or end_index <= 0 then
					-- use the empty interval
					start_index := 1; end_index := 0
				end
				Result := str.substring (start_index, end_index)
			end
		end

	substring_to (str: READABLE_STRING_X; uc: CHARACTER_32): READABLE_STRING_X
		-- `substring_to_from' from start of string
		-- The `null' pointer argument means the search starts at index 1 and no index is written back
		do
			Result := substring_to_from (str, uc, null)
		end

	substring_to_from (str: READABLE_STRING_X; uc: CHARACTER_32; start_index_ptr: TYPED_POINTER [INTEGER]): READABLE_STRING_X
		-- substring of `str' from a start index up to but not including the next index of `uc',
		-- or else up to the end of `str' if `uc' is not found.
		-- The start index is read from the INTEGER at memory location `start_index_ptr',
		-- or is 1 if `start_index_ptr' is the default pointer.
		-- Unless `start_index_ptr' is the default pointer, a new start index is written back to it:
		-- the index following `uc', or `str.count + 1' if `uc' is not found
		local
			start_index, index: INTEGER; pointer: EL_POINTER_ROUTINES
			has_index_pointer: BOOLEAN
		do
			has_index_pointer := not start_index_ptr.is_default_pointer
			if has_index_pointer then
				start_index := pointer.read_integer_32 (start_index_ptr)
			else
				start_index := 1
			end
			index := index_of (str, uc, start_index)
			if index > 0 then
				Result := str.substring (start_index, index - 1)
				start_index := index + 1
			else
				Result := str.substring (start_index, str.count)
				start_index := str.count + 1
			end
			if has_index_pointer then
				-- copy the 32 bit value of local `start_index' to the caller's INTEGER
				start_index_ptr.memory_copy ($start_index, {PLATFORM}.Integer_32_bytes)
			end
		end

	substring_to_reversed (str: READABLE_STRING_X; uc: CHARACTER_32): READABLE_STRING_X
		-- `substring_to_reversed_from' from end of string
		-- The `null' pointer argument means the search starts at `str.count' and no index is written back
		do
			Result := substring_to_reversed_from (str, uc, null)
		end

	substring_to_reversed_from (
		str: READABLE_STRING_X; uc: CHARACTER_32; start_index_from_end_ptr: TYPED_POINTER [INTEGER]
	): READABLE_STRING_X
		-- the same as `substring_to_from' except going from right to left:
		-- substring of `str' after the last index of `uc' up to a start index from the end,
		-- or else from the start of `str' if `uc' is not found.
		-- The start index is read from the INTEGER at `start_index_from_end_ptr',
		-- or is `str.count' if `start_index_from_end_ptr' is the default pointer.
		-- Unless it is the default pointer, a new start index is written back to it:
		-- the index preceding `uc', or `0' if `uc' is not found
		local
			start_index_from_end, index: INTEGER; pointer: EL_POINTER_ROUTINES
			has_index_pointer: BOOLEAN
		do
			has_index_pointer := not start_index_from_end_ptr.is_default_pointer
			if has_index_pointer then
				start_index_from_end := pointer.read_integer_32 (start_index_from_end_ptr)
			else
				start_index_from_end := str.count
			end
			index := last_index_of (str, uc, start_index_from_end)
			if index > 0 then
				Result := str.substring (index + 1, start_index_from_end)
				start_index_from_end := index - 1
			else
				Result := str.substring (1, start_index_from_end)
				start_index_from_end := 0
			end
			if has_index_pointer then
				pointer.put_integer_32 (start_index_from_end, start_index_from_end_ptr)
			end
		end

	truncated (str: READABLE_STRING_X; max_count: INTEGER): READABLE_STRING_X
		-- `str' itself if it has no more than `max_count' characters, otherwise the first
		-- `max_count - 2' characters of `str' with two dots appended as an ellipsis
		-- NOTE(review): for `max_count < 2' the two dot result is longer than `max_count'
		do
			if str.count <= max_count then
				Result := str
			else
				-- two character positions are reserved for the dots
				Result := str.substring (1, max_count - 2) + Character_string_8_table.item ('.', 2)
			end
		end

feature {NONE} -- Implementation

	null: TYPED_POINTER [INTEGER]
		-- default value of a typed pointer as `Result' is never assigned
		-- Passed by `substring_to' and `substring_to_reversed' to indicate "no index variable"
		do
		end

	substring_list (text: READABLE_STRING_X; intervals: EL_SEQUENTIAL_INTERVALS): like to_list
		-- list of the substrings of `text' defined by each lower and upper index in `intervals'
		-- Side effect: the traversal moves the internal cursor of `intervals' to the `after' position
		do
			create Result.make (intervals.count)
			from intervals.start until intervals.after loop
				Result.extend (text.substring (intervals.item_lower, intervals.item_upper))
				intervals.forth
			end
		end

	to_code (character: CHARACTER_32): NATURAL_32
		-- the `natural_32_code' of `character'
		do
			Result := character.natural_32_code
		end

feature {NONE} -- Deferred

	asterisk: C
		-- the asterisk character as type `C'
		-- (`matches_wildcard' tests for it at either end of a wildcard)
		deferred
		end

	as_canonically_spaced (s: READABLE_STRING_X): READABLE_STRING_X
		-- copy of `s' with each substring of whitespace consisting of one space character (ASCII 32)
		deferred
		end

	cursor (s: READABLE_STRING_X): EL_STRING_ITERATION_CURSOR
		-- iteration cursor for string `s'
		-- NOTE(review): whether the result is a new or a shared object depends on the descendant
		deferred
		end

	ends_with (s, trailing: READABLE_STRING_X): BOOLEAN
		-- presumably `True' if `s' ends with the characters of `trailing'
		-- (`matches_wildcard' calls it for a wildcard with a leading asterisk)
		deferred
		end

	fill_intervals (intervals: EL_OCCURRENCE_INTERVALS; target: READABLE_STRING_X; pattern: READABLE_STRING_GENERAL)
		-- fill `intervals' for `pattern' in `target'
		-- presumably with the interval of each occurrence of `pattern' - confirm in descendants
		deferred
		end

	is_i_th_alpha (str: READABLE_STRING_X; i: INTEGER): BOOLEAN
		-- `True' if i'th character is alphabetical
		deferred
		end

	is_i_th_alpha_numeric (str: READABLE_STRING_X; i: INTEGER): BOOLEAN
		-- `True' if i'th character is alphabetical or numeric
		deferred
		end

	is_i_th_identifier (str: READABLE_STRING_X; i: INTEGER): BOOLEAN
		-- `True' if i'th character is an identifier
		deferred
		end

	index_of (str: READABLE_STRING_X; c: CHARACTER_32; start_index: INTEGER): INTEGER
		-- index of character `c' in `str' searching from `start_index'
		-- Callers in this class treat a result that is not greater than zero as "not found"
		deferred
		end

	last_index_of (str: READABLE_STRING_X; c: CHARACTER_32; start_index_from_end: INTEGER): INTEGER
		-- index of character `c' in `str' searching backwards from `start_index_from_end'
		-- Callers in this class treat a result that is not greater than zero as "not found"
		deferred
		end

	new_shared_substring (s: READABLE_STRING_X; start_index, end_index: INTEGER): READABLE_STRING_X
		-- substring of `s' from `start_index' to `end_index'
		-- NOTE(review): the name suggests the result shares data with `s' or is reused
		-- between calls - confirm in descendants before keeping a reference to it
		deferred
		end

	same_string (a, b: READABLE_STRING_X): BOOLEAN
		-- presumably `True' if `a' and `b' have the same characters
		-- (`matches_wildcard' calls it for a wildcard with no asterisk)
		deferred
		end

	split_on_character: EL_SPLIT_ON_CHARACTER [READABLE_STRING_X]
		-- character splitter for which callers in this class set the target string
		-- and the separator before iterating over it
		deferred
		end

	string_searcher: STRING_SEARCHER
		-- string searcher provided by descendants (not called in this class text)
		deferred
		end

	starts_with (s, leading: READABLE_STRING_X): BOOLEAN
		-- presumably `True' if `s' starts with the characters of `leading'
		-- (`matches_wildcard' calls it for a wildcard with a trailing asterisk)
		deferred
		end

feature {NONE} -- Constants

	Once_occurence_intervals: EL_OCCURRENCE_INTERVALS
		-- once object that is emptied and refilled on each call to `occurrence_intervals'
		once
			create Result.make_empty
		end

	Once_split_intervals: EL_SPLIT_INTERVALS
		-- once object that is emptied and refilled on each call to `split_intervals' and `to_list'
		once
			create Result.make_empty
		end

end