class EL_SEARCHABLE_ZSTRING
Searchable aspects of ZSTRING
note
description: "Searchable aspects of ${ZSTRING}"
author: "Finnian Reilly"
copyright: "Copyright (c) 2001-2022 Finnian Reilly"
contact: "finnian at eiffel hyphen loop dot com"
license: "MIT license (See: en.wikipedia.org/wiki/MIT_License)"
date: "2024-10-06 10:41:16 GMT (Sunday 6th October 2024)"
revision: "64"
deferred class
EL_SEARCHABLE_ZSTRING
inherit
EL_ZSTRING_IMPLEMENTATION
EL_ZSTRING_CONSTANTS
feature -- Index position
index_of (uc: CHARACTER_32; start_index: INTEGER): INTEGER
		-- index of first occurrence of `uc' at or after `start_index', or 0 if there is none
	local
		c: CHARACTER
	do
		if 0 <= uc.code and then uc.code <= Max_ascii_code then
			-- ASCII range: search with the 8-bit character without consulting `Codec'
			Result := internal_index_of (uc.to_character_8, start_index)
		else
			c := Codec.encoded_character (uc)
			if c = Substitute then
				-- `Codec' answered with the `Substitute' character so delegate to `unencoded_index_of'
				Result := unencoded_index_of (uc, start_index, null)
			else
				Result := internal_index_of (c, start_index)
			end
		end
	ensure then
		valid_result: Result = 0 or (start_index <= Result and Result <= count)
		zero_if_absent: (Result = 0) = not substring (start_index, count).has (uc)
		found_if_present: substring (start_index, count).has (uc) implies item (Result) = uc
		none_before: substring (start_index, count).has (uc) implies
			not substring (start_index, Result - 1).has (uc)
	end
index_of_z_code (a_z_code: NATURAL; start_index: INTEGER): INTEGER
		-- index of first character with z-code `a_z_code' at or after `start_index', or 0 if there is none
	do
		if a_z_code <= 0xFF then
			-- code fits in 8 bits: search with the equivalent CHARACTER_8
			Result := internal_index_of (a_z_code.to_character_8, start_index)
		else
			-- code above 0xFF: mask with `Sign_bit_mask' to get the character passed to `unencoded_index_of'
			Result := unencoded_index_of ((a_z_code & Sign_bit_mask).to_character_32, start_index, null)
		end
	ensure then
		valid_result: Result = 0 or (start_index <= Result and Result <= count)
		zero_if_absent: (Result = 0) = not substring (start_index, count).has_z_code (a_z_code)
		found_if_present: substring (start_index, count).has_z_code (a_z_code) implies z_code (Result) = a_z_code
		none_before: substring (start_index, count).has_z_code (a_z_code) implies
			not substring (start_index, Result - 1).has_z_code (a_z_code)
	end
last_index_of (uc: CHARACTER_32; start_index_from_end: INTEGER): INTEGER
		-- Position of last occurrence of `uc', 0 if none.
		-- `start_index_from_end' is passed unchanged to the internal search routines
	local
		c: CHARACTER
	do
		if 0 <= uc.code and then uc.code <= Max_ascii_code then
			-- ASCII range: search with the 8-bit character without consulting `Codec'
			Result := internal_last_index_of (uc.to_character_8, start_index_from_end)
		else
			c := Codec.encoded_character (uc)
			if c = Substitute then
				-- `Codec' answered with the `Substitute' character so delegate to `unencoded_last_index_of'
				Result := unencoded_last_index_of (uc, start_index_from_end)
			else
				Result := internal_last_index_of (c, start_index_from_end)
			end
		end
	end
substring_index (other: READABLE_STRING_GENERAL; start_index: INTEGER): INTEGER
-- index of first occurrence of `other' when searching from `start_index'
-- `Result' keeps its default value of 0 when no branch below assigns a match
-- The strategy is selected by `other.count' and the code returned by `string_storage_type (other)'
local
r: EL_READABLE_STRING_GENERAL_ROUTINES; return_default: BOOLEAN; type_code: CHARACTER
do
type_code := string_storage_type (other)
inspect other.count
when 1 then
-- character search
inspect type_code
when 'X' then
-- `other' is tested for conformance to EL_READABLE_ZSTRING so that its z-code can be searched for directly
-- Seems to be 1% faster
if attached {EL_READABLE_ZSTRING} other as z_str then
Result := index_of_z_code (z_str.z_code (1), start_index)
end
else
Result := index_of (other [1], start_index)
end
else
-- string search (any `other.count' except 1)
inspect type_code
when '1' then
-- use the `String_8' routine if `compatible_string_8' yields a string, otherwise fall back to the generic search
if attached compatible_string_8 (other) as str_8 then
Result := String_8.substring_index_compatible (Current, str_8, start_index)
else
return_default := True
end
when 'X' then
if attached {EL_READABLE_ZSTRING} other as z_str then
Result := substring_index_zstring (z_str, start_index)
end
else
return_default := True
end
-- generic search: fill the shared string `Z_code_pattern_array [0]' from `other' via `fill_z_codes'
-- and search `current_readable' with `String_searcher' up to `count'
if return_default and then attached String_searcher as searcher
and then attached Z_code_pattern_array [0] as z_code_string
then
r.shared_cursor_by_type (other, type_code).fill_z_codes (z_code_string)
searcher.initialize_deltas (z_code_string)
Result := searcher.substring_index_with_deltas (current_readable, z_code_string, start_index, count)
end
end
end
substring_index_in_bounds (other: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER): INTEGER
-- index of first occurrence of `other' when searching with the bounds `start_pos' and `end_pos'
-- `Result' keeps its default value of 0 when no branch below assigns a match
-- The strategy is selected by the code returned by `string_storage_type (other)'
local
r: EL_READABLE_STRING_GENERAL_ROUTINES; return_default: BOOLEAN; type_code: CHARACTER
do
type_code := string_storage_type (other)
inspect type_code
when '1' then
-- use the `String_8' routine if `compatible_string_8' yields a string, otherwise fall back to the generic search
if attached compatible_string_8 (other) as str_8 then
Result := String_8.substring_index_in_bounds_compatible (Current, str_8, start_pos, end_pos)
else
return_default := True
end
when 'X' then
-- `other' is tested for conformance to EL_READABLE_ZSTRING before using the ZSTRING specific search
if attached {EL_READABLE_ZSTRING} other as z_str then
Result := substring_index_in_bounds_zstring (z_str, start_pos, end_pos)
end
else
return_default := True
end
-- generic search: fill the shared string `Z_code_pattern_array [0]' from `other' via `fill_z_codes'
-- and search `current_readable' with `String_searcher'
if return_default and then attached String_searcher as searcher
and then attached Z_code_pattern_array [0] as z_code_string
then
r.shared_cursor_by_type (other, type_code).fill_z_codes (z_code_string)
searcher.initialize_deltas (z_code_string)
Result := searcher.substring_index_with_deltas (current_readable, z_code_string, start_pos, end_pos)
end
end
substring_right_index (other: EL_READABLE_ZSTRING; start_index: INTEGER): INTEGER
		-- index immediately after the first occurrence of `other' found by `substring_index'
		-- from `start_index', i.e. match index + `other.count'.
		-- If `substring_index' does not return a positive index its result is returned unchanged
	local
		l_index: INTEGER
	do
		l_index := substring_index (other, start_index)
		if l_index > 0 then
			Result := l_index + other.count
		else
			Result := l_index
		end
	end
substring_right_index_general (other: READABLE_STRING_GENERAL; start_index: INTEGER): INTEGER
		-- index immediately after the first occurrence of `other' found by `substring_index'
		-- from `start_index', i.e. match index + `other.count'.
		-- If `substring_index' does not return a positive index its result is returned unchanged
	local
		l_index: INTEGER
	do
		l_index := substring_index (other, start_index)
		if l_index > 0 then
			Result := l_index + other.count
		else
			Result := l_index
		end
	end
word_index (word: READABLE_STRING_GENERAL; start_index: INTEGER): INTEGER
		-- index of first occurrence of `word' found from `start_index' that is bounded on both sides
		-- by either the start/end of `Current' or a character that is not alpha-numeric.
		-- 0 if there is no such occurrence
	local
		has_left_boundary, has_right_boundary, found: BOOLEAN
		index, l_word_count: INTEGER
	do
		l_word_count := word.count
		-- `Result' starts as 1 only so that the exit test `Result = 0' is false on entry
		from index := start_index; Result := 1 until Result = 0 or else found or else index + l_word_count - 1 > count loop
			Result := substring_index (word, index)
			if Result > 0 then
				has_left_boundary := Result = 1 or else not is_alpha_numeric_item (Result - 1)
				has_right_boundary := Result + l_word_count - 1 = count or else not is_alpha_numeric_item (Result + l_word_count)
				if has_left_boundary and has_right_boundary then
					found := True
				else
					-- occurrence is embedded in a longer word: resume search one character further on
					index := Result + 1
				end
			end
		end
		if not found then
			-- The loop can also exit because there is no room left for `word'. In that case `Result'
			-- still holds either the initial 1 or the index of a rejected occurrence
			Result := 0
		end
	end
feature -- Occurrence index lists
substitution_marker_index_list: ARRAYED_LIST [INTEGER]
		-- shared list of the 1-based indices of template substitution markers '%S' AKA '#'
		-- NOTE(review): every '%S' found in `area' is listed; no escape test is visible in this routine
	local
		i, i_upper: INTEGER
	do
		Result := Once_substring_indices.emptied
		i_upper := count - 1
		if attached area as l_area then
			from i := 0 until i > i_upper loop
				if l_area [i] = '%S' then
					-- `area' is indexed from zero, string positions from one
					Result.extend (i + 1)
				end
				i := i + 1
			end
		end
	end
substring_index_list (delimiter: EL_READABLE_ZSTRING; keep_ref: BOOLEAN): ARRAYED_LIST [INTEGER]
		-- list of indices computed by `internal_substring_index_list (delimiter)'
		-- If `keep_ref' is true the result is a `twin' of that list rather than the list itself
	do
		if keep_ref then
			Result := internal_substring_index_list (delimiter).twin
		else
			Result := internal_substring_index_list (delimiter)
		end
	end
substring_index_list_general (delimiter: READABLE_STRING_GENERAL; keep_ref: BOOLEAN): ARRAYED_LIST [INTEGER]
		-- list of indices computed by `internal_substring_index_list_general (delimiter)'
		-- If `keep_ref' is true the result is a `twin' of that list rather than the list itself
	do
		if keep_ref then
			Result := internal_substring_index_list_general (delimiter).twin
		else
			Result := internal_substring_index_list_general (delimiter)
		end
	end
substring_intervals (str: READABLE_STRING_GENERAL; keep_ref: BOOLEAN): EL_OCCURRENCE_INTERVALS
		-- occurrence intervals computed by `internal_substring_intervals (str)'
		-- If `keep_ref' is true the result is a `twin' of that object rather than the object itself
	do
		if keep_ref then
			Result := internal_substring_intervals (str).twin
		else
			Result := internal_substring_intervals (str)
		end
	end
feature -- Basic operations
fill_alpha_numeric_intervals (interval_list: EL_ARRAYED_INTERVAL_LIST)
-- fill `interval_list' with substring intervals of contiguous alpha-numeric characters
-- `interval_list' is wiped out first, provided that `area', `unencoded_area' and `Codec' are all attached
local
i, j, block_index, i_upper, l_count: INTEGER; c_i: CHARACTER
iter: EL_COMPACT_SUBSTRINGS_32_ITERATION; interval: NATURAL_64
do
if attached area as l_area and then attached unencoded_area as area_32
and then attached Codec as l_codec
then
interval_list.wipe_out
i_upper := area_upper
-- `i' is a zero based offset into `l_area', so `i + 1' is the corresponding string position
from i := 0 until i > i_upper loop
c_i := l_area [i]
inspect character_8_band (c_i)
when Substitute then
-- test each character of the block obtained from `area_32' at `block_index'
-- and then skip `i' past the whole block
-- NOTE(review): `i' only advances here if the `block_string' result is attached
if attached iter.block_string (block_index, area_32) as str_32 then
block_index := iter.next_index (block_index, str_32)
l_count := str_32.count
from j := 1 until j > l_count loop
if str_32 [j].is_alpha_numeric then
-- `j' is one based so `i + j' is the position of the j-th block character
interval := interval_list.extend_next_upper (interval, i + j)
end
j := j + 1
end
i := i + l_count
end
when Ascii_range then
if c_i.is_alpha_numeric then
interval := interval_list.extend_next_upper (interval, i + 1)
end
i := i + 1
else
-- any other band: ask the codec whether the encoded character is alpha-numeric
if l_codec.is_alphanumeric (c_i.natural_32_code) then
interval := interval_list.extend_next_upper (interval, i + 1)
end
i := i + 1
end
end
-- hand the final value of `interval' to `interval_list'
interval_list.extend_compact (interval)
end
end
fill_index_list (list: ARRAYED_LIST [INTEGER]; a_pattern: READABLE_STRING_GENERAL)
		-- fill `list' with all indices of `a_pattern' found in `Current'
	do
		if a_pattern.is_string_8 and then attached compatible_string_8 (a_pattern) as l_pattern_8 then
			-- 8-bit pattern for which `compatible_string_8' yields a string: use the `String_8' routine
			String_8.fill_index_list (list, Current, l_pattern_8)
		else
			-- otherwise search with the pattern expanded into shared z-code pattern number 1
			internal_fill_index_list (list, shared_z_code_pattern_general (a_pattern, 1))
		end
	end
fill_index_list_by_character (list: ARRAYED_LIST [INTEGER]; uc: CHARACTER_32)
		-- fill `list' with all indices of `uc' found in `Current', searching by the
		-- z-code that `Codec' assigns to `uc'
	local
		l_z_code: NATURAL
	do
		l_z_code := Codec.as_z_code (uc)
		fill_index_list_by_z_code (list, l_z_code)
	end
feature {EL_SHARED_ZSTRING_CODEC, EL_SEARCHABLE_ZSTRING} -- Implementation
shared_z_code_pattern (index: INTEGER): STRING_32
		-- Current expanded as `z_code' sequence into the shared string at position `index'
		-- (1 or 2) of `Z_code_pattern_array'
	require
		valid_index: 1 <= index and index <= 2
	do
		-- `Z_code_pattern_array' is indexed from zero
		Result := Z_code_pattern_array.item (index - 1)
		fill_with_z_code (Result)
	end
shared_z_code_pattern_general (pattern: READABLE_STRING_GENERAL; index: INTEGER): STRING_32
		-- `pattern' expanded as a `z_code' sequence into shared string number `index'
	local
		r: EL_READABLE_STRING_GENERAL_ROUTINES; type_code: CHARACTER
	do
		type_code := string_storage_type (pattern)
		if type_code = 'X' then
			-- `pattern' is tested for conformance to EL_READABLE_ZSTRING and expands itself
			if attached {EL_READABLE_ZSTRING} pattern as z_pattern then
				Result := z_pattern.shared_z_code_pattern (index)
			end
		else
			-- any other storage type: fill the shared string using a cursor selected by `type_code'
			Result := Z_code_pattern_array [index - 1]
			r.shared_cursor_by_type (pattern, type_code).fill_z_codes (Result)
		end
	end
feature {NONE} -- Implementation
empty_occurrence_intervals (i: INTEGER): EL_OCCURRENCE_INTERVALS
		-- item `i' of the once array `Occurrence_intervals' after wiping it out
	do
		Result := Occurrence_intervals.item (i)
		Result.wipe_out
	end
fill_index_list_by_z_code (list: ARRAYED_LIST [INTEGER]; a_z_code: NATURAL)
		-- append to `list' the index of every character in `Current' that matches `a_z_code'
	local
		i, l_count, index, block_index: INTEGER
		l_area: like area; uc: CHARACTER_32; c: CHARACTER
	do
		if a_z_code <= 0xFF then
			-- code fits in 8 bits: linear scan of `area', which is indexed from zero
			c := a_z_code.to_character_8
			l_area := area; l_count := count
			from i := 0 until i = l_count loop
				if l_area [i] = c then
					list.extend (i + 1)
				end
				i := i + 1
			end
		else
			-- code above 0xFF: repeatedly call `unencoded_index_of', resuming one position
			-- after each match, until it returns 0
			uc := z_code_to_unicode (a_z_code).to_character_32
			from index := 1 until index = 0 loop
				index := unencoded_index_of (uc, index, $block_index)
				if index > 0 then
					list.extend (index)
					index := index + 1
				end
			end
		end
	end
internal_fill_index_list (list: ARRAYED_LIST [INTEGER]; z_code_pattern: STRING_32)
		-- append to `list' the index of each occurrence of `z_code_pattern' found in `Current'
		-- After each match the search resumes `z_code_pattern.count' positions further on
	local
		index, l_count, pattern_count, l_last_start: INTEGER
	do
		pattern_count := z_code_pattern.count; l_count := count
		-- highest index at which the pattern could still fit
		l_last_start := l_count - pattern_count + 1
		if attached string_searcher as searcher then
			searcher.initialize_deltas (z_code_pattern)
			from index := 1 until index = 0 or else index > l_last_start loop
				index := searcher.substring_index_with_deltas (current_readable, z_code_pattern, index, l_count)
				if index > 0 then
					list.extend (index)
					index := index + pattern_count
				end
			end
		end
	end
internal_substring_index_list (delimiter: EL_READABLE_ZSTRING): ARRAYED_LIST [INTEGER]
-- shared list of indices of `delimiter' occurring in `Current'
-- The list is the emptied `Once_substring_indices', so it is left empty when nothing is found
-- or when `delimiter' is longer than `Current'
local
delimiter_count: INTEGER
do
Result := Once_substring_indices.emptied
delimiter_count := delimiter.count
-- an empty `delimiter', or one that is the same object as `current_readable', yields the single index 1
if delimiter = current_readable or else delimiter_count = 0 then
Result.extend (1)
elseif delimiter_count <= count then
-- choose the search routine according to `respective_encoding (delimiter)'
inspect respective_encoding (delimiter)
when Both_have_mixed_encoding then
if delimiter_count = 1 then
fill_index_list_by_z_code (Result, delimiter.z_code (1))
else
internal_fill_index_list (Result, delimiter.shared_z_code_pattern (1))
end
when Only_other then
-- cannot find `delimiter'
do_nothing
when Only_current, Neither then
-- search via the `String_8' routines using the string returned by `String_8.injected (delimiter, 1)'
String_8.fill_index_list (Result, Current, String_8.injected (delimiter, 1))
else
end
end
end
internal_substring_index_list_general (delimiter: READABLE_STRING_GENERAL): ARRAYED_LIST [INTEGER]
-- shared list of indices of `delimiter' occurring in `Current'
-- The routine used depends on the code returned by `string_storage_type (delimiter)'
local
delimiter_count: INTEGER; type_code: CHARACTER
do
type_code := string_storage_type (delimiter)
inspect type_code
when 'X' then
-- `delimiter' is tested for conformance to EL_READABLE_ZSTRING and passed on as is
-- NOTE(review): `Result' is not assigned in this branch if the object test fails
if attached {EL_READABLE_ZSTRING} delimiter as z_str then
Result := internal_substring_index_list (z_str)
end
when '4' then
-- search with the result of `adapted_argument_for_type' for `delimiter'
Result := internal_substring_index_list (adapted_argument_for_type (delimiter, type_code, 1))
else
check
is_READABLE_STRING_8: type_code = '1'
end
Result := Once_substring_indices.emptied
delimiter_count := delimiter.count
-- an empty `delimiter', or one that is the same object as `current_readable', yields the single index 1
if delimiter = current_readable or else delimiter_count = 0 then
Result.extend (1)
elseif delimiter_count = 1 then
fill_index_list_by_character (Result, delimiter [1])
elseif attached compatible_string_8 (delimiter) as str_8 then
String_8.fill_index_list (Result, Current, str_8)
else
-- `compatible_string_8' gave no string: search with the result of `adapted_argument_for_type' instead
Result := internal_substring_index_list (adapted_argument_for_type (delimiter, type_code, 1))
end
end
end
internal_substring_intervals (str: READABLE_STRING_GENERAL): EL_OCCURRENCE_INTERVALS
		-- item 0 of the once array `Occurrence_intervals', wiped out and then filled by
		-- `fill_by_string_general' with `current_readable', `str' and 0 as arguments
	do
		Result := Occurrence_intervals.item (0)
		Result.wipe_out
		Result.fill_by_string_general (current_readable, str, 0)
	end
is_reversible_z_code_pattern (general: READABLE_STRING_GENERAL; z_code_string: STRING_32): BOOLEAN
		-- True if a ZSTRING built by appending each code of `z_code_string' as a z-code
		-- is equal to a ZSTRING made from `general'
	local
		zstr, zstr_2: ZSTRING; i, l_count: INTEGER
	do
		create zstr.make_from_general (general)
		l_count := z_code_string.count
		create zstr_2.make (l_count)
		from i := 1 until i > l_count loop
			zstr_2.append_z_code (z_code_string.item (i).natural_32_code)
			i := i + 1
		end
		Result := zstr ~ zstr_2
	end
substring_index_in_bounds_zstring (other: EL_READABLE_ZSTRING; start_pos, end_pos: INTEGER): INTEGER
		-- index of `other' found by searching with the bounds `start_pos' and `end_pos', or 0 if not found
		-- The routine used depends on whether `Current' (within the bounds) and `other' have unencoded characters
	local
		has_mixed_in_range: BOOLEAN
	do
		has_mixed_in_range := has_unencoded_between_optimal (area, start_pos, end_pos)
		inspect current_other_bitmap (has_mixed_in_range, other.has_mixed_encoding)
			when Only_other then
				Result := 0

			when Only_current, Neither then
				Result := String_8.substring_index_in_bounds (Current, other, start_pos, end_pos)

			when Both_have_mixed_encoding then
				-- Make calls to `code' more efficient by caching calls to `unencoded_code' in expanded string
				Result := String_searcher.substring_index (
					current_readable, other.shared_z_code_pattern (1), start_pos, end_pos
				)
		else
		end
	end
substring_index_zstring (other: EL_READABLE_ZSTRING; start_index: INTEGER): INTEGER
		-- index of `other' found by searching from `start_index', or 0 if not found
		-- The routine used depends on whether `Current' (from `start_index' to `count') and `other'
		-- have unencoded characters
	local
		has_mixed_in_range: BOOLEAN
	do
		has_mixed_in_range := has_unencoded_between_optimal (area, start_index, count)
		inspect current_other_bitmap (has_mixed_in_range, other.has_mixed_encoding)
			when Only_other then
				Result := 0

			when Both_have_mixed_encoding then
				-- Make calls to `code' more efficient by caching calls to `unencoded_code' in expanded string
				Result := String_searcher.substring_index (
					current_readable, other.shared_z_code_pattern (1), start_index, count
				)
			when Only_current, Neither then
				Result := String_8.substring_index (Current, other, start_index)
		else
		end
	end
feature {NONE} -- Constants
Occurrence_intervals: SPECIAL [EL_OCCURRENCE_INTERVALS]
		-- once-created array of 2 occurrence interval objects (indices 0 and 1)
	once
		create Result.make_filled (create {EL_OCCURRENCE_INTERVALS}.make_empty, 2)
		-- give index 1 an instance of its own, created separately from the `make_filled' argument
		Result.put (create {EL_OCCURRENCE_INTERVALS}.make_empty, 1)
	end
Z_code_pattern_array: SPECIAL [STRING_32]
		-- once-created array of 2 strings (indices 0 and 1) used as shared z-code pattern buffers
	once
		create Result.make_filled (create {STRING_32}.make_empty, 2)
		-- give index 1 an instance of its own, created separately from the `make_filled' argument
		Result.put (create {STRING_32}.make_empty, 1)
	end
end