class EL_EXTENDED_ZSTRING
Capabilities of ZSTRING extended with routines from EL_EXTENDED_READABLE_STRING_I and EL_EXTENDED_STRING_GENERAL.
note
description: "[
Capabilities of ${ZSTRING} extended with routines from ${EL_EXTENDED_READABLE_STRING_I} and
${EL_EXTENDED_STRING_GENERAL}.
]"
author: "Finnian Reilly"
copyright: "Copyright (c) 2001-2022 Finnian Reilly"
contact: "finnian at eiffel hyphen loop dot com"
license: "MIT license (See: en.wikipedia.org/wiki/MIT_License)"
date: "2025-05-06 7:34:39 GMT (Tuesday 6th May 2025)"
revision: "18"
class
EL_EXTENDED_ZSTRING
inherit
-- First parent. `append_to', `is_ascii_substring' and `split' are given new names here;
-- the names `append_to' and `split' are then used by the versions redefined from
-- EL_EXTENDED_STRING_32 below.
ZSTRING
rename
append_to as append_to_other,
is_ascii_substring as is_other_ascii_substring,
split as zstring_split
redefine
append_area_32, append_utf_8, make, trim, share
end
-- Second parent. Its `area' becomes `unencoded_area' and its `set_target' becomes `share'.
-- NOTE(review): features in the `undefine' list lose their implementation from this parent,
-- presumably so that the same-named ZSTRING versions apply - confirm against both parents.
EL_EXTENDED_STRING_32
rename
area as unencoded_area,
set_target as share,
shared_substring as shared_immutable_substring,
to_code_array as to_z_code_array
undefine
append_to_string_32, append_to_string_8, append_to_utf_8,
count,
ends_with_character, fill_z_codes,
has, has_alpha, has_alpha_in_bounds, has_enclosing, has_in_bounds, has_member, has_quotes,
is_ascii, is_ascii_in_bounds, is_alpha_numeric, is_canonically_spaced,
is_character, is_subset_of, is_valid_as_string_8,
leading_occurrences, leading_white_count_in_bounds, leading_white_count,
matches_wildcard, null, occurrences, prune_set_members, put_lower, put_upper, quoted,
remove_bookends, replace_character, remove_double, remove_single, replace_set_members,
same_string, set_substring_case, set_substring_lower, set_substring_upper, starts_with_character,
substring_to, substring_to_from, substring_to_reversed, substring_to_reversed_from,
to_canonically_spaced, to_utf_8,
trailing_white_count_in_bounds, trailing_white_count, translate, translate_or_delete, translate_with_deletion,
utf_8_byte_count, valid_index, write_utf_8_to,
String_32_searcher
redefine
all_alpha_numeric_in_bounds, all_ascii_in_bounds,
append_area_32, append_substring_to_special_32, append_substring_to_special_8,
append_to, append_utf_8,
index_of_white, is_substring_c_identifier, is_substring_eiffel_identifier,
is_i_th_alpha, is_i_th_alpha_numeric, is_i_th_identifier, is_i_th_space,
latin_1_count,
new_shared_substring, occurrences_in_area_bounds, occurs_at, occurs_caseless_at,
parse_substring_in_bounds,
right_bracket_index, split, split_adjusted, to_z_code_array
end
-- `make_empty' is the only creation procedure listed for clients.
create
make_empty
feature {NONE} -- Initialization
make (n: INTEGER)
		-- Initialize through the inherited `make' with argument `n' and
		-- let `shared_string' refer to `Current' until `share' is called
	do
		Precursor (n)
		shared_string := Current
	end
feature -- Measurement
index_of_white (start_index: INTEGER): INTEGER
		-- Position of the first white space character found at or after `start_index',
		-- or 0 when no such character exists
	local
		offset, last_offset, block_index: INTEGER; encoded: CHARACTER_8; is_white: BOOLEAN
		iter: EL_COMPACT_SUBSTRINGS_32_ITERATION; c32: EL_CHARACTER_32_ROUTINES
	do
		if attached area as l_area and then attached unencoded_area as unencoded
			and then attached Unicode_table as uc_table
		then
			last_offset := count - 1
			from offset := start_index - 1 until offset > last_offset or Result > 0 loop
				encoded := l_area [offset]
				inspect character_8_band (encoded)
					when Substitute then
						-- character is stored in `unencoded' under its one-based position
						is_white := c32.is_space (iter.item ($block_index, unencoded, offset + 1))
					when Ascii_range then
						is_white := encoded.is_space
				else
					is_white := c32.is_space (uc_table [encoded.code])
				end
				-- after the increment `offset' equals the one-based position just examined
				offset := offset + 1
				if is_white then
					Result := offset
				end
			end
		end
	end
last_word_start_index (end_index_ptr: TYPED_POINTER [INTEGER]): INTEGER
		-- Start position of the last run of alpha-numeric characters, 0 if there is none.
		-- The end position of that run is written to `end_index_ptr' when a run exists
		-- and the pointer is not the default pointer.
	local
		i: INTEGER
	do
		-- step back over trailing characters that are not alpha-numeric
		from i := count until i = 0 or else is_alpha_numeric_item (i) loop
			i := i - 1
		end
		if i > 0 and then not end_index_ptr.is_default_pointer then
			put_integer_32 (i, end_index_ptr)
		end
		-- step back over the word itself, remembering the lowest position seen
		from until i = 0 or else not is_alpha_numeric_item (i) loop
			Result := i
			i := i - 1
		end
	end
latin_1_count: INTEGER
		-- Count of characters between `area_lower' and `area_upper' that qualify as Latin-1:
		-- every character in the ASCII band, every other encoded character if
		-- `Codec.encoded_as_latin (1)' holds or else if its `unicode_table' entry
		-- satisfies `is_character_8', and every substituted character whose value in
		-- `unencoded_area' satisfies `is_character_8'.
	local
		i, i_upper, block_index: INTEGER; already_latin_1: BOOLEAN
		iter: EL_COMPACT_SUBSTRINGS_32_ITERATION; c_i: CHARACTER
	do
		already_latin_1 := Codec.encoded_as_latin (1)
		if attached unicode_table as l_unicode_table and then attached unencoded_area as area_32
			and then attached area as l_area
		then
			i_upper := area_upper
			-- (the never-assigned `break' flag of earlier versions has been dropped from the exit test)
			from i := area_lower until i > i_upper loop
				c_i := l_area [i]
				inspect character_8_band (c_i)
					when Substitute then
						if iter.item ($block_index, area_32, i + 1).is_character_8 then
							Result := Result + 1
						end
					when Ascii_range then
						Result := Result + 1
				else
					if already_latin_1 then
						Result := Result + 1
					elseif l_unicode_table [c_i.code].is_character_8 then
						Result := Result + 1
					end
				end
				i := i + 1
			end
		end
	end
feature -- Status query
has_alpha_in_bounds (start_index, end_index: INTEGER): BOOLEAN
		-- `True' if at least one character in positions `start_index' to `end_index'
		-- is alphabetical
	local
		offset, last_offset, block_index: INTEGER; encoded: CHARACTER_8
		iter: EL_COMPACT_SUBSTRINGS_32_ITERATION
	do
		if attached unencoded_area as unencoded and then attached area as l_area
			and then attached Unicode_table as uc_table
		then
			last_offset := end_index - 1
			-- scan stops at the first alphabetical character
			from offset := start_index - 1 until Result or offset > last_offset loop
				encoded := l_area [offset]
				inspect character_8_band (encoded)
					when Ascii_range then
						Result := encoded.is_alpha
					when Substitute then
						Result := iter.item ($block_index, unencoded, offset + 1).is_alpha
				else
					Result := uc_table [encoded.code].is_alpha
				end
				offset := offset + 1
			end
		end
	end
is_ascii_in_bounds (start_index, end_index: INTEGER): BOOLEAN
		-- `True' if every character in positions `start_index' to `end_index'
		-- belongs to the ASCII character set: 0 .. 127
	local
		i_lower, i_upper: INTEGER
	do
		-- translate string positions into absolute area bounds
		i_lower := lower_abs (start_index)
		i_upper := upper_abs (end_index)
		Result := all_ascii_in_bounds (unencoded_area, i_lower, i_upper)
	end
occurs_at (smaller: ZSTRING; index: INTEGER): BOOLEAN
		-- `True' if the whole of `smaller' matches the characters of `Current'
		-- beginning at position `index'
	local
		smaller_count: INTEGER
	do
		smaller_count := smaller.count
		Result := same_characters (smaller, 1, smaller_count, index)
	end
occurs_caseless_at (smaller: ZSTRING; index: INTEGER): BOOLEAN
		-- `True' if the whole of `smaller' matches the characters of `Current'
		-- beginning at position `index', ignoring differences of case
	local
		smaller_count: INTEGER
	do
		smaller_count := smaller.count
		Result := same_caseless_characters (smaller, 1, smaller_count, index)
	end
feature -- Element change
append_utf_8 (utf_8_string: READABLE_STRING_8)
		-- Append `utf_8_string' using the ZSTRING implementation and then
		-- call `update_shared' so that `shared_string' follows the change
	do
		Precursor {ZSTRING} (utf_8_string)
		update_shared
	end
share (other: ZSTRING)
		-- Let `Current' share the text of `other' and record `other' as `shared_string'.
		-- Later character changes made through either string are visible in both.
	do
		Precursor (other)
		shared_string := other
	end
feature -- Duplication
filled (uc: CHARACTER_32; n: INTEGER): ZSTRING
		-- String of `n' repetitions of `uc' as supplied by `Character_string_table'.
		-- The instance is shared: do not keep or modify it.
	do
		Result := Character_string_table.item (uc, n)
	end
pruned (c: CHARACTER_32): STRING_32
		-- New STRING_32 made from `Current' with every occurrence of `c' removed
	do
		create Result.make_from_string (Current)
		Result.prune_all (c)
	end
shared_leading (end_index: INTEGER): ZSTRING
		-- Leading part of `shared_string' covering positions 1 to `end_index',
		-- created with `make_shared'
	do
		create Result.make_shared (shared_string, end_index)
	end
feature -- Conversion
split (uc: CHARACTER_32): EL_SPLIT_ON_CHARACTER [like shared_string, CHARACTER_32]
		-- Iterable split of `shared_string' on separator `uc' using the
		-- `Left_side' adjustment
	do
		Result := split_adjusted (uc, Left_side)
	end
split_adjusted (uc: CHARACTER_32; adjustments: INTEGER): like split
		-- The once object `Once_split_zstring' reconfigured with `adjustments',
		-- separator `uc' and `shared_string' as target.
		-- The same object is returned on every call.
	do
		Result := Once_split_zstring
		Result.set_adjustments (adjustments)
		Result.set_separator (uc)
		Result.set_target (shared_string)
	end
to_z_code_array: SPECIAL [NATURAL_32]
		-- New array holding one code per character of `Current': the raw code of each
		-- encoded character, or `unicode_to_z_code' applied to the Unicode value of each
		-- substituted character
	local
		offset, last_offset, block_index: INTEGER; encoded: CHARACTER_8; unicode: CHARACTER_32
		iter: EL_COMPACT_SUBSTRINGS_32_ITERATION
	do
		create Result.make_empty (count)
		if attached unencoded_area as unencoded and then attached area as l_area
			and then attached Unicode_table as uc_table
		then
			last_offset := count - 1
			from offset := 0 until offset > last_offset loop
				encoded := l_area [offset]
				if encoded = Substitute then
					-- real character is kept in `unencoded' under its one-based position
					unicode := iter.item ($block_index, unencoded, offset + 1)
					Result.extend (unicode_to_z_code (unicode.natural_32_code))
				else
					Result.extend (encoded.natural_32_code)
				end
				offset := offset + 1
			end
		end
	end
feature {STRING_HANDLER} -- Basic operations
append_to (special_out: SPECIAL [CHARACTER_32]; source_index, n: INTEGER)
-- Extend `special_out' with the Unicode value of up to `n' characters of `Current',
-- starting `source_index' places after `index_lower' and never going beyond `index_upper'
local
i, i_upper, l_block_index: INTEGER; c_i: CHARACTER
iter: EL_COMPACT_SUBSTRINGS_32_ITERATION
do
-- NOTE(review): this call hands `n', `area', `special_out' and offset 0 to the codec before
-- the loop below extends `special_out' character by character. It ignores `source_index'
-- and looks redundant with the loop - confirm what `decode' does to `special_out'.
codec.decode (n, area, special_out, 0)
if attached area as l_area and then attached unencoded_area as unencoded
and then attached unicode_table as uc_table
then
-- clamp the last index so that the loop never reads past `index_upper'
i_upper := (index_lower + source_index + n - 1).min (index_upper)
from i := index_lower + source_index until i > i_upper loop
c_i := l_area [i]
inspect character_8_band (c_i)
when Substitute then
-- substituted characters are fetched from `unencoded' by one-based position
special_out.extend (iter.item ($l_block_index, unencoded, i - index_lower + 1))
when Ascii_range then
special_out.extend (c_i)
else
-- any other encoded character is converted through the Unicode table
special_out.extend (uc_table [c_i.code])
end
i := i + 1
end
end
end
feature {NONE} -- Implementation
all_alpha_numeric_in_bounds (unencoded: like unencoded_area; i_lower, i_upper: INTEGER): BOOLEAN
		-- `True' if every character at area indices `i_lower' to `i_upper' is alpha-numeric.
		-- Substituted characters are looked up in `unencoded'.
	local
		offset, block_index: INTEGER; encoded: CHARACTER_8
		iter: EL_COMPACT_SUBSTRINGS_32_ITERATION
	do
		if attached area as l_area and then attached Unicode_table as uc_table then
			Result := True
			-- scan stops at the first character that is not alpha-numeric
			from offset := i_lower until not Result or offset > i_upper loop
				encoded := l_area [offset]
				inspect character_8_band (encoded)
					when Ascii_range then
						Result := encoded.is_alpha_numeric
					when Substitute then
						Result := iter.item ($block_index, unencoded, offset + 1).is_alpha_numeric
				else
					Result := uc_table [encoded.code].is_alpha_numeric
				end
				offset := offset + 1
			end
		end
	end
append_area_32 (a_area: SPECIAL [CHARACTER_32])
		-- Append `a_area' using the ZSTRING implementation and then bring
		-- `shared_string' up to date, in the same way as `append_utf_8'
	do
		Precursor {ZSTRING} (a_area)
		-- `update_shared' performs the `shared_string /= Current' test and the re-share
		update_shared
	end
append_substring_to_special_32 (
	unencoded: like unencoded_area; i_lower, i_upper: INTEGER
	special_32: SPECIAL [CHARACTER_32]; a_offset: INTEGER
)
		-- Put the Unicode value of each character at area indices `i_lower' to `i_upper'
		-- into `special_32' at consecutive indices starting from `a_offset'
	local
		i, block_index: INTEGER; encoded: CHARACTER_8; unicode: CHARACTER_32
		iter: EL_COMPACT_SUBSTRINGS_32_ITERATION
	do
		if attached Unicode_table as uc_table and then attached area as l_area then
			from i := i_lower until i > i_upper loop
				encoded := l_area [i]
				inspect character_8_band (encoded)
					when Ascii_range then
						unicode := encoded
					when Substitute then
						unicode := iter.item ($block_index, unencoded, i + 1)
				else
					unicode := uc_table [encoded.code]
				end
				-- destination index advances in step with the source index
				special_32.put (unicode, a_offset + (i - i_lower))
				i := i + 1
			end
		end
	end
append_substring_to_special_8 (
	unencoded: like unencoded_area; i_lower, i_upper: INTEGER
	special_8: SPECIAL [CHARACTER_8]; a_offset: INTEGER
)
		-- Put each character at area indices `i_lower' to `i_upper' into `special_8' at
		-- consecutive indices starting from `a_offset'. A character whose Unicode value
		-- does not satisfy `is_character_8' is written as `Substitute'.
	local
		i, block_index: INTEGER; encoded, narrow: CHARACTER_8; unicode: CHARACTER_32
		iter: EL_COMPACT_SUBSTRINGS_32_ITERATION
	do
		if attached Unicode_table as uc_table and then attached area as l_area then
			from i := i_lower until i > i_upper loop
				encoded := l_area [i]
				inspect character_8_band (encoded)
					when Ascii_range then
						unicode := encoded
					when Substitute then
						unicode := iter.item ($block_index, unencoded, i + 1)
				else
					unicode := uc_table [encoded.code]
				end
				if unicode.is_character_8 then
					narrow := unicode.to_character_8
				else
					narrow := Substitute
				end
				-- destination index advances in step with the source index
				special_8.put (narrow, a_offset + (i - i_lower))
				i := i + 1
			end
		end
	end
copy_area_32_data (unencoded: like unencoded_area; source: SPECIAL [CHARACTER_32])
-- Does nothing in this class: the body is empty.
-- NOTE(review): the `not_applicable: False' alternative adds nothing to the inherited
-- precondition; it appears to mark the routine as unused here - confirm.
require else
not_applicable: False
do
end
is_i_th_alpha (unencoded: like unencoded_area; i: INTEGER): BOOLEAN
		-- `True' if the character at zero-based area index `i' is alphabetical.
		-- `unencoded' is not consulted: the test is delegated to `is_alpha_item'.
	do
		-- `is_alpha_item' expects a one-based string position, so area index `i'
		-- corresponds to position `i + 1' (the same conversion applied to every
		-- `iter.item' call in this class)
		Result := is_alpha_item (i + 1)
	end
is_i_th_alpha_numeric (unencoded: like unencoded_area; i: INTEGER): BOOLEAN
		-- `True' if the character at zero-based area index `i' is alphabetical or numeric.
		-- `unencoded' is not consulted: the test is delegated to `is_alpha_numeric_item'.
	do
		-- `is_alpha_numeric_item' expects a one-based string position (see its use in
		-- `last_word_start_index'), so area index `i' corresponds to position `i + 1'
		Result := is_alpha_numeric_item (i + 1)
	end
is_i_th_identifier (unencoded: like unencoded_area; i: INTEGER): BOOLEAN
		-- `True' if the encoded character at area index `i' is accepted by
		-- `is_c_identifier' (called with second argument `False').
		-- `unencoded' is not consulted.
	local
		routines: EL_CHARACTER_8_ROUTINES
	do
		Result := routines.is_c_identifier (area [i], False)
	end
is_i_th_space (unencoded: like unencoded_area; i: INTEGER; a_unicode: EL_UNICODE_PROPERTY): BOOLEAN
		-- `True' if the character at zero-based area index `i' is white space.
		-- Neither `unencoded' nor `a_unicode' is consulted: the test is delegated
		-- to `is_space_item'.
	do
		-- `is_space_item' expects a one-based string position, so area index `i'
		-- corresponds to position `i + 1'
		Result := is_space_item (i + 1)
	end
all_ascii_in_bounds (unencoded: like unencoded_area; i_lower, i_upper: INTEGER): BOOLEAN
		-- `True' if no character at area indices `i_lower' to `i_upper' is the `Substitute'
		-- placeholder and `is_ascii_area' holds for `area' over the same bounds.
		-- `unencoded' is not consulted.
	local
		i: INTEGER; routines: EL_CHARACTER_8_ROUTINES
	do
		if attached area as l_area then
			Result := True
			-- a `Substitute' stands for a character outside the codec, hence not ASCII
			from i := i_lower until i > i_upper or not Result loop
				if l_area [i] = Substitute then
					Result := False
				end
				i := i + 1
			end
			if Result then
				Result := routines.is_ascii_area (area, i_lower, i_upper)
			end
		end
	end
is_substring_c_identifier (unencoded: like unencoded_area; i_lower, i_upper: INTEGER): BOOLEAN
		-- `True' if `is_c_identifier_area' accepts the encoded characters of `area'
		-- at indices `i_lower' to `i_upper'. `unencoded' is not consulted.
	local
		routines: EL_CHARACTER_8_ROUTINES
	do
		Result := routines.is_c_identifier_area (area, i_lower, i_upper)
	end
is_substring_eiffel_identifier (
	unencoded: like unencoded_area; i_lower, i_upper: INTEGER case: NATURAL_8
): BOOLEAN
		-- `True' if `is_eiffel_identifier_area' accepts the encoded characters of `area'
		-- at indices `i_lower' to `i_upper' for the given `case'.
		-- `unencoded' is not consulted.
	local
		routines: EL_CHARACTER_8_ROUTINES
	do
		Result := routines.is_eiffel_identifier_area (area, i_lower, i_upper, case)
	end
new_readable: EL_EXTENDED_ZSTRING
		-- Fresh empty instance of this class
	do
		create Result.make_empty
	end
new_shared_substring (str: EL_READABLE_ZSTRING; start_index, end_index: INTEGER): EL_READABLE_ZSTRING
		-- Substring of `str' from `start_index' to `end_index' as returned by
		-- `copied_substring' of the once object `Substring_buffer'
	do
		Result := Substring_buffer.copied_substring (str, start_index, end_index)
	end
new_substring (start_index, end_index: INTEGER): ZSTRING
		-- New ZSTRING created from positions `start_index' to `end_index' of `Current'
	do
		create Result.make_from_substring (Current, start_index, end_index)
	end
occurrences_in_area_bounds (unencoded: like unencoded_area; uc: CHARACTER_32; i_lower, i_upper: INTEGER): INTEGER
		-- Count of `uc' occurrences at area indices `i_lower' to `i_upper'.
		-- Substituted characters are looked up in `unencoded'.
	local
		i, block_index: INTEGER; c_i, ascii_uc: CHARACTER_8; is_ascii_uc: BOOLEAN
		iter: EL_COMPACT_SUBSTRINGS_32_ITERATION
	do
		-- An explicit flag is used rather than treating '%U' as "not ASCII",
		-- so that a search for the NUL character itself is counted correctly
		is_ascii_uc := uc.natural_32_code <= 0x7F
		if is_ascii_uc then
			ascii_uc := uc.to_character_8
		end
		if attached Unicode_table as uc_table and then attached area as l_area then
			from i := i_lower until i > i_upper loop
				if is_ascii_uc then
					-- ASCII characters are compared directly with the encoded area
					Result := Result + (l_area [i] = ascii_uc).to_integer
				else
					c_i := l_area [i]
					inspect character_8_band (c_i)
						when Substitute then
							Result := Result + (iter.item ($block_index, unencoded, i + 1) = uc).to_integer
						when Ascii_range then
							-- an ASCII character can never equal a non-ASCII `uc'
							do_nothing
					else
						Result := Result + (uc_table [c_i.code] = uc).to_integer
					end
				end
				i := i + 1
			end
		end
	end
other_area (other: EL_READABLE_ZSTRING): like unencoded_area
		-- The `unencoded_area' of `other'
	do
		Result := other.unencoded_area
	end
other_index_lower (other: EL_READABLE_ZSTRING): INTEGER
		-- The `area_lower' of `other'
	do
		Result := other.area_lower
	end
parse_substring_in_bounds (
	unencoded: like unencoded_area; type, i_lower, i_upper: INTEGER; convertor: STRING_TO_NUMERIC_CONVERTOR
)
		-- Feed the encoded characters at area indices `i_lower' to `i_upper' to `convertor'
		-- one at a time. Parsing stops early when `convertor' reports failure, or when a
		-- character other than a digit, 'e', 'E', '.', '+' or '-' is met, in which case
		-- `convertor' is also reset with `type'. `unencoded' is not consulted.
	local
		i: INTEGER; stopped: BOOLEAN; encoded: CHARACTER_8
	do
		if attached area as l_area then
			from i := i_lower until stopped or i > i_upper loop
				encoded := l_area [i]
				inspect encoded
					when '0' .. '9', 'e', 'E', '.', '+', '-' then
						convertor.parse_character (encoded)
						stopped := not convertor.parse_successful
						if not stopped then
							i := i + 1
						end
				else
					-- not a character that can belong to a number
					stopped := True
					convertor.reset (type)
				end
			end
		end
	end
right_bracket_index (unencoded: like unencoded_area; left_bracket: CHARACTER_32; start_index, end_index: INTEGER): INTEGER
		-- Index of the right bracket matching `left_bracket' as computed by
		-- `right_bracket_index' of EL_CHARACTER_8_ROUTINES on `area' between
		-- `start_index' and `end_index'. `-1' if not found.
		-- `unencoded' is not consulted.
	local
		routines: EL_CHARACTER_8_ROUTINES; left_bracket_8: CHARACTER_8
	do
		left_bracket_8 := left_bracket.to_character_8
		Result := routines.right_bracket_index (area, left_bracket_8, start_index, end_index)
	end
trim
		-- Set the count of `shared_string' to `count' and then apply the inherited `trim'.
		-- Works around the BOUNDED invariant
		--		valid_count: count <= capacity
		-- when `update_shared' is called.
	do
		shared_string.set_count (count)
		Precursor
	end
update_shared
		-- Make `shared_string' share the text of `Current' again,
		-- unless `shared_string' is `Current' itself
	do
		if Current /= shared_string then
			shared_string.share (Current)
		end
	end
feature {NONE} -- Internal attributes
shared_string: ZSTRING
-- String whose text is shared with `Current': set to `Current' by `make'
-- and to the argument of `share' when that routine is called
feature {NONE} -- Constants
Once_split_zstring: EL_SPLIT_ZSTRING_ON_CHARACTER
		-- Single splitter instance reused by `split_adjusted', initially created
		-- with `Empty_string' as target and '_' as separator
	once
		create Result.make (Empty_string, '_')
	end
Substring_buffer: EL_ZSTRING_BUFFER
		-- Single buffer instance used by `new_shared_substring'
	once
		create Result
	end
end