class ZSTRING_UNICODE_TO_Z_CODE
Methods to obtain z_code from unicode character
24 Nov 2023
to_string_32_v2 uses technique:
when Substitute then interval_count := unencoded_interval_count (area_32, j_lower) area_out.copy_data (area_32, j_lower, old_count + i, interval_count) j_lower := j_lower + interval_count + 2 i := i + interval_count
Passes over 500 millisecs (in descending order)
to_string_32 : 164.0 times (100%) to_string_32_v2 : 95.0 times (-42.1%)
prune_all_v2 use technique:
when Substitute then interval_count := unencoded_interval_count (uc_area, k_lower) k_upper := k_lower + interval_count - 1
Passes over 500 millisecs (in descending order)
prune_all : 38.0 times (100%) prune_all_v2 : 38.0 times (-0.0%)
when 0 .. C then VS if x <= C then
Passes over 1000 millisecs (in descending order)
when 0 .. 0x7F then : 16645498.0 times (100%) when 0 .. 0xFF then : 16610377.0 times (-0.2%) if code <= 0x7F then : 16558972.0 times (-0.5%) if code <= 0xFF then : 13450191.0 times (-19.2%)
note
description: "Methods to obtain z_code from unicode character"
author: "Finnian Reilly"
copyright: "Copyright (c) 2001-2022 Finnian Reilly"
contact: "finnian at eiffel hyphen loop dot com"
license: "MIT license (See: en.wikipedia.org/wiki/MIT_License)"
date: "2024-10-04 12:14:07 GMT (Friday 4th October 2024)"
revision: "23"
class
ZSTRING_UNICODE_TO_Z_CODE
inherit
STRING_BENCHMARK_COMPARISON
HEXAGRAM_NAMES
export
{NONE} all
end
create
make
feature -- Access
Description: STRING = "Methods to obtain z_code from unicode character"
feature -- Basic operations
execute
local
str: ZSTRING
do
str := Name_manifest
compare ("Iterate ZSTRING areas", <<
["if unicode < 0x100 then", agent do_method (str.area, str.unencoded_area, 1, str.count)],
["if unicode <= 0xFF then", agent do_method (str.area, str.unencoded_area, 2, str.count)],
["inspect b.leading_zeros_count_32 (unicode)", agent do_method (str.area, str.unencoded_area, 3, str.count)],
["inspect unicode // 0x100", agent do_method (str.area, str.unencoded_area, 4, str.count)],
["inspect unicode |>> 8", agent do_method (str.area, str.unencoded_area, 5, str.count)],
["Pure bit arithmetic, no branching", agent do_method (str.area, str.unencoded_area, 6, str.count)]
>>)
end
feature {NONE} -- Operations
do_method (area: SPECIAL [CHARACTER]; area_32: SPECIAL [CHARACTER_32]; id, count: INTEGER)
local
iter: EL_COMPACT_SUBSTRINGS_32_ITERATION; block_index, i: INTEGER
uc: CHARACTER_32; c_i: CHARACTER; z_code: NATURAL
do
across 1 |..| 1000 as n loop
block_index := 0
from until i = count loop
inspect area [i]
when Substitute then
uc := iter.item ($block_index, area_32, i + 1)
inspect id
when 1 then
z_code := unicode_to_z_code (uc.natural_32_code)
when 2 then
z_code := unicode_to_z_code_lt_eq_0xFF (uc.natural_32_code)
when 3 then
z_code := unicode_to_z_code_inspect_leading_zero_count (uc.natural_32_code)
when 4 then
z_code := unicode_to_z_code_inspect_div_0x100 (uc.natural_32_code)
when 5 then
z_code := unicode_to_z_code_right_shift_8 (uc.natural_32_code)
when 6 then
z_code := unicode_to_z_code_bitwise_expression (uc.natural_32_code)
end
else
end
i := i + 1
end
end
end
frozen unicode_to_z_code_bitwise_expression (unicode: NATURAL): NATURAL
-- distinguish UCS4 characters below 0xFF from latin encoding by turning on the sign bit.
local
b: EL_BIT_ROUTINES
do
Result := (Sign_bit & (b.zero_or_one (unicode |>> 8) |<< 31)) | unicode
ensure
reversbile: z_code_to_unicode (Result) = unicode
end
frozen unicode_to_z_code_inspect_leading_zero_count (unicode: NATURAL): NATURAL
-- distinguish UCS4 characters below 0xFF from latin encoding by turning on the sign bit.
local
b: EL_BIT_ROUTINES
do
inspect b.leading_zeros_count_32 (unicode)
when 0 .. 23 then
Result := unicode
else
Result := Sign_bit | unicode
end
ensure
same_as_simple: Result = unicode_to_z_code (unicode)
end
frozen unicode_to_z_code_leading_zeros_1_to_15 (unicode: NATURAL): NATURAL
-- distinguish UCS4 characters below 0xFF from latin encoding by turning on the sign bit.
local
b: EL_BIT_ROUTINES
do
inspect b.leading_zeros_count_32 (unicode |>> 8)
when 31 then
Result := Sign_bit | unicode
else
Result := unicode
end
ensure
same_as_simple: Result = unicode_to_z_code (unicode)
end
frozen unicode_to_z_code_inspect_div_0x100 (unicode: NATURAL): NATURAL
-- distinguish UCS4 characters below 0xFF from latin encoding by turning on the sign bit.
do
inspect unicode // 0x100
when 0 then
Result := Sign_bit | unicode
else
Result := unicode
end
ensure
reversbile: z_code_to_unicode (Result) = unicode
end
frozen unicode_to_z_code_lt_eq_0xFF (unicode: NATURAL): NATURAL
-- distinguish UCS4 characters below 0xFF from latin encoding by turning on the sign bit.
do
if unicode <= 0xFF then
Result := Sign_bit | unicode
else
Result := unicode
end
ensure
reversbile: z_code_to_unicode (Result) = unicode
end
frozen unicode_to_z_code_right_shift_8 (unicode: NATURAL): NATURAL
-- distinguish UCS4 characters below 0xFF from latin encoding by turning on the sign bit.
do
inspect unicode |>> 8
when 0 then
Result := Sign_bit | unicode
else
Result := unicode
end
ensure
same_as_simple: Result = unicode_to_z_code (unicode)
end
note
notes: "[
**24 Nov 2023**
**to_string_32_v2** uses technique:
when Substitute then
interval_count := unencoded_interval_count (area_32, j_lower)
area_out.copy_data (area_32, j_lower, old_count + i, interval_count)
j_lower := j_lower + interval_count + 2
i := i + interval_count
Passes over 500 millisecs (in descending order)
to_string_32 : 164.0 times (100%)
to_string_32_v2 : 95.0 times (-42.1%)
**prune_all_v2** use technique:
when Substitute then
interval_count := unencoded_interval_count (uc_area, k_lower)
k_upper := k_lower + interval_count - 1
Passes over 500 millisecs (in descending order)
prune_all : 38.0 times (100%)
prune_all_v2 : 38.0 times (-0.0%)
**when 0 .. C then VS if x <= C then**
Passes over 1000 millisecs (in descending order)
when 0 .. 0x7F then : 16645498.0 times (100%)
when 0 .. 0xFF then : 16610377.0 times (-0.2%)
if code <= 0x7F then : 16558972.0 times (-0.5%)
if code <= 0xFF then : 13450191.0 times (-19.2%)
]"
end