class EL_SEARCH_TERM_PARSER

(source code)

Client examples: SEARCH_ENGINE_AUTOTEST_APP

description

Search term parser

note
	description: "Search term parser"

	author: "Finnian Reilly"
	copyright: "Copyright (c) 2001-2022 Finnian Reilly"
	contact: "finnian at eiffel hyphen loop dot com"

	license: "MIT license (See: en.wikipedia.org/wiki/MIT_License)"
	date: "2024-08-27 7:46:05 GMT (Tuesday 27th August 2024)"
	revision: "25"

class
	EL_SEARCH_TERM_PARSER [G -> EL_WORD_SEARCHABLE]
		-- Parser that compiles a search terms string into query conditions of
		-- type `EL_QUERY_CONDITION [G]' (see `set_query_condition' and `query_condition')

inherit
	EL_FILE_PARSER
		-- Inherited parser features are renamed to search-specific vocabulary:
		-- the source text is `search_terms' and parsing is `compile_conditions'
		rename
			make_default as make,
			parse as compile_conditions,
			source_text as search_terms,
			set_source_text as set_search_terms
		export
			{NONE} all
		redefine
			make, reset
		end

	TP_FACTORY
		-- Source of the pattern constructors used in the "Text patterns" features
		-- (`all_of', `one_of', `optional' etc.)
		export
			{NONE} all
		end

	EL_STRING_GENERAL_ROUTINES

create
	make

feature {NONE} -- Initialization

	make
			-- Create the empty result containers and a placeholder word table,
			-- then perform the inherited default initialization
		do
			-- The attributes are created before `Precursor' is called
			create match_words.make (5)
			create condition_list.make (10)
			create word_token_table.make (1)
			Precursor
		end

feature {NONE} -- Initialization

	reset
			-- Discard the conditions, match words and wildcard status left over from
			-- any previous compilation, then perform the inherited reset
		do
			invalid_wildcard := False
			match_words.wipe_out
			condition_list.wipe_out
			Precursor
		end

feature -- Access

	match_words: ARRAYED_LIST [EL_WORD_TOKEN_LIST]
			-- Word token lists recorded while compiling the search terms:
			-- one list for each plain word or phrase, and one list for each word in
			-- the word table that starts with the stem of a wildcard term.
			-- The list of a plain word or phrase is removed again if the term is negated.

	query_condition: EL_QUERY_CONDITION [G]
			-- Single condition combining every compiled search term:
			-- an `EL_ANY_QUERY_CONDITION' when no term was compiled,
			-- the sole condition when there is exactly one,
			-- otherwise an `EL_ALL_OF_QUERY_CONDITION' over all of them
		local
			term_count: INTEGER
		do
			term_count := condition_list.count
			if term_count = 0 then
				create {EL_ANY_QUERY_CONDITION [G]} Result

			elseif term_count = 1 then
				Result := condition_list.first

			else
				create {EL_ALL_OF_QUERY_CONDITION [G]} Result.make (condition_list.to_array)
			end
		end

feature -- Element Change

	set_query_condition (a_search_terms: ZSTRING; a_word_table: like word_token_table)
			-- Compile the stripped form of `a_search_terms' into query conditions,
			-- using `a_word_table' to convert words into word tokens.
			-- The outcome is available from `query_condition', `match_words' and `is_valid'.
		local
			stripped_terms: ZSTRING
		do
			stripped_terms := a_search_terms.stripped
			-- the table must be in place before compilation calls the match actions
			word_token_table := a_word_table
			set_search_terms (stripped_terms)
			compile_conditions
		end

feature -- Status query

	invalid_wildcard: BOOLEAN
			-- True if a wildcard term was rejected during the last compilation
			-- (e.g. the word stem preceding '*' has fewer than 3 characters).
			-- Cleared by `reset'.

	is_valid: BOOLEAN
			-- True if the search terms were fully matched by the parser
			-- and no wildcard term was rejected
		do
			Result := not (invalid_wildcard or not fully_matched)
		end

feature {NONE} -- Text patterns

	default_word: like all_of
			-- Pattern for an unquoted word: one or more alphanumeric characters
			-- optionally followed by a single '*' wildcard character.
			-- `on_word_or_phrase_substring' is attached as the match action.
		do
			Result := all_of (<<
				one_or_more (alphanumeric), optional (character_literal ('*'))
			>>) |to| agent on_word_or_phrase_substring
		end

	either_search_term: like all_of
			-- Pattern for two alternative terms: <term> OR <search term>
			-- The three elements are separated by `nonbreaking_white_space'.
			-- The right operand refers to `search_term' through `recurse' with an agent,
			-- so that "a OR b OR c" chains can be matched.
			-- `on_either_search_term' is set as the last action of the pattern.
		do
			Result := all_of_separated_by (nonbreaking_white_space, <<
				positive_or_negated_search_term, string_literal ("OR"), recurse (agent search_term, 1)
			>>)
			Result.set_action_last (agent on_either_search_term)
		end

	new_pattern: like all_of
			-- Top level pattern for the complete search terms string:
			-- a search term followed by zero or more further search terms,
			-- each preceded by `nonbreaking_white_space'
		local
			following_term: like all_of
		do
			following_term := all_of (<< nonbreaking_white_space, search_term >>)
			Result := all_of (<< search_term, zero_or_more (following_term) >>)
		end

	positive_or_negated_search_term: like all_of
			-- Pattern for a `positive_search_term' with an optional '-' prefix.
			-- `on_hypen_prefix' is attached twice: as the first action with value False,
			-- and on the optional '-' literal with value True.
			-- `on_positive_or_negated_search_term' is set as the last action
			-- and reads the resulting `has_hypen_prefix' flag.
		do
			Result := all_of (<<
				optional (character_literal ('-') |to| agent on_hypen_prefix (?, ?, True)),
				positive_search_term
			>>)
			Result.set_action_first (agent on_hypen_prefix (?, ?, False))
			Result.set_action_last (agent on_positive_or_negated_search_term)
		end

	positive_search_term: like one_of
			-- Pattern for a term without negation prefix. The alternatives are listed
			-- in this order: each of the `custom_patterns', then `quoted_phrase',
			-- then `default_word'.
		do
			create Result.make (custom_patterns)
			Result.extend (quoted_phrase)
			Result.extend (default_word)
		end

	quoted_phrase: like quoted_string
			-- Pattern for a phrase enclosed in double quotes.
			-- The agent `on_word_or_phrase' is passed to `quoted_string' to receive the
			-- matched string (presumably the text between the quotes - TODO confirm).
		do
			Result := quoted_string ('"', agent on_word_or_phrase)
		end

	search_term: like one_of
			-- Pattern for one search term: `either_search_term' (an OR expression) is
			-- listed before the plain `positive_or_negated_search_term'.
			-- `either_search_term' refers back to this function through an agent.
		do
			Result := one_of (<< either_search_term, positive_or_negated_search_term >>)
		end

feature {NONE} -- Match actions

	on_either_search_term (start_index, end_index: INTEGER)
			-- Replace the last two items of `condition_list' with a single
			-- condition that is the `or' combination of both.
			-- `start_index' and `end_index' are not used.
			-- Nothing is done if fewer than two conditions are available, which happens
			-- when an operand was a rejected wildcard term: `add_wildcard_term' then sets
			-- `invalid_wildcard' without adding any condition.
		require
			at_least_two_condition_operands: invalid_wildcard or else condition_list.count >= 2
		local
			left: EL_QUERY_CONDITION [G]
		do
			if condition_list.count >= 2 then
				condition_list.finish; condition_list.back
				left := condition_list.item -- second last item
				-- after `remove' the cursor is on the right neighbor: the former last item
				condition_list.remove
				condition_list.replace (left or condition_list.item)
			end
		end

	on_hypen_prefix (start_index, end_index: INTEGER; value: BOOLEAN)
			-- Set `has_hypen_prefix' to `value'.
			-- `start_index' and `end_index' are not used.
		do
			has_hypen_prefix := value
		end

	on_positive_or_negated_search_term (start_index, end_index: INTEGER)
			-- If the term just matched had a '-' prefix, replace the last item of
			-- `condition_list' with its negation. When that item is a plain
			-- words condition, also drop the last entry of `match_words'.
			-- `start_index' and `end_index' are not used.
		local
			negated_operand: EL_QUERY_CONDITION [G]
		do
			if has_hypen_prefix and then condition_list.count > 0 then
				condition_list.finish
				negated_operand := condition_list.item
				if attached {EL_CONTAINS_WORDS_CONDITION [G]} negated_operand then
					match_words.finish
					match_words.remove
				end
				condition_list.replace (not negated_operand)
			end
		end

	on_word_or_phrase_substring (start_index, end_index: INTEGER)
			-- Pass the source text substring from `start_index' to `end_index'
			-- to `on_word_or_phrase'
		do
			-- NOTE(review): meaning of the 3rd argument `True' is not visible here - confirm
			on_word_or_phrase (source_substring (start_index, end_index, True))
		end

	on_word_or_phrase (general: STRING_GENERAL)
			-- Add a condition for the word or phrase `general'.
			-- Text ending with '*' is treated as a wildcard term: the '*' is removed and
			-- the remainder handed to `add_wildcard_term'. Any other text is converted
			-- to word tokens that are added both as a contains-words condition
			-- and as an entry in `match_words'.
		local
			phrase: ZSTRING; tokens: EL_WORD_TOKEN_LIST
		do
			phrase := as_zstring (general)
			if phrase.is_empty or else phrase [phrase.count] /= '*' then
				tokens := word_token_table.paragraph_tokens (phrase)
				condition_list.extend (create {EL_CONTAINS_WORDS_CONDITION [G]}.make (tokens))
				match_words.extend (tokens)
			else
				phrase.remove_tail (1)
				add_wildcard_term (phrase)
			end
		end

feature {NONE} -- Implementation

	add_one_of_words_search_term_condition (phrase_stem_words: EL_WORD_TOKEN_LIST; word_stem: ZSTRING)
			-- Add a one-of-words condition for a wildcard term consisting of the
			-- tokens `phrase_stem_words' followed by any word in the word table
			-- that starts with the lower case form of `word_stem'.
			-- For each such word, `phrase_stem_words' extended by the token of
			-- that word is added to `match_words'.
		local
			vocabulary: like word_token_table.word_list
			candidate_phrase, variation_tokens: EL_WORD_TOKEN_LIST
			token: NATURAL; stem: ZSTRING
		do
			vocabulary := word_token_table.word_list
			stem := word_stem.as_lower
			create variation_tokens.make (20)
			from vocabulary.start until vocabulary.after loop
				if vocabulary.item.starts_with (stem) then
					-- the list position of the word is used as its token
					token := vocabulary.index.to_natural_32

					create candidate_phrase.make (phrase_stem_words.count + 1)
					candidate_phrase.append (phrase_stem_words)
					candidate_phrase.append_code (token)
					match_words.extend (candidate_phrase)

					variation_tokens.append_code (token)
				end
				vocabulary.forth
			end
			condition_list.extend (
				create {EL_ONE_OF_WORDS_CONDITION [G]}.make (phrase_stem_words, variation_tokens)
			)
		end

	add_wildcard_term (text: ZSTRING)
			-- Add a condition for the wildcard term `text', from which the trailing '*'
			-- has already been removed. The last word of `text' is the wildcard stem and
			-- any preceding words form a fixed phrase that must come before it.
			-- `invalid_wildcard' is set, and no condition added, if `text' contains no
			-- words at all (e.g. the quoted phrase "*") or if the stem has fewer
			-- than 3 characters.
		local
			words: EL_ZSTRING_LIST; word_tokens: EL_WORD_TOKEN_LIST
		do
			create words.make_word_split (text)
			words.prune_all_empty
			-- `words.last' may only be called on a list that is not empty
			if words.is_empty or else words.last.count < 3 then
				invalid_wildcard := True
			else
				if words.count = 1 then
					-- stem only: no preceding phrase words
					create word_tokens.make (0)
				else
					word_tokens := word_token_table.paragraph_tokens (words.subchain (1, words.count - 1).joined_words)
				end
				add_one_of_words_search_term_condition (word_tokens, words.last)
			end
		end

	custom_patterns: ARRAY [TP_PATTERN]
			-- Patterns that `positive_search_term' lists ahead of `quoted_phrase'
			-- and `default_word'. Empty in this class
			-- (presumably intended for redefinition in descendants - TODO confirm).
		do
			create Result.make_empty
		end

feature {NONE} -- Internal attributes

	condition_list: ARRAYED_LIST [EL_QUERY_CONDITION [G]]
			-- Conditions compiled so far by the match actions, in the order the terms
			-- were matched. Negation and OR actions replace items at the end of the list.

	has_hypen_prefix: BOOLEAN
			-- True if the term currently being processed was preceded by '-'
			-- (set by `on_hypen_prefix', read by `on_positive_or_negated_search_term')

	word_token_table: EL_WORD_TOKEN_TABLE
			-- Table used to convert words and phrases into word tokens,
			-- supplied by the caller of `set_query_condition'

end