From e6308b8167e28a7452e9b87d913c1279f43b130a Mon Sep 17 00:00:00 2001
From: Markus Heiser
Date: Tue, 1 Apr 2025 09:20:19 +0200
Subject: [PATCH] [fix] hardening against arguments of type None, where str or dict is expected

On a long-running server, the tracebacks below can be found (albeit rarely),
which indicate problems with NoneType where a string or another data type is
expected.

result.img_src::

    File "/usr/local/searxng/searxng-src/searx/templates/simple/result_templates/images.html", line 13, in top-level template code
      {{ result.title|striptags }}{{- "" -}}
      ^
    File "/usr/local/searxng/searxng-src/searx/webapp.py", line 284, in image_proxify
      if url.startswith('//'):
         ^^^^^^^^^^^^^^
    AttributeError: 'NoneType' object has no attribute 'startswith'

result.content::

    File "/usr/local/searxng/searxng-src/searx/result_types/_base.py", line 105, in _normalize_text_fields
      result.content = WHITESPACE_REGEX.sub(" ", result.content).strip()
                       ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^
    TypeError: expected string or bytes-like object, got 'NoneType'

html_to_text, when html_str is a NoneType::

    File "/usr/local/searxng/searxng-src/searx/engines/wikipedia.py", line 190, in response
      title = utils.html_to_text(api_result.get('titles', {}).get('display') or api_result.get('title'))
    File "/usr/local/searxng/searxng-src/searx/utils.py", line 158, in html_to_text
      html_str = html_str.replace('\n', ' ').replace('\r', ' ')
                 ^^^^^^^^^^^^^^^^
    AttributeError: 'NoneType' object has no attribute 'replace'

presearch engine, when json_resp is a NoneType::

    File "/usr/local/searxng/searxng-src/searx/engines/presearch.py", line 221, in response
      results = parse_search_query(json_resp.get('results'))
    File "/usr/local/searxng/searxng-src/searx/engines/presearch.py", line 161, in parse_search_query
      for item in json_results.get('specialSections', {}).get('topStoriesCompact', {}).get('data', []):
                  ^^^^^^^^^^^^^^^^
    AttributeError: 'NoneType' object has no attribute 'get'

Signed-off-by: Markus Heiser
---
searx/engines/presearch.py | 4 +++- searx/result_types/_base.py | 6 ++++-- searx/utils.py | 2 ++ searx/webapp.py | 5 +++++ 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/searx/engines/presearch.py b/searx/engines/presearch.py index 034233163..1093b5cff 100644 --- a/searx/engines/presearch.py +++ b/searx/engines/presearch.py @@ -184,6 +184,8 @@ def _fix_title(title, url): def parse_search_query(json_results): results = [] + if not json_results: + return results for item in json_results.get('specialSections', {}).get('topStoriesCompact', {}).get('data', []): result = { @@ -245,7 +247,7 @@ def response(resp): json_resp = resp.json() if search_type == 'search': - results = parse_search_query(json_resp.get('results')) + results = parse_search_query(json_resp.get('results', {})) elif search_type == 'images': for item in json_resp.get('images', []): diff --git a/searx/result_types/_base.py b/searx/result_types/_base.py index caf7e2a4f..ce846c3cf 100644 --- a/searx/result_types/_base.py +++ b/searx/result_types/_base.py @@ -103,8 +103,10 @@ def _normalize_text_fields(result: MainResult | LegacyResult): result.content = str(result) # normalize title and content - result.title = WHITESPACE_REGEX.sub(" ", result.title).strip() - result.content = WHITESPACE_REGEX.sub(" ", result.content).strip() + if result.title: + result.title = WHITESPACE_REGEX.sub(" ", result.title).strip() + if result.content: + result.content = WHITESPACE_REGEX.sub(" ", result.content).strip() if result.content == result.title: # avoid duplicate content between the content and title fields result.content = "" diff --git a/searx/utils.py b/searx/utils.py index ee044704b..a28171a32 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -154,6 +154,8 @@ def html_to_text(html_str: str) -> str: >>> html_to_text(r'regexp: (?