diff --git a/searx/engines/bilibili.py b/searx/engines/bilibili.py
index 06e1cddb2..c6aa70dfd 100644
--- a/searx/engines/bilibili.py
+++ b/searx/engines/bilibili.py
@@ -56,18 +56,6 @@ def request(query, params):
     return params
 
 
-# Format the video duration
-def format_duration(duration):
-    if not ":" in duration:
-        return None
-    minutes, seconds = map(int, duration.split(":"))
-    total_seconds = minutes * 60 + seconds
-
-    formatted_duration = str(timedelta(seconds=total_seconds))[2:] if 0 <= total_seconds < 3600 else ""
-
-    return formatted_duration
-
-
 def response(resp):
     search_res = resp.json()
 
@@ -83,7 +71,12 @@ def response(resp):
 
         unix_date = item["pubdate"]
         formatted_date = datetime.fromtimestamp(unix_date)
-        formatted_duration = format_duration(item["duration"])
+
+        # the duration only seems to be valid if the video is less than 60 mins
+        duration = utils.parse_duration_string(item["duration"])
+        if duration and duration > timedelta(minutes=60):
+            duration = None
+
         iframe_url = f"https://player.bilibili.com/player.html?aid={video_id}&high_quality=1&autoplay=false&danmaku=0"
 
         results.append(
@@ -93,7 +86,7 @@ def response(resp):
                 "content": description,
                 "author": author,
                 "publishedDate": formatted_date,
-                "length": formatted_duration,
+                "length": duration,
                 "thumbnail": thumbnail,
                 "iframe_src": iframe_url,
                 "template": "videos.html",
diff --git a/searx/engines/iqiyi.py b/searx/engines/iqiyi.py
index 3158b44e9..70a746bec 100644
--- a/searx/engines/iqiyi.py
+++ b/searx/engines/iqiyi.py
@@ -2,9 +2,10 @@
 """iQiyi: A search engine for retrieving videos from iQiyi."""
 
 from urllib.parse import urlencode
-from datetime import datetime, timedelta
+from datetime import datetime
 
 from searx.exceptions import SearxEngineAPIException
+from searx.utils import parse_duration_string
 
 about = {
     "website": "https://www.iqiyi.com/",
@@ -55,20 +56,7 @@ def response(resp):
         except (ValueError, TypeError):
             pass
 
-        length = None
-        subscript_content = album_info.get("subscriptContent")
-        if subscript_content:
-            try:
-                time_parts = subscript_content.split(":")
-                if len(time_parts) == 2:
-                    minutes, seconds = map(int, time_parts)
-                    length = timedelta(minutes=minutes, seconds=seconds)
-                elif len(time_parts) == 3:
-                    hours, minutes, seconds = map(int, time_parts)
-                    length = timedelta(hours=hours, minutes=minutes, seconds=seconds)
-            except (ValueError, TypeError):
-                pass
-
+        length = parse_duration_string(album_info.get("subscriptContent"))
         results.append(
             {
                 'url': album_info.get("pageUrl", "").replace("http://", "https://"),
diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py
index 0457c5d8d..b9d8e582f 100644
--- a/searx/engines/peertube.py
+++ b/searx/engines/peertube.py
@@ -6,7 +6,7 @@
 
 import re
 from urllib.parse import urlencode
-from datetime import datetime
+from datetime import datetime, timedelta
 from dateutil.parser import parse
 from dateutil.relativedelta import relativedelta
 
@@ -50,12 +50,6 @@
 safesearch = True
 safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
 
 
-def minute_to_hm(minute):
-    if isinstance(minute, int):
-        return "%d:%02d" % (divmod(minute, 60))
-    return None
-
-
 def request(query, params):
     """Assemble request for the Peertube API"""
 
@@ -117,13 +111,17 @@ def video_response(resp):
             if x
         ]
 
+        duration = result.get('duration')
+        if duration:
+            duration = timedelta(seconds=duration)
+
         results.append(
             {
                 'url': result['url'],
                 'title': result['name'],
                 'content': html_to_text(result.get('description') or ''),
                 'author': result.get('account', {}).get('displayName'),
-                'length': minute_to_hm(result.get('duration')),
+                'length': duration,
                 'views': humanize_number(result['views']),
                 'template': 'videos.html',
                 'publishedDate': parse(result['publishedAt']),
diff --git a/searx/engines/presearch.py b/searx/engines/presearch.py
index ed68e1507..7e30801d1 100644
--- a/searx/engines/presearch.py
+++ b/searx/engines/presearch.py
@@ -73,7 +73,7 @@ Implementations
 from urllib.parse import urlencode, urlparse
 from searx import locales
 from searx.network import get
-from searx.utils import gen_useragent, html_to_text
+from searx.utils import gen_useragent, html_to_text, parse_duration_string
 
 about = {
     "website": "https://presearch.io",
@@ -270,7 +270,7 @@ def response(resp):
                     'url': item.get('link'),
                     'content': item.get('description', ''),
                     'thumbnail': item.get('image'),
-                    'length': item.get('duration'),
+                    'length': parse_duration_string(item.get('duration')),
                 }
             )
 
diff --git a/searx/utils.py b/searx/utils.py
index 8fdcb0fda..ee044704b 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -1,7 +1,5 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""Utility functions for the engines
-
-"""
+"""Utility functions for the engines"""
 
 from __future__ import annotations
 
@@ -18,6 +16,7 @@ from random import choice
 from html.parser import HTMLParser
 from html import escape
 from urllib.parse import urljoin, urlparse, parse_qs, urlencode
+from datetime import timedelta
 
 from markdown_it import MarkdownIt
 from lxml import html
@@ -831,3 +830,25 @@ def js_variable_to_python(js_variable):
     s = s.replace(chr(1), ':')
     # load the JSON and return the result
     return json.loads(s)
+
+
+def parse_duration_string(duration_str: str) -> timedelta | None:
+    """Parse a time string in format MM:SS or HH:MM:SS and convert it to a `timedelta` object.
+
+    Returns None if the provided string doesn't match any of the formats.
+    """
+    if not duration_str:
+        return None
+
+    duration_str = duration_str.strip()
+
+    try:
+        # prepending ["00"] here inits hours to 0 if they are not provided,
+        # taking the last three parts keeps HH:MM:SS intact
+        time_parts = (["00"] + duration_str.split(":"))[-3:]
+        hours, minutes, seconds = map(int, time_parts)
+        return timedelta(hours=hours, minutes=minutes, seconds=seconds)
+
+    except (ValueError, TypeError):
+        pass
+
+    return None
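
For reference, a rough illustration of how the new parse_duration_string helper behaves once the patch is applied. This snippet is not part of the diff; the expected values simply follow from the implementation added to searx/utils.py above:

    >>> from searx.utils import parse_duration_string
    >>> parse_duration_string("03:45")            # MM:SS
    datetime.timedelta(seconds=225)
    >>> parse_duration_string("1:02:03")          # HH:MM:SS
    datetime.timedelta(seconds=3723)
    >>> parse_duration_string("") is None         # empty or missing duration
    True
    >>> parse_duration_string("n/a") is None      # unparsable duration
    True

Callers such as iqiyi.py and presearch.py can therefore pass the raw duration field through unchanged and store the returned timedelta (or None) in the result's 'length' key.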