
    i2&                         d Z ddlmZmZ ddlmZ ddlmZ ddlm	Z	m
Z
mZ dddd	d	d
dZdZdZdZddgZd	ZdZ	 dZddddZd Zd Zd Zd Zd Zd Zd Zy)aJ  Presearch supports the search types listed in :py:obj:`search_type` (general,
images, videos, news).

Configured ``presarch`` engines:

.. code:: yaml

  - name: presearch
    engine: presearch
    search_type: search
    categories: [general, web]

  - name: presearch images
    ...
    search_type: images
    categories: [images, web]

  - name: presearch videos
    ...
    search_type: videos
    categories: [general, web]

  - name: presearch news
    ...
    search_type: news
    categories: [news, web]

.. hint::

   By default Presearch's video category is intentionally placed into::

       categories: [general, web]


Search type ``video``
=====================

The results in the video category are most often links to pages that contain a
video, for instance many links from Preasearch's video category link content
from facebook (aka Meta) or Twitter (aka X).  Since these are not real links to
video streams SearXNG can't use the video template for this and if SearXNG can't
use this template, then the user doesn't want to see these hits in the videos
category.


Languages & Regions
===================

In Presearch there are languages for the UI and regions for narrowing down the
search.  If we set "auto" for the region in the WEB-UI of Presearch and cookie
``use_local_search_results=false``, then the defaults are set for both (the
language and the region) from the ``Accept-Language`` header.

Since the region is already "auto" by default, we only need to set the
``use_local_search_results`` cookie and send the ``Accept-Language`` header.  We
have to set these values in both requests we send to Presearch; in the first
request to get the request-ID from Presearch and in the final request to get the
result list.

The time format returned by Presearch varies depending on the language set.
Multiple different formats can be supported by using ``dateutil`` parser, but
it doesn't support formats such as "N time ago", "vor N time" (German),
"Hace N time" (Spanish). Because of this, the dates are simply joined together
with the rest of other metadata.


Implementations
===============

    )	urlencodeurlparse)locales)get)gen_useragenthtml_to_textparse_duration_stringzhttps://presearch.ioQ7240905z#https://docs.presearch.io/nodes/apiFJSON)websitewikidiata_idofficial_api_documentationuse_official_apirequire_api_keyresultsTgeneralwebsearchzhttps://presearch.comfalsetrue)r         c                 8    t         dvrt        dt                y )N)r   imagesvideosnewszpresearch search_type: )search_type
ValueError)_s    (/root/searxng/searx/engines/presearch.pyinitr!   e   s$    @@2;-@AA A    c                 .   | |d   d}|d   r|d   |d<   t          dt         dt        |       }t               dt        |d       d	}|d
   dk7  rRt        j                  |d
         }|r8|j                  r,|j                   d|j                   d|j                   d|d<   t        ||d      }|j                  j                  d      D ];  }d|v s|j                  d      d   d d j                  dd      |j                  fc S  t        d      )Npageno)qpage
time_rangetime/?zIb=1; presearch_session=; use_local_search_results=false; use_safe_search=
safesearch)z
User-AgentCookiesearxng_localeall-,z;q=0.9,*;q=0.5zAccept-Language   )headerstimeout
zwindow.searchId = z= r   " z*Couldn't find any request id for presearch)base_urlr   r   r   safesearch_mapr   
get_locale	territorylanguager   textsplitreplacecookiesRuntimeError)queryparamsargsurlr2   lresplines           r    _get_request_idrI   j   sC    x D
 ll+VJa}Aio%6
7C $o  !/vl/C DEGG 5(v&678 ,-JJ<qQqzzlRg)gG%&sGQ/D		% K4'::d#A&s+33C<dllJJK C
DDr"   c                 Z    t        | |      \  }}d|d   d<   t         d| |d<   ||d<   |S )Nzapplication/jsonr2   Acceptz/results?id=rE   r@   )rI   r8   )rB   rC   
request_idr@   s       r    requestrM      sE    )%8J"4F9hjZL9F5MF9Mr"   c                     dD ]1  }| j                         j                  |      s#| d t        |        } 3 | j                         S )N)	wikipediagoogle)lowerendswithlenstrip)r=   xs     r    _strip_leading_stringsrV      sD    $ #::<  #	3q6'?D# ::<r"   c                    t        |      }|j                  }t        |       } | j                  |      rPt	        |       t	        |      kD  r9| j                  |dz         s%| j                  |dz         s| j                  |      } | S )z
    Titles from Presearch shows domain + title without spacing, and HTML
    This function removes these 2 issues.
    Transforming "translate.google.co.in<em>Google</em> Translate" into "Google Translate"
    r)    )r   netlocr   
startswithrS   removeprefix)titlerE   
parsed_urldomains       r    
_fix_titler_      s{     #JFE 	 JV$  #.  #.""6*Lr"   c                    g }| s|S | j                  di       j                  di       j                  dg       D ]@  }|d   t        |d   |d         |d   d|j                  d      d	}|j                  |       B | j                  d
g       D ]8  }|d   t        |d   |d         t        |d         d}|j                  |       : | j                  di       j                  d      }|rg }|j                  dg       D ]`  }t        |      }d|v r|j	                  dd      \  }}n|j	                  dd      \  }}|d d }t        |      }|j                  ||d       b g }	|j                  d      |j                  d      fD ]-  }|st        t        |            }|s|	j                  |       / |j                  |d   |d   |j                  d      dj                  |	      |d       |S )NspecialSectionstopStoriesCompactdatalinkr\   imager7   source)rE   r\   	thumbnailcontentmetadatastandardResultsdescription)rE   r\   rh   infoSectionabout:r   rX   r5   )labelvaluesubtitlez | )infoboxidimg_srcrh   
attributes)r   r_   appendr   r>   rV   join)
json_resultsr   itemresultinforu   r=   ro   rp   rh   s
             r    parse_search_queryr|      s"   G  !2B7;;<OQSTXXY_acd <WtF|<g*
 	v   !2B7 <WtF|<#D$78

 	v M2.226:D
HHWb) 	@D%Dd{#zz#q1u  $zz#q1ucr
*51E>?	@ XXj)488M+BC 	%D),t*<=Dt$	% 	=7m88G, ::g.(	
 Nr"   c                 2   g }| j                         }t        dk(  rt        |j                  di             }|S t        dk(  ri|j                  dg       D ]R  }|j	                  dt        |d         |j                  d      |j                  d      |j                  d      d	       T |S t        d
k(  r|j                  d
g       D ]q  }|j                  d      }|rt        |      }|j	                  t        |d         |j                  d      |j                  dd      |j                  d      |d       s |S t        dk(  r|j                  dg       D ]  }|j                  d      }t        |j                  d            j                         }|g}|dk7  r|j	                  |       |j	                  t        |d         |j                  d      t        |j                  dd            dj                  |      |j                  d      d        |S )Nr   r   r   zimages.htmlr\   rd   re   rg   )templater\   rE   rt   thumbnail_srcr   durationrk   r7   )r\   rE   rh   rg   lengthr   rf   r(   z / )r\   rE   rh   ri   rg   )	jsonr   r|   r   rv   r   r	   rT   rw   )rG   r   	json_respry   r   rf   r(   ri   s           r    responser      s   G		Ih$Y]]9b%ABh Ne 
	 MM(B/ 		DNN -)$w-888F+#xx0%)XXk%:		b NM 
	  MM(B/ 	Dxx
+H0:NN)$w-888F+#xxr:!%'!2&	D N' 
	MM&"- 	DXXh'F 01779DxHrz%NN)$w-888F++DHH]B,GH %

8 4!%'!2	$ Nr"   N)__doc__urllib.parser   r   searxr   searx.networkr   searx.utilsr   r   r	   rm   pagingr+   time_range_support
categoriesenable_http2r   r8   r9   r!   rI   rM   rV   r_   r|   r    r"   r    <module>r      s   EN -   J J &"G	 

 
  A"F3B
*EZ*9x9r"   