
    iQH                     8   U d Z ddlZddlZddlZddlZddlZddlmZm	Z	 ddl
Z
ddlZ
ddlZ
ddlmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ dd	lmZmZmZmZmZ ej>                  rdd
l m!Z! ddl"m#Z# dddddddZ$ddgZ%dZ&dZ'	 dZ(dZ)dddddZ*ddddZ+dZ,ejZ                  ej\                  z   d z   Z/da0e1e2e3f   dz  e4d!<   d"e3d#e2fd$Z5d%d&d'ed#e6e2ejn                  f   fd(Z8d) Z9d*e2d%d&d#dfd+Z: ejv                  d,      Z<d-e2fd.Z=d4d/Z>g d0Z?d5d1ed2e@fd3ZAy)6a,  This is the implementation of the Google WEB engine.  Some of this
implementations (manly the :py:obj:`get_google_info`) are shared by other
engines:

- :ref:`google images engine`
- :ref:`google news engine`
- :ref:`google videos engine`
- :ref:`google scholar engine`
- :ref:`google autocomplete`

    N)unquote	urlencode)html)EngineTraits)SearxEngineCaptchaException)get_official_localeslanguage_tag
region_tag)EngineResults)
eval_xpatheval_xpath_getindexeval_xpath_listextract_textgen_gsa_useragent)SXNG_Response)OnlineParamszhttps://www.google.comQ9366z,https://developers.google.com/custom-search/FHTML)websitewikidata_idofficial_api_documentationuse_official_apirequire_api_keyresultsgeneralwebT2   dwmy)dayweekmonthyearoffmediumhigh)r         z+//div[contains(@class, "gGQDvd iIWm4b")]//az_-_arcid_randomstartreturnc                 @   d}d}t         r*t        t        j                               t         d   z
  dkD  rGdj                  t	        j
                  t        d            t        t        j                               fa dt         d	    d
| d}dj                  |||g      S )zFormat of the response from UI's async request.

    - ``arc_id:<...>,use_ac:true,_fmt:prog``

    The arc_id is random generated every hour.
    zuse_ac:truez	_fmt:progr)   i      )kzarc_id:srp_r   _102,)r+   inttimejoinrandomchoices_arcid_range)r,   use_ac_fmtarc_ids       %/root/searxng/searx/engines/google.pyui_asyncr?   O   s     F D S-a0@@DH!CDc$))+FVW=+,BuRj9F88VVT*++    paramsr   
eng_traitsc                    dddi i i dd}| j                  dd      }	 t        j                  j                  |d      }|j                  |d      }|j                  d      d	   }|j                  ||j                        }||d
<   ||d<   ||d<   |j                  d   j                  |j                         d      |d<   | d| |d   d<   ||d   d<   |dk(  rd|d   d<   d|d   d<   t        |j                  d            dkD  rd|z   |d   d<   d|d   d<   d|d   d<   d|d   d<   t               |d   d<   d|d   d<   |S # t        j                  j
                  $ r d}Y 'w xY w) a  Composing various (language) properties for the google engines (:ref:`google
    API`).

    This function is called by the various google engines (:ref:`google web
    engine`, :ref:`google images engine`, :ref:`google news engine` and
    :ref:`google videos engine`).

    :param dict param: Request parameters of the engine.  At least
        a ``searxng_locale`` key should be in the dictionary.

    :param eng_traits: Engine's traits fetched from google preferences
        (:py:obj:`searx.enginelib.traits.EngineTraits`)

    :rtype: dict
    :returns:
        Py-Dictionary with the key/value pairs:

        language:
            The language code that is used by google (e.g. ``lang_en`` or
            ``lang_zh-TW``)

        country:
            The country code that is used by google (e.g. ``US`` or ``TW``)

        locale:
            A instance of :py:obj:`babel.core.Locale` build from the
            ``searxng_locale`` value.

        subdomain:
            Google subdomain :py:obj:`google_domains` that fits to the country
            code.

        params:
            Py-Dictionary with additional request arguments (can be passed to
            :py:func:`urllib.parse.urlencode`).

            - ``hl`` parameter: specifies the interface language of user interface.
            - ``lr`` parameter: restricts search results to documents written in
              a particular language.
            - ``cr`` parameter: restricts search results to documents
              originating in a particular country.
            - ``ie`` parameter: sets the character encoding scheme that should
              be used to interpret the query string ('utf8').
            - ``oe`` parameter: sets the character encoding scheme that should
              be used to decode the XML result ('utf8').

        headers:
            Py-Dictionary with additional HTTP headers (can be passed to
            request's headers)

            - ``Accept: '*/*``

    N)languagecountry	subdomainrA   headerscookieslocalesearxng_localeall-seplang_en_rD   rE   rI   supported_domainszwww.google.comrF   rA   hllrr/   crr)   utf8ieoez*/*rG   Acceptz
User-AgentzYES+rH   CONSENT)getbabelLocaleparsecoreUnknownLocaleErrorget_languagesplit
get_region
all_localecustomupperlenr   )rA   rB   ret_valsxng_localerI   eng_lang	lang_coderE   s           r>   get_google_inforl   e   s   p !G **-u5K##KS#9 &&{I>Hs#B'I##K1F1FGG #GJ GIGH%,,-@AEEgmmoWghGK "+1WI6GHd  'GHde"$$ !GHd
;S!"Q&"+g"5$* %GHd %GHd $)GIx '8':GI|$ %+GIy!N[ ::(( s   !E E'&E'c                     | j                   j                  dk(  s%| j                   j                  j                  d      r
t	               y )Nzsorry.google.comz/sorry)urlhostpath
startswithr   )resps    r>   detect_google_sorryrs     s7    xx}}**dhhmm.F.Fx.P)++ /Qr@   queryc           	      h   |d   dz
  dz  }t        |t              }d|d   z   dz   dz   t        d| i|d	   d
|d      z   }|d   t        v r |dt        ddt        |d      z   i      z   z  }|d   r|dt        dt        |d      i      z   z  }||d<   |d   |d<   |d   j                  |d          y)zGoogle search requestpagenor)   
   zhttps://rF   z/search?qrA   0)filterr,   
time_range&tbszqdr:
safesearchsafern   rH   rG   N)rl   traitsr   time_range_dictfilter_mappingupdate)rt   rA   r,   google_info	query_urls        r>   requestr     s    H!R'E!&&1K 	
k
"	#
	 	 Uh' 	
		
 6 l.S9eVof\FZ6[-[%\]]]	lS9fnVL=Q.R%STTT	F5M#I.F9
9[34r@   z4(data:image[^']*?)'[^']*?'((?:dimg|pimg|tsuid)[^']*)textc                     i }t         j                  |       D ](  \  }}|j                  d      j                  d      ||<   * t        j                  dt        |j                                      |S )Nzutf-8zunicode-escapezdata:image objects --> %s)RE_DATA_IMAGEfindallencodedecodeloggerdebuglistkeys)r   data_image_map	image_urlimg_ids       r>   parse_url_imagesr   O  sl    N*2248 T	6!*!1!1'!:!A!ABR!SvT
LL,d>3F3F3H.IJr@   c                 0   t        |        t        | j                        }t               }t	        j
                  | j                        }t        |d      D ]]  }	 t        |ddd      }|t        j                  d       ,t        |      }|j                  d      }|t        j                  d|       a|j                  d	      r!t        |d
d j                  d      d         }n|}t        |d      }	|	D ]7  }
|
j!                  d      D ]!  }|j#                         j%                  |       # 9 t        |	d         }t        |ddd      }d}|F|j                  d      }|j                  d      r$|j                  d      }|r|j                  |      }|j'                  |||xs d|d       ` t        |t,              D ]  }|j'                  dt        |      i         |S # t(        $ r"}t        j+                  |d       Y d}~d}~ww xY w)z)Get response from google's search requestz//a[@data-ved and not(@class)]z.//div[@style]r   N)defaultz7ignoring item from the result_xpath list: missing titlehrefzCignoring item from the result_xpath list: missing url of title "%s"z/url?q=   z&sa=Uz3../..//div[contains(@class, "ilUpNd H66NU aSRlid")]z	.//scriptz.//img)indexr   srcz
data:imageidr/   )rn   titlecontent	thumbnailT)exc_info
suggestion)rs   r   r   r   r   
fromstringr   r   r   r   r   r[   rq   r   rb   r   xpath	getparentremoveappend	Exceptionerrorsuggestion_xpath)rr   r   r   domresult	title_tagr   raw_urlrn   content_nodesitemscriptr   xpath_imager   r   er   s                     r>   responser   X  s    %dii0NoG //$))
$C "#'GH .+	+F4DaQUVI VW +Ejj(GY !!),gabk//8;<&v/deM% 6"jj5 6F$$&--f566 #=#34G .fhaQUVKI&'OOE2	''5(__T2F$2$6$6v$>	NN37=b_hijU.b &c+;< A
l:&>?@A
 N  	LLTL*	s%   &G*4G*7C=G**	H3HH)ALAZBDBNBTETGEGLKHLALKMEMKMMMNMVMYNPTJTMUZengine_traitsadd_domainsc           
         ddl m} i | j                  d<    |dd      }|j                  st	        d      t        j                  |j                  j                  dd	            }d
di}t        |d      D ]  }|j                  d      }	 t        j                  j                  |j                  ||      d      }t#        |      }	| j$                  j                  |	      }
|
r|
|k7  rt        d|	d|
d|       d|z   | j$                  |	<    d| j$                  d<   t        |d      D ]  }|j                  d      }|t&        v r|dk(  rd| _        *t+        || j$                  j-                         d      }|s"t        d|j                  d      d|d        t|D ]  }|| j.                  t1        |      <     d!| j.                  d"<   |r |d#d      }|j                  st	        d$      |j                  j                         D ]k  }|j!                         }|r|d%v r|j                  d&      d'   j3                         }d(|z   | j                  d   |<   |d!k(  sWd(|z   | j                  d   d)<   m y*y*# t        j                  $ r@ t        d|d|j                  j                  d      d   j!                         d       Y Zw xY w)+zFetch languages from Google.r   )r[   rR   z"https://www.google.com/preferences   )timeoutz+Response from Google preferences is not OK.z&<?xml version="1.0" encoding="UTF-8"?>r/   nonbz//select[@name='hl']/optionvaluerL   rM   zINFO:  google UI language z ((z) is unknown by babelzCONFLICT: babel z --> z, lang_z
lang_zh-CNzhz//select[@name='gl']/optionZZT)regionalz%ERROR: can't map from google country z	data-namez) to a babel region.HKzzh-CNz(https://www.google.com/supported_domainsz1Response from Google supported domains is not OK.)z.google.com.rQ   wwwCNN)searx.networkr[   re   okRuntimeErrorr   r   r   replacer   r\   r]   r^   r`   printrb   stripr	   	languagesskip_countriesrd   r   r   regionsr
   rf   )r   r   r[   rr   r   lang_mapxrj   rI   	sxng_langconflicteng_countrysxng_localesri   domainregions                   r>   fetch_traitsr     s    "02M,-3Q?D77HII
//$))++,TVXY
ZC d|HS"?@ @55>	\\''Xx(Hc'RF !(	 **..y98#HhWX-4x-?	*@" %1MD! S"?@ IeeGn.($'+M$+K9P9P9U9U9WbfgWXW\W\]hWikvwx' 	IK=HM!!*["9:	II& &*M'" =qIwwRSSiioo' 
	QF\\^FV (  \\#&r*002F@EM  !45f=~BG&.$$%89$?
	Q Q '' 	XWXW]W]WcWcdgWhijWkWqWqWstu	s   1I>>AKK)rr   r   )T)B__doc__r8   restringr6   typingturllib.parser   r   r\   
babel.corebabel.languageslxmlr   searx.enginelib.traitsr   searx.exceptionsr   searx.localesr   r	   r
   searx.result_typesr   searx.utilsr   r   r   r   r   TYPE_CHECKINGsearx.extended_typesr   searx.search.processorsr   about
categoriespagingmax_pagetime_range_supportr   r   r   r   ascii_lettersdigitsr:   r+   tuplestrr5   __annotations__r?   dictAnyrl   rs   r   compiler   r   r   r   boolr    r@   r>   <module>r     s  
  	    +     / 8 H H ,  ??24 ("P	 
	  
sS#F xF3 A  ##fmm3d:(,uS#X% ,,C ,C ,,qN q qcSTSXSXjIY qh,
)53 )5 )54 )5\ 

RS3 BP4LQ LQ4 LQr@   