
    i                         d Z ddlmZ ddlZddlmZ ddlZddlmZ ddl	m
Z
mZmZmZ ddlmZ ddlmZmZ dd	lmZ d
ddddddZdgZdZdZdZd Zd Zg dZ	 g dZddiZdefdZy)a  This is the implementation of the Google News engine.

Google News has a different region handling compared to Google WEB.

- the ``ceid`` argument has to be set (:py:obj:`ceid_list`)
- the hl_ argument has to be set correctly (and different to Google WEB)
- the gl_ argument is mandatory

If one of this argument is not set correctly, the request is redirected to
CONSENT dialog::

  https://consent.google.com/m?continue=

The google news API ignores some parameters from the common :ref:`google API`:

- num_ : the number of search results is ignored / there is no paging all
  results for a query term are in the first response.
- save_ : is ignored / Google-News results are always *SafeSearch*

.. _hl: https://developers.google.com/custom-search/docs/xml_results#hlsp
.. _gl: https://developers.google.com/custom-search/docs/xml_results#glsp
.. _num: https://developers.google.com/custom-search/docs/xml_results#numsp
.. _save: https://developers.google.com/custom-search/docs/xml_results#safesp
    )	urlencodeN)html)locales)
eval_xpatheval_xpath_listeval_xpath_getindexextract_text)fetch_traits)get_google_infodetect_google_sorry)EngineTraitszhttps://news.google.comQ12020z+https://developers.google.com/custom-searchFHTML)websitewikidata_idofficial_api_documentationuse_official_apirequire_api_keyresultsnewsTc                    |j                  dd      }t        j                  |t        j                  d   d      }t        |t              }d|d<   |j                  d      \  }}|j                  d	      d
gz   d
d \  }}||d   d<   |r5|dvr1|j                         |k(  r|d	z   |z   |d   d<   n=|d	z   |z   |d   d<   n.|j                         |k7  r|dv r	||d   d<   n|d	z   |z   |d   d<   d|j                  d	      d   z   |d   d<   ||d   d<   d|d   z   dz   t        d| i|d         z   d|z  z   }||d<   |d   |d<   |d   j                  |d          |S )zGoogle-News search requestsearxng_localezen-USceidUS:en)defaultznews.google.com	subdomain:-N   paramshl)HansHant)ATBECHILSAINBDPTlang_r   lrglzhttps://z/search?qz&ceid=%surlcookiesheaders)
getr   get_engine_localetraitscustomr   splitlowerr   update)	queryr    sxng_localer   google_infoceid_region	ceid_langceid_suffix	query_urls	            */root/searxng/searx/engines/google_news.pyrequestrB   E   s    **-w7K$$[&--2GQXYD!&&1K0K!ZZ_K
	
 	q
I{ #,K${*::)+*3c/K*GK!$'*3c/K*GK!$'						)JJ*3K!$'*3c/K*GK!$'")IOOC,@,C"CK$"-K$ 	
k
"	#
	 Uh'
	
 
	  F5M#I.F9
9[34M    c                    g }t        |        t        j                  | j                        }t	        |d      D ]  }t        |dd      }|j                  d      d   }|j                  d      d   }t        j                  |dz         }||j                  d      d	 j                  d
      d   }|j                         }t        t        |d            }t        t        |d            }t        t        |d            }dj                  ||fD cg c]  }|s|	 c}      }	t        |j                  d            }
|j                  |||	|
d        |S c c}w )z)Get response from google's search requestz//div[@class="xrnccd"]z./article/a/@hrefr   ?/z====s   httpN   z./article/h3[1]z./article//timez./article//a[@data-n-tid]z / z$preceding-sibling::a/figure/img/@src)r0   titlecontent	thumbnail)r   r   
fromstringtextr   r   r7   base64urlsafe_b64decodeindexdecoder	   r   joinxpathappend)respr   domresulthrefrI   pub_date
pub_originxrJ   rK   s              rA   responser\   |   sM   G //$))
$C!#'?@ #
 #6+>Bzz#q!zz#r"''v6DJJw')*009!<{{}Z0ABC  
63D EF!*V5P"QR
***h)?EA1aEF !.T!UV	"&		
9#
L N% Fs   E
E
)TzAE:arz	AR:es-419zAT:dezAU:enzBD:bnzBE:frzBE:nlzBG:bgz	BR:pt-419zBW:enzCA:enzCA:frzCH:dezCH:frz	CL:es-419z
CN:zh-Hansz	CO:es-419z	CU:es-419zCZ:cszDE:dezEG:arzES:esET:enzFR:frzGB:enzGH:enzGR:elz
HK:zh-HantzHU:huID:enzID:idzIE:enzIL:enzIL:hezIN:bnzIN:enzIN:hizIN:mlzIN:mrzIN:tazIN:tezIT:itzJP:jazKE:enzKR:kozLB:arzLT:ltLV:enzLV:lvzMA:frz	MX:es-419zMY:enzNA:enzNG:enzNL:nlNO:nozNZ:enz	PE:es-419zPH:enzPK:enzPL:plz	PT:pt-150zRO:rozRS:srzRU:ruzSA:arzSE:svzSG:enzSI:slzSK:skzSN:frzTH:thzTR:trz
TW:zh-HantzTZ:enzUA:ruzUA:ukzUG:enr   z	US:es-419z	VE:es-419zVN:vizZA:enzZW:en)r]   r^   r_   r`   znb-NOengine_traitsc           	         t        | d       i | j                  d<   t        D ]  }|t        v r|j	                  d      \  }}|j	                  d      }t        |      dkD  r|d   dvr|d   }t        j                  ||dz   |z         }	 t        j                  j                  |d	      }|| j                  d   t        j                  |      <    y # t        j                  $ r t        d
|d|d       Y w xY w)NF)add_domainsr   r   r      )r#   r"   r   )sepzERROR: z -> z is unknown by babel)_fetch_traitsr6   	ceid_list_skip_valuesr7   len_ceid_locale_mapr3   babelLocaleparseUnknownLocaleErrorprintr   
region_tag)ra   r   regionlangr[   r;   locales          rA   r
   r
     s    -U3#%M  H<zz#JJsOq6A:t++t&**4f1DE	\\'''=F
 DHV$W%7%7%?@#H '' 	4MN	s   !C%C<;C<)__doc__urllib.parser   rN   lxmlr   rk   searxr   searx.utilsr   r   r   r	   searx.engines.googler
   rf   r   r   searx.enginelib.traitsr   about
categoriespagingtime_range_support
safesearchrB   r\   rg   rh   rj    rC   rA   <module>r      s   2 #      ? 0 )"O	 X
	  
4n.bU	l2 W% H HrC   