
    i]              	       j   d Z ddlZddlmZ ddlmZ ddlmZ ddlZddl	m
Z
mZmZmZmZ ddlmZmZ ddlmZ dd	lmZmZ dd
lmZ ej0                  rddlmZ ddlmZ dddddddZddgZdZdZ 	 dZ!dZ"dZ#de$ddddfdZ%dddefdZ&ddde'e$e(f   fdZ)d efd!Z*d"e$dz  de+e,e$   e$e$edz  f   fd#Z-y)$a:  Google Scholar is a freely accessible web search engine that indexes the full
text or metadata of scholarly literature across an array of publishing formats
and disciplines.

Compared to other Google services the Scholar engine has a simple GET REST-API
and there does not exists ``async`` API.  Even though the API slightly vintage
we can make use of the :ref:`google API` to assemble the arguments of the GET
request.

Configuration
=============

.. code:: yaml

  - name: google scholar
    engine: google_scholar
    shortcut: gos

Implementations
===============

    N)	urlencode)datetime)html)
eval_xpatheval_xpath_getindexeval_xpath_listextract_textElementType)SearxEngineCaptchaException SearxEngineAccessDeniedException)fetch_traits)get_google_infotime_range_dict)EngineResults)SXNG_Response)OnlineParamszhttps://scholar.google.comQ494817z+https://developers.google.com/custom-searchFHTML)websitewikidata_idofficial_api_documentationuse_official_apirequire_api_keyresultssciencezscientific publicationsT2   queryparamsr   returnc                 *   t        |t              }|d   j                  dd      |d<   d| i|d   |d   dz
  dz  d	d
d}|j                  t	        |             d|d   z   dz   t        |      z   |d<   |d   |d<   |d   j                  |d          y)zGoogle-Scholar search request	subdomainzwww.zscholar.qr   pageno   
   20070)startas_sdtas_viszhttps://z	/scholar?urlcookiesheadersN)r   traitsreplaceupdatetime_range_argsr   )r   r   google_infoargss       -/root/searxng/searx/engines/google_scholar.pyrequestr5   L   s     "&&1K*;7??
SK 	U
h
 "Q&",D 	KK'([!99KG)TX/YF5M#I.F9
9[34    respr   c                    | j                   dv rbd| j                  v rTd| j                  d   v rt        d      t        j                  d| j                  d   j                  d      d          t               }t        j                  | j                        }t        |       t        |d	      D ]  }t        t        |d
            }|st        t        |d            xs d}|r|dd j                         }t        |dd      }t        t        |d            xs d}t!        t        t        |d                  \  }}	}
}|
|v rd}
t        t        |d            xs d}d}d}t        |ddd      }t        t        |d            }|dk(  r|}n|}|j#                  |j$                  j'                  |||||
|	|||||              t        |d      D ]6  }|j#                  |j$                  j)                  t        |                   8 t        |d      D ]6  }|j#                  |j$                  j)                  t        |                   8 |S )z"Parse response from Google Scholar)i-  i.  i/  i3  i4  Locationz/sorry/index?continuez(google_scholar: unusual traffic detectedmessagez	location ?r   z//div[@data-rp]z.//h3[1]//az.//span[@class='gs_ctg2'] r$   z.//h3[1]//a/@hrefz.//div[@class='gs_rs']z.//div[@class='gs_a']z>.//div[@class='gs_fl']/a[starts-with(@href,'/scholar?cites=')]z#.//div[@class='gs_or_ggsm']/a/@hrefN)defaultz[PDF])typer+   titleauthors	publisherjournalpublishedDatecontentcommentshtml_urlpdf_urlz2//div[contains(@class, 'gs_qsuggest_wrap')]//li//a)
suggestionz//div[@class='gs_r gs_pda']/a)
correction)status_coder-   r   httpxTooManyRedirectssplitr   r   
fromstringtextdetect_google_captchar   r	   r   lowerr   
parse_gs_aaddtypesPaperLegacyResult)r7   resdomresultrA   pub_typer+   rF   rB   rD   rC   rE   rG   rH   rI   doc_urldoc_typerJ   rK   s                      r4   responser_   a   su    44t||9S"dll:&>> 3B  $$yj1I1O1OPS1TUV1W0X%YZZ
/C
//$))
$C# "#'89 0
Z>?$Z8S%TU[Y["~++-H&v/BAF#Jv7O$PQWUW5?F,CDE6
2)] I F,lmntrt 	
 %f.SUV`de
63N OPwGHIIOO#+!!  	
E0
f !&Z[ M
		&&,z2J&KLM !&EF M
		&&,z2J&KLMJr6   c                 h    i }| d   t         v r$t        j                         j                  dz
  |d<   |S )a  Returns a dictionary with a time range arguments based on
    ``params["time_range"]``.

    Google Scholar supports a detailed search by year.  Searching by *last
    month* or *last week* (as offered by SearXNG) is uncommon for scientific
    publications and is not supported by Google Scholar.

    To limit the result list when the users selects a range, all the SearXNG
    ranges (*day*, *week*, *month*, *year*) are mapped to *year*.  If no range
    is set an empty dictionary of arguments is returned.

    Example; when user selects a time range and we find ourselves in the year
    2025 (current year minus one):

    .. code:: python

        { "as_ylo" : 2024 }

    
time_ranger$   as_ylo)r   r   nowyear)r   ret_vals     r4   r1   r1      s6    ( !Gl.$LLN//!3Nr6   rZ   c                 4    t        | d      rt        d      y)z{In case of CAPTCHA Google Scholar open its own *not a Robot* dialog and is
    not redirected to ``sorry.google.com``.
    z//form[@id='gs_captcha_f']zCAPTCHA (gs_captcha_f)r:   N)r   r   )rZ   s    r4   rR   rR      s      #34)2JKK 5r6   rQ   c                    | | dk(  rg dddfS | j                  d      }|d   j                  d      }|d   }t        |      dk7  r|d|dfS |d   j                  d      }t        |      dkD  rdj                  |dd       }|d	k(  rd}nd}|d   }	 t        j                  |j                         d
      }||||fS # t        $ r d}Y w xY w)zParse the text written in green.

    Possible formats:
    * "{authors} - {journal}, {year} - {publisher}"
    * "{authors} - {year} - {publisher}"
    * "{authors} - {publisher}"
    Nr=   z - r   z, r>      r$   u   …z%Y)rO   lenjoinr   strptimestrip
ValueError)rQ   s_textrB   rC   journal_yearrD   rd   rE   s           r4   rT   rT      s     |trz2r4ZZF.GBZI
6{aIt++ !9??4(L
<1yya!34eGD ))$**,= GY55  s   $B< <C
	C
).__doc__typingturllib.parser   r   lxmlr   rM   searx.utilsr   r   r   r	   r
   searx.exceptionsr   r   searx.engines.googler   r   r   searx.result_typesr   TYPE_CHECKINGsearx.extended_typesr   searx.search.processorsr   about
categoriespagingmax_pagelanguage_supporttime_range_support
safesearchstrr5   r_   dictintr1   rR   tuplelistrT    r6   r4   <module>r      s  .  "     [ -
 -??24 ,"O	 23
	   
53 5 54 5*I? I} IXN tCH~ 4L{ L!6S4Z !6E$s)S#x$*N$O !6r6   