
    iG-                     *   d Z ddlZddlZddlmZ ddlmZmZ ddlm	Z
 ddlmZ ddd	d
dddZdgZ	 dZ	 dZ	 dZ	 dddZ	 ddiZd Zd Zd Zej.                  j1                         Zej5                  ddddddd d!d"d#d$d%d&d'd(       d)efd*Zd)efd+Zy),u	  This module implements the Wikipedia engine.  Some of this implementations
are shared by other engines:

- :ref:`wikidata engine`

The list of supported languages is :py:obj:`fetched <fetch_wikimedia_traits>` from
the article linked by :py:obj:`list_of_wikipedias`.

Unlike traditional search engines, wikipedia does not support one Wikipedia for
all languages, but there is one Wikipedia for each supported language. Some of
these Wikipedias have a LanguageConverter_ enabled
(:py:obj:`rest_v1_summary_url`).

A LanguageConverter_ (LC) is a system based on language variants that
automatically converts the content of a page into a different variant. A variant
is mostly the same language in a different script.

- `Wikipedias in multiple writing systems`_
- `Automatic conversion between traditional and simplified Chinese characters`_

PR-2554_:
  The Wikipedia link returned by the API is still the same in all cases
  (`https://zh.wikipedia.org/wiki/出租車`_) but if your browser's
  ``Accept-Language`` is set to any of ``zh``, ``zh-CN``, ``zh-TW``, ``zh-HK``
  or .. Wikipedia's LC automatically returns the desired script in their
  web-page.

  - You can test the API here: https://reqbin.com/gesg2kvx

.. _https://zh.wikipedia.org/wiki/出租車:
   https://zh.wikipedia.org/wiki/%E5%87%BA%E7%A7%9F%E8%BB%8A

To support Wikipedia's LanguageConverter_, a SearXNG request to Wikipedia uses
:py:obj:`get_wiki_params` and :py:obj:`wiki_lc_locale_variants' in the
:py:obj:`fetch_wikimedia_traits` function.

To test in SearXNG, query for ``!wp 出租車`` with each of the available Chinese
options:

- ``!wp 出租車 :zh``    should show 出租車
- ``!wp 出租車 :zh-CN`` should show 出租车
- ``!wp 出租車 :zh-TW`` should show 計程車
- ``!wp 出租車 :zh-HK`` should show 的士
- ``!wp 出租車 :zh-SG`` should show 德士

.. _LanguageConverter:
   https://www.mediawiki.org/wiki/Writing_systems#LanguageConverter
.. _Wikipedias in multiple writing systems:
   https://meta.wikimedia.org/wiki/Wikipedias_in_multiple_writing_systems
.. _Automatic conversion between traditional and simplified Chinese characters:
   https://en.wikipedia.org/wiki/Chinese_Wikipedia#Automatic_conversion_between_traditional_and_simplified_Chinese_characters
.. _PR-2554: https://github.com/searx/searx/pull/2554

    N)html)localesutils)network)EngineTraitszhttps://www.wikipedia.org/Q52zhttps://en.wikipedia.org/api/TFJSON)websitewikidata_idofficial_api_documentationuse_official_apirequire_api_keyresultsinfoboxz2https://meta.wikimedia.org/wiki/List_of_Wikipediasz7https://meta.wikimedia.org/wiki/Wikipedia_article_depthz6https://{wiki_netloc}/api/rest_v1/page/summary/{title})zzh-CNzzh-HKzzh-MOzzh-MYzzh-SGzzh-TW)zh-classical)zhr   r   )zh_Hantzh_Hansc                     |j                  | |j                  | d            }|j                  d   j                  |d      }||fS )zReturns the Wikipedia language tag and the netloc that fits to the
    ``sxng_locale``.  To support LanguageConverter_ this function rates a locale
    (region) higher than a language (compare :py:obj:`wiki_lc_locale_variants`).

    enwiki_netloczen.wikipedia.org)
get_regionget_languagecustomget)sxng_locale
eng_traitseng_tagr   s       (/root/searxng/searx/engines/wikipedia.pyget_wiki_paramsr       sM     ##K1H1HVZ1[\G##M266w@RSKK    c                     | j                         r| j                         } t        |d   t              \  }}t        j
                  j                  |       }t        j                  ||      |d<   d|d<   d|d<   |S )z6Assemble a request (`wikipedia rest_v1 summary API`_).searxng_locale)r   titleurlFraise_for_httperror   soft_max_redirects)	islowerr$   r    traitsurllibparsequoterest_v1_summary_urlformat)queryparams_eng_tagr   r$   s        r   requestr3      su    }}+F3C,DfMHkLLu%E'..;e.TF5M$)F !#$F Mr!   c           	         g }| j                   dk(  rg S | j                   dk(  r#	 | j                         }|d   dk(  r
|d   dk(  rg S t        j                  |        | j                         }t        j                  |j                  di       j                  d      xs |j                  d	            }|d
   d   d   }dt        v s|j                  d      dk7  r%|j                  |||j                  dd      d       dt        v r^|j                  d      dk(  rJ|j                  |||j                  dd      |j                  di       j                  d      d|dgd       |S # t        $ r Y ,w xY w)Ni  i  typez9https://mediawiki.org/wiki/HyperSwitch/errors/bad_requestdetailztitle-invalid-characterstitlesdisplayr$   content_urlsdesktoppageliststandarddescription )r%   r$   contentr   extract	thumbnailsource	Wikipedia)r$   r%   )r   idr@   img_srcurls)
status_codejson	Exception_networkr&   r   html_to_textr   display_typeappend)respr   
api_resultr$   wikipedia_links        r   responserR      s|   G3	3		J
 6"&aax(,FF	  &Jz~~h;??	Jejnn]dNefE/	:6BN!7:!E 	%%>>-<	
 L >>&!Z/NN$()~~i<)~~k2>BB8L'2>JK NM  		s   E& &	E32E3belakagswsgscbkvromapznb-NOnrfrupndsnanyuearg)z	be-taraskakalszbat-smgzcbk-zamzfiu-vrozmap-bmsnonrmzroa-rupznds-nlz
zh-min-nanzzh-yueanengine_traitsc                 b    t        |        t        dt        | j                  d         z         y )NzWIKIPEDIA_LANGUAGES: %sWIKIPEDIA_LANGUAGES)fetch_wikimedia_traitsprintlenr   )re   s    r   fetch_traitsrk      s)    =)	
#c-*>*>?T*U&V
VWr!   c           	      v   ddl m} ddlm} i | j                  d<   g | j                  d<   t
        j                         D ]  \  }}|D ]  }|| j                  |<     t        j                         D ]  \  }}|D ]  }|| j                  |<     d |       d} |t        d|	      }|j                  st        d
      t        j                  |j                        }|j!                  d      D ]  }	|	j!                  d      }
|
s|
D cg c]   }|j#                         j%                         " }
}t'        |
d   j)                  dd      j)                  dd            }t+        |
d   j)                  dd      j)                  dd            }|
d   }|	j!                  d      d   }t,        j.                  j1                  |      }	 t3        j4                  t6        j8                  j/                  t:        j                  ||      d            }	 | j                  d   j?                  |       |t2        j@                  vr|dk  rXt+        |      dk  rh| j                  j                  |      }|r||k7  rtC        d|d|d|       || j                  |<   |jD                  | j                  d   |<    | j                  d   jG                          yc c}w # t6        j<                  $ r" Y | j                  d   j?                  |       %w xY w# | j                  d   j?                  |       w xY w)a  Fetch languages from Wikipedia.  Not all languages from the
    :py:obj:`list_of_wikipedias` are supported by SearXNG locales, only those
    known from :py:obj:`searx.locales.LOCALE_NAMES` or those with a minimal
    :py:obj:`editing depth <wikipedia_article_depth>`.

    The location of the Wikipedia address of a language is mapped in a
    :py:obj:`custom field <searx.enginelib.traits.EngineTraits.custom>`
    (``wiki_netloc``).  Here is a reduced example:

    .. code:: python

       traits.custom['wiki_netloc'] = {
           "en": "en.wikipedia.org",
           ..
           "gsw": "als.wikipedia.org",
           ..
           "zh": "zh.wikipedia.org",
           "zh-classical": "zh-classical.wikipedia.org"
       }
    r   )r   )searxng_useragentr   rg   z*/*)Acceptz
User-Agent   )timeoutheadersz"Response from Wikipedia is not OK.z.//table[contains(@class,"sortable")]//tbody/trz./td   -0,r?         z./td[4]/a/@href)sepi'     zCONFLICT: babel z --> z, N)$searx.networkr   searx.utilsrm   r   wikipedia_script_variantsitems	languageswiki_lc_locale_variantsregionslist_of_wikipediasokRuntimeErrorr   
fromstringtextxpathtext_contentstripfloatreplaceintr+   r,   urlparser   language_tagbabelLocalelang_mapUnknownLocaleErrorrN   LOCALE_NAMESri   netlocsort)re   r   rm   r   sxng_tag_listsxng_tagrq   rO   domrowcolscdeptharticleswiki_urlconflicts                   r   rh   rh      s   . "-*,M'24M./ #<"A"A"C 8% 	8H07M##H-	88 #:"?"?"A 6% 	6H.5M!!(+	66 .?.ABG!1g>D77?@@
//$))
$CyyIJ &Gyy 267Q &&(77d2h&&sC088bABtAwsB/77R@Aq'99./2<<((2	H++ELL,>,>x||GU\?]cf,>,ghH
   !67>>wG7///%5zB  **..x87"8WUV,3)7?]+G4M&GP ./446I 8 '' 	  !67>>wG		   !67>>wGs+   %KAK  L3LLL L8)__doc__urllib.parser+   r   lxmlr   searxr   r   r   rK   searx.enginelib.traitsr   aboutrM   r   wikipedia_article_depthr.   r   r|   r    r3   rR   LOCALE_BEST_MATCHcopyr   updaterk   rh    r!   r   <module>r      s  5n      % / ,"A	 {D J  T  O " &
  	    .l $$))+ *X X
T7, T7r!   