
    i&                     P   d Z ddlZddlmZ ddlmZ ddlmZ ej                  rddl	m
Z
 ddlmZ dd	d
ddddZddgZdZdZdZdeddddfdZdddefdZdeeej,                  f   defdZdedz  dedz  fdZdedz  defd Zd!eeee   f   dz  dedz  fd"Zd#eeej,                  f   deeeef   fd$Zd#eeej,                  f   dee   fd%Zd#eeej,                  f   dee   fd&Z d#eeej,                  f   deeeeeeedz  f   fd'Z!d#eeej,                  f   defd(Z"y))a  The OpenAlex engine integrates the `OpenAlex`_ Works API to return scientific
paper results using the :ref:`result_types.paper` class.  It is an "online" JSON
engine that uses the official public API and does not require an API key.

.. _OpenAlex: https://openalex.org
.. _OpenAlex API overview: https://docs.openalex.org/how-to-use-the-api/api-overview

Key features
------------

- Uses the official Works endpoint (JSON)
- Paging support via ``page`` and ``per-page``
- Relevance sorting (``sort=relevance_score:desc``)
- Language filter support (maps SearXNG language to ``filter=language:<iso2>``)
- Maps fields commonly used in scholarly results: title, authors, abstract
  (reconstructed from inverted index), journal/venue, publisher, DOI, tags
  (concepts), PDF/HTML links, pages, volume, issue, published date, and a short
  citations comment
- Supports OpenAlex "polite pool" by adding a ``mailto`` parameter


Configuration
=============

Minimal example for :origin:`settings.yml <searx/settings.yml>`:

.. code:: yaml

   - name: openalex
     engine: openalex
     shortcut: oa
     categories: science, scientific publications
     timeout: 5.0
     # Recommended by OpenAlex: join the polite pool with an email address
     mailto: "[email protected]"

Notes
-----

- The ``mailto`` key is optional but recommended by OpenAlex for better service.
- Language is inherited from the user's UI language; when it is not ``all``, the
  engine adds ``filter=language:<iso2>`` (e.g. ``language:fr``). If OpenAlex has
  few results for that language, you may see fewer items.
- Results typically include a main link. When the primary landing page from
  OpenAlex is a DOI resolver, the engine will use that stable link. When an open
  access link is available, it is exposed via the ``PDF`` and/or ``HTML`` links
  in the result footer.


What is returned
================

Each result uses the :ref:`result_types.paper` class and may include:

- ``title`` and ``content`` (abstract; reconstructed from the inverted index)
- ``authors`` (display names)
- ``journal`` (host venue display name) and ``publisher``
- ``doi`` (normalized to the plain DOI, without the ``https://doi.org/`` prefix)
- ``tags`` (OpenAlex concepts display names)
- ``pdf_url`` (Open access PDF if available) and ``html_url`` (landing page)
- ``publishedDate`` (parsed from ``publication_date``)
- ``pages``, ``volume``, ``number`` (issue)
- ``type`` and a brief ``comments`` string with citation count


Rate limits & polite pool
=========================

OpenAlex offers a free public API with generous daily limits. For extra courtesy
and improved service quality, include a contact email in each request via
``mailto``. You can set it directly in the engine configuration as shown above.
See: `OpenAlex API overview`_.


Troubleshooting
===============

- Few or no results in a non-English UI language:
  Ensure the selected language has sufficient coverage at OpenAlex, or set the
  UI language to English and retry.
- Preference changes fail while testing locally:
  Make sure your ``server.secret_key`` and ``server.base_url`` are set in your
  instance settings so signed cookies work; see :ref:`settings server`.


Implementation
===============

    N)datetime)	urlencode)EngineResults)SXNG_Response)OnlineParamszhttps://openalex.org/
Q110718454z9https://docs.openalex.org/how-to-use-the-api/api-overviewTFJSON)websitewikidata_idofficial_api_documentationuse_official_apirequire_api_keyresultssciencezscientific publicationszhttps://api.openalex.org/works queryparamsr   returnc                    | |d   ddd}|j                  d      }g }t        |t              rM|dk7  rH|j                  d      d   j                  d	      d   }t	        |      d
k(  r|j                  d|        |rdj                  |      |d<   t        t        t              rt        dk7  r	t        |d<   t         dt        |       |d<   y )Npageno
   zrelevance_score:desc)searchpagezper-pagesortlanguageall-r   _   z	language:,filterr   mailto?url)
get
isinstancestrsplitlenappendjoinr"   
search_urlr   )r   r   argsr   filtersiso2s         '/root/searxng/searx/engines/openalex.pyrequestr1   {   s     x &D zz*%HG(C X%6~~c"1%++C03t9>NNYtf-.'*X &#6R<X!l!IdO#45F5M    respr   c                    | j                         }t               }|j                  dg       D ]  }t        |      \  }}}|j                  dd      }t	        |j                  d            xs d}t        |      }	t        |      \  }
}}}}}t        |j                  d            }t        |      }t        |      }|j                  |j                  j                  ||||
||||	|||||||j                  d      |              |S )Nr   titler   abstract_inverted_indexdoitype)r$   r5   contentjournal	publisherr7   tagsauthorspdf_urlhtml_urlpublishedDatepagesvolumenumberr8   comments)jsonr   r%   _extract_links_reconstruct_abstract_extract_authors_extract_biblio_doi_to_plain_extract_tags_extract_commentsaddtypesPaper)r3   dataresitemr$   r?   r>   r5   r9   r=   r:   r;   rA   rB   rC   published_dater7   r<   rD   s                      r0   responserT      s   99;D
/CB' 
!/!5XwXXgr*,TXX6O-PQWUW"4(DSTXDYAE66>DHHUO,T"$T*IIOO#!,XXf%!!  	

> Jr2   biblioc                     | j                  d      }| j                  d      }|r	|r| d| S |rt        |      S |rt        |      S y)N
first_page	last_pager   r   )r%   r'   )rU   rW   rX   s      r0   _stringify_pagesrY      sQ    L)J

;'IiQyk**:9~r2   valuec                 f    | sy dD ]  }	 t        j                  | |      c S  y # t        $ r Y )w xY w)N)z%Y-%m-%dz%Y-%mz%Y)r   strptime
ValueError)rZ   fmts     r0   _parse_dater_      sH    * 	$$UC00
   		s   $	00	doi_valuec                 *    | sy| j                  d      S )Nr   zhttps://doi.org/)removeprefix)r`   s    r0   rJ   rJ      s    !!"455r2   r6   c                     | sy i }d}| j                         D ]  \  }}|D ]  }|||<   t        ||      }  |dk  ry t        d|dz         D cg c]  }|j                  |d       }}dj	                  d |D              }|dk7  r|S d S c c}w )Nr      r    c              3   ,   K   | ]  }|d k7  s	|  yw)r   N ).0ts     r0   	<genexpr>z(_reconstruct_abstract.<locals>.<genexpr>   s     9!bA9s   
)itemsmaxranger%   r+   )	r6   position_to_token	max_indextoken	positionsposiordered_tokenstexts	            r0   rG   rG      s    
 #(*I399; ,y 	,C%*c"Is+I	,, 1}<A!YQR]<STq'++Ar2TNT889~99D2:4'4' Us   BrR   c                    | j                  di       }| j                  di       }|j                  d      xs d}| j                  dd      }|xs |}|}|j                  d      xs |j                  d      xs d}|||fS )Nprimary_locationopen_accesslanding_page_urlr   idr>   oa_url)r%   )rR   rx   ry   rz   work_urlr$   r?   r>   s           r0   rF   rF      s    '+xx0BB'G"&((=""=K,001CDJHHT2&H+8C$H#''	2Ukooh6OUSUG'!!r2   c                     g }| j                  dg       D ]P  }|s|j                  di       }|j                  d      }t        |t              s:|dk7  s@|j                  |       R |S )Nauthorshipsauthordisplay_namer   r%   r&   r'   r*   )rR   r=   auth
author_objr   s        r0   rH   rH      si    G+ )XXh+
!~~n5lC(\R-?NN<() Nr2   c                     g }| j                  dg       D ]?  }|xs i j                  d      }t        |t              s)|dk7  s/|j                  |       A |S )Nconceptsr   r   r   )rR   r<   cnames       r0   rK   rK     sV    DXXj"% R}}^,dC TRZKK Kr2   c                 4   | j                  di       }| j                  di       }|j                  dd      }|j                  dd      }t        |      }|j                  dd      }|j                  dd      }t        | j                  d            }||||||fS )	N
host_venuerU   r   r   r;   rB   issuepublication_date)r%   rY   r_   )	rR   r   rU   r:   r;   rA   rB   rC   rS   s	            r0   rI   rI     s     "&,!;J!XXh3F>>."5G^^K4I!&)EZZ"%FZZ$F *<!=>NIuffnDDr2   c                 P    | j                  d      }t        |t              r| dS y)Ncited_by_countz
 citationsr   )r%   r&   int)rR   r   s     r0   rL   rL   $  s,    XX./N.#& !,,r2   )#__doc__typingrj   r   urllib.parser   searx.result_typesr   TYPE_CHECKINGsearx.extended_typesr   searx.search.processorsr   about
categoriespagingr,   r"   r'   r1   rT   dictAnyrY   r_   rJ   listr   rG   tuplerF   rH   rK   rI   rL   rh   r2   r0   <module>r      s  Xt   " ,??24 '"]	 23
	-
 
63 6 64 6:#? #} #L	T#quu*- 	# 		sTz 	ho 	6S4Z 6C 6(!#tCy.1D8(4Z(*"c155j) "eCcM.B "	4QUU
+ 	S	 	S!%%Z( T#Y E
sAEEz
E
3S#sHtO34EDaee,  r2   