U
    /jOZ                     @   s  d dl Z d dlZd dlZd dlZd dlmZmZ ddlmZ ddl	m
Z
 dZdZedZed	d
hZed	d
dhZed	d
ddddddddh
Zed	d
ddhZeddhZeddddddddhZeddhZeededhZeed	edhZG dd deZG dd deZG dd deZG dd deZeedddZ e!e!e"d d!d"Z#e!e$d#d$d%Z%ee!d#d&d'Z&ee$e!f e"d(d)d*Z'ee$e!f e"e"d+d,d-Z(dNe!e"e"d/d0d1Z)e!e"d(d2d3Z*e!e"d(d4d5Z+e!dd(d6d7Z,e!ee"d8d9d:Z-dOe!ee"e"d;d<d=Z.ee!e$e/f dd(d>d?Z0e!e$d(d@dAZ1ee!e$e/f e!d(dBdCZ2dPe!e"e"e!dEdFdGZ3dQee!e$e/f e"e"e"e"e$dHdIdJZ4dRee!e$e/f e"e"e"e!dKdLdMZ5dS )S    N)OptionalUnion   )idnadata)intranges_contain	   s   xn--u   [.。．｡]RALANENESCSETONBNNSMLDc                   @   s   e Zd ZdZdS )	IDNAErrorz5Base exception for all IDNA-encoding related problemsN__name__
__module____qualname____doc__ r   r   -/tmp/pip-unpacked-wheel-zg0be5mg/idna/core.pyr      s   r   c                   @   s   e Zd ZdZdS )IDNABidiErrorz;Exception when bidirectional requirements are not satisfiedNr   r   r   r   r   r   !   s   r   c                   @   s   e Zd ZdZdS )InvalidCodepointz<Exception when a disallowed or unallocated codepoint is usedNr   r   r   r   r   r   '   s   r   c                   @   s   e Zd ZdZdS )InvalidCodepointContextzCException when the codepoint is not valid in the context it is usedNr   r   r   r   r   r   -   s   r   )cpreturnc                 C   s0   t t| }|dkr,t t| s,td|S )Nr   z Unknown character in unicodedata)unicodedata	combiningchrname
ValueError)r   vr   r   r   _combining_class3   s    r'   )r   scriptr    c                 C   s   t t| tj| S )N)r   ordr   scripts)r   r(   r   r   r   
_is_script:   s    r+   )sr    c                 C   s
   |  dS )Npunycode)encoder,   r   r   r   	_punycode>   s    r0   c                 C   s   d| dS )NzU+Z04Xr   r/   r   r   r   _unotB   s    r1   )labelr    c                 C   s   t | dkS )u  Check that a label does not exceed the maximum permitted length.

    Per :rfc:`1035` (and :rfc:`5891` §4.2.4) a DNS label must not exceed
    63 octets. The argument may be either a :class:`str` (a U-label, where
    length is measured in characters) or :class:`bytes` (an A-label, where
    length is measured in octets).

    :param label: The label to check.
    :returns: ``True`` if the label is within the length limit, otherwise
        ``False``.
    ?   lenr2   r   r   r   valid_label_lengthF   s    r7   )domaintrailing_dotr    c                 C   s   t | |rdndkS )a  Check that a full domain name does not exceed the maximum length.

    Per :rfc:`1035`, a domain name is limited to 253 octets when no trailing
    dot is present, or 254 octets when one is included.

    :param domain: The full (possibly multi-label) domain name.
    :param trailing_dot: ``True`` if ``domain`` includes a trailing ``.``.
    :returns: ``True`` if the domain is within the length limit, otherwise
        ``False``.
          r4   )r8   r9   r   r   r   valid_string_lengthU   s    r<   F)r2   	check_ltrr    c           	      C   sh  d}t | dD ]>\}}t|}|dkr@tdt|  d| |tkrd}q|sZ|sZdS t| d }|tkrvd}n"|dkrd}ntd	t|  d
d}d}t | dD ]\}}t|}|r|tkrtd| d|tkrd}n|dkrd}|t	krT|s
|}n||krTtdq|t
kr8td| d|tkrHd}q|dkrd}q|sdtddS )a!  Validate the Bidi Rule from :rfc:`5893` for a single label.

    The Bidi Rule constrains how bidirectional characters (Hebrew, Arabic,
    etc.) may appear within a label. By default the check is only applied
    when the label contains at least one right-to-left character (Unicode
    bidirectional categories ``R``, ``AL``, or ``AN``); set ``check_ltr``
    to ``True`` to apply it to LTR-only labels as well.

    :param label: The label to validate, as a Unicode string.
    :param check_ltr: If ``True``, apply the rules even when the label
        contains no RTL characters.
    :returns: ``True`` if the label satisfies the Bidi Rule.
    :raises IDNABidiError: If any of Bidi Rule conditions 1-6 are violated,
        or if the directional category of a codepoint cannot be determined.
    Fr    z Unknown directionality in label  at position Tr   r   zFirst codepoint in label z" must be directionality L, R or ALNz,Invalid direction for codepoint at position z in a right-to-left labelr   z2Can not mix numeral types in a right-to-left labelz in a left-to-right labelz0Label ends with illegal codepoint directionality)	enumerater!   bidirectionalr   repr_bidi_rtl_categories_bidi_rtl_first_bidi_rtl_allowed_bidi_rtl_valid_ending_bidi_rtl_numeric_bidi_ltr_allowed_bidi_ltr_valid_ending)	r2   r=   
bidi_labelidxr   	directionrtlvalid_endingnumber_typer   r   r   
check_bidic   sP    






rP   c                 C   s"   t | d d dkrtddS )u^  Reject labels that begin with a combining mark.

    Per :rfc:`5891` §4.2.3.2 a label must not start with a character of
    Unicode general category ``M`` (Mark).

    :param label: The label to check.
    :returns: ``True`` if the first character is not a combining mark.
    :raises IDNAError: If the label begins with a combining character.
    r   Mz0Label begins with an illegal combining characterT)r!   categoryr   r6   r   r   r   check_initial_combiner   s    
rS   c                 C   s<   | dd dkrt d| d dks0| d dkr8t dd	S )
u  Validate the hyphen restrictions for a label.

    Per :rfc:`5891` §4.2.3.1 a label must not start or end with a hyphen
    (``U+002D``), and must not have hyphens in both the third and fourth
    positions (the prefix reserved for A-labels).

    :param label: The label to check.
    :returns: ``True`` if the hyphen restrictions are satisfied.
    :raises IDNAError: If any of the hyphen restrictions are violated.
          z--z4Label has disallowed hyphens in 3rd and 4th positionr   -z)Label must not start or end with a hyphenT)r   r6   r   r   r   check_hyphen_ok   s
    rX   c                 C   s   t d| | krtddS )zRequire that a label is in Unicode Normalization Form C.

    :param label: The label to check.
    :raises IDNAError: If ``label`` differs from its NFC normalisation.
    NFCz%Label must be in Normalization Form CN)r!   	normalizer   r6   r   r   r   	check_nfc   s    r[   )r2   posr    c                 C   s&  t | | }|dkr|dkr8tt | |d  tkr8dS d}t|d ddD ]@}t t | | }|t dkrvqLqL|tkrd} qqL qqL|sdS d}t|d t| D ]@}t t | | }|t dkrqq|t	krd} qq qq|S |dkr|dkott | |d  tkS dS d	S )
a  Validate the CONTEXTJ rules from :rfc:`5892` Appendix A.

    These rules govern the contextual use of the joiner codepoints
    ``U+200C`` (ZERO WIDTH NON-JOINER, Appendix A.1) and ``U+200D``
    (ZERO WIDTH JOINER, Appendix A.2) within a label.

    :param label: The label containing the codepoint.
    :param pos: Index of the joiner codepoint within ``label``.
    :returns: ``True`` if the codepoint at ``pos`` satisfies its CONTEXTJ
        rule, ``False`` otherwise (including when the codepoint at
        ``pos`` is not a recognised joiner).
    :raises ValueError: If an adjacent codepoint has no Unicode name when
        determining its combining class.
    i   r   r   TFrW   Ti   N)
r)   r'   _virama_combining_classranger   joining_typesget_bidi_joiner_l_or_dr5   _bidi_joiner_r_or_d)r2   r\   cp_valueokijoining_typer   r   r   valid_contextj   s8     
"rh   )r2   r\   	exceptionr    c                 C   sr  t | | }|dkr\d|  k o.t| d k n  oZt | |d  dkoZt | |d  dkS |dkr|t| d k rt| dkrt| |d  dS dS |dks|d	kr|dkrt| |d  d
S dS |dkr| D ]6}|dkrqt|dst|dst|dr dS qdS d|  kr&dkr>n ntdd | D  S d|  krVdkrnn ntdd | D  S dS )a  Validate the CONTEXTO rules from :rfc:`5892` Appendix A.

    Covers the contextual rules for codepoints such as MIDDLE DOT
    (``U+00B7``), Greek lower numeral sign, Hebrew punctuation, Katakana
    middle dot, and the Arabic-Indic / Extended Arabic-Indic digit ranges.

    :param label: The label containing the codepoint.
    :param pos: Index of the codepoint within ``label``.
    :param exception: Reserved for forward compatibility; currently unused.
    :returns: ``True`` if the codepoint at ``pos`` satisfies its CONTEXTO
        rule, ``False`` otherwise (including when the codepoint is not a
        recognised CONTEXTO codepoint).
       r   r   l   iu  GreekFi  i  Hebrewi0  u   ・HiraganaKatakanaHanT`  i  c                 s   s*   | ]"}d t |  kodkn  V  qdS )    Nr)   .0r   r   r   r   	<genexpr>4  s     z!valid_contexto.<locals>.<genexpr>rs   rt   c                 s   s*   | ]"}d t |  kodkn  V  qdS )rq   rr   Nru   rv   r   r   r   rx   7  s     )r)   r5   r+   any)r2   r\   ri   rd   r   r   r   r   valid_contexto  s.    H
"rz   c                 C   s  t | ttfr| d} t| dkr,tdt| dds@tdt|  t|  t	|  t
| D ](\}}t|}t|tjd rq`q`t|tjd rz4t| |std	t| d
|d  dt|  W nL tk
r } z,tdt| d|d  dt|  |W 5 d}~X Y nX q`t|tjd rbt| |stdt| d
|d  dt|  q`tdt| d|d  dt|  dq`t|  dS )a8  Run the full set of IDNA 2008 validity checks on a single label.

    Applies, in order: NFC normalisation (:func:`check_nfc`), hyphen
    restrictions (:func:`check_hyphen_ok`), the no-leading-combiner rule
    (:func:`check_initial_combiner`), per-codepoint validity (PVALID,
    CONTEXTJ, CONTEXTO classes from :rfc:`5892`), and the Bidi Rule
    (:func:`check_bidi`).

    :param label: The label to validate. ``bytes`` or ``bytearray`` input
        is decoded as UTF-8 first.
    :raises IDNAError: If the label is empty or fails a structural rule.
    :raises InvalidCodepoint: If the label contains a DISALLOWED or
        UNASSIGNED codepoint.
    :raises InvalidCodepointContext: If a CONTEXTJ or CONTEXTO codepoint
        is not valid in its context.
    :raises IDNABidiError: If the Bidi Rule is violated.
    zutf-8r   zEmpty LabelTr9   Label too longPVALIDCONTEXTJzJoiner  not allowed at position r    in z%Unknown codepoint adjacent to joiner r?   NCONTEXTO
Codepoint z of z not allowed)
isinstancebytes	bytearraydecoder5   r   r<   r[   rX   rS   r@   r)   r   r   codepoint_classesrh   r   r1   rB   r%   rz   r   rP   )r2   r\   r   rd   errr   r   r   check_label<  s@    

   *r   c                 C   sf   z(|  d}t| t|s$td|W S  tk
r<   Y nX t|  tt|  }t|sbtd|S )u  Convert a single U-label into its A-label form.

    The result is the ASCII-Compatible Encoding (ACE) form per :rfc:`5891`
    §4: the label is validated, Punycode-encoded, and prefixed with
    ``xn--``. Pure ASCII labels that are already valid IDNA labels are
    returned unchanged (as :class:`bytes`).

    :param label: The label to convert, as a Unicode string.
    :returns: The A-label as ASCII-encoded :class:`bytes`.
    :raises IDNAError: If the label is invalid or the resulting A-label
        exceeds 63 octets.
    asciir|   )r.   ulabelr7   r   UnicodeEncodeErrorr   _alabel_prefixr0   )r2   label_bytesr   r   r   alabelu  s    
r   c              
   C   s   t | ttfs@z| d}W qH tk
r<   t|  |  Y S X nt| }| }|tr|t	td }|svt
d|drt
dnt| |dS z|d} W n, tk
r } zt
d|W 5 d}~X Y nX t|  | S )a  Convert a single A-label into its U-label form.

    Performs the inverse of :func:`alabel`: an ``xn--``-prefixed label is
    Punycode-decoded and validated. Labels that are already Unicode (or
    plain ASCII without the ACE prefix) are validated and returned as a
    Unicode string.

    :param label: The label to convert. ``bytes`` or ``bytearray`` input
        is treated as ASCII.
    :returns: The U-label as a Unicode string.
    :raises IDNAError: If the label is malformed or fails validation.
    r   Nz5Malformed A-label, no Punycode eligible content found   -z"A-label must not end with a hyphenr-   zInvalid A-label)r   r   r   r.   r   r   lower
startswithr   r5   r   endswithr   UnicodeError)r2   r   r   r   r   r   r     s,    



r   T)r8   
std3_rulestransitionalr    c              	   C   s,  ddl m } d}t| D ]\}}t|}||dk r8|nt||dfd  }|d }	d}
t|dkrn|d }
|	d	kp|	d
kr| p|	dko| o|
dk}|
dk	o|	dkp|	dkr| p|	d
ko|}|r||7 }q|r|
dk	st||
7 }q|	dkrqqtdt| d|d  dt	|  qt
d|S )u  Apply the UTS #46 character mapping to a domain string.

    Implements the mapping table from `UTS #46 §4
    <https://www.unicode.org/reports/tr46/>`_: each character is kept,
    replaced, or rejected based on its status (``V``, ``M``, ``D``, ``3``,
    ``I``). The result is returned in Normalisation Form C.

    :param domain: The full domain name to remap.
    :param std3_rules: If ``True``, apply the stricter STD3 ASCII rules
        (status ``3`` codepoints raise instead of being kept or mapped).
    :param transitional: If ``True``, use transitional processing (status
        ``D`` codepoints are mapped instead of kept). Transitional
        processing has been removed from UTS #46 and this option is
        retained only for backwards compatibility.
    :returns: The remapped domain, in Normalisation Form C.
    :raises InvalidCodepoint: If the domain contains a disallowed
        codepoint under the chosen rules.
    r   )	uts46datar>      ZN   rT   Vr   3rQ   Ir   r   r   rY   )r   r@   r)   bisectbisect_leftr5   AssertionErrorr   r1   rB   r!   rZ   )r8   r   r   r   outputr\   char
code_pointuts46rowstatusreplacementZ
keep_as_isZuse_replacementr   r   r   uts46_remap  s.    $* 

(r   )r,   strictuts46r   r   r    c           
   
   C   s*  |rt jdtdd t| ts^zt| d} W n0 ttfk
r\ } ztd|W 5 d}~X Y nX |rnt| ||} t	| ddstd	d
}g }|r| 
dnt
| }|r|dgkrtd|d dkr|d= d}|D ]$}	t|	} | r||  qtdq|r|d d|} t	| |s&td	| S )aA  Encode a Unicode domain name into its ASCII (A-label) form.

    Splits the input on label separators (only ``U+002E`` if ``strict`` is
    set; otherwise also IDEOGRAPHIC FULL STOP ``U+3002``, FULLWIDTH FULL
    STOP ``U+FF0E``, and HALFWIDTH IDEOGRAPHIC FULL STOP ``U+FF61``),
    encodes each label with :func:`alabel`, and rejoins them with ``.``.
    Optionally pre-processes the input through :func:`uts46_remap`.

    :param s: The domain name to encode.
    :param strict: If ``True``, only ``U+002E`` is recognised as a label
        separator.
    :param uts46: If ``True``, apply UTS #46 mapping before encoding.
    :param std3_rules: Forwarded to :func:`uts46_remap` when ``uts46`` is
        ``True``.
    :param transitional: Forwarded to :func:`uts46_remap` when ``uts46``
        is ``True``. Deprecated: emits a :class:`DeprecationWarning` and
        will be removed in a future version.
    :returns: The encoded domain as ASCII :class:`bytes`.
    :raises IDNAError: If the domain is empty, contains an invalid label,
        or exceeds the maximum domain length.
    zuTransitional processing has been removed from UTS #46. The transitional argument will be removed in a future version.rT   )
stacklevelr   zGshould pass a unicode string to the function rather than a byte string.NTr{   Domain too longF.r>   Empty domainrW   Empty label       .)warningswarnDeprecationWarningr   strUnicodeDecodeError	TypeErrorr   r   r<   split_unicode_dots_rer   appendjoin)
r,   r   r   r   r   r   r9   resultlabelsr2   r   r   r   r.     sD    



r.   )r,   r   r   r   r    c           	   
   C   s   t | tsJzt| d} W n0 ttfk
rH } ztd|W 5 d}~X Y nX |rZt| |d} t| ddsntdd}g }|r| dnt| }|r|d	gkrtd
|d s|d= d}|D ]$}t	|} | r|
|  qtdq|r|
d	 d|S )a  Decode an A-label-encoded domain name back to Unicode.

    Splits the input on label separators (see :func:`encode` for the
    rules), decodes each label with :func:`ulabel`, and rejoins them
    with ``.``. Optionally pre-processes the input through
    :func:`uts46_remap`.

    :param s: The domain name to decode.
    :param strict: If ``True``, only ``U+002E`` is recognised as a label
        separator.
    :param uts46: If ``True``, apply UTS #46 mapping before decoding.
    :param std3_rules: Forwarded to :func:`uts46_remap` when ``uts46`` is
        ``True``.
    :returns: The decoded domain as a Unicode string.
    :raises IDNAError: If the input is not valid ASCII, contains an
        invalid label, or is empty.
    r   zInvalid ASCII in A-labelNFTr{   r   r   r>   r   rW   r   )r   r   r   r   r   r   r<   r   r   r   r   r   )	r,   r   r   r   r   r9   r   r   r2   r   r   r   r   :  s2    


r   )F)F)TF)FFFF)FFF)6r   rer!   r   typingr   r   r>   r   	intrangesr   r^   r   compiler   	frozensetrD   rC   rE   rF   rG   rH   rI   r)   rb   rc   r   r   r   r   r   intr'   r   boolr+   r   r0   r1   r7   r<   rP   rS   rX   r[   rh   rz   r   r   r   r   r   r.   r   r   r   r   r   <module>   sx   
J
6.9)9    H   