
    xnhB                        d dl mZ d dlmZmZ d dlmZmZ d dlm	Z
 d dlmZmZ d Zd Zd Zd	d
d
d
ddZd	d
d
d
ddZd	d
d
d
ddZd	d
d
d
ddZd Zd
d
ddZd
d
ddZy
)    )annotations)common_affixconv_sequences)is_nonesetupPandas)Indel_py)EditopEditopsc                    t        |       }t        |      }|\  }}}||z  ||z  z   }||k\  rt        |||z  ||z
  |z  z         }|S t        |||z  ||z
  |z  z         }|S )N)lenmin)	s1s2weightslen1len2insertdeletereplacemax_dists	            b/var/www/html/profi_bot/bot/venv/lib/python3.12/site-packages/rapidfuzz/distance/Levenshtein_py.py_levenshtein_maximumr      s    r7Dr7D%FFGf}tf},Ht|x4$;&2H!HI O x4$;&2H!HIO    c                *   t        |       }|\  }}}t        t        d|dz   |z  |            }|D ]]  }|d   }	|dxx   |z  cc<   t        |      D ];  }
|	}| |
   |k7  rt        ||
   |z   ||
dz      |z   |	|z         }||
dz      }	|||
dz   <   = _ |d   S )Nr      )r   listranger   )r   r   r   r   r   r   r   cachech2tempixs               r   _uniform_genericr$      s    r7D%FFGq4!8v-v67EQxaFtAA!u|a6)5Q<&+@$.QQ<DE!a%L   9r   c                   | st        |      S dt        |       z  dz
  }d}t        |       }dt        |       dz
  z  }i }|j                  }d}| D ]  }	 ||	d      |z  ||	<   |dz  } |D ]]  }
 ||
d      }|}||z  |z   |z  |z  |z  }|||z   z  }||z  }|||z  dk7  z  }|||z  dk7  z  }|dz  dz  }|dz  }|||z   z  }||z  }_ |S Nr   r   )r   get)r   r   VPVNcurrDistmaskblock	block_getr#   ch1r    PM_jXD0HPHNs                   r   _uniform_distancer4   ,   s'   2w
s2w,!	B	
B2wHR1DE		I	AsA&*c
	a  a B"}"a'",BG*_"WR$Y1$$R$Y1$$Ag]1WBG*_"W " Or   r   r   r   N)r   	processorscore_cutoff
score_hintc                   |}| ||       }  ||      }t        | |      \  } }||dk(  rt        | |      }n)|dk(  rt        j                  | |      }nt	        | ||      }|||k  r|S |dz   S )a  
    Calculates the minimum number of insertions, deletions, and substitutions
    required to change one sequence into the other according to Levenshtein with custom
    costs for insertion, deletion and substitution

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : tuple[int, int, int] or None, optional
        The weights for the three operations in the form
        (insertion, deletion, substitution). Default is (1, 1, 1),
        which gives all three operations a weight of 1.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the distance is bigger than score_cutoff,
        score_cutoff + 1 is returned instead. Default is None, which deactivates
        this behaviour.
    score_hint : int, optional
        Expected distance between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    distance : int
        distance between s1 and s2

    Raises
    ------
    ValueError
        If unsupported weights are provided a ValueError is thrown

    Examples
    --------
    Find the Levenshtein distance between two strings:

    >>> from rapidfuzz.distance import Levenshtein
    >>> Levenshtein.distance("lewenstein", "levenshtein")
    2

    Setting a maximum distance allows the implementation to select
    a more efficient implementation:

    >>> Levenshtein.distance("lewenstein", "levenshtein", score_cutoff=1)
    2

    It is possible to select different weights by passing a `weight`
    tuple.

    >>> Levenshtein.distance("lewenstein", "levenshtein", weights=(1,1,2))
    3
    r5   )r   r      r   )r   r4   Indeldistancer$   )r   r   r   r6   r7   r8   _dists           r   r<   r<   P   s    D 	Ar]r]B#FB'Y. R(	I	~~b"%B0 (DL,@4W|VWGWWr   c                   |}| ||       }  ||      }t        | |      \  } }|xs d}t        | ||      }t        | ||      }||z
  }	||	|k\  r|	S dS )a  
    Calculates the levenshtein similarity in the range [max, 0] using custom
    costs for insertion, deletion and substitution.

    This is calculated as ``max - distance``, where max is the maximal possible
    Levenshtein distance given the lengths of the sequences s1/s2 and the weights.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : tuple[int, int, int] or None, optional
        The weights for the three operations in the form
        (insertion, deletion, substitution). Default is (1, 1, 1),
        which gives all three operations a weight of 1.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the similarity is smaller than score_cutoff,
        0 is returned instead. Default is None, which deactivates
        this behaviour.
    score_hint : int, optional
        Expected similarity between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    similarity : int
        similarity between s1 and s2

    Raises
    ------
    ValueError
        If unsupported weights are provided a ValueError is thrown
    r5   r   r   )r   r   r<   )
r   r   r   r6   r7   r8   r=   maximumr>   sims
             r   
similarityrC      sx    ` 	Ar]r]B#FB"G"2r73GBG,D
D.C'3,+>3FQFr   c                   |}t                t        |       st        |      ry| ||       }  ||      }t        | |      \  } }|xs d}t        | ||      }t	        | ||      }|r||z  nd}	||	|k  r|	S dS )a  
    Calculates a normalized levenshtein distance in the range [1, 0] using custom
    costs for insertion, deletion and substitution.

    This is calculated as ``distance / max``, where max is the maximal possible
    Levenshtein distance given the lengths of the sequences s1/s2 and the weights.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : tuple[int, int, int] or None, optional
        The weights for the three operations in the form
        (insertion, deletion, substitution). Default is (1, 1, 1),
        which gives all three operations a weight of 1.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_dist > score_cutoff 1.0 is returned instead. Default is None,
        which deactivates this behaviour.
    score_hint : float, optional
        Expected normalized distance between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    norm_dist : float
        normalized distance between s1 and s2 as a float between 1.0 and 0.0

    Raises
    ------
    ValueError
        If unsupported weights are provided a ValueError is thrown
          ?r5   r@   r   r   )r   r   r   r   r<   )
r   r   r   r6   r7   r8   r=   rA   r>   	norm_dists
             r   normalized_distancerG      s    ^ 	AMr{gbkr]r]B#FB"G"2r73GBG,D")wqI%-l1J9RQRRr   c                   |}t                t        |       st        |      ry| ||       }  ||      }t        | |      \  } }|xs d}t        | ||      }d|z
  }|||k\  r|S dS )a  
    Calculates a normalized levenshtein similarity in the range [0, 1] using custom
    costs for insertion, deletion and substitution.

    This is calculated as ``1 - normalized_distance``

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : tuple[int, int, int] or None, optional
        The weights for the three operations in the form
        (insertion, deletion, substitution). Default is (1, 1, 1),
        which gives all three operations a weight of 1.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_sim < score_cutoff 0 is returned instead. Default is None,
        which deactivates this behaviour.
    score_hint : int, optional
        Expected normalized similarity between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    norm_sim : float
        normalized similarity between s1 and s2 as a float between 0 and 1.0

    Raises
    ------
    ValueError
        If unsupported weights are provided a ValueError is thrown

    Examples
    --------
    Find the normalized Levenshtein similarity between two strings:

    >>> from rapidfuzz.distance import Levenshtein
    >>> Levenshtein.normalized_similarity("lewenstein", "levenshtein")
    0.81818181818181

    Setting a score_cutoff allows the implementation to select
    a more efficient implementation:

    >>> Levenshtein.normalized_similarity("lewenstein", "levenshtein", score_cutoff=0.85)
    0.0

    It is possible to select different weights by passing a `weight`
    tuple.

    >>> Levenshtein.normalized_similarity("lewenstein", "levenshtein", weights=(1,1,2))
    0.85714285714285

    When a different processor is used s1 and s2 do not have to be strings

    >>> Levenshtein.normalized_similarity(["lewenstein"], ["levenshtein"], processor=lambda s: s[0])
    0.81818181818181
    g        r5   r@   rE   r   )r   r   r   rG   )	r   r   r   r6   r7   r8   r=   rF   norm_sims	            r   normalized_similarityrJ     s    N 	AMr{gbkr]r]B#FB"G#BG<IYH$,L0H8PqPr   c                   | st        |      g g fS dt        |       z  dz
  }d}t        |       }dt        |       dz
  z  }i }|j                  }d}| D ]  }	 ||	d      |z  ||	<   |dz  } g }
g }|D ]  } ||d      }|}||z  |z   |z  |z  |z  }|||z   z  }||z  }|||z  dk7  z  }|||z  dk7  z  }|dz  dz  }|dz  }|||z   z  }||z  }|
j                  |       |j                  |        ||
|fS r&   )r   r'   append)r   r   r(   r)   r*   r+   r,   r-   r#   r.   	matrix_VP	matrix_VNr    r/   r0   r1   r2   r3   s                     r   _matrixrO   v  s]   BR  
s2w,!	B	
B2wHR1DE		I	AsA&*c
	a  IIa B"}"a'",BG*_"WR$Y1$$R$Y1$$Ag]1WBG*_"W% ( i++r   r6   r8   c                  |}| ||       }  ||      }t        | |      \  } }t        | |      \  }}| |t        |       |z
   } ||t        |      |z
   }t        | |      \  }}}	t	        g dd      }
t        |       |z   |z   |
_        t        |      |z   |z   |
_        |dk(  r|
S dg|z  }t        |       }t        |      }|dk7  r|dk7  r||dz
     d|dz
  z  z  r!|dz  }|dz  }t        d||z   ||z         ||<   n_|dz  }|r-|	|dz
     d|dz
  z  z  r|dz  }t        d||z   ||z         ||<   n+|dz  }| |   ||   k7  r|dz  }t        d||z   ||z         ||<   |dk7  r|dk7  r|dk7  r&|dz  }|dz  }t        d||z   ||z         ||<   |dk7  r&|dk7  r&|dz  }|dz  }t        d||z   ||z         ||<   |dk7  r&||
_        |
S )u  
    Return Editops describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_hint : int, optional
        Expected distance between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    editops : Editops
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described [8]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [8] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import Levenshtein
    >>> for tag, src_pos, dest_pos in Levenshtein.editops("qabxcd", "abycdf"):
    ...    print(("%7s s1[%d] s2[%d]" % (tag, src_pos, dest_pos)))
     delete s1[1] s2[0]
    replace s1[3] s2[2]
     insert s1[6] s2[5]
    Nr   r   r   r   r   )	r   r   r   rO   r
   _src_len	_dest_lenr	   _editops)r   r   r6   r8   r=   
prefix_len
suffix_lenr>   r(   r)   editopseditop_listcolrows                 r   rW   rW     sb   ^ 	Ar]r]B#FB)"b1J
	JR:-	.B	JR:-	.B2r?LD"bb!QG2w+j8GB*,z9Gqy&4-K
b'C
b'C
(saxcAg;!a.)AID1HC &xz1A3CS TK1HC 37qS1W~6	$*8S:5EsZGW$XD!q c7bg%AID(.y#
:JCR\L\(]K%' (sax* (	q"8S:-=sZ?OPD (
 (	q"8S:-=sZ?OPD (
 #GNr   c               <    t        | |||      j                         S )u  
    Return Opcodes describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_hint : int, optional
        Expected distance between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    opcodes : Opcodes
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described [9]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [9] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import Levenshtein

    >>> a = "qabxcd"
    >>> b = "abycdf"
    >>> for tag, i1, i2, j1, j2 in Levenshtein.opcodes("qabxcd", "abycdf"):
    ...    print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
    ...           (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])))
     delete a[0:1] (q) b[0:0] ()
      equal a[1:3] (ab) b[0:2] (ab)
    replace a[3:4] (x) b[2:3] (y)
      equal a[4:6] (cd) b[3:5] (cd)
     insert a[6:6] () b[5:6] (f)
    rP   )rW   
as_opcodes)r   r   r6   r8   s       r   opcodesr]     s    j 2rY:FQQSSr   )
__future__r   rapidfuzz._common_pyr   r   rapidfuzz._utilsr   r   rapidfuzz.distancer   r;   !rapidfuzz.distance._initialize_pyr	   r
   r   r$   r4   r<   rC   rG   rJ   rO   rW   r]    r   r   <module>rd      s    # = 1 0 =$!P OXl :GB =SH TQn&,Z dV 5Tr   