
    xnh,                        d dl mZ d dlmZmZ d dlmZmZ d dlm	Z	m
Z
 ddddZ	 ddZdddd	Zdddd
ZddddZd ZdddZdddZy)    )annotations)common_affixconv_sequences)is_nonesetupPandas)EditopEditopsN)	processorscore_cutoffc               f   | ||       }  ||      }| syt        | |      \  } }dt        |       z  dz
  }i }|j                  }d}| D ]  } ||d      |z  ||<   |dz  } |D ]  }	 ||	d      }
||
z  }||z   ||z
  z  } t        |      t        |        d j	                  d      }|||k\  r|S dS )a  
    Calculates the length of the longest common subsequence

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the similarity is smaller than score_cutoff,
        0 is returned instead. Default is None, which deactivates
        this behaviour.

    Returns
    -------
    similarity : int
        similarity between s1 and s2
    Nr      0)r   lengetbincount)s1s2r
   r   Sblock	block_getxch1ch2Matchesuress                ]/var/www/html/profi_bot/bot/venv/lib/python3.12/site-packages/rapidfuzz/distance/LCSseq_py.py
similarityr   
   s    < r]r]B#FB	
c"gAE		I	AsA&*c
	a  C#KUq1u  a&#b'

"
"3
'C'3,+>3FQF    c                    |sydt        |      z  dz
  }| j                  }|D ]  } ||d      }||z  }||z   ||z
  z  } t        |      t        |       d  j                  d      }	||	|k\  r|	S dS Nr   r   r   )r   r   r   r   )
r   r   r   r   r   r   r   r   r   r   s
             r   _block_similarityr#   B   s     	
c"gA		IC#KUq1u  a&#b'

"
"3
'C'3,+>3FQFr    c                   | ||       }  ||      }t        | |      \  } }t        t        |       t        |            }t        | |      }||z
  }|||k  r|S |dz   S )a  
    Calculates the LCS distance in the range [0, max].

    This is calculated as ``max(len1, len2) - similarity``.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the distance is bigger than score_cutoff,
        score_cutoff + 1 is returned instead. Default is None, which deactivates
        this behaviour.

    Returns
    -------
    distance : int
        distance between s1 and s2

    Examples
    --------
    Find the LCS distance between two strings:

    >>> from rapidfuzz.distance import LCSseq
    >>> LCSseq.distance("lewenstein", "levenshtein")
    2

    Setting a maximum distance allows the implementation to select
    a more efficient implementation:

    >>> LCSseq.distance("lewenstein", "levenshtein", score_cutoff=1)
    2

    r   )r   maxr   r   )r   r   r
   r   maximumsimdists          r   distancer)   X   ss    ^ r]r]B#FB#b'3r7#G
R
CS=D (DL,@4W|VWGWWr    c                   t                t        |       st        |      ry| ||       }  ||      }| r|syt        | |      \  } }t        t	        |       t	        |            }t        | |      |z  }|||k  r|S dS )a2  
    Calculates a normalized LCS similarity in the range [1, 0].

    This is calculated as ``distance / max(len1, len2)``.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_dist > score_cutoff 1.0 is returned instead. Default is 1.0,
        which deactivates this behaviour.

    Returns
    -------
    norm_dist : float
        normalized distance between s1 and s2 as a float between 0 and 1.0
          ?r   r   )r   r   r   r%   r   r)   )r   r   r
   r   r&   norm_sims         r   normalized_distancer-      s    > Mr{gbkr]r]RB#FB#b'3r7#GB')H$,L0H8PqPr    c                   t                t        |       st        |      ry| ||       }  ||      }dt        | |      z
  }|||k\  r|S dS )a  
    Calculates a normalized LCS similarity in the range [0, 1].

    This is calculated as ``1 - normalized_distance``

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_sim < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    norm_sim : float
        normalized similarity between s1 and s2 as a float between 0 and 1.0

    Examples
    --------
    Find the normalized LCS similarity between two strings:

    >>> from rapidfuzz.distance import LCSseq
    >>> LCSseq.normalized_similarity("lewenstein", "levenshtein")
    0.8181818181818181

    Setting a score_cutoff allows the implementation to select
    a more efficient implementation:

    >>> LCSseq.normalized_similarity("lewenstein", "levenshtein", score_cutoff=0.9)
    0.0

    When a different processor is used s1 and s2 do not have to be strings

    >>> LCSseq.normalized_similarity(["lewenstein"], ["levenshtein"], processor=lambda s: s[0])
    0.81818181818181
    g        r+   r   )r   r   r-   )r   r   r
   r   r,   s        r   normalized_similarityr/      s[    d Mr{gbkr]r](R00H$,L0H8PqPr    c                B   | sdg fS dt        |       z  dz
  }i }|j                  }d}| D ]  } ||d      |z  ||<   |dz  } g }|D ],  } ||d      }	||	z  }
||
z   ||
z
  z  }|j                  |       . t        |      t        |        d  j	                  d      }||fS r"   )r   r   appendr   r   )r   r   r   r   r   r   r   matrixr   r   r   r'   s               r   _matrixr3      s    2w	
c"gAE		I	AsA&*c
	a  FC#KUq1ua	  a&#b'

"
"3
'C=r    r
   c               t   | ||       }  ||      }t        | |      \  } }t        | |      \  }}| |t        |       |z
   } ||t        |      |z
   }t        | |      \  }}t	        g dd      }t        |       |z   |z   |_        t        |      |z   |z   |_        t        |       t        |      z   d|z  z
  }|dk(  r|S dg|z  }	t        |       }
t        |      }|dk7  r{|
dk7  rv||dz
     d|
dz
  z  z  r!|dz  }|
dz  }
t        d|
|z   ||z         |	|<   n9|dz  }|r-||dz
     d|
dz
  z  z  s|dz  }t        d|
|z   ||z         |	|<   n|
dz  }
|dk7  r|
dk7  rv|
dk7  r&|dz  }|
dz  }
t        d|
|z   ||z         |	|<   |
dk7  r&|dk7  r&|dz  }|dz  }t        d|
|z   ||z         |	|<   |dk7  r&|	|_        |S )uf  
    Return Editops describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.

    Returns
    -------
    editops : Editops
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described in [6]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [6] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import LCSseq
    >>> for tag, src_pos, dest_pos in LCSseq.editops("qabxcd", "abycdf"):
    ...    print(("%7s s1[%d] s2[%d]" % (tag, src_pos, dest_pos)))
     delete s1[0] s2[0]
     delete s1[3] s2[2]
     insert s1[4] s2[2]
     insert s1[6] s2[5]
    Nr      r   deleteinsert)	r   r   r   r3   r	   _src_len	_dest_lenr   _editops)r   r   r
   
prefix_len
suffix_lenr'   r2   editopsr(   editop_listcolrows               r   r>   r>     s?   X r]r]B#FB)"b1J
	JR:-	.B	JR:-	.B"b/KCb!QG2w+j8GB*,z9Gr7SWq3w&Dqy&4-K
b'C
b'C
(sax#'?aC!Gn-AID1HC &xz1A3CS TK1HC F37OqS1W~>	$*8S:5EsZGW$XD! q (sax" (	q"8S:-=sZ?OPD (
 (	q"8S:-=sZ?OPD (
 #GNr    c               :    t        | ||      j                         S )u  
    Return Opcodes describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.

    Returns
    -------
    opcodes : Opcodes
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described in [7]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [7] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import LCSseq

    >>> a = "qabxcd"
    >>> b = "abycdf"
    >>> for tag, i1, i2, j1, j2 in LCSseq.opcodes(a, b):
    ...    print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
    ...           (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])))
     delete a[0:1] (q) b[0:0] ()
      equal a[1:3] (ab) b[0:2] (ab)
     delete a[3:4] (x) b[2:2] ()
     insert a[4:4] () b[2:3] (y)
      equal a[4:6] (cd) b[3:5] (cd)
     insert a[6:6] () b[5:6] (f)
    r4   )r>   
as_opcodes)r   r   r
   s      r   opcodesrD   x  s    d 2rY/::<<r    )N)
__future__r   rapidfuzz._common_pyr   r   rapidfuzz._utilsr   r   !rapidfuzz.distance._initialize_pyr   r	   r   r#   r)   r-   r/   r3   r>   rD    r    r   <module>rJ      su    # = 1 = 5Gx 	G4 7X| -Qh ;Q|8 	]H 	2=r    