
    xnhF                     N    d dl mZ d dlZd dlZd dlmZ ddlmZ  G d de      Z	y)    )defaultdictN)xrange   )NGramc                   v    e Zd ZdZdZ ej                  d      Z ej                  d      Zd
dZ	d Z
d Zd	 Zy)LangProfile   i z
^[A-Za-z]$z.*[A-Za-z].*Nc                     t        t              | _        || j                  j                  |       |dgt        j
                  z  }|| _        || _        y )Nr   )r   intfrequpdater   N_GRAMnamen_words)selfr   r   r   s       ^/var/www/html/profi_bot/bot/venv/lib/python3.12/site-packages/langdetect/utils/lang_profile.py__init__zLangProfile.__init__   sI    $	IIT"?cELL(G	    c                     | j                   |yt        |      }|dk  s|t        j                  kD  ry| j                  |dz
  xx   dz  cc<   | j
                  |xx   dz  cc<   y)zAdd n-gram to profile.Nr   )r   lenr   r   r   r   )r   gramlengths      r   addzLangProfile.add   s[    99TA:%,,.VaZ A% 		$1r   c                    | j                   yt        | j                  d   | j                  z  | j                        }d}t        t        j                  | j                              D ]\  \  }}||k  r1| j                  t        |      dz
  xx   |z  cc<   | j                  |= <| j                  j                  |      sX||z  }^ || j                  d   dz  k  r}t        t        j                  | j                              D ]Q  \  }}| j                  j                  |      s"| j                  t        |      dz
  xx   |z  cc<   | j                  |= S yy)zAEliminate below less frequency n-grams and noise Latin alphabets.Nr   r      )r   maxr   LESS_FREQ_RATIOMINIMUM_FREQlistsix	iteritemsr   r   ROMAN_CHAR_REmatchROMAN_SUBSTR_RE)r   	thresholdromankeycounts        r   omit_less_freqzLangProfile.omit_less_freq&   s   99Q4+?+??ARARS	s}}TYY78JC	!SXaZ(E1(IIcN##))#. 9 4<<?a''"3==#;<
U''--c2LLS!,5,		# = (r   c                     |yt        j                  |      }t               }|D ]U  }|j                  |       t        dt         j                  dz         D ]"  }| j                  |j                  |             $ W y)zUpdate the language profile with (fragmented) text.
        Extract n-grams from text and add their frequency into the profile.
        Nr   )r   normalize_viadd_charr   r   r   get)r   textr   chns        r   r   zLangProfile.update;   sf     <!!$'wBMM"Au||A~.!% / r   )NNN)__name__
__module____qualname__r   r   recompiler"   r$   r   r   r)   r    r   r   r   r   
   s@    LOBJJ}-M bjj1O	'*&r   r   )
collectionsr   r4   r    	six.movesr   ngramr   objectr   r6   r   r   <module>r;      s     # 	 
  <&& <&r   