
    ̯wg                         d dl Z d dlZd dlZi dddddddd	d
dddddddddddddddddddddddZd$defd Zdefd!Z G d" d#      Zy)%    Nu   œoeu   ŒOE   øo   ØO   æae   ÆAE   ßssu   ẞSSu   đdu   ĐD   ð   Ð   þth   Þu   łlu   ŁLsc                 `    dj                  fdt        j                  d|       D              S )z
    Replace any other markers, symbols, and punctuations with a space,
    and drop any diacritics (category 'Mn' and some manual mappings)
     c              3      K   | ]S  }|v r|nH|t         v r	t         |   n7t        j                  |      d k(  rdnt        j                  |      d   dv rdn| U yw)Mnr   r   MSP N)ADDITIONAL_DIACRITICSunicodedatacategory).0ckeeps     ^/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/whisper/normalizers/basic.py	<genexpr>z0remove_symbols_and_diacritics.<locals>.<genexpr>    sz        9 	
 %% #1% "d*  "1%. 	s   AANFKDjoinr!   	normalize)r   r%   s    `r&   remove_symbols_and_diacriticsr,      s2    
 77  &&vq1      c                 Z    dj                  d t        j                  d|       D              S )z[
    Replace any other markers, symbols, punctuations with a space, keeping diacritics
    r   c              3   X   K   | ]"  }t        j                  |      d    dv rdn| $ yw)r   r   r   N)r!   r"   )r#   r$   s     r&   r'   z!remove_symbols.<locals>.<genexpr>2   s4       ##A&q)U29s   (*NFKCr)   )r   s    r&   remove_symbolsr1   .   s/     77 &&vq1  r-   c                   *    e Zd ZddedefdZdefdZy)BasicTextNormalizerremove_diacriticssplit_lettersc                 8    |rt         nt        | _        || _        y )N)r,   r1   cleanr5   )selfr4   r5   s      r&   __init__zBasicTextNormalizer.__init__9   s    ->)N 	
 +r-   r   c                 n   |j                         }t        j                  dd|      }t        j                  dd|      }| j                  |      j                         }| j                  r4dj                  t        j                  d|t        j                              }t        j                  dd|      }|S )Nz[<\[][^>\]]*[>\]]r   z\(([^)]+?)\)r   z\Xz\s+)	lowerresubr7   r5   r*   regexfindallU)r8   r   s     r&   __call__zBasicTextNormalizer.__call__?   s    GGIFF'Q/FF?B*JJqM!ua9:AFFC
 r-   N)FF)__name__
__module____qualname__boolr9   strrA    r-   r&   r3   r3   8   s!    +$ +t +# r-   r3   )r   )r<   r!   r>   r    rF   r,   r1   r3   rG   r-   r&   <module>rH      s    	  $$ 	# 	#	
 	$ 	$ 	$ 
4 	# 	# 	# 	# 	$ 	$ 	#  	#! (S &c  r-   