
    Ti
                     d    S SK Jr  S SKJr  S SKr " S S5      r " S S\5      r " S S	\5      rg)
    )Path)ListNc                   D    \ rS rSrS\S\\   4S jrS\\   S\4S jrSrg)		Tokenizer   sentencereturnc                     [        5       eNNotImplementedError)selfr   s     j/var/www/html/backend/Backoffice_Marketplace/venv/lib/python3.13/site-packages/argostranslate/tokenizer.pyencodeTokenizer.encode       !##    tokensc                     [        5       er   r   r   r   s     r   decodeTokenizer.decode   r   r    N)	__name__
__module____qualname____firstlineno__strr   r   r   __static_attributes__r   r   r   r   r      s0    $s $tCy $$T#Y $3 $r   r   c                   t    \ rS rSrS\4S jrS\R                  4S jrS\	S\
\	   4S jrS\
\	   S\	4S	 jrS
rg)SentencePieceTokenizer   
model_filec                     Xl         S U l        g r   r#   	processor)r   r#   s     r   __init__SentencePieceTokenizer.__init__   s    $r   r	   c                     U R                   c,  [        R                  " [        U R                  5      S9U l         U R                   $ )N)r#   )r&   spmSentencePieceProcessorr   r#   )r   s    r   lazy_processor%SentencePieceTokenizer.lazy_processor   s1    >>! 773tCWXDN~~r   r   c                 J    U R                  5       R                  U[        S9nU$ )N)out_type)r,   r   r   )r   r   r   s      r   r   SentencePieceTokenizer.encode   s%    $$&--h-Er   r   c                     U R                  5       R                  U5      R                  SS5      R                  SS5      $ )u   
# Returns not decoded byte-fallback tokens, quite detrimental to Asian languages translations
detokenized = "".join(tokens)
return detokenized.replace("▁", " ")
u   ▁ _)r,   decode_piecesreplacer   s     r   r   SentencePieceTokenizer.decode   s7     !]6"WUC WS#		
r   r%   N)r   r   r   r   r   r'   r*   r+   r,   r   r   r   r   r   r   r   r   r!   r!      sN    4  : : 
s tCy 
T#Y 
3 
r   r!   c                   `    \ rS rSrS\S\S\4S jrS rS\S\\   4S	 jr	S
\\   S\4S jr
Srg)BPETokenizer,   r#   	from_codeto_codec                 `    Xl         X l        X0l        S U l        S U l        S U l        S U l        g r   )r#   r:   r;   	tokenizerdetokenizer
bpe_source
normalizer)r   r#   r:   r;   s       r   r'   BPETokenizer.__init__-   s.    $"r   c                 \   U R                   c  SSKJn  SSKJnJn  U" U R                  5      U l         U" U R                  5      U l        U" U R                  5      U l	        SSK
Jn  [        [        U R                  5      SSS9 nU" U5      U l        S S S 5        g g ! , (       d  f       g = f)Nr   )MosesPunctNormalizer)MosesDetokenizerMosesTokenizer)BPErzutf-8)encoding)r=   sacremoses.normalizerC   sacremoses.tokenizerD   rE   r:   r;   r>   r@   argostranslate.apply_bperF   openr   r#   r?   )r   rC   rD   rE   rF   fs         r   	lazy_loadBPETokenizer.lazy_load6   s    >>!AL+DNN;DN/=D24>>BDO4c$//*C'Ba"%a& CB " CBs   B
B+r   r	   c                 "   U R                  5         U R                  R                  U5      nSR                  U R                  R                  U5      5      nU R                  R                  UR                  S5      R                  S5      5      nU$ )Nr2   z
 )
rN   r@   	normalizejoinr=   tokenizer?   segment_tokensstripsplit)r   r   
normalized	tokenized	segmenteds        r   r   BPETokenizer.encodeD   sl    __..x8
HHT^^44Z@A	OO229??73K3Q3QRU3VW	r   r   c                     U R                  5         U R                  R                  SR                  U5      R	                  SS5      R                  S5      5      $ )Nr2   z@@  )rN   r>   
detokenizerR   r5   rV   r   s     r   r   BPETokenizer.decodeM   sI    **HHV$$UB/55c:
 	
r   )r?   r>   r:   r#   r@   r;   r=   N)r   r   r   r   r   r   r'   rN   r   r   r   r   r   r   r   r8   r8   ,   sO    4 C # )s tCy 
T#Y 
3 
r   r8   )	pathlibr   typingr   sentencepiecer*   r   r!   r8   r   r   r   <module>rb      s0      $ $
Y 
:&
9 &
r   