
    Qi(                         S SK r S SKJr  S SKJr  S SKJr  S SKJrJr  \" 5       r	\" 5       r
 " S S\5      r " S S	\5      rSS	/rg)
    N)Perluniprops)NonbreakingPrefixes)is_cjk)VIRAMASNUKTASc            
         ^  \ rS rSrSr\" SR                  \R                  S5      5      5      r	\" SR                  \R                  S5      5      SR                  \
5      -   SR                  \5      -   5      r\" SR                  \R                  S5      5      5      r\" SR                  \R                  S5      5      5      r\" SR                  \R                  S5      5      SR                  \
5      -   SR                  \5      -   5      r\" SR                  \R                  S	5      5      5      r\R$                  " S
5      S4r\R$                  " S5      S4rSrSrSr\R$                  " SR1                  \5      5      S4r\R$                  " SR1                  \S95      S4r\R$                  " S5      S4r\R$                  " S5      S4r\R$                  " S5      S4r\R$                  " SR1                  \	5      5      S4r\R$                  " SR1                  \	5      5      S4r\R$                  " SR1                  \	5      5      S4r \R$                  " S 5      S!4r!\R$                  " S"5      S!4r"\R$                  " S#5      S$4r#\R$                  " S%5      S&4r$\R$                  " S'5      S(4r%\R$                  " S)5      S(4r&\R$                  " S*5      S+4r'\R$                  " S,5      S-4r(\R$                  " S.5      S/4r)\R$                  " S05      S.4r*\R$                  " S1R1                  \	S295      S34r+\R$                  " S4R1                  \	S295      S34r,\R$                  " S5R1                  \	S295      S34r-\R$                  " S6R1                  \\5      5      S4r.S7R1                  \S9S84r/\R$                  " S95      S:4r0\R$                  " S;5      S4r1\R$                  " S<5      S4r2\R$                  " S=5      S>4r3\R$                  " S?5      S@4r4\R$                  " SA5      SB4r5\R$                  " SC5      SD4r6\R$                  " SE5      SF4r7\R$                  " SG5      SH4r8\R$                  " SI5      SJ4r9\R$                  " SK5      S4r:\R$                  " SL5      S4r;\R$                  " SM5      SN4r<\R$                  " SO5      SP4r=\R$                  " SQ5      SP4r>\R$                  " SR5      SS4r?\R$                  " ST5      SU4r@\R$                  " SV5      SW4rA\R$                  " SX5      SY4rB\R$                  " SZ5      S[4rC\R$                  " S\5      S]4rD\R$                  " S^5      S_4rE\R$                  " S`5      Sa4rF\R$                  " Sb5      Sc4rG\R$                  " Sd5      Se4rH\R$                  " Sf5      Sg4rI\R$                  " Sh5      Si4rJ\R$                  " Sj5      Sk4rK\R$                  " Sl5      Sm4rL\R$                  " Sn5      So4rM\R$                  " Sp5      Sq4rN\R$                  " Sr5      Ss4rO\R$                  " St5      Su4rP\R$                  " Sv5      Sw4rQ\R$                  " Sx5      S4rR\R$                  " Sy5      S4rS\R$                  " Sz5      S4rT\R$                  " S{5      S|4rU\R$                  " S}5      S~4rV\R$                  " S5      S4rW\R$                  " S5      S4rX\R$                  " S5      S4rY\R$                  " S5      S4rZ\R$                  " SA5      S4r[\R$                  " S5      S4r\\R$                  " SR1                  \S95      S4r]\R$                  " SR1                  \\	S95      S4r^\R$                  " SR1                  \S95      S4r_\R$                  " SR1                  \S95      S4r`\R$                  " SR1                  \	S95      S4ra\]\^\_\`\a/rb\R$                  " SR1                  \S95      S4rc\R$                  " SR1                  \S95      S4rd\R$                  " SR1                  \S95      S4re\R$                  " SR1                  \S95      S4rf\c\d\e\f/rg\R$                  " S5      S4rh\R$                  " S5      S4riSrjSrkSrlSrmSrn/ \P\P\!P\"P\#P\$P\%P\&P\'P\(P\)P\+P\,P\-P\.P\/P\0P\1P\2P\3P\4P\5P\6P\7P\8P\9P\:P\;P\<P\=P\>P\?P\@P\AP\BP\CP\DP\EP\FP\GP\HP\IP\JP\KP\LP\MP\NP\OP\PP\QPro\*\R\S\T\U\V\W\X\Y\Z/
rp\U\V\W\X\Y\Z\[\\/rq\j\k\l\m\n/rr/ SQrsSU 4S jjrtS ruS rvS rwS rxS ryS rzS r{SS jr|    SS jr}Sr~U =r$ )MosesTokenizer   z
This is a Python port of the Moses Tokenizer from
https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl
 IsNIsAlnumIsScIsSoIsAlphaIsLowerz\s+ z[\000-\037])z +r   )z^ r   )z $r   ([^{}\s\.'\`\,\-]) \1  ([{alphanum}])\-(?=[{alphanum}])alphanum\1 @-@ 	\.([\.]+)z
 DOTMULTIDOTMULTI\.([^\.])zDOTDOTMULTI 
DOTMULTI\.DOTDOTMULTIz
([^{}])[,]z\1 , z
[,]([^{}])z , \1z
([{}])[,]$z^``z`` z^"z^`([^`])z` \1z^'z`  z
([ ([{<])"z\1 `` z([ ([{<])``z([ ([{<])`([^`])z\1 ` \2z
([ ([{<])'z\1 ` z\.\.\.z _ELLIPSIS_ 
_ELLIPSIS_z([^{numbers}])[,]([^{numbers}]))numbersz\1 , \2z([{numbers}])[,]([^{numbers}])z([^{numbers}])[,]([{numbers}])z([;:@#\$%&{}{}])([{alphanum}])\/([{alphanum}])$1 \@\/\@ $2z([^.])([.])([\]\)}>"']*) ?$z\1 \2\3z([?!])z([\]\[\(\){}<>])z\(z-LRB-z\)z-RRB-z\[z-LSB-z\]z-RSB-z\{z-LCB-z\}z-RCB-z--z -- ^$"z '' z([^'])' z\1 ' z([^'])'z'([sSmMdD]) z '\1 z'll z 'll z're z 're z've z 've zn't z n't z'LL z 'LL z'RE z 'RE z'VE z 'VE zN'T z N'T z ([Cc])annot z
 \1an not z ([Dd])'ye z \1' ye z ([Gg])imme z	 \1im me z ([Gg])onna z	 \1on na z ([Gg])otta z	 \1ot ta z ([Ll])emme z	 \1em me z ([Mm])ore'n z
 \1ore 'n z '([Tt])is z '\1 is z '([Tt])was z	 '\1 was z ([Ww])anna z	 \1an na z  *z^ *z *$&&amp;z\|&#124;<&lt;>&gt;z\'&apos;z\"&quot;&#91;]&#93;z([^{alpha}])[']([^{alpha}]))alphaz\1 ' \2z([^{alpha}{isn}])[']([{alpha}]))r0   isnz([{alpha}])[']([^{alpha}])z([{alpha}])[']([{alpha}])z\1 '\2z([{isn}])[']([s]))r1   z([^{alpha}])[']([{alpha}])z\1' \2z ' z\.' ?$z . ' z<\/?\S+\/?>z#<\S+( [a-zA-Z0-9]+\="?[^"]")+ ?\/?>z#<\S+( [a-zA-Z0-9]+\='?[^']')+ ?\/?>'[\w\-\_\.]+\@([\w\-\_]+\.)+[a-zA-Z]{2,}z/(http[s]?|ftp):\/\/[^:\/\s]+(\/\w+)*\/[\w\-\.]+)z"((https?|ftp|rsync)://|www\.)[^ ]*r2   z@[a-zA-Z0-9_]+z#[a-zA-Z0-9_]+c                   > [         [        U ]  5         Xl        [        R                  U5       Vs/ s H  o3R                  5       PM     snU l        U(       a  / U l        [        US5       nU H`  nUR                  5       nU(       d  M  UR                  S5      (       a  M4  XPR                  ;  d  ME  U R                  R                  U5        Mb     S S S 5        U R                   Vs/ s H/  nU R                  U5      (       d  M  UR                  S5      S   PM1     snU l        U R                  S;   Ga  SnU R                  S;   a0  U[        SR                  [         R#                  S5      5      5      -  nU R                  S	;   a0  U[        SR                  [         R#                  S
5      5      5      -  nU R                  S;   a  U[        SR                  [         R#                  S5      5      5      -  nU[        SR                  [         R#                  S5      5      5      -  nU[        SR                  [         R#                  S
5      5      5      -  nU =R$                  U-  sl        U =R&                  U-  sl        [(        R*                  " SR-                  U R&                  5      5      S4U l        [(        R*                  " SR-                  U R&                  S95      S4U l        [(        R*                  " SR-                  U R&                  S95      S4U l        g g s  snf ! , (       d  f       GN]= fs  snf )Nr#r   r   )zhjakocjkr   )r8   r9   Hangul)r6   r9   Han)r7   r9   HiraganaKatakanar   r   r   r   r   r   r    )superr	   __init__langnonbreaking_prefixeswordsstripNONBREAKING_PREFIXESopen
startswithappendhas_numeric_only
rpartitionNUMERIC_ONLY_PREFIXESstrjoinperlunipropscharsr   r   recompileformatPAD_NOT_ISALNUMAGGRESSIVE_HYPHEN_SPLITINTRATOKEN_SLASHES)	selfr@    custom_nonbreaking_prefixes_file_nbpfinlinew	cjk_chars	__class__s	           e/var/www/html/backend/Backoffice_Marketplace/venv/lib/python3.13/site-packages/sacremoses/tokenize.pyr?   MosesTokenizer.__init__(  s   nd,.	 &:%?%?%E%
%ETJJL%E%
!
 ,(*D%6<D::<DtDOOC$8$8'@'@@ 55<<TB	   = ..&
.$$Q' !ALLa .&
" 9911IyyM)S););H)E!FGG	yyM)S););E)B!CDD	yyM)S););J)G!HII	S););J)G!HII	S););E)B!CDD	LLI%LLLI%L#%::.C.J.J4<<.X#Y[b#bD 

>EEt||E\],D(
 

<CCT\\CZ['D#% 2)%
 =<&
s/   L$,L)L)#L)4L))L;L;)
L8c                    [         R                  " SSU5      n[         R                  " S5      nUR                  U5      (       aB  [         R                  " SSU5      nUR                  SU5      nUR                  U5      (       a  MB  U$ )Nr   z DOTMULTI\1r   r   zDOTDOTMULTI \1r   )rO   subrP   searchrU   textdotmultis      r]   replace_multidots MosesTokenizer.replace_multidotsY  sm    vvlND9::m,ood##66.0A4HD<<t4D ood##     c                     [         R                  " S5      nUR                  U5      (       a*  UR                  SU5      nUR                  U5      (       a  M*  [         R                  " SSU5      $ )Nr   z	DOTMULTI.DOTMULTI.)rO   rP   ra   r`   rb   s      r]   restore_multidots MosesTokenizer.restore_multidotsa  sS    ::n-ood##<<d3D ood##vvk4..rg   c                 f    [        U5      R                  [        U R                  5      5      (       + $ N)set
differencer   rU   rc   s     r]   islowerMosesTokenizer.islowerg  s#    t9''DLL(9:::rg   c                 n    [        [        U5      R                  [        U R                  5      5      5      $ rn   )anyro   intersectionr   rq   s     r]   
isanyalphaMosesTokenizer.isanyalphaj  s%    3t9))#dll*;<==rg   c                 B    [        [        R                  " SU5      5      $ )Nz[\s]+(\#NUMERIC_ONLY\#))boolrO   ra   rq   s     r]   rH   MosesTokenizer.has_numeric_onlym  s    BII8$?@@rg   c                 D   UR                  5       n[        U5      n[        U5       H  u  pE[        R                  " SU5      nU(       d  M%  UR                  S5      nSU;   a  U R                  U5      (       dP  XpR                  ;   a  XpR                  ;  d2  XCS-
  :w  a,  X$S-      (       a   U R                  X$S-      S   5      (       a  M  XpR                  ;   a,  US-   U:  a#  [        R                  " SX$S-      5      (       a  M  US-   X$'   M     SR                  U5      $ )Nz	^(\S+)\.$   rj   r   z^[0-9]+z .r   )splitlen	enumeraterO   ra   grouprw   rD   rJ   rr   rL   )rU   rc   tokens
num_tokensitokentoken_ends_with_periodprefixs           r]   handles_nonbreaking_prefixes+MosesTokenizer.handles_nonbreaking_prefixesp  s   [
!&)HA%'YY|U%C"%%/55a8 F]tv'>'>";";;"*D*DD !^+"q5M LLAq)9::  888Q*,		*fUm<< &FIE *F xxrg   c                 R    U R                    H  u  p#UR                  X15      nM     U$ rn   )MOSES_ESCAPE_XML_REGEXESr`   rU   rc   regexpsubstitutions       r]   
escape_xmlMosesTokenizer.escape_xml  s)    $($A$A F::l1D %Brg   c                    [        U5      nU R                   H  u  p4UR                  XA5      nM     U R                  U5      nU R                   H  u  p4UR                  XA5      nM     U(       a  U$ UR                  5       $ )zj
This is a Python port of the Penn treebank tokenizer adapted by the Moses
machine translation community.
)rK   MOSES_PENN_REGEXES_1r`   r   MOSES_PENN_REGEXES_2r~   )rU   rc   
return_strr   r   s        r]   penn_tokenizeMosesTokenizer.penn_tokenize  st     4y$($=$= F::l1D %> 006$($=$= F::l1D %>!t3tzz|3rg   c                 l   [        U5      nU R                  U R                  4 H  u  pgUR                  Xq5      nM     U(       a  U Vs/ s H(  n[        R
                  " U[        R                  5      PM*     nnU V	V
s/ s H,  n	U	R                  U5        H  n
U
R                  5       PM     M.     nn	n
[        U5      S::  d   e[        [        U5      S SS9 H3  u  pS[        U5      R                  S5      -   nUR                  X5      nM5     UR                  5       n U R                  u  pgUR                  Xq5      nU(       a  U R                   u  pgUR                  Xq5      nU R#                  U5      nU R$                  U R&                  U R(                  4 H  u  pgUR                  Xq5      nM     U R*                  S:X  a'  U R,                   H  u  pgUR                  Xq5      nM     OVU R*                  S;   a'  U R.                   H  u  pgUR                  Xq5      nM     OU R0                  u  pgUR                  Xq5      nU R3                  U5      nU R                  u  pgUR                  Xq5      R                  5       nU R4                  u  pnUR                  X5      nU(       aB  [        W5       H3  u  pS[        U5      R                  S5      -   nUR                  X5      nM5     U R7                  U5      nU(       a  U R9                  U5      nU(       a  U$ UR;                  5       $ s  snf s  sn
n	f )	z
Python port of the Moses tokenizer.

    :param tokens: A single string, i.e. sentence text.
    :type tokens: str
    :param aggressive_dash_splits: Option to trigger dash split rules .
    :type aggressive_dash_splits: bool
i  c                     [        U S   5      $ )Nr}   )r   )pairs    r]   <lambda>)MosesTokenizer.tokenize.<locals>.<lambda>  s    PSTXYZT[P\rg   T)keyreverseTHISISPROTECTED   en)frit)rK   DEDUPLICATE_SPACE
ASCII_JUNKr`   rO   rP   
IGNORECASEfinditerr   r   sortedr   zfillreplacerC   rR   rS   re   COMMA_SEPARATE_1COMMA_SEPARATE_2COMMA_SEPARATE_3r@   ENGLISH_SPECIFIC_APOSTROPHEFR_IT_SPECIFIC_APOSTROPHENON_SPECIFIC_APOSTROPHEr   TRAILING_DOT_APOSTROPHErk   r   r~   )rU   rc   aggressive_dash_splitsr   escapeprotected_patternsr   r   pprotected_patternmatchprotected_tokensr   r   substituitions                  r]   tokenizeMosesTokenizer.tokenize  s   " 4y%)%;%;T__$M F::l1D %N HZ![HZ1"**Q">HZ![ *< );%.77=E = );   
 '(D000 #9-=#>D\fjk 1CFLLO C||E9 l
 zz|	  $33zz,-!#'#?#? F::l1D %%d+ !!!!!!%
 F
 ::l1D%
 99(,(H(H$zz,5 )IYY,&(,(F(F$zz,5 )G $(#?#? F::l1D 006#55zz,-335 $ < <zz-. %&67 1CFLLO C||M9 8
 %%d+??4(D!t3tzz|3m "\ s   	/L+?3L0)rS   rT   rD   rJ   rR   r@   )r   N)F)FFTN)__name__
__module____qualname____firstlineno____doc__rK   rL   rM   rN   r   r   r   r   r   r   r   r   rO   rP   r   r   	MID_STRIP
LEFT_STRIPRIGHT_STRIPrQ   rR   rS    REPLACE_DOT_WITH_LITERALSTRING_1 REPLACE_DOT_WITH_LITERALSTRING_2 REPLACE_DOT_WITH_LITERALSTRING_3r   r   r   DIRECTIONAL_QUOTE_1DIRECTIONAL_QUOTE_2DIRECTIONAL_QUOTE_3DIRECTIONAL_QUOTE_4DIRECTIONAL_QUOTE_5DIRECTIONAL_QUOTE_6DIRECTIONAL_QUOTE_7DIRECTIONAL_QUOTE_8REPLACE_ELLIPSISRESTORE_ELLIPSISCOMMA_1COMMA_2COMMA_3SYMBOLSrT   FINAL_PERIODPAD_QUESTION_EXCLAMATION_MARKPAD_PARENTHESISCONVERT_PARENTHESIS_1CONVERT_PARENTHESIS_2CONVERT_PARENTHESIS_3CONVERT_PARENTHESIS_4CONVERT_PARENTHESIS_5CONVERT_PARENTHESIS_6PAD_DOUBLE_DASHESPAD_START_OF_STRPAD_END_OF_STRCONVERT_DOUBLE_TO_SINGLE_QUOTESHANDLES_SINGLE_QUOTES
APOSTROPHECONTRACTION_1CONTRACTION_2CONTRACTION_3CONTRACTION_4CONTRACTION_5CONTRACTION_6CONTRACTION_7CONTRACTION_8CONTRACTION_9CONTRACTION_10CONTRACTION_11CONTRACTION_12CONTRACTION_13CONTRACTION_14CONTRACTION_15CONTRACTION_16CONTRACTION_17CONTRACTION_18CONTRACTION_19CLEAN_EXTRA_SPACE_1CLEAN_EXTRA_SPACE_2CLEAN_EXTRA_SPACE_3ESCAPE_AMPERSANDESCAPE_PIPEESCAPE_LEFT_ANGLE_BRACKETESCAPE_RIGHT_ANGLE_BRACKETESCAPE_SINGLE_QUOTEESCAPE_DOUBLE_QUOTEESCAPE_LEFT_SQUARE_BRACKETESCAPE_RIGHT_SQUARE_BRACKETEN_SPECIFIC_1EN_SPECIFIC_2EN_SPECIFIC_3EN_SPECIFIC_4EN_SPECIFIC_5r   FR_IT_SPECIFIC_1FR_IT_SPECIFIC_2FR_IT_SPECIFIC_3FR_IT_SPECIFIC_4r   r   r   BASIC_PROTECTED_PATTERN_1BASIC_PROTECTED_PATTERN_2BASIC_PROTECTED_PATTERN_3BASIC_PROTECTED_PATTERN_4BASIC_PROTECTED_PATTERN_5r   r   r   BASIC_PROTECTED_PATTERNSWEB_PROTECTED_PATTERNSr?   re   rk   rr   rw   rH   r   r   r   r   __static_attributes____classcell__r\   s   @r]   r	   r	      s    bggl((/0
1C
""9-.1AABGGFOSG rww|))&123Drww|))&123D
""9-.1AABGGFOSG "'',,,Y789G 

6*D0N+S0J IJK jj!6!=!=g!FGPO
 	

6==w=OP (*zz,'?'N$')zz2F'GIY'Y$')zz-'@-'O$ zz-"6"6s";<hFzz-"6"6s";<hFzz-"6"6s";<hF **V,f4**U+V3**[17:**U+V3**]3Y>**^4i?**%89:E**]3X= zz),o=zz-0); jj;BB3BOPR\\Gjj:AA#ANOQ[[Gjj:AA#ANOQ[[G jj,33D$?@'IG 	*00'0B ::@A:ML$&JJy$97$B! jj!45w>OJJu-w6JJu-w6JJu-w6JJu-w6JJu-w6JJu-w6 

5)61 zz$',ZZ%s*N ')jj&6&>#JJ{3X= J'1J JJ/9MJJw'1MJJw'1MJJw'1MJJw'1MJJw'1MJJw'1MJJw'1MJJw'1M ZZ 01=@NZZ/<NZZ0,>NZZ0,>NZZ0,>NZZ0,>NZZ 01=@NZZ/<NZZ0,>NZZ0,>N **V,d2**V,c1**V,c1 zz$'1**U#Y.K "

4 0' 9!#D!17!:**U+Y6**U+Y6!#E!2H!<"$**T"2H"<JJ=DD7DSTV``M


5<<7PS<TUM JJ<CC'CRSU__MJJ;BBBQRT]]MJJ3::s:CDiOM 	# zz"@"G"Gg"G"VWYcczz"?"F"FW"F"UVXbbzz"?"F"FW"F"UVXbbzz">"E"EG"E"TUW`` 		! !jj/6 jj3W< . F F J R333 	3 		3
 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3  	!3" 	#3$ 	&%3& 	'3( 	)3* 	+3, 	-3. 	/30 	132 	334 	536 	738 	93: 	(;3< 	=3> 	?3@ 	A3B 	C3D 	E3F 	G3H 	I3J 	K3L 	M3N 	O3P 	Q3R 	S3T 	U3V 	W3X 	Y3Z 	[3\ 	]3^ 	_3` 	a3b 	c3d 	e3l 	!" 	!""#	  	"!!!! /b/;>A' R
4(  %m4 m4rg   r	   c                   X  ^  \ rS rSrSr\" SR                  \R                  S5      5      5      r	\" SR                  \R                  S5      5      5      r
\" SR                  \R                  S5      5      5      r\R                  " S5      S4r\R                  " S	5      S
4r\R                  " S5      S4r\R                  " S5      S4r\R                  " S5      S4r\R                  " S5      S4r\R                  " S5      S4r\R                  " S5      S4r\R                  " S5      S4r\R                  " S5      S4r\R                  " S5      S4r\R                  " S5      S4r\R                  " S5      S4r\\\\\\\\\\\/r/ SQr/ SQr/ S Qr\R                  " S!R?                  SR                  \5      SR                  \5      SR                  \5      5      5      r \R                  " S"R?                  \5      5      r!\R                  " S#R?                  \
5      5      r"\R                  " S$R?                  \
5      5      r#\R                  " S%R?                  \
5      5      r$\R                  " S&5      r%\R                  " S'5      r&S-U 4S( jjr'S) r(S.S* jr)S.S+ jr*S,r+U =r,$ )/MosesDetokenizeri  z
This is a Python port of the Moses Detokenizer from
https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/detokenizer.perl

r   r   r   r   z \@\-\@ -z {2,}r   r&   |r(   r'   r*   r)   r,   r#   r+   'r-   [r/   r.   r%   r$   z&bar;z&bra;z&ket;)6NnAa   Ä   ässaSsau   ssäu   Ssästau   stäStau   StähunHunhynHynhanr;      hänu   Hänu   hönu   HönunUnynYnanAnu   änu   Änu   önu   ÖnseenSeenllaLlau   lläu   LlältaLtau   ltäu   LtälleLleksiKsikseKsettaTtaineIne)nisimmennensa)	r8   u   kör(  r)  pau   päkaanu   käänkinz^({})({})?({})$u   ^[{}\(\[\{{\¿\¡]+$z^['][{}]z[{}][']$z^[{}]z^[\,\.\?\!\:\;\\\%\}\]\)]+$u   ^[\'\"„“`]+$c                 6   > [         [        U ]  5         Xl        g rn   )r>   r  r?   r@   )rU   r@   r\   s     r]   r?   MosesDetokenizer.__init__  s    .0	rg   c                 R    U R                    H  u  p#UR                  X15      nM     U$ rn   )MOSES_UNESCAPE_XML_REGEXESr`   r   s       r]   unescape_xmlMosesDetokenizer.unescape_xml  s)    $($C$C F::l1D %Drg   c                 

   SR                  SR                  U5      5      n[        U5      nU R                  u  pVUR	                  Xd5      nU(       a  U R                  U5      nSSSSSS.nSnSn	UR                  5       n[        [        U5      5       GH%  u  p[        US   5      (       a>  U R                  S:w  a.  U
S:  a  [        XS-
     S   5      (       a  X-  n	OXU-   -  n	SnMW  U R                  R                  U5      (       a  XU-   -  n	SnM  U R                  R                  U5      (       a9  U R                  S	:X  a!  [        R                  " S
U5      (       a  U	S-  n	X-  n	SnM  U R                  S:X  a/  U
S:  a)  U R                  R                  U5      (       a	  X-  n	SnGM  U R                  S:X  ai  U
S:  ac  [        R                  " SUS   5      (       aD  [        R                  " SUS   5      (       a%  [        R                  " SU5      (       a	  X-  n	SnGM  U R                  S;   ac  U
[!        U5      S-
  ::  aQ  U R"                  R                  U5      (       a1  U R$                  R                  XS-      5      (       a  XU-   -  n	SnGM  U R                  S:X  a  U
[!        U5      S-
  ::  a  U R"                  R                  U5      (       aq  [        R                  " SXS-      5      (       aP  [        R                  " SXS-      [        R&                  5      (       a   XU-   XS-      -   -  n	[)        US5        SnGM  U R*                  R                  U5      (       a  Un[        R                  " SU5      (       a  SnUR-                  US5      X|'   U R                  S:X  a
  US:X  a  SX|'   U R                  S:X  a
  US:X  a  SX|'   X|   S-  S:X  a^  U R                  S:X  a6  US:X  a0  U
S:  a*  [        R                  " SXS-
     5      (       a	  X-  n	SnGM  XU-   -  n	SnX|==   S-  ss'   GM  X-  n	SnX|==   S-  ss'   GM  U R                  S:X  aM  [        R                  " SXS-
     5      (       a,  U R.                  R                  U5      (       a  XU-   -  n	SnGM  XU-   -  n	SnGM(     U R0                  u  pVUR	                  Xi5      n	U	R3                  5       n	U(       a  U	$ U	R                  5       $ )z
Python port of the Moses detokenizer.
:param tokens: A list of strings, i.e. tokenized text.
:type tokens: list(str)
:return: str
z {} r   r   )r  r#   z```z''r   r8   r}   r   z^[\?\!\:\;\\\%]$r   csz^[0-9]+$z^[.,]$)r   r   ga   r   u   ^[-–]$z^li$|^mail.*Nu   ^[„“”]+$r#   u   „u   “r  z[s]$fiz:$)rQ   rL   rK   rS   r`   rL  r~   r   iterr   r@   IS_CURRENCY_SYMBOLra   IS_PUNCTrO   IS_ENGLISH_CONTRACTIONr   IS_FRENCH_CONRTACTIONSTARTS_WITH_ALPHAr   nextIS_OPEN_QUOTEgetFINNISH_REGEX	ONE_SPACErC   )rU   r   r   unescaperc   r   r   quote_countsprepend_spacedetokenized_textr   r   normalized_quos                r]   r   MosesDetokenizer.tokenize  s    ~~chhv./4y#;;zz,-$$T*DQaaqA !$v,/HAeAhDII$5q5VFq5M"$566$-$ %(==$ #((//66 E$99  "%%e,,99$3F)N)N$+$ )  # 		T!E//66u== !)  # 		T!EII  IIi44IIk511 !)  # 		//Vq(..55e<<**11&Q-@@ !E$99  " 		T!Vq(..55e<<IIk6a%=99IIov!e}bmmLL !E$9Fq5M$II VT" " ##**511!&99.66%(N/;/?/?PQ/R,99$%34L099$%34L0/!3q8		T)!SLEIIgv!e}== )1((+ )E,AA((*$494 %-$$'M 0A50 		T!IIeVE]33&&--e44 !E$99  # !E$99  #g 0l  $~~!::lE+113#-K3C3I3I3KKrg   c                 &    U R                  XU5      $ )z&Duck-typing the abstract *tokenize()*.)r   )rU   r   r   ra  s       r]   
detokenizeMosesDetokenizer.detokenizeG  s    }}V::rg   )r@   )r   )TT)-r   r   r   r   r   rK   rL   rM   rN   r   r   r   rO   rP   rS   r`  UNESCAPE_FACTOR_SEPARATORUNESCAPE_LEFT_ANGLE_BRACKETUNESCAPE_RIGHT_ANGLE_BRACKETUNESCAPE_DOUBLE_QUOTEUNESCAPE_SINGLE_QUOTE UNESCAPE_SYNTAX_NONTERMINAL_LEFT!UNESCAPE_SYNTAX_NONTERMINAL_RIGHTUNESCAPE_AMPERSAND UNESCAPE_FACTOR_SEPARATOR_LEGACY'UNESCAPE_SYNTAX_NONTERMINAL_LEFT_LEGACY(UNESCAPE_SYNTAX_NONTERMINAL_RIGHT_LEGACYrK  FINNISH_MORPHSET_1FINNISH_MORPHSET_2FINNISH_MORPHSET_3rQ   r_  rW  rY  rZ  r[  rX  r]  r?   rL  r   rh  r  r  r  s   @r]   r  r    s    "'',,,Y789G"'',,,Y789Grww|))&123D jj5t; 

8$c)I !#

9 5t ;"$**W"5t";#%::g#6#< JJy147JJy147')zz(';T'A$(*

8(<d(B%H-t3')zz(';T'A$.0jj.BD.H+/1zz(/CT/I, 	)!#$/0()"7r ;
 JJ188#$#$#$  M $;$B$B4$HIZZ(:(:7(CDJJ{'9'9''BC

8??7#;<zz89HJJ67M
WLr; ;rg   r  )rO   sacremoses.corpusr   r   sacremoses.utilr   sacremoses.indicr   r   rM   rA   objectr	   r  __all__ rg   r]   <module>r~     sQ    
 * 1 " ,~*, M4V M4`j;v j;Z	 /
0rg   