
    QiQ                         S SK JrJr  S SKJrJr  S SKJrJr  S SK	J	r	   " S S\
5      r\" 5       R                  rS rS rS	 rS
 rSS jrSS jrg)    )teezip_longest)escapeunescape)Paralleldelayed)tqdmc                   b    \ rS rSrSrSrSrSrSrSr	Sr
S	rS
rSrSrSrSr\\\\\	\
\\\\\/rSrg)CJKChars   z
An object that enumerates the code points of the CJK characters as listed on
http://en.wikipedia.org/wiki/Basic_Multilingual_Plane#Basic_Multilingual_Plane
)i   i  )i.  iϤ  )i@  i  )i   i  )i   i  )i0  iO  )ie  i  )io io )i p i )i  i/ )ip i )i   i  N)__name__
__module____qualname____firstlineno____doc__Hangul_JamoCJK_RadicalsPhags_PaHangul_SyllablesCJK_Compatibility_IdeographsCJK_Compatibility_FormsKatakana_Hangul_Halfwidth#Ideographic_Symbols_And_PunctuationTangutKana_SupplementNushuSupplementary_Ideographic_Planeranges__static_attributes__r       a/var/www/html/backend/Backoffice_Marketplace/venv/lib/python3.13/site-packages/sacremoses/util.pyr   r      s     K* "L H & $2  - !/+' F 'O E'# 	$!'Fr!   r   c                 N    [        U 5      n[         H  u  p#X:  d  M  X:  s  $    g)u  
This checks for CJK character.

    >>> CJKChars().ranges
    [(4352, 4607), (11904, 42191), (43072, 43135), (44032, 55215), (63744, 64255), (65072, 65103), (65381, 65500), (94208, 101119), (110592, 110895), (110960, 111359), (131072, 196607)]
    >>> is_cjk('㏾')
    True
    >>> is_cjk('﹟')
    False

:param character: The character that needs to be checked.
:type character: char
:return: bool
F)ord_CJKChars_ranges)	charactercharstartends       r"   is_cjkr*   a   s+     y>D&
:< ' r!   c           	      "    [        U SSSSSS.S9$ )aO  
This function transforms the input text into an "escaped" version suitable
for well-formed XML formatting.
Note that the default xml.sax.saxutils.escape() function don't escape
some characters that Moses does so we have to manually add them to the
entities dictionary.

    >>> input_str = ''')| & < > ' " ] ['''
    >>> expected_output =  ''')| &amp; &lt; &gt; ' " ] ['''
    >>> escape(input_str) == expected_output
    True
    >>> xml_escape(input_str)
    ')&#124; &amp; &lt; &gt; &apos; &quot; &#93; &#91;'

:param text: The text that needs to be escaped.
:type text: str
:rtype: str
&apos;&quot;&#124;&#91;&#93;)'"|[]entities)r   texts    r"   
xml_escaper:   w   s(    & 
	 	r!   c           	      "    [        U SSSSSS.S9$ )a1  
This function transforms the "escaped" version suitable
for well-formed XML formatting into humanly-readable string.
Note that the default xml.sax.saxutils.unescape() function don't unescape
some characters that Moses does so we have to manually add them to the
entities dictionary.

    >>> from xml.sax.saxutils import unescape
    >>> s = ')&#124; &amp; &lt; &gt; &apos; &quot; &#93; &#91;'
    >>> expected = ''')| & < > ' " ] ['''
    >>> xml_unescape(s) == expected
    True

:param text: The text that needs to be unescaped.
:type text: str
:rtype: str
r1   r2   r3   r4   r5   )r,   r-   r.   r/   r0   r6   )r   r8   s    r"   xml_unescaper<      s(    $ 
	 	r!   c                 J    [        U 5      u  p[        US5        [        X5      $ )zd
From https://docs.python.org/3/library/itertools.html#recipes
s -> (s0,s1), (s1,s2), (s2, s3), ...
N)r   nextzip)iterableabs      r"   pairwiserC      s"    
 x=DADMq9r!   Nc                 6    [        U 5      /U-  n[        USU06$ )zaCollect data into fixed-length chunks or blocks
from https://stackoverflow.com/a/16789869/610569
	fillvalue)iterr   )r@   nrE   argss       r"   grouperrI      s%    
 NaD2	22r!   c                    ^  U(       a  [        U5      OUnUS::  a  [        T U5      $ [        US9" U 4S jU 5       5      $ )N   )n_jobsc              3   F   >#    U  H  n[        T5      " U5      v   M     g 7fN)r   ).0linefuncs     r"   	<genexpr>)parallelize_preprocess.<locals>.<genexpr>   s     %OhdgdmD&9&9hs   !)r	   mapr   )rQ   iterator	processesprogress_bars   `   r"   parallelize_preprocessrX      s:    !-tH~8HA~4""9%%Oh%OOOr!   rN   )F)	itertoolsr   r   xml.sax.saxutilsr   r   joblibr   r   r	   objectr   r   r%   r*   r:   r<   rC   rI   rX   r   r!   r"   <module>r]      sQ    ' - $ Pv Pf :$$ ,><3Pr!   