
    Ti"                       S SK Jr  S SKrS SKJr  S SKJr   S SKr S SK	r	S SK
JrJr  S SKJrJr  S SKJr  S SKJr  S S	KJrJr  \" \R2                  S
-  5      \l        SS jrSr\R:                  \R<                  R>                  :X  a
  \b  \" 5       r " S S5      r  " S S\ 5      r! " S S\ 5      r" " S S\ 5      r#Sr$Sr%Sr&Sr'SS jr( S     S S jjr)S!S jr*S"S jr+ S     S#S jjr,g! \ a    Sr Nf = f! \ a    Sr	 Nf = f)$    )annotationsN)SequenceMatcher)List)SBDetectmodels)packagesettings)cache_spacy)Package)infowarningminisbdc                h     X   R                  S5      (       a  S$ S$ ! [        [        4 a     gf = f)zFGet appropriate processors for a language, including MWT if available.mwtztokenize,mwttokenize)getKeyError	TypeError)	lang_code	resourcess     d/var/www/html/backend/Backoffice_Marketplace/venv/lib/python3.13/site-packages/argostranslate/sbd.pyget_stanza_processorsr      s<    !*!5!9!9%!@!@~PjPi  s     11c                  *    \ rS rSr% S\S'   SS jrSrg)ISentenceBoundaryDetectionModel(   r   pkgc                    [         e)N)NotImplementedErrorselftexts     r   split_sentences/ISentenceBoundaryDetectionModel.split_sentences,   s    !!     Nr!   strreturnz	List[str])__name__
__module____qualname____firstlineno____annotations__r"   __static_attributes__r%   r$   r   r   r   (   s    	L"r$   r   c                  .    \ rS rSrSS jrSS jrS rSrg)	SpacySentencizerSmall5   c                d   Xl         [        c  [        S5      eUR                  b?  S[	        UR                  5      ;   a&  [        R
                  " UR                  S/S9U l        O1[        c  [        S5      e[        R
                  " [        S/S9U l        U R                  R                  S5        g)z
Packaging specific spacy when "xx_sent_ud_sm" doesn't cover the language improves performances over stanza.
Please use small models ".._core/web_sm" for consistency.
NzBSpaCy is not installed. Install spacy or change ChunkType settingsspacyparser)excludezSpaCy cache not initializedsentencizer)	r   r3   RuntimeErrorpackaged_sbd_pathr'   loadnlp_cached_spacy_pathadd_piper    r   s     r   __init__SpacySentencizerSmall.__init__6   s    
 =cdd  ,C@U@U<V1Vzz#"7"7(LDH ")"#@AAzz"4xjIDH-(r$   c                    [        SU R                  R                   S[        U 5       35        U R	                  U5      nUR
                   Vs/ s H  o3R                  PM     sn$ s  snf Nz&Splitting sentences using SBD Model: (z) )r   r   	from_coder'   r:   sentsr!   r    r!   docsents       r   r"   %SpacySentencizerSmall.split_sentencesH   sS    5dhh6H6H5ICPTI;WXhhtn&)ii0id		i000s   A&c                    g)NzUsing Spacy model.r%   r    s    r   __str__SpacySentencizerSmall.__str__M       #r$   )r:   r   Nr   r   r&   )r)   r*   r+   r,   r>   r"   rJ   r.   r%   r$   r   r0   r0   5   s    )$1
$r$   r0   c            	      J    \ rS rSrSSSSSSSSS.rSS	 jrS
 rSS jrS rSr	g)MiniSBDSentencizerQ   zh-hantzzh-hanspttrhien)ztzhpbazbneomstlc                   Xl         UR                  S-  nS nUR                  5       (       aT  [        R                  " U5       Vs/ s H  oDR                  S5      (       d  M  UPM     nnU(       a  [        X%S   -  5      nUb  UnOmU R                  R                  UR                  UR                  5      nU[        R                  " 5       ;  a$  [        U R                   R                   S35        SnX`l        S U l        g s  snf )Nr   z.onnxr   z0 is not available in MiniSBD, falling back to enrU   )r   package_pathexistsoslistdirendswithr'   LANGUAGE_CODE_MAPPINGr   rB   minisbd_modelslist_modelsr   langdetector)r    r   sbd_path
model_filefmodel_filesrg   s          r   r>   MiniSBDSentencizer.__init__`   s    ##i/
??&(jj&:R&:jj>Q1&:KR N!:;
!D --11s}}D
 >5577488--..^_`	' Ss   D"Dc                    U R                   c*  [        U R                  [        R                  S:H  S9U l         U R                   $ )Ncuda)use_gpu)rh   r   rg   r	   devicerI   s    r   lazy_detector MiniSBDSentencizer.lazy_detector|   s1    == $TYY68QRDM}}r$   c                    [        SU R                  R                   S[        U 5       35        U R	                  5       R                  U5      $ rA   )r   r   rB   r'   rr   	sentencesr   s     r   r"   "MiniSBDSentencizer.split_sentences   sB    5dhh6H6H5ICPTI;WX!!#--d33r$   c                    g)NrO   r%   rI   s    r   rJ   MiniSBDSentencizer.__str__   rL   r$   )rh   rg   r   NrM   r&   )
r)   r*   r+   r,   rd   r>   rr   r"   rJ   r.   r%   r$   r   rO   rO   Q   s8     8
4$r$   rO   c                  >    \ rS rSrSSS.rSS jrS rSS jrS rS	r	g
)StanzaSentencizer   rQ   rR   )rV   rX   c                    Xl         [        c  [        S5      eU R                  R	                  UR
                  UR
                  5      U l        S U l        g )NzDStanza is not installed. Install stanza or change ChunkType settings)r   stanzar7   rd   r   rB   stanza_lang_codestanza_pipeliner=   s     r   r>   StanzaSentencizer.__init__   sH    >eff $ : : > >MM3==!
  $r$   c                    U R                   cX  [        R                  " U R                  [	        U R
                  R                  S-  5      S[        R                  S:H  SS9U l         U R                   $ )Nr}   r   ro   WARNING)rg   dir
processorsrp   logging_level)	r   r}   Pipeliner~   r'   r   r_   r	   rq   rI   s    r   lazy_pipelineStanzaSentencizer.lazy_pipeline   s_    '#)??**--89% 61'$D  ###r$   c                    [        SU R                  R                   S[        U 5       35        U R	                  5       " U5      nUR
                   Vs/ s H  o3R                  PM     sn$ s  snf rA   )r   r   rB   r'   r   ru   r!   rD   s       r   r"   !StanzaSentencizer.split_sentences   sY    5dhh6H6H5ICPTI;WX  "4(&)mm4md		m444s   A+c                    g)Nrz   r%   rI   s    r   rJ   StanzaSentencizer.__str__   s    "r$   )r   r~   r   NrM   r&   )
r)   r*   r+   r,   rd   r>   r   r"   rJ   r.   r%   r$   r   rz   rz      s$    
	$	$5
#r$   rz   a  <detect-sentence-boundaries> I walked down to the river. Then I went to the
I walked down to the river. <sentence-boundary>
----------
<detect-sentence-boundaries> Argos Translate is machine translation software. It is also
Argos Translate is machine translation software. <sentence-boundary>
----------
<detect-sentence-boundaries> Argos Translate is written in Python and uses OpenAI. It also supports
Argos Translate is written in Python and uses OpenAI. <sentence-boundary>
----------
z<detect-sentence-boundaries>z<sentence-boundary>z
----------c                 h    [         R                  " 5       n U  H  nUR                  S:X  d  M  Us  $    g )Nsbd)r   get_installed_packagestype)packagesr   s     r   get_sbd_packager      s0    --/H88uJ  r$   c                @    U S U n[         S-   U-   n[        SU5        U$ )Nz<detect-sentence-boundaries> generate_fewshot_sbd_prompt)fewshot_promptr   )
input_textsentence_guess_lengthsentence_guess	to_returns       r   r   r      s3       6!67N!@@>QI	&	2r$   c                    U R                  [        5      n[        SU5        [        U5      S:  a  g US   R                  S5      n[        U5      S:  a  g US   $ )Nparse_fewshot_response   
)splitFEWSHOT_BOUNDARY_TOKENr   len)response_textresponses     r   r   r      sZ    ""#9:H	!8,
8}q|!!$'H
8}qB<r$   c                    UR                  [        5      nUS:w  as  US U n[        SU5        SnSn[        [	        U 5      5       HD  nU S U n[        5       nUR                  Xa5        UR                  5       nUS:X  d  X:  d  M@  UnUnMF     U$ g)Nr   zsbd_translated_guess:r   g        )findSENTENCE_BOUNDARY_TOKENr   ranger   r   set_seqsratio)	r   sbd_translated_guesssbd_translated_guess_index
best_index
best_ratioicandidate_sentencesmr   s	            r   process_seq2seq_sbdr      s    !5!:!:;R!S!R'34O5OP$&:;

s:'A!+BQ "BKK*AHHJEAv+
"
 ( r$   c                j    U SU n[        SU5        UR                  [        U-   5      n[        X5      $ )ax  Given input text, return the index after the end of the first sentence.

Args:
    input_text: The text to detect the first sentence of.
    sbd_translation: An ITranslation for detecting sentences.
    sentence_guess_length: Estimated number of chars > than most sentences.

Returns:
    The index of the character after the end of the sentence.
            -1 if not found.
Nzsentence_guess:)r   	translate DETECT_SENTENCE_BOUNDARIES_TOKENr   )r   sbd_translationr   r   r   s        r   detect_sentencer      sC       6!67N	N+*44(>9 z@@r$   )r   r'   r   dictr(   r'   )r(   zPackage | None)   )r   r'   r   intr(   r'   )r   r'   r(   z
str | None)r   r'   r   r'   r(   r   )r   r'   r   r   r(   r   )-
__future__r   ra   difflibr   typingr   r3   ImportErrorr}   r   r   r   re   argostranslater   r	   argostranslate.networkingr
   argostranslate.packager   argostranslate.utilsr   r   r'   data_dir	cache_dirr   r;   
chunk_type	ChunkTypeSPACYr   r0   rO   rz   r   r   r   r   r   r   r   r   r   r%   r$   r   <module>r      sR   " 	 #  7 , 1 * .x009<=   (,,222u7H$" "$; $85$8 5$p"#7 "#V	 $B  / !  36,/* DGAA=@AAW  E
  Fs"   C1 C? 1C<;C<?D
	D
