
    Ki&                        S SK r S SKrS SKr SSKJr  \R                  r\R                  rSr	Sr
SrSr \R                  r\R$                  r\R(                  \S4   r\R.                  \   r\R.                  \   r\R.                  \   r\R.                  \   r\R.                  \R>                     r      S@S	\RB                  S
\
S\S\RD                  S\#S\$4S jjr%      SAS	\RB                  S
\
S\S\RD                  S\#S\$4S jjr&    SBS	\RB                  S
\
S\S\RD                  S\4
S jjr' SCS	\RB                  S\
S\RD                  S\4S jjr(  SDS	\RB                  S\	S\	S
\
S\RD                  4
S jjr)     SES	\RB                  S\S\S\S\#S\S\RD                  4S jjr* SFSSSSSSS.S	\RB                  S\S
\
S\S\RD                  S\#4S jjjr+SCS\4S  jjr,S!\S"\S\4S# jr-S	\RB                  S$\S\4S% jr.S\$4S& jr/S\$4S' jr0S(\S\14S) jr2S(\S\14S* jr3S+\Rh                  S!\S\14S, jr5S- r6 S. r7S/ r8S\4S0 jr9S\4S1 jr:S2\S\4S3 jr;S4\1S5\S6\1S\Rx                  4S7 jr=S4\1S5\S\Rx                  4S8 jr>SCS9\S:\$S\Rx                  4S; jjr?SCS4\1S5\S<\$S\Rx                  4S= jjr@S4\1S5\S>\S\Rx                  4S? jrAg! \ a    S SKr GNf = f! \ a    \\-  \-  r GNf = f)G    N   )pymupdf
point_like	rect_likematrix_like	quad_likeFpageclipflagstextpagesortreturnc                    [         R                  " U 5        Uc  [         R                  nUnUc  U R                  XS9nO[	        US5      U :w  a  [        S5      eUR                  5       nUc  AU(       a  UR                  S S9  U$ )a?  Return the text blocks on a page.

Notes:
    Lines in a block are concatenated with line breaks.
Args:
    flags: (int) control the amount of data parsed into the textpage.
Returns:
    A list of the blocks. Each item contains the containing rectangle
    coordinates, text lines, running block number and block type.
r
   r   parentnot a textpage of this pagec                     U S   U S   4$ N   r    )bs    _/var/www/html/backend/Backoffice_Marketplace/venv/lib/python3.13/site-packages/pymupdf/utils.py<lambda>!get_text_blocks.<locals>.<lambda>N   s    1Q41,    key)r   CheckParentTEXTFLAGS_BLOCKSget_textpagegetattr
ValueErrorextractBLOCKSr   )r	   r
   r   r   r   tpblockss          r   get_text_blocksr&   0   s    " }((	B	zD6	X	$	&677F./Mr   r   c                   ^ U4S jn[         R                  " U 5        Uc  [         R                  nUnUc  U R                  XS9nO[	        US5      U :w  a  [        S5      eUR                  U5      n	Ubd  Uba  [         R                  " U5      nU	 V
s/ s H>  n
[        XSS -  5      S[        [         R                  " U
SS 5      5      -  :  d  M<  U
PM@     n	n
Uc  AU	(       a  U(       a  U" U	5      n	U	$ s  sn
f )ah  Return the text words as a list with the bbox for each word.

Args:
    page: pymupdf.Page
    clip: (rect-like) area on page to consider
    flags: (int) control the amount of data parsed into the textpage.
    textpage: (pymupdf.TextPage) either passed-in or None.
    sort: (bool) sort the words in reading sequence.
    delimiters: (str,list) characters to use as word delimiters.
    tolerance: (float) consider words to be part of the same line if
        top or bottom coordinate are not larger than this. Relevant
        only if sort=True.

Returns:
    Word tuples (x0, y0, x1, y1, "word", bno, lno, wno).
c                   > U R                  S S9  / nU S   /n[        R                  " U S   SS 5      nU SS  H  n[        R                  " USS 5      n[        UR                  UR                  -
  5      T::  d&  [        UR
                  UR
                  -
  5      T::  a  UR                  U5        X5-  nM  UR                  S S9  UR                  U5        U/nUnM     UR                  S S9  UR                  U5        U$ )	z1Sort words line-wise, forgiving small deviations.c                     U S   U S   4$ r   r   ws    r   r   4get_text_words.<locals>.sort_words.<locals>.<lambda>n   s    !A$!r   r   r   N   r   c                     U S   $ Nr   r   r*   s    r   r   r,   {   s    !r   c                     U S   $ r/   r   r*   s    r   r   r,      s    !r   )r   r   Rectabsy0y1appendextend)wordsnwordslinelrectr+   wrect	tolerances         r   
sort_words"get_text_words.<locals>.sort_wordsl   s    

-
.azU1Xbq\*qrALL2A'EEHHuxx'(I5uxx%((*+y8A		n	-d#s  			n	%dr   Nr   r   r   r-   g      ?)	r   r   TEXTFLAGS_WORDSr    r!   r"   extractWORDSr1   r2   )r	   r
   r   r   r   
delimitersr<   r=   r$   r7   r+   s         `    r   get_text_wordsrB   R   s    42 }''	B	zD6	X	$	&677OOJ'E  0||D!
!DRa5L 1S3w||AbqE?R;S5S SAu 	 
 5!L
s   ;C5C5c           	         S n[        U UUUSUS9 Vs/ s H!  n[        R                  " USS 5      US   4PM#     nnU(       d  g[        R                  " 5       nU H	  u  pX-  nM     / nUS   /nUS   S   nUSS  H  u  pUS	   u  p[	        UR
                  U	R
                  -
  5      U::  d&  [	        UR                  U	R                  -
  5      U::  a  UR                  X45        X-  nMp  U" X5      nUR                  UU45        X4/nU	nM     U" X5      nUR                  UU45        UR                  S
 S9  US   S   n
US   S   R                  nUSS  HZ  u  nn[        [        [        UR
                  U-
  UR                  -  5      5      S5      nSUS-   -  nU
UU-   -  n
UR                  nM\     U
$ s  snf )ag  Extract plain text avoiding unacceptable line breaks.

Text contained in clip will be sorted in reading sequence. Some effort
is also spent to simulate layout vertically and horizontally.

Args:
    page: pymupdf.Page
    clip: (rect-like) only consider text inside
    flags: (int) text extraction flags
    textpage: pymupdf.TextPage
    tolerance: (float) consider words to be on the same line if their top
        or bottom coordinates do not differ more than this.

Notes:
    If a TextPage is provided, all text is checked for being inside clip
    with at least 50% of its bbox.
    This allows to use some "global" TextPage in conjunction with sub-
    selecting words in parts of the defined TextPage rectangle.

Returns:
    A text string in reading sequence. Left indentation of each line,
    inter-line and inter-word distances strive to reflect the layout.
c                    UR                  S S9  SnU R                  n[        R                  " 5       nU H  u  pVXE-  n[	        [        [        UR                  U-
  UR                  -  [        U5      -  5      5      X0R                  :X  d  UR                  U::  a  SOS5      nUSU-  U-   -  nUR                  nM     U$ )a  Create the string of one text line.

We are trying to simulate some horizontal layout here, too.

Args:
    clip: (pymupdf.Rect) the area from which all text is being read.
    line: (list) word tuples (rect, text) contained in the line
Returns:
    Text in this line. Generated from words in 'line'. Distance from
    predecessor is translated to multiple spaces, thus simulating
    text indentations and large horizontal distances.
c                      U S   R                   $ r/   )x0r*   s    r   r   4get_sorted_text.<locals>.line_text.<locals>.<lambda>   s    !r   r    r   r    )
r   rF   r   
EMPTY_RECTmaxintroundwidthlenx1)r
   r9   ltextrP   r:   rtdists           r   	line_text"get_sorted_text.<locals>.line_text   s     			'	(WW""$DAJEE144"9/#a&89:GGmqttrzD
 S4Z!^#EB  r   T)r
   r   r   r   r<   Nr-   rH   r   r   c                      U S   R                   $ r/   )r4   )ls    r   r   !get_sorted_text.<locals>.<lambda>  s    adggr   r      
)rB   r   r1   rJ   r2   r3   r4   r5   r   minrL   rM   height)r	   r
   r   r   r<   rU   r+   r7   totalboxwrtextlinesr9   r:   w0r_rQ   r4   distancebreakss                       r   get_sorted_textrg      s   >@  


A 
ae	ad#
 
 
 !!#H  E!H:D!HQKE !"Ib uxx"%% I-UXX5E1F)1SKK
#KE h-ELL%(J<DE  h%E	LL%  
JJ&J'8A;D	q!Bab	us5%((R-5<<!?@A1EA&XX	 " Kk
s   (F>rectc                     UnUc  U R                  5       nO[        US5      U :w  a  [        S5      eUR                  U5      nUc  AU$ )Nr   r   )r    r!   r"   extractTextbox)r	   rh   r   r$   rcs        r   get_textboxrl     sU    
 
B	z 	X	$	&677			4	 BIr   p1p2c                     [         R                  " U 5        UnUc  U R                  U[         R                  S9nO[	        US5      U :w  a  [        S5      eUR                  X5      nUc  AU$ )Nr   r   r   )r   r   r    TEXT_DEHYPHENATEr!   r"   extractSelection)r	   rm   rn   r
   r   r$   rk   s          r   get_text_selectionrr   &  sp     	B	zD0H0HI	X	$	&677			R	$BIr   languagedpifulltessdatac                    ^ [         R                  " U 5        [         R                  " T5      mU[         R                  ) -  [         R                  ) -  nU4S jnU4S jnU(       a	  U" XX!5      $ U" XX!5      $ )a;  Create a Textpage from the OCR version of the page.

OCR can be executed for the full page image, or (the default) only
for areas that are not covered by readable digital text.

Args:
    flags: (int) control content becoming part of the result.
    language: (str) specify expected language(s). Default is "eng" (English).
    dpi: (int) resolution in dpi, default 72.
    full: (bool) whether to OCR the full page, or to keep legible text
    tessdata: (str) path to Tesseract language data files. If None, the
              built-in function is used to find the path.
c                   > U R                  US9n[        R                  " UR                  SUT
S9S9nUR	                  S5      nU R
                  R                  UR
                  R                  -  n[        R                  " Xw5      U R                  -  nUR                  X8S9n	[        R                  " U 5      U	l        U	$ )z$Perform OCR for the full page image.rt   Fcompressrs   rv   streamr   )r   matrix)
get_pixmapr   Documentpdfocr_tobytes	load_pagerh   rN   Matrixderotation_matrixr    weakrefproxyr   )r	   rt   rs   r   pixocr_pdfocr_pageunzoomctmtpagerv   s             r   full_ocr"get_textpage_ocr.<locals>.full_ocrX  s    oo#o&""%%!! & 
 $$Q'8==#6#66nnV,t/E/EE%%E%> }}T*r   c           
        > U R                   n[        R                  " 5       nUR                  X@R                  U R                  S9  UR                  S5      nUR                  5         UR                  US9nUR                  5       S   n U V	V
Vs/ s HA  n	U	S   S:X  d  M  U	S     H)  n
U
S     H  n[        S5      US	   ;   d  M  US
   PM     M+     MC     nn
n	nU(       a  U H  nUR                  U5        M     UR                  [        R                  [        R                  [        R                  S9  UR                  US9nUR                  5       S   nUR                  X@R                  U R                  S9  UR                  S5      nUR                  5         U V	V
Vs/ s HA  n	U	S   S:X  d  M  U	S     H)  n
U
S     H  n[        S5      US	   ;  d  M  US
   PM     M+     MC     nn
n	nU H  nUR                  U5        M     UR                  [        R                  [        R                  [        R                  S9  UR                  US9n[        R                  " UR!                  SUTS9S9nUS   nUR#                  U[        R$                  S9  [&        R(                  " U 5      Ul         U$ s  snn
n	f s  snn
n	f )zxPerform OCR for parts of the page without legible text.

We create a temporary PDF for which we can freely redact text.
)	from_pageto_pager   )r   r%   typerb   spansi  ra   bbox)imagesgraphicsra   rW   ry   Frz   r|   )r   r   open
insert_pdfnumberr   remove_rotationr    extractDICTchradd_redact_annotapply_redactionsPDF_REDACT_IMAGE_NONEPDF_REDACT_LINE_ART_NONEPDF_REDACT_TEXT_REMOVEr   r   extend_textpageTEXT_ACCURATE_BBOXESr   r   )r	   rt   rs   r   doctemp_pdf	temp_pager$   r%   r   rY   s
fffd_spansr   span_bboxesr   r   r   rv   s                     r   partial_ocr%get_textpage_ocr.<locals>.partial_ocrl  s   
 kk <<>C;;L&&q)	!!# ##%#0!(+	 
yA~  wZwZ6{ai' AfI  	     	 
 "**40 #&&44 9933 '  ''e'4B^^%h/F{{DKKP **2.I%%' 
yA~  wZwZv;!F)+ AfI  	     	 
  D&&t,   	""0055// 	# 	

 ""s"+ ,,%%!! & 
 1: 	  7+G+G H MM$'		}
0
s$   J9""J9J9&K 7"K K )r   r   get_tessdata TEXT_USE_CID_FOR_UNKNOWN_UNICODE TEXT_USE_GID_FOR_UNKNOWN_UNICODE)r	   r   rs   rt   ru   rv   r   r   s        `  r   get_textpage_ocrr   9  s|    * ##H-H 	33
3	433
3	4 
(Tn 833 t(22r   )r
   r   r   r   rA   r<   optionc                   [         R                  [         R                  [         R                  [         R                  [         R
                  [         R                  [         R                  [         R                  [         R                  [         R                  S.
nUR                  5       nX;   d   eX;  a  SnUc  X   nUS:X  a  [        U UUUUUS9$ US:X  a  [        XX4US9$ US:X  a  U(       a  [        U UUUUS9$ [         R                  " U 5        Sn	US	;   a  U R                  nUb  [         R                  " U5      nSn	O([!        U 5      [         R"                  L a  U R                  n	Un
U
c  U R%                  X#S
9n
O['        U
S5      U :w  a  [)        S5      eUS:X  a  U
R+                  XS9nOUS:X  a  U
R-                  XS9nOUS:X  a  U
R/                  XS9nOjUS:X  a  U
R1                  XS9nOTUS:X  a  U
R3                  5       nO=US:X  a  U
R5                  5       nO&US:X  a  U
R7                  5       nOU
R9                  US9nUc  A
U$ )a  Extract text from a page or an annotation.

This is a unifying wrapper for various methods of the pymupdf.TextPage class.

Args:
    option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
    clip: (rect-like) restrict output to this area.
    flags: bit switches to e.g. exclude images or decompose ligatures.
    textpage: reuse this pymupdf.TextPage and make no new one. If specified,
        'flags' and 'clip' are ignored.

Returns:
    the output of methods get_text_words / get_text_blocks or pymupdf.TextPage
    methods extractText, extractHTML, extractDICT, extractJSON, extractRAWDICT,
    extractXHTML or etractXML respectively.
    Default and misspelling choice is "text".
)
ra   htmljsonrawjsonxmlxhtmldictrawdictr7   r%   ra   Nr7   )r
   r   r   r   rA   r%   )r
   r   r   r   )r
   r   r   r<   )r   r   r   r   r   r   r   )cbr   r   r   r   r   r   r   )r   )r   TEXTFLAGS_TEXTTEXTFLAGS_HTMLTEXTFLAGS_DICTTEXTFLAGS_RAWDICTTEXTFLAGS_XMLTEXTFLAGS_XHTMLr?   r   lowerrB   r&   rg   r   cropboxr1   r   Pager    r!   r"   extractJSONextractRAWJSONr   extractRAWDICTextractHTML
extractXMLextractXHTMLextractText)r	   r   r
   r   r   r   rA   r<   formatsr   r$   rS   s               r   get_textr     sQ   : &&&&&&,,$$((&&,,((**G \\^F}!
 	
 5$
 	
 D
 	
 	B))||||D!	dw||	#\\	B	zD6	X	$	&677NNbN,	9	/	6	NNbN,	9	/	6	NN	5MMO	7	OONNN%Hr   c                 <   [        U [        R                  5      (       a  U R                  U5      nOA[        U [        R                  5      (       a  U R
                  nO S[        U 5      < S35       eUR                  SS.n [        U S5      (       a  U R                  US'   [        R                  " SS5      nUR                  [        R                  -  (       a  UR                   R"                  Ul        UR                  [        R$                  -  (       a  UR                   R&                  Ul        UR                  [        R(                  :X  a  UR*                  US'   U$ UR                  [        R,                  :X  aX  UR.                  US	'   XCS
'   UR                  [        R0                  -  (       a  UR2                  R"                  US'   U$ SUS'    U$ UR                  [        R4                  :X  a  UR6                  R9                  SS5      US'   UR.                  US	'   UR.                  S:  a  UR
                  US
'   U$ XCS
'   UR                  [        R0                  -  (       a  UR2                  R"                  US'   U$ SUS'    U$ UR                  [        R:                  :X  a!  UR6                  R9                  SS5      US'   U$ UR                  [        R<                  :X  aq  UR>                  RA                  5       URA                  5       -  (       a   eURC                  UR>                  5        S
U;   a  [        R                  " US
   5      US
'   U$ UR.                  US	'   U$ ! [         a#    [        S:  a  [        R                  " 5          GNf = f)Nr   zUnexpected type(ln)=.)kindxrefrh   from   urir	   tozoomg        \/file)"
isinstancer   OutlinedestinationLinkdestr   r   hasattrrh   	Exceptiong_exceptions_verboseexception_infoPointr   LINK_FLAG_L_VALIDltxLINK_FLAG_T_VALIDyLINK_URIr   	LINK_GOTOr	   LINK_FLAG_R_IS_ZOOMrb
LINK_GOTOR	file_specreplaceLINK_LAUNCH
LINK_NAMEDnamedkeysupdate)lndocumentr   nlpnts        r   getLinkDictr   8  s   "goo&&~~h'	B	%	%ww,)R{!,,q))Q	'B2vBvJ
 --1
CzzG---		zzG---		yyG$$$HH5	D IA 
g''	'YY6
4::333BvJ8 I5 BvJ4 I1 
g((	(^^++D#66
YY6
99q=yyBtH( I% tHzzG777!WWYY6
  I !6
 I 
g))	)^^++D#66
 I 
g((	(JJOO%	122
		$**2:}}RX.BtH I YY6
I[  1$(>(>(@s    M. .)NNr   ddictc                    U(       d  gS nS nS nS nS n[        U5      [        [        4;   a  U" U SUS5      nU$ UR                  S[        R
                  5      nU[        R
                  :X  a  gUS   [        R                  :X  aH  UR                  S	S5      n	UR                  S
[        R                  " SS5      5      n
U
u  pU" XX5      nU$ US   [        R                  :X  a!  U" [        R                  " US   5      5      nU$ US   [        R                  :X  a#  [        R                  " US   5      nU" X5      nU$ US   [        R                  :X  aD  US   S:  a;  [        R                  " US   5      nU" [        R                  " US
   5      X5      nU$ US   [        R                  :X  aQ  US   S:  aH  [        R                  " US   5      nU" US   US
   R                  US
   R                  US	   UU5      nU$ g)zfCalculate the PDF action string.

Notes:
    Supports Link annotations and outline items (bookmarks).
rH   c                 *    SU  S[        XU45       S3$ )Nz/A<</S/GoTo/D[z	 0 R/XYZ z]>>	_format_g)ar   cds       r   r   getDestStr.<locals>.<lambda>{  s    N1#Yy!PQ?S>TTW"Xr   c           	      6    SU  S[        XU45       SU SU S3	$ )Nz/A<</S/GoToR/D[z /XYZ z]/F<</F/UF/Type/Filespec>>>>r   )r   r   r   r   efs         r   r   r   |  s2    OA3fYPQVWyEYDZZabcaddghigjj|*}r   c                     SU  SU SU S3$ )Nz/A<</S/GoToR/Dz/F<</Fr  r  r   )r   r   r   s      r   r   r   }  s    >!F1#SCU!Vr   c                     SU  SU S3$ )Nz/A<</S/Launch/F<</Fr  r  r   )r   r   s     r   r   r   ~  s     3A3c!<NOr   c                     SU  S3$ )Nz/A<</S/URI/URIz>>r   )r   s    r   r   r     s    .2.r   r   r   r   r   r   r   r	   )r   rL   floatgetr   	LINK_NONEr   r   r   get_pdf_strr   r   r   r   )r   r   str_goto
str_gotor1
str_gotor2
str_launchstr_urir   d_kindd_zoomr   d_leftd_topfspecs                 r   
getDestStrr  s  s    XH}JVJOJ.GE{sEl"a*YYvw001F"""V})))61%YYtW]]1a01e4V}(((w**5<8:V}+++##E&M2%'V}***uV}q/@##E&M2'--eDk:EIV}***uV}/A##E&M2&M$KMM$KMM&M
 r   lnkc           	         U R                   nU) nUS   n[        [        XC-  5      5      nSnUS   [        R                  :X  a  US   S:  a  [        R
                  S   nUS   nU R                  R                  U5      n	UR                  S[        R                  " SS5      5      n
U R                  U   nUR                   nU) nX-  nU" XR                  UR                  UR                  SS5      U5      nGO[        R
                  S	   nU" [        R                  " US   5      U5      nGOUS   [        R                  :X  a  US   S:  a  [        R
                  S
   nUR                  S[        R                  " SS5      5      n
[        U
5      [        R                  La  [        R                  " SS5      n
U" US   U
R                  U
R                  UR                  SS5      US   US   U5      nO[        R
                  S   nU" [        R                  " US   5      US   U5      nOUS   [        R                  :X  a$  [        R
                  S   nU" US   US   U5      nOUS   [        R                   :X  a   [        R
                  S   nU" US   U5      nOKUS   [        R"                  :X  a4  [        R
                  S   nUR                  S5      nUc  US   nU" X5      nU(       d  U$ [%        U R'                  5        Vs/ s H&  nUS   [        R(                  :X  d  M  US   US   4PM(     sn5      nUR                  SS5      nU(       a  US   U4UR+                  5       ;   a  UnOESn[        R,                  R/                  5       S-   n UU-  nUUR1                  5       ;  a  OUS-  nM!  UR3                  SSU S35      nU$ s  snf )Nr   rH   r   r	   r   goto1r   r   goto2gotor1r   gotor2launchr   r   name	nameddestr   r   idr   z-L%iz/Linkz	/Link/NM())transformation_matrixr   tupler   r   
annot_skelr   	page_xrefr
  r   r   r   r  r   r   r   r   r   r   annot_xrefsPDF_ANNOT_LINKitemsTOOLSset_annot_stemvaluesr   )r	   r  r   ictmrR   rh   annottxtpnor   r   	dest_pagedest_ctm	dest_ictmipntlnamer   
link_namesold_namer  istems                         r   getLinkTextr:    s    
$
$C4DFAU18_%DE
6{g'''v;!$$W-Cf+C;;((-D''$a 34CC(I 66H!	I?Dffdffcggfa.@$GE$$W-C++CI6=E	V**	*v;!$$X.C''$a 34CCy-mmAq)F"FFE $$X.C++CI6FTJE	V++	+  *CKVd3	V((	(  'CJ%	V**	*  )=$EE  #//1T1!QqTW=S=S5S!A$!1TJ wwtR HS[(+z/?/?/AA}}++-6!8D:,,..FA	  MMEivQ$78EL# 	Us   O,Oc            
      p    [         R                  " 5        V VVVs/ s H  u  pp#U PM
     snnnn $ s  snnnn f )zD
Returns a list of upper-case colour names.
:rtype: list of strings
r   colors_wx_list)r  rR   gr   s       r   getColorListr?    s+    
 '.&<&<&>?&>]TaD&>???s   0
c                  ,    [         R                  " 5       $ )z
Returns list of (name, red, gree, blue) tuples, where:
    name: upper-case color name.
    read, green, blue: integers in range 0..255.
:rtype: list of tuples
r<  r   r   r   getColorInfoListrA    s     !!##r   r  c                 h    [         R                  " 5       R                  U R                  5       S5      $ )zRetrieve RGB color in PDF format by name.

Returns:
    a triple of floats in range 0 to 1. In case of name-not-found, "white" is returned.
)r   r   r   )r   colors_pdf_dictr
  r   )r  s    r   getColorrD     s&     ""$((yAAr   c                 >    [        5       [        5       R                  U R                  5       5         nUS   S-  nUS   S-  nUS   S-  n[        X#U5      n[        US-  S5      n[        X#U5      nXW-
  nUS:X  a  Sn	O3XR:X  a  SX4-
  U-  S	-  -  n	O XS:X  a  SXB-
  U-  S-   -  n	OSX#-
  U-  S
-   -  n	[        [        U	5      5      n
US:X  a  SnOX-  n[        [        US-  5      5      nXU4$ ! [         a#    [
        (       a  [        R                  " 5          gf = f)zRetrieve the hue, saturation, value triple of a color name.

Returns:
    a triple (degree, percent, percent). If not found (-1, -1, -1) is returned.
)rW   rW   rW   r   g     o@r   r   d   r   g      N@   r-   )rA  r?  indexupperr   r   r   r   rK   rM   r]   rL   )r  r   rR   r>  r   cmaxVcmindeltahueHsatSs                r   getColorHSVrR  )  s4   |~33DJJLAB
 	
!uA	!uA	!uAqQ<DdSj!AqQ<DKEz	!+,	!+,!+,E#JAqylE#)A!99  G$:$:$<s   2C/ /*DDr   c                 j   U R                  U5      u  p#pESnSnUS:X  a  X#XFU4$ U(       ag   [        R                  " US9nUR                  nUR                  nUR
                  n	Xg-
  S:  a!  U	R                  U:  a  U	R                  nSU-
  nX#XFU4$ US:w  a0   [        R                  " U5      nUR                  nUR                  nO
US-  nUS-  nX#XFU4$ ! [         a"    [        R                  " 5         US-  nUS-  n Nuf = f! [         a"    [        R                  " 5         US-  nUS-  n N_f = f)Ng?gɿrH   )
fontbufferr   g333333?zn/a)	extract_fontr   Fontascender	descenderr   r3   r   r   )
r   r   fontnameextstypebufferascdscfontr   s
             r   _get_font_propertiesr`  P  sE   #&#3#3D#9 H5
C
C
bye#--	<<62D--C..C99Dy1}77S=''C#g
 e#--
e|	<<)D--C..C 	s
s
%c))#  	""$3JC3JC	  	""$3JC3JC	s$   A!C .D )DD)D21D2c                     SnSnU R                   R                  n U(       d  O!US-  nX#R                  -  nUR                  nM)  SU SU 3$ )Nr   r   z
num_spans=z num_chars=)
m_internalheadrO   next)ra   	num_spans	num_charsspans       r   _show_fz_textrh  u  s^    
 II??D
Q	XX	yy  	{+i[99r   c                    U u  pUSS R                  S5      SS nUSSS.nSn[        U5       H  u  pPU(       a  SnM  U S	:X  a  X%S-      US
'   SnM$  U R                  S5      (       a+  U SS R                  SS5      R                  SS5      nXcS'   Me  U R                  S5      (       d  M}  [	        U SS 5      nXcS'   M     U$ )a  Make a Python dict from a PDF page label rule.

Args:
    item -- a tuple (pno, rule) with the start page number and the rule
            string like <</S/D...>>.
Returns:
    A dict like
    {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
r   r   r   NrH   )	startpageprefixfirstpagenumFrQ  styleTP(r"  rl  Strm  )split	enumerate
startswithr   rL   )itemr0  ruler   skipr8  r   s          r   	rule_dictrx    s     IC":C $DR;ADT?D3;!eAgJD??3QR  b)11#r:AhK??4  DHA !n # Hr   c                     U Vs/ s H  o"S   U ::  d  M  UPM     snS   n[        U5      nUR                  SS5      nUR                  SS5      nUS;   a  SOSnXS   -
  US   -   U-   n[        XeU5      $ s  snf )	zReturn the label for this page number.

Args:
    pgNo: page number, 0-based.
    labels: result of doc._get_page_labels().
Returns:
    The label (str) of the page number. Errors return an empty string.
r   rW   rl  rH   rn  )r   Ark  rm  )rx  r
  construct_label)	pgNolabelsr   ru  rv  rl  rn  rM  
pagenumbers	            r   get_label_pnor    s     .v!1Av.r2DT?DXXh#FHHWb!E:%B1E[))D,@@5HJ5*55 /s
   A6A6c                 4   SnU S:X  a  [        U5      nOU S:X  a  [        U5      R                  5       nO_U S:X  a  [        U5      R                  5       nO?U S:X  a  [	        U5      R                  5       nOU S:X  a  [	        U5      R                  5       nX-   nU$ )z9Construct a label based on style, prefix and page number.rH   DrR   Rr   rz  )strintegerToRomanr   rI  integerToLetter)rn  rl  r0  n_strresults        r   r{  r{    s     E|C	#s#))+	#s#))+	#$**,	#$**,^FMr   c           
      d   SSK nUR                  nSU pC[        SU5      U::  a:  U[        [        R                  " SU5      5      -  nUS-  n[        SU5      U::  a  M:  Sn[        [        U5      5       H8  n[        U[        [        R                  " SU5      5      5      u  pxXRU   -  nUnM:     U$ )z-Returns letter sequence string for integer i.r   Nr      rH   )stringascii_uppercasepowrL   mathreversedrangedivmod)	r8  r  lsnr   str_tjr  r>  s	            r   r  r    s     			Baq
b!*/	S"a!!	Q b!*/ EeAhaTXXb!_-.A   Lr   numc                 l   ^ SmU4S jnSR                  U" U 5       Vs/ s H  o"PM     sn5      $ s  snf )z$Return roman numeral for an integer.))i  M)i  CM)i  r  )i  CD)rF  C)Z   XC)2   L)(   XL)
   X)	   IX)r[   rK  )r-   IV)r   Ic              3   h   >#    T H'  u  p[        X5      u  p4X#-  v   XU-  -  n U S::  d  M'    g    g 7fr/   )r  )r  rR   ltrr   rd   romans        r   	roman_num!integerToRoman.<locals>.roman_num  s9     FA#>DA'Mq5LCax s   &22rH   )join)r  r  r   r  s      @r   r  r    s6    E  77y~.~!A~.//.s   1line_dirrg  r   c                 b   U c  US   n U u  p4[         R                  " U5      n[         R                  R                  5       (       a  SnOUS   US   -
  nXQS   -  nXd-  nXc-  nUS:  aK  US::  aE  UR                  SU4-
  n	UR
                  US4-   n
UR                  US4-
  nUR
                  SU4-   nOUS::  aK  US::  aE  UR                  US4-   n	UR                  SU4-
  n
UR                  SU4-   nUR                  US4-
  nOUS::  aK  US:  aE  UR
                  SU4-
  n	UR                  US4-   n
UR
                  US4-
  nUR                  SU4-   nODUR                  US4-   n	UR                  SU4-
  n
UR                  SU4-   nUR                  US4-
  n[         R                  " XX5      $ )ao  Compute the quad located inside the bbox.

The bbox may be any of the resp. tuples occurring inside the given span.

Args:
    line_dir: (tuple) 'line["dir"]' of the owning line or None.
    span: (dict) the span. May be from get_texttrace() method.
    bbox: (tuple) the bbox of the span or any of its characters.
Returns:
    The quad which is wrapped by the bbox.
dirr   rW  rX  sizer   )	r   r1   r*  set_small_glyph_heightsbltrbrtlQuad)r  rg  r   cossinr   r^   hshculurlllrs                r   recover_bbox_quadr    s    ;HC<<D}},,..tK00fF 
B	B	Qw27WW2wWWAwWWAwWW2w	qR1WWWAwWW2wWW2wWWAw	qR1WWW2wWWAwWWAwWW2wWWAwWW2wWW2wWWAw<<''r   c                     [        U 5      [        Ld  [        U 5      S:w  a  [        S5      e[        U5      [        La  [        S5      e[        XUS   5      $ )zRecover the quadrilateral of a text span.

Args:
    line_dir: (tuple) 'line["dir"]' of the owning line.
    span: the span.
Returns:
    The quadrilateral enveloping the span's text.
r   bad line dir argumentbad span argumentr   )r   r$  rO   r"   r   r  )r  rg  s     r   recover_quadr  H  sP     H~U"c(mq&8011Dz,--XT&\::r   r9   r   c           	      2   Uc  U S   n[        U5      S:X  a  [        S5      eU S   nUu  p4[        X!S   5      n[        U5      S:  a  [        X!S   5      nOUnUR                  nUR                  n[
        R                  " Xx5      n	X-  n
[
        R                  R                  5       n[        U Vs/ s H  oS   U(       a  SO
US   US	   -
  -  PM     sn5      n[
        R                  " SU* U
R                  S5      nUR                  nX) -  nU$ s  snf )
a  Calculate the line quad for 'dict' / 'rawdict' text extractions.

The lower quad points are those of the first, resp. last span quad.
The upper points are determined by the maximum span quad height.
From this, compute a rect with bottom-left in (0, 0), convert this to a
quad and rotate and shift back to cover the text of the spans.

Args:
    spans: (list, optional) sub-list of spans to consider.
Returns:
    pymupdf.Quad covering selected spans.
r   r   zbad span listr  r   rW   r  rW  rX  )rO   r"   r  r  r  r   planish_liner*  r  rK   r1   r   quad)r9   r   r  r  r  q0q1line_llline_lrmat0x_lrsmallr   h	line_rect	line_quads                   r   recover_line_quadr  X  s    }W
5zQ))E{HHC	ha	)B
5zA~("I.eeGeeG1D >DMM113EQVWQVA65aq}q~'E	GQVW	A QDFFA.III 	Xs   3#Dcharsc                    U c  US   n Uc  [        X5      $ SUR                  5       ;  a  [        S5      e[        XUS   5      n[	        U5      S:  a  [        XUS   5      nOUnUR
                  nUR                  n[        R                  " XV5      nXg-  n[        R                  R                  5       n	US   U	(       a  SO
US   US	   -
  -  n
[        R                  " SU
* UR                  S5      nUR                  nX) -  nU$ )
a*  Calculate the span quad for 'dict' / 'rawdict' text extractions.

Notes:
    There are two execution paths:
    1. For the full span quad, the result of 'recover_quad' is returned.
    2. For the quad of a sub-list of characters, the char quads are
       computed and joined. This is only supported for the "rawdict"
       extraction option.

Args:
    line_dir: (tuple) 'line["dir"]' of the owning line.
    span: (dict) the span.
    chars: (list, optional) sub-list of characters to consider.
Returns:
    pymupdf.Quad covering selected characters.
r  r  z)need 'rawdict' option to sub-select charsr   r   rW   r  rW  rX  )r  r   r"   recover_char_quadrO   r  r  r   r  r*  r  r1   r   r  )r  rg  r  r  r  span_llspan_lrr  r  r  r  	span_rect	span_quads                r   recover_span_quadr    s    " ;}H++diik!DEE	858	4B
5zA~xuRy9eeGeeG1D>DMM113EVUj)9D<M)MOAQDFFA.IIIr   charc                    U c  US   n [        U 5      [        Ld  [        U 5      S:w  a  [        S5      e[        U5      [        La  [        S5      e[        U5      [        L a  [
        R                  " US   5      nO7[        U5      [        L a  [
        R                  " US   5      nO[        S5      e[        XU5      $ )a$  Recover the quadrilateral of a text character.

This requires the "rawdict" option of text extraction.

Args:
    line_dir: (tuple) 'line["dir"]' of the span's line.
    span: (dict) the span dict.
    char: (dict) the character dict.
Returns:
    The quadrilateral enveloping the character.
r  r   r  r  r   r   )r   r$  rO   r"   r   r   r1   r  )r  rg  r  r   s       r   r  r    s     ;H~U"c(mq&8011Dz,--DzT||DL)	du	||DG$,--XT22r   )NNNF)NNNFNr   )NNNr   )N)NN)r   engH   FN)ra   )Br  typingr   rH   r   r   format_gr   r   r   r   r   r   
ByteStringAttributeErrorbytes	bytearray
memoryviewAnyAnyTypeUnionrL   OptIntOptionalr	  OptFloatr  OptStrr   OptDictOptBytesSequenceOptSeqr   TextPageboollistr&   rB   rg   rl   rr   r   r   r   r  r:  r?  rA  r$  rD  rR  r   r`  rh  rx  r  r{  r  r  r  r  r  r  r  r  r   r   r   <module>r     s      	33 
		0""J
 **	c4i	 ??5!		
//$
??:&		) !%
,,
  	
  
H !%L
,,L
L L 	L
 L 
Lb !%r
,,r
r r 	r 	rp "&
,,
  		( !%
,, 	 	
 * O3
,,O3O3 O3 
	O3
 O3 O3 O3h j !%j
,,jj 	j
 j j jZ8d 8v7S 7 7# 7tPgll P P# PB@d @$$ $B3 B5 B$c $e $N"*g.. "*c "*e "*J:"$D6*3 &# $0 0 0D/( /(T /( /(7<< /(d;5 ; ; ; *D * * *Z* *T *$ *',, *Z3 3T 3 3',, 3G%     0"Z/J0s"   K K# 
K K #K54K5