
    ?i4                     @   d Z ddlZddlZddlmZmZ ddlmZmZmZm	Z	m
Z
 	 ddlZdZdddd	d
dddddZdZ eh d      Zddhddhh dh ddhdhdhdhddhddhdhdhdZded e
e   fd!Zd7d"eded#ee   d efd$Zd%ed efd&Zd'efd(Zd)ed eeef   fd*Zd+eeef   d e	e   fd,Zded efd-Z	 	 d8d%ed.ed/ed0ed)ed eeef   fd1Z	 d9d2eeeef      d)ed0ed eeef   fd3Z	 	 d8d%ed.ed/ed0ed)ed eeef   fd4Zd5eeef   d eeeef      fd6Z y# e$ r dZY w xY w):a>  TikTok search via ScrapeCreators API for /last30days.

Uses ScrapeCreators REST API to search TikTok by keyword, extract engagement
metrics (views, likes, comments, shares), and fetch video transcripts.

Requires SCRAPECREATORS_API_KEY in config. 100 free credits, then PAYG.
API docs: https://scrapecreators.com/docs
    N)datetimetimezone)AnyDictListOptionalSetz(https://api.scrapecreators.com/v1/tiktok
      )results_per_pagemax_captions      (      )quickdefaultdeepi  >,   aianatbebydoifinisitmemynoofonorsotoweallandarebutcanforgethashowitsnotthewasyoufromhavejustthatthiswhatwillwithyouraboutraphiphop>   hiphoprB   >   rC   rD   rA   
javascriptjs
typescriptts
artificialintelligencemachinelearningreactjsreact)rC   rD   rA   rB   rF   rE   rH   rG   aimlrN   rM   textreturnc                 *   t        j                  dd| j                               j                         }|D ch c]  }|t        vst        |      dkD  s| }}t        |      }|D ]#  }|t        v s|j                  t        |          % |S c c}w )zHLowercase, strip punctuation, remove stopwords, drop single-char tokens.z[^\w\s]    )	resublowersplit	STOPWORDSlensetSYNONYMSupdate)rQ   wordswtokensexpandedts         Q/home/ubuntu/.openclaw/workspace/skills/last30days-official/scripts/lib/tiktok.py	_tokenizere   :   s    FF:sDJJL1779EDA!9"4Q!aDFD6{H )=OOHQK() O Es   BBBqueryhashtagsc                 D   t        |       }|}|r| ddj                  |       }t        |      }|r:|D ]5  }|j                         }|D ]  }||v s||k7  s|j                  |         7 |syt	        ||z        }	|	t	        |      z  }
t        dt        d|
            S )zCompute relevance as ratio of query tokens found in text + hashtags.

    Uses ratio overlap (intersection / query_length). Hashtags provide
    a TikTok-specific relevance boost. Floors at 0.1.
    rT   g      ?g?g      ?)re   joinrX   addr[   maxmin)rf   rQ   rg   q_tokenscombinedt_tokenstag	tag_lowerqtoverlapratios              rd   _compute_relevanceru   E   s     H HV1SXXh/01"H  	%C		I %?rYLL$%	% (X%&Gc(m#EsCUO$$    topicc                 \   | j                         j                         }g d}|D ]3  }|j                  |dz         s|t        |      d j                         }5 h d}|j	                         }|D cg c]	  }||vs| }}|rdj                  |      n|}|j                  d      S c c}w )zExtract core subject from verbose query for TikTok search.

    Strips meta/research words to keep only the core product/concept name.
    )zwhat are the bestzwhat is the bestzwhat are the latestzwhat are people saying aboutzwhat do people think aboutzhow do i usez
how to usezhow tozwhat arezwhat isztips forzbest practices forrT   N>   newtopbestgoodnewsgreatviraladvicekillerlatestpromptr^   awesomehottestmethodspopularpromptsupdatesfeaturestrending	practices	prompting
approaches
strategiesrecommendationsz?!.)rX   strip
startswithr[   rY   ri   rstrip)	rw   rQ   prefixespnoiser_   r`   filteredresults	            rd   _extract_core_subjectr   c   s    
 ;;= DH  )??1s7#A=&&(D)
E JJLE 3aAUN3H3#+SXXhF== 4s   3	B)=B)msgc                     t         j                  j                         rBt         j                  j                  d|  d       t         j                  j	                          yy)zGLog to stderr (only in interactive terminals; spinner handles non-TTY).z	[TikTok] 
N)sysstderrisattywriteflush)r   s    rd   _logr      sB    
zz

9SE,-

 rv   tokenc                     | ddS )z%Build ScrapeCreators request headers.zapplication/json)z	x-api-keyzContent-Type )r   s    rd   _sc_headersr      s     * rv   itemc                     | j                  d      }|r@	 t        j                  t        |      t        j
                        }|j                  d      S y# t        t        t        f$ r Y yw xY w)ziParse date from ScrapeCreators TikTok item to YYYY-MM-DD.

    Handles create_time (unix timestamp).
    create_time)tzz%Y-%m-%dN)
r/   r   fromtimestampintr   utcstrftime
ValueError	TypeErrorOSError)r   rH   dts      rd   _parse_dater      sf    
 
-	 B		''BHLLAB;;z**  Iw/ 		s   >A A,+A,c                    | sy| j                  d      }g }|D ]T  }|j                         }|s|j                  d      r(t        j                  d|      r?d|v rD|j                  |       V dj                  |      S )z9Strip WebVTT timestamps and headers from transcript text. r   WEBVTTz^\d{2}:\d{2}z-->rT   )rY   r   r   rV   matchappendri   )rQ   linescleanedlines       rd   _clean_webvttr      s    JJtEG 
zz|??8$88OT*D=t
 88Grv   	from_dateto_datedepthc                 f   |sg ddS t         sg ddS t        j                  |t        d         }t        |       }t	        d| d| d|d    d	       	 t        j                  t
         d
|ddt        |      d      }|j                          |j                         }|j                  d      xs |j                  d      xs g }
g }|
D ]6  }t        |t              s|j                  d|      }|j                  |       8 |d|d    }g }|D ]  }t        |j                  dd            }|j                  dd      }|j                  d      xs i }|j                  d      xs d}|j                  d      xs d}|j                  d      xs d}|j                  d      xs d}|j                  d      xs i }|j                  dd      }|j                  d d      }|j                  d!      xs g }|D cg c]5  }t        |t              r#|j                  d"      r|j                  d"d      7 }}|j                  d#      xs i j                  d$      }t!        |      }t#        |||      }|r|j%                  d%      d   nd} | s|r
|rd&| d'| } |j                  ||| ||||||d(||||rd)|dd*  nd)| dd+        |D !cg c]  }!|!d,   s	||!d,   cxk  r|k  sn n|! }"}!t'        |      t'        |"      z
  }#|"r|"}|#r't	        d-|# d.       nt	        d/t'        |              |j)                  d0 d12       t	        d3t'        |       d4       d5|iS # t        $ r5}	t	        d|	        g t        |	      j                   d|	 dcY d}	~	S d}	~	ww xY wc c}w c c}!w )6a9  Search TikTok via ScrapeCreators API.

    Args:
        topic: Search topic
        from_date: Start date (YYYY-MM-DD)
        to_date: End date (YYYY-MM-DD)
        depth: 'quick', 'default', or 'deep'
        token: ScrapeCreators API key

    Returns:
        Dict with 'items' list and optional 'error'.
    z$No SCRAPECREATORS_API_KEY configureditemserrorzrequests library not installedr   zSearching TikTok for 'z	' (depth=z, count=r   )z/search/keyword	relevance)rf   sort_by   paramsheaderstimeoutzScrapeCreators error: : Nsearch_item_listdata
aweme_infoaweme_idr   desc
statistics
play_countr   
digg_countcomment_countshare_countauthor	unique_id	share_url
text_extrahashtag_namevideoduration?zhttps://www.tiktok.com/@z/video/)viewslikescommentsshareszTikTok: <   )video_idrQ   urlauthor_namedate
engagementrg   r   r   why_relevantcaption_snippetr   z	Filtered z videos outside date rangez)No videos within date range, keeping all c                     | d   d   S )Nr   r   r   )xs    rd   <lambda>zsearch_tiktok.<locals>.<lambda>)  s    Q|_W5 rv   T)keyreversezFound z TikTok videosr   )	_requestsDEPTH_CONFIGr/   r   r   SCRAPECREATORS_BASEr   raise_for_statusjson	Exceptiontype__name__
isinstancedictr   strr   ru   rY   r[   sort)$rw   r   r   r   r   config
core_topicrespr   eraw_entries	raw_itemsentryinfor   rawr   rQ   statsr   r   r   r   r   r   r   r   rc   hashtag_namesr   date_strr   r   r   in_rangeout_of_ranges$                                       rd   search_tiktokr
     s    & &LMM&FGGe\)%<=F&u-J!*YugXfM_F`Eaab	cdB}}"#?3'K@&	
 	yy{ ((-.H$((62BHbKI #eT"99\51DT"# 56"456I E +swwz2./wwvr"%+YY|,1
YY|,1
		/27aii.3!"(bjjb1GGK,	WW\*0b
<F Kq&q$/AEE.4I ~r2 K KGGG$*//
;s# 'z4G	 *3iooc"1%{x,[M
KC &##)%	 & "6:htCRyk2(:,@W!!
 	5+\ !TaAfI)qy2SG2STHTu:H-L9\N*DEF8UEF 
JJ5tJD6#e*^	,-UY  B%aS)*a)9)9(:"QC&@AAB:KD Us7   AM( =:N)
N.N.+N.(	N&1*N!N&!N&video_itemsc                    t         j                  |t         d         }|d   }| r|rt        si S | d| }t        dt	        |       d       i }|D ]^  }|d   }|j                  dd      }	|	s|	j                         }
t	        |
      t        kD  rd	j                  |
dt               d
z   }	|	||<   ` |D ]  }|d   }|j                  dd      }|s	 t        j                  t         dd|it        |      d      }|j                  dk(  r|j                         }|j                  d      }|rwt        |t              rd	j                  d |D              }t        |      }|rB|j                         }
t	        |
      t        kD  rd	j                  |
dt               d
z   }|||<    t!        d |j#                         D              }t        d| dt	        |       d       |S # t        $ r}t        d| d|        Y d}~Qd}~ww xY w)a  Fetch transcripts for top N TikTok videos via ScrapeCreators.

    Strategy:
    1. Use the 'text' field (video description) as baseline caption
    2. For top N, call /video/transcript for spoken-word captions

    Args:
        video_items: Items from search_tiktok()
        token: ScrapeCreators API key
        depth: Depth level for caption limit

    Returns:
        Dict mapping video_id -> caption text (truncated to 500 words)
    r   r   NzEnriching captions for z videosr   rQ   r   rT   z...r   z/video/transcript   r      
transcriptc              3   2   K   | ]  }t        |        y wN)r   ).0ss     rd   	<genexpr>z!fetch_captions.<locals>.<genexpr>i  s     -Ic!f-Is   zTranscript fetch failed for r   c              3   &   K   | ]	  }|sd   yw)rU   Nr   )r  vs     rd   r  z!fetch_captions.<locals>.<genexpr>s  s     0Aaa0s   zGot captions for /)r   r/   r   r   r[   rY   CAPTION_MAX_WORDSri   r   r   status_coder   r   listr   r   sumvalues)r  r   r   r   r   	top_itemscaptionsr   vidrQ   r_   r   r   r   r  r   gots                    rd   fetch_captionsr!  /  s$   & e\)%<=F.)Le9	M\*I"3y>"2'	:;H  !:xx#JJLE5z--xx&8'8 9:UB HSM!  <:hhub!	<==&''89s|#E*	D 3&yy{!XXl3
!*d3%(XX-Ij-I%I
!.z!:J! * 0 0 2u:(99),%8J9J2K)Lu)TJ(2/<6 0*0
0CSE3y>"2'	:;O  	</uBqc:;;	<s   CG$$	H	-HH	c                     t        | ||||      }|j                  dg       }|s|S t        |||      }|D ]   }|d   }	|j                  |	      }
|
s|
|d<   " ||j                  d      dS )am  Full TikTok search: find videos, then fetch captions for top results.

    Args:
        topic: Search topic
        from_date: Start date (YYYY-MM-DD)
        to_date: End date (YYYY-MM-DD)
        depth: 'quick', 'default', or 'deep'
        token: ScrapeCreators API key

    Returns:
        Dict with 'items' list. Each item has a 'caption_snippet' field.
    r   r   r   r   r   )r
  r/   r!  )rw   r   r   r   r   search_resultr   r  r   r  captions              rd   search_and_enrichr%  x  s    ( "%GUEJMgr*E eUE2H  .:,,s#&-D"#	. ]%6%6w%?@@rv   responsec                 &    | j                  dg       S )zyParse TikTok search response to normalized format.

    Returns:
        List of item dicts ready for normalization.
    r   )r/   )r&  s    rd   parse_tiktok_responser(    s     <<$$rv   r  )r   N)r   )!__doc__rV   r   r   r   typingr   r   r   r   r	   requestsr   ImportErrorr   r   r  	frozensetrZ   r]   r   re   floatru   r   r   r   r   r   r
  r!  r%  r(  r   rv   rd   <module>r/     s   
 
 ' 1 1  A  %':$&:$&:     	 88##.&.&
(j
![y C CH %c % %S	 %U %<        Fc s tCH~ d38n #    0 sss s 	s
 s 
#s(^sr Fd38n%FF F 
#s(^	FZ $A$A$A $A 	$A
 $A 
#s(^$AN%DcN %tDcN7K %]  Is   D DD