
    ?i6                     F   d Z ddlZddlZddlmZmZ ddlmZmZmZm	Z	m
Z
 	 ddlZdZdddd	d
dddddZdZ eh d      Zddhddhh dh ddhdhdhdhddhddhdhdhdZded e
e   fd!Zd8d"eded#ee   d efd$Zd%ed efd&Zd'efd(Zd)ed eeef   fd*Zd+eeef   d e	e   fd,Zd-ed ee   fd.Z	 	 d9d%ed/ed0ed1ed)ed eeef   fd2Z	 d:d3eeeef      d)ed1ed eeef   fd4Z	 	 d9d%ed/ed0ed1ed)ed eeef   fd5Zd6eeef   d eeeef      fd7Z y# e$ r dZY w xY w);aH  Instagram Reels search via ScrapeCreators API for /last30days.

Uses ScrapeCreators REST API to search Instagram Reels by keyword, extract
engagement metrics (views, likes, comments), and fetch video transcripts.

Requires SCRAPECREATORS_API_KEY in config. 100 free credits, then PAYG.
API docs: https://scrapecreators.com/docs
    N)datetimetimezone)AnyDictListOptionalSetzhttps://api.scrapecreators.com
      )results_per_pagemax_captions      (      )quickdefaultdeepi  >,   aianatbebydoifinisitmemynoofonorsotoweallandarebutcanforgethashowitsnotthewasyoufromhavejustthatthiswhatwillwithyouraboutraphiphop>   hiphoprB   >   rC   rD   rA   
javascriptjs
typescriptts
artificialintelligencemachinelearningreactjsreact)rC   rD   rA   rB   rF   rE   rH   rG   aimlrN   rM   textreturnc                 *   t        j                  dd| j                               j                         }|D ch c]  }|t        vst        |      dkD  s| }}t        |      }|D ]#  }|t        v s|j                  t        |          % |S c c}w )zHLowercase, strip punctuation, remove stopwords, drop single-char tokens.z[^\w\s]    )	resublowersplit	STOPWORDSlensetSYNONYMSupdate)rQ   wordswtokensexpandedts         T/home/ubuntu/.openclaw/workspace/skills/last30days-official/scripts/lib/instagram.py	_tokenizere   :   s    FF:sDJJL1779EDA!9"4Q!aDFD6{H )=OOHQK() O Es   BBBqueryhashtagsc                 D   t        |       }|}|r| ddj                  |       }t        |      }|r:|D ]5  }|j                         }|D ]  }||v s||k7  s|j                  |         7 |syt	        ||z        }	|	t	        |      z  }
t        dt        d|
            S )zCompute relevance as ratio of query tokens found in text + hashtags.

    Uses ratio overlap (intersection / query_length). Hashtags provide
    an Instagram-specific relevance boost. Floors at 0.1.
    rT   g      ?g?g      ?)re   joinrX   addr[   maxmin)rf   rQ   rg   q_tokenscombinedt_tokenstag	tag_lowerqtoverlapratios              rd   _compute_relevanceru   E   s     H HV1SXXh/01"H  	%C		I %?rYLL$%	% (X%&Gc(m#EsCUO$$    topicc                 \   | j                         j                         }g d}|D ]3  }|j                  |dz         s|t        |      d j                         }5 h d}|j	                         }|D cg c]	  }||vs| }}|rdj                  |      n|}|j                  d      S c c}w )zExtract core subject from verbose query for Instagram search.

    Strips meta/research words to keep only the core product/concept name.
    )zwhat are the bestzwhat is the bestzwhat are the latestzwhat are people saying aboutzwhat do people think aboutzhow do i usez
how to usezhow tozwhat arezwhat isztips forzbest practices forrT   N>   newtopbestgoodnewsgreatviraladvicekillerlatestpromptr^   awesomehottestmethodspopularpromptsupdatesfeaturestrending	practices	prompting
approaches
strategiesrecommendationsz?!.)rX   strip
startswithr[   rY   ri   rstrip)	rw   rQ   prefixespnoiser_   r`   filteredresults	            rd   _extract_core_subjectr   c   s    
 ;;= DH  )??1s7#A=&&(D)
E JJLE 3aAUN3H3#+SXXhF== 4s   3	B)=B)msgc                     t         j                  j                         rBt         j                  j                  d|  d       t         j                  j	                          yy)zGLog to stderr (only in interactive terminals; spinner handles non-TTY).z[Instagram] 
N)sysstderrisattywriteflush)r   s    rd   _logr      sB    
zz

<uB/0

 rv   tokenc                     | ddS )z%Build ScrapeCreators request headers.zapplication/json)z	x-api-keyzContent-Type )r   s    rd   _sc_headersr      s     * rv   itemc                    | j                  d      }|syt        |t              r7	 t        j                  |j                  dd            }|j                  d      S 	 t        j                  t        |      t        j                        }|j                  d      S # t        t        f$ r Y nw xY wt        |      dk\  sd|dd S # t        t        t        f$ r Y yw xY w)zParse date from ScrapeCreators Instagram item to YYYY-MM-DD.

    Handles taken_at as ISO string (e.g. "2026-02-26T16:00:00.000Z")
    or unix timestamp.
    taken_atNZz+00:00z%Y-%m-%dr
   )tz)r/   
isinstancestrr   fromisoformatreplacestrftime
ValueError	TypeErrorr[   fromtimestampintr   utcOSError)r   rH   dts      rd   _parse_dater      s     
*	B "c	''

3(ABB;;z**##CG={{:&& I& 		 r7b=cr7N 	7+ s#   5B >C B.-B.CCcaption_textc                 6    | sg S t        j                  d|       S )z-Extract hashtags from Instagram caption text.z#(\w+))rV   findall)r   s    rd   _extract_hashtagsr      s    	::i..rv   	from_dateto_datedepthc                 d   |sg ddS t         sg ddS t        j                  |t        d         }t        |       }t	        d| d| d|d    d	       	 t        j                  t
         d
d|it        |      d      }|j                          |j                         }|j                  d      xs( |j                  d      xs |j                  d      xs g }
|
d|d    }
g }|
D ]  }t        |t              st        |j                  d|j                  dd                  }|j                  d|j                  dd            }|j                  dd      }t        |t              r|j                  dd      }n5t        |t              r|}n"|j                  d|j                  dd            }|j                  d      xs( |j                  d      xs |j                  d      xs d}|j                  d       xs d}|j                  d!      xs d}|j                  d"      xs |j                  d#      xs i }|j                  d$d      }|j                  d%      }t        |      }t!        |      }t#        |||      }|j                  d&d      }|s|rd'| }|j%                  ||||||||d(||||rd)|dd*  nd)| dd+        |D cg c]  }|d,   s	||d,   cxk  r|k  sn n| }}t'        |      t'        |      z
  }|r|}|r't	        d-| d.       nt	        d/t'        |              |j)                  d0 d12       t	        d3t'        |       d4       d|iS # t        $ r5}	t	        d|	        g t        |	      j                   d|	 dcY d}	~	S d}	~	ww xY wc c}w )5aB  Search Instagram Reels via ScrapeCreators API.

    Args:
        topic: Search topic
        from_date: Start date (YYYY-MM-DD)
        to_date: End date (YYYY-MM-DD)
        depth: 'quick', 'default', or 'deep'
        token: ScrapeCreators API key

    Returns:
        Dict with 'items' list and optional 'error'.
    z$No SCRAPECREATORS_API_KEY configureditemserrorzrequests library not installedr   zSearching Instagram for 'z	' (depth=z, count=r   )z/v1/instagram/reels/searchrf      paramsheaderstimeoutzScrapeCreators error: : Nreelsr   dataidpk 	shortcodecodecaptionrQ   descvideo_play_countvideo_view_count
play_countr   
like_countcomment_countowneruserusernamevideo_durationurlzhttps://www.instagram.com/reel/)viewslikescommentszInstagram: <   )video_idrQ   r   author_namedate
engagementrg   duration	relevancewhy_relevantcaption_snippetr   z	Filtered z reels outside date rangez(No reels within date range, keeping all c                     | d   d   S )Nr   r   r   )xs    rd   <lambda>z"search_instagram.<locals>.<lambda>7  s    Q|_W5 rv   T)keyreversezFound z Instagram reels)	_requestsDEPTH_CONFIGr/   r   r   SCRAPECREATORS_BASEr   raise_for_statusjson	Exceptiontype__name__r   dictr   r   r   ru   appendr[   sort)rw   r   r   r   r   config
core_topicrespr   e	raw_itemsr   rawreel_pkr   caption_objrQ   r   r   r   r   r   r   date_strrg   r   r   r   in_rangeout_of_ranges                                 rd   search_instagramr     s   & &LMM&FGGe\)%<=F&u-J$ZL	%PbIcHdde	fgB}}"##=>Z(&	
 	yy{ !PTXXg%6P$((6:JPbI 56"456I E ;#t$ cggdCGGD"$567GGK)<=	 ggi,k4(??62.DS)D77637762#67D WW/0mCGG<N4OmSVSZSZ[gShmlm
WW\*/a
05A  9CGGFO9rii
B/ 77+, s# %T* 'z4B	 ggeR y3I;?C&##)
 ! "9=k$s)5[Q[P\C]!
 	W;| !TaAfI)qy2SG2STHTu:H-L9\N*CDE7E
|DE 
JJ5tJD6#e*-	./Uo  B%aS)*a)9)9(:"QC&@AABT Us1   AM, 
N- N-/N-,	N*5*N%N*%N*video_itemsc                 
   t         j                  |t         d         }|d   }| r|rt        si S | d| }t        dt	        |       d       i }|D ]^  }|d   }|j                  dd      }	|	s|	j                         }
t	        |
      t        kD  rd	j                  |
dt               d
z   }	|	||<   ` |D ]  }|d   }|j                  dd      }|s	 t        j                  t         dd|it        |      d      }|j                  dk(  r|j                         }|j                  d      xs g }|rlt        |t              r\d	j                  d |D              }|rB|j                         }
t	        |
      t        kD  rd	j                  |
dt               d
z   }|||<    t        d |j!                         D              }t        d| dt	        |       d       |S # t        $ r}t        d| d|        Y d}~Jd}~ww xY w)a  Fetch transcripts for top N Instagram reels via ScrapeCreators.

    Strategy:
    1. Use the 'text' field (caption) as baseline
    2. For top N, call /v2/instagram/media/transcript for spoken-word captions

    Args:
        video_items: Items from search_instagram()
        token: ScrapeCreators API key
        depth: Depth level for caption limit

    Returns:
        Dict mapping video_id -> caption text (truncated to 500 words)
    r   r   NzEnriching captions for z reelsr   rQ   r   rT   z...r   z/v2/instagram/media/transcript   r      transcriptsc              3      K   | ]7  }t        |t              r%|j                  d       r|j                  d d       9 yw)rQ   r   N)r   r   r/   ).0rc   s     rd   	<genexpr>z!fetch_captions.<locals>.<genexpr>w  s7      /./%a.155= fb)/s   =?zTranscript fetch failed for r   c              3   &   K   | ]	  }|sd   yw)rU   Nr   )r  vs     rd   r  z!fetch_captions.<locals>.<genexpr>  s     0Aaa0s   zGot captions for /)r   r/   r   r   r[   rY   CAPTION_MAX_WORDSri   r   r   status_coder   r   listr   sumvalues)r  r   r   r   r   	top_itemscaptionsr   vidrQ   r_   r   r   r   r
  transcript_textr   gots                     rd   fetch_captionsr  =  s(   & e\)%<=F.)Le9	M\*I"3y>"2&	9:H  !:xx#JJLE5z--xx&8'8 9:UB HSM!  <:hhub!	<==&''EFs|#E*	D 3&yy{"hh}5;:k4#@&)hh /3>/ 'O ' / 5 5 7u:(99.1hhu=O>O7P.QTY.YO(73<: 0*0
0CSE3y>"2&	9:O  	</uBqc:;;	<s   CG	H&G==Hc                     t        | ||||      }|j                  dg       }|s|S t        |||      }|D ]   }|d   }	|j                  |	      }
|
s|
|d<   " ||j                  d      dS )ao  Full Instagram search: find reels, then fetch captions for top results.

    Args:
        topic: Search topic
        from_date: Start date (YYYY-MM-DD)
        to_date: End date (YYYY-MM-DD)
        depth: 'quick', 'default', or 'deep'
        token: ScrapeCreators API key

    Returns:
        Dict with 'items' list. Each item has a 'caption_snippet' field.
    r   r   r   r   r   )r  r/   r  )rw   r   r   r   r   search_resultr   r  r   r  r   s              rd   search_and_enrichr    s    ( %UIwuMMgr*E eUE2H  .:,,s#&-D"#	. ]%6%6w%?@@rv   responsec                 &    | j                  dg       S )z|Parse Instagram search response to normalized format.

    Returns:
        List of item dicts ready for normalization.
    r   )r/   )r  s    rd   parse_instagram_responser!    s     <<$$rv   )N)r   N)r   )!__doc__rV   r   r   r   typingr   r   r   r   r	   requestsr   ImportErrorr   r   r  	frozensetrZ   r]   r   re   floatru   r   r   r   r   r   r  r  r  r!  r   rv   rd   <module>r(     s   
 
 ' 1 1  7  %':$&:$&:     	 88##.&.&
(j
![y C CH %c % %S	 %U %<        Fc s tCH~ d38n # @/C /DI / ~~~ ~ 	~
 ~ 
#s(^~H Hd38n%HH H 
#s(^	H^ $A$A$A $A 	$A
 $A 
#s(^$AN%tCH~ %$tCH~:N %}  Is   D D D 