
    ?iGJ                        d Z ddlZddlZddlmZ ddlmZmZ ddlmZm	Z	m
Z
mZmZ 	 ddlZddlmZ dZdd	d
ddd	d
dddd
dddddZ eh d      ZdefdZdede	eef   fdZdedefdZdedede
e   fdZ eh d      Z	 	 d6de
e	eef      dedede
e   fdZdee   fdZd7d e	eef   d!ed"ede	eef   fd#Z	 	 d8d$eded%ed&ede
e	eef      f
d'Z 	 	 d8d(ed$eded%ed&ede
e	eef      fd)Z!d*edede
e	eef      fd+Z"d,e
e	eef      de
e	eef      fd-Z#	 	 d9ded.ed/ededede	eef   fd0Z$	 d:d1e
e	eef      dedede
e	eef      fd2Z%	 	 d9ded.ed/ededede	eef   fd3Z&d4e	eef   de
e	eef      fd5Z'y# e$ r dZY _w xY w);a  Reddit search via ScrapeCreators API for /last30days.

Uses ScrapeCreators REST API to search Reddit globally, discover relevant
subreddits, run targeted subreddit searches, and fetch comment trees.

Replaces openai_reddit.py as the primary Reddit search backend.
Falls back to openai_reddit.py if SCRAPECREATORS_API_KEY is missing but
OPENAI_API_KEY is present.

Requires SCRAPECREATORS_API_KEY in config (same key as TikTok + Instagram).
API docs: https://scrapecreators.com/docs
    N)Counter)datetimetimezone)AnyDictListOptionalSet   )httpz(https://api.scrapecreators.com/v1/reddit      week)global_searchessubreddit_searchescomment_enrichments	timeframe   month   )quickdefaultdeep>*   aaninisofontoareforhownewthetopbestgoodnewstipswhatwithgreatguideusingwhichadvicekillerlatestpromptupdateawesomehottestmethodspopularpromptsupdatesfeaturestrendingtutorial	practices	prompting
approaches
strategiesrecommendationsmsgc                     t         j                  j                  d|  d       t         j                  j                          y)zLog to stderr.z	[Reddit] 
N)sysstderrwriteflush)rD   s    Q/home/ubuntu/.openclaw/workspace/skills/last30days-official/scripts/lib/reddit.py_logrL   B   s-    JJyR()JJ    tokenreturnc                     | ddS )z%Build ScrapeCreators request headers.zapplication/json)z	x-api-keyzContent-Type )rN   s    rK   _sc_headersrR   H   s     * rM   topicc                 \   | j                         j                         }g d}|D ]3  }|j                  |dz         s|t        |      d j                         }5 |j	                         }|D cg c]  }|t
        vs| }}|rdj                  |      n|}|j                  d      S c c}w )zyExtract core subject from verbose query.

    Strips meta/research words to keep only the core product/concept name.
    )zwhat are the bestzwhat is the bestzwhat are the latestzwhat are people saying aboutzwhat do people think aboutzhow do i usez
how to usezhow tozwhat arezwhat isztips forzbest practices for N?!.)lowerstrip
startswithlensplitNOISE_WORDSjoinrstrip)rS   textprefixespwordswfilteredresults           rK   _extract_core_subjectrf   P   s    
 ;;= DH  )??1s7#A=&&(D) JJLE 9aA[$89H9#+SXXhF== :s   /B)=B)depthc                 ^   t        |       }|g}| j                         j                  d      }|j                         |j                         k7  r-t	        |j                               dk  r|j                  |       |dv r|j                  | d       |dk(  r|j                  | d       |S )a]  Generate multiple Reddit search queries from a topic.

    Uses local logic (no LLM call needed):
    1. Extract core subject (strip noise words)
    2. Include original topic if different from core
    3. For default/deep: add casual/review variant
    4. For deep: add problem/issues variant

    Returns 1-4 query strings depending on depth.
    rV   r   )r   r   z worth it OR thoughts OR reviewr   z$ issues OR problems OR bug OR broken)rf   rX   r^   rW   rZ   r[   append)rS   rg   corequeriesoriginal_cleans        rK   expand_reddit_queriesrm   i   s     !'DfG [[]))%0Nzz|~++--#n6J6J6L2MQR2R~&##$>?@$CDENrM   >	   
helpmefindfindaredditfindthatsongnamethatsongtipofmytonguewhatsthissongsubredditdramawhatisthissongwhatisthisthingresultsmax_subsc                    |rt        |      nd}|r't        |j                         j                               n	t               }t	               }| D ]  }|j                  dd      }|sd}|j                         |rt        fd|D              r|dz  }t        v r|dz  }|j                  d      xs |j                  dd	      }	|	r
|	d
kD  r|dz  }||xx   |z  cc<    |j                  |      D 
cg c]  \  }}
|	 c}
}S c c}
}w )a  Extract top subreddits from global search results with relevance weighting.

    Uses frequency + topic-word matching + utility-sub penalties + engagement
    bonus to find discussion subs rather than utility/meta subs.

    Args:
        results: List of post dicts from global search
        topic: Original search topic (for relevance matching)
        max_subs: Maximum subreddits to return

    Returns:
        Top subreddit names sorted by weighted score
     	subredditg      ?c              3   D   K   | ]  }t        |      d kD  s|v   yw)r   N)rZ   ).0rc   	sub_lowers     rK   	<genexpr>z&discover_subreddits.<locals>.<genexpr>   s     OCFQJa9nOs    	 g       @g333333?upsscorer   d   g      ?)	rf   setrW   r[   r   getanyUTILITY_SUBSmost_common)rw   rS   rx   rj   
core_wordsscorespostsubbaser   _r~   s              @rK   discover_subredditsr      s   $ ,1 'bD.2TZZ\'')*JYF hh{B'  IIK	#OjOOCKD $CKD hhuo5'1!539CKDst-0 %00:;FCC;;;s   6Dc                     | sy	 t        j                  t        |       t        j                        }|j                  d      S # t        t        t        f$ r Y yw xY w)z%Convert Unix timestamp to YYYY-MM-DD.N)tzz%Y-%m-%d)	r   fromtimestampfloatr   utcstrftime
ValueError	TypeErrorOSError)created_utcdts     rK   _parse_dater      sP    ##E+$68<<H{{:&&	7+ s   >A AAr   idxsource_labelc                 @   | j                  dd      }|rd| n| j                  dd      }|rd|vrd}d| | j                  dd      t        | j                  dd            j                         |t        | j                  d	d            j                         t        | j                  d
            | j                  d      xs | j                  dd      | j                  dd      | j                  d      ddd| dt        | j                  dd            dd d
S )z>Normalize a ScrapeCreators Reddit post to our internal format.	permalinkrz   zhttps://www.reddit.comurlz
reddit.comRidtitler{   r   r   r   r   num_commentsupvote_ratio)r   r   r   gffffff?zReddit z searchselftextNi  )
r   	reddit_idr   r   r{   date
engagement	relevancewhy_relevantr   )r   strrX   r   )r   r   r   r   r   s        rK   _normalize_postr      s   b)I2;"9+
.%QSATC |3& #iXXdB'TXXgr*+113+r2399;DHH]34XXe_<!(< HH^Q7 HH^4

 !,w7R01$37 rM   querysortr   c                    t         st        d       	 ddlm}  || ||d      }t         d| }t        |      }t        j                  |d<   t        j                  ||dd	      }|j                  d
|j                  dg             S 	 t        j                  t         d| ||dt        |      d      }
|
j                          |
j                         }|j                  d
|j                  dg             S # t        $ r}	t        d|	        g cY d}	~	S d}	~	ww xY w# t        $ r}	t        d|	        g cY d}	~	S d}	~	ww xY w)a/  Search across all of Reddit via ScrapeCreators global search.

    Args:
        query: Search query
        token: ScrapeCreators API key
        sort: Sort order (relevance, hot, top, new)
        timeframe: Time filter (hour, day, week, month, year, all)

    Returns:
        List of post dicts
    z6requests library not installed, falling back to urllibr   	urlencode)r   r   r   z/search?
User-Agent   r   headerstimeoutretriespostsdatazGlobal search error (urllib): Nz/searchparamsr   r   zGlobal search error: )	_requestsrL   urllib.parser   SCRAPECREATORS_BASErR   r   
USER_AGENTr   	Exceptionraise_for_statusjson)r   rN   r   r   r   r   r   r   r   eresps              rK   _global_searchr      s7   " EF
	.9UVF()&:C!%(G$(OOGL!88C"aHD88GTXXfb%9::
}}"#7+"DyI&	
 	yy{xx&"!566  	1!56I	  $QC()	s=   A5C9 
A.D 9	DDDD	E(D=7E=Er{   c                    t         sx	 ddlm}  || |||d      }t         d| }t	        |      }t
        j                  |d<   t        j                  ||dd      }	|	j                  d	|	j                  d
g             S 	 t        j                  t         d| |||dt	        |      d      }|j                          |j                         }	|	j                  d	|	j                  d
g             S # t        $ r}
t        d|  d|
        g cY d}
~
S d}
~
ww xY w# t        $ r}
t        d|  d|
        g cY d}
~
S d}
~
ww xY w)a  Search within a specific subreddit via ScrapeCreators.

    Args:
        subreddit: Subreddit name (without r/)
        query: Search query
        token: ScrapeCreators API key
        sort: Sort order
        timeframe: Time filter

    Returns:
        List of post dicts
    r   r   )r{   r   r   r   z/subreddit/search?r   r   r   r   r   r   z&Subreddit search error (urllib) for r/z: Nz/subreddit/searchr   zSubreddit search error for r/r   r   r   r   rR   r   r   r   r   rL   r   r   )r{   r   rN   r   r   r   r   r   r   r   r   r   s               rK   _subreddit_searchr     sO   & 	.&9  F )));F8DC!%(G$(OOGL!88C"aHD88GTXXfb%9::
}}"##45&&	  &

 	yy{xx&"!566%  	9)BqcJKI	&  ,YKr!=>	s=   A6C0  A/D 0	D9DDD	D?"D:4D?:D?r   c                 l   t         su	 ddlm}  |d| i      }t         d| }t	        |      }t
        j                  |d<   t        j                  ||dd      }|j                  d	|j                  d
g             S 	 t        j                  t         dd| it	        |      d      }|j                          |j                         }|j                  d	|j                  d
g             S # t        $ r}t        d|        g cY d}~S d}~ww xY w# t        $ r}t        d|        g cY d}~S d}~ww xY w)zFetch comments for a Reddit post via ScrapeCreators.

    Args:
        url: Reddit post URL or permalink
        token: ScrapeCreators API key

    Returns:
        List of comment dicts with score, author, body, etc.
    r   r   r   z/post/comments?r   r   r   r   commentsr   zComment fetch error (urllib): Nz/post/commentsr   zComment fetch error: r   )	r   rN   r   r   api_urlr   r   r   r   s	            rK   fetch_post_commentsr   K  s(    
	.s|,F,-_VHEG!%(G$(OOGL!88GWb!LD88J(<==
}}"#>23<&	
 	yy{xx
DHHVR$899  	1!56I	  $QC()	s=   A3C* =A,D *	D3DDD	D3D.(D3.D3r   c                    t               }t               }g }| D ]k  }|j                  dd      }|j                  dd      }|r||v r.|r||v r5|r|j                  |       |r|j                  |       |j                  |       m |S )z9Deduplicate posts by reddit_id, keeping first occurrence.r   rz   r   )r   r   addri   )r   seen_ids	seen_urlsuniquer   ridr   s          rK   _dedupe_postsr   t  s    uHIF hh{B'hhub!3(?3)#LLMM#d MrM   	from_dateto_datec                 J   |sg ddS t         j                  |t         d         }|d   }t        | |      }t        d|  dt	        |       d|        g }|d   }	t        |d	|	       D ]a  \  }
}|
d
k(  rdnd}t        d|
dz    d|	 d| d| d	       t        ||||      }t        dt	        |       d       |j                  |       c g }t        |      D ]&  \  }
}t        ||
dz   d      }|j                  |       ( t        || |d         }t        d|        t        |       }|d	|d    D ]  }t        d| d| d       t        |||d|      }t        dt	        |       d|        t        |      D ]5  \  }}t        |t	        |      |z   dz   d|       }|j                  |       7  t        |      }t        dt	        |       d        g }d
}|D ]F  }|d!   r#||d!   cxk  r|k  rn n|j                  |       +|d!   |j                  |       B|dz  }H |r|}|r't        d"| d#       nt        d$t	        |              |j                  d% d&'       t        |      D ]  \  }
}d(|
dz    |d)<    t        d*t	        |       d+       d,|iS )-a  Full Reddit search: multi-query global discovery + subreddit drill-down.

    This is the main entry point. Replaces openai_reddit.search_reddit().

    Args:
        topic: Search topic
        from_date: Start date (YYYY-MM-DD)
        to_date: End date (YYYY-MM-DD)
        depth: 'quick', 'default', or 'deep'
        token: ScrapeCreators API key

    Returns:
        Dict with 'items' list and optional 'error'.
    z$No SCRAPECREATORS_API_KEY configured)itemserrorr   r   z
Expanded 'z' into z
 queries: r   Nr   r   r&   zGlobal search r   /z: 'z' (sort=))r   r   z  -> z resultsglobalr   )rS   rx   zDiscovered subreddits: zSubreddit search: r/z for ''z results from r/zr/zAfter dedup: z unique postsr   z	Filtered z posts outside date rangez(No posts within date range, keeping all c                 N    | j                  di       j                  dd      xs dS )Nr   r   r   r   )xs    rK   <lambda>zsearch_reddit.<locals>.<lambda>  s#    quu\2.227A>C! rM   Tkeyreverser   r   zFinal: z Reddit postsr   )DEPTH_CONFIGr   rm   rL   rZ   	enumerater   extendr   ri   r   rf   r   r   r   )rS   r   r   rg   rN   configr   rk   all_raw_posts
max_globalir   r   r   	all_itemsr   itemdiscovered_subsrj   r   	sub_postsjin_rangeout_of_ranges                           rK   search_redditr     s1   * &LMMe\)%<=F{#I $E51G:eWGCL>G9	EF M)*Jgkz23 $51f{%~acU!J<s5'$qIJue$)LuSZL)*U#$ I]+ 4tQUH5
 *-uvVjOklO"?"3	45 'D<'; <= ##C5tfA67%c4[T]^	uS^$$4SE:; + 	#GAt"4Y!);a)?2cULDT"	#	# i(I=Y(	67 HL <If@@OOD!&\!OOD!AL 	9\N*CDE7I7GHI NND   Y' 41YT
 	73y>"-	01YrM   r   c           
         t         j                  |t         d         }|d   }| r|s| S | d| }t        dt        |       d       |D ]  }|j                  dd      }|st	        ||      }|s(g }	g }
t        |dd       D ]  \  }}|j                  d	d      }|r|d
v r |j                  d      xs |j                  dd      }|j                  dd      }|j                  dd      }|rd| nd}|dk(  rdnd}|	j                  |t        |j                  d            ||d| |d       t        |      dk\  s|dvs|dd }t        |      dkD  r:t        |      D ]  \  }}|dv s|dkD  s|d|dz    } n |j                         dz   }|
j                  |       ! |	j                  d d       |	dd |d <   |
dd |d!<    | S )"a  Enrich top items with comment data from ScrapeCreators.

    Args:
        items: Reddit items from search_reddit()
        token: ScrapeCreators API key
        depth: Depth for comment limit

    Returns:
        Items with top_comments and comment_insights added.
    r   r   NzEnriching comments for z postsr   rz   
   body)	[deleted]	[removed]r   r   r   authorr   r   zhttps://reddit.comi  i,  r   )r   r   r   excerptr   r   )r   r   AutoModerator   z.!?2   r   z...c                 &    | j                  dd      S )Nr   r   r   )cs    rK   r   z&enrich_with_comments.<locals>.<lambda>/  s    gq(9 rM   Tr   top_commentscomment_insights)
r   r   rL   rZ   r   r   ri   r   r^   r   )r   rN   rg   r   max_comments	top_itemsr   r   raw_commentsr   insightscir   r   r   r   r   comment_urlmax_excerptinsightr   chars                         rK   enrich_with_commentsr
    s*    e\)%<=F/0Lm|$I"3y>"2&	9: 11hhub!*36 |CR01 	)EB55$D4#==EE%L5AEE'1$5EUU8[1Fk2.I>G.yk:RK "$q#cK#AEE-$89 -"!  4yB61\#\t*t9s?#,W#5 ;45=QV&-dqsmG!;
 #*.."2U":(=	)B 	94H+CR0^#+CR= c11f LrM   c                 t    t        | ||||      }|j                  dg       }|r|rt        |||      }||d<   |S )a  Full Reddit pipeline: search + comment enrichment.

    This is the convenience function that does everything.

    Args:
        topic: Search topic
        from_date: Start date (YYYY-MM-DD)
        to_date: End date (YYYY-MM-DD)
        depth: 'quick', 'default', or 'deep'
        token: ScrapeCreators API key

    Returns:
        Dict with 'items' list. Items include top_comments and comment_insights.
    r   )r   r   r
  )rS   r   r   rg   rN   re   r   s          rK   search_and_enrichr  7  sG    * 5)WeUCFJJw#E$UE59wMrM   responsec                 &    | j                  dg       S )zParse ScrapeCreators response to item list.

    Compatibility shim matching openai_reddit.parse_reddit_response() signature.
    r   r   )r  s    rK   parse_reddit_responser  V  s    
 <<$$rM   )rz   r   )r   )r   r   )r   N)r   )(__doc__rerG   collectionsr   r   r   typingr   r   r   r	   r
   requestsr   ImportErrorrz   r   r   r   	frozensetr\   r   rL   rR   rf   rm   r   intr   r   r   r   r   r   r   r   r
  r  r  rQ   rM   rK   <module>r     s   
 
  ' 1 1  @ 
  	  	  	,   c s tCH~      2 S T#Y <    .<$sCx.!.<.< .< 
#Y	.<b $sCx. s # UYZ]_bZbUc < 	,,, , 	,
 
$sCx.,f 444 4 	4
 4 
$sCx.4n&	&& 
$sCx.&Rd38n- $tCH~2F 0 ^ ^ ^  ^  	^ 
 ^  
#s(^^ H KS#XKK K 
$sCx.	Kd   	
  
#s(^>%DcN %tDcN7K %A  Is   F FF