
    ?ix%                        d Z ddlZddlmZmZmZmZ ddlmZ ddl	m
Z
mZ dedee   fd	Z G d
 de      Z	 	 	 ddedee   dededeeeef      f
dZdedeeef   fdZddee   dedeeeef      fdZddee   dedee   fdZ	 	 	 ddeeef   dee   dededeeef   f
dZ	 ddeeef   dededeeef   fdZy) zReddit thread enrichment with real engagement metrics.

Supports two backends:
1. ScrapeCreators API (preferred) - no rate limits, 1 credit/call
2. reddit.com/.json (fallback) - free but 429-prone
    N)AnyDictListOptional)urlparse   )httpdatesurlreturnc                 ^    	 t        |       }d|j                  vry|j                  S #  Y yxY w)zxExtract the path from a Reddit URL.

    Args:
        url: Reddit URL

    Returns:
        Path component or None
    z
reddit.comN)r   netlocpath)r   parseds     X/home/ubuntu/.openclaw/workspace/skills/last30days-official/scripts/lib/reddit_enrich.pyextract_reddit_pathr      s3    #v}},{{s   ( ( ,c                       e Zd ZdZy)RedditRateLimitErrorz3Raised when Reddit returns HTTP 429 (rate limited).N)__name__
__module____qualname____doc__     r   r   r   !   s    =r   r   	mock_datatimeoutretriesc                     ||S t        |       }|sy	 t        j                  |||      }|S # t        j                  $ r(}|j                  dk(  rt        d|        |Y d}~yd}~ww xY w)ak  Fetch Reddit thread JSON data.

    Args:
        url: Reddit thread URL
        mock_data: Mock data for testing
        timeout: HTTP timeout per attempt in seconds
        retries: Number of retries on failure

    Returns:
        Thread data dict or None on failure

    Raises:
        RedditRateLimitError: When Reddit returns 429 (caller should bail)
    Nr   r   i  z#Reddit rate limited (429) fetching )r   r	   get_reddit_json	HTTPErrorstatus_coder   )r   r   r   r   r   dataes          r   fetch_thread_datar%   &   sw    ( s#D##D'7K>> ==C&)LSE'RSYZZs   . A)A$$A)r#   c           
         dg d}t        | t              rt        |       dk  r|S | d   }t        |t              r|j	                  di       j	                  dg       }|r|d   j	                  di       }|j	                  d      |j	                  d      |j	                  d	      |j	                  d
      |j	                  d      |j	                  d      |j	                  dd      dd d|d<   t        |       dk\  r| d   }t        |t              r|j	                  di       j	                  dg       }|D ]  }|j	                  d      dk7  r|j	                  di       }|j	                  d      s<|j	                  dd      |j	                  d
      |j	                  dd      |j	                  dd      dd |j	                  d      d}|d   j                  |        |S )zParse Reddit thread JSON into structured data.

    Args:
        data: Raw Reddit JSON response

    Returns:
        Dict with submission and comments data
    N)
submissioncommentsr   r   r#   childrenscorenum_commentsupvote_ratiocreated_utc	permalinktitleselftext i  )r*   r+   r,   r-   r.   r/   r0   r'      kindt1bodyauthor	[deleted],  )r*   r-   r6   r5   r.   r(   )
isinstancelistlendictgetappend)	r#   resultsubmission_listingr)   sub_datacomments_listingchildc_datacomments	            r   parse_thread_datarF   J   s    F
 dD!SY] a$d+%))&"599*bI{vr2H!g. (^ < (^ <'||M:%\\+6!g.$LLR8#>$F<  4yA~7&-'++FB7;;JKH! 399V$,62.zz&) $ZZ3#)::m#<$jj;?"JJvr24C8!'K!8 z"))'23  Mr   r(   limitc                 z    | D cg c]  }|j                  d      dvs| }}t        |d d      }|d| S c c}w )zGet top comments sorted by score.

    Args:
        comments: List of comment dicts
        limit: Maximum number to return

    Returns:
        Top comments sorted by score
    r6   r7   z	[removed]c                 &    | j                  dd      S Nr*   r   r=   cs    r   <lambda>z"get_top_comments.<locals>.<lambda>   s    !%%2C r   TkeyreverseN)r=   sorted)r(   rG   rN   validsorted_commentss        r   get_top_commentsrV      sK     !V1AEE(O;U$UQVEV U(CTRO6E"" Ws   88c                    g }| d|dz   D ]  }|j                  dd      j                         rt              dk  r4g d}t        fd|D              rMdd }t              dkD  r:t	        |      D ]  \  }}|d	v s|d
kD  s|d|dz    } n |j                         dz   }|j                  |       t        |      |k\  s |S  |S )aM  Extract key insights from top comments.

    Uses simple heuristics to identify valuable comments:
    - Has substantive text
    - Contains actionable information
    - Not just agreement/disagreement

    Args:
        comments: Top comments
        limit: Max insights to extract

    Returns:
        List of insight strings
    Nr2   r5   r1      )z@^(this|same|agreed|exactly|yep|nope|yes|no|thanks|thank you)\.?$z^lol|lmao|hahaz^\[deleted\]z^\[removed\]c              3   f   K   | ](  }t        j                  |j                                * y w)N)rematchlower).0pr5   s     r   	<genexpr>z+extract_comment_insights.<locals>.<genexpr>   s"     @Qrxx4::<(@s   .1   z.!?2   r   z...)r=   stripr;   any	enumeraterstripr>   )	r(   rG   insightsrE   skip_patternsinsighticharr5   s	           @r   extract_comment_insightsrk      s     HJUQY' {{62&,,.s4y2~
 @-@@ t*t9s?$W- 345=QV%dqsmG3
 "..*U2 x=E!O=< Or   itemmock_thread_datac           
         | j                  dd      }t        ||||      }|s| S t        |      }|j                  d      }|j                  dg       }|ra|j                  d      |j                  d      |j                  d      d	| d
<   |j                  d      }	|	rt        j                  |	      | d<   t        |      }
g | d<   |
D ]  }|j                  dd      }|rd| nd}| d   j                  |j                  dd      t        j                  |j                  d            |j                  dd      |j                  dd      dd |d        t        |
      | d<   | S )u  Enrich a Reddit item with real engagement data.

    Args:
        item: Reddit item dict
        mock_thread_data: Mock data for testing
        timeout: HTTP timeout per attempt (default 10s for enrichment)
        retries: Number of retries (default 1 — fail fast for enrichment)

    Returns:
        Enriched item dict

    Raises:
        RedditRateLimitError: Propagated so caller can bail on remaining items
    r   r1   r   r'   r(   r*   r+   r,   )r*   r+   r,   
engagementr-   datetop_commentsr.   https://reddit.comr   r6   r5   N   r*   rp   r6   excerptr   comment_insights)r=   r%   rF   r
   timestamp_to_daterV   r>   rk   )rl   rm   r   r   r   thread_datar   r'   r(   r-   rq   rN   r.   comment_urls                 r   enrich_reddit_itemrz      st   ( ((5"
C $C)97T[\K{+FL)Jzz*b)H ^^G,&NN>:&NN>:
\ !nn]3 22;?DL $H-LD 	EE+r*	:C*9+6^##UU7A&++AEE-,@AeeHb)uuVR(#.%
 		  8ED	Kr   tokenc                 ,   ddl m} | j                  dd      }|s| S |j                  ||      }|s| S g }|dd D ]  }|j                  dd      }|r|dv r|j                  d	      xs |j                  d
d      }	|j                  dd      }
|j                  dd      }|rd| nd}|j	                  |	|j                  d      r$t        j                  |j                  d            nd|
|dd |dd |d        |j                  d d       g | d<   |D ]k  }| d   j	                  |j                  d
d      |j                  d      |j                  dd      |j                  dd      |j                  dd      d       m t        |      | d<   | S )aJ  Enrich a Reddit item using ScrapeCreators comment API.

    No rate limit risk. Uses 1 credit per call.

    Args:
        item: Reddit item dict (already has engagement from search)
        token: ScrapeCreators API key
        timeout: HTTP timeout

    Returns:
        Enriched item with top_comments and comment_insights
    r   )redditr   r1   N
   r5   rI   upsr*   r   r6   r7   r.   rr   r-   r8   rs   )r*   rp   r6   r5   ru   r   c                 &    | j                  dd      S rK   rL   rM   s    r   rO   z'enrich_reddit_item_sc.<locals>.<lambda>7  s    AEE'1$5 r   TrP   rq   rp   ru   rt   rv   )	r1   r}   r=   fetch_post_commentsr>   r
   rw   sortrk   )rl   r{   r   
reddit_modr   raw_commentsrq   rN   r5   r*   r6   r.   ry   s                r   enrich_reddit_item_scr     s   " '
((5"
C11#u=LL#2 uuVR t99e1gq 1x-EE+r*	:C*9+6EFUU=EYE++AEE-,@A_c#JDSz
 	& 5tDD ^##UU7A&EE&MeeHb)uuY+55#%
 	  8ED	Kr   )NrX      )r~   )   )Nr~   r   )rX   )r   rZ   typingr   r   r   r   urllib.parser   r1   r	   r
   strr   	Exceptionr   intr%   rF   rV   rk   rz   r   r   r   r   <module>r      s   
 , , ! S Xc] $	9 	 !%	!	!~! ! 	!
 d38n!H6C 6DcN 6r#tDz ## #tDcN?S #&/tDz /# /d3i /h (,	=
sCx.=tn= = 	=
 
#s(^=F =
sCx.== = 
#s(^	=r   