
    ?i$F                        d Z ddlZddlZddlZddlmZmZmZmZ ddl	m
Z
mZ ddgZdefd	Zdefd
Zde
j                   defdZdZdZdZdedeeeef      fdZdedeeeef      fdZdedeeef   fdZddddZdZdedefdZdedefdZdeded ed!edeeef   f
d"Z	 	 	 	 	 d2d#ededed$ed%ed&ed!ed'ee   d(ee   d)edeeef   fd*Z	 d3d+ee   ded$ed%ed,edeeeef      fd-Z d.edefd/Z!d0eeef   deeeef      fd1Z"y)4z1OpenAI Responses API client for Reddit discovery.    N)AnyDictListOptional   )httpenvzgpt-4.1zgpt-4omsgc                     t         j                  j                  d|  d       t         j                  j                          y)zLog error to stderr.z[REDDIT ERROR] 
Nsysstderrwriteflushr
   s    X/home/ubuntu/.openclaw/workspace/skills/last30days-official/scripts/lib/openai_reddit.py
_log_errorr      s-    JJse2./JJ    c                     t         j                  j                  d|  d       t         j                  j                          y)zLog info to stderr.z	[REDDIT] r   Nr   r   s    r   	_log_infor      s-    JJyR()JJr   errorreturnc                     | j                   dvry| j                  sy| j                  j                         t        fddD              S )z:Check if error is due to model access/verification issues.)  i  Fc              3   &   K   | ]  }|v  
 y w)N ).0phrase
body_lowers     r   	<genexpr>z)_is_model_access_error.<locals>.<genexpr>#   s      v# s   )verifiedzorganization must bezdoes not have accessznot availablez	not found)status_codebodylowerany)r   r    s    @r   _is_model_access_errorr'      sL    
*::!!#J  3   r   z#https://api.openai.com/v1/responsesz/https://chatgpt.com/backend-api/codex/responsesa  You are a research assistant for a skill that summarizes what people are discussing in the last 30 days. Your goal is to find relevant Reddit threads about the topic and return ONLY the required JSON. Be inclusive (return more rather than fewer), but avoid irrelevant results. Prefer threads with discussion and comments. If you can infer a date, include it; otherwise use null. Do not include developers.reddit.com or business.reddit.com.chunkc                 P   | j                  d      }g }|D ]6  }|j                  d      s|j                  |dd j                                8 |sydj	                  |      j                         }|r|dk(  ry	 t        j                  |      S # t
        j                  $ r Y yw xY w)z,Parse a single SSE chunk into a JSON object.r   zdata:   Nz[DONE])split
startswithappendstripjoinjsonloadsJSONDecodeError)r(   lines
data_lineslinedatas        r   _parse_sse_chunkr7   8   s    KKEJ 0??7#d12hnn./0 99Z &&(D48#zz$ s   :B B%$B%rawc                    g }d}| j                  d      D ]D  }||z  }d|v s|j                  dd      \  }}t        |      }||j                  |       d|v r8F |j	                         rt        |      }||j                  |       |S )z6Parse SSE stream from raw text and return JSON events. T)keepends

r   )
splitlinesr+   r7   r-   r.   )r8   eventsbufferr(   event_chunkevents         r   _parse_sse_stream_rawrB   N   s    #%FF. %%"(,,vq"9K$[1E e$	 % ||~ (MM% Mr   c                 
   t        |       }t        |      D ]s  }t        |t              s|j	                  d      dk(  r&t        |j	                  d      t              r|d   c S t        |j	                  d      t              sn|d   c S  d}|D ]a  }t        |t              s|j	                  d      }t        |t
              r||z  };|j	                  d      }t        |t
              s]||z  }c |rddd	|d
gdgiS i S )z@Parse SSE stream from Codex responses into a response-like dict.typezresponse.completedresponser:   deltatextoutputmessageoutput_textrD   rG   )rD   content)rB   reversed
isinstancedictgetstr)r8   r>   evtrJ   rF   rG   s         r   _parse_codex_streamrS   `   s   "3'F  'c4 wwv"66:cggjFY[_;`:&#''*-t4:&' K 	 #t$ eS!5 KwwvdC 4K	  %)6 LM
 	
 Ir   )      )   2   )F   d   )quickdefaultdeepu  Find Reddit discussion threads about: {topic}

STEP 1: EXTRACT THE CORE SUBJECT
Get the MAIN NOUN/PRODUCT/TOPIC:
- "best nano banana prompting practices" → "nano banana"
- "killer features of clawdbot" → "clawdbot"
- "top Claude Code skills" → "Claude Code"
DO NOT include "best", "top", "tips", "practices", "features" in your search.

STEP 2: SEARCH BROADLY
Search for the core subject:
1. "[core subject] site:reddit.com"
2. "reddit [core subject]"
3. "[core subject] reddit"

Return as many relevant threads as you find. We filter by date server-side.

STEP 3: INCLUDE ALL MATCHES
- Include ALL threads about the core subject
- Set date to "YYYY-MM-DD" if you can determine it, otherwise null
- We verify dates and filter old content server-side
- DO NOT pre-filter aggressively - include anything relevant

REQUIRED: URLs must contain "/r/" AND "/comments/"
REJECT: developers.reddit.com, business.reddit.com

Find {min_items}-{max_items} threads. Return MORE rather than fewer.

Return JSON:
{{
  "items": [
    {{
      "title": "Thread title",
      "url": "https://www.reddit.com/r/sub/comments/xyz/title/",
      "subreddit": "subreddit_name",
      "date": "YYYY-MM-DD or null",
      "why_relevant": "Why relevant",
      "relevance": 0.85
    }}
  ]
}}topicc                     g d}| j                         j                         }|D cg c]	  }||vs| }}dj                  |dd       xs | S c c}w )z2Extract core subject from verbose query for retry.)besttopzhow toztips for	practicesfeatureskillerguidetutorialrecommendationsadvice	promptingusingforwiththeofinon N   )r%   r+   r/   )r]   noisewordswresults        r   _extract_core_subjectrv      sX    KE KKM!E1A!5.a1F188F2AJ(5( 2s
   	AAc                     t        |       }|j                  dd      j                  dd      j                         }d| dS )zBuild a subreddit-targeted search query for fallback.

    When standard search returns few results, try searching for the
    subreddit itself: 'r/kanye', 'r/howie', etc.
    .r:   rp   r/z site:reddit.com)rv   replacer%   )r]   coresub_names      r   _build_subreddit_queryr}      sD     !'D||C$,,S"5;;=Hz)**r   modelinstructions_text
input_textauth_sourcec                 t    | ddddgidgdg||d}|t         j                  k(  rdd	d
|dgdg|d<   d|d<   |S )z6Build responses payload for OpenAI or Codex endpoints.F
web_searchallowed_domains
reddit.comrD   filtersweb_search_call.action.sources)r~   storetoolsincludeinstructionsinputrI   userr   rK   )rD   rolerL   r   Tstream)r	   AUTH_SOURCE_CODEX)r~   r   r   r   payloads        r   _build_payloadr      s      %%~
 55)G c+++ "%1:FG
 !Nr   api_key	from_dateto_datedepth
account_idmock_response_retryc
                    ||S t         j                  |t         d         \  }
}|t        j                  k(  r|st	        d      d|  |dddd}t
        }nd|  dd	}t        }|d
k(  rdn|dk(  rdnd}|gt        D cg c]
  }||k7  s	| c}z   }t        j                  ||||
|      }|t        j                  k(  rddl
m} |g|j                  D cg c]
  }||k7  s	| c}z   }t        dz   |z   }d}|D ]:  }	 t        ||||      }t        j                   ||||      }t#        |xs d      c S  |r|t        j$                  d      d}|D ],  }|dddgidgdg|d}	 t        j*                  ||||      c S  |rt/        d"|        |t        j$                  d#      c c}w c c}w # t        j$                  $ r+}|}|j&                  dk(  rt)        d| d       Y d}~ d}~ww xY w# t        j$                  $ rJ}|}t-        |      rt)        d| d       Y d}~|j&                  dk(  rt)        d | d!       Y d}~ d}~ww xY w)$a  Search Reddit for relevant threads using OpenAI Responses API.

    Args:
        api_key: OpenAI API key
        model: Model to use
        topic: Search topic
        from_date: Start date (YYYY-MM-DD) - only include threads after this
        to_date: End date (YYYY-MM-DD) - only include threads before this
        depth: Research depth - "quick", "default", or "deep"
        mock_response: Mock response for testing

    Returns:
        Raw API response
    Nr[   z)Missing chatgpt_account_id for Codex authzBearer zresponses=experimentalpiapplication/json)Authorizationzchatgpt-account-idzOpenAI-Beta
originatorContent-Type)r   r   rZ   Z   x      )r]   r   r   	min_items	max_itemsr   )modelsr<   )headerstimeoutr:   r   zModel z+ not supported on Codex, trying fallback...z$No Codex-compatible models availabler   r   r   r   r   )r~   r   r   r   z# not accessible, trying fallback...  zRate limited on z, trying fallback model...zAll models failed. Last error: zNo models available)DEPTH_CONFIGrP   r	   r   
ValueErrorCODEX_RESPONSES_URLOPENAI_RESPONSES_URLMODEL_FALLBACK_ORDERREDDIT_SEARCH_PROMPTformatr:   r   CODEX_FALLBACK_MODELSCODEX_INSTRUCTIONSr   r   post_rawrS   	HTTPErrorr#   r   postr'   r   )r   r~   r]   r   r   r   r   r   r   r   r   r   r   urlr   mmodels_to_tryr   
models_modcodex_models_to_tryr   
last_errorcurrent_modelr   r8   es                             r   search_redditr      s   4  '++E<	3JKIyc+++HII&wi0",3.
 "  'wi0.
 # W$b)1C#G G*>MQ!u*qMMM &,, - J c+++*$gJ4T4T(cqXY]bXb(cc.7*D
0 
	M	(8I5R]^mmC'7S*39"55	
	 nnCDD J& " ))L>  99
	99S'7GLL!: 4ZLAB
...
//G N )d >> 
==C'}o5`ab: ~~ 		J%a(F=/1TUV}}#,]O;UVW		sZ   ?
F*
F*
F/!F/>5F4'G54G2 G-,G--G25II)III
subreddits	count_perc                    g }t        |      }| D ]}  }|j                  d      }	 d| d}dt        |       d| d}	| d|	 }
t        j                  dd	}t        j
                  |
|d
d      }|j                  di       j                  dg       }t        |      D ]  \  }}|j                  d      dk7  r|j                  di       }|j                  dd      }|sBdt        |      dz    t        |j                  dd            j                         d| t        |j                  d|            j                         dd| ddd}|j                  d      }|rddl
m} |j                  |      |d<   |j                  |         |S # t        j                  $ r=}t        d| d |        |j                   d!k(  rt        d"       Y d}~ |S Y d}~d}~wt"        $ r}t        d#| d |        Y d}~d}~ww xY w)$a
  Search specific subreddits via Reddit's free JSON endpoint.

    No API key needed. Uses reddit.com/r/{sub}/search/.json endpoint.
    Used in Phase 2 supplemental search after entity extraction.

    Args:
        subreddits: List of subreddit names (without r/)
        topic: Search topic
        from_date: Start date (YYYY-MM-DD)
        to_date: End date (YYYY-MM-DD)
        count_per: Results to request per subreddit

    Returns:
        List of raw item dicts (same format as parse_reddit_response output).
    ry   zhttps://www.reddit.com/r/z/search/.jsonzq=z&restrict_sr=on&sort=new&limit=z&raw_json=1?r   )z
User-AgentAcceptrT   r   )r   r   retriesr6   childrenkindt3	permalinkr:   RStitlezhttps://www.reddit.com	subredditNzFound in r/z supplemental searchg?idr   r   r   datewhy_relevant	relevancecreated_utc)datesr   zSubreddit search failed for r/z: r   u;   Reddit rate-limited (429) — skipping remaining subredditszSubreddit search error for r/)rv   lstrip_url_encoder   
USER_AGENTrP   	enumeratelenrQ   r.   r:   r   timestamp_to_dater-   r   r   r#   	Exception)r   r]   r   r   r   	all_itemsr{   subr   paramsfull_urlr   r6   r   ichildr   r   itemr   	dates_modr   s                         r   search_subredditsr   f  s6   , I 'D 0Bjj.	B-cU-@C+d+,,KI;VabFax(H #oo,G
 88Hgr1MD xx+//
B?H%h/ '599V$,yy, HH["5	  s9~a/01 '2!67==?3I;?!$TXXk3%?!@!F!F!H &1#6J$K!% #hh}54#,#>#>{#KDL  &1'!0Bd  ~~ 	6se2aSAB}}#WX  $  	B5cU"QC@AA	Bs$   E(FH&+GH*HHrG   c                 @    ddl }|j                  j                  |       S )z)Simple URL encoding for query parameters.r   N)urllib.parseparse
quote_plus)rG   urllibs     r   r   r     s    <<""4((r   rE   c                    g }d| v r| d   r| d   }t        |t              r|j                  dt        |            n
t        |      }t	        d|        t
        j                  r&t	        dt        j                  | d      dd         |S d	}d
| v r| d
   }t        |t              r|}nt        |t              r|D ]  }t        |t              rq|j                  d      dk(  rS|j                  dg       }|D ];  }t        |t              s|j                  d      dk(  s)|j                  dd	      } n nd|v r|d   }nt        |t              r|}|s n |s)d| v r%| d   D ]  }	d|	v s|	d   j                  dd	      } n |s)t        dt        | j                                d       |S t        j                  d|      }
|
r6	 t        j                  |
j                               }|j                  dg       }g }t#        |      D ]=  \  }}t        |t              s|j                  dd	      }|rd|vr1d|dz    t        |j                  dd	            j%                         |t        |j                  dd	            j%                         j'                  d      |j                  d      t        |j                  dd	            j%                         t)        dt+        dt-        |j                  d d!                        d"}|d   r't        j.                  d#t        |d               sd|d<   |j1                  |       @ |S # t        j                   $ r Y fw xY w)$zParse OpenAI response to extract Reddit items.

    Args:
        response: Raw API response

    Returns:
        List of item dicts
    r   rI   zOpenAI API error: zFull error response:    )indentNi  r:   rH   rD   rL   rJ   rG   choiceszH[REDDIT WARNING] No output text found in OpenAI response. Keys present: T)r   z\{[\s\S]*"items"[\s\S]*\}itemsr   r   Rr   r   r   ry   r   r   g      ?g        r   g      ?r   z^\d{4}-\d{2}-\d{2}$)rN   rO   rP   rQ   r   r   DEBUGr0   dumpslistprintkeysresearchr1   groupr2   r   r.   r   minmaxfloatmatchr-   )rE   r   r   err_msgrJ   rH   r   rL   cchoice
json_matchr6   clean_itemsr   r   
clean_items                   r   parse_reddit_responser     sT    E (x0!6@6M%))Is5z2SVW\S]'y12::.tzz(1/Met/T.UVW K8(#fc" K% dD)xx'94"&((9b"9!( &A)!T2quuV}7U./eeFB.? %&  4&*6lc*"&K  90y) 	FF"$Y/33IrB	
 XY]^f^k^k^mYnXopx|} 7EJ	::j..01DHHWb)E
 KU# '4$%hhub!l#- acU)'2./557TXXk267==?FFtLHHV$ <=CCES#c5+s1K+L"MN

 f882C
68J4KL%)
6":&/'2 = ## 		s   5M M$#M$)r[   r   NNF)r*   )#__doc__r0   r   r   typingr   r   r   r   r:   r   r	   r   rQ   r   r   r   boolr'   r   r   r   r7   rB   rS   r   r   rv   r}   r   r   intr   r   r   r   r   r   <module>r     sy   7  	 
 , ,  "8, C 3 $.. T " = G C C HT#s(^$< ,s tDcN'; $#S #T#s(^ #P ( V) ) )	+# 	+# 	+# # 3 UX ]abegjbj]k F   $$(x0x0x0 x0 	x0
 x0 x0 x0 x0 D>x0 x0 
#s(^x0@ KS	KK K 	K
 K 
$sCx.K\)c )c )YDcN YtDcN7K Yr   