
    ?i/                        d Z ddlmZmZmZmZmZ ddlmZm	Z	  ede	j                  e	j                  e	j                  e	j                  e	j                  e	j                  e	j                   e	j"                  	      Z	 ddee   deded	ed
ee   f
dZdeeeef      deded
ee	j                     fdZdeeeef      deded
ee	j                     fdZdeeeef      deded
ee	j                     fdZdeeeef      deded
ee	j                     fdZdeeeef      deded
ee	j                     fdZdeeeef      deded
ee	j                      fdZdeeeef      deded
ee	j"                     fdZded
eeeef      fdZy)z2Normalization of raw API data to canonical schema.    )AnyDictListTypeVarUnion   )datesschemaTitems	from_dateto_daterequire_datereturnc                     g }| D ]S  }|j                   |s|j                  |       #|j                   |k  r3|j                   |kD  rC|j                  |       U |S )a  Hard filter: Remove items outside the date range.

    This is the safety net - even if the prompt lets old content through,
    this filter will exclude it.

    Args:
        items: List of items to filter
        from_date: Start date (YYYY-MM-DD) - exclude items before this
        to_date: End date (YYYY-MM-DD) - exclude items after this
        require_date: If True, also remove items with no date

    Returns:
        Filtered list with only items in range (or unknown dates if not required)
    )dateappend)r   r   r   r   resultitems         T/home/ubuntu/.openclaw/workspace/skills/last30days-official/scripts/lib/normalize.pyfilter_by_date_ranger   
   si    ( F 99d# 99y  99wd  M    c                    g }| D ]  }d}|j                  d      }t        |t              rEt        j                  |j                  d      |j                  d      |j                  d            }g }|j                  dg       D ]z  }|j                  t        j                  |j                  dd      |j                  d	      |j                  d
d      |j                  dd      |j                  dd                   | |j                  d	      }	t        j                  |	||      }
|j                  t        j                  |j                  dd      |j                  dd      |j                  dd      |j                  dd      |	|
|||j                  dg       |j                  dd      |j                  dd                    |S )zNormalize raw Reddit items to schema.

    Args:
        items: Raw Reddit items from API
        from_date: Start of date range
        to_date: End of date range

    Returns:
        List of RedditItem objects
    N
engagementscorenum_commentsupvote_ratio)r   r   r   top_commentsr   r   author excerpturlr   r   r   r!   r"   idtitle	subredditcomment_insights	relevance      ?why_relevant)r$   r%   r"   r&   r   date_confidencer   r   r'   r(   r*   )
get
isinstancedictr
   
Engagementr   Commentr	   get_date_confidence
RedditItem)r   r   r   
normalizedr   r   eng_rawr   cdate_strr+   s              r   normalize_reddit_itemsr7   2   s    J &
((<(gt$**kk'*$[[8$[[8J ."- 	AeeGQ'UU6]uuXr*i,EE%$! 	 88F#33HiQ&++xxb!((7B'#hh{B/+!%!XX&8"=hh{C0."5
 	5&P r   c                 v   g }| D ]0  }d}|j                  d      }t        |t              rUt        j                  |j                  d      |j                  d      |j                  d      |j                  d            }|j                  d      }t        j                  |||      }|j                  t        j                  |j                  d	d
      |j                  dd
      |j                  dd
      |j                  dd
      ||||j                  dd      |j                  dd
      	             3 |S )zNormalize raw X items to schema.

    Args:
        items: Raw X items from API
        from_date: Start of date range
        to_date: End of date range

    Returns:
        List of XItem objects
    Nr   likesrepostsrepliesquotes)r9   r:   r;   r<   r   r$   r    textr"   author_handler(   r)   r*   )	r$   r=   r"   r>   r   r+   r   r(   r*   )	r,   r-   r.   r
   r/   r	   r1   r   XItem)	r   r   r   r3   r   r   r4   r6   r+   s	            r   normalize_x_itemsr@   n   s    J 
((<(gt$**kk'*I.I.{{8,	J 88F#33HiQ&,,xxb!&"%#((?B7+!hh{C0."5

 
	!8 r   c                 .   g }| D ]  }|j                  d      xs i }t        j                  |j                  d      |j                  d      |j                  d            }|j                  d      }|j                  t        j                  |j                  dd      |j                  d	d      |j                  d
d      |j                  dd      |d||j                  dd      |j                  dd      |j                  dd      
              |S )zNormalize raw YouTube items to schema.

    Args:
        items: Raw YouTube items from yt-dlp
        from_date: Start of date range
        to_date: End of date range

    Returns:
        List of YouTubeItem objects
    r   viewsr9   commentsrB   r9   r   r   video_idr    r%   r"   channel_namehightranscript_snippetr(   ffffff?r*   )
r$   r%   r"   rF   r   r+   r   rH   r(   r*   )r,   r
   r/   r   YouTubeItem)r   r   r   r3   r   r4   r   r6   s           r   normalize_youtube_itemsrK      s     J ((<(.B&&++g&++g& Z0

 88F#&,,xx
B'((7B'#."5"!#xx(<bAhh{C0."5
 	2 r   c                 t   g }t        |       D ]&  \  }}|j                  d      xs i }t        j                  |j                  d      |j                  d      |j                  d      |j                  d            }|j                  d      }|j	                  t        j
                  d|d	z    |j                  d
d      |j                  dd      |j                  dd      |d||j                  dd      |j                  dg       |j                  dd      |j                  dd                   ) |S )zNormalize raw TikTok items to schema.

    Args:
        items: Raw TikTok items from Apify
        from_date: Start of date range
        to_date: End of date range

    Returns:
        List of TikTokItem objects
    r   rB   r9   rC   shares)rB   r9   r   rM   r   TKr   r=   r    r"   author_namerG   caption_snippethashtagsr(   rI   r*   r$   r=   r"   rO   r   r+   r   rP   rQ   r(   r*   )	enumerater,   r
   r/   r   
TikTokItem	r   r   r   r3   ir   r4   r   r6   s	            r   normalize_tiktok_itemsrW      s     JU# 4((<(.B&&++g&++g& Z0;;x(	

 88F#&++AaC5z&"%#3"! HH%6;XXj"-hh{C0."5
 	6 r   c                 T   g }t        |       D ]  \  }}|j                  d      xs i }t        j                  |j                  d      |j                  d      |j                  d            }|j                  d      }|j	                  t        j
                  d|dz    |j                  d	d
      |j                  dd
      |j                  dd
      |d||j                  dd
      |j                  dg       |j                  dd      |j                  dd
                    |S )zNormalize raw Instagram items to schema.

    Args:
        items: Raw Instagram items from ScrapeCreators
        from_date: Start of date range
        to_date: End of date range

    Returns:
        List of InstagramItem objects
    r   rB   r9   rC   rD   r   IGr   r=   r    r"   rO   rG   rP   rQ   r(   rI   r*   rR   )rS   r,   r
   r/   r   InstagramItemrU   s	            r   normalize_instagram_itemsr[      s    JU# 4((<(.B&&++g&++g& Z0

 88F#&..AaC5z&"%#3"! HH%6;XXj"-hh{C0."5
 	4 r   c                    g }t        |       D ]y  \  }}|j                  d      xs i }t        j                  |j                  d      |j                  d            }g }|j                  dg       D ][  }	|j	                  t        j
                  |	j                  dd      d|	j                  dd	      |	j                  d
d	      d	             ] |j                  d      }
|j	                  t        j                  d|dz    |j                  dd	      |j                  dd	      |j                  dd	      |j                  dd	      |
d|||j                  dg       |j                  dd      |j                  dd	                   | |S )zNormalize raw Hacker News items to schema.

    Args:
        items: Raw HN items from Algolia API
        from_date: Start of date range
        to_date: End of date range

    Returns:
        List of HackerNewsItem objects
    r   pointsr   )r   r   r   r   Nr   r    r=   r#   r   HNr   r%   r"   hn_urlrG   r'   r(   r)   r*   )r$   r%   r"   r_   r   r   r+   r   r   r'   r(   r*   )rS   r,   r
   r/   r   r0   HackerNewsItem)r   r   r   r3   rV   r   r4   r   r   r5   r6   s              r   normalize_hackernews_itemsra   (  sv    JU# #4((<(.B&&++h' ^4

 ."- 	AeeHa(uuXr*fb)! 	 88F#&//AaC5z((7B'#88Hb)88Hb)"!%!XX&8"=hh{C0."5
 	-#J r   c                 x   g }t        |       D ](  \  }}|j                  d      xs |j                  dd      }t        j                  ||j                  dd            }|j                  d      }|j	                  t        j
                  d|dz    |j                  d	d
      |j                  dd
      |j                  dd
      |j                  dg       |j                  dd      |j                  d      |d||j                  d      |j                  dd      |j                  dd
                   + |S )zNormalize raw Polymarket items to schema.

    Args:
        items: Raw Polymarket items from Gamma API
        from_date: Start of date range
        to_date: End of date range

    Returns:
        List of PolymarketItem objects
    	volume1mo
volume24hrg        	liquidity)volumere   r   PMr   r%   r    questionr"   outcome_pricesoutcomes_remainingr   price_movementrG   end_dater(   r)   r*   )r$   r%   rh   r"   ri   rj   rk   r   r+   r   rl   r(   r*   )rS   r,   r
   r/   r   PolymarketItem)	r   r   r   r3   rV   r   rf   r   r6   s	            r   normalize_polymarket_itemsrn   a  s&    JU# 4+&E$((<*E&&hh{C0


 88F#&//AaC5z((7B'XXj"-#88$4b9#xx(<a@88$45"!XXj)hh{C0."5
 	4 r   c                 H    | D cg c]  }|j                          c}S c c}w )z5Convert schema items to dicts for JSON serialization.)to_dict)r   r   s     r   items_to_dictsrq     s    ',-tDLLN---s   N)F)__doc__typingr   r   r   r   r   r    r	   r
   r2   r?   WebSearchItemrJ   rT   rZ   r`   rm   r   strboolr   r7   r@   rK   rW   r[   ra   rn   rq    r   r   <module>rx      s   8 2 2 C""FLL&2F2FHZHZ\b\m\mou  pD  pD  FL  F[  F[  ]c  ]r  ]r  s 	%7%% % 	%
 
!W%P9S#X99 9 
&

	9x-S#X-- - 
&,,	-`*S#X** * 
&

	*Z,S#X,, , 
&

	,^+S#X++ + 
&

	+\6S#X66 6 
&

 	6r+S#X++ + 
&

 	+\.$ .4S#X#7 .r   