diff --git a/rstat_tool/database.py b/rstat_tool/database.py index 9df9d25..5b5345f 100644 --- a/rstat_tool/database.py +++ b/rstat_tool/database.py @@ -2,7 +2,11 @@ import sqlite3 import time -from .ticker_extractor import COMMON_WORDS_BLACKLIST, extract_golden_tickers, extract_potential_tickers +from .ticker_extractor import ( + COMMON_WORDS_BLACKLIST, + extract_golden_tickers, + extract_potential_tickers, +) from .logger_setup import logger as log from datetime import datetime, timedelta, timezone @@ -111,12 +115,14 @@ def initialize_db(): ticker_id INTEGER, subreddit_id INTEGER, post_id TEXT NOT NULL, + comment_id TEXT, -- NEW: Will be NULL for post mentions mention_type TEXT NOT NULL, mention_sentiment REAL, - post_avg_sentiment REAL, mention_timestamp INTEGER NOT NULL, FOREIGN KEY (ticker_id) REFERENCES tickers (id), - FOREIGN KEY (subreddit_id) REFERENCES subreddits (id) + FOREIGN KEY (subreddit_id) REFERENCES subreddits (id), + -- The new, perfect uniqueness rule: + UNIQUE(ticker_id, post_id, comment_id) ) """ ) @@ -148,27 +154,27 @@ def add_mention( mention_type, timestamp, mention_sentiment, - post_avg_sentiment=None, + comment_id=None, ): cursor = conn.cursor() try: cursor.execute( """ - INSERT INTO mentions (ticker_id, subreddit_id, post_id, mention_type, mention_timestamp, mention_sentiment, post_avg_sentiment) + INSERT INTO mentions (ticker_id, subreddit_id, post_id, comment_id, mention_type, mention_timestamp, mention_sentiment) VALUES (?, ?, ?, ?, ?, ?, ?) """, ( ticker_id, subreddit_id, post_id, + comment_id, mention_type, timestamp, mention_sentiment, - post_avg_sentiment, ), ) - conn.commit() except sqlite3.IntegrityError: + # Ignore rows rejected by UNIQUE(ticker_id, post_id, comment_id). NOTE(review): SQLite treats NULLs as distinct in UNIQUE constraints, so post mentions (comment_id is NULL) are never rejected here and can still be inserted twice.
pass @@ -231,6 +237,7 @@ def get_week_start_end(for_date): end_of_week = end_of_week.replace(hour=23, minute=59, second=59, microsecond=999999) return start_of_week, end_of_week + def get_overall_daily_summary(): """Gets the top tickers across all subreddits from the LAST 24 HOURS.""" conn = get_db_connection() @@ -249,6 +256,7 @@ def get_overall_daily_summary(): conn.close() return results + def get_overall_weekly_summary(): """Gets the top tickers across all subreddits for LAST WEEK (Mon-Sun).""" conn = get_db_connection() @@ -270,6 +278,7 @@ def get_overall_weekly_summary(): conn.close() return results, start_of_week, end_of_week + def get_daily_summary_for_subreddit(subreddit_name): """Gets a summary for a subreddit's DAILY view (last 24 hours).""" conn = get_db_connection() @@ -288,6 +297,7 @@ def get_daily_summary_for_subreddit(subreddit_name): conn.close() return results + def get_weekly_summary_for_subreddit(subreddit_name, for_date): """Gets a summary for a subreddit's WEEKLY view (for a specific week).""" conn = get_db_connection() @@ -303,7 +313,9 @@ def get_weekly_summary_for_subreddit(subreddit_name, for_date): GROUP BY t.symbol, t.market_cap, t.closing_price ORDER BY total_mentions DESC LIMIT 10; """ - results = conn.execute(query, (subreddit_name, start_timestamp, end_timestamp)).fetchall() + results = conn.execute( + query, (subreddit_name, start_timestamp, end_timestamp) + ).fetchall() conn.close() return results, start_of_week, end_of_week diff --git a/rstat_tool/main.py b/rstat_tool/main.py index ed969da..89ee66f 100644 --- a/rstat_tool/main.py +++ b/rstat_tool/main.py @@ -110,6 +110,7 @@ def _process_submission(submission, subreddit_id, conn, comment_limit): "post", int(submission.created_utc), post_sentiment, + comment_id=None, ) # 3. 
--- Process Comments (Single, Efficient Loop) --- @@ -132,6 +133,7 @@ def _process_submission(submission, subreddit_id, conn, comment_limit): "comment", int(comment.created_utc), comment_sentiment, + comment_id=comment.id, ) else: # If no title tickers, we must scan the comment for direct mentions. @@ -156,6 +158,7 @@ def _process_submission(submission, subreddit_id, conn, comment_limit): "comment", int(comment.created_utc), comment_sentiment, + comment_id=comment.id, ) # 4. --- Save Deep Dive Analysis ---