Corrected mentions by adding a unique constraint to avoid duplicates.

This commit is contained in:
2025-07-31 23:25:10 +02:00
parent 7fec7ec740
commit 1aaa2d70a0
2 changed files with 23 additions and 8 deletions

View File

@@ -2,7 +2,11 @@
import sqlite3
import time
from .ticker_extractor import COMMON_WORDS_BLACKLIST, extract_golden_tickers, extract_potential_tickers
from .ticker_extractor import (
COMMON_WORDS_BLACKLIST,
extract_golden_tickers,
extract_potential_tickers,
)
from .logger_setup import logger as log
from datetime import datetime, timedelta, timezone
@@ -111,12 +115,14 @@ def initialize_db():
ticker_id INTEGER,
subreddit_id INTEGER,
post_id TEXT NOT NULL,
comment_id TEXT, -- NEW: Will be NULL for post mentions
mention_type TEXT NOT NULL,
mention_sentiment REAL,
post_avg_sentiment REAL,
mention_timestamp INTEGER NOT NULL,
FOREIGN KEY (ticker_id) REFERENCES tickers (id),
FOREIGN KEY (subreddit_id) REFERENCES subreddits (id)
FOREIGN KEY (subreddit_id) REFERENCES subreddits (id),
-- The new, perfect uniqueness rule:
UNIQUE(ticker_id, post_id, comment_id)
)
"""
)
@@ -148,27 +154,27 @@ def add_mention(
mention_type,
timestamp,
mention_sentiment,
post_avg_sentiment=None,
comment_id=None,
):
cursor = conn.cursor()
try:
cursor.execute(
"""
INSERT INTO mentions (ticker_id, subreddit_id, post_id, mention_type, mention_timestamp, mention_sentiment, post_avg_sentiment)
INSERT INTO mentions (ticker_id, subreddit_id, post_id, comment_id, mention_type, mention_timestamp, mention_sentiment)
VALUES (?, ?, ?, ?, ?, ?, ?)
""",
(
ticker_id,
subreddit_id,
post_id,
comment_id,
mention_type,
timestamp,
mention_sentiment,
post_avg_sentiment,
),
)
conn.commit()
except sqlite3.IntegrityError:
# This will now correctly catch and ignore any true duplicates.
pass
@@ -231,6 +237,7 @@ def get_week_start_end(for_date):
end_of_week = end_of_week.replace(hour=23, minute=59, second=59, microsecond=999999)
return start_of_week, end_of_week
def get_overall_daily_summary():
"""Gets the top tickers across all subreddits from the LAST 24 HOURS."""
conn = get_db_connection()
@@ -249,6 +256,7 @@ def get_overall_daily_summary():
conn.close()
return results
def get_overall_weekly_summary():
"""Gets the top tickers across all subreddits for LAST WEEK (Mon-Sun)."""
conn = get_db_connection()
@@ -270,6 +278,7 @@ def get_overall_weekly_summary():
conn.close()
return results, start_of_week, end_of_week
def get_daily_summary_for_subreddit(subreddit_name):
"""Gets a summary for a subreddit's DAILY view (last 24 hours)."""
conn = get_db_connection()
@@ -288,6 +297,7 @@ def get_daily_summary_for_subreddit(subreddit_name):
conn.close()
return results
def get_weekly_summary_for_subreddit(subreddit_name, for_date):
"""Gets a summary for a subreddit's WEEKLY view (for a specific week)."""
conn = get_db_connection()
@@ -303,7 +313,9 @@ def get_weekly_summary_for_subreddit(subreddit_name, for_date):
GROUP BY t.symbol, t.market_cap, t.closing_price
ORDER BY total_mentions DESC LIMIT 10;
"""
results = conn.execute(query, (subreddit_name, start_timestamp, end_timestamp)).fetchall()
results = conn.execute(
query, (subreddit_name, start_timestamp, end_timestamp)
).fetchall()
conn.close()
return results, start_of_week, end_of_week

View File

@@ -110,6 +110,7 @@ def _process_submission(submission, subreddit_id, conn, comment_limit):
"post",
int(submission.created_utc),
post_sentiment,
comment_id=None,
)
# 3. --- Process Comments (Single, Efficient Loop) ---
@@ -132,6 +133,7 @@ def _process_submission(submission, subreddit_id, conn, comment_limit):
"comment",
int(comment.created_utc),
comment_sentiment,
comment_id=comment.id,
)
else:
# If no title tickers, we must scan the comment for direct mentions.
@@ -156,6 +158,7 @@ def _process_submission(submission, subreddit_id, conn, comment_limit):
"comment",
int(comment.created_utc),
comment_sentiment,
comment_id=comment.id,
)
# 4. --- Save Deep Dive Analysis ---