Corrected mentions by adding a unique constraint to avoid duplicates.
This commit is contained in:
@@ -2,7 +2,11 @@
|
||||
|
||||
import sqlite3
|
||||
import time
|
||||
from .ticker_extractor import COMMON_WORDS_BLACKLIST, extract_golden_tickers, extract_potential_tickers
|
||||
from .ticker_extractor import (
|
||||
COMMON_WORDS_BLACKLIST,
|
||||
extract_golden_tickers,
|
||||
extract_potential_tickers,
|
||||
)
|
||||
from .logger_setup import logger as log
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
@@ -111,12 +115,14 @@ def initialize_db():
|
||||
ticker_id INTEGER,
|
||||
subreddit_id INTEGER,
|
||||
post_id TEXT NOT NULL,
|
||||
comment_id TEXT, -- NEW: Will be NULL for post mentions
|
||||
mention_type TEXT NOT NULL,
|
||||
mention_sentiment REAL,
|
||||
post_avg_sentiment REAL,
|
||||
mention_timestamp INTEGER NOT NULL,
|
||||
FOREIGN KEY (ticker_id) REFERENCES tickers (id),
|
||||
FOREIGN KEY (subreddit_id) REFERENCES subreddits (id)
|
||||
FOREIGN KEY (subreddit_id) REFERENCES subreddits (id),
|
||||
-- The new, perfect uniqueness rule:
|
||||
UNIQUE(ticker_id, post_id, comment_id)
|
||||
)
|
||||
"""
|
||||
)
|
||||
@@ -148,27 +154,27 @@ def add_mention(
|
||||
mention_type,
|
||||
timestamp,
|
||||
mention_sentiment,
|
||||
post_avg_sentiment=None,
|
||||
comment_id=None,
|
||||
):
|
||||
cursor = conn.cursor()
|
||||
try:
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO mentions (ticker_id, subreddit_id, post_id, mention_type, mention_timestamp, mention_sentiment, post_avg_sentiment)
|
||||
INSERT INTO mentions (ticker_id, subreddit_id, post_id, comment_id, mention_type, mention_timestamp, mention_sentiment)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
ticker_id,
|
||||
subreddit_id,
|
||||
post_id,
|
||||
comment_id,
|
||||
mention_type,
|
||||
timestamp,
|
||||
mention_sentiment,
|
||||
post_avg_sentiment,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
except sqlite3.IntegrityError:
|
||||
# This will now correctly catch and ignore any true duplicates.
|
||||
pass
|
||||
|
||||
|
||||
@@ -231,6 +237,7 @@ def get_week_start_end(for_date):
|
||||
end_of_week = end_of_week.replace(hour=23, minute=59, second=59, microsecond=999999)
|
||||
return start_of_week, end_of_week
|
||||
|
||||
|
||||
def get_overall_daily_summary():
|
||||
"""Gets the top tickers across all subreddits from the LAST 24 HOURS."""
|
||||
conn = get_db_connection()
|
||||
@@ -249,6 +256,7 @@ def get_overall_daily_summary():
|
||||
conn.close()
|
||||
return results
|
||||
|
||||
|
||||
def get_overall_weekly_summary():
|
||||
"""Gets the top tickers across all subreddits for LAST WEEK (Mon-Sun)."""
|
||||
conn = get_db_connection()
|
||||
@@ -270,6 +278,7 @@ def get_overall_weekly_summary():
|
||||
conn.close()
|
||||
return results, start_of_week, end_of_week
|
||||
|
||||
|
||||
def get_daily_summary_for_subreddit(subreddit_name):
|
||||
"""Gets a summary for a subreddit's DAILY view (last 24 hours)."""
|
||||
conn = get_db_connection()
|
||||
@@ -288,6 +297,7 @@ def get_daily_summary_for_subreddit(subreddit_name):
|
||||
conn.close()
|
||||
return results
|
||||
|
||||
|
||||
def get_weekly_summary_for_subreddit(subreddit_name, for_date):
|
||||
"""Gets a summary for a subreddit's WEEKLY view (for a specific week)."""
|
||||
conn = get_db_connection()
|
||||
@@ -303,7 +313,9 @@ def get_weekly_summary_for_subreddit(subreddit_name, for_date):
|
||||
GROUP BY t.symbol, t.market_cap, t.closing_price
|
||||
ORDER BY total_mentions DESC LIMIT 10;
|
||||
"""
|
||||
results = conn.execute(query, (subreddit_name, start_timestamp, end_timestamp)).fetchall()
|
||||
results = conn.execute(
|
||||
query, (subreddit_name, start_timestamp, end_timestamp)
|
||||
).fetchall()
|
||||
conn.close()
|
||||
return results, start_of_week, end_of_week
|
||||
|
||||
|
@@ -110,6 +110,7 @@ def _process_submission(submission, subreddit_id, conn, comment_limit):
|
||||
"post",
|
||||
int(submission.created_utc),
|
||||
post_sentiment,
|
||||
comment_id=None,
|
||||
)
|
||||
|
||||
# 3. --- Process Comments (Single, Efficient Loop) ---
|
||||
@@ -132,6 +133,7 @@ def _process_submission(submission, subreddit_id, conn, comment_limit):
|
||||
"comment",
|
||||
int(comment.created_utc),
|
||||
comment_sentiment,
|
||||
comment_id=comment.id,
|
||||
)
|
||||
else:
|
||||
# If no title tickers, we must scan the comment for direct mentions.
|
||||
@@ -156,6 +158,7 @@ def _process_submission(submission, subreddit_id, conn, comment_limit):
|
||||
"comment",
|
||||
int(comment.created_utc),
|
||||
comment_sentiment,
|
||||
comment_id=comment.id,
|
||||
)
|
||||
|
||||
# 4. --- Save Deep Dive Analysis ---
|
||||
|
Reference in New Issue
Block a user