Corrected mentions by adding a unique constraint to avoid duplicates.
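The change works because SQLite raises sqlite3.IntegrityError when an INSERT violates a UNIQUE constraint, and add_mention now catches that error and silently drops the row. A minimal, runnable sketch of the pattern, using a pared-down table with only the three key columns (names here are illustrative, not the project's full schema):

import sqlite3

# Illustrative stand-in for the mentions table, assuming only the columns
# that participate in the new uniqueness rule.
conn = sqlite3.connect(":memory:")
conn.execute(
    """
    CREATE TABLE mentions (
        ticker_id INTEGER,
        post_id TEXT NOT NULL,
        comment_id TEXT,  -- NULL for post-level mentions
        UNIQUE (ticker_id, post_id, comment_id)
    )
    """
)

def add_mention(ticker_id, post_id, comment_id=None):
    try:
        conn.execute(
            "INSERT INTO mentions VALUES (?, ?, ?)",
            (ticker_id, post_id, comment_id),
        )
    except sqlite3.IntegrityError:
        pass  # a true duplicate; ignore it

add_mention(1, "p1", "c1")
add_mention(1, "p1", "c1")  # duplicate comment mention -> rejected
add_mention(1, "p1")        # post mention, comment_id is NULL
add_mention(1, "p1")        # NOT rejected: SQLite treats NULLs as distinct
print(conn.execute("SELECT COUNT(*) FROM mentions").fetchone()[0])  # prints 3

Note the last insert: because SQLite considers NULLs distinct inside UNIQUE constraints, the rule deduplicates comment mentions, while post-level rows with a NULL comment_id are never flagged as duplicates of each other.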
@@ -2,7 +2,11 @@
 
 import sqlite3
 import time
-from .ticker_extractor import COMMON_WORDS_BLACKLIST, extract_golden_tickers, extract_potential_tickers
+from .ticker_extractor import (
+    COMMON_WORDS_BLACKLIST,
+    extract_golden_tickers,
+    extract_potential_tickers,
+)
 from .logger_setup import logger as log
 from datetime import datetime, timedelta, timezone
 
@@ -111,12 +115,14 @@ def initialize_db():
             ticker_id INTEGER,
             subreddit_id INTEGER,
             post_id TEXT NOT NULL,
+            comment_id TEXT, -- NEW: Will be NULL for post mentions
             mention_type TEXT NOT NULL,
             mention_sentiment REAL,
-            post_avg_sentiment REAL,
             mention_timestamp INTEGER NOT NULL,
             FOREIGN KEY (ticker_id) REFERENCES tickers (id),
-            FOREIGN KEY (subreddit_id) REFERENCES subreddits (id)
+            FOREIGN KEY (subreddit_id) REFERENCES subreddits (id),
+            -- The new, perfect uniqueness rule:
+            UNIQUE(ticker_id, post_id, comment_id)
         )
         """
     )
@@ -148,27 +154,27 @@ def add_mention(
     mention_type,
     timestamp,
     mention_sentiment,
-    post_avg_sentiment=None,
+    comment_id=None,
 ):
     cursor = conn.cursor()
     try:
         cursor.execute(
             """
-            INSERT INTO mentions (ticker_id, subreddit_id, post_id, mention_type, mention_timestamp, mention_sentiment, post_avg_sentiment)
+            INSERT INTO mentions (ticker_id, subreddit_id, post_id, comment_id, mention_type, mention_timestamp, mention_sentiment)
             VALUES (?, ?, ?, ?, ?, ?, ?)
             """,
             (
                 ticker_id,
                 subreddit_id,
                 post_id,
+                comment_id,
                 mention_type,
                 timestamp,
                 mention_sentiment,
-                post_avg_sentiment,
             ),
         )
-        conn.commit()
     except sqlite3.IntegrityError:
+        # This will now correctly catch and ignore any true duplicates.
         pass
 
 
@@ -231,6 +237,7 @@ def get_week_start_end(for_date):
     end_of_week = end_of_week.replace(hour=23, minute=59, second=59, microsecond=999999)
     return start_of_week, end_of_week
 
+
 def get_overall_daily_summary():
     """Gets the top tickers across all subreddits from the LAST 24 HOURS."""
     conn = get_db_connection()
@@ -249,6 +256,7 @@ def get_overall_daily_summary():
     conn.close()
     return results
 
+
 def get_overall_weekly_summary():
     """Gets the top tickers across all subreddits for LAST WEEK (Mon-Sun)."""
     conn = get_db_connection()
@@ -270,6 +278,7 @@ def get_overall_weekly_summary():
     conn.close()
     return results, start_of_week, end_of_week
 
+
 def get_daily_summary_for_subreddit(subreddit_name):
     """Gets a summary for a subreddit's DAILY view (last 24 hours)."""
     conn = get_db_connection()
@@ -288,6 +297,7 @@ def get_daily_summary_for_subreddit(subreddit_name):
     conn.close()
     return results
 
+
 def get_weekly_summary_for_subreddit(subreddit_name, for_date):
     """Gets a summary for a subreddit's WEEKLY view (for a specific week)."""
     conn = get_db_connection()
@@ -303,7 +313,9 @@ def get_weekly_summary_for_subreddit(subreddit_name, for_date):
         GROUP BY t.symbol, t.market_cap, t.closing_price
         ORDER BY total_mentions DESC LIMIT 10;
     """
-    results = conn.execute(query, (subreddit_name, start_timestamp, end_timestamp)).fetchall()
+    results = conn.execute(
+        query, (subreddit_name, start_timestamp, end_timestamp)
+    ).fetchall()
     conn.close()
     return results, start_of_week, end_of_week
 
@@ -110,6 +110,7 @@ def _process_submission(submission, subreddit_id, conn, comment_limit):
             "post",
             int(submission.created_utc),
             post_sentiment,
+            comment_id=None,
         )
 
     # 3. --- Process Comments (Single, Efficient Loop) ---
@@ -132,6 +133,7 @@ def _process_submission(submission, subreddit_id, conn, comment_limit):
                 "comment",
                 int(comment.created_utc),
                 comment_sentiment,
+                comment_id=comment.id,
             )
         else:
             # If no title tickers, we must scan the comment for direct mentions.
@@ -156,6 +158,7 @@ def _process_submission(submission, subreddit_id, conn, comment_limit):
                 "comment",
                 int(comment.created_utc),
                 comment_sentiment,
+                comment_id=comment.id,
             )
 
     # 4. --- Save Deep Dive Analysis ---