Added functionality for exporting an image and for cleaning up stale subreddits and stale tickers.

This commit is contained in:
2025-07-22 14:30:23 +02:00
parent 6b4fb136e2
commit 966ef45916
7 changed files with 159 additions and 18 deletions

68
rstat_tool/cleanup.py Normal file
View File

@@ -0,0 +1,68 @@
# rstat_tool/cleanup.py
import argparse
from . import database
# load_subreddits can no longer be reused from main because it lives in a different file,
# so it is duplicated here. It is small, and this keeps the script self-contained.
import json
def load_subreddits(filepath):
    """Load the list of active subreddits from a JSON config file.

    The file is expected to contain a JSON object with a "subreddits" key.

    Args:
        filepath: Path to the JSON config file.

    Returns:
        The value stored under "subreddits" (an empty list when the key is
        absent), or None when the file cannot be read, is not valid JSON,
        or does not hold a JSON object at the top level. An error message
        is printed in every None case.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
        # OSError covers a missing file as well as permission/directory errors.
        print(f"Error loading config file '{filepath}': {e}")
        return None

    # A top-level list/string/number has no .get(); report it instead of crashing.
    if not isinstance(data, dict):
        print(f"Error loading config file '{filepath}': expected a JSON object, got {type(data).__name__}")
        return None

    return data.get("subreddits", [])
def run_cleanup():
    """Command-line entry point for the database cleanup tool.

    Parses the command line and runs the requested cleanup tasks:

    * ``--tickers``: calls ``database.clean_stale_tickers()``.
    * ``--subreddits [FILE]``: loads the active subreddit list from FILE
      ('subreddits.json' when no value is given) and calls
      ``database.clean_stale_subreddits()`` with it.
    * ``--all``: runs both tasks.

    When no option is given, the help text and an error message are printed
    and nothing is cleaned. The subreddit cleanup is skipped when the config
    file cannot be loaded or lists no subreddits.
    """
    parser = argparse.ArgumentParser(
        description="A tool to clean stale data from the RSTAT database.",
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument("--tickers", action="store_true", help="Clean tickers that are in the blacklist.")
    # nargs='?' makes the value optional: const is used when the flag is given
    # without a value, default when the flag is absent altogether.
    parser.add_argument(
        "--subreddits",
        nargs='?',
        const='subreddits.json',
        default=None,
        help="Clean data from subreddits NOT in the specified config file.\n(Defaults to 'subreddits.json' if flag is used without a value)."
    )
    parser.add_argument("--all", action="store_true", help="Run all available cleanup tasks.")
    args = parser.parse_args()

    clean_tickers = args.all or args.tickers
    # args.subreddits is None when the flag is absent, otherwise a filename.
    clean_subreddits = args.all or args.subreddits is not None

    if not (clean_tickers or clean_subreddits):
        parser.print_help()
        print("\nError: Please provide at least one cleanup option (e.g., --tickers, --subreddits, --all).")
        return

    if clean_tickers:
        database.clean_stale_tickers()

    if clean_subreddits:
        # With --all alone, args.subreddits is None, so fall back to the default file.
        config_file = args.subreddits or 'subreddits.json'
        print(f"\nCleaning subreddits based on active list in: {config_file}")
        active_subreddits = load_subreddits(config_file)
        if active_subreddits is None:
            # load_subreddits has already reported why the file could not be used.
            pass
        elif not active_subreddits:
            # An empty list would mark every subreddit as stale and delete all
            # of its data; a missing or mistyped "subreddits" key produces one.
            print(f"No active subreddits listed in '{config_file}'; skipping subreddit cleanup to avoid deleting all data.")
        else:
            database.clean_stale_subreddits(active_subreddits)

    print("\nCleanup finished.")
# Run the cleanup CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    run_cleanup()

View File

@@ -105,6 +105,33 @@ def clean_stale_tickers():
conn.close()
print(f"Cleanup complete. Removed {deleted_count} records.")
def clean_stale_subreddits(active_subreddits):
    """Remove all data for subreddits that are NOT in the active list.

    Every row of the ``subreddits`` table whose ``name`` is not contained in
    ``active_subreddits`` is treated as stale. For each stale subreddit its
    rows in ``mentions`` and ``posts`` are deleted, followed by the
    ``subreddits`` row itself. All deletions are committed together at the end.

    Args:
        active_subreddits: Collection of subreddit names to keep. Names are
            compared exactly as stored in the database.
    """
    print("\n--- Cleaning Stale Subreddits from Database ---")
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT id, name FROM subreddits")

        stale_sub_ids = []
        for sub in cursor.fetchall():
            if sub['name'] not in active_subreddits:
                print(f"Found stale subreddit to remove: r/{sub['name']}")
                stale_sub_ids.append(sub['id'])

        if not stale_sub_ids:
            print("No stale subreddits to clean.")
            return

        for sub_id in stale_sub_ids:
            print(f"  -> Deleting associated data for subreddit ID: {sub_id}")
            # Dependent rows are removed before the subreddit row itself.
            cursor.execute("DELETE FROM mentions WHERE subreddit_id = ?", (sub_id,))
            cursor.execute("DELETE FROM posts WHERE subreddit_id = ?", (sub_id,))
            cursor.execute("DELETE FROM subreddits WHERE id = ?", (sub_id,))

        # Single commit: if any statement above raises, nothing is committed.
        conn.commit()
    finally:
        # Always release the connection, including on the early return and on errors.
        conn.close()
    print("Stale subreddit cleanup complete.")
def get_db_connection():
conn = sqlite3.connect(DB_FILE)
conn.row_factory = sqlite3.Row

View File

@@ -136,10 +136,9 @@ def main():
"""Main function to run the Reddit stock analysis tool."""
parser = argparse.ArgumentParser(description="Analyze stock ticker mentions on Reddit.", formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument("--config", default="subreddits.json", help="Path to the JSON file containing subreddits.\n(Default: subreddits.json)")
parser.add_argument("--subreddit", help="Scan a single subreddit, ignoring the config file.")
parser.add_argument("--days", type=int, default=1, help="Number of past days to scan for new posts.\n(Default: 1 for last 24 hours)")
parser.add_argument("-f", "--config", default="subreddits.json", help="Path to the JSON file containing subreddits.\n(Default: subreddits.json)")
parser.add_argument("-s", "--subreddit", help="Scan a single subreddit, ignoring the config file.")
parser.add_argument("-d", "--days", type=int, default=1, help="Number of past days to scan for new posts.\n(Default: 1 for last 24 hours)")
parser.add_argument("-p", "--posts", type=int, default=200, help="Max posts to check per subreddit.\n(Default: 200)")
parser.add_argument("-c", "--comments", type=int, default=100, help="Number of comments to scan per post.\n(Default: 100)")
parser.add_argument("-l", "--limit", type=int, default=20, help="Number of tickers to show in the CLI report.\n(Default: 20)")
@@ -162,7 +161,6 @@ def main():
# --- Initialize and Run ---
database.initialize_db()
database.clean_stale_tickers()
reddit = get_reddit_instance()
if not reddit: return