# File listing: reddit_stock_analyzer/rstat_tool/cleanup.py (74 lines, 2.8 KiB, Python)

# rstat_tool/cleanup.py
import argparse
from . import database
from .logger_setup import setup_logging, logger as log
# We can't reuse load_subreddits from main anymore if it's not in the same file
# So we will duplicate it here. It's small and keeps this script self-contained.
import json
def load_subreddits(filepath):
    """Load the list of subreddits from a JSON config file.

    The file is expected to contain a JSON object with a "subreddits" key.

    Args:
        filepath: Path to the JSON config file.

    Returns:
        The value stored under "subreddits", or an empty list when that key
        is absent. Returns None (after logging an error) when the file cannot
        be read, is not valid JSON, or its top level is not a JSON object.
    """
    try:
        # JSON text is UTF-8 by specification; do not depend on the platform
        # default encoding.
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # OSError covers FileNotFoundError as well as permission and
        # is-a-directory failures; ValueError covers json.JSONDecodeError
        # and UnicodeDecodeError.
        log.error(f"Error loading config file '{filepath}': {e}")
        return None

    # A syntactically valid file may still hold a list/string/number at the
    # top level, which has no .get(); report it instead of crashing.
    if not isinstance(data, dict):
        log.error(
            f"Error loading config file '{filepath}': expected a JSON object "
            f"at the top level, got {type(data).__name__}"
        )
        return None

    return data.get("subreddits", [])
def _build_cleanup_parser():
    """Create the argument parser for the cleanup command line."""
    cli = argparse.ArgumentParser(
        description="A tool to clean stale data from the RSTAT database.",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    cli.add_argument(
        "--tickers",
        action="store_true",
        help="Clean tickers that are in the blacklist.",
    )
    # --subreddits has three states thanks to nargs='?':
    #   flag absent            -> None (default)
    #   flag with no value     -> 'subreddits.json' (const)
    #   flag followed by value -> that value
    cli.add_argument(
        "--subreddits",
        nargs='?',
        const='subreddits.json',
        default=None,
        help=(
            "Clean data from subreddits NOT in the specified config file.\n"
            "(Defaults to 'subreddits.json' if flag is used without a value)."
        ),
    )
    cli.add_argument(
        "--all",
        action="store_true",
        help="Run all available cleanup tasks.",
    )
    cli.add_argument(
        "--stdout",
        action="store_true",
        help="Print all log messages to the console.",
    )
    return cli


def run_cleanup():
    """Parse the command line and run the requested cleanup tasks.

    --tickers runs the ticker cleanup, --subreddits runs the subreddit
    cleanup against a config file, and --all runs both. When no task is
    selected, the help text is printed and an error is logged instead.
    """
    cli = _build_cleanup_parser()
    options = cli.parse_args()
    setup_logging(console_verbose=options.stdout)

    # Work out up front which tasks were requested; --all implies both.
    wants_tickers = options.all or options.tickers
    wants_subreddits = options.all or options.subreddits is not None

    log.critical("\n--- Starting Cleanup ---")

    # Nothing selected: show usage and stop.
    if not (wants_tickers or wants_subreddits):
        cli.print_help()
        log.error("\nError: Please provide at least one cleanup option (e.g., --tickers, --subreddits, --all).")
        return

    if wants_tickers:
        database.clean_stale_tickers()

    if wants_subreddits:
        # With --all alone, options.subreddits is None, so fall back to the
        # default config file name.
        if options.subreddits:
            config_file = options.subreddits
        else:
            config_file = 'subreddits.json'
        log.info(f"\nCleaning subreddits based on active list in: {config_file}")
        active_list = load_subreddits(config_file)
        # None signals that the config could not be loaded; skip the cleanup
        # in that case.
        if active_list is not None:
            database.clean_stale_subreddits(active_list)

    log.critical("\nCleanup finished.")
# Run the cleanup CLI only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    run_cleanup()