diff --git a/rstat_tool/cleanup.py b/rstat_tool/cleanup.py
index b54f55b..3529741 100644
--- a/rstat_tool/cleanup.py
+++ b/rstat_tool/cleanup.py
@@ -2,10 +2,13 @@
 import argparse
 from . import database
+from .logger_setup import get_logger
 
 # We can't reuse load_subreddits from main anymore if it's not in the same file
 # So we will duplicate it here. It's small and keeps this script self-contained.
 import json
 
+log = get_logger()
+
 def load_subreddits(filepath):
     """Loads a list of subreddits from a JSON file."""
     try:
@@ -13,7 +16,7 @@ def load_subreddits(filepath):
         data = json.load(f)
         return data.get("subreddits", [])
     except (FileNotFoundError, json.JSONDecodeError) as e:
-        print(f"Error loading config file '{filepath}': {e}")
+        log.error(f"Error loading config file '{filepath}': {e}")
         return None
 
 def run_cleanup():
@@ -52,17 +55,17 @@ def run_cleanup():
         run_any_task = True
         # If --all is used, default to 'subreddits.json' if --subreddits wasn't also specified
         config_file = args.subreddits or 'subreddits.json'
-        print(f"\nCleaning subreddits based on active list in: {config_file}")
+        log.info(f"\nCleaning subreddits based on active list in: {config_file}")
         active_subreddits = load_subreddits(config_file)
         if active_subreddits is not None:
             database.clean_stale_subreddits(active_subreddits)
 
     if not run_any_task:
         parser.print_help()
-        print("\nError: Please provide at least one cleanup option (e.g., --tickers, --subreddits, --all).")
+        log.error("\nError: Please provide at least one cleanup option (e.g., --tickers, --subreddits, --all).")
         return
 
-    print("\nCleanup finished.")
+    log.info("\nCleanup finished.")
 
 if __name__ == "__main__":
     run_cleanup()
\ No newline at end of file
diff --git a/rstat_tool/dashboard.py b/rstat_tool/dashboard.py
index e6a9b17..3d7b515 100644
--- a/rstat_tool/dashboard.py
+++ b/rstat_tool/dashboard.py
@@ -1,7 +1,8 @@
 # rstat_tool/dashboard.py
 
 from flask import Flask, render_template
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta
+from .logger_setup import get_logger
 from .database import (
     get_overall_summary,
     get_subreddit_summary,
@@ -12,6 +13,7 @@ from .database import (
     get_overall_image_view_summary
 )
 
+log = get_logger()
 app = Flask(__name__, template_folder='../templates')
 
 @app.template_filter('format_mc')
@@ -95,9 +97,9 @@ def overall_image_view():
 
 def start_dashboard():
     """The main function called by the 'rstat-dashboard' command."""
-    print("Starting Flask server...")
-    print("Open http://127.0.0.1:5000 in your browser.")
-    print("Press CTRL+C to stop the server.")
+    log.info("Starting Flask server...")
+    log.info("Open http://127.0.0.1:5000 in your browser.")
+    log.info("Press CTRL+C to stop the server.")
     app.run(debug=True)
 
 if __name__ == "__main__":
diff --git a/rstat_tool/database.py b/rstat_tool/database.py
index 60f5c0e..d47089e 100644
--- a/rstat_tool/database.py
+++ b/rstat_tool/database.py
@@ -3,9 +3,11 @@
 import sqlite3
 import time
 from .ticker_extractor import COMMON_WORDS_BLACKLIST
+from .logger_setup import get_logger
 from datetime import datetime, timedelta
 
 DB_FILE = "reddit_stocks.db"
+log = get_logger()
 
 def get_db_connection():
     """Establishes a connection to the SQLite database."""
@@ -71,14 +73,14 @@ def initialize_db():
 
     conn.commit()
     conn.close()
-    print("Database initialized successfully.")
+    log.info("Database initialized successfully.")
 
 def clean_stale_tickers():
     """
     Removes tickers and their associated mentions from the database
     if the ticker symbol exists in the COMMON_WORDS_BLACKLIST.
     """
-    print("\n--- Cleaning Stale Tickers from Database ---")
+    log.info("\n--- Cleaning Stale Tickers from Database ---")
     conn = get_db_connection()
     cursor = conn.cursor()
 
@@ -89,27 +91,27 @@ def clean_stale_tickers():
     stale_tickers = cursor.fetchall()
 
     if not stale_tickers:
-        print("No stale tickers to clean.")
+        log.info("No stale tickers to clean.")
         conn.close()
         return
 
     for ticker in stale_tickers:
         ticker_id = ticker['id']
         ticker_symbol = ticker['symbol']
-        print(f"Removing stale ticker '{ticker_symbol}' (ID: {ticker_id})...")
+        log.info(f"Removing stale ticker '{ticker_symbol}' (ID: {ticker_id})...")
         cursor.execute("DELETE FROM mentions WHERE ticker_id = ?", (ticker_id,))
         cursor.execute("DELETE FROM tickers WHERE id = ?", (ticker_id,))
 
     deleted_count = conn.total_changes
     conn.commit()
     conn.close()
-    print(f"Cleanup complete. Removed {deleted_count} records.")
+    log.info(f"Cleanup complete. Removed {deleted_count} records.")
 
 def clean_stale_subreddits(active_subreddits):
     """
     Removes all data associated with subreddits that are NOT in the active list.
     """
-    print("\n--- Cleaning Stale Subreddits from Database ---")
+    log.info("\n--- Cleaning Stale Subreddits from Database ---")
     conn = get_db_connection()
     cursor = conn.cursor()
     cursor.execute("SELECT id, name FROM subreddits")
@@ -117,20 +119,20 @@ def clean_stale_subreddits(active_subreddits):
     stale_sub_ids = []
     for sub in db_subreddits:
         if sub['name'] not in active_subreddits:
-            print(f"Found stale subreddit to remove: r/{sub['name']}")
+            log.info(f"Found stale subreddit to remove: r/{sub['name']}")
             stale_sub_ids.append(sub['id'])
     if not stale_sub_ids:
-        print("No stale subreddits to clean.")
+        log.info("No stale subreddits to clean.")
         conn.close()
         return
     for sub_id in stale_sub_ids:
-        print(f" -> Deleting associated data for subreddit ID: {sub_id}")
+        log.info(f" -> Deleting associated data for subreddit ID: {sub_id}")
         cursor.execute("DELETE FROM mentions WHERE subreddit_id = ?", (sub_id,))
         cursor.execute("DELETE FROM posts WHERE subreddit_id = ?", (sub_id,))
         cursor.execute("DELETE FROM subreddits WHERE id = ?", (sub_id,))
     conn.commit()
     conn.close()
-    print("Stale subreddit cleanup complete.")
+    log.info("Stale subreddit cleanup complete.")
 
 def get_db_connection():
     conn = sqlite3.connect(DB_FILE)
@@ -184,7 +186,7 @@ def initialize_db():
     """)
     conn.commit()
     conn.close()
-    print("Database initialized successfully.")
+    log.info("Database initialized successfully.")
 
 def add_mention(conn, ticker_id, subreddit_id, post_id, mention_type, timestamp, mention_sentiment, post_avg_sentiment=None):
     cursor = conn.cursor()
@@ -230,7 +232,7 @@ def get_ticker_info(conn, ticker_id):
 
 def generate_summary_report(limit=20):
     """Queries the DB to generate a summary for the command-line tool."""
-    print(f"\n--- Top {limit} Tickers by Mention Count ---")
+    log.info(f"\n--- Top {limit} Tickers by Mention Count ---")
     conn = get_db_connection()
     cursor = conn.cursor()
 
diff --git a/rstat_tool/logger_setup.py b/rstat_tool/logger_setup.py
new file mode 100644
index 0000000..9caaffa
--- /dev/null
+++ b/rstat_tool/logger_setup.py
@@ -0,0 +1,47 @@
+# rstat_tool/logger_setup.py
+
+import logging
+import sys
+
+# Get the root logger
+logger = logging.getLogger("rstat_app")
+logger.setLevel(logging.INFO) # Set the minimum level of messages to handle
+
+# Prevent the logger from propagating messages to the parent (root) logger
+logger.propagate = False
+
+# Only add handlers if they haven't been added before
+# This prevents duplicate log messages if this function is called multiple times.
+if not logger.handlers:
+    # --- Console Handler ---
+    # This handler prints logs to the standard output (your terminal)
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setLevel(logging.INFO)
+    # A simple formatter for the console
+    console_formatter = logging.Formatter('%(message)s')
+    console_handler.setFormatter(console_formatter)
+    logger.addHandler(console_handler)
+
+    # --- File Handler ---
+    # This handler writes logs to a file
+    # 'a' stands for append mode
+    file_handler = logging.FileHandler("rstat.log", mode='a')
+    file_handler.setLevel(logging.INFO)
+    # A more detailed formatter for the file, including timestamp and log level
+    file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
+    file_handler.setFormatter(file_formatter)
+    logger.addHandler(file_handler)
+
+# Get the logger used by the yfinance library
+yfinance_logger = logging.getLogger("yfinance")
+# Set its level to capture warnings and errors
+yfinance_logger.setLevel(logging.WARNING)
+# Add our existing handlers to it. This tells yfinance's logger
+# to send its messages to our console and our log file.
+if not yfinance_logger.handlers:
+    for handler in logger.handlers:  # use logger's handlers; console_handler/file_handler may be undefined here
+        yfinance_logger.addHandler(handler)
+
+def get_logger():
+    """A simple function to get our configured logger."""
+    return logger
\ No newline at end of file
diff --git a/rstat_tool/main.py b/rstat_tool/main.py
index 8c21e7c..c044d46 100644
--- a/rstat_tool/main.py
+++ b/rstat_tool/main.py
@@ -12,17 +12,20 @@ from dotenv import load_dotenv
 from . import database
 from .ticker_extractor import extract_tickers
 from .sentiment_analyzer import get_sentiment_score
+from .logger_setup import get_logger
 
 load_dotenv()
 
 MARKET_CAP_REFRESH_INTERVAL = 86400
 POST_AGE_LIMIT = 86400
 
+log = get_logger()
+
 def load_subreddits(filepath):
     try:
         with open(filepath, 'r') as f:
             return json.load(f).get("subreddits", [])
     except (FileNotFoundError, json.JSONDecodeError) as e:
-        print(f"Error loading config file '{filepath}': {e}")
+        log.error(f"Error loading config file '{filepath}': {e}")
         return None
 
 def get_financial_data(ticker_symbol):
@@ -52,7 +55,7 @@ def scan_subreddits(reddit, subreddits_list, post_limit=100, comment_limit=100,
     post_age_limit = days_to_scan * 86400
     current_time = time.time()
 
-    print(f"\nScanning {len(subreddits_list)} subreddit(s) for NEW posts in the last {days_to_scan} day(s)...")
+    log.info(f"\nScanning {len(subreddits_list)} subreddit(s) for NEW posts in the last {days_to_scan} day(s)...")
     for subreddit_name in subreddits_list:
         try:
             # Always use the lowercase version of the name for consistency.
@@ -60,14 +63,12 @@ def scan_subreddits(reddit, subreddits_list, post_limit=100, comment_limit=100,
             subreddit_id = database.get_or_create_entity(conn, 'subreddits', 'name', normalized_sub_name)
             subreddit = reddit.subreddit(normalized_sub_name)
-            print(f"Scanning r/{normalized_sub_name}...")
+            log.info(f"Scanning r/{normalized_sub_name}...")
 
             for submission in subreddit.new(limit=post_limit):
                 if (current_time - submission.created_utc) > post_age_limit:
-                    print(f" -> Reached posts older than the {days_to_scan}-day limit.")
+                    log.info(f" -> Reached posts older than the {days_to_scan}-day limit.")
                     break
-
-                # --- NEW HYBRID LOGIC ---
                 tickers_in_title = set(extract_tickers(submission.title))
                 all_tickers_found_in_post = set(tickers_in_title) # Start a set to track all tickers for financials
@@ -77,7 +78,7 @@ def scan_subreddits(reddit, subreddits_list, post_limit=100, comment_limit=100,
 
                 # --- CASE A: Tickers were found in the title ---
                 if tickers_in_title:
-                    print(f" -> Title Mention(s): {', '.join(tickers_in_title)}. Attributing all comments.")
+                    log.info(f" -> Title Mention(s): {', '.join(tickers_in_title)}. Attributing all comments.")
                     post_sentiment = get_sentiment_score(submission.title)
 
                     # Add one 'post' mention for each title ticker
@@ -109,7 +110,7 @@ def scan_subreddits(reddit, subreddits_list, post_limit=100, comment_limit=100,
                     ticker_id = database.get_or_create_entity(conn, 'tickers', 'symbol', ticker_symbol)
                     ticker_info = database.get_ticker_info(conn, ticker_id)
                     if not ticker_info['last_updated'] or (current_time - ticker_info['last_updated'] > MARKET_CAP_REFRESH_INTERVAL):
-                        print(f" -> Fetching financial data for {ticker_symbol}...")
+                        log.info(f" -> Fetching financial data for {ticker_symbol}...")
                         financials = get_financial_data(ticker_symbol)
                         database.update_ticker_financials(
                             conn, ticker_id,
@@ -129,10 +130,10 @@ def scan_subreddits(reddit, subreddits_list, post_limit=100, comment_limit=100,
                 database.add_or_update_post_analysis(conn, post_analysis_data)
 
         except Exception as e:
-            print(f"Could not scan r/{subreddit_name}. Error: {e}")
+            log.error(f"Could not scan r/{subreddit_name}. Error: {e}")
 
     conn.close()
-    print("\n--- Scan Complete ---")
+    log.info("\n--- Scan Complete ---")
 
 
 def main():
@@ -147,19 +148,18 @@ def main():
     parser.add_argument("-l", "--limit", type=int, default=20, help="Number of tickers to show in the CLI report.\n(Default: 20)")
     args = parser.parse_args()
 
-    # --- THIS IS THE CORRECTED LOGIC BLOCK ---
     if args.subreddit:
         # If --subreddit is used, create a list with just that one.
         subreddits_to_scan = [args.subreddit]
-        print(f"Targeted Scan Mode: Focusing on r/{args.subreddit}")
+        log.info(f"Targeted Scan Mode: Focusing on r/{args.subreddit}")
     else:
         # Otherwise, load from the config file.
-        print(f"Config Scan Mode: Loading subreddits from {args.config}")
+        log.info(f"Config Scan Mode: Loading subreddits from {args.config}")
         # Use the correct argument name: args.config
         subreddits_to_scan = load_subreddits(args.config)
 
     if not subreddits_to_scan:
-        print("Error: No subreddits to scan. Please check your config file or --subreddit argument.")
+        log.error("Error: No subreddits to scan. Please check your config file or --subreddit argument.")
         return
 
     # --- Initialize and Run ---