Added logger.

2025-07-22 15:35:37 +02:00
parent afe3cecb4f
commit d4ed76e153
5 changed files with 88 additions and 34 deletions
--- a/rstat_tool/cleanup.py
+++ b/rstat_tool/cleanup.py
@@ -2,10 +2,13 @@

 import argparse
 from . import database
+from .logger_setup import get_logger
 # We can't reuse load_subreddits from main anymore if it's not in the same file
 # So we will duplicate it here. It's small and keeps this script self-contained.
 import json

+log = get_logger()
+
 def load_subreddits(filepath):
    """Loads a list of subreddits from a JSON file."""
    try:
@@ -13,7 +16,7 @@ def load_subreddits(filepath):
            data = json.load(f)
            return data.get("subreddits", [])
    except (FileNotFoundError, json.JSONDecodeError) as e:
-        print(f"Error loading config file '{filepath}': {e}")
+        log.error(f"Error loading config file '{filepath}': {e}")
        return None

 def run_cleanup():
@@ -52,17 +55,17 @@ def run_cleanup():
        run_any_task = True
        # If --all is used, default to 'subreddits.json' if --subreddits wasn't also specified
        config_file = args.subreddits or 'subreddits.json'
-        print(f"\nCleaning subreddits based on active list in: {config_file}")
+        log.info(f"\nCleaning subreddits based on active list in: {config_file}")
        active_subreddits = load_subreddits(config_file)
        if active_subreddits is not None:
            database.clean_stale_subreddits(active_subreddits)

    if not run_any_task:
        parser.print_help()
-        print("\nError: Please provide at least one cleanup option (e.g., --tickers, --subreddits, --all).")
+        log.error("\nError: Please provide at least one cleanup option (e.g., --tickers, --subreddits, --all).")
        return

-    print("\nCleanup finished.")
+    log.info("\nCleanup finished.")

 if __name__ == "__main__":
    run_cleanup()
--- a/rstat_tool/dashboard.py
+++ b/rstat_tool/dashboard.py
@@ -1,7 +1,8 @@
 # rstat_tool/dashboard.py

 from flask import Flask, render_template
-from datetime import datetime, timedelta 
+from datetime import datetime, timedelta
+from .logger_setup import get_logger
 from .database import (
    get_overall_summary,
    get_subreddit_summary,
@@ -12,6 +13,7 @@ from .database import (
    get_overall_image_view_summary
 )

+log = get_logger()
 app = Flask(__name__, template_folder='../templates')

@app.template_filter('format_mc')
@@ -95,9 +97,9 @@ def overall_image_view():

 def start_dashboard():
    """The main function called by the 'rstat-dashboard' command."""
-    print("Starting Flask server...")
-    print("Open http://127.0.0.1:5000 in your browser.")
-    print("Press CTRL+C to stop the server.")
+    log.info("Starting Flask server...")
+    log.info("Open http://127.0.0.1:5000 in your browser.")
+    log.info("Press CTRL+C to stop the server.")
    app.run(debug=True)

 if __name__ == "__main__":
--- a/rstat_tool/database.py
+++ b/rstat_tool/database.py
@@ -3,9 +3,11 @@
 import sqlite3
 import time
 from .ticker_extractor import COMMON_WORDS_BLACKLIST
+from .logger_setup import get_logger
 from datetime import datetime, timedelta

 DB_FILE = "reddit_stocks.db"
+log = get_logger()

 def get_db_connection():
    """Establishes a connection to the SQLite database."""
@@ -71,14 +73,14 @@ def initialize_db():

    conn.commit()
    conn.close()
-    print("Database initialized successfully.")
+    log.info("Database initialized successfully.")

 def clean_stale_tickers():
    """
    Removes tickers and their associated mentions from the database
    if the ticker symbol exists in the COMMON_WORDS_BLACKLIST.
    """
-    print("\n--- Cleaning Stale Tickers from Database ---")
+    log.info("\n--- Cleaning Stale Tickers from Database ---")
    conn = get_db_connection()
    cursor = conn.cursor()

@@ -89,27 +91,27 @@ def clean_stale_tickers():
    stale_tickers = cursor.fetchall()

    if not stale_tickers:
-        print("No stale tickers to clean.")
+        log.info("No stale tickers to clean.")
        conn.close()
        return

    for ticker in stale_tickers:
        ticker_id = ticker['id']
        ticker_symbol = ticker['symbol']
-        print(f"Removing stale ticker '{ticker_symbol}' (ID: {ticker_id})...")
+        log.info(f"Removing stale ticker '{ticker_symbol}' (ID: {ticker_id})...")
        cursor.execute("DELETE FROM mentions WHERE ticker_id = ?", (ticker_id,))
        cursor.execute("DELETE FROM tickers WHERE id = ?", (ticker_id,))
    
    deleted_count = conn.total_changes
    conn.commit()
    conn.close()
-    print(f"Cleanup complete. Removed {deleted_count} records.")
+    log.info(f"Cleanup complete. Removed {deleted_count} records.")

 def clean_stale_subreddits(active_subreddits):
    """
    Removes all data associated with subreddits that are NOT in the active list.
    """
-    print("\n--- Cleaning Stale Subreddits from Database ---")
+    log.info("\n--- Cleaning Stale Subreddits from Database ---")
    conn = get_db_connection()
    cursor = conn.cursor()
    cursor.execute("SELECT id, name FROM subreddits")
@@ -117,20 +119,20 @@ def clean_stale_subreddits(active_subreddits):
    stale_sub_ids = []
    for sub in db_subreddits:
        if sub['name'] not in active_subreddits:
-            print(f"Found stale subreddit to remove: r/{sub['name']}")
+            log.info(f"Found stale subreddit to remove: r/{sub['name']}")
            stale_sub_ids.append(sub['id'])
    if not stale_sub_ids:
-        print("No stale subreddits to clean.")
+        log.info("No stale subreddits to clean.")
        conn.close()
        return
    for sub_id in stale_sub_ids:
-        print(f"  -> Deleting associated data for subreddit ID: {sub_id}")
+        log.info(f"  -> Deleting associated data for subreddit ID: {sub_id}")
        cursor.execute("DELETE FROM mentions WHERE subreddit_id = ?", (sub_id,))
        cursor.execute("DELETE FROM posts WHERE subreddit_id = ?", (sub_id,))
        cursor.execute("DELETE FROM subreddits WHERE id = ?", (sub_id,))
    conn.commit()
    conn.close()
-    print("Stale subreddit cleanup complete.")
+    log.info("Stale subreddit cleanup complete.")

 def get_db_connection():
    conn = sqlite3.connect(DB_FILE)
@@ -184,7 +186,7 @@ def initialize_db():
    """)
    conn.commit()
    conn.close()
-    print("Database initialized successfully.")
+    log.info("Database initialized successfully.")

 def add_mention(conn, ticker_id, subreddit_id, post_id, mention_type, timestamp, mention_sentiment, post_avg_sentiment=None):
    cursor = conn.cursor()
@@ -230,7 +232,7 @@ def get_ticker_info(conn, ticker_id):

 def generate_summary_report(limit=20):
    """Queries the DB to generate a summary for the command-line tool."""
-    print(f"\n--- Top {limit} Tickers by Mention Count ---")
+    log.info(f"\n--- Top {limit} Tickers by Mention Count ---")
    conn = get_db_connection()
    cursor = conn.cursor()
    
--- a/rstat_tool/logger_setup.py
+++ b/rstat_tool/logger_setup.py
@@ -0,0 +1,47 @@
+# rstat_tool/logger_setup.py
+
+import logging
+import sys
+
+# Get the application's named logger ("rstat_app"); this is not the root logger
+logger = logging.getLogger("rstat_app")
+logger.setLevel(logging.INFO) # Set the minimum level of messages to handle
+
+# Prevent the logger from propagating messages to the parent (root) logger
+logger.propagate = False
+
+# Only add handlers if they haven't been added before
+# This prevents duplicate log messages if this module's setup code runs more than once.
+if not logger.handlers:
+    # --- Console Handler ---
+    # This handler prints logs to the standard output (your terminal)
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setLevel(logging.INFO)
+    # A simple formatter for the console
+    console_formatter = logging.Formatter('%(message)s')
+    console_handler.setFormatter(console_formatter)
+    logger.addHandler(console_handler)
+
+    # --- File Handler ---
+    # This handler writes logs to a file
+    # 'a' stands for append mode
+    file_handler = logging.FileHandler("rstat.log", mode='a')
+    file_handler.setLevel(logging.INFO)
+    # A more detailed formatter for the file, including timestamp and log level
+    file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
+    file_handler.setFormatter(file_formatter)
+    logger.addHandler(file_handler)
+
+# Get the logger used by the yfinance library
+yfinance_logger = logging.getLogger("yfinance")
+# Set its level to capture warnings and errors
+yfinance_logger.setLevel(logging.WARNING)
+# Reuse whatever handlers are attached to our logger (console and file) rather
+# than the local names above, which are unbound if the setup branch was skipped.
+if not yfinance_logger.handlers:
+    for handler in logger.handlers:
+        yfinance_logger.addHandler(handler)
+
+def get_logger():
+    """Return the shared "rstat_app" logger configured at module import."""
+    return logger
--- a/rstat_tool/main.py
+++ b/rstat_tool/main.py
@@ -12,17 +12,20 @@ from dotenv import load_dotenv
 from . import database
 from .ticker_extractor import extract_tickers
 from .sentiment_analyzer import get_sentiment_score
+from .logger_setup import get_logger

 load_dotenv()
 MARKET_CAP_REFRESH_INTERVAL = 86400
 POST_AGE_LIMIT = 86400 

+log = get_logger()
+
 def load_subreddits(filepath):
    try:
        with open(filepath, 'r') as f:
            return json.load(f).get("subreddits", [])
    except (FileNotFoundError, json.JSONDecodeError) as e:
-        print(f"Error loading config file '{filepath}': {e}")
+        log.error(f"Error loading config file '{filepath}': {e}")
        return None

 def get_financial_data(ticker_symbol):
@@ -52,7 +55,7 @@ def scan_subreddits(reddit, subreddits_list, post_limit=100, comment_limit=100,
    post_age_limit = days_to_scan * 86400
    current_time = time.time()
    
-    print(f"\nScanning {len(subreddits_list)} subreddit(s) for NEW posts in the last {days_to_scan} day(s)...")
+    log.info(f"\nScanning {len(subreddits_list)} subreddit(s) for NEW posts in the last {days_to_scan} day(s)...")
    for subreddit_name in subreddits_list:
        try:
            # Always use the lowercase version of the name for consistency.
@@ -60,14 +63,12 @@ def scan_subreddits(reddit, subreddits_list, post_limit=100, comment_limit=100,
            
            subreddit_id = database.get_or_create_entity(conn, 'subreddits', 'name', normalized_sub_name)
            subreddit = reddit.subreddit(normalized_sub_name)
-            print(f"Scanning r/{normalized_sub_name}...")
+            log.info(f"Scanning r/{normalized_sub_name}...")
            
            for submission in subreddit.new(limit=post_limit):
                if (current_time - submission.created_utc) > post_age_limit:
-                    print(f"  -> Reached posts older than the {days_to_scan}-day limit.")
+                    log.info(f"  -> Reached posts older than the {days_to_scan}-day limit.")
                    break
-
-                # --- NEW HYBRID LOGIC ---
                
                tickers_in_title = set(extract_tickers(submission.title))
                all_tickers_found_in_post = set(tickers_in_title) # Start a set to track all tickers for financials
@@ -77,7 +78,7 @@ def scan_subreddits(reddit, subreddits_list, post_limit=100, comment_limit=100,
                
                # --- CASE A: Tickers were found in the title ---
                if tickers_in_title:
-                    print(f"  -> Title Mention(s): {', '.join(tickers_in_title)}. Attributing all comments.")
+                    log.info(f"  -> Title Mention(s): {', '.join(tickers_in_title)}. Attributing all comments.")
                    post_sentiment = get_sentiment_score(submission.title)
                    
                    # Add one 'post' mention for each title ticker
@@ -109,7 +110,7 @@ def scan_subreddits(reddit, subreddits_list, post_limit=100, comment_limit=100,
                    ticker_id = database.get_or_create_entity(conn, 'tickers', 'symbol', ticker_symbol)
                    ticker_info = database.get_ticker_info(conn, ticker_id)
                    if not ticker_info['last_updated'] or (current_time - ticker_info['last_updated'] > MARKET_CAP_REFRESH_INTERVAL):
-                        print(f"    -> Fetching financial data for {ticker_symbol}...")
+                        log.info(f"    -> Fetching financial data for {ticker_symbol}...")
                        financials = get_financial_data(ticker_symbol)
                        database.update_ticker_financials(
                            conn, ticker_id,
@@ -129,10 +130,10 @@ def scan_subreddits(reddit, subreddits_list, post_limit=100, comment_limit=100,
                database.add_or_update_post_analysis(conn, post_analysis_data)
                
        except Exception as e:
-            print(f"Could not scan r/{subreddit_name}. Error: {e}")
+            log.error(f"Could not scan r/{subreddit_name}. Error: {e}")
            
    conn.close()
-    print("\n--- Scan Complete ---")
+    log.info("\n--- Scan Complete ---")


 def main():
@@ -147,19 +148,18 @@ def main():
    parser.add_argument("-l", "--limit", type=int, default=20, help="Number of tickers to show in the CLI report.\n(Default: 20)")
    args = parser.parse_args()

-    # --- THIS IS THE CORRECTED LOGIC BLOCK ---
    if args.subreddit:
        # If --subreddit is used, create a list with just that one.
        subreddits_to_scan = [args.subreddit]
-        print(f"Targeted Scan Mode: Focusing on r/{args.subreddit}")
+        log.info(f"Targeted Scan Mode: Focusing on r/{args.subreddit}")
    else:
        # Otherwise, load from the config file.
-        print(f"Config Scan Mode: Loading subreddits from {args.config}")
+        log.info(f"Config Scan Mode: Loading subreddits from {args.config}")
        # Use the correct argument name: args.config
        subreddits_to_scan = load_subreddits(args.config) 

    if not subreddits_to_scan:
-        print("Error: No subreddits to scan. Please check your config file or --subreddit argument.")
+        log.error("Error: No subreddits to scan. Please check your config file or --subreddit argument.")
        return

    # --- Initialize and Run ---