Added functionality for exporting images and for cleaning up stale subreddits and tickers.

2025-07-22 14:30:23 +02:00
parent 6b4fb136e2
commit 966ef45916
7 changed files with 159 additions and 18 deletions

export_image.py (new file, +51)

@@ -0,0 +1,51 @@
# export_image.py
import argparse
import time

from playwright.sync_api import sync_playwright


def export_subreddit_image(subreddit_name, weekly=False):
    """
    Launches a headless browser to take a screenshot of a subreddit's image view.
    """
    view_type = "weekly" if weekly else "daily"
    print(f"Exporting {view_type} image for r/{subreddit_name}...")

    # The URL our Flask app serves
    base_url = "http://127.0.0.1:5000"
    path = f"image/weekly/{subreddit_name}" if weekly else f"image/{subreddit_name}"
    url = f"{base_url}/{path}"

    # Define the output filename
    output_file = f"{subreddit_name}_{'weekly' if weekly else 'daily'}_{int(time.time())}.png"

    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()

        # Set a large viewport for high-quality screenshots
        page.set_viewport_size({"width": 1920, "height": 1080})

        print(f"Navigating to {url}...")
        page.goto(url)

        # Important: give the page a second so all styles and fonts have loaded
        page.wait_for_timeout(1000)

        # Target the specific element we want to screenshot
        element = page.locator(".image-container")

        print(f"Saving screenshot to {output_file}...")
        element.screenshot(path=output_file)

        browser.close()
        print("Export complete!")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Export subreddit sentiment images.")
    parser.add_argument("subreddit", help="The name of the subreddit to export.")
    parser.add_argument("--weekly", action="store_true", help="Export the weekly view instead of the daily view.")
    args = parser.parse_args()

    # NOTE: This script assumes your 'rstat-dashboard' server is already running in another terminal.
    export_subreddit_image(args.subreddit, args.weekly)
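With the dashboard serving on 127.0.0.1:5000 in another terminal, usage looks like this (the subreddit name is just an example):

    python export_image.py wallstreetbets
    python export_image.py wallstreetbets --weekly

Each run writes a timestamped PNG such as wallstreetbets_daily_<timestamp>.png to the current directory.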

requirements.txt

@@ -1,5 +1,6 @@
+yfinance
+Flask
+nltk
+playwright
 praw
 python-dotenv
-nltk
-Flask
-yfinance
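playwright is the only new dependency; besides the pip package it also needs a browser download, which is Playwright's standard post-install step:

    pip install -r requirements.txt
    playwright install chromium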

rstat_tool/cleanup.py (new file, +68)

@@ -0,0 +1,68 @@
# rstat_tool/cleanup.py
import argparse
import json

from . import database


def load_subreddits(filepath):
    """Loads a list of subreddits from a JSON file."""
    # We can't reuse load_subreddits() from main now that it lives in another
    # module, so we duplicate it here. It's small and keeps this script
    # self-contained.
    try:
        with open(filepath, 'r') as f:
            data = json.load(f)
        return data.get("subreddits", [])
    except (FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Error loading config file '{filepath}': {e}")
        return None


def run_cleanup():
    """Main function for the cleanup tool."""
    parser = argparse.ArgumentParser(
        description="A tool to clean stale data from the RSTAT database.",
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument("--tickers", action="store_true", help="Clean tickers that are in the blacklist.")

    # nargs='?' makes the value optional.
    # const='subreddits.json' is used if the flag is present with no value.
    # default=None is used if the flag is not present at all.
    parser.add_argument(
        "--subreddits",
        nargs='?',
        const='subreddits.json',
        default=None,
        help="Clean data from subreddits NOT in the specified config file.\n(Defaults to 'subreddits.json' if the flag is used without a value)."
    )
    parser.add_argument("--all", action="store_true", help="Run all available cleanup tasks.")
    args = parser.parse_args()

    run_any_task = False

    if args.all or args.tickers:
        run_any_task = True
        database.clean_stale_tickers()

    # args.subreddits is None if the flag was not provided, or a filename string if it was.
    if args.all or args.subreddits is not None:
        run_any_task = True
        # If --all is used without --subreddits, fall back to 'subreddits.json'.
        config_file = args.subreddits or 'subreddits.json'
        print(f"\nCleaning subreddits based on active list in: {config_file}")
        active_subreddits = load_subreddits(config_file)
        if active_subreddits is not None:
            database.clean_stale_subreddits(active_subreddits)

    if not run_any_task:
        parser.print_help()
        print("\nError: Please provide at least one cleanup option (e.g., --tickers, --subreddits, --all).")
        return

    print("\nCleanup finished.")


if __name__ == "__main__":
    run_cleanup()
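Together with the console script registered in setup.py below, this supports the following invocations (a sketch based on the argument definitions above; my_subs.json is a hypothetical custom config):

    rstat-cleanup --tickers                  # blacklisted tickers only
    rstat-cleanup --subreddits               # stale subreddits, using subreddits.json
    rstat-cleanup --subreddits my_subs.json  # same, with a custom config file
    rstat-cleanup --all                      # every cleanup task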

rstat_tool/database.py

@@ -105,6 +105,33 @@ def clean_stale_tickers():
     conn.close()
     print(f"Cleanup complete. Removed {deleted_count} records.")
+
+def clean_stale_subreddits(active_subreddits):
+    """
+    Removes all data associated with subreddits that are NOT in the active list.
+    """
+    print("\n--- Cleaning Stale Subreddits from Database ---")
+    conn = get_db_connection()
+    cursor = conn.cursor()
+
+    cursor.execute("SELECT id, name FROM subreddits")
+    db_subreddits = cursor.fetchall()
+
+    stale_sub_ids = []
+    for sub in db_subreddits:
+        if sub['name'] not in active_subreddits:
+            print(f"Found stale subreddit to remove: r/{sub['name']}")
+            stale_sub_ids.append(sub['id'])
+
+    if not stale_sub_ids:
+        print("No stale subreddits to clean.")
+        conn.close()
+        return
+
+    for sub_id in stale_sub_ids:
+        print(f"  -> Deleting associated data for subreddit ID: {sub_id}")
+        cursor.execute("DELETE FROM mentions WHERE subreddit_id = ?", (sub_id,))
+        cursor.execute("DELETE FROM posts WHERE subreddit_id = ?", (sub_id,))
+        cursor.execute("DELETE FROM subreddits WHERE id = ?", (sub_id,))
+
+    conn.commit()
+    conn.close()
+    print("Stale subreddit cleanup complete.")
+
 def get_db_connection():
     conn = sqlite3.connect(DB_FILE)
     conn.row_factory = sqlite3.Row
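The deletes deliberately run child tables first (mentions, posts) and the parent subreddits row last, so no orphaned rows are left behind. SQLite only enforces foreign keys when the pragma is switched on per connection, so this explicit ordering matters; as a minimal alternative sketch, assuming the schema declares the REFERENCES clauses and you want the engine to enforce them instead:

    conn = sqlite3.connect(DB_FILE)
    conn.execute("PRAGMA foreign_keys = ON")  # off by default in SQLite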

rstat_tool/main.py

@@ -136,10 +136,9 @@ def main():
     """Main function to run the Reddit stock analysis tool."""
     parser = argparse.ArgumentParser(description="Analyze stock ticker mentions on Reddit.", formatter_class=argparse.RawTextHelpFormatter)
-    parser.add_argument("--config", default="subreddits.json", help="Path to the JSON file containing subreddits.\n(Default: subreddits.json)")
-    parser.add_argument("--subreddit", help="Scan a single subreddit, ignoring the config file.")
-    parser.add_argument("--days", type=int, default=1, help="Number of past days to scan for new posts.\n(Default: 1 for last 24 hours)")
+    parser.add_argument("-f", "--config", default="subreddits.json", help="Path to the JSON file containing subreddits.\n(Default: subreddits.json)")
+    parser.add_argument("-s", "--subreddit", help="Scan a single subreddit, ignoring the config file.")
+    parser.add_argument("-d", "--days", type=int, default=1, help="Number of past days to scan for new posts.\n(Default: 1 for last 24 hours)")
     parser.add_argument("-p", "--posts", type=int, default=200, help="Max posts to check per subreddit.\n(Default: 200)")
     parser.add_argument("-c", "--comments", type=int, default=100, help="Number of comments to scan per post.\n(Default: 100)")
     parser.add_argument("-l", "--limit", type=int, default=20, help="Number of tickers to show in the CLI report.\n(Default: 20)")
@@ -162,7 +161,6 @@ def main():
     # --- Initialize and Run ---
     database.initialize_db()
-    database.clean_stale_tickers()
     reddit = get_reddit_instance()
     if not reddit: return
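The new short flags make ad-hoc scans less verbose; for example, these two commands are equivalent (values are illustrative):

    rstat --subreddit pennystocks --days 7
    rstat -s pennystocks -d 7

Dropping the automatic clean_stale_tickers() call from main() means a scan no longer mutates old data as a side effect; cleanup is now an explicit step via the new rstat-cleanup tool.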

setup.py

@@ -19,6 +19,7 @@ setup(
             # The path is now 'package_name.module_name:function_name'
             'rstat=rstat_tool.main:main',
             'rstat-dashboard=rstat_tool.dashboard:start_dashboard',
+            'rstat-cleanup=rstat_tool.cleanup:run_cleanup',
         ],
     },
 )
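Because the entry lives under console_scripts, the new command only appears on the PATH after the package is reinstalled, e.g.:

    pip install -e .
    rstat-cleanup --all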

subreddits.json

@@ -1,18 +1,13 @@
 {
   "subreddits": [
+    "investing",
     "pennystocks",
     "Shortsqueeze",
     "smallstreetbets",
-    "wallstreetbets",
     "Wallstreetbetsnew",
     "wallstreetbets2",
-    "stocks",
-    "RobinHoodPennyStocks",
-    "StocksAndTrading",
-    "investing",
-    "WallStreetBetsELITE",
+    "Tollbugatabets",
-    "ValueInvesting",
-    "Daytrading",
-    "Tollbugatabets"
+    "wallstreetbets",
+    "WallStreetBetsELITE"
   ]
 }