# export_image.py

import argparse
import time


def export_subreddit_image(subreddit_name, weekly=False):
    """Screenshot a subreddit's image view served by the local dashboard.

    Launches headless Chromium through Playwright, opens the daily or weekly
    image page of the Flask app at ``http://127.0.0.1:5000`` and saves the
    ``.image-container`` element as a timestamped PNG in the current working
    directory.

    Args:
        subreddit_name: Subreddit name without the ``r/`` prefix.
        weekly: Export the weekly view when true, the daily view otherwise.

    Returns:
        The file name of the PNG that was written.

    Raises:
        RuntimeError: If the dashboard answers the page request with a
            non-success HTTP status.
    """
    # Imported here rather than at module level so that importing this module
    # (or running it with --help) does not require Playwright to be installed.
    from playwright.sync_api import sync_playwright

    view_type = "weekly" if weekly else "daily"
    print(f"Exporting {view_type} image for r/{subreddit_name}...")

    # The URL our Flask app serves
    base_url = "http://127.0.0.1:5000"
    path = f"image/weekly/{subreddit_name}" if weekly else f"image/{subreddit_name}"
    url = f"{base_url}/{path}"

    # Define the output filename (timestamped so repeated exports don't collide)
    output_file = f"{subreddit_name}_{view_type}_{int(time.time())}.png"

    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()

            # Set a large viewport for high-quality screenshots
            page.set_viewport_size({"width": 1920, "height": 1080})

            print(f"Navigating to {url}...")
            response = page.goto(url)

            # Fail fast with a readable message instead of waiting for the
            # element lookup below to time out on an error page.
            if response is not None and not response.ok:
                raise RuntimeError(
                    f"Dashboard returned HTTP {response.status} for {url}"
                )

            # Important: Give the page a second to ensure all styles and fonts have loaded
            page.wait_for_timeout(1000)

            # Target the specific element we want to screenshot
            element = page.locator(".image-container")

            print(f"Saving screenshot to {output_file}...")
            element.screenshot(path=output_file)
        finally:
            # Always release the browser, including when navigation or the
            # screenshot raised.
            browser.close()

    print("Export complete!")
    return output_file


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Export subreddit sentiment images.")
    parser.add_argument("subreddit", help="The name of the subreddit to export.")
    parser.add_argument("--weekly", action="store_true", help="Export the weekly view instead of the daily view.")
    args = parser.parse_args()

    # NOTE: This script assumes your 'rstat-dashboard' server is already running in another terminal.
    export_subreddit_image(args.subreddit, args.weekly)
# rstat_tool/cleanup.py

import argparse
import json
import sys


# load_subreddits is duplicated here (rather than imported from main) so that
# this script stays self-contained.
def load_subreddits(filepath):
    """Load the list of subreddits from a JSON config file.

    The file must contain a JSON object with a ``"subreddits"`` key holding a
    list, e.g. ``{"subreddits": ["stocks", "investing"]}``.

    Args:
        filepath: Path to the JSON config file.

    Returns:
        The list stored under ``"subreddits"``, or ``None`` if the file cannot
        be read, is not valid JSON, or does not have that shape. A problem is
        reported on stdout before ``None`` is returned.
    """
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files; ValueError covers both
        # json.JSONDecodeError and undecodable bytes.
        print(f"Error loading config file '{filepath}': {e}")
        return None

    subreddits = data.get("subreddits") if isinstance(data, dict) else None
    if not isinstance(subreddits, list):
        # A missing key must not be read as "no active subreddits": the
        # cleanup deletes every subreddit that is absent from this list.
        print(f"Error loading config file '{filepath}': expected a JSON object with a \"subreddits\" list.")
        return None
    return subreddits


def run_cleanup(argv=None):
    """Main function for the cleanup tool.

    Args:
        argv: Command-line arguments to parse; ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        ``0`` when every requested task ran, ``1`` when no task was requested
        or the subreddit cleanup had to be skipped.
    """
    parser = argparse.ArgumentParser(
        description="A tool to clean stale data from the RSTAT database.",
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument("--tickers", action="store_true", help="Clean tickers that are in the blacklist.")

    # nargs='?': Makes the argument optional.
    # const='subreddits.json': The value used if the flag is present with no argument.
    # default=None: The value if the flag is not present at all.
    parser.add_argument(
        "--subreddits",
        nargs='?',
        const='subreddits.json',
        default=None,
        help="Clean data from subreddits NOT in the specified config file.\n(Defaults to 'subreddits.json' if flag is used without a value)."
    )

    parser.add_argument("--all", action="store_true", help="Run all available cleanup tasks.")

    args = parser.parse_args(argv)

    clean_tickers = args.all or args.tickers
    # The --subreddits argument will be None if not provided, or a filename string if it is.
    clean_subreddits = args.all or args.subreddits is not None

    if not (clean_tickers or clean_subreddits):
        parser.print_help()
        print("\nError: Please provide at least one cleanup option (e.g., --tickers, --subreddits, --all).")
        return 1

    # Imported only once there is work to do, so usage errors are reported
    # without loading the database layer.
    from . import database

    had_errors = False

    if clean_tickers:
        database.clean_stale_tickers()

    if clean_subreddits:
        # If --all is used, default to 'subreddits.json' if --subreddits wasn't also specified
        config_file = args.subreddits or 'subreddits.json'
        print(f"\nCleaning subreddits based on active list in: {config_file}")
        active_subreddits = load_subreddits(config_file)
        if active_subreddits is None:
            # load_subreddits has already printed the reason.
            had_errors = True
        elif not active_subreddits:
            # An empty active list would mark every stored subreddit as stale.
            print("Config lists no subreddits; skipping subreddit cleanup to avoid deleting all data.")
            had_errors = True
        else:
            database.clean_stale_subreddits(active_subreddits)

    if had_errors:
        print("\nCleanup finished with errors.")
        return 1

    print("\nCleanup finished.")
    return 0


if __name__ == "__main__":
    sys.exit(run_cleanup())
def clean_stale_subreddits(active_subreddits):
    """Remove all data for subreddits that are NOT in the active list.

    Every row of the ``subreddits`` table whose name is missing from
    ``active_subreddits`` is deleted together with its rows in ``mentions``
    and ``posts``. All deletions are committed as one transaction; if any
    statement fails, the transaction is rolled back and the error propagates.

    Args:
        active_subreddits: Iterable of subreddit names to keep.
    """
    print("\n--- Cleaning Stale Subreddits from Database ---")
    # Materialise once: O(1) lookups, and one-shot iterables are supported.
    # NOTE(review): the comparison is case-sensitive; confirm that names are
    # stored in the database with the same casing as in the config file.
    active_names = set(active_subreddits)

    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT id, name FROM subreddits")

        stale_sub_ids = []
        for sub in cursor.fetchall():
            if sub['name'] not in active_names:
                print(f"Found stale subreddit to remove: r/{sub['name']}")
                stale_sub_ids.append(sub['id'])

        if not stale_sub_ids:
            print("No stale subreddits to clean.")
            return

        # The connection's context manager commits on success and rolls back
        # on an exception, so a failure cannot leave a subreddit half-deleted.
        with conn:
            for sub_id in stale_sub_ids:
                print(f"  -> Deleting associated data for subreddit ID: {sub_id}")
                # Dependent rows first, then the subreddit row itself.
                cursor.execute("DELETE FROM mentions WHERE subreddit_id = ?", (sub_id,))
                cursor.execute("DELETE FROM posts WHERE subreddit_id = ?", (sub_id,))
                cursor.execute("DELETE FROM subreddits WHERE id = ?", (sub_id,))
    finally:
        # Close the connection on every path, including errors.
        conn.close()

    print("Stale subreddit cleanup complete.")
default=1, help="Number of past days to scan for new posts.\n(Default: 1 for last 24 hours)") parser.add_argument("-p", "--posts", type=int, default=200, help="Max posts to check per subreddit.\n(Default: 200)") parser.add_argument("-c", "--comments", type=int, default=100, help="Number of comments to scan per post.\n(Default: 100)") parser.add_argument("-l", "--limit", type=int, default=20, help="Number of tickers to show in the CLI report.\n(Default: 20)") @@ -162,7 +161,6 @@ def main(): # --- Initialize and Run --- database.initialize_db() - database.clean_stale_tickers() reddit = get_reddit_instance() if not reddit: return diff --git a/setup.py b/setup.py index af9394b..f764442 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ setup( # The path is now 'package_name.module_name:function_name' 'rstat=rstat_tool.main:main', 'rstat-dashboard=rstat_tool.dashboard:start_dashboard', + 'rstat-cleanup=rstat_tool.cleanup:run_cleanup', ], }, ) \ No newline at end of file diff --git a/subreddits.json b/subreddits.json index 8125e32..b194cc6 100644 --- a/subreddits.json +++ b/subreddits.json @@ -1,18 +1,13 @@ { "subreddits": [ + "investing", "pennystocks", "Shortsqueeze", "smallstreetbets", - "wallstreetbets", - "Wallstreetbetsnew", - "wallstreetbets2", "stocks", - "RobinHoodPennyStocks", - "StocksAndTrading", - "investing", - "WallStreetBetsELITE", + "Tollbugatabets", "ValueInvesting", - "Daytrading", - "Tollbugatabets" + "wallstreetbets", + "WallStreetBetsELITE" ] -} +} \ No newline at end of file