Added functionality for exporting images, and for cleaning up stale subreddits and stale tickers.
This commit is contained in:
51
export_image.py
Normal file
51
export_image.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
# export_image.py
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
import time
|
||||||
|
|
||||||
|
def export_subreddit_image(subreddit_name, weekly=False, base_url="http://127.0.0.1:5000"):
    """
    Launches a headless browser to take a screenshot of a subreddit's image view.

    Args:
        subreddit_name: Name of the subreddit (without the "r/" prefix).
        weekly: When True, capture the weekly view instead of the daily view.
        base_url: Root URL of the running rstat-dashboard Flask server.
            Defaults to the local development address.

    Returns:
        None. Writes a timestamped PNG file into the current directory.
    """
    view_type = "weekly" if weekly else "daily"
    print(f"Exporting {view_type} image for r/{subreddit_name}...")

    # The URL our Flask app serves
    path = f"image/weekly/{subreddit_name}" if weekly else f"image/{subreddit_name}"
    url = f"{base_url}/{path}"

    # Define the output filename; reuse view_type so the filename always
    # matches the view that was actually captured.
    output_file = f"{subreddit_name}_{view_type}_{int(time.time())}.png"

    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()

            # Set a large viewport for high-quality screenshots
            page.set_viewport_size({"width": 1920, "height": 1080})

            print(f"Navigating to {url}...")
            page.goto(url)

            # Important: Give the page a second to ensure all styles and fonts have loaded
            page.wait_for_timeout(1000)

            # Target the specific element we want to screenshot
            element = page.locator(".image-container")

            print(f"Saving screenshot to {output_file}...")
            element.screenshot(path=output_file)
        finally:
            # Always release the browser, even if navigation or the
            # screenshot fails, so headless Chromium processes don't leak.
            browser.close()

    print("Export complete!")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # CLI entry point.
    # NOTE: This script assumes your 'rstat-dashboard' server is already
    # running in another terminal.
    arg_parser = argparse.ArgumentParser(description="Export subreddit sentiment images.")
    arg_parser.add_argument("subreddit", help="The name of the subreddit to export.")
    arg_parser.add_argument("--weekly", action="store_true", help="Export the weekly view instead of the daily view.")
    cli_args = arg_parser.parse_args()

    export_subreddit_image(cli_args.subreddit, cli_args.weekly)
|
@@ -1,5 +1,6 @@
|
|||||||
yfinance
|
Flask
|
||||||
|
nltk
|
||||||
|
playwright
|
||||||
praw
|
praw
|
||||||
python-dotenv
|
python-dotenv
|
||||||
nltk
|
yfinance
|
||||||
Flask
|
|
68
rstat_tool/cleanup.py
Normal file
68
rstat_tool/cleanup.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
# rstat_tool/cleanup.py
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from . import database
|
||||||
|
# We can't reuse load_subreddits from main anymore if it's not in the same file
|
||||||
|
# So we will duplicate it here. It's small and keeps this script self-contained.
|
||||||
|
import json
|
||||||
|
|
||||||
|
def load_subreddits(filepath):
    """Read the active subreddit list from a JSON config file.

    Returns the list stored under the "subreddits" key (an empty list when
    the key is absent), or None when the file is missing or not valid JSON.
    """
    try:
        with open(filepath, 'r') as config:
            parsed = json.load(config)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Error loading config file '{filepath}': {e}")
        return None
    return parsed.get("subreddits", [])
|
||||||
|
|
||||||
|
def run_cleanup():
    """Entry point for the rstat-cleanup command-line tool.

    Parses CLI flags and dispatches the requested database cleanup tasks:
    stale blacklisted tickers (--tickers), subreddits no longer in the
    active config (--subreddits [FILE]), or everything (--all).
    """
    cli = argparse.ArgumentParser(
        description="A tool to clean stale data from the RSTAT database.",
        formatter_class=argparse.RawTextHelpFormatter
    )
    cli.add_argument("--tickers", action="store_true", help="Clean tickers that are in the blacklist.")

    # --subreddits takes an optional value:
    #   nargs='?'                -> the value itself is optional
    #   const='subreddits.json'  -> used when the flag appears with no value
    #   default=None             -> used when the flag is absent entirely
    cli.add_argument(
        "--subreddits",
        nargs='?',
        const='subreddits.json',
        default=None,
        help="Clean data from subreddits NOT in the specified config file.\n(Defaults to 'subreddits.json' if flag is used without a value)."
    )

    cli.add_argument("--all", action="store_true", help="Run all available cleanup tasks.")

    opts = cli.parse_args()

    did_anything = False

    if opts.tickers or opts.all:
        did_anything = True
        database.clean_stale_tickers()

    # opts.subreddits is a filename string when the flag was given,
    # otherwise None (in which case the task only runs under --all).
    if opts.subreddits is not None or opts.all:
        did_anything = True
        # Under --all without an explicit file, fall back to the default config.
        config_file = opts.subreddits or 'subreddits.json'
        print(f"\nCleaning subreddits based on active list in: {config_file}")
        wanted = load_subreddits(config_file)
        if wanted is not None:
            database.clean_stale_subreddits(wanted)

    if not did_anything:
        cli.print_help()
        print("\nError: Please provide at least one cleanup option (e.g., --tickers, --subreddits, --all).")
        return

    print("\nCleanup finished.")
|
||||||
|
|
||||||
|
# Allow running this module directly (python -m rstat_tool.cleanup) in
# addition to the installed console-script entry point.
if __name__ == "__main__":
    run_cleanup()
|
@@ -105,6 +105,33 @@ def clean_stale_tickers():
|
|||||||
conn.close()
|
conn.close()
|
||||||
print(f"Cleanup complete. Removed {deleted_count} records.")
|
print(f"Cleanup complete. Removed {deleted_count} records.")
|
||||||
|
|
||||||
|
def clean_stale_subreddits(active_subreddits):
    """
    Removes all data associated with subreddits that are NOT in the active list.

    Args:
        active_subreddits: Iterable of subreddit names that should be kept;
            every subreddit row whose name is absent from it is deleted,
            along with its mentions and posts.

    Returns:
        None. Commits the deletions and closes the connection.
    """
    print("\n--- Cleaning Stale Subreddits from Database ---")
    conn = get_db_connection()
    cursor = conn.cursor()
    cursor.execute("SELECT id, name FROM subreddits")
    db_subreddits = cursor.fetchall()

    # Build the lookup set once so membership tests stay O(1) even when the
    # active list and the subreddit table are both large.
    active_names = set(active_subreddits)

    stale_sub_ids = []
    for sub in db_subreddits:
        if sub['name'] not in active_names:
            print(f"Found stale subreddit to remove: r/{sub['name']}")
            stale_sub_ids.append(sub['id'])

    if not stale_sub_ids:
        print("No stale subreddits to clean.")
        conn.close()
        return

    # Delete child rows (mentions, posts) before the parent subreddit row.
    for sub_id in stale_sub_ids:
        print(f" -> Deleting associated data for subreddit ID: {sub_id}")
        cursor.execute("DELETE FROM mentions WHERE subreddit_id = ?", (sub_id,))
        cursor.execute("DELETE FROM posts WHERE subreddit_id = ?", (sub_id,))
        cursor.execute("DELETE FROM subreddits WHERE id = ?", (sub_id,))

    conn.commit()
    conn.close()
    print("Stale subreddit cleanup complete.")
|
||||||
|
|
||||||
def get_db_connection():
|
def get_db_connection():
|
||||||
conn = sqlite3.connect(DB_FILE)
|
conn = sqlite3.connect(DB_FILE)
|
||||||
conn.row_factory = sqlite3.Row
|
conn.row_factory = sqlite3.Row
|
||||||
|
@@ -136,10 +136,9 @@ def main():
|
|||||||
"""Main function to run the Reddit stock analysis tool."""
|
"""Main function to run the Reddit stock analysis tool."""
|
||||||
parser = argparse.ArgumentParser(description="Analyze stock ticker mentions on Reddit.", formatter_class=argparse.RawTextHelpFormatter)
|
parser = argparse.ArgumentParser(description="Analyze stock ticker mentions on Reddit.", formatter_class=argparse.RawTextHelpFormatter)
|
||||||
|
|
||||||
parser.add_argument("--config", default="subreddits.json", help="Path to the JSON file containing subreddits.\n(Default: subreddits.json)")
|
parser.add_argument("-f", "--config", default="subreddits.json", help="Path to the JSON file containing subreddits.\n(Default: subreddits.json)")
|
||||||
parser.add_argument("--subreddit", help="Scan a single subreddit, ignoring the config file.")
|
parser.add_argument("-s", "--subreddit", help="Scan a single subreddit, ignoring the config file.")
|
||||||
parser.add_argument("--days", type=int, default=1, help="Number of past days to scan for new posts.\n(Default: 1 for last 24 hours)")
|
parser.add_argument("-d", "--days", type=int, default=1, help="Number of past days to scan for new posts.\n(Default: 1 for last 24 hours)")
|
||||||
|
|
||||||
parser.add_argument("-p", "--posts", type=int, default=200, help="Max posts to check per subreddit.\n(Default: 200)")
|
parser.add_argument("-p", "--posts", type=int, default=200, help="Max posts to check per subreddit.\n(Default: 200)")
|
||||||
parser.add_argument("-c", "--comments", type=int, default=100, help="Number of comments to scan per post.\n(Default: 100)")
|
parser.add_argument("-c", "--comments", type=int, default=100, help="Number of comments to scan per post.\n(Default: 100)")
|
||||||
parser.add_argument("-l", "--limit", type=int, default=20, help="Number of tickers to show in the CLI report.\n(Default: 20)")
|
parser.add_argument("-l", "--limit", type=int, default=20, help="Number of tickers to show in the CLI report.\n(Default: 20)")
|
||||||
@@ -162,7 +161,6 @@ def main():
|
|||||||
|
|
||||||
# --- Initialize and Run ---
|
# --- Initialize and Run ---
|
||||||
database.initialize_db()
|
database.initialize_db()
|
||||||
database.clean_stale_tickers()
|
|
||||||
|
|
||||||
reddit = get_reddit_instance()
|
reddit = get_reddit_instance()
|
||||||
if not reddit: return
|
if not reddit: return
|
||||||
|
1
setup.py
1
setup.py
@@ -19,6 +19,7 @@ setup(
|
|||||||
# The path is now 'package_name.module_name:function_name'
|
# The path is now 'package_name.module_name:function_name'
|
||||||
'rstat=rstat_tool.main:main',
|
'rstat=rstat_tool.main:main',
|
||||||
'rstat-dashboard=rstat_tool.dashboard:start_dashboard',
|
'rstat-dashboard=rstat_tool.dashboard:start_dashboard',
|
||||||
|
'rstat-cleanup=rstat_tool.cleanup:run_cleanup',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
)
|
)
|
@@ -1,18 +1,13 @@
|
|||||||
{
|
{
|
||||||
"subreddits": [
|
"subreddits": [
|
||||||
|
"investing",
|
||||||
"pennystocks",
|
"pennystocks",
|
||||||
"Shortsqueeze",
|
"Shortsqueeze",
|
||||||
"smallstreetbets",
|
"smallstreetbets",
|
||||||
"wallstreetbets",
|
|
||||||
"Wallstreetbetsnew",
|
|
||||||
"wallstreetbets2",
|
|
||||||
"stocks",
|
"stocks",
|
||||||
"RobinHoodPennyStocks",
|
"Tollbugatabets",
|
||||||
"StocksAndTrading",
|
|
||||||
"investing",
|
|
||||||
"WallStreetBetsELITE",
|
|
||||||
"ValueInvesting",
|
"ValueInvesting",
|
||||||
"Daytrading",
|
"wallstreetbets",
|
||||||
"Tollbugatabets"
|
"WallStreetBetsELITE"
|
||||||
]
|
]
|
||||||
}
|
}
|
Reference in New Issue
Block a user