From b617016b61bf77dcab6e9f49d48926b4aa29c07d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A5l-Kristian=20Hamre?= <pkhamre@gmail.com>
Date: Mon, 21 Jul 2025 12:14:45 +0200
Subject: [PATCH] Integrate with Reddit.

---
 .gitignore          |  5 +++
 main.py             | 92 +++++++++++++++++++++++++++++++++++----------
 requirements.txt    |  2 +
 ticker_extractor.py | 40 ++++++++++++++++++++
 4 files changed, 120 insertions(+), 19 deletions(-)
 create mode 100644 ticker_extractor.py

diff --git a/.gitignore b/.gitignore
index 21d0b89..ac8e601 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,6 @@
 .venv/
+.env
+__pycache__/
+*.pyc
+*.sqlite3
+*.log
diff --git a/main.py b/main.py
index 58942e8..0c2dde9 100644
--- a/main.py
+++ b/main.py
@@ -2,7 +2,17 @@
 
 import argparse
 import json
+import os
+from collections import Counter
+
+import praw
 import yfinance as yf
+from dotenv import load_dotenv
+
+from ticker_extractor import extract_tickers
+
+# Load environment variables (including the REDDIT_* credentials) from a .env file
+load_dotenv()
 
 def load_subreddits(filepath):
     """Loads a list of subreddits from a JSON file."""
@@ -25,41 +35,85 @@ def get_market_cap(ticker_symbol):
         if market_cap:
             # Formatting for better readability
             return f"${market_cap:,}"
+
         return "N/A"
     except Exception as e:
         # yfinance can sometimes fail for various reasons (e.g., invalid ticker)
         return "N/A"
 
+def get_reddit_instance():
+    """Build a PRAW client from the REDDIT_* environment variables.
+
+    Prints an error and returns None when the client id, client secret or
+    user agent is unset or empty; otherwise returns a praw.Reddit instance.
+    """
+    credentials = {
+        "client_id": os.getenv("REDDIT_CLIENT_ID"),
+        "client_secret": os.getenv("REDDIT_CLIENT_SECRET"),
+        "user_agent": os.getenv("REDDIT_USER_AGENT"),
+    }
+    if any(not value for value in credentials.values()):
+        print("Error: Reddit API credentials not found in .env file.")
+        return None
+    return praw.Reddit(**credentials)
+
+def scan_subreddits(reddit, subreddits_list, post_limit=25):
+    """Count ticker mentions in the hot posts of the given subreddits.
+
+    Args:
+        reddit: Client whose subreddit(name) result offers hot(limit=...).
+        subreddits_list: Names of the subreddits to scan.
+        post_limit: Maximum number of hot posts fetched per subreddit.
+
+    Returns:
+        A Counter mapping each extracted ticker to its number of mentions.
+        A subreddit that raises while being scanned is reported and
+        skipped; mentions counted before the failure are kept.
+    """
+    mention_counts = Counter()
+
+    print(f"\nScanning {len(subreddits_list)} subreddits for top {post_limit} posts...")
+    for name in subreddits_list:
+        try:
+            source = reddit.subreddit(name)
+            print(f"r/{name}...")
+            for post in source.hot(limit=post_limit):
+                # Only title and body are searched; comments are not scanned.
+                post_text = post.title + " " + post.selftext
+                mention_counts.update(extract_tickers(post_text))
+        except Exception as error:
+            print(f"Could not scan r/{name}. Error: {error}")
+
+    return mention_counts
+
 def main():
     """Main function to run the Reddit stock analysis tool."""
     parser = argparse.ArgumentParser(description="Analyze stock ticker mentions on Reddit.")
-    parser.add_argument(
-        "config_file",
-        help="Path to the JSON file containing the list of subreddits."
-    )
+    parser.add_argument("config_file", help="Path to the JSON file containing subreddits.")
     args = parser.parse_args()
 
-    # --- Part 1: Load Configuration ---
-    print("Loading configuration...")
+    # --- Part 1: Load the subreddit list and create the Reddit client ---
     subreddits = load_subreddits(args.config_file)
     if not subreddits:
-        print("No subreddits found in the configuration file. Exiting.")
         return
 
-    print(f"Successfully loaded {len(subreddits)} subreddits: {', '.join(subreddits)}")
-    print("-" * 30)
+    reddit = get_reddit_instance()
+    if not reddit:
+        return
 
+    # --- Part 2: Count ticker mentions across the loaded subreddits ---
+    ticker_counts = scan_subreddits(reddit, subreddits)
+    if not ticker_counts:
+        print("No tickers found.")
+        return
 
-    # --- Part 2: Test Market Data Fetching (Example) ---
-    print("Testing market data functionality...")
-    example_ticker = "AAPL"
-    market_cap = get_market_cap(example_ticker)
-    print(f"Market Cap for {example_ticker}: {market_cap}")
-    print("-" * 30)
-
-    # In the next steps, we will add the Reddit scanning logic here.
-    print("Next up: Integrating the Reddit API to find tickers...")
+    print("\n--- Scan Complete ---")
+    print("Top 15 mentioned tickers:")
 
+    # --- Part 3: Display the 15 most-mentioned tickers ---
+    # Only mention counts are printed; market cap and sentiment are not included yet.
+    for ticker, count in ticker_counts.most_common(15):
+        print(f"{ticker}: {count} mentions")
 
 if __name__ == "__main__":
     main()
diff --git a/requirements.txt b/requirements.txt
index 29d91af..67ac91b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,3 @@
 yfinance
+praw
+python-dotenv
\ No newline at end of file
diff --git a/ticker_extractor.py b/ticker_extractor.py
new file mode 100644
index 0000000..46d1c6f
--- /dev/null
+++ b/ticker_extractor.py
@@ -0,0 +1,40 @@
+# ticker_extractor.py
+
+import re
+
+# Uppercase words, acronyms and currency codes that look like stock tickers
+# but are rarely meant as symbols; filtering them reduces false positives.
+COMMON_WORDS_BLACKLIST = {
+    # Order: English words, finance slang, institutions, tech/crypto, currencies.
+    "A", "I", "BE", "GO", "ON", "AT", "IN", "IS", "IT", "OF", "OR", "TO",
+    "WE", "UP", "SO", "THE", "AND", "FOR", "ARE", "BUT", "NOT", "YOU",
+    "ALL", "ANY", "CAN", "HAS", "NEW", "NOW", "OLD", "SEE", "TWO", "WAY",
+    "WHO", "WHY", "BIG", "BUY", "OUT", "SELL", "HOLD", "EDIT",
+    "DD", "CEO", "CFO", "CTO", "EPS", "IPO", "YOLO", "FOMO", "TLDR", "RH", "IRA",
+    "SEC", "IRS", "USA", "UK", "EU", "US", "FDA", "FBI", "CIA", "NSA", "NATO",
+    "AI", "ML", "AR", "VR", "NFT", "DAO", "WEB3", "ETH", "BTC",
+    "USD", "EUR", "GBP", "JPY", "CNY", "INR", "AUD", "CAD", "CHF",
+    "RUB", "ZAR", "BRL", "MXN", "HKD", "SGD", "NZD", "RSD",
+    "KRW", "SEK", "NOK", "DKK", "PLN", "CZK", "HUF", "TRY",
+}
+
+def extract_tickers(text):
+    """
+    Extract potential stock tickers from a piece of text.
+
+    A candidate is a standalone run of 1-5 uppercase ASCII letters, optionally
+    written with a leading "$" (e.g. "$AAPL" or "GME"). Candidates listed in
+    COMMON_WORDS_BLACKLIST are dropped, with or without the "$".
+
+    Returns the symbols in order of appearance, without the "$" and with
+    repeats kept so callers can count mentions. Returns an empty list when
+    text is None or empty.
+    """
+    if not text:
+        # re.findall raises TypeError on None; treat missing text as no tickers.
+        return []
+
+    # Two alternatives: a "$"-prefixed symbol ("$AAPL") or a bare caps word ("GME").
+    candidates = re.findall(r"\$[A-Z]{1,5}\b|\b[A-Z]{1,5}\b", text)
+    symbols = (candidate.lstrip("$") for candidate in candidates)
+    return [symbol for symbol in symbols if symbol not in COMMON_WORDS_BLACKLIST]
\ No newline at end of file