import logging
import requests
from bs4 import BeautifulSoup
from typing import Dict, List, Optional
import mysql.connector
from mysql.connector import Error
from datetime import datetime
import time
import os
import sys
from dotenv import load_dotenv
import pdfplumber
import io
import warnings

# Make the parent directory of this file's directory importable.
# NOTE(review): presumably so sibling packages of the project resolve when this
# file is run directly as a script -- confirm which imports rely on it.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Load environment variables from .env file. This runs before any os.getenv()
# call in the class below (DB_HOST, DB_NAME, DB_USER, DB_PASS, CURRENT_SESSION).
load_dotenv()

# Create logs directory (next to this file) if it doesn't exist; the
# FileHandler configured below writes into it.
logs_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'logs')
os.makedirs(logs_dir, exist_ok=True)

# Configure logging: INFO and above goes both to logs/votes_scraper.log and to
# the console stream. mode='w' truncates the log file every time the module is
# loaded, so only the most recent run is kept on disk.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(os.path.join(logs_dir, 'votes_scraper.log'), mode='w'),
        logging.StreamHandler()
    ]
)
# Module-wide logger used by VotesScraper and main().
logger = logging.getLogger(__name__)

# Silence one specific, repetitive warning message.
# NOTE(review): presumably emitted by the PDF parsing stack while reading roll
# call PDFs -- confirm the emitting library.
warnings.filterwarnings("ignore", message="CropBox missing from /Page, defaulting to MediaBox")

class VotesScraper:
    def __init__(self):
        """
        Build the shared HTTP session, read the configured session number and
        make sure the vote tables exist.

        The CURRENT_SESSION environment variable supplies the session number
        (defaults to 103). Any error raised by setup_database() propagates.
        """
        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        http = requests.Session()
        # Every request made through this session carries the browser-like User-Agent.
        http.headers.update(browser_headers)
        self.session = http

        configured_session = os.getenv('CURRENT_SESSION', '103')
        self.current_session = int(configured_session)

        self.setup_database()

    def get_db_connection(self):
        """
        Open a new MySQL connection from the DB_HOST, DB_NAME, DB_USER and
        DB_PASS environment variables.

        Returns:
            The connection object returned by mysql.connector.connect().

        Raises:
            mysql.connector.Error: re-raised after being logged when the
                connection attempt fails.
        """
        settings = {
            'host': os.getenv('DB_HOST'),
            'database': os.getenv('DB_NAME'),
            'user': os.getenv('DB_USER'),
            'password': os.getenv('DB_PASS'),
        }
        try:
            return mysql.connector.connect(**settings)
        except Error as e:
            logger.error(f"Error connecting to database: {str(e)}")
            raise

    def get_bills(self) -> List[Dict]:
        """
        Load every row of the bill_listings_exp table.

        Returns:
            One dictionary per bill with the keys bill_number, bill_url and
            chamber. An empty list is returned (and the error logged) when the
            connection or the query fails.
        """
        try:
            with self.get_db_connection() as db:
                with db.cursor(dictionary=True) as cur:
                    cur.execute("""
                        SELECT bill_number, bill_url, chamber
                        FROM bill_listings_exp
                    """)
                    rows = cur.fetchall()
        except Exception as e:
            logger.error(f"Error fetching bills: {str(e)}")
            return []
        return rows

    def get_legislators(self) -> List[Dict]:
        """
        Load every row of the legislators table.

        Returns:
            One dictionary per legislator with the keys id, name, party,
            chamber and member_id. An empty list is returned (and the error
            logged) when the connection or the query fails.
        """
        try:
            with self.get_db_connection() as db:
                with db.cursor(dictionary=True) as cur:
                    cur.execute("""
                        SELECT id, name, party, chamber, member_id
                        FROM legislators
                    """)
                    rows = cur.fetchall()
        except Exception as e:
            logger.error(f"Error fetching legislators: {str(e)}")
            return []
        return rows

    def setup_database(self):
        """
        Ensure the votes and vote_details tables exist with the expected columns.

        Statements run in this order: create votes, add votes.excused_count if
        an existing table lacks it, create vote_details, then commit.

        Raises:
            Exception: any connection or SQL error is logged and re-raised.
        """
        try:
            with self.get_db_connection() as db:
                with db.cursor() as cur:
                    votes_ddl = """
                        CREATE TABLE IF NOT EXISTS votes (
                            id INT AUTO_INCREMENT PRIMARY KEY,
                            bill_id VARCHAR(50),
                            chamber VARCHAR(10),
                            vote_date TIMESTAMP,
                            vote_type VARCHAR(50),
                            vote_result VARCHAR(50),
                            yea_count INT,
                            nay_count INT,
                            present_count INT,
                            not_voting_count INT,
                            excused_count INT,
                            session INT,
                            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                        )
                    """
                    vote_details_ddl = """
                        CREATE TABLE IF NOT EXISTS vote_details (
                            id INT AUTO_INCREMENT PRIMARY KEY,
                            vote_id INT,
                            legislator_id INT,
                            vote_value VARCHAR(10),
                            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                            FOREIGN KEY (vote_id) REFERENCES votes(id),
                            FOREIGN KEY (legislator_id) REFERENCES legislators(id)
                        )
                    """

                    # Parent table first: vote_details has a foreign key onto votes.
                    cur.execute(votes_ddl)

                    # A votes table created before excused_count was introduced
                    # is upgraded in place.
                    cur.execute("SHOW COLUMNS FROM votes LIKE 'excused_count'")
                    excused_column = cur.fetchone()
                    if not excused_column:
                        cur.execute("ALTER TABLE votes ADD COLUMN excused_count INT AFTER not_voting_count")

                    cur.execute(vote_details_ddl)

                    db.commit()
                    logger.info("Database tables for votes created/updated successfully")
        except Exception as e:
            logger.error(f"Error setting up database: {str(e)}")
            raise

    def scrape_votes(self, chamber: str, start_date: Optional[str] = None, end_date: Optional[str] = None):
        """
        Placeholder for chamber-wide vote scraping; it currently does nothing.

        Args:
            chamber: 'house' or 'senate'
            start_date: Optional start date in YYYY-MM-DD format
            end_date: Optional end date in YYYY-MM-DD format

        Returns:
            None. No argument is inspected yet.
        """
        # TODO: Implement vote scraping logic
        return None

    def process_vote(self, vote_data: Dict):
        """
        Placeholder for storing a single vote; it currently does nothing.

        Args:
            vote_data: Dictionary containing vote information

        Returns:
            None. The argument is not inspected yet.
        """
        # TODO: Implement vote processing logic
        return None

    def update_vote_details(self, vote_id: int, vote_details: List[Dict]):
        """
        Placeholder for updating the per-legislator rows of one vote; it
        currently does nothing.

        Args:
            vote_id: ID of the vote in the votes table
            vote_details: List of dictionaries containing vote details

        Returns:
            None. Neither argument is inspected yet.
        """
        # TODO: Implement vote details processing logic
        return None

    def match_legislator(self, roll_call_name: str) -> Optional[int]:
        """
        Look up a legislator by the exact value of the roll_call_name column.

        Args:
            roll_call_name: Name as it appears in roll call votes

        Returns:
            The id of the first matching legislators row, or None when no row
            matches or when the lookup fails (the failure is logged).
        """
        try:
            with self.get_db_connection() as db:
                with db.cursor(dictionary=True) as cur:
                    cur.execute("""
                        SELECT id FROM legislators 
                        WHERE roll_call_name = %s
                    """, (roll_call_name,))
                    row = cur.fetchone()
                    if not row:
                        return None
                    return row['id']
        except Exception as e:
            logger.error(f"Error matching legislator: {str(e)}")
            return None

    def get_votes_link_for_bill(self, bill_url: str) -> Optional[str]:
        """
        Given a bill status page URL, fetch it and return the absolute URL of
        its 'Votes' page if the page links to one.

        Args:
            bill_url: URL of the bill status page.

        Returns:
            Absolute URL of the first link whose href contains
            'votehistory.asp' (case-insensitive), or None when there is no such
            link or when fetching/parsing fails (the failure is logged).
        """
        # Local import: urljoin is not among the module-level imports.
        from urllib.parse import urljoin

        try:
            # A timeout keeps one unresponsive server from hanging the whole batch.
            resp = self.session.get(bill_url, timeout=30)
            resp.raise_for_status()
            soup = BeautifulSoup(resp.text, 'html.parser')
            # Find the 'Votes' link (usually contains 'votehistory.asp')
            votes_link_tag = soup.find(
                'a', href=lambda x: x and 'votehistory.asp' in x.lower()
            )
            if votes_link_tag is None:
                logger.info(f"No votes link found for bill page: {bill_url}")
                return None
            # Resolve the href against the page it was found on (after any
            # redirect). This handles absolute URLs, root-relative paths such
            # as '/legislation/votehistory.asp' and page-relative paths alike,
            # where prefixing a fixed '/legislation/' would double the
            # directory for root-relative hrefs.
            page_url = resp.url or bill_url
            return urljoin(page_url, votes_link_tag['href'].strip())
        except Exception as e:
            logger.error(f"Error fetching/parsing bill status page {bill_url}: {str(e)}")
            return None

    def parse_votes_page(self, votes_url: str) -> List[Dict]:
        """
        Fetch and parse the votes page, returning one entry per roll call link.

        Args:
            votes_url: Absolute URL of a bill's votes page.

        Returns:
            A list of dicts with the keys 'label' (link text), 'chamber' (text
            of the table cell following the link's cell, or None when there is
            no such cell) and 'detail_link' (absolute URL of the roll call
            document). An empty list is returned when nothing is found or when
            fetching/parsing fails (the failure is logged).
        """
        # Local import: urljoin is not among the module-level imports.
        from urllib.parse import urljoin

        try:
            # A timeout keeps one unresponsive server from hanging the whole batch.
            resp = self.session.get(votes_url, timeout=30)
            resp.raise_for_status()
            soup = BeautifulSoup(resp.text, 'html.parser')
            page_url = resp.url or votes_url
            events = []
            # Find all <a> tags inside <td class="content" bgcolor="white">
            for td in soup.find_all('td', class_='content', bgcolor='white'):
                a_tag = td.find('a', href=True)
                if not a_tag:
                    continue
                # urljoin copes with absolute, root-relative and page-relative
                # hrefs; plain concatenation onto the host produced a broken
                # URL whenever the href lacked a leading slash.
                detail_link = urljoin(page_url, a_tag['href'].strip())
                # Get the chamber from the next sibling <td>
                next_td = td.find_next_sibling('td')
                chamber = next_td.get_text(strip=True) if next_td else None
                events.append({
                    'label': a_tag.get_text(strip=True),
                    'chamber': chamber,
                    'detail_link': detail_link
                })
            return events
        except Exception as e:
            logger.error(f"Error parsing votes page {votes_url}: {str(e)}")
            return []

    def parse_roll_call_pdf(self, pdf_url: str) -> Optional[Dict]:
        """
        Download a roll call PDF and extract vote totals and legislator votes.

        Args:
            pdf_url: Absolute URL of the roll call PDF.

        Returns:
            A dict with 'totals' (keys yea, nay, present, not_voting, excused)
            and 'votes' (list of dicts with 'name' and 'vote_value'), or None
            when the download or the PDF extraction fails (logged).
        """
        try:
            # A timeout keeps one unresponsive server from hanging the whole batch.
            resp = self.session.get(pdf_url, timeout=60)
            resp.raise_for_status()
            with pdfplumber.open(io.BytesIO(resp.content)) as pdf:
                # Extract each page exactly once; text extraction is the
                # expensive step and was previously run twice per page.
                page_texts = [page.extract_text() for page in pdf.pages]
            text = "\n".join(chunk for chunk in page_texts if chunk)
            parsed = self._parse_roll_call_text(text)
            if not parsed['votes']:
                logger.warning(f"No legislator votes found in PDF: {pdf_url}")
            return parsed
        except Exception as e:
            logger.error(f"Error parsing roll call PDF {pdf_url}: {str(e)}")
            return None

    @staticmethod
    def _parse_roll_call_text(text: str) -> Dict:
        """
        Parse the extracted text of a roll call into totals and individual votes.

        The first line shaped like '112 YEAS 0 NAYS 1 PRESENT' supplies the
        yea/nay/present totals and opens the votes section. The section ends at
        the legend line starting with 'E - Denotes Excused'. The totals line
        carries no not-voting or excused figures, so those two totals are
        counted from the parsed 'NV' and 'E' markers.

        Known limitation: a name token that is itself 'Y', 'N', 'P', 'NV' or
        'E' is read as a vote marker.
        """
        # Local import: re is not among the module-level imports.
        import re

        totals_pattern = re.compile(r"(\d+) YEAS? (\d+) NAYS? (\d+) PRESENT")
        markers = frozenset(('Y', 'N', 'P', 'NV', 'E'))

        totals = {'yea': 0, 'nay': 0, 'present': 0, 'not_voting': 0, 'excused': 0}
        votes = []
        in_votes_section = False

        for line in text.splitlines():
            header = totals_pattern.search(line)
            if header:
                if not in_votes_section:
                    # Only the first totals line defines the counts; a repeat
                    # (e.g. on a later page) is skipped like any other header.
                    totals['yea'] = int(header.group(1))
                    totals['nay'] = int(header.group(2))
                    totals['present'] = int(header.group(3))
                    in_votes_section = True
                continue
            if not in_votes_section:
                continue
            # Stop if we hit the legend
            if line.strip().startswith('E - Denotes Excused'):
                break

            tokens = line.split()
            i = 0
            while i < len(tokens):
                vote_value = tokens[i]
                if vote_value in markers and i + 1 < len(tokens):
                    # The name is every token up to the next marker or line end.
                    i += 1
                    name_tokens = []
                    while i < len(tokens) and tokens[i] not in markers:
                        name_tokens.append(tokens[i])
                        i += 1
                    name = ' '.join(name_tokens)
                    if name:
                        votes.append({'name': name, 'vote_value': vote_value})
                else:
                    i += 1

        totals['not_voting'] = sum(1 for v in votes if v['vote_value'] == 'NV')
        totals['excused'] = sum(1 for v in votes if v['vote_value'] == 'E')
        return {'totals': totals, 'votes': votes}

    def load_manual_legislator_map(self, path='app/votes/manual_legislator_map.txt'):
        """
        Load the hand-maintained mapping from roll call names to database names.

        The file is tab-separated with a header row. The 'conversion' column
        holds the name as printed in the roll call and the 'name' column the
        name stored in the legislators table (header matching ignores case and
        surrounding whitespace). Other columns are ignored.

        Args:
            path: Location of the mapping file.

        Returns:
            Dict mapping roll call name -> database name. Rows with either
            value empty are skipped. When the file is missing, unreadable or
            lacks a required column, a warning is logged and whatever was
            loaded so far (usually nothing) is returned.
        """
        mapping = {}
        try:
            # utf-8-sig reads plain UTF-8 too, but also drops a leading BOM that
            # would otherwise become part of the first header name.
            with open(path, 'r', encoding='utf-8-sig') as f:
                header = f.readline()
                columns = [col.strip().lower() for col in header.split('\t')]
                pdf_col = columns.index('conversion')
                db_col = columns.index('name')
                last_needed = max(pdf_col, db_col)
                for line in f:
                    if not line.strip():
                        continue
                    # Remove only the line ending: stripping all whitespace also
                    # removes leading/trailing tabs, which shifts every column
                    # of a row whose first cell is empty.
                    parts = line.rstrip('\r\n').split('\t')
                    if len(parts) <= last_needed:
                        continue
                    pdf_name = parts[pdf_col].strip()
                    db_name = parts[db_col].strip()
                    if pdf_name and db_name:
                        mapping[pdf_name] = db_name
        except (OSError, ValueError) as e:
            # OSError: missing/unreadable file. ValueError: required header
            # column absent, or undecodable bytes (UnicodeDecodeError).
            logger.warning(f"Could not load manual legislator map: {e}")
        return mapping

    def match_legislator_id(self, name: str, legislators: List[Dict], manual_map=None) -> Optional[int]:
        """
        Resolve a name printed in a roll call to a legislator id.

        Candidates are tried in this order, each compared after normalisation
        (lower-cased, '.' and ',' removed, accents stripped, whitespace runs
        collapsed):
          1. the manual_map entry for the exact name, when present;
          2. 'Last, First ...' rewritten as 'First ... Last' (or the name as
             given when it contains no comma);
          3. 'Last, First Middle' rewritten as 'First Last'.

        Args:
            name: Name as it appears in the roll call.
            legislators: Dicts with at least 'id' and 'name'.
            manual_map: Optional dict of roll call name -> database name.

        Returns:
            The id of the first legislator in list order whose normalised name
            equals a candidate, or None when nothing matches.
        """
        # Local import: unicodedata is not among the module-level imports.
        import unicodedata

        def normalize(s):
            s = s.lower().replace('.', '').replace(',', '')
            s = ''.join(c for c in unicodedata.normalize('NFD', s) if not unicodedata.combining(c))
            # split()/join collapses whitespace runs of any length; a single
            # replace('  ', ' ') leaves a double space behind for 3+ spaces.
            return ' '.join(s.split())

        # Normalise every legislator once. setdefault keeps the earliest entry
        # so that, as with a linear scan, the first legislator in the list wins.
        ids_by_name = {}
        for leg in legislators:
            ids_by_name.setdefault(normalize(leg['name']), leg['id'])

        candidates = []
        if manual_map and name in manual_map:
            candidates.append(manual_map[name])
        if ',' in name:
            last, first = (part.strip() for part in name.split(',', 1))
            candidates.append(f'{first} {last}')
            # Drop middle names/initials: keep only the first given name.
            candidates.append(f"{first.split(' ')[0]} {last}")
        else:
            candidates.append(name.strip())

        for candidate in candidates:
            key = normalize(candidate)
            if key in ids_by_name:
                return ids_by_name[key]
        return None

    def insert_vote_event(self, bill, event, pdf_data, connection):
        """
        Insert a vote event into the votes table and return the inserted vote_id.

        An event counts as already stored when a votes row has the same
        bill_id, chamber and vote_type. In that case nothing is inserted into
        votes; if that row has no vote_details yet they are inserted from
        pdf_data, and None is returned either way.

        Args:
            bill: Dict with at least 'bill_number'.
            event: Dict with at least 'label' and 'chamber'.
            pdf_data: Dict with 'totals' (yea, nay, present, not_voting,
                excused) and 'votes'.
            connection: Open database connection; it is committed here.

        Returns:
            The new votes.id, or None when the event already existed.
        """
        with connection.cursor(buffered=True, dictionary=True) as cursor:
            cursor.execute("""
                SELECT id FROM votes WHERE bill_id = %s AND chamber = %s AND vote_type = %s
            """, (
                bill['bill_number'],
                event['chamber'],
                event['label'],
            ))
            existing = cursor.fetchone()
            if existing:
                vote_id = existing['id']
                # Check if vote_details exist for this vote_id
                cursor.execute("SELECT COUNT(*) as cnt FROM vote_details WHERE vote_id = %s", (vote_id,))
                details_count = cursor.fetchone()['cnt']
                if details_count == 0:
                    self.insert_vote_details(vote_id, pdf_data['votes'], self.get_legislators(), connection, self.load_manual_legislator_map())
                    logger.info(f"Inserted missing vote_details for {bill['bill_number']} - {event['label']}")
                else:
                    logger.info(f"Vote event and details already exist for {bill['bill_number']} - {event['label']}, skipping.")
                return None

            totals = pdf_data['totals']
            # NOTE(review): the votes.session column is never written here;
            # presumably it should receive self.current_session -- confirm.
            cursor.execute("""
                INSERT INTO votes (bill_id, chamber, vote_date, vote_type, vote_result, yea_count, nay_count, present_count, not_voting_count, excused_count, created_at)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())
            """, (
                bill['bill_number'],
                event['chamber'],
                # vote_date is a TIMESTAMP column: store a real date parsed
                # from the label, or NULL, never the free-text label itself.
                self._extract_vote_date(event['label']),
                event['label'],  # using label as type for now
                'PASSED' if totals['yea'] > totals['nay'] else 'FAILED',
                totals['yea'],
                totals['nay'],
                totals['present'],
                totals['not_voting'],
                totals['excused'],
            ))
            connection.commit()
            return cursor.lastrowid

    @staticmethod
    def _extract_vote_date(label):
        """
        Return the first calendar date found in a roll call label, or None.

        Recognised shapes are a month name or three-letter abbreviation
        followed by day and year ('April 9, 2025', 'Apr 09, 2025') and numeric
        month/day/year ('4/9/2025'). Anything else yields None.
        """
        # Local import: re is not among the module-level imports.
        import re

        if not isinstance(label, str):
            return None
        named = re.finditer(r"\b([A-Za-z]{3,9})\.?\s+(\d{1,2}),?\s+(\d{4})\b", label)
        for match in named:
            candidate = ' '.join(match.groups())
            for fmt in ('%B %d %Y', '%b %d %Y'):
                try:
                    return datetime.strptime(candidate, fmt)
                except ValueError:
                    continue
        numeric = re.search(r"\b(\d{1,2})/(\d{1,2})/(\d{4})\b", label)
        if numeric:
            month, day, year = (int(part) for part in numeric.groups())
            try:
                return datetime(year, month, day)
            except ValueError:
                return None
        return None

    def insert_vote_details(self, vote_id, votes, legislators, connection, manual_map=None):
        """
        Store the individual legislator votes of one vote event in vote_details.

        Each entry of votes is matched to a legislator through
        match_legislator_id(). Matched entries are inserted one row at a time;
        unmatched names are logged as warnings and skipped. The connection is
        committed once after all entries were handled.

        Args:
            vote_id: id of the parent row in the votes table.
            votes: Dicts with 'name' and 'vote_value'.
            legislators: Legislator dicts handed to match_legislator_id().
            connection: Open database connection.
            manual_map: Optional roll call name -> database name mapping.
        """
        with connection.cursor() as cur:
            for ballot in votes:
                member_id = self.match_legislator_id(ballot['name'], legislators, manual_map)
                if not member_id:
                    logger.warning(f"No match for legislator name: {ballot['name']}")
                else:
                    cur.execute("""
                        INSERT INTO vote_details (vote_id, legislator_id, vote_value, created_at)
                        VALUES (%s, %s, %s, NOW())
                    """, (vote_id, member_id, ballot['vote_value']))
            connection.commit()

    def process_all_bills(self):
        """
        Batch process all bills and all roll call events.

        Bills, legislators and the manual name map are loaded once, and a
        single database connection is shared by the whole run. Each bill is
        processed in isolation: an exception raised while handling one bill is
        logged with its traceback, the connection is rolled back so that no
        half-written rows get committed by a later bill, and the run continues
        with the next bill.
        """
        bills = self.get_bills()
        legislators = self.get_legislators()
        manual_map = self.load_manual_legislator_map()
        with self.get_db_connection() as connection:
            for bill in bills:
                try:
                    self._process_bill(bill, legislators, manual_map, connection)
                except Exception:
                    # Batch boundary: one bad bill must not abort the others.
                    logger.exception(f"Failed to process bill {bill.get('bill_number')}; continuing with next bill")
                    try:
                        connection.rollback()
                    except Exception:
                        logger.exception("Rollback failed after bill processing error")

    def _process_bill(self, bill, legislators, manual_map, connection):
        """Scrape and store every roll call event of a single bill."""
        votes_link = self.get_votes_link_for_bill(bill['bill_url'])
        if not votes_link:
            logger.info(f"No votes link for bill {bill['bill_number']}")
            return
        events = self.parse_votes_page(votes_link)
        if not events:
            logger.info(f"No roll call events for bill {bill['bill_number']}")
            return
        for event in events:
            pdf_data = self.parse_roll_call_pdf(event['detail_link'])
            if not pdf_data or not pdf_data['votes']:
                logger.info(f"No votes found in PDF for event {event['label']} ({bill['bill_number']})")
                continue
            vote_id = self.insert_vote_event(bill, event, pdf_data, connection)
            if not vote_id:
                continue  # Skip if already exists
            self.insert_vote_details(vote_id, pdf_data['votes'], legislators, connection, manual_map)
            logger.info(f"Inserted vote event for {bill['bill_number']} - {event['label']}")

def main():
    """
    Manual smoke test followed by the full batch run.

    Logs how many bills and legislators the database returns, walks one
    hard-coded sample bill through link discovery, votes page parsing and PDF
    parsing while printing every intermediate result, and finally calls
    process_all_bills().
    """
    scraper = VotesScraper()
    # Test the database connection and data fetching
    bill_rows = scraper.get_bills()
    legislator_rows = scraper.get_legislators()

    logger.info(f"Found {len(bill_rows)} bills")
    logger.info(f"Found {len(legislator_rows)} legislators")

    # Test the get_votes_link_for_bill method on a sample bill
    sample_bill_url = "https://ilga.gov/legislation/billstatus.asp?DocNum=2986&GAID=18&GA=104&DocTypeID=HB&LegID=161236&SessionID=114"
    votes_link = scraper.get_votes_link_for_bill(sample_bill_url)
    print(f"Votes link for sample bill: {votes_link}")

    # Test the parse_votes_page method on the found votes link
    if votes_link:
        roll_calls = scraper.parse_votes_page(votes_link)
        print("Parsed roll call events:")
        for roll_call in roll_calls:
            print(roll_call)
        # Test the parse_roll_call_pdf method on the first event (if any)
        if roll_calls:
            print("\nTesting PDF parsing for first roll call event:")
            pdf_url = roll_calls[0]['detail_link']
            # Download the PDF and dump its raw text so parser output can be
            # compared against what was actually extracted.
            response = scraper.session.get(pdf_url)
            response.raise_for_status()
            with pdfplumber.open(io.BytesIO(response.content)) as document:
                raw_text = "\n".join(page.extract_text() for page in document.pages if page.extract_text())
            print("\n--- Extracted PDF Text ---\n")
            print(raw_text)
            print("\n--- End Extracted PDF Text ---\n")
            # Optionally, still run the parser
            parsed = scraper.parse_roll_call_pdf(pdf_url)
            print(f"PDF data for {pdf_url}:")
            print(parsed)

    # Test the process_all_bills method
    scraper.process_all_bills()

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()