import os
import sys
import logging
import mysql.connector
from mysql.connector import Error
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
import argparse
import re
import concurrent.futures
from datetime import datetime
import importlib.util

# Configure the root logger when the module is loaded: INFO threshold and
# "timestamp - level - message" formatted records.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

class BillListingAppender:
    def __init__(self, limit=None):
        """Set up paths, logging and database settings for a run.

        Args:
            limit: Optional cap on the number of bill rows to fetch. Stored
                as ``self.limit``; a falsy value means no cap is applied.
        """
        self.limit = limit

        # The project root is reached by three dirname() steps from this
        # file's absolute path.
        this_file = os.path.abspath(__file__)
        containing_dir = os.path.dirname(this_file)
        self.project_root = os.path.dirname(os.path.dirname(containing_dir))

        # The logger has to exist before the configuration is read, because
        # get_database_config() reports problems through self.logger.
        self.setup_logging()
        self.db_config = self.get_database_config()

    def setup_logging(self):
        """Create the instance logger, named after this module, at INFO level."""
        log = logging.getLogger(__name__)
        log.setLevel(logging.INFO)
        self.logger = log

    def get_database_config(self):
        """Load database connection settings from ``<project_root>/app/.env``.

        The ``.env`` file is loaded with ``load_dotenv`` and the settings are
        then read from the process environment.

        Returns:
            dict: ``host``, ``user``, ``password`` and ``database`` entries
            read from ``DB_HOST``, ``DB_USER``, ``DB_PASS`` and ``DB_NAME``.
            A variable that is not set yields ``None`` for its entry.
        """
        env_path = os.path.join(self.project_root, 'app', '.env')
        load_dotenv(env_path)

        env_names = {
            'host': 'DB_HOST',
            'user': 'DB_USER',
            'password': 'DB_PASS',
            'database': 'DB_NAME',
        }
        config = {key: os.getenv(name) for key, name in env_names.items()}

        # os.getenv() returns None for an unset variable instead of raising,
        # so a missing setting can only be detected by checking the values.
        # Only the variable names are logged, never their values.
        missing = [name for key, name in env_names.items() if config[key] is None]
        if missing:
            self.logger.warning(
                f"Database settings not set in environment/.env: {', '.join(missing)}"
            )
        return config

    def connect_to_database(self):
        """Open a MySQL connection from the settings in ``self.db_config``.

        Returns:
            The connection object returned by ``mysql.connector.connect``.

        If the connector raises ``Error`` the failure is logged and the
        process exits with status 1.
        """
        try:
            return mysql.connector.connect(**self.db_config)
        except Error as e:
            self.logger.error(f"Error connecting to database: {str(e)}")
            sys.exit(1)

    def get_bills_to_update(self, cursor):
        """Fetch the rows of ``bill_listings_exp`` through *cursor*.

        Args:
            cursor: Database cursor offering ``execute`` and ``fetchall``.

        Returns:
            Whatever ``cursor.fetchall()`` returns for the query. When
            ``self.limit`` is truthy it is coerced with ``int`` and applied
            as a ``LIMIT`` clause; a falsy limit (``None`` or ``0``) adds no
            clause.
        """
        query = "SELECT * FROM bill_listings_exp"
        if self.limit:
            row_cap = int(self.limit)
            query = f"{query} LIMIT {row_cap}"
        cursor.execute(query)
        rows = cursor.fetchall()
        return rows

    def parse_bill_page(self, url, timeout=30):
        """Download a bill status page and extract the fields stored per bill.

        Args:
            url: Address of the bill status page.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout a stalled connection would block the calling
                thread indefinitely.

        Returns:
            dict | None: A dict with the keys ``short_description``,
            ``state_sponsors_names``, ``state_sponsors_memberid``,
            ``last_action_date``, ``last_action_chamber``,
            ``last_action_action`` and ``synopsis`` (each an empty string
            when the page lacks that section). ``None`` when the page could
            not be fetched or parsed; the error is logged.
        """
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            sponsors_names, sponsors_ids = self._parse_sponsors(soup)
            action_date, action_chamber, action_text = self._parse_last_action(soup)
            data = {
                'short_description': self._text_after_heading(soup, r'Short Description'),
                'state_sponsors_names': ', '.join(sponsors_names),
                'state_sponsors_memberid': ', '.join(sponsors_ids),
                'last_action_date': action_date,
                'last_action_chamber': action_chamber,
                'last_action_action': action_text,
                'synopsis': self._text_after_heading(soup, r'Synopsis As Introduced'),
            }
        except Exception as e:
            # Per-page boundary: one bad page must not abort the whole run.
            self.logger.error(f"Error parsing bill page {url}: {str(e)}")
            return None

        # Diagnostic dump goes through the logger (debug level) rather than
        # print(), so it does not interleave on stdout across worker threads.
        self.logger.debug("Scraped data for %s: %s", url, data)
        return data

    @staticmethod
    def _find_heading(soup, pattern):
        """Return the first ``span.heading2`` whose string matches *pattern* (case-insensitive), or None."""
        return soup.find('span', class_='heading2', string=re.compile(pattern, re.I))

    def _text_after_heading(self, soup, pattern):
        """Return the stripped text of the first ``span.content`` after the matching heading, or ''."""
        heading = self._find_heading(soup, pattern)
        if not heading:
            return ''
        # Matching on class_='content' also matches spans that carry extra
        # classes next to "content", so no second, narrower search is needed.
        content = heading.find_next('span', class_='content')
        return content.get_text(strip=True) if content else ''

    def _parse_sponsors(self, soup):
        """Collect sponsor link texts and 4-digit ids from the links following the sponsors heading."""
        names = []
        member_ids = []
        heading = self._find_heading(soup, r'(Senate|House) Sponsors')
        if not heading:
            return names, member_ids
        for sib in heading.next_siblings:
            tag_name = getattr(sib, 'name', None)
            # The next heading marks the end of the sponsors section.
            if tag_name == 'span' and 'heading2' in sib.get('class', []):
                break
            if tag_name == 'a':
                names.append(sib.get_text(strip=True))
                # The id is taken as the first run of four digits in the href.
                match = re.search(r'(\d{4})', sib.get('href', ''))
                if match:
                    member_ids.append(match.group(1))
        return names, member_ids

    def _parse_last_action(self, soup):
        """Return (date, chamber, action) from the second row of the table after 'Last Action'."""
        empty = ('', '', '')
        heading = self._find_heading(soup, r'Last Action')
        if not heading:
            return empty
        table = heading.find_next('table')
        if not table:
            return empty
        rows = table.find_all('tr')
        # rows[0] is skipped; the values are read from the second row.
        if len(rows) < 2:
            return empty
        cells = rows[1].find_all('td')
        if len(cells) < 3:
            return empty
        return (
            cells[0].get_text(strip=True),
            cells[1].get_text(strip=True),
            ' '.join(cells[2].stripped_strings),
        )

    def update_bill_row(self, cursor, bill_id, data):
        """Write the scraped fields of one bill back to ``bill_listings_exp``.

        Args:
            cursor: Database cursor used to execute the UPDATE.
            bill_id: Value matched against the row's ``id`` column.
            data: Mapping that must contain every scraped field listed in
                ``columns`` below; a missing key raises ``KeyError``.

        The statement also sets ``updated_at`` to ``CURRENT_TIMESTAMP``.
        """
        # Order must match the placeholders in the statement.
        columns = (
            'short_description',
            'state_sponsors_names',
            'state_sponsors_memberid',
            'last_action_date',
            'last_action_chamber',
            'last_action_action',
            'synopsis',
        )
        statement = """
        UPDATE bill_listings_exp SET
            short_description = %s,
            state_sponsors_names = %s,
            state_sponsors_memberid = %s,
            last_action_date = %s,
            last_action_chamber = %s,
            last_action_action = %s,
            synopsis = %s,
            updated_at = CURRENT_TIMESTAMP
        WHERE id = %s
        """
        params = tuple(data[column] for column in columns) + (bill_id,)
        cursor.execute(statement, params)

    def get_current_session(self):
        """Read ``CURRENT_SESSION`` from ``<project_root>/app/config.py``.

        The file is loaded as a module named ``config`` via ``importlib`` and
        registered in ``sys.modules`` under that name.

        Returns:
            The module's ``CURRENT_SESSION`` attribute, or ``None`` when the
            module does not define it.
        """
        location = os.path.join(self.project_root, 'app', 'config.py')
        module_spec = importlib.util.spec_from_file_location('config', location)
        module = importlib.util.module_from_spec(module_spec)
        # The module is registered under 'config' before its code is executed.
        sys.modules['config'] = module
        module_spec.loader.exec_module(module)
        session = getattr(module, 'CURRENT_SESSION', None)
        return session

    def get_ftp_file_dates(self, session, timeout=30):
        """Fetch the BillStatus HTML directory listing and read file timestamps.

        Args:
            session: Session identifier interpolated into the listing URL.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the run.

        Returns:
            dict: Maps each ``*.html`` file name in the listing to the naive
            ``datetime`` parsed from the start of its line. Lines whose date
            does not parse are skipped.

        Raises:
            requests.RequestException: If the request fails, times out or
                returns an error status.
        """
        url = f"https://ilga.gov/ftp/legislation/{session}/BillStatus/HTML/"
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()

        soup = BeautifulSoup(resp.text, 'html.parser')
        file_dates = {}
        for pre in soup.find_all('pre'):
            for line in pre.get_text().splitlines():
                parts = line.split()
                # Example: 2/19/2025 12:20 AM 4228 10400AM1030246.html
                if len(parts) < 4 or not parts[-1].endswith('.html'):
                    continue
                date_str = ' '.join(parts[:3])
                try:
                    file_dates[parts[-1]] = datetime.strptime(date_str, '%m/%d/%Y %I:%M %p')
                except ValueError:
                    # Not a date-prefixed listing line; ignore it.
                    continue
        return file_dates

    def bill_to_ftp_filename(self, session, bill_type, bill_number):
        """Build the status file name for a bill.

        Args:
            session: Session identifier placed at the start of the name.
            bill_type: Bill type prefix such as ``'SB'`` or ``'HB'``.
            bill_number: Bill number; its string form is left-padded with
                zeros to at least four characters.

        Returns:
            str: ``<session>00<bill_type><padded number>.html``.
        """
        padded_number = str(bill_number).zfill(4)
        return "{}00{}{}.html".format(session, bill_type, padded_number)

    def append_bill_data(self):
        """Scrape bill pages and update the rows whose stored data differs.

        Steps:
            1. Load the bill rows and the file timestamps of the current
               session's status directory.
            2. Skip bills whose status file is not newer than the row's
               ``updated_at``; scrape the others concurrently.
            3. Update rows whose scraped fields differ, then commit once.

        A problem with a single bill (unparseable bill number, failed scrape,
        error in a worker) is logged and does not stop the other bills. Any
        other error is logged and the transaction is rolled back. The cursor
        and connection are always closed.
        """
        compared_fields = (
            'short_description',
            'state_sponsors_names',
            'state_sponsors_memberid',
            'last_action_date',
            'last_action_chamber',
            'last_action_action',
            'synopsis',
        )
        conn = self.connect_to_database()
        cursor = conn.cursor(dictionary=True)
        try:
            bills = self.get_bills_to_update(cursor)
            self.logger.info(f"Found {len(bills)} bills to check.")

            current_session = self.get_current_session()
            ftp_file_dates = self.get_ftp_file_dates(current_session)
            self.logger.info(f"Fetched {len(ftp_file_dates)} FTP file dates for session {current_session}.")

            def should_update(bill):
                # bill['bill_number'] is like 'SB0001' or 'HB0025'
                bill_number = str(bill.get('bill_number') or '')
                type_match = re.match(r'([A-Z]+)', bill_number)
                num_match = re.search(r'(\d+)', bill_number)
                if not type_match or not num_match:
                    # The file name cannot be derived; play it safe and update.
                    return True
                ftp_filename = self.bill_to_ftp_filename(
                    current_session, type_match.group(1), num_match.group(1)
                )
                ftp_date = ftp_file_dates.get(ftp_filename)
                db_date = bill.get('updated_at')
                if not ftp_date or not db_date:
                    return True  # If we can't find the FTP file or db date, play it safe and update
                if isinstance(db_date, str):
                    try:
                        db_date = datetime.strptime(db_date, '%Y-%m-%d %H:%M:%S')
                    except ValueError:
                        return True
                if not isinstance(db_date, datetime):
                    # Comparing a datetime with another type would raise.
                    return True
                return ftp_date > db_date

            def scrape_and_compare(bill):
                if not should_update(bill):
                    return (bill, None, False, 'Skipped (up-to-date)')
                data = self.parse_bill_page(bill['bill_url'])
                if not data:
                    return (bill, None, False, 'Failed')
                # None and '' are treated as equal when comparing fields.
                needs_update = any(
                    (bill.get(key) or '') != (data.get(key) or '')
                    for key in compared_fields
                )
                return (bill, data, needs_update, 'Updated' if needs_update else 'No change')

            results = []
            with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
                future_to_bill = {
                    executor.submit(scrape_and_compare, bill): bill for bill in bills
                }
                for future in concurrent.futures.as_completed(future_to_bill):
                    try:
                        results.append(future.result())
                    except Exception as e:
                        # future.result() re-raises the worker's exception;
                        # contain it so one bill cannot abort the batch.
                        failed_bill = future_to_bill[future]
                        self.logger.error(
                            f"Error processing bill {failed_bill.get('bill_number')}: {str(e)}"
                        )

            # Database writes happen here, on the calling thread only.
            for bill, data, needs_update, status in results:
                label = bill.get('bill_number')
                if status == 'Skipped (up-to-date)':
                    self.logger.info(f"Skipped bill {label} (up-to-date)")
                elif data and needs_update:
                    self.update_bill_row(cursor, bill['id'], data)
                    self.logger.info(f"Updated bill {label}")
                elif data:
                    self.logger.info(f"No update needed for bill {label}")
                else:
                    self.logger.error(f"Failed to scrape bill {label}")
            conn.commit()
        except Exception as e:
            self.logger.error(f"Error in append_bill_data: {str(e)}")
            conn.rollback()
        finally:
            cursor.close()
            conn.close()

    def run(self):
        """Run the append process, logging a message before and after it."""
        log = self.logger
        log.info("Starting bill data append process")
        self.append_bill_data()
        log.info("Completed bill data append process")

if __name__ == "__main__":
    # Command-line entry point: read the optional --limit and run the job.
    cli = argparse.ArgumentParser(description='Append bill data from ILGA.')
    cli.add_argument('--limit', type=int, help='Number of bills to process')
    options = cli.parse_args()
    BillListingAppender(limit=options.limit).run()