#!/usr/bin/env python3
"""
CSS class and ID analyzer for HTML template files.
- Extracts all CSS classes and IDs used in HTML files
- Compares with those declared in CSS files
- Identifies unused CSS rules and missing CSS definitions
"""

import os
import re
import argparse
from pathlib import Path
from collections import defaultdict, Counter
import sys

# Configuration
# Top-level template directories. scan_html_files() joins each of these with
# every entry of LANGUAGES and scans the resulting <directory>/<language> path.
HTML_DIRECTORIES = ['cc', 'ww']
# Language subdirectory names; also used as the keys of the per-language
# class/ID collections built by scan_html_files().
LANGUAGES = ['en', 'es', 'fr', 'de', 'it', 'pt']
# Stylesheets read by scan_css_files(). The paths are used exactly as written,
# so relative names are resolved against the current working directory.
CSS_FILES = [
    'modern-styles.css',
    'wordweb-essential.css',
    'cc-essential.css'
]

def extract_classes_from_html(content):
    """Extract all CSS classes from HTML content.

    Args:
        content: HTML markup as a string.

    Returns:
        A sorted list of the unique class names found in ``class="..."``
        attributes. The attribute name is matched case-insensitively and the
        value may be single- or double-quoted.
    """
    # The negative lookbehind anchors the attribute name: without it the
    # pattern also matches attributes that merely END in "class"
    # (data-class, ng-class, subclass, ...) and reports their values as
    # CSS classes.
    class_pattern = r'(?<![\w-])class\s*=\s*["\']([^"\']+)["\']'

    classes = set()
    for attribute_value in re.findall(class_pattern, content, re.IGNORECASE):
        # str.split() with no separator splits on any whitespace run and
        # never yields empty tokens, so no extra strip/filter is needed.
        classes.update(attribute_value.split())

    return sorted(classes)

def extract_ids_from_html(content):
    """Extract all element IDs from HTML content.

    Args:
        content: HTML markup as a string.

    Returns:
        A sorted list of the unique values of ``id="..."`` attributes, with
        surrounding whitespace removed. Values that are empty after stripping
        or that start with ``###`` (template placeholders) are skipped.
    """
    # The negative lookbehind anchors the attribute name: a bare `id\s*=`
    # also matches the tail of other attributes (data-id, valid, grid, ...)
    # and would report their values as element IDs.
    id_pattern = r'(?<![\w-])id\s*=\s*["\']([^"\']+)["\']'

    found = set()
    for raw_value in re.findall(id_pattern, content, re.IGNORECASE):
        value = raw_value.strip()
        # Skip blank values and ###PLACEHOLDER### template markers.
        if value and not value.startswith('###'):
            found.add(value)

    return sorted(found)

def extract_css_selectors(content):
    """Extract CSS class and ID selectors from CSS content.

    This is a regex heuristic, not a CSS parser: it looks for ``.name`` and
    ``#name`` tokens anywhere in the stylesheet after removing the parts that
    are known to produce false matches.

    Args:
        content: Stylesheet text.

    Returns:
        A tuple ``(classes, ids)`` of two sorted lists of unique names
        (without the leading ``.`` / ``#``).
    """
    # Remove comments and quoted strings so their contents are not scanned.
    content = re.sub(r'/\*.*?\*/', '', content, flags=re.DOTALL)
    content = re.sub(r'"[^"]*"', '', content)
    content = re.sub(r"'[^']*'", '', content)
    # Unquoted url(...) arguments contain file names such as "bg.png" whose
    # extension would otherwise be reported as a class named "png".
    content = re.sub(r'\burl\([^)]*\)', '', content, flags=re.IGNORECASE)

    # Class selectors (.classname). The character classes exclude '#', so a
    # ###PLACEHOLDER### marker can never be captured here.
    class_pattern = r'\.([a-zA-Z_-][a-zA-Z0-9_-]*)'
    classes = set(re.findall(class_pattern, content))

    # ID selectors (#idname). The lookbehind rejects a '#' that directly
    # follows another '#', so the NAME inside a ###NAME### template
    # placeholder is not mistaken for an ID.
    id_pattern = r'(?<!#)#([a-zA-Z_-][a-zA-Z0-9_-]*)'
    # Hex colours have exactly 3, 4, 6 or 8 hex digits (#rgb, #rgba,
    # #rrggbb, #rrggbbaa); any other length is treated as an ID.
    hex_color = re.compile(
        r'(?:[0-9a-fA-F]{3,4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})'
    )
    ids = {
        name
        for name in re.findall(id_pattern, content)
        if not hex_color.fullmatch(name)
    }

    return sorted(classes), sorted(ids)

def scan_html_files():
    """Collect the CSS classes and IDs used by the HTML templates.

    Every ``<directory>/<language>`` combination built from HTML_DIRECTORIES
    and LANGUAGES is checked; combinations that do not exist are skipped
    silently. Only ``*.html`` files directly inside each directory are read
    (no recursion). A file that cannot be processed is reported on stdout
    and skipped.

    Returns:
        A tuple ``(html_classes, html_ids, file_details)`` where the first
        two map a language code to the set of names seen for that language,
        and ``file_details`` maps each file path (forward slashes) to a dict
        with the keys ``'classes'``, ``'ids'`` and ``'language'``.
    """
    classes_by_lang = defaultdict(set)
    ids_by_lang = defaultdict(set)
    per_file = {}

    # Flatten the directory x language product, preserving the original
    # iteration order (all languages of the first directory first).
    targets = (
        (lang, os.path.join(directory, lang))
        for directory in HTML_DIRECTORIES
        for lang in LANGUAGES
    )

    for lang, scan_dir in targets:
        if not os.path.exists(scan_dir):
            continue

        print(f"Scanning HTML: {scan_dir}")

        for html_path in Path(scan_dir).glob('*.html'):
            try:
                with open(html_path, 'r', encoding='utf-8') as handle:
                    markup = handle.read()

                found_classes = extract_classes_from_html(markup)
                found_ids = extract_ids_from_html(markup)

                # Normalise Windows separators so keys look the same everywhere.
                key = str(html_path).replace('\\', '/')
                per_file[key] = {
                    'classes': found_classes,
                    'ids': found_ids,
                    'language': lang,
                }

                # Merge into the per-language totals.
                classes_by_lang[lang].update(found_classes)
                ids_by_lang[lang].update(found_ids)

            except Exception as exc:
                print(f"Error reading {html_path}: {exc}")

    return classes_by_lang, ids_by_lang, per_file

def scan_css_files():
    """Collect the class and ID selectors declared in the configured stylesheets.

    Each path in CSS_FILES is reported on stdout; missing files are marked
    "(not found)" and skipped, and a file that cannot be processed is
    reported and skipped.

    Returns:
        A tuple ``(css_classes, css_ids, css_details)``: the combined sets of
        class and ID names across all stylesheets, and a dict mapping each
        stylesheet path to ``{'classes': [...], 'ids': [...]}``.
    """
    all_classes = set()
    all_ids = set()
    per_file = {}

    print("\nScanning CSS files:")

    for css_file in CSS_FILES:
        # Guard clause: report and skip stylesheets that are not present.
        if not os.path.exists(css_file):
            print(f"  {css_file} (not found)")
            continue

        print(f"  {css_file}")
        try:
            with open(css_file, 'r', encoding='utf-8') as handle:
                stylesheet = handle.read()

            found_classes, found_ids = extract_css_selectors(stylesheet)
            per_file[css_file] = {
                'classes': found_classes,
                'ids': found_ids,
            }

            all_classes.update(found_classes)
            all_ids.update(found_ids)

        except Exception as exc:
            print(f"Error reading {css_file}: {exc}")

    return all_classes, all_ids, per_file

def analyze_usage(html_classes, html_ids, css_classes, css_ids):
    """Compare the names used in HTML with the names declared in CSS.

    Args:
        html_classes: Mapping of language code to the class names used.
        html_ids: Mapping of language code to the element IDs used.
        css_classes: Set of class names declared in the stylesheets.
        css_ids: Set of ID names declared in the stylesheets.

    Returns:
        A tuple ``(unused_css_classes, unused_css_ids, missing_css_classes,
        missing_css_ids)`` of sets. "Unused" means declared in CSS but not
        used in any language's HTML; "missing" means used in HTML but not
        declared. Missing classes that look like Bootstrap/framework classes
        are left out.
    """
    # Merge the per-language collections into one set each.
    used_classes = set().union(*html_classes.values())
    used_ids = set().union(*html_ids.values())

    # Exact names assumed to come from Bootstrap or a similar framework.
    framework_classes = {
        'container', 'container-fluid', 'row', 'col', 'col-12', 'col-md-6', 'col-lg-4',
        'btn', 'btn-primary', 'btn-secondary', 'btn-success', 'btn-danger', 'btn-warning',
        'btn-info', 'btn-light', 'btn-dark', 'btn-outline-primary', 'btn-outline-secondary',
        'card', 'card-body', 'card-header', 'card-footer', 'card-title', 'card-text',
        'navbar', 'navbar-nav', 'navbar-brand', 'nav-link', 'nav-item',
        'modal', 'modal-dialog', 'modal-content', 'modal-header', 'modal-body', 'modal-footer',
        'form-control', 'form-group', 'form-label', 'input-group', 'input-group-text',
        'd-none', 'd-block', 'd-flex', 'justify-content-center', 'align-items-center',
        'text-center', 'text-left', 'text-right', 'text-muted', 'text-primary',
        'mb-3', 'mt-3', 'p-3', 'px-3', 'py-3', 'mx-auto', 'w-100', 'h-100',
        'img-fluid', 'img-responsive', 'table', 'table-striped', 'table-bordered',
        'alert', 'alert-info', 'alert-warning', 'alert-danger', 'alert-success',
        'carousel', 'carousel-inner', 'carousel-item', 'carousel-control-prev',
        'carousel-control-next', 'sr-only', 'visually-hidden'
    }
    # Any class beginning with one of these is also treated as framework-provided.
    framework_prefixes = ('col-', 'btn-', 'text-', 'bg-', 'd-', 'p-', 'm-', 'border-', 'rounded-')

    # Declared in CSS but never referenced from HTML.
    unused_css_classes = css_classes - used_classes
    unused_css_ids = css_ids - used_ids

    # Referenced from HTML but never declared in CSS.
    missing_css_ids = used_ids - css_ids
    missing_css_classes = set()
    for name in used_classes - css_classes:
        if name in framework_classes or name.startswith(framework_prefixes):
            continue  # likely supplied by the framework's own stylesheet
        missing_css_classes.add(name)

    return unused_css_classes, unused_css_ids, missing_css_classes, missing_css_ids

def main():
    """Command-line entry point for the CSS usage analyzer.

    Options:
        --file PATH  Only list the classes and IDs found in PATH, then stop.
        --verbose    Also print summary counts, unused CSS definitions and a
                     per-stylesheet breakdown.

    Without ``--file`` the configured HTML directories and CSS files are
    scanned and every class/ID used in HTML but not declared in CSS is
    reported. All output goes to stdout; the function returns ``None``.
    """
    cli = argparse.ArgumentParser(description='Analyze CSS classes and IDs in HTML template files')
    cli.add_argument('--verbose', action='store_true', help='Show detailed analysis (not just errors)')
    cli.add_argument('--file', type=str, help='Analyze only a specific file')
    args = cli.parse_args()

    heavy_rule = '=' * 80
    light_rule = "-" * 50

    def print_banner(title):
        # Section heading framed by two full-width rules.
        print(f"\n{heavy_rule}")
        print(title)
        print(f"{heavy_rule}\n")

    # --- Single-file mode: list what the file uses and stop. ---
    if args.file:
        target = args.file
        if not os.path.exists(target):
            print(f"Error: File {target} not found")
            return

        print(f"Analyzing single file: {target}\n")
        try:
            with open(target, 'r', encoding='utf-8') as handle:
                markup = handle.read()

            file_classes = extract_classes_from_html(markup)
            file_ids = extract_ids_from_html(markup)

            print(f"CSS classes found ({len(file_classes)}):")
            for name in file_classes:
                print(f"  .{name}")

            print(f"\nCSS IDs found ({len(file_ids)}):")
            for name in file_ids:
                print(f"  #{name}")

        except Exception as exc:
            print(f"Error reading {target}: {exc}")

        return

    # --- Project mode: scan everything and cross-check. ---
    print("Analyzing CSS classes and IDs in HTML template files...\n")

    html_classes, html_ids, file_details = scan_html_files()
    css_classes, css_ids, css_details = scan_css_files()

    unused_classes, unused_ids, missing_classes, missing_ids = analyze_usage(
        html_classes, html_ids, css_classes, css_ids
    )

    # Distinct names across all languages.
    total_html_classes = len(set().union(*html_classes.values()))
    total_html_ids = len(set().union(*html_ids.values()))

    if args.verbose:
        print_banner("CSS ANALYSIS RESULTS")

        print("SUMMARY:")
        print("-" * 40)
        print(f"Total CSS classes found in HTML: {total_html_classes}")
        print(f"Total CSS IDs found in HTML: {total_html_ids}")
        print(f"Total CSS classes defined: {len(css_classes)}")
        print(f"Total CSS IDs defined: {len(css_ids)}")

    # Missing definitions are always reported, verbose or not.
    if missing_classes or missing_ids:
        # One shared heading, printed before whichever section comes first.
        print("ERRORS: Missing CSS definitions found")

        if missing_classes:
            print("\nMISSING CSS CLASSES (used but not defined):")
            print(light_rule)
            for name in sorted(missing_classes):
                print(f"  ❌ .{name}")

        if missing_ids:
            print(f"\nMISSING CSS IDS (used but not defined):")
            print(light_rule)
            for name in sorted(missing_ids):
                print(f"  ❌ #{name}")
    else:
        print("✅ No missing CSS definitions found!")

    if not args.verbose:
        return

    print_banner("UNUSED CSS DEFINITIONS")

    if unused_classes:
        print("UNUSED CSS CLASSES (defined but not used):")
        print(light_rule)
        for name in sorted(unused_classes):
            print(f"  .{name}")
    else:
        print("✅ No unused CSS classes found!")

    if unused_ids:
        print(f"\nUNUSED CSS IDS (defined but not used):")
        print(light_rule)
        for name in sorted(unused_ids):
            print(f"  #{name}")
    else:
        print("\n✅ No unused CSS IDs found!")

    print_banner("CSS FILE BREAKDOWN")

    for css_file, details in css_details.items():
        declared = details['classes']
        print(f"{css_file}:")
        print(f"  Classes: {len(declared)}")
        print(f"  IDs: {len(details['ids'])}")
        if len(declared) <= 20:  # only list the classes of small stylesheets
            for name in declared:
                print(f"    .{name}")
        print()

# Run the analyzer only when this file is executed as a script, so that
# importing the module does not trigger argument parsing or any scanning.
if __name__ == "__main__":
    main()
