import subprocess
import sys
import os
import pickle
import re
import html

# Third-party packages this script depends on. install_packages() uses each
# entry both as the module name it tries to import and as the requirement it
# hands to pip, so an entry must be valid in both roles.
# NOTE(review): "html5lib" is not imported anywhere in the visible code —
# confirm it is still required.
REQUIRED_PACKAGES = ["emoji", "contractions", "html5lib"]

def install_packages():
    """Install every package from REQUIRED_PACKAGES that cannot be imported.

    Each name is imported as a probe. On ImportError, pip is invoked through
    the running interpreter (``sys.executable -m pip install <name>``).
    A failed installation is reported on stdout and does not raise, so the
    remaining packages are still attempted.
    """
    import importlib

    for package in REQUIRED_PACKAGES:
        try:
            importlib.import_module(package)
        except ImportError:
            print(f"Package '{package}' is missing. Attempting to install...")
            try:
                subprocess.check_call([sys.executable, "-m", "pip", "install", package])
            except Exception as e:
                # Best effort: report and carry on with the next package.
                print(f"Failed to install package '{package}': {e}")
            else:
                # The package was installed while this process is running;
                # the import system's finder caches must be invalidated so
                # the imports that follow are guaranteed to see it.
                importlib.invalidate_caches()

# Runs at import time: the third-party imports just below need these
# packages, so any that are missing are installed first.
install_packages()

# Import dependencies after installation
import contractions
import emoji

# Define paths for pickle files (expected in the same directory as this file)
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
name_pickle_path = os.path.join(BASE_DIR, "dict_joypixels_emot_name.pickle")
emji_pickle_path = os.path.join(BASE_DIR, "dict_joypixels_emot_emji.pickle")

# Load emoji-related dictionaries.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# these pickles must only ever come from a trusted source.
# A missing or unreadable file (any OSError) or a truncated/corrupt pickle
# falls back to empty dictionaries instead of aborting the import. Both
# dictionaries are reset together, even if only one of the files failed.
try:
    with open(name_pickle_path, "rb") as f:
        dict_joypixels_emot_name = pickle.load(f)
    with open(emji_pickle_path, "rb") as f:
        dict_joypixels_emot_emji = pickle.load(f)
except (OSError, pickle.UnpicklingError, EOFError) as e:
    print(f"Error loading pickle file: {e}")
    dict_joypixels_emot_name = {}
    dict_joypixels_emot_emji = {}

# Expand Contractions
def expand_contractions(contraction_dict):
    """Build a lookup of apostrophe-less contraction spellings.

    Only entries of ``contraction_dict`` whose contraction contains "'t" or
    "’t" are used. In the contraction, every apostrophe (straight or curly)
    becomes a space; in the expansion, apostrophes are dropped. Both the key
    and the value get one trailing space. A fixed set of additional
    space-delimited forms is merged in afterwards.

    Returns:
        dict: Maps the spaced-out contraction to its expansion.
    """
    def swap_apostrophes(value, filler):
        # Handle the curly apostrophe first, then the straight one.
        return value.replace("’", filler).replace("'", filler)

    expanded = {
        swap_apostrophes(short_form, " ") + ' ': swap_apostrophes(long_form, "") + ' '
        for short_form, long_form in contraction_dict.items()
        if "'t" in short_form or "’t" in short_form
    }

    # Additional expansions
    extra_forms = (
        (' i m ', ' i am '),
        (' ive ', ' i have '),
        (' he s ', ' he is '),
        (' she s ', ' she is '),
        (' it s ', ' it is '),
        (' weve ', ' we have '),
        (' they re ', ' they are '),
        (' theyve ', ' they have '),
    )
    for spaced_form, long_form in extra_forms:
        expanded[spaced_form] = long_form
    return expanded

# Built once at import time from the `contractions` package's dictionary;
# read by remove_expanded_contraction().
dict_expanded_nltk_contraction = expand_contractions(contractions.contractions_dict)

# Emoji Functions
def find_emoji(text):
    """Return the whitespace-separated tokens of ``text`` that are emojis.

    A token is kept only when ``emoji.is_emoji`` accepts the whole token.
    The kept tokens are joined with single spaces, in their original order.
    """
    emoji_tokens = []
    for token in text.split():
        if emoji.is_emoji(token):
            emoji_tokens.append(token)
    return ' '.join(emoji_tokens)

def get_joypixels_emot_emji_detection(text):
    """Apply every key -> value substitution of dict_joypixels_emot_emji.

    Each key of the module-level dictionary is replaced, wherever it occurs
    in ``text``, by its value. Substitutions run one after another in the
    dictionary's iteration order, so a later one sees the output of the
    earlier ones.
    NOTE(review): the dictionary is loaded from a pickle; judging by its
    name it maps emoticons to emojis — confirm against the data file.
    """
    converted = text
    for emoticon in dict_joypixels_emot_emji:
        converted = converted.replace(emoticon, dict_joypixels_emot_emji[emoticon])
    return converted

# Helper Functions
def remove_retweet_label(text):
    """Strip retweet markers from ``text``.

    Two forms are removed:
      * the standalone upper-case word "RT";
      * a lower-case "rt" word followed by a single whitespace character
        and an "@handle:" prefix.

    Everything else is left in place, including the handle that follows an
    upper-case "RT" and any surrounding whitespace.
    """
    # The handle is a plain run of word characters (which already includes
    # "_"). The previous pattern escaped the opening bracket of the character
    # class, so it only ever matched a literal "[" and never a real handle.
    # The leading \b keeps words that merely end in "rt" from being clipped.
    return re.sub(r'\bRT\b|\brt\s@\w+:', '', text)

def remove_twitterhandle(text):
    """Delete every "@" that is followed by one or more word characters,
    together with those characters."""
    handle_pattern = re.compile(r'@\w+')
    return handle_pattern.sub('', text)

def remove_html_noise(text):
    """Decode HTML entities, blank out markup noise and tidy whitespace.

    After ``html.unescape``, every ``<br>`` tag variant, every run starting
    with "http" up to the next whitespace, and any remaining literal
    "&amp;" / "&quot;" is replaced by a space. Whitespace runs are then
    collapsed to one space and the result is stripped.
    """
    decoded = html.unescape(text)
    without_noise = re.sub(r'<br\s*/?>|http\S+|&amp;|&quot;', ' ', decoded)
    collapsed = re.sub(r'\s+', ' ', without_noise)
    return collapsed.strip()

def remove_custom_noise(text, custom_noise=None):
    """Remove caller-supplied noise words from ``text``.

    ``custom_noise`` is a "|"-separated list of words. Each one is removed
    as a whole word, ignoring case. Whether or not any words were given,
    whitespace runs are collapsed to one space and the result is stripped.
    """
    noise_words = custom_noise.split('|') if custom_noise else []
    for noise_word in noise_words:
        word_pattern = re.compile(r'\b' + re.escape(noise_word) + r'\b', re.IGNORECASE)
        text = word_pattern.sub('', text)
    return re.sub(r'\s+', ' ', text).strip()

def remove_contraction(text):
    """Return ``text`` after passing it through ``contractions.fix``.

    NOTE(review): despite the name, this presumably expands contractions
    rather than deleting them — the call site in clean_social_text labels
    this step "Expand contractions".
    """
    return contractions.fix(text)

def remove_expanded_contraction(text):
    """Rewrite spaced-out contractions using dict_expanded_nltk_contraction.

    Every key of the module-level dictionary is applied as a regular
    expression (the keys are not escaped) and its matches are replaced by
    the mapped value. Substitutions run in the dictionary's iteration order.
    """
    rewritten = text
    for spaced_form in dict_expanded_nltk_contraction:
        rewritten = re.sub(spaced_form, dict_expanded_nltk_contraction[spaced_form], rewritten)
    return rewritten

def remove_puncnumb(text):
    """Strip number-led tokens, punctuation characters and digits.

    First, every digit that is followed by at least one more word character
    is removed together with those characters. Then each remaining character
    that appears in the punctuation/digit set below is deleted. Whitespace
    is left untouched.
    """
    without_numbers = re.sub(r'\d\w+', '', text)
    unwanted_chars = r'''.,!()+-[]{}|;:=`'"\<>/?@#$£%^&*_~…0123456789“”'''
    # Map every unwanted character to None so that translate() drops it.
    deletion_table = str.maketrans('', '', unwanted_chars)
    return without_numbers.translate(deletion_table)

def remove_non_ascii(text):
    """Keep only ASCII characters and single-character emojis.

    ``text`` is converted with ``str()`` first. A character survives when
    its code point is below 128 or when ``emoji.is_emoji`` accepts it.
    """
    kept_chars = []
    for ch in str(text):
        if ord(ch) < 128 or emoji.is_emoji(ch):
            kept_chars.append(ch)
    return ''.join(kept_chars)

# Main Cleaning Function
import re
import emoji
def clean_social_text(text, custom_noise=None, map_emoticon=False):
    """
    Cleans social media text.

    The steps run in this order: drop non-ASCII characters (emojis are
    kept), remove retweet labels and Twitter handles, decode/remove HTML
    noise and URLs, drop non-ASCII characters again, remove custom noise
    words, expand contractions, optionally map emoticons, pad emojis with
    spaces, remove punctuation and numbers, collapse whitespace, lower-case.

    Args:
        text (str): The input text. Falsy values (None, "", 0, ...) yield
            "". Other non-string values are converted with str().
        custom_noise (str): Optional custom noise keywords to remove,
            separated by "|".
        map_emoticon (bool): If True, maps JoyPixels emoticons to emojis.

    Returns:
        str: Cleaned text. "" when the input is falsy or when any step
        raises; in the latter case the error is printed to stdout.
    """
    try:
        if not text:  # Handle None or empty text
            return ""

        # Ensure text is a string
        text = str(text)

        # Sequential cleaning steps
        text = remove_non_ascii(text)          # Remove non-ASCII characters
        text = remove_retweet_label(text)      # Remove RT labels
        text = remove_twitterhandle(text)      # Remove Twitter handles
        text = remove_html_noise(text)         # Remove HTML noise
        # Decoding HTML entities in the previous step can introduce
        # non-ASCII characters (e.g. "&eacute;") after the first filter has
        # already run, so the filter is applied once more.
        text = remove_non_ascii(text)
        text = remove_custom_noise(text, custom_noise)  # Remove custom noise
        text = remove_contraction(text)        # Expand contractions

        # Map JoyPixels emoticons to emojis if requested
        if map_emoticon:
            text = get_joypixels_emot_emji_detection(text)

        # Add spaces around emojis
        text = ''.join(f' {char} ' if emoji.is_emoji(char) else char for char in text).strip()

        # Remove punctuation and numbers
        text = remove_puncnumb(text)

        # Remove extra spaces using regex
        text = re.sub(r'\s+', ' ', text).strip()

        # Convert to lowercase
        text = text.lower()

    except Exception as e:
        # Deliberate best effort: one bad record must not abort a batch.
        print(f"Error processing text: {e}")
        return ""

    return text

