import subprocess
import sys
import emoji
from itertools import combinations

# Packages that install_packages() tries to import and, when the import fails,
# attempts to install with pip. Each entry is used both as the import name and
# as the pip package name.
REQUIRED_PACKAGES = ["emoji"]
def install_packages():
    """Import each entry of REQUIRED_PACKAGES and pip-install the ones that fail.

    Installation is best-effort: any ``Exception`` raised by the pip call is
    caught and reported with ``print``, and the loop moves on to the next
    package. A successfully installed package is not imported here.
    """
    for package in REQUIRED_PACKAGES:
        try:
            __import__(package)
        except ImportError:
            pass
        else:
            # Import succeeded, so there is nothing to install for this entry.
            continue

        print(f"Package '{package}' is missing. Attempting to install...")
        # Run pip through the current interpreter so the package lands in the
        # environment this script is executing in.
        pip_command = [sys.executable, "-m", "pip", "install", package]
        try:
            subprocess.check_call(pip_command)
        except Exception as e:
            print(f"Failed to install package '{package}': {e}")
# Ensure required packages are installed (runs at import time of this module).
# NOTE(review): `import emoji` at the top of this file executes before this
# call, so a missing `emoji` package raises ImportError there, before any
# installation is attempted -- confirm whether the import should be deferred.
install_packages()

def remove_last_token_if_emoji(text):
    """Drop the final whitespace-separated token of ``text`` when it is an emoji.

    The check uses ``emoji.is_emoji()`` on the last token as a whole. Text with
    fewer than two tokens is returned untouched, as is text whose last token is
    not an emoji. When the token is removed, the remaining tokens are re-joined
    with single spaces, so the original spacing is not preserved in that case.

    :param text: Input string of tokens separated by whitespace.
    :return: ``text`` without its trailing emoji token, or ``text`` unchanged.
    """
    tokens = text.split()

    # A lone token is never stripped, even if it is an emoji.
    if len(tokens) < 2:
        return text

    if not emoji.is_emoji(tokens[-1]):
        return text

    return " ".join(tokens[:-1])

def extract_emojis(text):
    """Return every emoji found in ``text`` as a single space-separated string.

    The input is converted with ``str()`` first, so non-string values (numbers,
    ``None``, ...) are accepted. Emoji are located with ``emoji.emoji_list()``,
    which matches complete emoji sequences; an emoji made of several code
    points (ZWJ sequences, skin-tone variants, keycaps, variation-selector
    forms) is therefore returned as one item instead of being split into, or
    lost as, individual code points.

    :param text: Value to scan; converted to ``str`` before scanning.
    :return: The emoji in order of appearance, joined by single spaces
             (an empty string when none are found).
    """
    text = str(text)  # Tolerate non-string input.
    # emoji_list() yields one dict per match; its 'emoji' entry is the matched text.
    return ' '.join(match['emoji'] for match in emoji.emoji_list(text))

def generate_biterm(text, UNK='x', remove_last_token=True, keep_one_x=True):
    """
    Generates unique biterms (ordered 2-token pairs) from a given text.

    Processing steps:

    1. The text is split on whitespace.
    2. If ``remove_last_token`` is true and there are at least two tokens,
       the last token is dropped.
    3. Every ``UNK`` token is removed; if ``keep_one_x`` is true and at least
       one ``UNK`` was present, a single ``UNK`` is appended at the end.
    4. Every 2-combination of the remaining tokens (in order of appearance)
       becomes a biterm.
    5. A ``(word, UNK)`` biterm is added for every non-``UNK`` word. This
       happens regardless of ``keep_one_x`` and of whether ``UNK`` occurred
       in the text.

    Duplicates are dropped, and the result is returned in first-seen order so
    the output is identical across runs (it does not depend on string hashing).
    Pairs are ordered tuples: repeated words can yield ``(w, w)`` as well as
    both ``(a, b)`` and ``(b, a)``.

    :param text: Input string of words separated by whitespace.
    :param UNK: Placeholder token that is filtered out of the words (default = 'x').
    :param remove_last_token: Whether to remove the last token before processing (default = True).
    :param keep_one_x: If True, retains exactly one ``UNK`` when the text contained any.
    :return: List of unique biterms (tuples of two strings) in deterministic order.
    """
    tokens = text.split()

    # Only drop the last token when something would remain afterwards.
    if remove_last_token and len(tokens) > 1:
        tokens = tokens[:-1]

    words = [token for token in tokens if token != UNK]

    candidates = list(words)
    # A length difference means at least one UNK was filtered out above.
    if keep_one_x and len(words) != len(tokens):
        candidates.append(UNK)

    # A dict is used as an insertion-ordered set: unique keys, stable order.
    biterms = dict.fromkeys(combinations(candidates, 2))
    biterms.update(dict.fromkeys((word, UNK) for word in words))

    return list(biterms)

# Example Input
# text = "x x x pretty good x x x x x x x x x x x x"

# # Generate and print biterms
# biterms = generate_biterm(text, UNK='x')
# for b in biterms:
#     print(b)
# Produces these 3 unique biterms (UNK only ever appears as the second element):
# ('pretty', 'good')
# ('pretty', 'x')
# ('good', 'x')

def detect_emotion_token(text, set_negation_word, dict_emo_label, UNK='x'):
    """
    Masks non-emotion words and fuses negation words with the emotion word that follows.

    Each whitespace-separated token is kept only if it is in ``set_negation_word``
    or in ``dict_emo_label``; every other token is replaced by ``UNK``. Then a
    negation token immediately followed by a token found in ``dict_emo_label``
    is merged with it into ``"<negation>_<emotion>"``, consuming both tokens.

    :param text: Input string of words separated by whitespace.
    :param set_negation_word: Collection of negation words (membership-tested).
    :param dict_emo_label: Mapping (or other container) whose keys are emotion words.
    :param UNK: Replacement for tokens in neither collection (default = 'x').
    :return: The processed tokens joined by single spaces.
    """
    # Pass 1: mask everything that is neither a negation nor an emotion word.
    masked = [
        token if (token in set_negation_word or token in dict_emo_label) else UNK
        for token in text.split()
    ]

    # Pass 2: walk the masked tokens, advancing by two whenever a pair is fused.
    merged = []
    total = len(masked)
    pos = 0
    while pos < total:
        current = masked[pos]
        if (
            current in set_negation_word
            and pos + 1 < total
            and masked[pos + 1] in dict_emo_label
        ):
            merged.append(f"{current}_{masked[pos + 1]}")
            pos += 2  # The emotion word was consumed together with the negation.
            continue

        merged.append(current)
        pos += 1

    return ' '.join(merged)