import emoji
from itertools import combinations

# Third-party packages this module depends on. The list is informational
# (for documentation or verification purposes); none of the functions
# visible in this file read it.
REQUIRED_PACKAGES = ["emoji"]
# Function to remove last token if it's an emoji using emoji.is_emoji()
def remove_last_token_if_emoji(text):
    """Strip a trailing emoji token from whitespace-separated text.

    Args:
        text: The string to inspect. It is split on runs of whitespace.

    Returns:
        The remaining tokens joined by single spaces when the text has at
        least two tokens and the last one satisfies ``emoji.is_emoji``.
        Otherwise ``text`` is returned unchanged, including its original
        whitespace.
    """
    tokens = text.split()

    # A text with zero or one token is never shortened, even when that
    # single token is itself an emoji.
    if len(tokens) <= 1:
        return text

    *leading, trailing = tokens
    if not emoji.is_emoji(trailing):
        return text

    return " ".join(leading)

def extract_emojis(text):
    """Return the emoji characters found in ``text``, separated by spaces.

    Args:
        text: Any value; it is converted with ``str()`` before scanning, so
            non-string inputs are accepted.

    Returns:
        A string containing every character for which ``emoji.is_emoji``
        is true, in order of appearance and joined by single spaces. The
        result is an empty string when no character matches.

    Note:
        Characters are tested one at a time, so an emoji that spans several
        code points is not matched as a single unit.
    """
    found = []
    for character in str(text):
        if emoji.is_emoji(character):
            found.append(character)
    return ' '.join(found)

def generate_biterm(text, UNK='x', remove_last_token=True):
    """Build the unique ordered token pairs (biterms) of a text.

    Args:
        text: Whitespace-separated tokens.
        UNK: Placeholder token. A pair is skipped only when *both* of its
            terms equal ``UNK``; pairs with a single ``UNK`` are kept.
        remove_last_token: When true, the final token is dropped
            unconditionally before pairing.

    Returns:
        A list of unique ``(term1, term2)`` tuples, where ``term1`` occurs
        before ``term2`` in the text. Pairs are not symmetrised, so both
        ``(a, b)`` and ``(b, a)`` can appear. The list is in order of first
        occurrence, which makes the output reproducible across runs.
    """
    tokens = text.strip().split()
    if remove_last_token:
        tokens = tokens[:-1]

    # A dict keeps insertion order, so it de-duplicates like a set while
    # yielding a stable order; a plain set of strings iterates in an order
    # that varies with hash randomisation.
    unique_pairs = dict.fromkeys(
        (term1, term2)
        for term1, term2 in combinations(tokens, 2)
        if not (term1 == UNK and term2 == UNK)
    )
    return list(unique_pairs)

# Example input
# text = "x x x pretty good x x x x x x x x x x x x"

# Generate and print biterms
# biterms = generate_biterm(text, UNK='x')
# for b in biterms:
#     print(b)
#
# Output (five unique biterms; the order shown is illustrative):
# ('x', 'pretty')
# ('good', 'x')
# ('pretty', 'good')
# ('x', 'good')
# ('pretty', 'x')

def generate_triplet(text, emj, UNK='x', remove_last_token=True):
    """Build the unique ``((term1, term2), emj)`` triplets of a text.

    Args:
        text: Whitespace-separated tokens.
        emj: Value attached unchanged to every token pair (the commented
            example in this file passes an emoji string).
        UNK: Placeholder token. A pair is skipped only when *both* of its
            terms equal ``UNK``; pairs with a single ``UNK`` are kept.
        remove_last_token: When true, the final token is dropped
            unconditionally before pairing.

    Returns:
        A list of unique ``((term1, term2), emj)`` tuples, where ``term1``
        occurs before ``term2`` in the text. Pairs are not symmetrised, so
        both ``(a, b)`` and ``(b, a)`` can appear. The list is in order of
        first occurrence, which makes the output reproducible across runs.
    """
    tokens = text.strip().split()
    if remove_last_token:
        tokens = tokens[:-1]

    # A dict keeps insertion order, so it de-duplicates like a set while
    # yielding a stable order; a plain set of strings iterates in an order
    # that varies with hash randomisation.
    unique_triplets = dict.fromkeys(
        ((term1, term2), emj)
        for term1, term2 in combinations(tokens, 2)
        if not (term1 == UNK and term2 == UNK)
    )
    return list(unique_triplets)

# Example input
# text = "x x x pretty good x x x x x x x x x x x x"
# emj = "❤"

# Generate and print triplets
# triplets = generate_triplet(text, emj, UNK='x')
# for t in triplets:
#     print(t)
#
# Output (five unique triplets; the order shown is illustrative):
# (('good', 'x'), '❤')
# (('x', 'pretty'), '❤')
# (('x', 'good'), '❤')
# (('pretty', 'good'), '❤')
# (('pretty', 'x'), '❤')