import math
import numbers
import subprocess
import sys
from collections import Counter, defaultdict
from itertools import combinations

import emoji
import numpy as np
import pandas as pd

# Third-party packages this module needs. install_packages() iterates this list,
# probing each name with __import__ and passing it to `pip install` when the
# import fails, so each entry is used both as a module name and as a pip name.
REQUIRED_PACKAGES = ["emoji", "pandas", "numpy"]

def install_packages():
    """Try to pip-install every entry of REQUIRED_PACKAGES that cannot be imported.

    Each name is probed with ``__import__``. When that raises ``ImportError`` a
    notice is printed and ``pip install <name>`` is run through the current
    interpreter (``sys.executable -m pip``). A failing install is reported with
    a printed message and does not stop the loop; nothing is returned.
    """
    for package in REQUIRED_PACKAGES:
        try:
            __import__(package)
        except ImportError:
            is_missing = True
        else:
            is_missing = False

        if not is_missing:
            continue

        print(f"Package '{package}' is missing. Attempting to install...")
        pip_command = [sys.executable, "-m", "pip", "install", package]
        try:
            subprocess.check_call(pip_command)
        except Exception as e:
            # Best effort: report the failure and move on to the next package.
            print(f"Failed to install package '{package}': {e}")

# Run the installer as a side effect of importing this module.
# NOTE(review): the `import emoji` / `import pandas` / `import numpy` statements
# at the top of this file execute before this call, so a missing package raises
# ImportError there first - confirm whether this call was meant to run earlier.
install_packages()


def construct_list_EmoTokenPair(list_list_EmoTokenPair):
    """Flatten one level of nesting into a single list.

    Args:
        list_list_EmoTokenPair: Iterable of iterables; the items of each inner
            iterable are appended to the result in encounter order.

    Returns:
        A new list holding every inner item.
    """
    flattened = []
    for group in list_list_EmoTokenPair:
        flattened.extend(group)
    return flattened


def construct_df_EmoTokenPair(list_EmoTokenPair, columns=['EmoToken1', 'Emj'], sortby=''):
    """Build a DataFrame from a list of pairs, optionally sorted.

    Args:
        list_EmoTokenPair: Row data handed to ``pd.DataFrame``.
        columns: Column labels for the frame.
        sortby: Column label(s) to sort by in ascending order; a falsy value
            (the default ``''``) keeps the input order.

    Returns:
        The DataFrame with a fresh 0..n-1 index.
    """
    frame = pd.DataFrame(list_EmoTokenPair, columns=columns)
    if not sortby:
        return frame.reset_index(drop=True)
    return frame.sort_values(by=sortby, ascending=True).reset_index(drop=True)


def quick_preview_df_EmoTokenPair(df, analysis_mode, key_colm_name='', keyword=None, value_colm_name=''):
    """Print a quick diagnostic view of the rows or groups matching ``keyword``.

    Two modes are handled. Any other ``analysis_mode``, or a mode whose
    required arguments are missing, prints nothing.

    ``'search_keyword'`` (needs ``key_colm_name`` and ``keyword``)
        Prints the number of rows whose ``key_colm_name`` value equals
        ``keyword``, followed by those rows.

    ``'group_by_key'`` (needs ``key_colm_name``, ``keyword`` and ``value_colm_name``)
        Groups ``df`` by ``key_colm_name`` and aggregates ``value_colm_name``
        into three columns: ``Key`` (distinct values, sorted and comma-joined),
        ``Key_DistinctCount`` (number of distinct values) and
        ``Key_Frequency`` (``value(count)`` entries, most frequent first).
        Prints the total number of groups, then the group row for ``keyword``.

    Args:
        df: DataFrame to inspect.
        analysis_mode: ``'search_keyword'`` or ``'group_by_key'``.
        key_colm_name: Column compared against ``keyword`` / grouped on.
        keyword: Value to look for in ``key_colm_name``.
        value_colm_name: Column summarised per group (``'group_by_key'`` only).

    Returns:
        None. All output goes to stdout.
    """
    has_key = bool(key_colm_name) and keyword is not None

    if analysis_mode == 'search_keyword' and has_key:
        # Build the filtered frame once and reuse it for both prints.
        matches = df[df[key_colm_name] == keyword]
        print(len(matches))
        print(matches)

    if analysis_mode == 'group_by_key' and has_key and value_colm_name:

        def distinct_values(values):
            # sorted() makes the listing reproducible; plain set iteration
            # order for strings changes between interpreter runs.
            return ", ".join(sorted(set(values)))

        def value_frequencies(values):
            # most_common() orders by descending count, ties in first-seen order.
            return ", ".join(
                f"{item}({count})" for item, count in Counter(values).most_common()
            )

        df_GroupbyEmoToken = df.groupby(key_colm_name).agg(
            Key=(value_colm_name, distinct_values),
            Key_DistinctCount=(value_colm_name, "nunique"),
            Key_Frequency=(value_colm_name, value_frequencies),
        ).reset_index()
        print(len(df_GroupbyEmoToken))
        print(df_GroupbyEmoToken[df_GroupbyEmoToken[key_colm_name] == keyword])


def calculate_emotokenpair_count_pmi_npmi(list_EmoTokenPair):
    """Compute count, PMI and NPMI for every distinct (word, emoji) pair.

    Probabilities are relative frequencies over all pairs in the input:
    ``p(word, emoji) = count(pair) / N``, ``p(word) = count(word) / N`` and
    ``p(emoji) = count(emoji) / N``, where ``N`` is the number of input pairs.

    ``PMI  = log2(p(word, emoji) / (p(word) * p(emoji)))``
    ``NPMI = PMI / -log2(p(word, emoji))``

    Args:
        list_EmoTokenPair: Iterable of hashable 2-tuples ``(word, emoji)``.

    Returns:
        A list of ``(word, emoji, count, pmi, npmi)`` tuples, one per distinct
        pair, in first-seen order. An empty input yields an empty list.
    """
    pair_counts = Counter(list_EmoTokenPair)
    total_pairs = sum(pair_counts.values())

    # Marginal counts for each side of the pair.
    word_counts = Counter()
    emoji_counts = Counter()
    for (word, emoji_token), count in pair_counts.items():
        word_counts[word] += count
        emoji_counts[emoji_token] += count

    results = []
    for (word, emoji_token), count in pair_counts.items():
        # Every count is >= 1, so all three probabilities are strictly positive.
        p_word_emoji = count / total_pairs
        p_word = word_counts[word] / total_pairs
        p_emoji = emoji_counts[emoji_token] / total_pairs

        pmi = math.log2(p_word_emoji / (p_word * p_emoji))

        if count == total_pairs:
            # Only one distinct pair exists: p(word, emoji) == 1, so the
            # normaliser -log2(1) is 0 and the ratio is undefined (this used to
            # raise ZeroDivisionError). Use NPMI's value for complete
            # co-occurrence instead.
            npmi = 1.0
        else:
            npmi = pmi / -math.log2(p_word_emoji)

        results.append((word, emoji_token, count, pmi, npmi))

    return results


def convert_list_npmi_to_df(list_emotoken_npmi, columns=['EmoToken', 'Emj', 'Count', 'PMI', 'NPMI'], sortby=''):
    """Turn a list of per-pair statistic rows into a DataFrame.

    Args:
        list_emotoken_npmi: Row data handed to ``pd.DataFrame``.
        columns: Column labels for the frame.
        sortby: Column label(s) to sort by in ascending order; a falsy value
            (the default ``''``) keeps the input order.

    Returns:
        The DataFrame with a fresh 0..n-1 index.
    """
    table = pd.DataFrame(list_emotoken_npmi, columns=columns)
    ordered = table.sort_values(by=sortby, ascending=True) if sortby else table
    return ordered.reset_index(drop=True)


def get_emo_label_for_emo_token(text, dict_emo_label):
    """Look up the label list for each '.'-separated token of ``text``.

    A token without an underscore is looked up directly in ``dict_emo_label``.
    For a token containing an underscore, the segment between the first and
    second underscore (or up to the end) is looked up instead, and every label
    found is swapped for its counterpart in a fixed opposite-label table;
    labels with no counterpart become ``''``.

    Args:
        text: String of tokens separated by ``'.'``.
        dict_emo_label: Mapping from token to a list of labels.

    Returns:
        One list of labels per token, in token order; tokens missing from
        ``dict_emo_label`` contribute an empty list.
    """
    opposite_pairs = (
        ('positive', 'negative'),
        ('joy', 'sadness'),
        ('anger', 'fear'),
        ('trust', 'disgust'),
        ('anticipation', 'surprise'),
    )
    opposite_of = {}
    for left, right in opposite_pairs:
        opposite_of[left] = right
        opposite_of[right] = left

    def labels_for(token):
        segments = token.split('_')
        if len(segments) == 1:
            # No underscore: plain lookup, labels returned as stored.
            return dict_emo_label.get(token, [])
        stored = dict_emo_label.get(segments[1], [])
        return [opposite_of.get(label, '') for label in stored]

    return [labels_for(token) for token in text.split('.')]


def convert_label_to_vector(list_label, dict_vector):
    """Encode a list of labels as a vector laid out like ``dict_vector``.

    The result has one entry per key of ``dict_vector``, in that mapping's key
    order: ``1`` where the key appears in ``list_label``, otherwise the
    template's own value. Labels that are not keys of ``dict_vector`` are
    ignored, so the vector always has the template's length (previously such a
    label added an extra entry). ``dict_vector`` is not modified.

    Args:
        list_label: Iterable of hashable labels.
        dict_vector: Template mapping from label to its default value.

    Returns:
        A list with ``len(dict_vector)`` entries.
    """
    active_labels = set(list_label)
    return [
        1 if label in active_labels else default
        for label, default in dict_vector.items()
    ]


def elementwise_sum_list(list_of_vectors):
    """Add equally long vectors position by position.

    Args:
        list_of_vectors: Sequence of vectors (each supporting ``len``).

    Returns:
        A list whose i-th entry is the sum of the i-th entries of all
        vectors, or ``[]`` when ``list_of_vectors`` is empty.

    Raises:
        ValueError: If any vector's length differs from the first vector's.
    """
    if not list_of_vectors:
        return []

    expected_length = len(list_of_vectors[0])
    for vec in list_of_vectors:
        if len(vec) != expected_length:
            raise ValueError("All vectors must have the same length.")

    return list(map(sum, zip(*list_of_vectors)))


def aggregate_vectors(df):
    """Sum the ``EmoVector`` entries of ``df`` position-wise within each ``Emj`` group.

    Args:
        df: DataFrame with an ``"Emj"`` column to group on and an
            ``"EmoVector"`` column holding one vector per row.

    Returns:
        A DataFrame with columns ``"Emj"`` and ``"EmoVector"``, one row per
        group. Vectors are combined with ``zip``, so the summed vector is as
        long as the shortest vector in the group.
    """
    def sum_by_position(vectors):
        return [sum(column) for column in zip(*vectors)]

    summed_per_group = df.groupby("Emj")["EmoVector"].apply(sum_by_position)
    return summed_per_group.reset_index()


def normalize_vector(vector):
    """Min-max scale ``vector`` so its smallest entry maps to 0 and its largest to 1.

    Args:
        vector: Non-empty sequence of numbers.

    Returns:
        A new list of scaled values. When all entries are equal the input
        object itself is returned unchanged.

    Raises:
        ValueError: If ``vector`` is empty.
    """
    if not vector:
        raise ValueError("The vector is empty.")

    lowest = min(vector)
    highest = max(vector)
    if lowest == highest:
        # Zero range: scaling would divide by zero, so hand the input back.
        return vector

    scaled = []
    for value in vector:
        scaled.append((value - lowest) / (highest - lowest))
    return scaled


def discretize_vector(vector, cutoff_input):
    """Binarise ``vector`` against a cutoff.

    Args:
        vector: Non-empty sequence of numbers.
        cutoff_input: Threshold to use when it is a real number (``int``,
            ``float``, or any other ``numbers.Real`` such as numpy integer and
            floating scalars). Any other value (e.g. ``None`` or a string)
            selects the arithmetic mean of ``vector`` as the threshold.

    Returns:
        A list with ``1`` for entries ``>=`` the threshold and ``0`` otherwise.

    Raises:
        ValueError: If ``vector`` is empty.
    """
    if not vector:
        raise ValueError("The vector is empty.")

    # numbers.Real covers int/float and also numpy scalars, which a plain
    # (int, float) check rejects (np.int64, np.float32), silently replacing
    # the caller's explicit cutoff with the mean.
    if isinstance(cutoff_input, numbers.Real):
        cutoff_value = cutoff_input
    else:
        cutoff_value = sum(vector) / len(vector)

    return [1 if x >= cutoff_value else 0 for x in vector]


def convert_vector_to_dict(list_vector, dict_label_0):
    """Pair the keys of ``dict_label_0`` with the entries of ``list_vector``.

    Args:
        list_vector: Values to assign, in the key order of ``dict_label_0``.
        dict_label_0: Mapping whose keys name the vector positions.

    Returns:
        A new dict from key to the value at the same position. Pairing stops
        at the shorter of the two inputs.
    """
    return {
        label: value
        for label, value in zip(dict_label_0.keys(), list_vector)
    }
