import subprocess
import sys
import nltk
from nltk.corpus import wordnet

# Third-party packages this module depends on. install_packages() walks this
# list and uses each entry both as the module name to import and as the name
# handed to `pip install`.
REQUIRED_PACKAGES = ["nltk"]

def install_packages(packages=None):
    """
    Install every required package that cannot currently be imported.

    Each name is used both as the module name to import and as the
    distribution name passed to ``pip install``, so the two must match.
    Installation is best-effort: a failure is reported on stdout and the
    remaining packages are still processed.

    NOTE(review): this module imports nltk at the top of the file, before this
    function is called, so a missing nltk would raise ImportError before the
    installer gets a chance to run - confirm the intended ordering.

    Parameters:
    packages (iterable of str, optional): Package names to check. Defaults to
        the module-level REQUIRED_PACKAGES list.

    Returns:
    None
    """
    import importlib  # local import: nothing else in the module needs it

    if packages is None:
        packages = REQUIRED_PACKAGES

    for package in packages:
        try:
            importlib.import_module(package)
            continue  # already importable, nothing to install
        except ImportError:
            print(f"Package '{package}' is missing. Attempting to install...")

        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        except (subprocess.SubprocessError, OSError) as e:
            print(f"Failed to install package '{package}': {e}")
        else:
            # Files created after interpreter start-up may be invisible to the
            # import system until its finder caches are refreshed.
            importlib.invalidate_caches()

# Ensure required packages are installed
install_packages()

# NLTK data this module relies on, as (lookup path, downloader id) pairs.
# Both the older and the newer ids of the tagger and of the tokenizer models
# are listed because which one NLTK loads depends on the installed version.
_NLTK_RESOURCES = (
    ("corpora/wordnet", "wordnet"),
    ("taggers/averaged_perceptron_tagger", "averaged_perceptron_tagger"),
    ("taggers/averaged_perceptron_tagger_eng", "averaged_perceptron_tagger_eng"),
    # word_tokenize() needs the Punkt models, which were not downloaded before
    ("tokenizers/punkt", "punkt"),
    ("tokenizers/punkt_tab", "punkt_tab"),
)

# Download only what is missing, so importing this module does not contact the
# download server again once the data is already installed.
for _resource_path, _package_id in _NLTK_RESOURCES:
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package_id)

def parse_word_pos(tagged_words, set_stop_word=None, set_negation_word=None, UNK='x', return_string=False, join_negation=False):
    """
    Replace tokens that WordNet does not know with a placeholder.

    Each (word, POS tag) pair is handled in order:
    - a negation word is always kept; with join_negation it is merged with the
      following word into "negation_word" (carrying the follower's tag) when
      WordNet has a synset for that follower, and the follower is consumed;
    - a stop word is replaced by UNK;
    - any other word is kept only if its tag maps to a WordNet POS and WordNet
      has a synset for it under that POS; otherwise it is replaced by UNK.

    Parameters:
    tagged_words (list): (word, Penn Treebank POS tag) tuples.
    set_stop_word (set, optional): Words to replace by UNK. Defaults to empty.
    set_negation_word (set, optional): Words to keep as-is; checked before
        the stop words. Defaults to empty.
    UNK (str): Placeholder used for rejected words.
    return_string (bool): If truthy, return the tokens joined by spaces.
    join_negation (bool): If truthy, merge a negation word with its follower
        as described above.

    Returns:
    list or str: List of (token, tag) tuples, or the space-joined tokens when
    return_string is truthy.
    """
    stop_words = set() if set_stop_word is None else set_stop_word
    negation_words = set() if set_negation_word is None else set_negation_word

    # Penn Treebank tag -> WordNet POS constant; unmapped tags never match WordNet
    penn_to_wordnet = {
        penn_tag: wn_constant
        for wn_constant, penn_tags in (
            (wordnet.NOUN, ('NN', 'NNS')),
            (wordnet.VERB, ('VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ')),
            (wordnet.ADJ, ('JJ', 'JJR', 'JJS')),
            (wordnet.ADV, ('RB', 'RBR', 'RBS')),
        )
        for penn_tag in penn_tags
    }

    def known_to_wordnet(token, tag):
        # True when the tag is mapped and WordNet has a synset for the token under it
        wn_constant = penn_to_wordnet.get(tag)
        return bool(wn_constant and wordnet.synsets(token, pos=wn_constant))

    token_count = len(tagged_words)
    result = []
    skip_next = False  # set when the current token was merged into the previous one

    for index, (word, pos) in enumerate(tagged_words):
        if skip_next:
            skip_next = False
            continue

        if word in negation_words:
            if join_negation and index + 1 < token_count:
                next_word, next_pos = tagged_words[index + 1]
                if known_to_wordnet(next_word, next_pos):
                    result.append((f"{word}_{next_word}", next_pos))
                    skip_next = True
                    continue
            # No usable follower: keep the negation word untouched
            result.append((word, pos))
        elif word in stop_words:
            result.append((UNK, pos))
        else:
            kept = word if known_to_wordnet(word, pos) else UNK
            result.append((kept, pos))

    if return_string:
        return " ".join(token for token, _ in result)
    return result

# from nltk.corpus import wordnet
# import nltk
# Ensure that WordNet is downloaded
# nltk.download('wordnet')

def word_exists_wordnet(word):
    """
    Tell whether WordNet has at least one synset for a word.

    Before the lookup every non-alphabetic character (digits, hyphens,
    punctuation, ...) is dropped and the remainder is lower-cased.

    Parameters:
    word (str): The word to look up.

    Returns:
    bool: True if the normalized word has a synset in WordNet, False otherwise.
    """
    # Keep letters only, then normalize the case
    letters = [symbol for symbol in word if symbol.isalpha()]
    normalized = ''.join(letters).lower()

    synsets_found = wordnet.synsets(normalized)
    return True if synsets_found else False

def all_words_exist_in_wordnet(text):
    """
    Tell whether every word of a text has at least one synset in WordNet.

    The text is split on whitespace; each piece is reduced to its alphabetic
    characters and lower-cased. Pieces that end up empty (numbers, bare
    punctuation) are ignored, so a text without any alphabetic word yields True.

    Parameters:
    text (str): The text to check.

    Returns:
    bool: True if all remaining words exist in WordNet, False as soon as one
    of them does not.
    """
    def normalize(token):
        # Letters only, lower-cased
        return ''.join(filter(str.isalpha, token)).lower()

    candidates = (normalize(token) for token in text.split())
    # all() stops at the first word WordNet does not know
    return all(wordnet.synsets(candidate) for candidate in candidates if candidate)

def filter_wordnet_words(text, return_string=False):
    """
    Keep only the whitespace-separated words of a text that WordNet knows.

    Words are looked up exactly as they appear after splitting on whitespace;
    no punctuation is stripped here, so a token such as "garden." is looked up
    with its trailing period.

    Parameters:
    text (str): The text to filter.
    return_string (bool): If truthy, return the kept words joined by single
        spaces instead of a list.

    Returns:
    list or str: The words that have at least one synset in WordNet, in their
    original order, as a list or as a space-joined string.
    """
    list_valid_word = [word for word in text.split() if wordnet.synsets(word)]

    # Plain truthiness, matching how parse_word_pos treats its return_string flag
    return ' '.join(list_valid_word) if return_string else list_valid_word
    

import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet

# Module-level WordNetLemmatizer instance, created once when the module is
# loaded and reused by lemmatize_text() for every call.
lemmatizer = WordNetLemmatizer()

# Derive the WordNet POS constant for a single word from its NLTK POS tag
def get_wordnet_pos(word):
    """
    Return the WordNet POS constant to pass to lemmatize() for a word.

    The word is tagged on its own with nltk.pos_tag and only the first letter
    of the resulting tag is used: J -> adjective, N -> noun, V -> verb,
    R -> adverb. Any other tag falls back to noun.
    """
    first_letter = nltk.pos_tag([word])[0][1][0].upper()

    letter_to_wordnet = (
        ("J", wordnet.ADJ),
        ("N", wordnet.NOUN),
        ("V", wordnet.VERB),
        ("R", wordnet.ADV),
    )
    for letter, wn_constant in letter_to_wordnet:
        if first_letter == letter:
            return wn_constant

    return wordnet.NOUN  # fallback for every other tag

def lemmatize_text(text):
    """
    Lemmatize every token of a text with NLTK's WordNetLemmatizer.

    The text is tokenized with word_tokenize and the full token list is
    POS-tagged in a single nltk.pos_tag call, rather than one isolated word
    at a time. The first letter of each tag selects the WordNet POS used for
    lemmatization (J -> adjective, N -> noun, V -> verb, R -> adverb, anything
    else -> noun).

    Parameters:
    text (str): The text to lemmatize.

    Returns:
    str: The lemmas joined by single spaces, or "" when text is None, empty
    or not a string.
    """
    if not text or not isinstance(text, str):  # Handle None or non-string values
        return ""

    words = word_tokenize(text)
    if not words:  # e.g. whitespace-only input
        return ""

    tag_dict = {"J": wordnet.ADJ, "N": wordnet.NOUN, "V": wordnet.VERB, "R": wordnet.ADV}
    lemmatized_words = [
        # tag[:1] avoids an IndexError should a tag ever be empty
        lemmatizer.lemmatize(word, tag_dict.get(tag[:1].upper(), wordnet.NOUN))
        for word, tag in nltk.pos_tag(words)
    ]
    return " ".join(lemmatized_words)

# Example usage
# NOTE(review): these statements sit at module level, so they run (and print)
# whenever the module is imported - consider moving them under an
# `if __name__ == "__main__":` guard if that is not intended.
text = "The children are playing in the gardens happily."
lemmatized_text = lemmatize_text(text)
print(lemmatized_text)  # Expected: "The child be play in the garden happily ." - the final "." is presumably tokenized on its own, hence the space before it; verify with your NLTK version
