import OpenAI from 'openai'
import { zodResponseFormat } from 'openai/helpers/zod'
import { z } from 'zod'
import { config } from '../config'
import { ChatMessage } from './types'

/**
 * Zod schema for the structured response requested from the model in
 * `processChunk`: an object with a single `words` array of strings.
 */
const FilteredWords = z.object({
  words: z.array(z.string()),
})

/**
 * A single word together with its numeric weight.
 *
 * In `getTopWords` the weight starts out as a usage ratio between the two
 * speakers and is returned normalised so that each person's weights sum to 1.
 */
export type WordItem = {
  /** The word itself (lower-cased by `getTopWords`). */
  text: string
  /** Numeric weight attached to the word. */
  value: number
}

/**
 * Result of `getTopWords`: one entry per person, each carrying that person's
 * name and their list of weighted words.
 */
export type WordCloudResult = {
  people: {
    /** Sender name as it appears on the chat messages. */
    name: string
    /** Weighted words associated with this person. */
    topWords: WordItem[]
  }[]
}

// Module-wide OpenAI client used by `processChunk`.
// `dangerouslyAllowBrowser: true` opts in to running the SDK in a browser
// context.
// NOTE(review): together with the REACT_APP_-prefixed env variable this
// presumably means the API key is shipped to the client bundle — confirm that
// this is intended, or route the calls through a backend proxy.
const openai = new OpenAI({
  apiKey: process.env.REACT_APP_OPENAI_API_KEY,
  dangerouslyAllowBrowser: true,
})

/**
 * Splits `words` into consecutive batches of at most `chunkSize` entries,
 * preserving order. The last batch may be shorter; an empty input yields an
 * empty list of batches.
 *
 * @param words - Words to split.
 * @param chunkSize - Maximum number of words per batch (defaults to 70).
 * @returns The batches, in input order.
 * @throws RangeError if `chunkSize` is not a positive number. Without this
 *   guard a size of 0 or a negative size would loop forever, and `NaN` would
 *   silently drop every word.
 */
const createAdaptiveChunks = (words: string[], chunkSize = 70): string[][] => {
  // `!(x > 0)` also rejects NaN, which a plain `x <= 0` check would let through.
  if (!(chunkSize > 0)) {
    throw new RangeError(
      `chunkSize must be a positive number, received ${chunkSize}`
    )
  }

  const chunks: string[][] = []
  for (let start = 0; start < words.length; start += chunkSize) {
    chunks.push(words.slice(start, start + chunkSize))
  }
  return chunks
}

/**
 * Builds the user prompt that asks the model to drop insignificant words from
 * a batch. The words are appended space-separated after the instructions.
 *
 * The instruction text previously contained a truncated clause ("Slang and
 * insults that le should also be kept"); the stray fragment has been removed
 * so the sentence reads cleanly.
 *
 * NOTE(review): the closing sentence of the prompt mentions "ratios and
 * values", although only the bare words are included here — left as is
 * pending a prompt review.
 *
 * @param words - Words of one batch.
 * @returns The complete prompt text.
 */
const createPrompt = (words: string[]): string => {
  return `Filter this list of words, removing insignificant entries such as conjunctions, variations of "ok", "k", or random numbers. Be very judicious in what words you remove. Nouns, Verbs and adjectives should almost always be kept. Slang and insults should also be kept. In the end any word that seems unique to that person in the chat should NOT BE REMOVED. But if there are things that appear like word cutoffs like "ems" or random numbers that don't seem to have any particular meaning in the context appear, those should be removed. Keep the original ratios and values for the words you retain.

  Words: ${words.join(' ')}`
}

/**
 * Asks the chat model which words of one batch are worth keeping.
 *
 * The request uses the `FilteredWords` schema as its response format and a
 * temperature of 0. Any error thrown while issuing or reading the request is
 * logged and turned into an empty result, so the returned promise does not
 * reject because of it.
 *
 * @param chunk - Words of one batch.
 * @returns The words reported by the model, or `[]` when nothing was parsed
 *   or the request failed.
 */
const processChunk = async (chunk: string[]): Promise<string[]> => {
  const userPrompt = createPrompt(chunk)

  try {
    const response = await openai.beta.chat.completions.parse({
      model: config.textModel,
      messages: [
        {
          role: 'system',
          content:
            'You are an expert at filtering significant words from a list.',
        },
        { role: 'user', content: userPrompt },
      ],
      response_format: zodResponseFormat(FilteredWords, 'filtered_words'),
      temperature: 0,
    })

    // Only the first choice is consulted; a missing parse result means "keep nothing".
    const parsedReply = response.choices[0].message.parsed
    const keptWords = parsedReply ? parsedReply.words : undefined
    return keptWords ? keptWords : []
  } catch (error) {
    console.error('Error processing chunk:', error)
    return []
  }
}

/**
 * Returns the entries of `words` whose `text` is contained in `wordsToKeep`,
 * in their original order. The input array is not modified and the returned
 * entries are the same objects as in the input.
 *
 * @param words - Candidate word items.
 * @param wordsToKeep - Texts that should survive the filter.
 * @returns The surviving word items.
 */
const filterWords = (
  words: WordItem[],
  wordsToKeep: Set<string>
): WordItem[] => {
  const retained: WordItem[] = []
  for (const item of words) {
    if (wordsToKeep.has(item.text)) {
      retained.push(item)
    }
  }
  return retained
}

/**
 * Computes, for the two sides of a chat, the words that are most
 * characteristic of each side.
 *
 * Steps:
 * 1. Count lower-cased words per side. The first sender encountered is
 *    "person 1"; every other sender is pooled into the second bucket, whose
 *    reported name is the second distinct sender seen.
 * 2. Score each word with the smoothed ratio `(count1 + 1) / (count2 + 1)` and
 *    sort descending.
 * 3. Take up to `2 * LIMIT` words from each end of that ranking as candidates
 *    and let the model (`processChunk`) discard insignificant ones.
 * 4. Keep up to `LIMIT` words per side (ratio > 1 for person 1, ratio <= 1 —
 *    inverted — for person 2) and normalise each side's values to sum to 1.
 *
 * Fixes compared with the previous implementation:
 * - When there were fewer than `4 * LIMIT` distinct words, the top and bottom
 *   slices overlapped, so the same word objects appeared twice: they were sent
 *   to the model twice, could appear twice in a person's list, and were
 *   divided twice by the in-place normalisation. Candidates are now unique.
 * - Messages without a sender are skipped instead of being attributed to one
 *   of the two sides.
 * - The first sender is remembered instead of rebuilding an array from the
 *   name set for every message.
 * - Normalisation no longer mutates shared objects.
 *
 * @param messages - Chat messages to analyse.
 * @returns The two people with their normalised top words.
 * @throws Error if fewer than two distinct senders are found.
 */
export const getTopWords = async (
  messages: ChatMessage[]
): Promise<WordCloudResult> => {
  const LIMIT = 85
  // Runs of ASCII letters delimited by word boundaries. A run glued to digits
  // or underscores (e.g. "abc123") does not match at all, so no separate digit
  // check is needed. Non-ASCII letters act as separators ("café" -> "caf").
  const wordRegex = /\b[a-z]+\b/gi

  const names = new Set<string>()
  const count1 = new Map<string, number>()
  const count2 = new Map<string, number>()
  let firstName: string | undefined

  for (const { message, user } of messages) {
    // A message without a sender cannot be attributed to either side.
    if (!user) continue

    names.add(user)
    if (firstName === undefined) firstName = user

    const countMap = user === firstName ? count1 : count2
    for (const word of message.toLowerCase().match(wordRegex) ?? []) {
      countMap.set(word, (countMap.get(word) ?? 0) + 1)
    }
  }

  if (names.size < 2) {
    throw new Error('Could not identify two distinct persons in the chat')
  }

  const [person1, person2] = Array.from(names)

  // Add-one smoothing keeps the ratio finite when a word is used by one side only.
  const allWords = new Set([...count1.keys(), ...count2.keys()])
  const ratios: WordItem[] = Array.from(allWords, (word) => ({
    text: word,
    value: ((count1.get(word) ?? 0) + 1) / ((count2.get(word) ?? 0) + 1),
  }))
  ratios.sort((a, b) => b.value - a.value)

  // Candidates are the two ends of the ranking. When the ranking is short
  // enough that both ends would overlap, take every word exactly once.
  const windowSize = 2 * LIMIT
  const candidates =
    ratios.length > 2 * windowSize
      ? [...ratios.slice(0, windowSize), ...ratios.slice(-windowSize)]
      : ratios

  const chunks = createAdaptiveChunks(candidates.map((w) => w.text))
  const processedChunks = await Promise.all(
    chunks.map(async (chunk, index) => {
      const kept = await processChunk(chunk)
      console.log(`Processed chunk ${index + 1}/${chunks.length}`)
      console.log('Words in this chunk:', chunk)
      console.log('Words kept after filtering:', kept)
      return kept
    })
  )
  const wordsToKeep = new Set(processedChunks.flat())
  const filteredWords = filterWords(candidates, wordsToKeep)

  // Returns copies whose values sum to 1; an empty list stays empty.
  const normalize = (items: WordItem[]): WordItem[] => {
    const total = items.reduce((acc, item) => acc + item.value, 0)
    return items.map((item) => ({ ...item, value: item.value / total }))
  }

  // `filteredWords` is still sorted descending, so person 1's strongest words
  // are at the front and person 2's strongest words are at the back.
  const topWords1 = normalize(
    filteredWords.filter((w) => w.value > 1).slice(0, LIMIT)
  )
  const topWords2 = normalize(
    filteredWords
      .filter((w) => w.value <= 1)
      .slice(-LIMIT)
      // Invert so that a larger value means "more typical of person 2".
      .map((w) => ({ ...w, value: 1 / w.value }))
  )

  return {
    people: [
      { name: person1, topWords: topWords1 },
      { name: person2, topWords: topWords2 },
    ],
  }
}
