import { Commands, ITranscriptWord } from "../components/Editor/IEditor";
import { abbreviations } from "../components/Editor/constants";
import { IReceivedTranscript } from "../components/Libraries/ILibraries";

// Thresholds consulted by paragraphTokens. The running count they are compared against is
// incremented once per content token, whatever that token's text is.
// Strictly above this count (and strictly below UPPER_WORD_LIMIT) a paragraph break may be
// inserted right after a punctuation token.
const LOWER_WORD_LIMIT = 200;
// At or above this count a paragraph break is inserted only where the gap to the next token
// is at least SECOND_LIMIT.
const UPPER_WORD_LIMIT = 300;
// Minimum difference between the next token's startTime and the current token's endTime that
// allows a break once UPPER_WORD_LIMIT is reached.
// NOTE(review): presumably seconds, going by the name - confirm the unit of startTime/endTime.
const SECOND_LIMIT = 2;

/**
 * Tells whether `text` consists solely of one or more ASCII digits (0-9).
 * The empty string, signs, decimal separators and surrounding whitespace all yield `false`.
 */
function isNumber(text: string): boolean {
    return /^[0-9]+$/.test(text);
}
/**
 * Tells whether `text` contains at least one of the characters `.`, `?` or `!`.
 * The pattern is not anchored, so a match anywhere in the string counts
 * (e.g. "word." and "3.14" both yield `true`).
 */
function isPunctuation(text: string): boolean {
    return /[.?!]/.test(text);
}
/**
 * Tells whether `text` is found in the imported `abbreviations` collection,
 * using its `includes` method.
 * NOTE(review): assumes `abbreviations` is an array of strings, which makes this an
 * exact, case-sensitive element match - confirm against the Editor constants.
 */
function isAbbreviation(text: string): boolean {
    return abbreviations.includes(text);
}


/**
 * Value returned by `paragraphTokens`.
 */
interface ITokenParagrapher {
    /**
     * The message to use from here on: the input itself when it was not final,
     * otherwise a copy whose content is a token array that may include
     * paragraph-break command tokens.
     */
    transcriptMessage: IReceivedTranscript;
    /**
     * Number of tokens counted since the last inserted paragraph break,
     * meant to be fed back as `outerTokenCount` on the next call.
     */
    tokenCount: number;
}

/**
 * Copies a final transcript message and inserts `Commands.NEW_PARAGRAPH` tokens into its content.
 *
 * Every content token increments a running count (seeded from `outerTokenCount`):
 * - while the count is strictly between LOWER_WORD_LIMIT and UPPER_WORD_LIMIT, a break is
 *   inserted after a token that contains punctuation, unless the token before it is a
 *   number or a known abbreviation;
 * - once the count reaches UPPER_WORD_LIMIT, a break is inserted after a token only when the
 *   gap to the next token's start is not below SECOND_LIMIT.
 * Each inserted break resets the count to zero and is logged to the console.
 *
 * Non-final messages are returned as-is together with the unchanged count.
 *
 * @param transcriptMessage Message whose `transcript.content` is a token array or a JSON string of one.
 * @param outerTokenCount Count carried over from a previous call; a missing value is treated as 0.
 * @returns The (possibly rewritten) message and the count to carry into the next call.
 * @throws Whatever `JSON.parse` throws when string content is not valid JSON; the parsed
 *         value is used without further validation.
 */
const paragraphTokens = (transcriptMessage: IReceivedTranscript, outerTokenCount?: number): ITokenParagrapher => {
    // A missing (or otherwise falsy) carried-over count starts the tally at zero.
    const carriedCount = outerTokenCount || 0;

    // Interim messages pass through untouched and leave the tally as it was.
    if (!transcriptMessage.isFinal) {
        return { transcriptMessage, tokenCount: carriedCount };
    }

    // Content is either a JSON string or an already-parsed token array.
    const rawContent = transcriptMessage.transcript.content;
    const tokens: ITranscriptWord[] = typeof rawContent === 'string'
        ? JSON.parse(rawContent)
        : rawContent as ITranscriptWord[];

    // The copy shares this array, so everything pushed below ends up in the returned message.
    const withBreaks: ITranscriptWord[] = [];
    const paragraphedMessage: IReceivedTranscript = {
        ...transcriptMessage,
        transcript: {
            ...transcriptMessage.transcript,
            content: withBreaks,
        },
    };

    // Appends a zero-length NEW_PARAGRAPH command placed at the end time of `after`, then logs why.
    const insertBreak = (after: ITranscriptWord, logMessage: string): void => {
        withBreaks.push({
            startTime: after.endTime,
            endTime: after.endTime,
            confidence: 1,
            text: Commands.NEW_PARAGRAPH,
        });
        console.log(logMessage);
    };

    let tokenCount = carriedCount;

    for (let i = 0; i < tokens.length; i++) {
        const token = tokens[i];
        tokenCount++;
        withBreaks.push(token);

        if (LOWER_WORD_LIMIT < tokenCount && tokenCount < UPPER_WORD_LIMIT) {
            // Needs a preceding token in this same message to inspect, and the current
            // token must carry punctuation that does not follow a number or abbreviation.
            const breakHere = i > 0
                && isPunctuation(token.text)
                && !isNumber(tokens[i - 1].text)
                && !isAbbreviation(tokens[i - 1].text);
            if (!breakHere) continue;

            insertBreak(token, "Inserted new paragraph after LOWER_WORD_LIMIT < tokenCount && tokenCount < UPPER_WORD_LIMIT");
            tokenCount = 0;
        } else if (tokenCount >= UPPER_WORD_LIMIT) {
            // The last token has no successor to measure a gap against.
            const isLastToken = i === tokens.length - 1;
            if (isLastToken) continue;

            // Past the upper limit only the gap to the next token decides; punctuation is ignored.
            if (tokens[i + 1].startTime - token.endTime < SECOND_LIMIT) continue;

            insertBreak(token, "Inserted new paragraph after tokenCount >= UPPER_WORD_LIMIT");
            tokenCount = 0;
        }
    }

    return {
        transcriptMessage: paragraphedMessage,
        tokenCount,
    };
}

export {
    paragraphTokens
}
