/* eslint-disable @typescript-eslint/no-unused-vars */
/* eslint-disable no-continue */
/**
 * A single classified piece of text produced by the tokenizer in this module.
 */
export type Token = {
  /** The raw text of this token. */
  token: string;
  /** True when the token passed the word test. */
  isWord: boolean;
  /** True when the token is a single whitespace character. */
  isSpace: boolean;
  /** True when the token is a blank marker ("____" or "___"). */
  isBlank: boolean;
  /** Running index among word tokens, or -1 when the token is not a word. */
  wordIndex: number;
  /** Running index among numbered blanks ("____"), or -1 otherwise. */
  blankIndex: number;
};

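// TODO: lookbehind regex is not supported in Safari, so the single-regex version
// below is disabled; the active `isWord` performs the same check with three
// lookbehind-free tests.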
// function isWord(token: string): boolean {
//   return /^(?=[\p{L}\p{N}])[\p{L}\p{N}\-'*]+(?<=[\p{L}\p{N}])$/gu.test(token);
// }

/**
 * Tells whether a token is the four-underscore blank marker.
 *
 * @param token Text to check.
 * @returns True only for exactly "____".
 */
function isNumberedBlank(token: string): boolean {
  const numberedBlankMarker = "____";
  return numberedBlankMarker === token;
}

/**
 * Tells whether a token is one of the blank markers.
 *
 * @param token Text to check.
 * @returns True for exactly "____" or "___", false for anything else.
 */
function isBlank(token: string): boolean {
  const blankMarkers = ["____", "___"];
  return blankMarkers.includes(token);
}

/**
 * Tells whether a token counts as a word.
 *
 * A word consists only of Unicode letters, Unicode numbers, hyphens,
 * apostrophes and asterisks, and must both begin and end with a letter or a
 * number.
 *
 * @param token Text to check.
 * @returns True when the token satisfies all of the above.
 */
function isWord(token: string): boolean {
  // Reject anything containing a character outside the allowed set first.
  if (!/^[\p{L}\p{N}\-'*]+$/u.test(token)) {
    return false;
  }

  // The connector characters (- ' *) may only appear in the interior.
  if (!/^[\p{L}\p{N}]/u.test(token)) {
    return false;
  }

  return /[\p{L}\p{N}]$/u.test(token);
}

/**
 * Tells whether a token is exactly one whitespace character.
 *
 * @param token Text to check.
 * @returns True for a single whitespace character; false for the empty
 *   string, longer strings, or non-whitespace.
 */
function isSpace(token: string): boolean {
  const singleWhitespace = /^\s$/u;
  return singleWhitespace.test(token);
}

/**
 * Splits a string into up to three parts: its leading punctuation run, the
 * remaining middle, and its trailing punctuation run.
 *
 * "Punctuation" here means any Unicode punctuation character plus the acute
 * accent and the backtick. Parts that would be empty are omitted.
 *
 * @param string Text to split.
 * @returns The non-empty parts, in their original left-to-right order.
 */
function findMatches(string: string): string[] {
  const leadingMatch = string.match(/^([\p{P}´`]+)/u);
  const leading = leadingMatch ? leadingMatch[0] : "";

  // Trailing punctuation is searched only in what follows the leading run, so
  // an all-punctuation string yields a single part rather than two.
  const remainder = string.slice(leading.length);
  const trailingMatch = remainder.match(/([\p{P}´`]+)$/u);
  const trailing = trailingMatch ? trailingMatch[0] : "";

  const core = trailing ? remainder.slice(0, -trailing.length) : remainder;

  return [leading, core, trailing].filter((part) => part.length > 0);
}
// Module-level counter used to number blanks; it is only set back to zero by
// resetBlankIndex().
let runningBlankIndex = 0;

/**
 * Sets the module-level blank counter back to zero.
 */
export function resetBlankIndex(): void {
  runningBlankIndex = 0;
}

/**
 * Converts whitespace-split pieces of text into first-order token groups.
 *
 * Every non-empty piece yields exactly one group:
 * - a single character or a word becomes a one-token group;
 * - a blank marker ("____" or "___") becomes a one-token blank group, and
 *   only the four-underscore form receives a running `blankIndex`;
 * - anything else is split by `findMatches` into leading punctuation, middle
 *   and trailing punctuation, each becoming one token of the same group.
 *
 * Word indices start at 0 on every call. Blank indices come from the
 * module-level `runningBlankIndex`, which is incremented before use (so the
 * first numbered blank after a reset gets 1) and keeps counting across calls
 * until `resetBlankIndex()` is called.
 *
 * NOTE(review): a blank directly followed by punctuation (e.g. "____.") does
 * not equal either blank marker, so it falls through to `findMatches` and is
 * not flagged as a blank — confirm whether that is intended.
 *
 * @param shattered Pieces of the text, whitespace separators included; empty
 *   strings are skipped.
 * @returns One group per non-empty piece, in input order.
 */
function groupTokens(shattered: string[]): Token[][] {
  const groupings: Token[][] = [];
  let runningWordIndex = -1;

  // Builds a non-blank token, classifying the text once and advancing the
  // word counter when it is a word.
  const toTextToken = (text: string): Token => {
    const word = isWord(text);
    if (word) runningWordIndex += 1;
    return {
      isSpace: isSpace(text),
      isWord: word,
      isBlank: false,
      blankIndex: -1,
      token: text,
      wordIndex: word ? runningWordIndex : -1,
    };
  };

  shattered.forEach((piece) => {
    if (piece.length === 0) return;

    // Single characters (spaces, lone punctuation, one-letter words) and whole
    // words need no further splitting.
    if (piece.length === 1 || isWord(piece)) {
      groupings.push([toTextToken(piece)]);
      return;
    }

    if (isBlank(piece)) {
      let blankIndex = -1;
      if (isNumberedBlank(piece)) {
        runningBlankIndex += 1;
        blankIndex = runningBlankIndex;
      }
      groupings.push([
        {
          isSpace: false,
          isWord: false,
          isBlank: true,
          blankIndex,
          token: piece,
          wordIndex: -1,
        },
      ]);
      return;
    }

    // Mixed piece: peel punctuation off both ends and keep the parts together
    // in one group. Pushing in place avoids re-copying the accumulated array
    // for every piece.
    groupings.push(findMatches(piece).map((part) => toTextToken(part)));
  });

  return groupings;
}

/**
 * Second-order grouping pass: attaches a detached closing mark to the group
 * that precedes it.
 *
 * Whenever a group is followed by a one-token whitespace group and then a
 * one-token group whose text contains a closing-bracket punctuation character
 * (Unicode category Pe), a backtick or an acute accent, the whitespace and the
 * closing token are concatenated into a single extra token and appended to the
 * current group. The two consumed groups are dropped from the output. All other
 * groups are passed through unchanged.
 *
 * The appended token has every flag set to false and both indices set to -1.
 *
 * @param groupings First-order groups, in text order.
 * @returns A new array of groups with the merges applied; the input array is
 *   not modified.
 */
function groupSecondOrder(groupings: Token[][]): Token[][] {
  // Deliberately not global: a shared /g regex would carry lastIndex between
  // test() calls and give position-dependent results.
  const closingMark = /[\p{Pe}`´]/u;
  const regrouped: Token[][] = [];

  let index = 0;
  while (index < groupings.length) {
    const group = groupings[index];
    const spaceGroup = groupings[index + 1];
    const closerGroup = groupings[index + 2];

    if (
      spaceGroup &&
      closerGroup &&
      spaceGroup.length === 1 &&
      closerGroup.length === 1 &&
      spaceGroup[0].isSpace &&
      closingMark.test(closerGroup[0].token)
    ) {
      regrouped.push([
        ...group,
        {
          isSpace: false,
          isWord: false,
          isBlank: false,
          blankIndex: -1,
          token: `${spaceGroup[0].token}${closerGroup[0].token}`,
          wordIndex: -1,
        },
      ]);
      // Skip the whitespace and closing groups that were just absorbed.
      index += 3;
    } else {
      regrouped.push(group);
      index += 1;
    }
  }

  return regrouped;
}

/**
 * Tokenizes text into groups of classified tokens.
 *
 * The text is split on single whitespace characters (each separator is kept
 * as its own piece), turned into first-order groups by `groupTokens`, and
 * then passed through `groupSecondOrder`, which folds a "space + closing
 * mark" pair into the group before it.
 *
 * Numbered blanks advance the module-level blank counter, so numbering
 * continues across calls until `resetBlankIndex()` is invoked.
 *
 * @param text The string to pretreat.
 * @returns The token groups in text order. The declared type allows `null`,
 *   but this implementation always returns an array.
 */
export default function pretreatText(text: string): Token[][] | null {
  // The capturing group makes split() keep every whitespace separator.
  const pieces = text.split(/(\s)/gu);
  const firstOrderGroups = groupTokens(pieces);

  return groupSecondOrder(firstOrderGroups);
}
