import type { ThreatKey } from "@interfaces/SerializedThreat";
import type { IntrospectionsResponse } from "@queries/useIntrospections";
import { type LexerToken, TOKEN_TYPE } from "@utils/Lexer";
import { getFieldConfig } from "@utils/getFieldConfig";

/** Operators whose operand must be a relative time span such as "2 days". */
const RELATIVE_TIME_OPERATORS: readonly string[] = ["WITHIN", "MORE_THAN", "WITHIN_NEXT"];

/** Time units (singular and plural) accepted inside a relative time span. */
const RELATIVE_TIME_UNITS: readonly string[] = [
  "hour",
  "day",
  "week",
  "month",
  "year",
  "hours",
  "days",
  "weeks",
  "months",
  "years",
];

/**
 * Tells whether `value` has the shape "<amount> <unit>", e.g. "2 days" or "1 year".
 * The two parts must be separated by exactly one space.
 */
function isRelativeTimeSpan(value: string): boolean {
  const parts = value.split(" ");
  if (parts.length !== 2) {
    return false;
  }

  const [amount = "", unit = ""] = parts;

  // Number("") (and any whitespace-only string) coerces to 0, so an empty amount
  // has to be rejected explicitly; Number.isFinite also rules out NaN and Infinity.
  if (amount.trim() === "" || !Number.isFinite(Number(amount))) {
    return false;
  }

  return RELATIVE_TIME_UNITS.includes(unit);
}

/**
 * Validates the structure of a lexed search query.
 *
 * Checks are performed in this order, and the first failing one throws:
 * 1. no VALUE token named "STRING_VALUE" may carry an empty string;
 * 2. a VALUE directly following a WITHIN / MORE_THAN / WITHIN_NEXT operator must
 *    look like "<amount> <unit>" (e.g. "2 days", "8 months", "1 year");
 * 3. the query must contain the same number of FIELD, OPERATOR and VALUE tokens;
 * 4. every FIELD token must resolve through `getFieldConfig`;
 * 5. N LOGIC tokens require at least N + 1 FIELD tokens;
 * 6. parentheses must be balanced and never closed before being opened;
 * 7. no two consecutive tokens may share the same name (parentheses excepted).
 *
 * @param introspections Introspection data handed to `getFieldConfig` to look up field names.
 * @param tokens Lexer output for the query, in source order.
 * @returns `true` when every check passes.
 * @throws Error with a user-facing message describing the first failed check.
 */
function validateTokens(introspections: IntrospectionsResponse, tokens: LexerToken[]): boolean {
  const fieldTokens = tokens.filter((token: LexerToken) => token.type === TOKEN_TYPE.FIELD);
  const operatorTokens = tokens.filter((token: LexerToken) => token.type === TOKEN_TYPE.OPERATOR);
  const valueTokens = tokens.filter((token: LexerToken) => token.type === TOKEN_TYPE.VALUE);
  const logicalTokens = tokens.filter((token: LexerToken) => token.type === TOKEN_TYPE.LOGIC);

  // Empty string values are considered invalid.
  const hasEmptyStringValue = valueTokens.some(
    (token: LexerToken) => token.name === "STRING_VALUE" && token.value === "",
  );
  if (hasEmptyStringValue) {
    throw new Error("Search query is invalid. Please check your query for empty string values.");
  }

  // A value that directly follows a relative-time operator must be a time span.
  for (let index = 1; index < tokens.length; index++) {
    const operator = tokens[index - 1];
    const operand = tokens[index];
    if (!operator || !operand) {
      continue;
    }

    const isRelativeTimeComparison =
      operator.type === TOKEN_TYPE.OPERATOR &&
      RELATIVE_TIME_OPERATORS.includes(operator.name) &&
      operand.type === TOKEN_TYPE.VALUE;

    if (isRelativeTimeComparison && !isRelativeTimeSpan(operand.value)) {
      throw new Error(
        `Search query is invalid. The value after '${operator.name}' must be in the format 'X hours/days/weeks/months/years', e.g., '2 days', '8 months', '1 year'.`,
      );
    }
  }

  // Every condition is a (field, operator, value) triple, so the three counts must match.
  if (fieldTokens.length !== operatorTokens.length || fieldTokens.length !== valueTokens.length) {
    throw new Error("Search query is invalid. Please check your query for invalid tokens.");
  }

  // A field is rejected when getFieldConfig returns a falsy value for it.
  const hasUnknownField = fieldTokens.some(
    (token: LexerToken) => !getFieldConfig(introspections, token.value as ThreatKey),
  );
  if (hasUnknownField) {
    throw new Error("Search query is invalid. Please check your query for invalid field names.");
  }

  // Each logical token joins two conditions: N logical tokens need at least N + 1 fields.
  if (logicalTokens.length > 0 && fieldTokens.length < logicalTokens.length + 1) {
    throw new Error("Search query is invalid. Please check your query for invalid tokens.");
  }

  // Track nesting depth so that both a count mismatch and a closing parenthesis
  // that appears before its opening one are rejected.
  // NOTE(review): assumes the lexer names "(" PAREN_L and ")" PAREN_R — confirm against @utils/Lexer.
  let depth = 0;
  for (const token of tokens) {
    if (token.name === "PAREN_L") {
      depth += 1;
    } else if (token.name === "PAREN_R") {
      depth -= 1;
      if (depth < 0) {
        break;
      }
    }
  }
  if (depth !== 0) {
    throw new Error("Search query is invalid. Please check parenthesis in your query.");
  }

  // Two identical tokens in a row are invalid, except for (nested) parentheses.
  const hasRepeatedToken = tokens.some((token: LexerToken, index: number) => {
    const previous = tokens[index - 1];
    return (
      previous !== undefined &&
      token.name === previous.name &&
      token.name !== "PAREN_L" &&
      token.name !== "PAREN_R"
    );
  });
  if (hasRepeatedToken) {
    throw new Error("Search query is invalid. Please check your query for duplicate tokens.");
  }

  return true;
}

export default validateTokens;
