import type { ThreatKey } from "@interfaces/SerializedThreat";

/**
 * Error type for validation failures.
 *
 * The instance's `name` is set to "ValidationError" so it can be told apart
 * from a plain `Error` in logs and in `catch` blocks.
 */
export class ValidationError extends Error {
  /**
   * @param message Optional human-readable description of the failure.
   */
  constructor(message?: string) {
    super(message);
    // When the compile target is ES5, `super(...)` on a built-in such as Error
    // returns a plain Error and the subclass prototype is lost, which breaks
    // `instanceof ValidationError`. Restoring it is a no-op on modern targets.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = "ValidationError";
  }
}

/**
 * A single token produced by the lexer.
 */
export interface LexerToken {
  /** Which kind of token this is (one of the keys of `LEXER_TOKENS`). */
  name: LexerTokenName;
  /**
   * Token payload. Usually the matched text, but some rules emit something
   * else (e.g. string literals are emitted without their quotes).
   */
  value: string;
  /** Offset into the lexer input associated with the beginning of the token. */
  start: number;
  /** Offset into the lexer input just past the end of the token. */
  end: number;
  /** Logical group the token belongs to. */
  type: TOKEN_TYPE;
}

// Building blocks for numeric literals (regex source fragments).
// Integer part: optional minus, then either a single 0 or a non-zero digit followed by digits.
const reIntValue = "(-?0|-?[1-9][0-9]*)";
// Fraction part: a dot followed by at least one digit.
const reFractionPart = "\\.[0-9]+";
// Exponent part: e/E, optional sign, at least one digit.
const reExponentPart = "[eE][+-]?[0-9]+";

/** Matches an integer literal such as `0`, `-7` or `42`. */
const intRegex = new RegExp(reIntValue);

/**
 * Matches a float literal. The alternatives are tried in this order:
 * int+fraction+exponent, int+fraction, int+exponent, bare fraction (`.5`).
 */
const floatRegex = new RegExp(
  [
    `${reIntValue}${reFractionPart}${reExponentPart}`,
    `${reIntValue}${reFractionPart}`,
    `${reIntValue}${reExponentPart}`,
    reFractionPart,
  ].join("|"),
);

// Line terminators that may not appear inside a string literal:
// \n, \r and the Unicode separators U+2028 / U+2029.
const reLineTerminators = "\\n\\r\\u2028\\u2029";
// An escape sequence: a backslash followed by one of \ " / b f n r t
const reEscapedChar = '\\\\[\\\\"/bfnrt]';
// A unicode escape: backslash, `u`, then exactly four hex digits (\uXXXX)
const reEscapedUnicode = "\\\\u[0-9A-Fa-f]{4}";
// A plain string character: anything except a double quote, a backslash or a line terminator
const reStringChar = `[^\\\\"\\\\\\\\${reLineTerminators}]`;
// A double-quoted string literal: an opening quote, any number of escapes / plain characters,
// and an OPTIONAL closing quote (so an unterminated string such as `"abc` still matches)
const stringRegex = new RegExp(`\\"(${reEscapedChar}|${reEscapedUnicode}|${reStringChar})*"?`);

// Field name regex: one or more identifiers (a letter or underscore followed by
// letters, digits or underscores) joined by dots, e.g. `foo`, `foo.bar_1`
const nameRegex = /[_A-Za-z][_0-9A-Za-z]*(\.[_A-Za-z][_0-9A-Za-z]*)*/;

// Negative lookahead: the next character must NOT be a letter, digit or underscore.
// Appended to keyword patterns so that a keyword does not match as a prefix of a
// longer identifier (e.g. `or` must not match the beginning of `order`).
const reNotFollowedByName = "(?![_0-9A-Za-z])";

// One or more horizontal whitespace characters: spaces, tabs, vertical tabs, form feeds
// and non-breaking spaces. Line breaks (\n, \r) are not part of this class.
const whitespaceRegex = /[ \t\v\f\u00A0]+/;

/**
 * Whole-string, case-insensitive test for anything that could be translated to
 * a CVE ID: an optional `cve-`/`mve-` prefix, 1-4 digits, an optional dash and
 * up to 5 more digits (e.g. 'CVE-2019-1234', '2019-1234', '1234').
 */
export const CVE_ID_LIKE_REGEX = /^((?:[cm]ve-)?\d{1,4}-?(?:\d{1,5})?|(?<![cm]ve-)\d{1,5})$/i;

/**
 * Case-insensitive test for a complete CVE ID (`cve-20YY-` followed by 4-5
 * digits) appearing anywhere inside a larger text.
 */
export const CONTAINS_CVE_ID_REGEX = /cve-20\d{2}-\d{4,5}/i;

/**
 * Case-insensitive test for a text that is exactly one complete CVE ID,
 * e.g. 'CVE-2008-3844' or 'CVE-2008-38445'.
 */
export const FULL_CVE_ID_REGEX = /^cve-20\d{2}-\d{4,5}$/i;

/**
 * Patterns for the various ways CVE IDs can appear in free text.
 *
 * All patterns are case-insensitive AND global (`g`): a global RegExp keeps
 * `lastIndex` between `test()`/`exec()` calls, so reset `lastIndex` (or use
 * `String.prototype.match`/`matchAll`/`replace`) when reusing them.
 */
export const CVE_FORMATS = {
  // Possibly incomplete ID: optional `cve-` prefix, `20YY-`, then 1-5 digits. It must be at the
  // start of the text or preceded by a non-letter; that preceding character is part of the match.
  PARTIAL: /(?:^|[^a-zA-Z])(?:cve-)?20\d{2}-\d{1,5}/gi,
  // One or more complete IDs, each optionally followed by whitespace.
  WHITESPACE_SEPARATED: /(?:cve-20\d{2}-\d{4,5}\s*)+/gi,
  // One or more complete IDs, each followed by a comma (optionally surrounded by whitespace).
  COMMA_SEPARATED: /(?:cve-20\d{2}-\d{4,5}\s*,\s*)+/gi,
  // One or more complete IDs, each followed by at least one \n or \r.
  NEWLINE_SEPARATED: /(?:cve-20\d{2}-\d{4,5}[\n\r]+)+/gi,
};

// Tokens that are used in the lexer and parser.
// Every key maps to the identical string, and `LexerTokenName` below is derived from the keys,
// so adding an entry here automatically extends the set of valid token names.
export const LEXER_TOKENS = {
  CVE_ID: "CVE_ID",
  // Emitted for a character that no lexer rule matches
  UNEXPECTED: "UNEXPECTED",
  NOT: "NOT",
  IN: "IN",
  PAREN_L: "PAREN_L",
  PAREN_R: "PAREN_R",
  EQUALS: "EQUALS",
  NOT_EQUALS: "NOT_EQUALS",
  GREATER: "GREATER",
  GREATER_EQUAL: "GREATER_EQUAL",
  LESS: "LESS",
  LESS_EQUAL: "LESS_EQUAL",
  CONTAINS: "CONTAINS",
  NOT_CONTAINS: "NOT_CONTAINS",
  STRING_VALUE: "STRING_VALUE",
  INT_VALUE: "INT_VALUE",
  FLOAT_VALUE: "FLOAT_VALUE",
  TRUE: "TRUE",
  FALSE: "FALSE",
  NONE: "NONE",
  ASC: "ASC",
  DESC: "DESC",
  STARTSWITH: "STARTSWITH",
  ENDSWITH: "ENDSWITH",
  NAME: "NAME",
  ORDER_BY: "ORDER_BY",
  AND: "AND",
  OR: "OR",
  HISTORY: "HISTORY",
  IS_NOT: "IS_NOT",
  IS: "IS",
  EMPTY: "EMPTY",
  WITHIN: "WITHIN",
  WITHIN_NEXT: "WITHIN_NEXT",
  MORE_THAN: "MORE_THAN",
  // Custom tokens, specifically for Notes custom field
  AUTHOR_IS: "AUTHOR_IS",
  AUTHOR_IS_NOT: "AUTHOR_IS_NOT",
  CREATED_AT: "CREATED_AT",
  UPDATED_AT: "UPDATED_AT",
  PRIVATE: "PRIVATE",
  PUBLIC: "PUBLIC",
  ORGANIZATION_IS: "ORGANIZATION_IS",
  ORGANIZATION_IS_NOT: "ORGANIZATION_IS_NOT",
} as const;

// Union of all token names, i.e. the keys of LEXER_TOKENS
export type LexerTokenName = keyof typeof LEXER_TOKENS;

// Grouping tokens by their logical meaning
export enum TOKEN_TYPE {
  // Field names (NAME tokens)
  FIELD = "FIELD",
  // Literal values: strings, numbers, True/False/None, empty, asc/desc, private/public, CVE IDs
  VALUE = "VALUE",
  // Comparison symbols and keyword operators (=, !=, ~, in, is, within, startswith, order by, ...)
  OPERATOR = "OPERATOR",
  // `not`, parentheses and unexpected characters
  MISC = "MISC",
  // Logical connectives: `and` / `or`
  LOGIC = "LOGIC",
  // Function-like constructs: `history(...)`
  FUNCTION = "FUNCTION",
}

/**
 * Builds the position-less part of a lexer token.
 *
 * @param name  Token name (a key of `LEXER_TOKENS`).
 * @param value Payload carried by the token, usually the matched text.
 * @param type  Logical group the token belongs to.
 * @returns An object holding exactly `name`, `value` and `type`.
 */
const token = (name: LexerTokenName, value: any, type: TOKEN_TYPE) => ({ name, value, type });

/** Mutable bookkeeping used by the lexer while it walks over the input. */
type LexerState = {
  /** Tokens that were already produced and are handed out by `lex()` before any further scanning. */
  tokens: LexerToken[];
  /** How many of the leading candidate matches returned by `scan()` are skipped at the current position. */
  remove: number;
  /** Lexer state number that is compared against each rule's `start` list in `scan()`. */
  state: number;
  /** Current offset into `input`; scanning continues from here. */
  index: number;
  /** The text being tokenized. */
  input: string;
  /** Becomes false when the lexer reports an unexpected character. */
  isValidParsing: boolean;
  /** Description of the unexpected character; empty when there is none. */
  parsingErrorMessage: string;
};

// TODO: exclude Lexer calls from the initial page render (probably saved things)
/**
 * Rule-based, longest-match lexer for the search query language.
 *
 * All rules are registered once by the constructor. At every input position
 * each rule is tried; the longest match wins and, on equal length, the rule
 * that was registered FIRST wins. The order of the `addRule` calls in
 * `setupRules` is therefore significant.
 *
 * Usage: `lexer.setInput(query).lexAll()`, or call `lex()` repeatedly until it
 * returns `undefined`.
 */
class Lexer {
  /** Scanning state; position related fields are reset by `setInput`. */
  mutableState: LexerState;
  /** Registered rules in registration order. */
  private rules: {
    pattern: RegExp;
    global: boolean;
    action: (l: any) => LexerToken;
    start: number[];
  }[] = [];
  private isInitialized = false;
  /** Never populated at the moment, see the TODO in `initialize`. */
  allFields: ThreatKey[] = [];

  constructor() {
    this.mutableState = {
      tokens: [],
      remove: 0,
      state: 0,
      index: 0,
      input: "",
      isValidParsing: true,
      parsingErrorMessage: "",
    };
    this.setupRules();
  }

  /** One-time lazy initialization hook; currently it only flips the `isInitialized` flag. */
  initialize() {
    if (this.isInitialized) {
      return;
    }

    // TODO: this could be used in order to identify correct/full fields
    // const introspections = getIntrospections();
    // this.allFields = Object.keys(introspections.fields).filter(
    //   (key: ThreatKey) => introspections.fields[key]?.constrains?.searchable !== false,
    // ) as ThreatKey[];

    this.isInitialized = true;
  }

  /**
   * Returns the content of a matched string literal without its quotes.
   *
   * The string pattern makes the closing quote optional, so the raw match may
   * be unterminated (`"abc`) or may end with an escaped quote (`"abc\"`). The
   * last character is only dropped when it really is the closing quote, i.e.
   * a `"` preceded by an even number of backslashes.
   */
  private static unquote(raw: string): string {
    let isClosed = false;
    if (raw.length > 1 && raw.endsWith('"')) {
      let backslashes = 0;
      // Index 0 is the opening quote, so the walk stops before it.
      for (let i = raw.length - 2; i > 0 && raw[i] === "\\"; i--) {
        backslashes++;
      }
      isClosed = backslashes % 2 === 0;
    }
    return isClosed ? raw.slice(1, -1) : raw.slice(1);
  }

  /** Registers every lexer rule. Registration order decides ties between equally long matches. */
  setupRules() {
    this.addRule(/order by\b/, (l: any) => token(LEXER_TOKENS.ORDER_BY, l, TOKEN_TYPE.OPERATOR));

    this.addRule(whitespaceRegex, () => {
      /* ignore whitespace */
    });

    this.addRule(new RegExp(`or${reNotFollowedByName}`), (l: any) => token(LEXER_TOKENS.OR, l, TOKEN_TYPE.LOGIC));
    this.addRule(new RegExp(`and${reNotFollowedByName}`), (l: any) => token(LEXER_TOKENS.AND, l, TOKEN_TYPE.LOGIC));

    this.addRule(new RegExp(`is not${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.IS_NOT, l, TOKEN_TYPE.OPERATOR),
    ); // custom rule which has to be transformed in 'searchQueryToUrl' function
    this.addRule(new RegExp(`is${reNotFollowedByName}`), (l: any) => token(LEXER_TOKENS.IS, l, TOKEN_TYPE.OPERATOR)); // custom rule which has to be transformed in 'searchQueryToUrl' function
    this.addRule(new RegExp(`empty${reNotFollowedByName}`), (l: any) => token(LEXER_TOKENS.EMPTY, l, TOKEN_TYPE.VALUE)); // custom rule which has to be transformed in 'searchQueryToUrl' function
    this.addRule(new RegExp(`within${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.WITHIN, l, TOKEN_TYPE.OPERATOR),
    ); // custom rule which has to be transformed in 'searchQueryToUrl' function
    this.addRule(new RegExp(`within the next${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.WITHIN_NEXT, l, TOKEN_TYPE.OPERATOR),
    );
    // A partially typed "within the next" is emitted as a NAME token carrying the full phrase.
    this.addRule(new RegExp(`within t(h?e?)?( n?e?x?t?)?${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.NAME, "within the next", TOKEN_TYPE.FIELD),
    );

    this.addRule(new RegExp(`more than${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.MORE_THAN, l, TOKEN_TYPE.OPERATOR),
    );
    // A partially typed "more than" is emitted as a NAME token carrying the full phrase.
    this.addRule(new RegExp(`more( t?h?a?n?)?${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.NAME, "more than", TOKEN_TYPE.FIELD),
    ); // custom rule which has to be transformed in 'searchQueryToUrl' function

    // custom rules for Notes custom field
    this.addRule(new RegExp(`author is${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.AUTHOR_IS, l, TOKEN_TYPE.OPERATOR),
    );
    this.addRule(new RegExp(`author is not${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.AUTHOR_IS_NOT, l, TOKEN_TYPE.OPERATOR),
    );
    // created before, created after, created within and created more than
    this.addRule(new RegExp(`created (before|after|within|more than)${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.CREATED_AT, l, TOKEN_TYPE.OPERATOR),
    );
    // updated before, updated after, updated within and updated more than
    this.addRule(new RegExp(`updated (before|after|within|more than)${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.UPDATED_AT, l, TOKEN_TYPE.OPERATOR),
    );
    // organization is and organization is not
    this.addRule(new RegExp(`organization is${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.ORGANIZATION_IS, l, TOKEN_TYPE.OPERATOR),
    );
    this.addRule(new RegExp(`organization is not${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.ORGANIZATION_IS_NOT, l, TOKEN_TYPE.OPERATOR),
    );

    this.addRule(new RegExp(`private${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.PRIVATE, l, TOKEN_TYPE.VALUE),
    );

    this.addRule(new RegExp(`(public|global)${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.PUBLIC, l, TOKEN_TYPE.VALUE),
    );

    this.addRule(new RegExp(`not${reNotFollowedByName}`), (l: any) => token("NOT", l, TOKEN_TYPE.MISC));
    this.addRule(new RegExp(`in${reNotFollowedByName}`), (l: any) => token("IN", l, TOKEN_TYPE.OPERATOR));

    this.addRule(new RegExp(`startswith${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.STARTSWITH, l, TOKEN_TYPE.OPERATOR),
    );
    this.addRule(new RegExp(`endswith${reNotFollowedByName}`), (l: any) =>
      token(LEXER_TOKENS.ENDSWITH, l, TOKEN_TYPE.OPERATOR),
    );
    this.addRule(new RegExp(`True${reNotFollowedByName}`), (l: any) => token("TRUE", l, TOKEN_TYPE.VALUE));
    this.addRule(new RegExp(`False${reNotFollowedByName}`), (l: any) => token("FALSE", l, TOKEN_TYPE.VALUE));
    this.addRule(new RegExp(`None${reNotFollowedByName}`), (l: any) => token("NONE", l, TOKEN_TYPE.VALUE));

    this.addRule(new RegExp(`asc${reNotFollowedByName}`), (l: any) => token("ASC", l, TOKEN_TYPE.VALUE));
    this.addRule(new RegExp(`desc${reNotFollowedByName}`), (l: any) => token("DESC", l, TOKEN_TYPE.VALUE));

    // custom rule for history function; the token value is the trimmed text between the parentheses
    this.addRule(/history(?:\s*\([^)]*\)?)?/, (l: any) => {
      const content = l.match(/history\s*\(([^)]*)\)?/)?.[1]?.trim() ?? "";
      return token(LEXER_TOKENS.HISTORY, content, TOKEN_TYPE.FUNCTION);
    });

    // specific case when only vip_id or part of it is provided, e.g. 'CVE-2019-1234' or '2019-1234' or '1234', case insensitive
    // (the pattern is anchored with ^ and $, so it only matches when it covers the whole input)
    this.addRule(CVE_ID_LIKE_REGEX, (l: any) => token("CVE_ID", l, TOKEN_TYPE.VALUE));
    this.addRule(nameRegex, (l: any) => token("NAME", l, TOKEN_TYPE.FIELD));

    this.addRule(
      stringRegex,
      // Drop the opening quote and, only when it is really there, the closing quote:
      (l: string) => token("STRING_VALUE", Lexer.unquote(l), TOKEN_TYPE.VALUE),
    );
    this.addRule(intRegex, (l: any) => token("INT_VALUE", l, TOKEN_TYPE.VALUE));
    this.addRule(floatRegex, (l: any) => token("FLOAT_VALUE", l, TOKEN_TYPE.VALUE));
    this.addRule(/\(/, (l: any) => token("PAREN_L", l, TOKEN_TYPE.MISC));
    this.addRule(/\)/, (l: any) => token("PAREN_R", l, TOKEN_TYPE.MISC));
    this.addRule(/=/, (l: any) => token("EQUALS", l, TOKEN_TYPE.OPERATOR));
    this.addRule(/!=/, (l: any) => token("NOT_EQUALS", l, TOKEN_TYPE.OPERATOR));
    this.addRule(/>/, (l: any) => token("GREATER", l, TOKEN_TYPE.OPERATOR));
    this.addRule(/>=/, (l: any) => token("GREATER_EQUAL", l, TOKEN_TYPE.OPERATOR));
    this.addRule(/</, (l: any) => token("LESS", l, TOKEN_TYPE.OPERATOR));
    this.addRule(/<=/, (l: any) => token("LESS_EQUAL", l, TOKEN_TYPE.OPERATOR));
    this.addRule(/~/, (l: any) => token("CONTAINS", l, TOKEN_TYPE.OPERATOR));
    this.addRule(/!~/, (l: any) => token("NOT_CONTAINS", l, TOKEN_TYPE.OPERATOR));
  }

  /**
   * Default error handling: records the failure in `mutableState` and returns
   * an UNEXPECTED token covering the single offending character.
   *
   * @param character The character no rule could match.
   * @param index     Offset of that character in the input.
   */
  unexpectedCharacterHandle(character: string, index: number): LexerToken {
    this.mutableState.isValidParsing = false;
    this.mutableState.parsingErrorMessage = `Unexpected character at index ${index}: ${character}`;
    return {
      name: "UNEXPECTED",
      value: character,
      type: TOKEN_TYPE.MISC,
      start: index,
      end: index + 1,
    };
  }

  /**
   * Registers a rule.
   *
   * @param pattern RegExp to match at the current position. A non-global
   *                pattern is recompiled with the `g` flag so that `lastIndex`
   *                can be used to position it; its `m`, `i`, `s` and `u` flags
   *                are kept.
   * @param action  Called with the match (full text first, then the capture
   *                groups). Returns a token, an array of tokens, or
   *                `undefined` to produce nothing (e.g. whitespace).
   * @param start   Lexer states in which the rule is active; anything that is
   *                not an array means `[0]`.
   * @returns The lexer itself, for chaining.
   */
  addRule(pattern: any, action: any, start?: any) {
    const global = pattern.global;

    if (!global) {
      let flags = "g";
      if (pattern.multiline) {
        flags += "m";
      }
      if (pattern.ignoreCase) {
        flags += "i";
      }
      // Dropping these two flags would silently change what the pattern matches.
      if (pattern.dotAll) {
        flags += "s";
      }
      if (pattern.unicode) {
        flags += "u";
      }
      pattern = new RegExp(pattern.source, flags);
    }

    if (!Array.isArray(start)) {
      start = [0];
    }

    this.rules.push({
      pattern: pattern,
      global: global,
      action: action,
      start: start,
    });

    return this;
  }

  /**
   * Sets the text to tokenize and rewinds the lexer, including any error
   * state left over from a previous input.
   *
   * @returns The lexer itself, for chaining.
   */
  setInput(input: string) {
    this.mutableState.remove = 0;
    this.mutableState.state = 0;
    this.mutableState.index = 0;
    this.mutableState.tokens = [];
    this.mutableState.input = input;
    this.mutableState.isValidParsing = true;
    this.mutableState.parsingErrorMessage = "";
    return this;
  }

  /**
   * Produces the next token, or `undefined` once the input is exhausted.
   *
   * Matches whose action returns `undefined` (whitespace) are skipped. A
   * character that no rule matches is reported through
   * `unexpectedCharacterHandle`. `start`/`end` of a returned token are the
   * offsets of the text that was actually matched, regardless of what the
   * rule stored in `value`.
   *
   * The validity flags in `mutableState` describe this call only: they are
   * cleared on entry.
   */
  lex(): LexerToken | undefined {
    this.initialize();

    this.mutableState.isValidParsing = true;
    this.mutableState.parsingErrorMessage = "";

    if (this.mutableState.tokens.length) {
      return this.mutableState.tokens.shift();
    }

    // true while no candidate match has been consumed in the current round
    let reject = true;

    while (this.mutableState.index <= this.mutableState.input.length) {
      // Skip the candidates that were already tried at this position.
      const matches = this.scan().splice(this.mutableState.remove);
      const index = this.mutableState.index;

      while (matches.length) {
        if (reject) {
          const match = matches.shift();
          const result = match.result;
          const length = match.length;
          this.mutableState.index += length;
          reject = false;
          this.mutableState.remove++;

          const produced = match.action.apply(this, result);
          if (typeof produced !== "undefined") {
            const isBatch = Array.isArray(produced);
            const emitted: any[] = isBatch ? produced : [produced];

            // Stamp the real span of the match onto everything the action emitted.
            const start: number = result.index;
            for (const item of emitted) {
              if (typeof item === "object" && item !== null) {
                item.start = start;
                item.end = start + length;
              }
            }

            if (isBatch) {
              // The first token is returned now, the rest is queued for the next calls.
              this.mutableState.tokens = emitted.slice(1);
            }

            if (length) {
              this.mutableState.remove = 0;
            }
            return emitted[0];
          }
        } else {
          break;
        }
      }

      const input = this.mutableState.input;

      if (index < input.length) {
        if (reject) {
          // Nothing matched here: report the character and step over it.
          this.mutableState.remove = 0;
          const unexpectedToken = this.unexpectedCharacterHandle(
            input.charAt(this.mutableState.index),
            this.mutableState.index,
          );
          this.mutableState.index++;
          return unexpectedToken;
        }

        // A match was consumed without producing a token (e.g. whitespace).
        if (this.mutableState.index !== index) {
          this.mutableState.remove = 0;
        }
        reject = true;
      } else if (matches.length) {
        reject = true;
      } else {
        break;
      }
    }

    return undefined;
  }

  /**
   * Tokenizes the rest of the input.
   *
   * Because `lex()` clears the validity flags on every call, the first error
   * seen during the run is restored afterwards: once this method returns,
   * `mutableState.isValidParsing` is false and `parsingErrorMessage` describes
   * the first unexpected character if the input contained any.
   *
   * @returns All remaining tokens, in input order, UNEXPECTED tokens included.
   */
  lexAll() {
    this.initialize();

    const result: LexerToken[] = [];
    let firstErrorMessage: string | undefined;

    while (true) {
      const next = this.lex();
      if (!next) {
        break;
      }

      if (!this.mutableState.isValidParsing && firstErrorMessage === undefined) {
        firstErrorMessage = this.mutableState.parsingErrorMessage;
      }
      result.push(next);
    }

    if (firstErrorMessage !== undefined) {
      this.mutableState.isValidParsing = false;
      this.mutableState.parsingErrorMessage = firstErrorMessage;
    }

    return result;
  }

  /**
   * Tries every active rule at the current position.
   *
   * @returns The candidate matches (`result`, `action`, `length`), longest
   *          first; equally long candidates keep their registration order.
   */
  scan() {
    this.initialize();

    const matches: any[] = [];
    // Candidates below this position are never reordered. It only moves when a
    // rule was registered with a pattern that was already global.
    let index = 0;

    const state = this.mutableState.state;
    const lastIndex = this.mutableState.index;
    const input = this.mutableState.input;

    for (const rule of this.rules) {
      const start = rule.start;
      const states = start.length;

      if (!states || start.indexOf(state) >= 0 || (state % 2 && states === 1 && !start[0])) {
        const pattern = rule.pattern;
        pattern.lastIndex = lastIndex;
        const result = pattern.exec(input);

        // A global pattern searches forward, so only a match that begins
        // exactly at the current position counts.
        if (result && result.index === lastIndex) {
          let j = matches.push({
            result: result,
            action: rule.action,
            length: result[0].length,
          });

          if (rule.global) {
            index = j;
          }

          // Insertion step: move the new candidate in front of strictly shorter ones.
          while (--j > index) {
            const k = j - 1;
            if (matches[j].length > matches[k].length) {
              [matches[j], matches[k]] = [matches[k], matches[j]];
            }
          }
        }
      }
    }

    return matches;
  }
}

/** The single lexer instance of this module, created when the module is first evaluated. */
const lexerInstance = new Lexer();

/**
 * Gives access to the module-wide lexer.
 *
 * @returns The same `Lexer` instance on every call; its state is shared by all callers.
 */
export const getLexer = (): Lexer => lexerInstance;
