import cloneDeep from "lodash/cloneDeep";
import YAML from "yaml";

import {
  defaultMetavariableConstraintOfKind,
  emptyPattern,
  MetavariableConstraint,
  MetavariableConstraintInner,
  MetavariableConstraintKind,
  Pattern,
  PatternConstraint,
  PatternConstraintInner,
  RawPattern,
  RawPatternInner,
  Rule,
  RuleInner,
} from "../types/rule";

/******************************************************************************/
/* Constants */
/******************************************************************************/

// A "known key" is a top-level rule key which Structure Mode is able to
// understand, and which it can parse and then restore when converting back to
// YAML.
// This means that essentially there will be no data loss with these fields,
// in a given rule.
// `fromYAML` copies every top-level key that is NOT listed here into the
// rule's `extra` map, so that keys we do not understand are carried along.
// For instance, we do not currently support `pattern-propagators`, so that
// field is not present here: `fromYAML` rejects it outright for `mode: taint`
// rules, and in every other case it ends up in the `extra` field.
// This list is also spread into STRUCTURE_KNOWN_NESTED_KEYS, so every key
// added here is accepted as a nested key as well.
const STRUCTURE_KNOWN_TOP_LEVEL_KEYS = [
  "id",
  "severity",
  "languages",
  "message",
  // top-level patterns in old syntax
  "pattern",
  "patterns",
  "pattern-either",
  "pattern-inside",
  "pattern-regex",
  "pattern-sources",
  "pattern-sinks",
  "pattern-sanitizers",
  // misc
  "mode",
  "fix",
  // new syntax operators
  "match",
  "taint",
];

// NOTE(data-loss): At the same time, there is potential data loss from nested
// fields which we do not know about.
// We can't just store these in the `extra` field, because they are nested
// and we will not know where to put them in the resulting dictionary.
// As such, we will whitelist specific nested keys which are allowed, and raise
// an exception if we encounter any others, to force us to default back to Advanced
// Mode.
// Notable things excluded from this includes taint labels, like `requires:` and
// `label:`.
// The list is consulted by `validate_nested_keys`, which throws `InvalidYAML`
// for any map key that is not present here.
// NOTE(review): `pattern-not-regex` (handled by `structurifyOld`), `focus`
// (read by `extractWhere`) and `types` (read for `metavariable-type`) are not
// listed, so rules using them nested are rejected by validation before those
// code paths can run -- confirm whether that is intentional.
const STRUCTURE_KNOWN_NESTED_KEYS = [
  // ...all previously known top-level keys are OK
  // this actually is a little over-permissive, but we expect to not overlap here
  ...STRUCTURE_KNOWN_TOP_LEVEL_KEYS,
  // we don't allow pattern propagators right now, but these would
  // theoretically be needed at some point:
  // "from",
  // "to",
  // non-top level old syntax patterns
  "pattern-not",
  "pattern-not-inside",
  // new syntax patterns
  "any",
  "all",
  "inside",
  "not",
  "regex",
  // new syntax taint
  "sources",
  "sanitizers",
  "propagators",
  "sinks",
  "where",
  // new syntax pattern operators in general
  // nested stuff
  "metavariable",
  "metavariable-pattern",
  "metavariable-type",
  "type",
  "metavariable-analysis",
  "analyzer",
  "metavariable-regex",
  // regex: covered above in new syntax
  "metavariable-comparison",
  "comparison",
  // notably, not `language:`
  "focus-metavariable",
];

/******************************************************************************/
/* Helpers */
/******************************************************************************/

// Looks up `key` in `x` and returns its value, throwing `InvalidYAML` when the
// key is absent instead of letting `undefined` leak out.
// This is needed because `.get(...)`'s return type is `any`, unfortunately, so
// the compiler will not flag a missing required key for us.
//
// Note that a key which is present with an `undefined`/`null` value is still
// returned as-is; only absence is an error.
function getValue(x: Map<string, any>, key: string) {
  if (x.has(key)) {
    return x.get(key);
  }

  // `JSON.stringify` renders a Map as "{}", which makes the error useless.
  // Convert every Map (including nested ones, since the replacer is applied
  // recursively) into a plain object before serializing.
  const mapsToObjects = (_key: string, value: unknown) => {
    if (!(value instanceof Map)) {
      return value;
    }
    const plain: Record<string, unknown> = {};
    value.forEach((v: unknown, k: unknown) => {
      plain[String(k)] = v;
    });
    return plain;
  };

  throw new InvalidYAML(
    `expected key ${key} in ${JSON.stringify(x, mapsToObjects)}`
  );
}

// Recursively checks that every map key reachable from `input` is listed in
// STRUCTURE_KNOWN_NESTED_KEYS, throwing `InvalidYAML` on the first key that is
// not (see the data-loss note on that constant).
//
// - arrays are validated element by element
// - Maps have each key checked and each value validated recursively
// - scalars (strings, numbers, booleans) and absent values (`undefined`,
//   `null`) carry no keys, so they are accepted as leaves
// - any other object is rejected, because we cannot inspect its keys
function validate_nested_keys(input: unknown): void {
  if (Array.isArray(input)) {
    for (const elem of input) {
      validate_nested_keys(elem);
    }
    return;
  }

  if (input === null || typeof input !== "object") {
    // Leaf value: nothing nested underneath it that could be lost.
    return;
  }

  if (!(input instanceof Map)) {
    // data-loss: an opaque object could hide keys we do not understand
    throw new InvalidYAML("unexpected non-map object in nested value");
  }

  for (const key of Array.from(input.keys())) {
    // data-loss:
    if (!STRUCTURE_KNOWN_NESTED_KEYS.includes(key)) {
      throw new InvalidYAML(`unknown nested key ${key}`);
    }

    validate_nested_keys(input.get(key));
  }
}

/******************************************************************************/
/* Old syntax */
/******************************************************************************/

// Old-syntax keys that `structurifyOld` knows how to convert.
// Order matters: `structurifyOld` walks this list front to back and returns
// the conversion of the first key it finds on the input map, so when a map
// carries several of these keys only the earliest-listed one is used.
// `fromYAML` also iterates this list to decide which top-level values of an
// old-syntax search rule get their nested keys validated.
const ALLOWED_OLD_KEYS = [
  // patterns
  "pattern",
  "pattern-either",
  "patterns",
  "pattern-regex",
  "pattern-not",
  "pattern-inside",
  "pattern-not-inside",
  "pattern-not-regex",
  // constraints
  "metavariable-regex",
  "metavariable-analysis",
  "metavariable-pattern",
  "metavariable-comparison",
  "metavariable-type",
  "focus-metavariable",
];

// Converts an old-syntax map via `structurifyOld` and insists that the result
// is a `Pattern`. Anything else (i.e. a constraint) is reported as
// `InvalidYAML`.
const structurifyOldPattern = (input: Map<string, any>): Pattern => {
  const converted = structurifyOld(input);
  if (!(converted instanceof Pattern)) {
    throw new InvalidYAML(`expected old pattern, got ${converted}`);
  }
  return converted;
};

// Like `structurifyOldPattern`, but also accepts a bare string, which is
// turned into a pattern with `emptyPattern`. A map that converts to anything
// other than a `Pattern` is reported as `InvalidYAML`.
const structurifyOldPatternOrString = (
  input: Map<string, any> | string
): Pattern => {
  if (typeof input === "string") {
    return emptyPattern(input);
  }

  const converted = structurifyOld(input);
  if (converted instanceof Pattern) {
    return converted;
  }
  throw new InvalidYAML(`expected old pattern, got ${converted}`);
};

// Converts the body of an old-syntax `metavariable-*` entry into a
// `PatternConstraint`.
//
// `key` is the old-syntax key (`metavariable-regex`, `metavariable-pattern`,
// `metavariable-analysis`, `metavariable-type` or `metavariable-comparison`)
// and `value` is the map found underneath it.
//
// Throws `InvalidYAML` when a required inner key is missing or `key` is not
// one of the above, and `UnsupportedRule` when `metavariable-type` lists more
// than one type.
const structurifyOldMetavariableConstraint = (
  key: string,
  value: Map<string, any>
): PatternConstraint => {
  // comparison is weird because it can either have the metavariable or not
  // so let's handle it first
  if (key === "metavariable-comparison") {
    return new PatternConstraint({
      kind: "comparison",
      comparison: getValue(value, "comparison"),
    });
  }

  // these ones all must define an inner metavariable: key
  const metavariable = getValue(value, "metavariable");
  let metavariableConstraintInner: MetavariableConstraintInner | null = null;
  switch (key) {
    // metavariable-comparison not here because it's represented differently
    // in our structure mode AST
    case "metavariable-regex":
      metavariableConstraintInner = {
        kind: "regex",
        // required, like the other constraint payloads
        regex: getValue(value, "regex"),
      };
      break;
    case "metavariable-pattern":
      metavariableConstraintInner = {
        kind: "pattern",
        // the pattern keys live next to `metavariable:` in the same map
        pattern: structurifyOldPattern(value),
      };
      break;
    case "metavariable-analysis":
      metavariableConstraintInner = {
        kind: "analyzer",
        analyzer: getValue(value, "analyzer"),
      };
      break;
    case "metavariable-type": {
      const types = value.get("types");
      if (Array.isArray(types) && types.length > 0) {
        // A single-element `types:` list is equivalent to `type:`; only
        // genuinely multiple types are unsupported.
        if (types.length > 1) {
          throw new UnsupportedRule("multiple types not supported");
        }
        metavariableConstraintInner = {
          kind: "type",
          type: types[0],
        };
      } else {
        // No usable `types:` list, so `type:` is required.
        metavariableConstraintInner = {
          kind: "type",
          type: getValue(value, "type"),
        };
      }
      break;
    }
  }

  if (metavariableConstraintInner === null) {
    throw new InvalidYAML("metavariable constraint not found");
  }

  return new PatternConstraint({
    kind: "metavariable",
    metavariable,
    metavariableConstraints: [
      new MetavariableConstraint(metavariableConstraintInner),
    ],
  });
};

// Converts one old-syntax YAML map into a structure-mode node.
//
// The keys of ALLOWED_OLD_KEYS are tried in list order and the first one
// present on `input` decides the result:
// - pattern keys produce a `Pattern` (`pattern-not-inside` and
//   `pattern-not-regex` become a `not` wrapped around `inside` / `regex`)
// - `focus-metavariable` and the `metavariable-*` keys produce a
//   `PatternConstraint`
//
// Throws `InvalidYAML` when none of the allowed keys is present, or when
// `patterns` / `pattern-either` does not hold a list.
export const structurifyOld = (
  input: Map<string, any>
): Pattern | PatternConstraint => {
  const mkPattern = (
    rawPatternInner: RawPatternInner,
    constraint: PatternConstraint[] = []
  ) =>
    new Pattern({
      rawPattern: new RawPattern(rawPatternInner),
      constraint: constraint,
    });

  // Fetches the value under `key`, insisting that it is a list.
  const listUnder = (key: string): any[] => {
    const value = input.get(key);
    if (!Array.isArray(value)) {
      throw new InvalidYAML(`expected a list under ${key}`);
    }
    return value;
  };

  for (const key of ALLOWED_OLD_KEYS) {
    if (!input.has(key)) {
      continue;
    }
    switch (key) {
      case "pattern":
        return mkPattern({
          kind: "pattern",
          pattern: input.get(key),
        });
      // Only a `patterns` should permit metavariable constraints underneath it
      case "patterns": {
        // The children are a mix of patterns and constraints; split them so
        // the constraints attach to the resulting `all` pattern.
        const elems: (Pattern | PatternConstraint)[] = listUnder(key).map(
          (p: Map<string, any>) => structurifyOld(p)
        );
        const patterns = elems.filter(
          (p): p is Pattern => p instanceof Pattern
        );
        const constraints = elems.filter(
          (p): p is PatternConstraint => p instanceof PatternConstraint
        );
        return mkPattern(
          {
            kind: "all",
            patterns,
          },
          constraints
        );
      }
      case "pattern-either":
        return mkPattern({
          kind: "any",
          patterns: listUnder(key).map((p: Map<string, any>) =>
            structurifyOldPattern(p)
          ),
        });
      case "pattern-inside":
        return mkPattern({
          kind: "inside",
          pattern: structurifyOldPatternOrString(input.get(key)),
        });
      case "pattern-not":
        return mkPattern({
          kind: "not",
          pattern: structurifyOldPatternOrString(input.get(key)),
        });
      case "pattern-regex":
        return mkPattern({
          kind: "regex",
          regex: input.get(key),
        });
      case "pattern-not-inside":
        return mkPattern({
          kind: "not",
          pattern: mkPattern({
            kind: "inside",
            pattern: structurifyOldPatternOrString(input.get(key)),
          }),
        });
      case "pattern-not-regex":
        return mkPattern({
          kind: "not",
          pattern: mkPattern({
            kind: "regex",
            regex: input.get(key),
          }),
        });
      case "focus-metavariable": {
        // a single metavariable may be written as a bare string
        const body: string[] | string = input.get(key);
        return new PatternConstraint({
          kind: "focus",
          metavariables: typeof body === "string" ? [body] : body,
        });
      }
      case "metavariable-comparison":
      case "metavariable-regex":
      case "metavariable-analysis":
      case "metavariable-pattern":
      case "metavariable-type":
        return structurifyOldMetavariableConstraint(key, input.get(key));
    }
  }

  // `JSON.stringify` renders a Map as "{}"; convert Maps (recursively, via the
  // replacer) into plain objects so the message shows what was received.
  const mapsToObjects = (_key: string, value: unknown) => {
    if (!(value instanceof Map)) {
      return value;
    }
    const plain: Record<string, unknown> = {};
    value.forEach((v: unknown, k: unknown) => {
      plain[String(k)] = v;
    });
    return plain;
  };
  throw new InvalidYAML(
    `no valid key found in ${JSON.stringify(input, mapsToObjects)}`
  );
};

/******************************************************************************/
/* New syntax */
/******************************************************************************/

// If `input` carries one of the new-syntax pattern keys (`pattern`, `any`,
// `all`, `regex`), returns the pattern parsed by `structurifyNew` from a deep
// copy of `input` with its `metavariable` key removed; otherwise returns
// `null`. The caller's map is never modified.
const extractPattern = (input: Map<string, any>): Pattern | null => {
  const patternKeys = ["pattern", "any", "all", "regex"];
  const hasPatternKey = patternKeys.some((candidate) => input.has(candidate));
  if (!hasPatternKey) {
    return null;
  }

  const withoutMetavariable = cloneDeep(input);
  withoutMetavariable.delete("metavariable");
  return structurifyNew(withoutMetavariable);
};

// Converts one entry of a new-syntax `where:` list into the payload of a
// `PatternConstraint`.
//
// Precedence, first match wins:
// 1. `focus`        -> focus constraint (a bare string is wrapped in a list)
// 2. `comparison`   -> comparison constraint
// 3. `metavariable` -> metavariable constraint whose kind is chosen from
//    `regex`, `analyzer`, `type`, or an embedded pattern, in that order
//
// Throws `InvalidYAML` when the entry matches none of these, or when a
// `metavariable` entry carries no recognised constraint.
const extractWhere = (input: Map<string, any>): PatternConstraintInner => {
  const focus = input.get("focus");
  const comparison = input.get("comparison");
  const metavariable = input.get("metavariable");

  if (focus !== undefined) {
    return {
      kind: "focus",
      // mirror the old-syntax handling of `focus-metavariable: $X`
      metavariables: typeof focus === "string" ? [focus] : focus,
    };
  }
  if (comparison !== undefined) {
    return { kind: "comparison", comparison: comparison };
  }
  if (metavariable === undefined) {
    throw new InvalidYAML("improper constraint type");
  }

  const regex = input.get("regex");
  const analyzer = input.get("analyzer");
  const type = input.get("type");
  // TODO: I think this can be "not"
  // Evaluated eagerly on purpose: it throws for entries that mix in keys
  // `structurifyNew` does not accept, which would otherwise be dropped.
  const modPattern = extractPattern(input);

  // Builds the constraint from the kind's defaults with the values parsed
  // from the YAML laid on top, so the parsed values are what end up stored.
  const mkConstraint = (
    kind: MetavariableConstraintKind,
    parsed: MetavariableConstraintInner
  ) =>
    ({
      kind: "metavariable",
      metavariable: metavariable,
      metavariableConstraints: [
        new MetavariableConstraint({
          ...defaultMetavariableConstraintOfKind(kind),
          ...parsed,
        } as MetavariableConstraintInner),
      ],
    } as PatternConstraintInner);

  if (regex !== undefined) {
    return mkConstraint("regex", { kind: "regex", regex: regex });
  } else if (analyzer !== undefined) {
    return mkConstraint("analyzer", { kind: "analyzer", analyzer: analyzer });
  } else if (type !== undefined) {
    return mkConstraint("type", { kind: "type", type: type });
  } else if (modPattern !== null) {
    return mkConstraint("pattern", { kind: "pattern", pattern: modPattern });
  } else {
    throw new InvalidYAML("improper metavariable constraint type");
  }
};

// Converts a new-syntax pattern (the value under `match:`, or any nested
// pattern) into a `Pattern`.
//
// - a bare string becomes a `pattern` node with a `null` constraint
// - a map is converted according to its operator key (`pattern`, `all`, `any`,
//   `inside`, `not`, `regex`); an optional `where:` list next to the operator
//   is converted with `extractWhere` and attached as the pattern's constraints
//
// The operator is the first key that is not `where`, so the two keys may
// appear in either order.
//
// Throws `InvalidYAML` when no supported operator key is found or when
// `where` is present but is not a list.
export const structurifyNew = (input: Map<string, any> | string): Pattern => {
  if (typeof input === "string") {
    return new Pattern({
      rawPattern: new RawPattern({
        kind: "pattern",
        pattern: input,
      }),
      constraint: null,
    });
  }

  // Pick up `where` whenever it is present, regardless of how many other keys
  // the map has, so that constraints are never silently dropped.
  const where = input.get("where");
  if (where !== undefined && where !== null && !Array.isArray(where)) {
    throw new InvalidYAML("expected `where` to be a list");
  }
  const constraints: PatternConstraint[] = (where ?? []).map(
    (c: Map<string, any>) => new PatternConstraint(extractWhere(c))
  );

  const mkPattern = (rawPatternInner: RawPatternInner) =>
    new Pattern({
      rawPattern: new RawPattern(rawPatternInner),
      constraint: constraints,
    });

  // `where` may be written before the operator, so skip over it.
  const kind = Array.from(input.keys()).find((k) => k !== "where");
  switch (kind) {
    case "pattern":
      return mkPattern({
        kind: "pattern",
        pattern: input.get(kind),
      });
    case "all":
    case "any":
      return mkPattern({
        kind: kind,
        patterns: input
          .get(kind)
          .map((p: Map<string, any>) => structurifyNew(p)),
      });
    case "inside":
    case "not":
      return mkPattern({
        kind: kind,
        pattern: structurifyNew(input.get(kind)),
      });
    case "regex":
      return mkPattern({
        kind: kind,
        regex: input.get(kind),
      });
    default:
      throw new InvalidYAML("no legitimate new syntax key found");
  }
};

/******************************************************************************/
/* Entry point */
/******************************************************************************/

// Thrown when a rule parses fine but uses a feature Structure Mode cannot
// represent. `reason` holds the bare explanation; `message` is the same text
// behind a fixed prefix.
export class UnsupportedRule extends Error {
  public readonly reason: string;

  constructor(reason: string) {
    super("Rule is not supported by Structure Mode: " + reason);
    this.reason = reason;
  }
}

// Thrown when the input does not have the shape the converters expect.
// `reason` holds the bare explanation; `message` is the same text behind a
// fixed prefix.
export class InvalidYAML extends Error {
  public readonly reason: string;

  constructor(reason: string) {
    super("YAML parse error: " + reason);
    this.reason = reason;
  }
}

// Exception-raising!
//
// Parses `inputYaml` and converts its first rule into a Structure Mode `Rule`.
//
// The rule flavour is picked in this order:
// 1. `match:`      -> new-syntax search rule
// 2. `taint:`      -> new-syntax taint rule
// 3. `mode: taint` -> old-syntax taint rule
// 4. otherwise     -> old-syntax search rule
//
// Top-level keys that are not in STRUCTURE_KNOWN_TOP_LEVEL_KEYS are preserved
// in the rule's `extra` map.
//
// Throws `InvalidYAML` when the document or a required field is missing or
// malformed, or when a nested key is not whitelisted, and `UnsupportedRule`
// when the rule uses taint propagators.
//
// NOTE(review): only `rules[0]` and only `languages[0]` are converted, and
// `severity` is a "known" key that is never read here -- confirm that nothing
// is lost for multi-rule / multi-language inputs when converting back.
export const fromYAML = (inputYaml: string): Rule => {
  const doc = YAML.parse(inputYaml, {
    mapAsMap: true,
  });
  const rules = doc instanceof Map ? doc.get("rules") : undefined;
  if (!Array.isArray(rules) || !(rules[0] instanceof Map)) {
    throw new InvalidYAML("expected a non-empty `rules` list");
  }
  const ruleMap: Map<string, any> = rules[0];

  let inner: RuleInner;
  if (ruleMap.has("match")) {
    const outerPattern = ruleMap.get("match");
    // data-loss:
    validate_nested_keys(outerPattern);
    inner = {
      kind: "search",
      pattern: structurifyNew(outerPattern),
    };
  } else if (ruleMap.has("taint")) {
    const taint = ruleMap.get("taint");
    if (!(taint instanceof Map)) {
      throw new InvalidYAML("expected `taint` to be a mapping");
    }
    // we should raise an error if we fail to execute a translation with 100%
    // coverage, because otherwise we could accidentally lose information and
    // delete parts of rules untransparently
    // In the new syntax the propagators live inside `taint:`, so look there
    // as well as at the rule's top level.
    if (ruleMap.has("propagators") || taint.has("propagators")) {
      throw new UnsupportedRule("pattern-propagators currently unhandled");
    }
    // data-loss:
    validate_nested_keys(taint);
    inner = {
      kind: "taint",
      sources: getValue(taint, "sources").map((p: Map<string, any> | string) =>
        structurifyNew(p)
      ),
      sinks: getValue(taint, "sinks").map((p: Map<string, any> | string) =>
        structurifyNew(p)
      ),
      // sanitizers are optional
      sanitizers: (taint.get("sanitizers") ?? []).map(
        (p: Map<string, any> | string) => structurifyNew(p)
      ),
    };
  } else if (ruleMap.get("mode") === "taint") {
    if (ruleMap.has("pattern-propagators")) {
      throw new UnsupportedRule("pattern-propagators currently unhandled");
    }
    // data-loss:
    for (const key of [
      "pattern-sources",
      "pattern-sinks",
      "pattern-sanitizers",
    ]) {
      // `pattern-sanitizers` is optional; missing required keys are reported
      // by `getValue` below.
      if (ruleMap.has(key)) {
        validate_nested_keys(ruleMap.get(key));
      }
    }
    // Each entry must be a pattern; `structurifyOldPattern` rejects entries
    // that would convert to a bare constraint.
    inner = {
      kind: "taint",
      sources: getValue(ruleMap, "pattern-sources").map(
        (p: Map<string, any>) => structurifyOldPattern(p)
      ),
      sinks: getValue(ruleMap, "pattern-sinks").map((p: Map<string, any>) =>
        structurifyOldPattern(p)
      ),
      sanitizers: (ruleMap.get("pattern-sanitizers") ?? []).map(
        (p: Map<string, any>) => structurifyOldPattern(p)
      ),
      /* TODO: pattern-propagators */
    };
  } else {
    // data-loss:
    for (const key of ALLOWED_OLD_KEYS) {
      if (ruleMap.has(key)) {
        validate_nested_keys(ruleMap.get(key));
      }
    }
    const res = structurifyOld(ruleMap);
    if (res instanceof PatternConstraint) {
      throw new InvalidYAML("expected pattern, got constraint");
    }
    inner = {
      kind: "search",
      pattern: res,
    };
  }

  const fix = ruleMap.get("fix") ?? null;

  // We keep an `extra` field so that we can restore all of the rule keys
  // that we don't know how to deal with.
  // This ensures we have no loss of information when converting back and forth.
  const extra = new Map<string, any>();
  for (const key of ruleMap.keys()) {
    if (!STRUCTURE_KNOWN_TOP_LEVEL_KEYS.includes(key)) {
      extra.set(key, ruleMap.get(key));
    }
  }

  const languages = getValue(ruleMap, "languages");
  if (!Array.isArray(languages) || languages.length === 0) {
    throw new InvalidYAML("invalid `languages` field");
  }
  const language = languages[0];

  return new Rule(
    inner,
    language,
    getValue(ruleMap, "message"),
    getValue(ruleMap, "id"),
    undefined,
    fix,
    extra
  );
};
