const fs = require("fs");
const path = require("path");
const { v5: uuidv5 } = require("uuid");

// Takes the path of a document file relative to the documents folder
// eg: youtube-subject/video-123.json
/**
 * Reads a stored document and returns its parsed JSON content.
 *
 * The given path is passed through `normalizePath` and appended to the
 * documents folder (`../../storage/documents` next to this file when
 * NODE_ENV is "development", otherwise `documents` under STORAGE_DIR).
 *
 * @param {string|null} filePath - Document path relative to the documents folder.
 * @returns {Promise<any|null>} Parsed JSON of the file, or null when no file exists at that path.
 * @throws {Error} When `filePath` is falsy (surfaces as a rejected promise).
 */
async function fileData(filePath = null) {
  if (!filePath) throw new Error("No docPath provided in request");

  const relativeDocPath = normalizePath(filePath);

  // The relative path is concatenated into a single segment on purpose: a
  // separate absolute segment would make path.resolve discard the base folder.
  let absoluteDocPath;
  if (process.env.NODE_ENV === "development") {
    absoluteDocPath = path.resolve(
      __dirname,
      `../../storage/documents/${relativeDocPath}`
    );
  } else {
    absoluteDocPath = path.resolve(
      process.env.STORAGE_DIR,
      `documents/${relativeDocPath}`
    );
  }

  if (!fs.existsSync(absoluteDocPath)) return null;
  return JSON.parse(fs.readFileSync(absoluteDocPath, "utf8"));
}

/**
 * Builds a two-level listing of the documents folder.
 *
 * Creates the documents folder (including any missing parent directories)
 * when it does not exist yet. Only first-level directories are listed, and
 * within each only `.json` files are reported. Each file entry carries the
 * file's JSON content minus `pageContent`, plus a `cached` flag obtained from
 * `cachedVectorInformation("<folder>/<file>", true)`.
 *
 * @returns {Promise<{name: string, type: string, items: Array<object>}>}
 *   Root entry named "documents" whose items are folder entries, each holding
 *   its file entries in `items`.
 */
async function viewLocalFiles() {
  const folder =
    process.env.NODE_ENV === "development"
      ? path.resolve(__dirname, `../../storage/documents`)
      : path.resolve(process.env.STORAGE_DIR, `documents`);

  // recursive: the storage root itself may not exist on a fresh install; a
  // plain mkdir would throw ENOENT in that case.
  if (!fs.existsSync(folder)) fs.mkdirSync(folder, { recursive: true });

  const directory = {
    name: "documents",
    type: "folder",
    items: [],
  };

  for (const file of fs.readdirSync(folder)) {
    if (path.extname(file) === ".md") continue;

    // `folder` is already absolute, so joining the entry name onto it gives
    // the same location without re-deriving the base path on every iteration.
    const folderPath = path.join(folder, file);
    if (!fs.lstatSync(folderPath).isDirectory()) continue;

    const subdocs = {
      name: file,
      type: "folder",
      items: [],
    };

    for (const subfile of fs.readdirSync(folderPath)) {
      if (path.extname(subfile) !== ".json") continue;

      const rawData = fs.readFileSync(path.join(folderPath, subfile), "utf8");
      // pageContent is pulled out only to keep it out of the listing.
      const { pageContent, ...metadata } = JSON.parse(rawData);

      subdocs.items.push({
        name: subfile,
        type: "file",
        ...metadata,
        cached: await cachedVectorInformation(`${file}/${subfile}`, true),
      });
    }

    directory.items.push(subdocs);
  }

  return directory;
}

// Searches the vector-cache folder for existing information so we don't have to re-embed a
// document and can instead push directly to vector db.
/**
 * Looks up the vector-cache entry for a document.
 *
 * The cache file is named `<uuidv5(filename, uuidv5.URL)>.json` inside the
 * vector-cache folder.
 *
 * @param {string|null} filename - Name the cache entry was stored under.
 * @param {boolean} checkOnly - When true, only report whether the entry exists.
 * @returns {Promise<boolean|{exists: boolean, chunks: any}>}
 *   With `checkOnly`: a boolean. Otherwise `{ exists, chunks }`, where
 *   `chunks` is the parsed cache file, or `[]` when there is no entry.
 */
async function cachedVectorInformation(filename = null, checkOnly = false) {
  if (!filename) {
    if (checkOnly) return false;
    return { exists: false, chunks: [] };
  }

  const digest = uuidv5(filename, uuidv5.URL);

  let cachePath;
  if (process.env.NODE_ENV === "development") {
    cachePath = path.resolve(
      __dirname,
      `../../storage/vector-cache/${digest}.json`
    );
  } else {
    cachePath = path.resolve(
      process.env.STORAGE_DIR,
      `vector-cache/${digest}.json`
    );
  }

  const isCached = fs.existsSync(cachePath);
  if (checkOnly) return isCached;
  if (!isCached) return { exists: false, chunks: [] };

  console.log(
    `Cached vectorized results of ${filename} found! Using cached data to save on embed costs.`
  );
  return {
    exists: true,
    chunks: JSON.parse(fs.readFileSync(cachePath, "utf8")),
  };
}

// vectorData: pre-chunked vectorized data for a given file that includes the proper metadata and chunk-size limit so it can be iterated and dumped into Pinecone, etc
// filename is the fullpath to the doc so we can compare by filename to find cached matches.
/**
 * Writes vectorized data for a document to the vector-cache folder as
 * `<uuidv5(filename, uuidv5.URL)>.json`, overwriting any existing entry.
 *
 * @param {Array} vectorData - Data to cache; serialized with JSON.stringify.
 * @param {string|null} filename - Name to derive the cache file name from.
 *   Nothing is written when this is falsy.
 * @returns {Promise<void>}
 */
async function storeVectorResult(vectorData = [], filename = null) {
  if (!filename) return;
  console.log(
    `Caching vectorized results of ${filename} to prevent duplicated embedding.`
  );
  const folder =
    process.env.NODE_ENV === "development"
      ? path.resolve(__dirname, `../../storage/vector-cache`)
      : path.resolve(process.env.STORAGE_DIR, `vector-cache`);

  // recursive: creates missing parent directories too (a plain mkdir throws
  // ENOENT when the storage root is absent) and is a no-op when the folder
  // already exists, so no separate existence check is needed.
  fs.mkdirSync(folder, { recursive: true });

  const digest = uuidv5(filename, uuidv5.URL);
  const writeTo = path.resolve(folder, `${digest}.json`);
  fs.writeFileSync(writeTo, JSON.stringify(vectorData), "utf8");
}

// Purges a file from the documents/ folder.
/**
 * Deletes a document file from the documents folder.
 *
 * The target is resolved from `normalizePath(filename)` relative to the
 * documents folder. Anything that resolves to the documents folder itself or
 * to a location outside of it is refused, so this function cannot be used to
 * delete arbitrary files.
 *
 * @param {string|null} filename - Document path relative to the documents folder.
 * @returns {Promise<void>} Resolves whether or not a file was removed.
 */
async function purgeSourceDocument(filename = null) {
  if (!filename) return;
  console.log(`Purging source document of ${filename}.`);

  const documentsFolder =
    process.env.NODE_ENV === "development"
      ? path.resolve(__dirname, `../../storage/documents`)
      : path.resolve(process.env.STORAGE_DIR, `documents`);
  const filePath = path.resolve(documentsFolder, normalizePath(filename));

  // normalizePath only strips leading "../" segments. An absolute filename
  // makes path.resolve ignore documentsFolder entirely, so containment has to
  // be verified on the resolved path before anything is removed.
  const relative = path.relative(documentsFolder, filePath);
  const isOutsideDocuments =
    relative === "" ||
    relative === ".." ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);
  if (isOutsideDocuments) {
    console.log(
      `Refusing to purge ${filename}: resolved path is not inside the documents folder.`
    );
    return;
  }

  if (!fs.existsSync(filePath)) return;
  fs.rmSync(filePath);
}

// Purges a vector-cache file from the vector-cache/ folder.
/**
 * Removes the vector-cache entry stored for a document, if there is one.
 *
 * The entry is the file `<uuidv5(filename, uuidv5.URL)>.json` inside the
 * vector-cache folder.
 *
 * @param {string|null} filename - Name the cache entry was stored under.
 *   Nothing happens when this is falsy.
 * @returns {Promise<void>}
 */
async function purgeVectorCache(filename = null) {
  if (!filename) return;
  console.log(`Purging vector-cache of ${filename}.`);

  const cacheFileName = `${uuidv5(filename, uuidv5.URL)}.json`;

  let cacheFilePath;
  if (process.env.NODE_ENV === "development") {
    cacheFilePath = path.resolve(
      __dirname,
      `../../storage/vector-cache`,
      cacheFileName
    );
  } else {
    cacheFilePath = path.resolve(
      process.env.STORAGE_DIR,
      `vector-cache`,
      cacheFileName
    );
  }

  if (fs.existsSync(cacheFilePath)) fs.rmSync(cacheFilePath);
}

// Search for a specific document by its unique name in the entire `documents`
// folder via iteration of all folders and checking if the expected file exists.
/**
 * Finds a document by file name in the first-level folders of the documents
 * folder and returns its listing entry.
 *
 * Folders are checked in the order `fs.readdirSync` returns them; the first
 * folder containing the (normalized) name wins.
 *
 * @param {string|null} documentName - File name to look for inside each folder.
 * @returns {Promise<object|null>} `{ name, type: "file", ...metadata, cached }`
 *   where metadata is the file's JSON content without `pageContent`, or null
 *   when the name is falsy, the documents folder does not exist, or no folder
 *   contains the file.
 */
async function findDocumentInDocuments(documentName = null) {
  if (!documentName) return null;
  const documentsFolder =
    process.env.NODE_ENV === "development"
      ? path.resolve(__dirname, `../../storage/documents`)
      : path.resolve(process.env.STORAGE_DIR, `documents`);

  // Nothing can be found before the documents folder has been created;
  // readdirSync would throw ENOENT here instead of reporting "not found".
  if (!fs.existsSync(documentsFolder)) return null;

  // Same for every folder, so computed once up front.
  const targetFilename = normalizePath(documentName);

  for (const folder of fs.readdirSync(documentsFolder)) {
    const folderPath = path.join(documentsFolder, folder);
    if (!fs.lstatSync(folderPath).isDirectory()) continue;

    const targetFileLocation = path.join(folderPath, targetFilename);
    if (!fs.existsSync(targetFileLocation)) continue;

    const rawData = fs.readFileSync(targetFileLocation, "utf8");
    const cachefilename = `${folder}/${targetFilename}`;
    // pageContent is pulled out only to keep it out of the returned entry.
    const { pageContent, ...metadata } = JSON.parse(rawData);
    return {
      name: targetFilename,
      type: "file",
      ...metadata,
      cached: await cachedVectorInformation(cachefilename, true),
    };
  }

  return null;
}

/**
 * Normalizes a path with `path.normalize` and strips every leading `..`
 * segment (followed by `/`, `\`, or the end of the string) from the result.
 *
 * @param {string} filepath - Path to normalize.
 * @returns {string} The normalized path without leading parent-directory segments.
 */
function normalizePath(filepath = "") {
  const leadingParentSegments = /^(\.\.(\/|\\|$))+/;
  const normalized = path.normalize(filepath);
  return normalized.replace(leadingParentSegments, "");
}

// Functions exposed by this module: document lookup and listing, vector-cache
// read/write, purge helpers for both folders, and the path normalizer.
module.exports = {
  findDocumentInDocuments,
  cachedVectorInformation,
  viewLocalFiles,
  purgeSourceDocument,
  purgeVectorCache,
  storeVectorResult,
  fileData,
  normalizePath,
};