/*
 * This is a custom implementation of the Confluence langchain loader. There was an issue where
 * code blocks were not being extracted. This is a temporary fix until this issue is resolved.*/

const { htmlToText } = require("html-to-text");

class ConfluencePagesLoader {
  constructor({
    baseUrl,
    spaceKey,
    username,
    accessToken,
    limit = 25,
    expand = "body.storage,version",
    personalAccessToken,
    cloud = true,
  }) {
    this.baseUrl = baseUrl;
    this.spaceKey = spaceKey;
    this.username = username;
    this.accessToken = accessToken;
    this.limit = limit;
    this.expand = expand;
    this.personalAccessToken = personalAccessToken;
    this.cloud = cloud;
  }

  get authorizationHeader() {
    if (this.personalAccessToken) {
      return `Bearer ${this.personalAccessToken}`;
    } else if (this.username && this.accessToken) {
      const authToken = Buffer.from(
        `${this.username}:${this.accessToken}`
      ).toString("base64");
      return `Basic ${authToken}`;
    }
    return undefined;
  }

  async load(options) {
    try {
      const pages = await this.fetchAllPagesInSpace(
        options?.start,
        options?.limit
      );
      return pages.map((page) => this.createDocumentFromPage(page));
    } catch (error) {
      console.error("Error:", error);
      return [];
    }
  }

  async fetchConfluenceData(url) {
    try {
      const initialHeaders = {
        "Content-Type": "application/json",
        Accept: "application/json",
      };
      const authHeader = this.authorizationHeader;
      if (authHeader) {
        initialHeaders.Authorization = authHeader;
      }
      const response = await fetch(url, {
        headers: initialHeaders,
      });
      if (!response.ok) {
        throw new Error(
          `Failed to fetch ${url} from Confluence: ${response.status}`
        );
      }
      return await response.json();
    } catch (error) {
      throw new Error(`Failed to fetch ${url} from Confluence: ${error}`);
    }
  }

  // https://developer.atlassian.com/cloud/confluence/rest/v2/intro/#auth
  async fetchAllPagesInSpace(start = 0, limit = this.limit) {
    const url = `${this.baseUrl}${
      this.cloud ? "/wiki" : ""
    }/rest/api/content?spaceKey=${
      this.spaceKey
    }&limit=${limit}&start=${start}&expand=${this.expand}`;
    const data = await this.fetchConfluenceData(url);
    if (data.size === 0) {
      return [];
    }
    const nextPageStart = start + data.size;
    const nextPageResults = await this.fetchAllPagesInSpace(
      nextPageStart,
      limit
    );
    return data.results.concat(nextPageResults);
  }

  createDocumentFromPage(page) {
    // Function to extract code blocks
    const extractCodeBlocks = (content) => {
      const codeBlockRegex =
        /<ac:structured-macro ac:name="code"[^>]*>[\s\S]*?<ac:plain-text-body><!\[CDATA\[([\s\S]*?)\]\]><\/ac:plain-text-body>[\s\S]*?<\/ac:structured-macro>/g;
      const languageRegex =
        /<ac:parameter ac:name="language">(.*?)<\/ac:parameter>/;

      return content.replace(codeBlockRegex, (match) => {
        const language = match.match(languageRegex)?.[1] || "";
        const code =
          match.match(
            /<ac:plain-text-body><!\[CDATA\[([\s\S]*?)\]\]><\/ac:plain-text-body>/
          )?.[1] || "";
        return `\n\`\`\`${language}\n${code.trim()}\n\`\`\`\n`;
      });
    };

    const contentWithCodeBlocks = extractCodeBlocks(page.body.storage.value);
    const plainTextContent = htmlToText(contentWithCodeBlocks, {
      wordwrap: false,
      preserveNewlines: true,
    });
    const textWithPreservedStructure = plainTextContent.replace(
      /\n{3,}/g,
      "\n\n"
    );
    const pageUrl = `${this.baseUrl}/spaces/${this.spaceKey}/pages/${page.id}`;

    return {
      pageContent: textWithPreservedStructure,
      metadata: {
        id: page.id,
        status: page.status,
        title: page.title,
        type: page.type,
        url: pageUrl,
        version: page.version?.number,
        updated_by: page.version?.by?.displayName,
        updated_at: page.version?.when,
      },
    };
  }
}

module.exports = { ConfluencePagesLoader };