import { useEffect, useState } from "react";
import { FILE_EXTENSIONS } from "../../consts/general";
import { composeParseFn, composeParseResult } from "./utils";
import { fileUtil } from "../../utilities/file";

/**
 * Flattens extracted page text content into a single plain-text string.
 *
 * Every item's `str` is appended in order; an item flagged with `hasEOL`
 * is followed by a line break. Pages are separated by one line break.
 *
 * @param {Array<{items: Array<{str: string, hasEOL: boolean}>}>} pageContents
 *   One entry per page, each exposing an `items` array.
 * @returns {string} The concatenated text of all pages.
 */
const parseText = (pageContents) => {
  const pageTexts = pageContents.map(({ items }) => {
    let text = "";
    for (const item of items) {
      text += `${item.str}`;
      if (item.hasEOL) {
        text += "\n";
      }
    }
    return text;
  });

  return pageTexts.join("\n");
};

/**
 * Converts extracted page text content into an HTML string.
 *
 * Every item's `str` is HTML-escaped (`&`, `<`, `>`) before being emitted,
 * so text coming from the document cannot inject markup into the result.
 * The output of all items and all pages is concatenated without separators.
 *
 * Paragraph tags are emitted only around items flagged with `hasEOL`:
 * - `<p>` is prepended when a previous item exists and has no `hasEOL`;
 * - `</p>` is appended when a next item exists and has no `hasEOL`.
 *
 * NOTE(review): with these rules an opening tag can be emitted without a
 * matching closing tag (e.g. when the EOL item is the last one on a page).
 * The rules are kept as they were — confirm the intended paragraph layout.
 *
 * @param {Array<{items: Array<{str: string, hasEOL: boolean}>}>} pageContents
 *   One entry per page, each exposing an `items` array.
 * @returns {string} The HTML markup for all pages.
 */
const parseHtml = (pageContents) => {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };
  // Template-literal coercion keeps non-string `str` values rendering the
  // same way they did before escaping was introduced.
  const escapeHtml = (text) =>
    `${text}`.replace(/[&<>]/g, (ch) => entities[ch]);

  return pageContents
    .map(({ items }) =>
      items
        .map((item, i) => {
          const prev = items[i - 1];
          const next = items[i + 1];
          const prefix = prev && !prev.hasEOL && item.hasEOL ? "<p>" : "";
          const suffix = next && !next.hasEOL && item.hasEOL ? "</p>" : "";
          return `${prefix}${escapeHtml(item.str)}${suffix}`;
        })
        .join(""),
    )
    .join("");
};

/**
 * Loads the text content of every page of a document.
 *
 * All pages are requested first (page numbers start at 1), and once every
 * page has resolved, `getTextContent()` is called on each of them.
 *
 * @param {{numPages: number, getPage: function(number): Promise<object>}} pdf
 *   Document object exposing `numPages` and `getPage`.
 * @returns {Promise<Array<object>>} Text content per page, in page order.
 */
const getPageContents = async (pdf) => {
  const pageNumbers = [...Array(pdf.numPages).keys()].map(
    (index) => index + 1,
  );
  const pages = await Promise.all(
    pageNumbers.map((pageNumber) => pdf.getPage(pageNumber)),
  );
  return Promise.all(pages.map((page) => page.getTextContent()));
};

/**
 * React hook exposing PDF parsing helpers backed by the `window.pdfJs` global.
 *
 * The global is captured into state once, after the first render. If it was
 * not available at that moment, `parse` looks it up again when called, so a
 * library that finished loading later can still be used.
 *
 * @returns {{parseToHtml: Function, parseToText: Function}} Parse functions
 *   produced by `composeParseFn`; the wrapped callbacks receive
 *   `(file, extra)` and convert the file content to HTML or plain text.
 */
export function usePdfParser() {
  const [parser, setParser] = useState(null);

  /**
   * Reads a .pdf file, extracts the text content of all pages and passes it
   * through `contentParser`.
   *
   * @param {File} file - File to parse; its `name` must have the pdf extension.
   * @param {{contentParser: Function, extra: *}} options - `contentParser`
   *   turns the per-page text content into the final content; `extra` is
   *   forwarded to `composeParseResult` untouched.
   * @returns {Promise<*>} Whatever `composeParseResult` returns.
   * @throws {Error} When the file is missing, is not a .pdf, the parser is
   *   unavailable, or reading/parsing fails. The original error is attached
   *   as `cause`.
   */
  const parse = async (file, { contentParser, extra }) => {
    try {
      if (!file) {
        throw new Error("File is empty");
      }
      if (fileUtil.getExtensionFromName(file.name) !== FILE_EXTENSIONS.pdf) {
        throw new Error("File is not .pdf");
      }
      // Fall back to the global in case it became available after mount.
      const pdfJs =
        parser || (typeof window !== "undefined" && window.pdfJs);
      if (!pdfJs) {
        throw new Error("Parser is not ready");
      }
      // presumably `fileUtil.read` resolves to an ArrayBuffer — confirm.
      const readerResult = await fileUtil.read(file);
      const bytes = new Uint8Array(readerResult);
      const pdf = await pdfJs.getDocument(bytes).promise;
      const content = await getPageContents(pdf);
      return composeParseResult({
        content: contentParser(content),
        extra,
      });
    } catch (e) {
      // Same message and error type as before, but keep the original error
      // (and its stack) reachable for debugging.
      throw new Error(e.message, { cause: e });
    }
  };

  const parseToHtml = composeParseFn((file, extra) => {
    return parse(file, {
      contentParser: parseHtml,
      extra,
    });
  });

  const parseToText = composeParseFn((file, extra) => {
    return parse(file, {
      contentParser: parseText,
      extra,
    });
  });

  // Runs once after mount; `parser` is always null at that point, so only
  // the presence of the global needs checking.
  useEffect(() => {
    if (window.pdfJs) {
      setParser(window.pdfJs);
    }
  }, []);

  return {
    parseToHtml,
    parseToText,
  };
}
