diff --git a/pyproject.toml b/pyproject.toml index c8858365..0b7483a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ dependencies = [ "openai >= 1.0.0", "tiktoken >= 0.3.2", "tenacity >= 8.2.2", + "magika ~= 0.5.1", "pillow ~= 9.5.0", "pydantic >= 2.0.0", "pyyaml ~= 6.0", diff --git a/src/interface/desktop/main.js b/src/interface/desktop/main.js index 1610a5e7..b3589c14 100644 --- a/src/interface/desktop/main.js +++ b/src/interface/desktop/main.js @@ -1,4 +1,5 @@ const { app, BrowserWindow, ipcMain, Tray, Menu, nativeImage, shell } = require('electron'); +const Magika = require('magika').MagikaNode; const todesktop = require("@todesktop/runtime"); const khojPackage = require('./package.json'); @@ -14,8 +15,8 @@ const KHOJ_URL = 'https://app.khoj.dev'; const Store = require('electron-store'); -const validFileTypes = ['org', 'md', 'markdown', 'txt', 'html', 'xml', 'pdf'] - +const magika = new Magika(); +let validFileTypes; const binaryFileTypes = ['pdf', 'png', 'jpg', 'jpeg'] const schema = { @@ -67,6 +68,7 @@ const schema = { } }; +let isMagikaLoaded = false; let syncing = false; let state = {} const store = new Store({ schema }); @@ -111,22 +113,39 @@ function filenameToMimeType (filename) { } } -function processDirectory(filesToPush, folder) { - const files = fs.readdirSync(folder.path, { withFileTypes: true, recursive: true }); +async function isPlainTextFile(filePath) { + if (!isMagikaLoaded) { + await magika.load(); + isMagikaLoaded = true; + } + try { + const fileContent = fs.readFileSync(filePath); + const fileType = await magika.identifyBytes(fileContent); + const fileLabel = magika.config.labels.filter(l => l.name == fileType.label)?.[0] + return fileLabel?.is_text + } catch (err) { + console.error("Failed to identify file type: ", err); + return false; + } +} + +async function processDirectory(filesToPush, folder) { + const files = fs.readdirSync(folder.path, { withFileTypes: true }); for (const file of files) { - if (file.isFile() && validFileTypes.includes(file.name.split('.').pop())) { - console.log(`Add ${file.name} in ${folder.path} for indexing`); - filesToPush.push(path.join(folder.path, file.name)); + const filePath = path.join(file.path, file.name || ''); + if (file.isFile() && await isPlainTextFile(filePath)) { + console.log(`Add ${file.name} in ${file.path} for indexing`); + filesToPush.push(filePath); } if (file.isDirectory()) { - processDirectory(filesToPush, {'path': path.join(folder.path, file.name)}); + await processDirectory(filesToPush, {'path': filePath}); } } } -function pushDataToKhoj (regenerate = false) { +async function pushDataToKhoj (regenerate = false) { // Don't sync if token or hostURL is not set or if already syncing if (store.get('khojToken') === '' || store.get('hostURL') === '' || syncing === true) { const win = BrowserWindow.getAllWindows()[0]; @@ -148,7 +167,7 @@ function pushDataToKhoj (regenerate = false) { // Collect paths of all indexable files in configured folders for (const folder of folders) { - processDirectory(filesToPush, folder); + await processDirectory(filesToPush, folder); } const lastSync = store.get('lastSync') || []; @@ -222,6 +241,7 @@ function pushDataToKhoj (regenerate = false) { } else if (error?.code === 'ECONNREFUSED') { state["error"] = `Could not connect to Khoj server. Ensure you can connect to it at ${error.address}:${error.port}.`; } else { + currentTime = new Date(); state["error"] = `Sync was unsuccessful at ${currentTime.toLocaleTimeString()}. Contact team@khoj.dev to report this issue.`; } }) @@ -238,9 +258,18 @@ function pushDataToKhoj (regenerate = false) { pushDataToKhoj(); async function handleFileOpen (type) { + if (!isMagikaLoaded) { + await magika.load(); + isMagikaLoaded = true; + validFileTypes = [ + "org", "md", "pdf", + // all text file extensions known to Magika + ...magika.config.labels.filter(l => l.is_text == true).map(l => l.name)]; + } + let { canceled, filePaths } = {canceled: true, filePaths: []}; if (type === 'file') { - ({ canceled, filePaths } = await dialog.showOpenDialog({properties: ['openFile' ], filters: [{ name: "Valid Khoj Files", extensions: validFileTypes}] })); + ({ canceled, filePaths } = await dialog.showOpenDialog({properties: ['openFile' ], filters: [{ name: "Valid Khoj Files", extensions: validFileTypes }] })); } else if (type === 'folder') { ({ canceled, filePaths } = await dialog.showOpenDialog({properties: ['openDirectory' ]})); } @@ -331,7 +360,7 @@ async function removeFolder (event, folderPath) { async function syncData (regenerate = false) { try { - pushDataToKhoj(regenerate); + await pushDataToKhoj(regenerate); const date = new Date(); console.log('Pushing data to Khoj at: ', date); } catch (err) { @@ -343,7 +372,7 @@ async function deleteAllFiles () { try { store.set('files', []); store.set('folders', []); - pushDataToKhoj(true); + await pushDataToKhoj(true); const date = new Date(); console.log('Pushing data to Khoj at: ', date); } catch (err) { @@ -379,9 +408,9 @@ const createWindow = (tab = 'chat.html') => { } }) - const job = new cron('0 */10 * * * *', function() { + const job = new cron('0 */10 * * * *', async function() { try { - pushDataToKhoj(); + await pushDataToKhoj(); const date = new Date(); console.log('Pushing data to Khoj at: ', date); win.webContents.send('update-state', state); @@ -501,11 +530,11 @@ app.whenReady().then(() => { try { const result = await todesktop.autoUpdater.checkForUpdates(); if (result.updateInfo) { - console.log("Update found:", result.updateInfo.version); + console.log("Desktop app update found:", result.updateInfo.version); todesktop.autoUpdater.restartAndInstall(); } } catch (e) { - console.log("Update check failed:", e); + console.warn("Desktop app update check failed:", e); } }) diff --git a/src/interface/desktop/package.json b/src/interface/desktop/package.json index 79caa6c2..8c34d078 100644 --- a/src/interface/desktop/package.json +++ b/src/interface/desktop/package.json @@ -20,6 +20,6 @@ "axios": "^1.6.4", "cron": "^2.4.3", "electron-store": "^8.1.0", - "fs": "^0.0.1-security" + "magika": "^0.2.13" } } diff --git a/src/interface/desktop/yarn.lock b/src/interface/desktop/yarn.lock index 539bfa97..3fab17a4 100644 --- a/src/interface/desktop/yarn.lock +++ b/src/interface/desktop/yarn.lock @@ -17,6 +17,21 @@ optionalDependencies: global-agent "^3.0.0" +"@mapbox/node-pre-gyp@1.0.9": + version "1.0.9" + resolved "https://registry.yarnpkg.com/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.9.tgz#09a8781a3a036151cdebbe8719d6f8b25d4058bc" + integrity sha512-aDF3S3rK9Q2gey/WAttUlISduDItz5BU3306M9Eyv6/oS40aMprnopshtlKTykxRNIBEZuRMaZAnbrQ4QtKGyw== + dependencies: + detect-libc "^2.0.0" + https-proxy-agent "^5.0.0" + make-dir "^3.1.0" + node-fetch "^2.6.7" + nopt "^5.0.0" + npmlog "^5.0.1" + rimraf "^3.0.2" + semver "^7.3.5" + tar "^6.1.11" + "@nodelib/fs.scandir@2.1.5": version "2.1.5" resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5" @@ -50,6 +65,87 @@ dependencies: defer-to-connect "^2.0.0" +"@tensorflow/tfjs-backend-cpu@4.17.0": + version "4.17.0" + resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-backend-cpu/-/tfjs-backend-cpu-4.17.0.tgz#b0c495de686cf700f2ae1f6d8bc2eb6f1964d250" + integrity sha512-2VSCHnX9qhYTjw9HiVwTBSnRVlntKXeBlK7aSVsmZfHGwWE2faErTtO7bWmqNqw0U7gyznJbVAjlow/p+0RNGw== + dependencies: + "@types/seedrandom" "^2.4.28" + seedrandom "^3.0.5" + +"@tensorflow/tfjs-backend-webgl@4.17.0": + version "4.17.0" + resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-backend-webgl/-/tfjs-backend-webgl-4.17.0.tgz#7d540a92343582d37d2cdf9509060598a19cd17a" + integrity sha512-CC5GsGECCd7eYAUaKq0XJ48FjEZdgXZWPxgUYx4djvfUx5fQPp35hCSP9w/k463jllBMbjl2tKRg8u7Ia/LYzg== + dependencies: + "@tensorflow/tfjs-backend-cpu" "4.17.0" + "@types/offscreencanvas" "~2019.3.0" + "@types/seedrandom" "^2.4.28" + seedrandom "^3.0.5" + +"@tensorflow/tfjs-converter@4.17.0": + version "4.17.0" + resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-converter/-/tfjs-converter-4.17.0.tgz#f4407bd53d5e300b05ed0b0f068506bc50c956b0" + integrity sha512-qFxIjPfomCuTrYxsFjtKbi3QfdmTTCWo+RvqD64oCMS0sjp7sUDNhJyKDoLx6LZhXlwXpHIVDJctLMRMwet0Zw== + +"@tensorflow/tfjs-core@4.17.0": + version "4.17.0" + resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-core/-/tfjs-core-4.17.0.tgz#1ea128555a4d197aed417d70461fcbc7eaec635f" + integrity sha512-v9Q5430EnRpyhWNd9LVgXadciKvxLiq+sTrLKRowh26BHyAsams4tZIgX3lFKjB7b90p+FYifVMcqLTTHgjGpQ== + dependencies: + "@types/long" "^4.0.1" + "@types/offscreencanvas" "~2019.7.0" + "@types/seedrandom" "^2.4.28" + "@webgpu/types" "0.1.38" + long "4.0.0" + node-fetch "~2.6.1" + seedrandom "^3.0.5" + +"@tensorflow/tfjs-data@4.17.0": + version "4.17.0" + resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-data/-/tfjs-data-4.17.0.tgz#612c095576724f1dbec9488373e9ed6027282a05" + integrity sha512-aPKrDFip+gXicWOFALeNT7KKQjRXFkHd/hNe/zs4mCFcIN00hy1PkZ6xkYsgrsdLDQMBSGeS4B4ZM0k5Cs88QA== + dependencies: + "@types/node-fetch" "^2.1.2" + node-fetch "~2.6.1" + string_decoder "^1.3.0" + +"@tensorflow/tfjs-layers@4.17.0": + version "4.17.0" + resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-layers/-/tfjs-layers-4.17.0.tgz#c789f4285f358b192675790365bdd491b490501e" + integrity sha512-DEE0zRKvf3LJ0EcvG5XouJYOgFGWYAneZ0K1d23969z7LfSyqVmBdLC6BTwdLKuJk3ouUJIKXU1TcpFmjDuh7g== + +"@tensorflow/tfjs-node@^4.17.0": + version "4.17.0" + resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-node/-/tfjs-node-4.17.0.tgz#d35cc1466cc58a5a12d27e6fed4b730d8b0a982d" + integrity sha512-lRe5XPwLzVgpLoxgKWWlqCX9uYybklMai3npgVcvniLQnd6JjkGx+RY2D+7jyQmdo1zJUACfxw3conP88OcBug== + dependencies: + "@mapbox/node-pre-gyp" "1.0.9" + "@tensorflow/tfjs" "4.17.0" + adm-zip "^0.5.2" + google-protobuf "^3.9.2" + https-proxy-agent "^2.2.1" + progress "^2.0.0" + rimraf "^2.6.2" + tar "^4.4.6" + +"@tensorflow/tfjs@4.17.0", "@tensorflow/tfjs@^4.17.0": + version "4.17.0" + resolved "https://registry.yarnpkg.com/@tensorflow/tfjs/-/tfjs-4.17.0.tgz#895fed25872a572cb1ec4ddee91586c2590c4019" + integrity sha512-yXRBhpM3frlNA/YaPp6HNk9EfIi8han5RYeQA3R8OCa0Od+AfoG1PUmlxV8fE2wCorlGVyHsgpiJ6M9YZPB56w== + dependencies: + "@tensorflow/tfjs-backend-cpu" "4.17.0" + "@tensorflow/tfjs-backend-webgl" "4.17.0" + "@tensorflow/tfjs-converter" "4.17.0" + "@tensorflow/tfjs-core" "4.17.0" + "@tensorflow/tfjs-data" "4.17.0" + "@tensorflow/tfjs-layers" "4.17.0" + argparse "^1.0.10" + chalk "^4.1.0" + core-js "3.29.1" + regenerator-runtime "^0.13.5" + yargs "^16.0.3" + "@todesktop/runtime@^1.3.0": version "1.3.0" resolved "https://registry.yarnpkg.com/@todesktop/runtime/-/runtime-1.3.0.tgz#7baa64fd5c2e4daa591bda96270a0e39947ec3c7" @@ -84,11 +180,24 @@ dependencies: "@types/node" "*" +"@types/long@^4.0.1": + version "4.0.2" + resolved "https://registry.yarnpkg.com/@types/long/-/long-4.0.2.tgz#b74129719fc8d11c01868010082d483b7545591a" + integrity sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA== + "@types/luxon@~3.3.0": version "3.3.1" resolved "https://registry.yarnpkg.com/@types/luxon/-/luxon-3.3.1.tgz#08727da7d81ee6a6c702b9dc6c8f86be010eb4dc" integrity sha512-XOS5nBcgEeP2PpcqJHjCWhUCAzGfXIU8ILOSLpx2FhxqMW9KdxgCGXNOEKGVBfveKtIpztHzKK5vSRVLyW/NqA== +"@types/node-fetch@^2.1.2": + version "2.6.11" + resolved "https://registry.yarnpkg.com/@types/node-fetch/-/node-fetch-2.6.11.tgz#9b39b78665dae0e82a08f02f4967d62c66f95d24" + integrity sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g== + dependencies: + "@types/node" "*" + form-data "^4.0.0" + "@types/node@*": version "20.5.7" resolved "https://registry.yarnpkg.com/@types/node/-/node-20.5.7.tgz#4b8ecac87fbefbc92f431d09c30e176fc0a7c377" @@ -99,6 +208,16 @@ resolved "https://registry.yarnpkg.com/@types/node/-/node-18.17.12.tgz#c6bd7413a13e6ad9cfb7e97dd5c4e904c1821e50" integrity sha512-d6xjC9fJ/nSnfDeU0AMDsaJyb1iHsqCSOdi84w4u+SlN/UgQdY5tRhpMzaFYsI4mnpvgTivEaQd0yOUhAtOnEQ== +"@types/offscreencanvas@~2019.3.0": + version "2019.3.0" + resolved "https://registry.yarnpkg.com/@types/offscreencanvas/-/offscreencanvas-2019.3.0.tgz#3336428ec7e9180cf4566dfea5da04eb586a6553" + integrity sha512-esIJx9bQg+QYF0ra8GnvfianIY8qWB0GBx54PK5Eps6m+xTj86KLavHv6qDhzKcu5UUOgNfJ2pWaIIV7TRUd9Q== + +"@types/offscreencanvas@~2019.7.0": + version "2019.7.3" + resolved "https://registry.yarnpkg.com/@types/offscreencanvas/-/offscreencanvas-2019.7.3.tgz#90267db13f64d6e9ccb5ae3eac92786a7c77a516" + integrity sha512-ieXiYmgSRXUDeOntE1InxjWyvEelZGP63M+cGuquuRLuIKKT1osnkXjxev9B7d1nXSug5vpunx+gNlbVxMlC9A== + "@types/responselike@^1.0.0": version "1.0.0" resolved "https://registry.yarnpkg.com/@types/responselike/-/responselike-1.0.0.tgz#251f4fe7d154d2bad125abe1b429b23afd262e29" @@ -106,6 +225,11 @@ dependencies: "@types/node" "*" +"@types/seedrandom@^2.4.28": + version "2.4.34" + resolved "https://registry.yarnpkg.com/@types/seedrandom/-/seedrandom-2.4.34.tgz#c725cd0fc0442e2d3d0e5913af005686ffb7eb99" + integrity sha512-ytDiArvrn/3Xk6/vtylys5tlY6eo7Ane0hvcx++TKo6RxQXuVfW0AF/oeWqAj9dN29SyhtawuXstgmPlwNcv/A== + "@types/semver@^7.3.6": version "7.5.1" resolved "https://registry.yarnpkg.com/@types/semver/-/semver-7.5.1.tgz#0480eeb7221eb9bc398ad7432c9d7e14b1a5a367" @@ -118,6 +242,35 @@ dependencies: "@types/node" "*" +"@webgpu/types@0.1.38": + version "0.1.38" + resolved "https://registry.yarnpkg.com/@webgpu/types/-/types-0.1.38.tgz#6fda4b410edc753d3213c648320ebcf319669020" + integrity sha512-7LrhVKz2PRh+DD7+S+PVaFd5HxaWQvoMqBbsV9fNJO1pjUs1P8bM2vQVNfk+3URTqbuTI7gkXi0rfsN0IadoBA== + +abbrev@1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/abbrev/-/abbrev-1.1.1.tgz#f8f2c887ad10bf67f634f005b6987fed3179aac8" + integrity sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q== + +adm-zip@^0.5.2: + version "0.5.12" + resolved "https://registry.yarnpkg.com/adm-zip/-/adm-zip-0.5.12.tgz#87786328e91d54b37358d8a50f954c4cd73ba60b" + integrity sha512-6TVU49mK6KZb4qG6xWaaM4C7sA/sgUMLy/JYMOzkcp3BvVLpW0fXDFQiIzAuxFCt/2+xD7fNIiPFAoLZPhVNLQ== + +agent-base@6: + version "6.0.2" + resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-6.0.2.tgz#49fff58577cfee3f37176feab4c22e00f86d7f77" + integrity sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ== + dependencies: + debug "4" + +agent-base@^4.3.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-4.3.0.tgz#8165f01c436009bccad0b1d122f05ed770efc6ee" + integrity sha512-salcGninV0nPrwpGNn4VTXBb1SOuXQBiqbrNXoeizJsHrsL6ERFM2Ne3JUSBWRE6aeNJI2ROP/WEEIDUiDe3cg== + dependencies: + es6-promisify "^5.0.0" + aggregate-error@^3.0.0: version "3.1.0" resolved "https://registry.yarnpkg.com/aggregate-error/-/aggregate-error-3.1.0.tgz#92670ff50f5359bdb7a3e0d40d0ec30c5737687a" @@ -143,6 +296,38 @@ ajv@^8.0.0, ajv@^8.6.3: require-from-string "^2.0.2" uri-js "^4.2.2" +ansi-regex@^5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.1.tgz#082cb2c89c9fe8659a311a53bd6a4dc5301db304" + integrity sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ== + +ansi-styles@^4.0.0, ansi-styles@^4.1.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937" + integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg== + dependencies: + color-convert "^2.0.1" + +"aproba@^1.0.3 || ^2.0.0": + version "2.0.0" + resolved "https://registry.yarnpkg.com/aproba/-/aproba-2.0.0.tgz#52520b8ae5b569215b354efc0caa3fe1e45a8adc" + integrity sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ== + +are-we-there-yet@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/are-we-there-yet/-/are-we-there-yet-2.0.0.tgz#372e0e7bd279d8e94c653aaa1f67200884bf3e1c" + integrity sha512-Ci/qENmwHnsYo9xKIcUJN5LeDKdJ6R1Z1j9V/J5wyq8nh/mYPEpIKJbBZXtZjG04HiK7zV/p6Vs9952MrMeUIw== + dependencies: + delegates "^1.0.0" + readable-stream "^3.6.0" + +argparse@^1.0.10: + version "1.0.10" + resolved "https://registry.yarnpkg.com/argparse/-/argparse-1.0.10.tgz#bcd6791ea5ae09725e17e5ad988134cd40b3d911" + integrity sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg== + dependencies: + sprintf-js "~1.0.2" + argparse@^2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/argparse/-/argparse-2.0.1.tgz#246f50f3ca78a3240f6c997e8a9bd1eac49e4b38" @@ -228,11 +413,43 @@ cacheable-request@^7.0.2: normalize-url "^6.0.1" responselike "^2.0.0" +chalk@^4.1.0: + version "4.1.2" + resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.2.tgz#aac4e2b7734a740867aeb16bf02aad556a1e7a01" + integrity sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA== + dependencies: + ansi-styles "^4.1.0" + supports-color "^7.1.0" + +chalk@^5.3.0: + version "5.3.0" + resolved "https://registry.yarnpkg.com/chalk/-/chalk-5.3.0.tgz#67c20a7ebef70e7f3970a01f90fa210cb6860385" + integrity sha512-dLitG79d+GV1Nb/VYcCDFivJeK1hiukt9QjRNVOsUtTy1rR1YJsmpGGTZ3qJos+uw7WmWF4wUwBd9jxjocFC2w== + +chownr@^1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/chownr/-/chownr-1.1.4.tgz#6fc9d7b42d32a583596337666e7d08084da2cc6b" + integrity sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg== + +chownr@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/chownr/-/chownr-2.0.0.tgz#15bfbe53d2eab4cf70f18a8cd68ebe5b3cb1dece" + integrity sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ== + clean-stack@^2.0.0: version "2.2.0" resolved "https://registry.yarnpkg.com/clean-stack/-/clean-stack-2.2.0.tgz#ee8472dbb129e727b31e8a10a427dee9dfe4008b" integrity sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A== +cliui@^7.0.2: + version "7.0.4" + resolved "https://registry.yarnpkg.com/cliui/-/cliui-7.0.4.tgz#a0265ee655476fc807aea9df3df8df7783808b4f" + integrity sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ== + dependencies: + string-width "^4.2.0" + strip-ansi "^6.0.0" + wrap-ansi "^7.0.0" + clone-response@^1.0.2: version "1.0.3" resolved "https://registry.yarnpkg.com/clone-response/-/clone-response-1.0.3.tgz#af2032aa47816399cf5f0a1d0db902f517abb8c3" @@ -240,6 +457,23 @@ clone-response@^1.0.2: dependencies: mimic-response "^1.0.0" +color-convert@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3" + integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ== + dependencies: + color-name "~1.1.4" + +color-name@~1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" + integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== + +color-support@^1.1.2: + version "1.1.3" + resolved "https://registry.yarnpkg.com/color-support/-/color-support-1.1.3.tgz#93834379a1cc9a0c61f82f52f0d04322251bd5a2" + integrity sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg== + combined-stream@^1.0.8: version "1.0.8" resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f" @@ -247,6 +481,11 @@ combined-stream@^1.0.8: dependencies: delayed-stream "~1.0.0" +commander@^12.0.0: + version "12.0.0" + resolved "https://registry.yarnpkg.com/commander/-/commander-12.0.0.tgz#b929db6df8546080adfd004ab215ed48cf6f2592" + integrity sha512-MwVNWlYjDTtOjX5PiD7o5pK0UrFU/OYgcJfjjK4RaHZETNtjJqrZa9Y9ds88+A+f+d5lv+561eZ+yCKoS3gbAA== + concat-map@0.0.1: version "0.0.1" resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" @@ -268,6 +507,16 @@ conf@^10.2.0: pkg-up "^3.1.0" semver "^7.3.5" +console-control-strings@^1.0.0, console-control-strings@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/console-control-strings/-/console-control-strings-1.1.0.tgz#3d7cf4464db6446ea644bf4b39507f9851008e8e" + integrity sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ== + +core-js@3.29.1: + version "3.29.1" + resolved "https://registry.yarnpkg.com/core-js/-/core-js-3.29.1.tgz#40ff3b41588b091aaed19ca1aa5cb111803fa9a6" + integrity sha512-+jwgnhg6cQxKYIIjGtAHq2nwUOolo9eoFZ4sHfUH09BLXBgxnH4gA0zEd+t+BO2cNB8idaBtZFcFTRjQJRJmAw== + cron@^2.4.3: version "2.4.3" resolved "https://registry.yarnpkg.com/cron/-/cron-2.4.3.tgz#4e43d8d9a6373b8f28d876c4e9a47c14422d8652" @@ -292,13 +541,20 @@ debounce-fn@^4.0.0: dependencies: mimic-fn "^3.0.0" -debug@^4.1.0, debug@^4.1.1, debug@^4.3.2: +debug@4, debug@^4.1.0, debug@^4.1.1, debug@^4.3.2: version "4.3.4" resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ== dependencies: ms "2.1.2" +debug@^3.1.0: + version "3.2.7" + resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a" + integrity sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ== + dependencies: + ms "^2.1.1" + decompress-response@^6.0.0: version "6.0.0" resolved "https://registry.yarnpkg.com/decompress-response/-/decompress-response-6.0.0.tgz#ca387612ddb7e104bd16d85aab00d5ecf09c66fc" @@ -338,6 +594,16 @@ delayed-stream@~1.0.0: resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619" integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ== +delegates@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/delegates/-/delegates-1.0.0.tgz#84c6e159b81904fdca59a0ef44cd870d31250f9a" + integrity sha512-bd2L678uiWATM6m5Z1VzNCErI3jiGzt6HGY8OVICs40JQq/HALfbyNJmp0UDakEY4pMMaN0Ly5om/B1VI/+xfQ== + +detect-libc@^2.0.0: + version "2.0.3" + resolved "https://registry.yarnpkg.com/detect-libc/-/detect-libc-2.0.3.tgz#f0cd503b40f9939b894697d19ad50895e30cf700" + integrity sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw== + detect-node@^2.0.4: version "2.1.0" resolved "https://registry.yarnpkg.com/detect-node/-/detect-node-2.1.0.tgz#c9c70775a49c3d03bc2c06d9a73be550f978f8b1" @@ -388,6 +654,11 @@ electron@28.2.1: "@types/node" "^18.11.18" extract-zip "^2.0.1" +emoji-regex@^8.0.0: + version "8.0.0" + resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37" + integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A== + end-of-stream@^1.1.0: version "1.4.4" resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0" @@ -405,6 +676,23 @@ es6-error@^4.1.1: resolved "https://registry.yarnpkg.com/es6-error/-/es6-error-4.1.1.tgz#9e3af407459deed47e9a91f9b885a84eb05c561d" integrity sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg== +es6-promise@^4.0.3: + version "4.2.8" + resolved "https://registry.yarnpkg.com/es6-promise/-/es6-promise-4.2.8.tgz#4eb21594c972bc40553d276e510539143db53e0a" + integrity sha512-HJDGx5daxeIvxdBxvG2cb9g4tEvwIk3i8+nhX0yGrYmZUzbkdg8QbDevheDB8gd0//uPj4c1EQua8Q+MViT0/w== + +es6-promisify@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/es6-promisify/-/es6-promisify-5.0.0.tgz#5109d62f3e56ea967c4b63505aef08291c8a5203" + integrity sha512-C+d6UdsYDk0lMebHNR4S2NybQMMngAOnOwYBQjTOiv0MkoJMP0Myw2mgpDLBcpfCmRLxyFqYhS/CfOENq4SJhQ== + dependencies: + es6-promise "^4.0.3" + +escalade@^3.1.1: + version "3.1.2" + resolved "https://registry.yarnpkg.com/escalade/-/escalade-3.1.2.tgz#54076e9ab29ea5bf3d8f1ed62acffbb88272df27" + integrity sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA== + escape-string-regexp@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34" @@ -517,21 +805,50 @@ fs-extra@^8.1.0: jsonfile "^4.0.0" universalify "^0.1.0" +fs-minipass@^1.2.7: + version "1.2.7" + resolved "https://registry.yarnpkg.com/fs-minipass/-/fs-minipass-1.2.7.tgz#ccff8570841e7fe4265693da88936c55aed7f7c7" + integrity sha512-GWSSJGFy4e9GUeCcbIkED+bgAoFyj7XF1mV8rma3QW4NIqX9Kyx79N/PF61H5udOV3aY1IaMLs6pGbH71nlCTA== + dependencies: + minipass "^2.6.0" + +fs-minipass@^2.0.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/fs-minipass/-/fs-minipass-2.1.0.tgz#7f5036fdbf12c63c169190cbe4199c852271f9fb" + integrity sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg== + dependencies: + minipass "^3.0.0" + fs.realpath@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f" integrity sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw== -fs@^0.0.1-security: - version "0.0.1-security" - resolved "https://registry.yarnpkg.com/fs/-/fs-0.0.1-security.tgz#8a7bd37186b6dddf3813f23858b57ecaaf5e41d4" - integrity sha512-3XY9e1pP0CVEUCdj5BmfIZxRBTSDycnbqhIOGec9QYtmVH2fbLpj86CFWkrNOkt/Fvty4KZG5lTglL9j/gJ87w== - function-bind@^1.1.1: version "1.1.1" resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d" integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A== +gauge@^3.0.0: + version "3.0.2" + resolved "https://registry.yarnpkg.com/gauge/-/gauge-3.0.2.tgz#03bf4441c044383908bcfa0656ad91803259b395" + integrity sha512-+5J6MS/5XksCuXq++uFRsnUd7Ovu1XenbeuIuNRJxYWjgQbPuFhT14lAvsWfqfAmnwluf1OwMjz39HjfLPci0Q== + dependencies: + aproba "^1.0.3 || ^2.0.0" + color-support "^1.1.2" + console-control-strings "^1.0.0" + has-unicode "^2.0.1" + object-assign "^4.1.1" + signal-exit "^3.0.0" + string-width "^4.2.3" + strip-ansi "^6.0.1" + wide-align "^1.1.2" + +get-caller-file@^2.0.5: + version "2.0.5" + resolved "https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-2.0.5.tgz#4f94412a82db32f36e3b0b9741f8a97feb031f7e" + integrity sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg== + get-intrinsic@^1.1.1: version "1.2.1" resolved "https://registry.yarnpkg.com/get-intrinsic/-/get-intrinsic-1.2.1.tgz#d295644fed4505fc9cde952c37ee12b477a83d82" @@ -604,6 +921,11 @@ globby@^11.0.1: merge2 "^1.4.1" slash "^3.0.0" +google-protobuf@^3.9.2: + version "3.21.2" + resolved "https://registry.yarnpkg.com/google-protobuf/-/google-protobuf-3.21.2.tgz#4580a2bea8bbb291ee579d1fefb14d6fa3070ea4" + integrity sha512-3MSOYFO5U9mPGikIYCzK0SaThypfGgS6bHqrUGXG3DPHCrb+txNqeEcns1W0lkGfk0rCyNXm7xB9rMxnCiZOoA== + got@^11.8.5: version "11.8.6" resolved "https://registry.yarnpkg.com/got/-/got-11.8.6.tgz#276e827ead8772eddbcfc97170590b841823233a" @@ -626,6 +948,11 @@ graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.4: resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.11.tgz#4183e4e8bf08bb6e05bbb2f7d2e0c8f712ca40e3" integrity sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ== +has-flag@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b" + integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ== + has-property-descriptors@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/has-property-descriptors/-/has-property-descriptors-1.0.0.tgz#610708600606d36961ed04c196193b6a607fa861" @@ -643,6 +970,11 @@ has-symbols@^1.0.3: resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.3.tgz#bb7b2c4349251dce87b125f7bdf874aa7c8b39f8" integrity sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A== +has-unicode@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/has-unicode/-/has-unicode-2.0.1.tgz#e0e6fe6a28cf51138855e086d1691e771de2a8b9" + integrity sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ== + has@^1.0.3: version "1.0.3" resolved "https://registry.yarnpkg.com/has/-/has-1.0.3.tgz#722d7cbfc1f6aa8241f16dd814e011e1f41e8796" @@ -663,6 +995,22 @@ http2-wrapper@^1.0.0-beta.5.2: quick-lru "^5.1.1" resolve-alpn "^1.0.0" +https-proxy-agent@^2.2.1: + version "2.2.4" + resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-2.2.4.tgz#4ee7a737abd92678a293d9b34a1af4d0d08c787b" + integrity sha512-OmvfoQ53WLjtA9HeYP9RNrWMJzzAz1JGaSFr1nijg0PVR1JaD/xbJq1mdEIIlxGpXp9eSe/O2LgU9DJmTPd0Eg== + dependencies: + agent-base "^4.3.0" + debug "^3.1.0" + +https-proxy-agent@^5.0.0: + version "5.0.1" + resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz#c59ef224a04fe8b754f3db0063a25ea30d0005d6" + integrity sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA== + dependencies: + agent-base "6" + debug "4" + human-signals@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-2.1.0.tgz#dc91fcba42e4d06e4abaed33b3e7a3c02f514ea0" @@ -686,7 +1034,7 @@ inflight@^1.0.4: once "^1.3.0" wrappy "1" -inherits@2: +inherits@2, inherits@^2.0.3: version "2.0.4" resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c" integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== @@ -696,6 +1044,11 @@ is-extglob@^2.1.1: resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2" integrity sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ== +is-fullwidth-code-point@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz#f116f8064fe90b3f7844a38997c0b75051269f1d" + integrity sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg== + is-glob@^4.0.1: version "4.0.3" resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.3.tgz#64f61e42cbbb2eec2071a9dac0b28ba1e65d5084" @@ -811,6 +1164,11 @@ lodash.once@^4.1.1: resolved "https://registry.yarnpkg.com/lodash.once/-/lodash.once-4.1.1.tgz#0dd3971213c7c56df880977d504c88fb471a97ac" integrity sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg== +long@4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/long/-/long-4.0.0.tgz#9a7b71cfb7d361a194ea555241c92f7468d5bf28" + integrity sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA== + lowercase-keys@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/lowercase-keys/-/lowercase-keys-2.0.0.tgz#2603e78b7b4b0006cbca2fbcc8a3202558ac9479" @@ -828,6 +1186,24 @@ luxon@~3.3.0: resolved "https://registry.yarnpkg.com/luxon/-/luxon-3.3.0.tgz#d73ab5b5d2b49a461c47cedbc7e73309b4805b48" integrity sha512-An0UCfG/rSiqtAIiBPO0Y9/zAnHUZxAMiCpTd5h2smgsj7GGmcenvrvww2cqNA8/4A5ZrD1gJpHN2mIHZQF+Mg== +magika@^0.2.13: + version "0.2.13" + resolved "https://registry.yarnpkg.com/magika/-/magika-0.2.13.tgz#f39bc565f0d3703b592d3fc155a5d3ee056b0471" + integrity sha512-CIMx6aJ6SCX2uiAdLpghGeQPhMIOsnvmVpYjX/ao5ks0PFvJF1f9fWNGlayGfazNpJmTA1cuZG21DVVhfZkoUw== + dependencies: + "@tensorflow/tfjs" "^4.17.0" + optionalDependencies: + "@tensorflow/tfjs-node" "^4.17.0" + chalk "^5.3.0" + commander "^12.0.0" + +make-dir@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-3.1.0.tgz#415e967046b3a7f1d185277d84aa58203726a13f" + integrity sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw== + dependencies: + semver "^6.0.0" + matcher@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/matcher/-/matcher-3.0.0.tgz#bd9060f4c5b70aa8041ccc6f80368760994f30ca" @@ -892,11 +1268,89 @@ minimatch@^3.1.1: dependencies: brace-expansion "^1.1.7" +minimist@^1.2.6: + version "1.2.8" + resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.8.tgz#c1a464e7693302e082a075cee0c057741ac4772c" + integrity sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA== + +minipass@^2.6.0, minipass@^2.9.0: + version "2.9.0" + resolved "https://registry.yarnpkg.com/minipass/-/minipass-2.9.0.tgz#e713762e7d3e32fed803115cf93e04bca9fcc9a6" + integrity sha512-wxfUjg9WebH+CUDX/CdbRlh5SmfZiy/hpkxaRI16Y9W56Pa75sWgd/rvFilSgrauD9NyFymP/+JFV3KwzIsJeg== + dependencies: + safe-buffer "^5.1.2" + yallist "^3.0.0" + +minipass@^3.0.0: + version "3.3.6" + resolved "https://registry.yarnpkg.com/minipass/-/minipass-3.3.6.tgz#7bba384db3a1520d18c9c0e5251c3444e95dd94a" + integrity sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw== + dependencies: + yallist "^4.0.0" + +minipass@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/minipass/-/minipass-5.0.0.tgz#3e9788ffb90b694a5d0ec94479a45b5d8738133d" + integrity sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ== + +minizlib@^1.3.3: + version "1.3.3" + resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-1.3.3.tgz#2290de96818a34c29551c8a8d301216bd65a861d" + integrity sha512-6ZYMOEnmVsdCeTJVE0W9ZD+pVnE8h9Hma/iOwwRDsdQoePpoX56/8B6z3P9VNwppJuBKNRuFDRNRqRWexT9G9Q== + dependencies: + minipass "^2.9.0" + +minizlib@^2.1.1: + version "2.1.2" + resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-2.1.2.tgz#e90d3466ba209b932451508a11ce3d3632145931" + integrity sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg== + dependencies: + minipass "^3.0.0" + yallist "^4.0.0" + +mkdirp@^0.5.5: + version "0.5.6" + resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.6.tgz#7def03d2432dcae4ba1d611445c48396062255f6" + integrity sha512-FP+p8RB8OWpF3YZBCrP5gtADmtXApB5AMLn+vdyA+PyxCjrCs00mjyUozssO33cwDeT3wNGdLxJ5M//YqtHAJw== + dependencies: + minimist "^1.2.6" + +mkdirp@^1.0.3: + version "1.0.4" + resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e" + integrity sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw== + ms@2.1.2: version "2.1.2" resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== +ms@^2.1.1: + version "2.1.3" + resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2" + integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA== + +node-fetch@^2.6.7: + version "2.7.0" + resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.7.0.tgz#d0f0fa6e3e2dc1d27efcd8ad99d550bda94d187d" + integrity sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A== + dependencies: + whatwg-url "^5.0.0" + +node-fetch@~2.6.1: + version "2.6.13" + resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.13.tgz#a20acbbec73c2e09f9007de5cda17104122e0010" + integrity sha512-StxNAxh15zr77QvvkmveSQ8uCQ4+v5FkvNTj0OESmiHu+VRi/gXArXtkWMElOsOUNLtUEvI4yS+rdtOHZTwlQA== + dependencies: + whatwg-url "^5.0.0" + +nopt@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/nopt/-/nopt-5.0.0.tgz#530942bb58a512fccafe53fe210f13a25355dc88" + integrity sha512-Tbj67rffqceeLpcRXrT7vKAN8CwfPeIBgM7E6iBkmKLV7bEMwpGgYLGv0jACUsECaa/vuxP0IjEont6umdMgtQ== + dependencies: + abbrev "1" + normalize-url@^6.0.1: version "6.1.0" resolved "https://registry.yarnpkg.com/normalize-url/-/normalize-url-6.1.0.tgz#40d0885b535deffe3f3147bec877d05fe4c5668a" @@ -909,6 +1363,21 @@ npm-run-path@^4.0.1: dependencies: path-key "^3.0.0" +npmlog@^5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/npmlog/-/npmlog-5.0.1.tgz#f06678e80e29419ad67ab964e0fa69959c1eb8b0" + integrity sha512-AqZtDUWOMKs1G/8lwylVjrdYgqA4d9nu8hc+0gzRxlDb1I10+FHBGMXs6aiQHFdCUUlqH99MUMuLfzWDNDtfxw== + dependencies: + are-we-there-yet "^2.0.0" + console-control-strings "^1.1.0" + gauge "^3.0.0" + set-blocking "^2.0.0" + +object-assign@^4.1.1: + version "4.1.1" + resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863" + integrity sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg== + object-keys@^1.1.1: version "1.1.1" resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e" @@ -996,7 +1465,7 @@ pkg-up@^3.1.0: dependencies: find-up "^3.0.0" -progress@^2.0.3: +progress@^2.0.0, progress@^2.0.3: version "2.0.3" resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8" integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA== @@ -1029,6 +1498,25 @@ quick-lru@^5.1.1: resolved "https://registry.yarnpkg.com/quick-lru/-/quick-lru-5.1.1.tgz#366493e6b3e42a3a6885e2e99d18f80fb7a8c932" integrity sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA== +readable-stream@^3.6.0: + version "3.6.2" + resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.2.tgz#56a9b36ea965c00c5a93ef31eb111a0f11056967" + integrity sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA== + dependencies: + inherits "^2.0.3" + string_decoder "^1.1.1" + util-deprecate "^1.0.1" + +regenerator-runtime@^0.13.5: + version "0.13.11" + resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz#f6dca3e7ceec20590d07ada785636a90cdca17f9" + integrity sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg== + +require-directory@^2.1.1: + version "2.1.1" + resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42" + integrity sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q== + require-from-string@^2.0.2: version "2.0.2" resolved "https://registry.yarnpkg.com/require-from-string/-/require-from-string-2.0.2.tgz#89a7fdd938261267318eafe14f9c32e598c36909" @@ -1051,6 +1539,13 @@ reusify@^1.0.4: resolved "https://registry.yarnpkg.com/reusify/-/reusify-1.0.4.tgz#90da382b1e126efc02146e90845a88db12925d76" integrity sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw== +rimraf@^2.6.2: + version "2.7.1" + resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-2.7.1.tgz#35797f13a7fdadc566142c29d4f07ccad483e3ec" + integrity sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w== + dependencies: + glob "^7.1.3" + rimraf@^3.0.2: version "3.0.2" resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a" @@ -1077,17 +1572,27 @@ run-parallel@^1.1.9: dependencies: queue-microtask "^1.2.2" +safe-buffer@^5.1.2, safe-buffer@^5.2.1, safe-buffer@~5.2.0: + version "5.2.1" + resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6" + integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ== + sax@^1.2.4: version "1.2.4" resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.4.tgz#2816234e2378bddc4e5354fab5caa895df7100d9" integrity sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw== +seedrandom@^3.0.5: + version "3.0.5" + resolved "https://registry.yarnpkg.com/seedrandom/-/seedrandom-3.0.5.tgz#54edc85c95222525b0c7a6f6b3543d8e0b3aa0a7" + integrity sha512-8OwmbklUNzwezjGInmZ+2clQmExQPvomqjL7LFqOYqtmuxRgQYqOD3mHaU+MvZn5FLUeVxVfQjwLZW/n/JFuqg== + semver-compare@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/semver-compare/-/semver-compare-1.0.0.tgz#0dee216a1c941ab37e9efb1788f6afc5ff5537fc" integrity sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow== -semver@^6.2.0: +semver@^6.0.0, semver@^6.2.0: version "6.3.1" resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4" integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA== @@ -1106,6 +1611,11 @@ serialize-error@^7.0.1: dependencies: type-fest "^0.13.1" +set-blocking@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7" + integrity sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw== + shebang-command@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-2.0.0.tgz#ccd0af4f8835fbdc265b82461aaf0c36663f34ea" @@ -1118,7 +1628,7 @@ shebang-regex@^3.0.0: resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-3.0.0.tgz#ae16f1644d873ecad843b0307b143362d4c42172" integrity sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A== -signal-exit@^3.0.3: +signal-exit@^3.0.0, signal-exit@^3.0.3: version "3.0.7" resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.7.tgz#a9a1767f8af84155114eaabd73f99273c8f59ad9" integrity sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ== @@ -1133,6 +1643,34 @@ sprintf-js@^1.1.2: resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.1.2.tgz#da1765262bf8c0f571749f2ad6c26300207ae673" integrity sha512-VE0SOVEHCk7Qc8ulkWw3ntAzXuqf7S2lvwQaDLRnUeIEaKNQJzV6BwmLKhOqT61aGhfUMrXeaBk+oDGCzvhcug== +sprintf-js@~1.0.2: + version "1.0.3" + resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c" + integrity sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g== + +"string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: + version "4.2.3" + resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" + integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== + dependencies: + emoji-regex "^8.0.0" + is-fullwidth-code-point "^3.0.0" + strip-ansi "^6.0.1" + +string_decoder@^1.1.1, string_decoder@^1.3.0: + version "1.3.0" + resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.3.0.tgz#42f114594a46cf1a8e30b0a84f56c78c3edac21e" + integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA== + dependencies: + safe-buffer "~5.2.0" + +strip-ansi@^6.0.0, strip-ansi@^6.0.1: + version "6.0.1" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" + integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== + dependencies: + ansi-regex "^5.0.1" + strip-final-newline@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/strip-final-newline/-/strip-final-newline-2.0.0.tgz#89b852fb2fcbe936f6f4b3187afb0a12c1ab58ad" @@ -1145,6 +1683,38 @@ sumchecker@^3.0.1: dependencies: debug "^4.1.0" +supports-color@^7.1.0: + version "7.2.0" + resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da" + integrity sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw== + dependencies: + has-flag "^4.0.0" + +tar@^4.4.6: + version "4.4.19" + resolved "https://registry.yarnpkg.com/tar/-/tar-4.4.19.tgz#2e4d7263df26f2b914dee10c825ab132123742f3" + integrity sha512-a20gEsvHnWe0ygBY8JbxoM4w3SJdhc7ZAuxkLqh+nvNQN2IOt0B5lLgM490X5Hl8FF0dl0tOf2ewFYAlIFgzVA== + dependencies: + chownr "^1.1.4" + fs-minipass "^1.2.7" + minipass "^2.9.0" + minizlib "^1.3.3" + mkdirp "^0.5.5" + safe-buffer "^5.2.1" + yallist "^3.1.1" + +tar@^6.1.11: + version "6.2.1" + resolved "https://registry.yarnpkg.com/tar/-/tar-6.2.1.tgz#717549c541bc3c2af15751bea94b1dd068d4b03a" + integrity sha512-DZ4yORTwrbTj/7MZYq2w+/ZFdI6OZ/f9SFHR+71gIVUZhOQPHzVCLpvRnPgyaMpfWxxk/4ONva3GQSyNIKRv6A== + dependencies: + chownr "^2.0.0" + fs-minipass "^2.0.0" + minipass "^5.0.0" + minizlib "^2.1.1" + mkdirp "^1.0.3" + yallist "^4.0.0" + to-regex-range@^5.0.1: version "5.0.1" resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4" @@ -1152,6 +1722,11 @@ to-regex-range@^5.0.1: dependencies: is-number "^7.0.0" +tr46@~0.0.3: + version "0.0.3" + resolved "https://registry.yarnpkg.com/tr46/-/tr46-0.0.3.tgz#8184fd347dac9cdc185992f3a6622e14b9d9ab6a" + integrity sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw== + type-fest@^0.13.1: version "0.13.1" resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.13.1.tgz#0172cb5bce80b0bd542ea348db50c7e21834d934" @@ -1179,6 +1754,24 @@ uri-js@^4.2.2: dependencies: punycode "^2.1.0" +util-deprecate@^1.0.1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf" + integrity sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw== + +webidl-conversions@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz#24534275e2a7bc6be7bc86611cc16ae0a5654871" + integrity sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ== + +whatwg-url@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-5.0.0.tgz#966454e8765462e37644d3626f6742ce8b70965d" + integrity sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw== + dependencies: + tr46 "~0.0.3" + webidl-conversions "^3.0.0" + which@^2.0.1: version "2.0.2" resolved "https://registry.yarnpkg.com/which/-/which-2.0.2.tgz#7c6a8dd0a636a0327e10b59c9286eee93f3f51b1" @@ -1186,16 +1779,60 @@ which@^2.0.1: dependencies: isexe "^2.0.0" +wide-align@^1.1.2: + version "1.1.5" + resolved "https://registry.yarnpkg.com/wide-align/-/wide-align-1.1.5.tgz#df1d4c206854369ecf3c9a4898f1b23fbd9d15d3" + integrity sha512-eDMORYaPNZ4sQIuuYPDHdQvf4gyCF9rEEV/yPxGfwPkRodwEgiMUUXTx/dex+Me0wxx53S+NgUHaP7y3MGlDmg== + dependencies: + string-width "^1.0.2 || 2 || 3 || 4" + +wrap-ansi@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" + integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== + dependencies: + ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + wrappy@1: version "1.0.2" resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f" integrity sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ== +y18n@^5.0.5: + version "5.0.8" + resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.8.tgz#7f4934d0f7ca8c56f95314939ddcd2dd91ce1d55" + integrity sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA== + +yallist@^3.0.0, yallist@^3.1.1: + version "3.1.1" + resolved "https://registry.yarnpkg.com/yallist/-/yallist-3.1.1.tgz#dbb7daf9bfd8bac9ab45ebf602b8cbad0d5d08fd" + integrity sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g== + yallist@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72" integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A== +yargs-parser@^20.2.2: + version "20.2.9" + resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.9.tgz#2eb7dc3b0289718fc295f362753845c41a0c94ee" + integrity sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w== + +yargs@^16.0.3: + version "16.2.0" + resolved "https://registry.yarnpkg.com/yargs/-/yargs-16.2.0.tgz#1c82bf0f6b6a66eafce7ef30e376f49a12477f66" + integrity sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw== + dependencies: + cliui "^7.0.2" + escalade "^3.1.1" + get-caller-file "^2.0.5" + require-directory "^2.1.1" + string-width "^4.2.0" + y18n "^5.0.5" + yargs-parser "^20.2.2" + yauzl@^2.10.0: version "2.10.0" resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9" diff --git a/src/khoj/processor/content/github/github_to_entries.py b/src/khoj/processor/content/github/github_to_entries.py index a3b4cdbc..8fc1e18b 100644 --- a/src/khoj/processor/content/github/github_to_entries.py +++ b/src/khoj/processor/content/github/github_to_entries.py @@ -1,19 +1,21 @@ import logging import time -from datetime import datetime -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Tuple import requests +from magika import Magika from khoj.database.models import Entry as DbEntry from khoj.database.models import GithubConfig, KhojUser from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries from khoj.processor.content.org_mode.org_to_entries import OrgToEntries +from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries from khoj.processor.content.text_to_entries import TextToEntries from khoj.utils.helpers import timer -from khoj.utils.rawconfig import Entry, GithubContentConfig, GithubRepoConfig +from khoj.utils.rawconfig import GithubContentConfig, GithubRepoConfig logger = logging.getLogger(__name__) +magika = Magika() class GithubToEntries(TextToEntries): @@ -62,15 +64,18 @@ class GithubToEntries(TextToEntries): repo_url = f"https://api.github.com/repos/{repo.owner}/{repo.name}" repo_shorthand = f"{repo.owner}/{repo.name}" logger.info(f"Processing github repo {repo_shorthand}") - with timer("Download markdown files from github repo", logger): + with timer("Download files from github repo", logger): try: - markdown_files, org_files = self.get_files(repo_url, repo) + markdown_files, org_files, plaintext_files = self.get_files(repo_url, repo) + except ConnectionAbortedError as e: + logger.error(f"Github rate limit reached. Skip indexing github repo {repo_shorthand}") except Exception as e: logger.error(f"Unable to download github repo {repo_shorthand}", exc_info=True) raise e - logger.info(f"Found {len(markdown_files)} markdown files in github repo {repo_shorthand}") - logger.info(f"Found {len(org_files)} org files in github repo {repo_shorthand}") + logger.info( + f"Found {len(markdown_files)} md, {len(org_files)} org and {len(plaintext_files)} text files in github repo {repo_shorthand}" + ) current_entries = [] with timer(f"Extract markdown entries from github repo {repo_shorthand}", logger): @@ -83,14 +88,10 @@ class GithubToEntries(TextToEntries): *GithubToEntries.extract_org_entries(org_files) ) - with timer(f"Extract commit messages from github repo {repo_shorthand}", logger): - current_entries += self.convert_commits_to_entries(self.get_commits(repo_url), repo) - - with timer(f"Extract issues from github repo {repo_shorthand}", logger): - issue_entries = GithubToEntries.convert_issues_to_entries( - *GithubToEntries.extract_github_issues(self.get_issues(repo_url)) + with timer(f"Extract plaintext entries from github repo {repo_shorthand}", logger): + current_entries += PlaintextToEntries.convert_text_files_to_entries( + *GithubToEntries.extract_plaintext_entries(plaintext_files) ) - current_entries += issue_entries with timer(f"Split entries by max token size supported by model {repo_shorthand}", logger): current_entries = TextToEntries.split_entries_by_max_tokens(current_entries, max_tokens=256) @@ -119,16 +120,16 @@ class GithubToEntries(TextToEntries): response = requests.get(repo_content_url, headers=headers, params=params) contents = response.json() - # Wait for rate limit reset if needed - result = self.wait_for_rate_limit_reset(response, self.get_files, repo_url, repo) - if result is not None: - return result + # Raise exception if hit rate limit + if response.status_code != 200 and response.headers.get("X-RateLimit-Remaining") == "0": + raise ConnectionAbortedError("Github rate limit reached") # Extract markdown files from the repository - markdown_files: List[Any] = [] - org_files: List[Any] = [] + markdown_files: List[Dict[str, str]] = [] + org_files: List[Dict[str, str]] = [] + plaintext_files: List[Dict[str, str]] = [] if "tree" not in contents: - return markdown_files, org_files + return markdown_files, org_files, plaintext_files for item in contents["tree"]: # Find all markdown files in the repository @@ -147,144 +148,46 @@ class GithubToEntries(TextToEntries): # Add org file contents and URL to list org_files += [{"content": self.get_file_contents(item["url"]), "path": url_path}] - return markdown_files, org_files + # Find, index remaining non-binary files in the repository + elif item["type"] == "blob": + url_path = f'https://github.com/{repo.owner}/{repo.name}/blob/{repo.branch}/{item["path"]}' + content_bytes = self.get_file_contents(item["url"], decode=False) + content_type, content_str = None, None + try: + content_type = magika.identify_bytes(content_bytes).output.mime_type + content_str = content_bytes.decode("utf-8") + except: + logger.error( + f"Unable to identify content type or decode content of file at {url_path}. Skip indexing it" + ) + continue - def get_file_contents(self, file_url): + # Add non-binary file contents and URL to list + if content_type.startswith("text/"): + plaintext_files += [{"content": content_str, "path": url_path}] + + return markdown_files, org_files, plaintext_files + + def get_file_contents(self, file_url, decode=True): # Get text from each markdown file headers = {"Accept": "application/vnd.github.v3.raw"} response = self.session.get(file_url, headers=headers, stream=True) - # Wait for rate limit reset if needed - result = self.wait_for_rate_limit_reset(response, self.get_file_contents, file_url) - if result is not None: - return result + # Stop indexing on hitting rate limit + if response.status_code != 200 and response.headers.get("X-RateLimit-Remaining") == "0": + raise ConnectionAbortedError("Github rate limit reached") - content = "" + content = "" if decode else b"" for chunk in response.iter_content(chunk_size=2048): if chunk: try: - content += chunk.decode("utf-8") + content += chunk.decode("utf-8") if decode else chunk except Exception as e: logger.error(f"Unable to decode chunk from {file_url}") logger.error(e) return content - def get_commits(self, repo_url: str) -> List[Dict]: - return self._get_commits(f"{repo_url}/commits") - - def _get_commits(self, commits_url: Union[str, None]) -> List[Dict]: - # Get commit messages from the repository using the Github API - params = {"per_page": 100} - commits = [] - - while commits_url is not None: - # Get the next page of commits - response = self.session.get(commits_url, params=params, stream=True) - - # Read the streamed response into a JSON object - content = response.json() - - # Wait for rate limit reset if needed - result = self.wait_for_rate_limit_reset(response, self._get_commits, commits_url) - if result is not None: - return result - - # Extract commit messages from the response - for commit in content: - if "commit" in commit and "message" in commit["commit"] and "html_url" in commit: - commits += [{"content": commit["commit"]["message"], "path": commit["html_url"]}] - else: - logger.debug(f"Skipping commit with missing properties: {commit}") - - # Get the URL for the next page of commits, if any - commits_url = response.links.get("next", {}).get("url") - - return commits - - def get_issues(self, repo_url: str) -> List[Dict]: - return self._get_issues(f"{repo_url}/issues") - - def _get_issues(self, issues_url: Union[str, None]) -> List[Dict]: - issues = [] - per_page = 100 - params = {"per_page": per_page, "state": "all"} - - while issues_url is not None: - # Get the next page of issues - response = self.session.get(issues_url, params=params, stream=True) # type: ignore - raw_issues = response.json() - - # Wait for rate limit reset if needed - result = self.wait_for_rate_limit_reset(response, self._get_issues, issues_url) - if result is not None: - return result - - for issue in raw_issues: - username = issue["user"]["login"] - user_url = f"[{username}]({issue['user']['html_url']})" - issue_content = { - "content": f"## [Issue {issue['number']}]({issue['html_url']}) {issue['title']}\nby {user_url}\n\n{issue['body']}", - "path": issue["html_url"], - } - issue_content["created_at"] = {issue["created_at"]} - if issue["comments"] > 0: - issue_content["comments"] = self.get_comments(issue["comments_url"]) - issues += [issue_content] - - issues_url = response.links.get("next", {}).get("url") - - return issues - - def get_comments(self, comments_url: Union[str, None]) -> List[Dict]: - # By default, the number of results per page is 30. We'll keep it as-is for now. - comments = [] - per_page = 100 - params = {"per_page": per_page} - - while comments_url is not None: - # Get the next page of comments - response = self.session.get(comments_url, params=params, stream=True) - raw_comments = response.json() - - # Wait for rate limit reset if needed - result = self.wait_for_rate_limit_reset(response, self.get_comments, comments_url) - if result is not None: - return result - - for comment in raw_comments: - created_at = datetime.strptime(comment["created_at"], "%Y-%m-%dT%H:%M:%SZ").strftime("%Y-%m-%d %H:%M") - commenter = comment["user"]["login"] - commenter_url = comment["user"]["html_url"] - comment_url = comment["html_url"] - comment_url_link = f"[{created_at}]({comment_url})" - avatar_url = comment["user"]["avatar_url"] - avatar = f"![{commenter}]({avatar_url})" - comments += [ - { - "content": f"### {avatar} [{commenter}]({commenter_url}) - ({comment_url_link})\n\n{comment['body']}" - } - ] - - comments_url = response.links.get("next", {}).get("url") - - return comments - - def convert_commits_to_entries(self, commits, repo: GithubRepoConfig) -> List[Entry]: - entries: List[Entry] = [] - for commit in commits: - compiled = f'Commit message from {repo.owner}/{repo.name}:\n{commit["content"]}' - entries.append( - Entry( - compiled=compiled, - raw=f'### {commit["content"]}', - heading=commit["content"].split("\n")[0], - file=commit["path"], - ) - ) - - return entries - @staticmethod def extract_markdown_entries(markdown_files): entries = [] @@ -307,30 +210,12 @@ class GithubToEntries(TextToEntries): return entries, dict(entry_to_file_map) @staticmethod - def extract_github_issues(issues): + def extract_plaintext_entries(plaintext_files): entries = [] - entry_to_file_map = {} - for issue in issues: - content = issue["content"] - if "comments" in issue: - for comment in issue["comments"]: - content += "\n\n" + comment["content"] - entries.append(content) - entry_to_file_map[content] = {"path": issue["path"]} - return entries, entry_to_file_map + entry_to_file_map = [] - @staticmethod - def convert_issues_to_entries(parsed_entries: List[str], entry_to_metadata_map: Dict[str, Dict]) -> List[Entry]: - entries = [] - for entry in parsed_entries: - entry_file_name = entry_to_metadata_map[entry]["path"] - entries.append( - Entry( - compiled=entry, - raw=entry, - heading=entry.split("\n")[0], - file=entry_file_name, - ) + for doc in plaintext_files: + entries, entry_to_file_map = PlaintextToEntries.process_single_plaintext_file( + doc["content"], doc["path"], entries, entry_to_file_map ) - - return entries + return entries, dict(entry_to_file_map) diff --git a/src/khoj/processor/content/plaintext/plaintext_to_entries.py b/src/khoj/processor/content/plaintext/plaintext_to_entries.py index 4fb0dd2e..c14bc359 100644 --- a/src/khoj/processor/content/plaintext/plaintext_to_entries.py +++ b/src/khoj/processor/content/plaintext/plaintext_to_entries.py @@ -1,7 +1,9 @@ import logging +import re from pathlib import Path -from typing import List, Tuple +from typing import Dict, List, Tuple +import urllib3 from bs4 import BeautifulSoup from khoj.database.models import Entry as DbEntry @@ -28,26 +30,13 @@ class PlaintextToEntries(TextToEntries): else: deletion_file_names = None - with timer("Scrub plaintext files and extract text", logger): - for file in files: - try: - plaintext_content = files[file] - if file.endswith(("html", "htm", "xml")): - plaintext_content = PlaintextToEntries.extract_html_content( - plaintext_content, file.split(".")[-1] - ) - files[file] = plaintext_content - except Exception as e: - logger.warning(f"Unable to read file: {file} as plaintext. Skipping file.") - logger.warning(e, exc_info=True) - # Extract Entries from specified plaintext files - with timer("Parse entries from specified Plaintext files", logger): + with timer("Extract entries from specified Plaintext files", logger): current_entries = PlaintextToEntries.extract_plaintext_entries(files) # Split entries by max tokens supported by model with timer("Split entries by max token size supported by model", logger): - current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=256) + current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=256, raw_is_compiled=True) # Identify, mark and merge any new entries with previous entries with timer("Identify new or updated entries", logger): @@ -74,16 +63,57 @@ class PlaintextToEntries(TextToEntries): return soup.get_text(strip=True, separator="\n") @staticmethod - def extract_plaintext_entries(entry_to_file_map: dict[str, str]) -> List[Entry]: - "Convert each plaintext entries into a dictionary" - entries = [] - for file, entry in entry_to_file_map.items(): + def extract_plaintext_entries(text_files: Dict[str, str]) -> List[Entry]: + entries: List[str] = [] + entry_to_file_map: List[Tuple[str, str]] = [] + for text_file in text_files: + try: + text_content = text_files[text_file] + entries, entry_to_file_map = PlaintextToEntries.process_single_plaintext_file( + text_content, text_file, entries, entry_to_file_map + ) + except Exception as e: + logger.warning(f"Unable to read file: {text_file} as plaintext. Skipping file.") + logger.warning(e, exc_info=True) + + # Extract Entries from specified plaintext files + return PlaintextToEntries.convert_text_files_to_entries(entries, dict(entry_to_file_map)) + + @staticmethod + def process_single_plaintext_file( + text_content: str, + text_file: str, + entries: List[str], + entry_to_file_map: List[Tuple[str, str]], + ) -> Tuple[List[str], List[Tuple[str, str]]]: + if text_file.endswith(("html", "htm", "xml")): + text_content = PlaintextToEntries.extract_html_content(text_content, text_file.split(".")[-1]) + entry_to_file_map += [(text_content, text_file)] + entries.extend([text_content]) + return entries, entry_to_file_map + + @staticmethod + def convert_text_files_to_entries(parsed_entries: List[str], entry_to_file_map: dict[str, str]) -> List[Entry]: + "Convert each plaintext file into an entry" + entries: List[Entry] = [] + for parsed_entry in parsed_entries: + raw_filename = entry_to_file_map[parsed_entry] + # Check if raw_filename is a URL. If so, save it as is. If not, convert it to a Path. + if type(raw_filename) == str and re.search(r"^https?://", raw_filename): + # Escape the URL to avoid issues with special characters + entry_filename = urllib3.util.parse_url(raw_filename).url + else: + entry_filename = raw_filename + + # Append base filename to compiled entry for context to model entries.append( Entry( - raw=entry, - file=file, - compiled=f"{Path(file).stem}\n{entry}", - heading=Path(file).stem, + raw=parsed_entry, + file=f"{entry_filename}", + compiled=f"{entry_filename}\n{parsed_entry}", + heading=entry_filename, ) ) + + logger.debug(f"Converted {len(parsed_entries)} plaintext files to entries") return entries diff --git a/src/khoj/processor/content/text_to_entries.py b/src/khoj/processor/content/text_to_entries.py index edd814f6..361d0220 100644 --- a/src/khoj/processor/content/text_to_entries.py +++ b/src/khoj/processor/content/text_to_entries.py @@ -59,7 +59,7 @@ class TextToEntries(ABC): @staticmethod def split_entries_by_max_tokens( - entries: List[Entry], max_tokens: int = 256, max_word_length: int = 500 + entries: List[Entry], max_tokens: int = 256, max_word_length: int = 500, raw_is_compiled: bool = False ) -> List[Entry]: "Split entries if compiled entry length exceeds the max tokens supported by the ML model." chunked_entries: List[Entry] = [] @@ -94,7 +94,7 @@ class TextToEntries(ABC): # Clean entry of unwanted characters like \0 character compiled_entry_chunk = TextToEntries.clean_field(compiled_entry_chunk) - entry.raw = TextToEntries.clean_field(entry.raw) + entry.raw = compiled_entry_chunk if raw_is_compiled else TextToEntries.clean_field(entry.raw) entry.heading = TextToEntries.clean_field(entry.heading) entry.file = TextToEntries.clean_field(entry.file) diff --git a/src/khoj/routers/indexer.py b/src/khoj/routers/indexer.py index 1bca6a25..d25d166a 100644 --- a/src/khoj/routers/indexer.py +++ b/src/khoj/routers/indexer.py @@ -67,11 +67,10 @@ async def update( try: logger.info(f"📬 Updating content index via API call by {client} client") for file in files: - file_type, encoding = get_file_type(file.content_type) + file_content = file.file.read() + file_type, encoding = get_file_type(file.content_type, file_content) if file_type in index_files: - index_files[file_type][file.filename] = ( - file.file.read().decode("utf-8") if encoding == "utf-8" else file.file.read() # type: ignore - ) + index_files[file_type][file.filename] = file_content.decode(encoding) if encoding else file_content else: logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file.filename}") diff --git a/src/khoj/utils/fs_syncer.py b/src/khoj/utils/fs_syncer.py index 31bc13b9..f9b7fc62 100644 --- a/src/khoj/utils/fs_syncer.py +++ b/src/khoj/utils/fs_syncer.py @@ -1,9 +1,11 @@ import glob import logging import os +from pathlib import Path from typing import Optional from bs4 import BeautifulSoup +from magika import Magika from khoj.database.models import ( LocalMarkdownConfig, @@ -16,6 +18,7 @@ from khoj.utils.helpers import get_absolute_path, is_none_or_empty from khoj.utils.rawconfig import TextContentConfig logger = logging.getLogger(__name__) +magika = Magika() def collect_files(search_type: Optional[SearchType] = SearchType.All, user=None) -> dict: @@ -47,6 +50,11 @@ def construct_config_from_db(db_config) -> TextContentConfig: def get_plaintext_files(config: TextContentConfig) -> dict[str, str]: def is_plaintextfile(file: str): "Check if file is plaintext file" + # Check if file path exists + mime_type = magika.identify_path(Path(file)).output.mime_type + if mime_type != "inode/x-empty" and mime_type != "application/unknown": + return mime_type.startswith("text/") + # Use file extension to decide plaintext if file content is not identifiable return file.endswith(("txt", "md", "markdown", "org", "mbox", "rst", "html", "htm", "xml")) def extract_html_content(html_content: str): @@ -65,7 +73,7 @@ def get_plaintext_files(config: TextContentConfig) -> dict[str, str]: logger.debug("At least one of input-files or input-file-filter is required to be specified") return {} - "Get all files to process" + # Get all plain text files to process absolute_plaintext_files, filtered_plaintext_files = set(), set() if input_files: absolute_plaintext_files = {get_absolute_path(jsonl_file) for jsonl_file in input_files} @@ -209,7 +217,7 @@ def get_pdf_files(config: TextContentConfig): logger.debug("At least one of pdf-files or pdf-file-filter is required to be specified") return {} - "Get PDF files to process" + # Get PDF files to process absolute_pdf_files, filtered_pdf_files = set(), set() if pdf_files: absolute_pdf_files = {get_absolute_path(pdf_file) for pdf_file in pdf_files} diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index d713c335..d5bf9b4b 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -19,6 +19,7 @@ from urllib.parse import urlparse import torch from asgiref.sync import sync_to_async +from magika import Magika from khoj.utils import constants @@ -29,6 +30,10 @@ if TYPE_CHECKING: from khoj.utils.rawconfig import AppConfig +# Initialize Magika for file type identification +magika = Magika() + + class AsyncIteratorWrapper: def __init__(self, obj): self._it = iter(obj) @@ -88,22 +93,31 @@ def merge_dicts(priority_dict: dict, default_dict: dict): return merged_dict -def get_file_type(file_type: str) -> tuple[str, str]: +def get_file_type(file_type: str, file_content: bytes) -> tuple[str, str]: "Get file type from file mime type" + # Extract encoding from file_type encoding = file_type.split("=")[1].strip().lower() if ";" in file_type else None file_type = file_type.split(";")[0].strip() if ";" in file_type else file_type - if file_type in ["text/markdown"]: + + # Infer content type from reading file content + try: + content_type = magika.identify_bytes(file_content).output.mime_type + except Exception: + # Fallback to using just file type if content type cannot be inferred + content_type = file_type + + if file_type in ["text/markdown"] and content_type.startswith("text/"): return "markdown", encoding - elif file_type in ["text/org"]: + elif file_type in ["text/org"] and content_type.startswith("text/"): return "org", encoding - elif file_type in ["application/pdf"]: + elif file_type in ["application/pdf"] and content_type == "application/pdf": return "pdf", encoding - elif file_type in ["image/jpeg"]: + elif file_type in ["image/jpeg"] and content_type == "image/jpeg": return "jpeg", encoding - elif file_type in ["image/png"]: + elif file_type in ["image/png"] and content_type == "image/png": return "png", encoding - elif file_type in ["text/plain", "text/html", "application/xml", "text/x-rst"]: + elif content_type.startswith("text/"): return "plaintext", encoding else: return "other", encoding diff --git a/tests/test_client.py b/tests/test_client.py index fb73ca3c..bb565794 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1,6 +1,5 @@ # Standard Modules import os -from io import BytesIO from urllib.parse import quote import pytest diff --git a/tests/test_plaintext_to_entries.py b/tests/test_plaintext_to_entries.py index 41d841fc..ba908997 100644 --- a/tests/test_plaintext_to_entries.py +++ b/tests/test_plaintext_to_entries.py @@ -15,8 +15,6 @@ def test_plaintext_file(tmp_path): """ plaintextfile = create_file(tmp_path, raw_entry) - filename = plaintextfile.stem - # Act # Extract Entries from specified plaintext files @@ -24,7 +22,7 @@ def test_plaintext_file(tmp_path): f"{plaintextfile}": raw_entry, } - entries = PlaintextToEntries.extract_plaintext_entries(entry_to_file_map=data) + entries = PlaintextToEntries.extract_plaintext_entries(data) # Convert each entry.file to absolute path to make them JSON serializable for entry in entries: @@ -35,7 +33,7 @@ def test_plaintext_file(tmp_path): # Ensure raw entry with no headings do not get heading prefix prepended assert not entries[0].raw.startswith("#") # Ensure compiled entry has filename prepended as top level heading - assert entries[0].compiled == f"{filename}\n{raw_entry}" + assert entries[0].compiled == f"{plaintextfile}\n{raw_entry}" def test_get_plaintext_files(tmp_path): @@ -100,6 +98,35 @@ def test_parse_html_plaintext_file(content_config, default_user: KhojUser): assert "
" not in entries[0].raw +def test_large_plaintext_file_split_into_multiple_entries(tmp_path): + "Convert files with no heading to jsonl." + # Arrange + max_tokens = 256 + normal_entry = " ".join([f"{number}" for number in range(max_tokens - 1)]) + large_entry = " ".join([f"{number}" for number in range(max_tokens)]) + + normal_plaintextfile = create_file(tmp_path, normal_entry) + large_plaintextfile = create_file(tmp_path, large_entry) + + normal_data = {f"{normal_plaintextfile}": normal_entry} + large_data = {f"{large_plaintextfile}": large_entry} + + # Act + # Extract Entries from specified plaintext files + normal_entries = PlaintextToEntries.split_entries_by_max_tokens( + PlaintextToEntries.extract_plaintext_entries(normal_data), + max_tokens=max_tokens, + raw_is_compiled=True, + ) + large_entries = PlaintextToEntries.split_entries_by_max_tokens( + PlaintextToEntries.extract_plaintext_entries(large_data), max_tokens=max_tokens, raw_is_compiled=True + ) + + # Assert + assert len(normal_entries) == 1 + assert len(large_entries) == 2 + + # Helper Functions def create_file(tmp_path: Path, entry=None, filename="test.md"): file_ = tmp_path / filename