Index more text file types from Desktop, Github (#692)

### Index more text file types 
- Index all text, code files in Github repos. Not just md, org files
- Send more text file types from Desktop app and improve indexing them
- Identify file type by content & allow server to index all text files

### Deprecate Github Indexing Features
- Stop indexing commits, issues and issue comments in a Github repo
- Skip indexing Github repo on hitting Github API rate limit

### Fixes and Improvements
- **Fix indexing files in sub-folders from Desktop app**
- Standardize structure of text to entries to match other entry processors
This commit is contained in:
Debanjum 2024-04-12 00:08:29 +05:30 committed by GitHub
commit 9a48f72041
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 869 additions and 240 deletions

View file

@ -46,6 +46,7 @@ dependencies = [
"openai >= 1.0.0", "openai >= 1.0.0",
"tiktoken >= 0.3.2", "tiktoken >= 0.3.2",
"tenacity >= 8.2.2", "tenacity >= 8.2.2",
"magika ~= 0.5.1",
"pillow ~= 9.5.0", "pillow ~= 9.5.0",
"pydantic >= 2.0.0", "pydantic >= 2.0.0",
"pyyaml ~= 6.0", "pyyaml ~= 6.0",

View file

@ -1,4 +1,5 @@
const { app, BrowserWindow, ipcMain, Tray, Menu, nativeImage, shell } = require('electron'); const { app, BrowserWindow, ipcMain, Tray, Menu, nativeImage, shell } = require('electron');
const Magika = require('magika').MagikaNode;
const todesktop = require("@todesktop/runtime"); const todesktop = require("@todesktop/runtime");
const khojPackage = require('./package.json'); const khojPackage = require('./package.json');
@ -14,8 +15,8 @@ const KHOJ_URL = 'https://app.khoj.dev';
const Store = require('electron-store'); const Store = require('electron-store');
const validFileTypes = ['org', 'md', 'markdown', 'txt', 'html', 'xml', 'pdf'] const magika = new Magika();
let validFileTypes;
const binaryFileTypes = ['pdf', 'png', 'jpg', 'jpeg'] const binaryFileTypes = ['pdf', 'png', 'jpg', 'jpeg']
const schema = { const schema = {
@ -67,6 +68,7 @@ const schema = {
} }
}; };
let isMagikaLoaded = false;
let syncing = false; let syncing = false;
let state = {} let state = {}
const store = new Store({ schema }); const store = new Store({ schema });
@ -111,22 +113,39 @@ function filenameToMimeType (filename) {
} }
} }
function processDirectory(filesToPush, folder) { async function isPlainTextFile(filePath) {
const files = fs.readdirSync(folder.path, { withFileTypes: true, recursive: true }); if (!isMagikaLoaded) {
await magika.load();
isMagikaLoaded = true;
}
try {
const fileContent = fs.readFileSync(filePath);
const fileType = await magika.identifyBytes(fileContent);
const fileLabel = magika.config.labels.filter(l => l.name == fileType.label)?.[0]
return fileLabel?.is_text
} catch (err) {
console.error("Failed to identify file type: ", err);
return false;
}
}
async function processDirectory(filesToPush, folder) {
const files = fs.readdirSync(folder.path, { withFileTypes: true });
for (const file of files) { for (const file of files) {
if (file.isFile() && validFileTypes.includes(file.name.split('.').pop())) { const filePath = path.join(file.path, file.name || '');
console.log(`Add ${file.name} in ${folder.path} for indexing`); if (file.isFile() && await isPlainTextFile(filePath)) {
filesToPush.push(path.join(folder.path, file.name)); console.log(`Add ${file.name} in ${file.path} for indexing`);
filesToPush.push(filePath);
} }
if (file.isDirectory()) { if (file.isDirectory()) {
processDirectory(filesToPush, {'path': path.join(folder.path, file.name)}); await processDirectory(filesToPush, {'path': filePath});
} }
} }
} }
function pushDataToKhoj (regenerate = false) { async function pushDataToKhoj (regenerate = false) {
// Don't sync if token or hostURL is not set or if already syncing // Don't sync if token or hostURL is not set or if already syncing
if (store.get('khojToken') === '' || store.get('hostURL') === '' || syncing === true) { if (store.get('khojToken') === '' || store.get('hostURL') === '' || syncing === true) {
const win = BrowserWindow.getAllWindows()[0]; const win = BrowserWindow.getAllWindows()[0];
@ -148,7 +167,7 @@ function pushDataToKhoj (regenerate = false) {
// Collect paths of all indexable files in configured folders // Collect paths of all indexable files in configured folders
for (const folder of folders) { for (const folder of folders) {
processDirectory(filesToPush, folder); await processDirectory(filesToPush, folder);
} }
const lastSync = store.get('lastSync') || []; const lastSync = store.get('lastSync') || [];
@ -222,6 +241,7 @@ function pushDataToKhoj (regenerate = false) {
} else if (error?.code === 'ECONNREFUSED') { } else if (error?.code === 'ECONNREFUSED') {
state["error"] = `Could not connect to Khoj server. Ensure you can connect to it at ${error.address}:${error.port}.`; state["error"] = `Could not connect to Khoj server. Ensure you can connect to it at ${error.address}:${error.port}.`;
} else { } else {
currentTime = new Date();
state["error"] = `Sync was unsuccessful at ${currentTime.toLocaleTimeString()}. Contact team@khoj.dev to report this issue.`; state["error"] = `Sync was unsuccessful at ${currentTime.toLocaleTimeString()}. Contact team@khoj.dev to report this issue.`;
} }
}) })
@ -238,9 +258,18 @@ function pushDataToKhoj (regenerate = false) {
pushDataToKhoj(); pushDataToKhoj();
async function handleFileOpen (type) { async function handleFileOpen (type) {
if (!isMagikaLoaded) {
await magika.load();
isMagikaLoaded = true;
validFileTypes = [
"org", "md", "pdf",
// all text file extensions known to Magika
...magika.config.labels.filter(l => l.is_text == true).map(l => l.name)];
}
let { canceled, filePaths } = {canceled: true, filePaths: []}; let { canceled, filePaths } = {canceled: true, filePaths: []};
if (type === 'file') { if (type === 'file') {
({ canceled, filePaths } = await dialog.showOpenDialog({properties: ['openFile' ], filters: [{ name: "Valid Khoj Files", extensions: validFileTypes}] })); ({ canceled, filePaths } = await dialog.showOpenDialog({properties: ['openFile' ], filters: [{ name: "Valid Khoj Files", extensions: validFileTypes }] }));
} else if (type === 'folder') { } else if (type === 'folder') {
({ canceled, filePaths } = await dialog.showOpenDialog({properties: ['openDirectory' ]})); ({ canceled, filePaths } = await dialog.showOpenDialog({properties: ['openDirectory' ]}));
} }
@ -331,7 +360,7 @@ async function removeFolder (event, folderPath) {
async function syncData (regenerate = false) { async function syncData (regenerate = false) {
try { try {
pushDataToKhoj(regenerate); await pushDataToKhoj(regenerate);
const date = new Date(); const date = new Date();
console.log('Pushing data to Khoj at: ', date); console.log('Pushing data to Khoj at: ', date);
} catch (err) { } catch (err) {
@ -343,7 +372,7 @@ async function deleteAllFiles () {
try { try {
store.set('files', []); store.set('files', []);
store.set('folders', []); store.set('folders', []);
pushDataToKhoj(true); await pushDataToKhoj(true);
const date = new Date(); const date = new Date();
console.log('Pushing data to Khoj at: ', date); console.log('Pushing data to Khoj at: ', date);
} catch (err) { } catch (err) {
@ -379,9 +408,9 @@ const createWindow = (tab = 'chat.html') => {
} }
}) })
const job = new cron('0 */10 * * * *', function() { const job = new cron('0 */10 * * * *', async function() {
try { try {
pushDataToKhoj(); await pushDataToKhoj();
const date = new Date(); const date = new Date();
console.log('Pushing data to Khoj at: ', date); console.log('Pushing data to Khoj at: ', date);
win.webContents.send('update-state', state); win.webContents.send('update-state', state);
@ -501,11 +530,11 @@ app.whenReady().then(() => {
try { try {
const result = await todesktop.autoUpdater.checkForUpdates(); const result = await todesktop.autoUpdater.checkForUpdates();
if (result.updateInfo) { if (result.updateInfo) {
console.log("Update found:", result.updateInfo.version); console.log("Desktop app update found:", result.updateInfo.version);
todesktop.autoUpdater.restartAndInstall(); todesktop.autoUpdater.restartAndInstall();
} }
} catch (e) { } catch (e) {
console.log("Update check failed:", e); console.warn("Desktop app update check failed:", e);
} }
}) })

View file

@ -20,6 +20,6 @@
"axios": "^1.6.4", "axios": "^1.6.4",
"cron": "^2.4.3", "cron": "^2.4.3",
"electron-store": "^8.1.0", "electron-store": "^8.1.0",
"fs": "^0.0.1-security" "magika": "^0.2.13"
} }
} }

View file

@ -17,6 +17,21 @@
optionalDependencies: optionalDependencies:
global-agent "^3.0.0" global-agent "^3.0.0"
"@mapbox/node-pre-gyp@1.0.9":
version "1.0.9"
resolved "https://registry.yarnpkg.com/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.9.tgz#09a8781a3a036151cdebbe8719d6f8b25d4058bc"
integrity sha512-aDF3S3rK9Q2gey/WAttUlISduDItz5BU3306M9Eyv6/oS40aMprnopshtlKTykxRNIBEZuRMaZAnbrQ4QtKGyw==
dependencies:
detect-libc "^2.0.0"
https-proxy-agent "^5.0.0"
make-dir "^3.1.0"
node-fetch "^2.6.7"
nopt "^5.0.0"
npmlog "^5.0.1"
rimraf "^3.0.2"
semver "^7.3.5"
tar "^6.1.11"
"@nodelib/fs.scandir@2.1.5": "@nodelib/fs.scandir@2.1.5":
version "2.1.5" version "2.1.5"
resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5" resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5"
@ -50,6 +65,87 @@
dependencies: dependencies:
defer-to-connect "^2.0.0" defer-to-connect "^2.0.0"
"@tensorflow/tfjs-backend-cpu@4.17.0":
version "4.17.0"
resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-backend-cpu/-/tfjs-backend-cpu-4.17.0.tgz#b0c495de686cf700f2ae1f6d8bc2eb6f1964d250"
integrity sha512-2VSCHnX9qhYTjw9HiVwTBSnRVlntKXeBlK7aSVsmZfHGwWE2faErTtO7bWmqNqw0U7gyznJbVAjlow/p+0RNGw==
dependencies:
"@types/seedrandom" "^2.4.28"
seedrandom "^3.0.5"
"@tensorflow/tfjs-backend-webgl@4.17.0":
version "4.17.0"
resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-backend-webgl/-/tfjs-backend-webgl-4.17.0.tgz#7d540a92343582d37d2cdf9509060598a19cd17a"
integrity sha512-CC5GsGECCd7eYAUaKq0XJ48FjEZdgXZWPxgUYx4djvfUx5fQPp35hCSP9w/k463jllBMbjl2tKRg8u7Ia/LYzg==
dependencies:
"@tensorflow/tfjs-backend-cpu" "4.17.0"
"@types/offscreencanvas" "~2019.3.0"
"@types/seedrandom" "^2.4.28"
seedrandom "^3.0.5"
"@tensorflow/tfjs-converter@4.17.0":
version "4.17.0"
resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-converter/-/tfjs-converter-4.17.0.tgz#f4407bd53d5e300b05ed0b0f068506bc50c956b0"
integrity sha512-qFxIjPfomCuTrYxsFjtKbi3QfdmTTCWo+RvqD64oCMS0sjp7sUDNhJyKDoLx6LZhXlwXpHIVDJctLMRMwet0Zw==
"@tensorflow/tfjs-core@4.17.0":
version "4.17.0"
resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-core/-/tfjs-core-4.17.0.tgz#1ea128555a4d197aed417d70461fcbc7eaec635f"
integrity sha512-v9Q5430EnRpyhWNd9LVgXadciKvxLiq+sTrLKRowh26BHyAsams4tZIgX3lFKjB7b90p+FYifVMcqLTTHgjGpQ==
dependencies:
"@types/long" "^4.0.1"
"@types/offscreencanvas" "~2019.7.0"
"@types/seedrandom" "^2.4.28"
"@webgpu/types" "0.1.38"
long "4.0.0"
node-fetch "~2.6.1"
seedrandom "^3.0.5"
"@tensorflow/tfjs-data@4.17.0":
version "4.17.0"
resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-data/-/tfjs-data-4.17.0.tgz#612c095576724f1dbec9488373e9ed6027282a05"
integrity sha512-aPKrDFip+gXicWOFALeNT7KKQjRXFkHd/hNe/zs4mCFcIN00hy1PkZ6xkYsgrsdLDQMBSGeS4B4ZM0k5Cs88QA==
dependencies:
"@types/node-fetch" "^2.1.2"
node-fetch "~2.6.1"
string_decoder "^1.3.0"
"@tensorflow/tfjs-layers@4.17.0":
version "4.17.0"
resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-layers/-/tfjs-layers-4.17.0.tgz#c789f4285f358b192675790365bdd491b490501e"
integrity sha512-DEE0zRKvf3LJ0EcvG5XouJYOgFGWYAneZ0K1d23969z7LfSyqVmBdLC6BTwdLKuJk3ouUJIKXU1TcpFmjDuh7g==
"@tensorflow/tfjs-node@^4.17.0":
version "4.17.0"
resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-node/-/tfjs-node-4.17.0.tgz#d35cc1466cc58a5a12d27e6fed4b730d8b0a982d"
integrity sha512-lRe5XPwLzVgpLoxgKWWlqCX9uYybklMai3npgVcvniLQnd6JjkGx+RY2D+7jyQmdo1zJUACfxw3conP88OcBug==
dependencies:
"@mapbox/node-pre-gyp" "1.0.9"
"@tensorflow/tfjs" "4.17.0"
adm-zip "^0.5.2"
google-protobuf "^3.9.2"
https-proxy-agent "^2.2.1"
progress "^2.0.0"
rimraf "^2.6.2"
tar "^4.4.6"
"@tensorflow/tfjs@4.17.0", "@tensorflow/tfjs@^4.17.0":
version "4.17.0"
resolved "https://registry.yarnpkg.com/@tensorflow/tfjs/-/tfjs-4.17.0.tgz#895fed25872a572cb1ec4ddee91586c2590c4019"
integrity sha512-yXRBhpM3frlNA/YaPp6HNk9EfIi8han5RYeQA3R8OCa0Od+AfoG1PUmlxV8fE2wCorlGVyHsgpiJ6M9YZPB56w==
dependencies:
"@tensorflow/tfjs-backend-cpu" "4.17.0"
"@tensorflow/tfjs-backend-webgl" "4.17.0"
"@tensorflow/tfjs-converter" "4.17.0"
"@tensorflow/tfjs-core" "4.17.0"
"@tensorflow/tfjs-data" "4.17.0"
"@tensorflow/tfjs-layers" "4.17.0"
argparse "^1.0.10"
chalk "^4.1.0"
core-js "3.29.1"
regenerator-runtime "^0.13.5"
yargs "^16.0.3"
"@todesktop/runtime@^1.3.0": "@todesktop/runtime@^1.3.0":
version "1.3.0" version "1.3.0"
resolved "https://registry.yarnpkg.com/@todesktop/runtime/-/runtime-1.3.0.tgz#7baa64fd5c2e4daa591bda96270a0e39947ec3c7" resolved "https://registry.yarnpkg.com/@todesktop/runtime/-/runtime-1.3.0.tgz#7baa64fd5c2e4daa591bda96270a0e39947ec3c7"
@ -84,11 +180,24 @@
dependencies: dependencies:
"@types/node" "*" "@types/node" "*"
"@types/long@^4.0.1":
version "4.0.2"
resolved "https://registry.yarnpkg.com/@types/long/-/long-4.0.2.tgz#b74129719fc8d11c01868010082d483b7545591a"
integrity sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA==
"@types/luxon@~3.3.0": "@types/luxon@~3.3.0":
version "3.3.1" version "3.3.1"
resolved "https://registry.yarnpkg.com/@types/luxon/-/luxon-3.3.1.tgz#08727da7d81ee6a6c702b9dc6c8f86be010eb4dc" resolved "https://registry.yarnpkg.com/@types/luxon/-/luxon-3.3.1.tgz#08727da7d81ee6a6c702b9dc6c8f86be010eb4dc"
integrity sha512-XOS5nBcgEeP2PpcqJHjCWhUCAzGfXIU8ILOSLpx2FhxqMW9KdxgCGXNOEKGVBfveKtIpztHzKK5vSRVLyW/NqA== integrity sha512-XOS5nBcgEeP2PpcqJHjCWhUCAzGfXIU8ILOSLpx2FhxqMW9KdxgCGXNOEKGVBfveKtIpztHzKK5vSRVLyW/NqA==
"@types/node-fetch@^2.1.2":
version "2.6.11"
resolved "https://registry.yarnpkg.com/@types/node-fetch/-/node-fetch-2.6.11.tgz#9b39b78665dae0e82a08f02f4967d62c66f95d24"
integrity sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g==
dependencies:
"@types/node" "*"
form-data "^4.0.0"
"@types/node@*": "@types/node@*":
version "20.5.7" version "20.5.7"
resolved "https://registry.yarnpkg.com/@types/node/-/node-20.5.7.tgz#4b8ecac87fbefbc92f431d09c30e176fc0a7c377" resolved "https://registry.yarnpkg.com/@types/node/-/node-20.5.7.tgz#4b8ecac87fbefbc92f431d09c30e176fc0a7c377"
@ -99,6 +208,16 @@
resolved "https://registry.yarnpkg.com/@types/node/-/node-18.17.12.tgz#c6bd7413a13e6ad9cfb7e97dd5c4e904c1821e50" resolved "https://registry.yarnpkg.com/@types/node/-/node-18.17.12.tgz#c6bd7413a13e6ad9cfb7e97dd5c4e904c1821e50"
integrity sha512-d6xjC9fJ/nSnfDeU0AMDsaJyb1iHsqCSOdi84w4u+SlN/UgQdY5tRhpMzaFYsI4mnpvgTivEaQd0yOUhAtOnEQ== integrity sha512-d6xjC9fJ/nSnfDeU0AMDsaJyb1iHsqCSOdi84w4u+SlN/UgQdY5tRhpMzaFYsI4mnpvgTivEaQd0yOUhAtOnEQ==
"@types/offscreencanvas@~2019.3.0":
version "2019.3.0"
resolved "https://registry.yarnpkg.com/@types/offscreencanvas/-/offscreencanvas-2019.3.0.tgz#3336428ec7e9180cf4566dfea5da04eb586a6553"
integrity sha512-esIJx9bQg+QYF0ra8GnvfianIY8qWB0GBx54PK5Eps6m+xTj86KLavHv6qDhzKcu5UUOgNfJ2pWaIIV7TRUd9Q==
"@types/offscreencanvas@~2019.7.0":
version "2019.7.3"
resolved "https://registry.yarnpkg.com/@types/offscreencanvas/-/offscreencanvas-2019.7.3.tgz#90267db13f64d6e9ccb5ae3eac92786a7c77a516"
integrity sha512-ieXiYmgSRXUDeOntE1InxjWyvEelZGP63M+cGuquuRLuIKKT1osnkXjxev9B7d1nXSug5vpunx+gNlbVxMlC9A==
"@types/responselike@^1.0.0": "@types/responselike@^1.0.0":
version "1.0.0" version "1.0.0"
resolved "https://registry.yarnpkg.com/@types/responselike/-/responselike-1.0.0.tgz#251f4fe7d154d2bad125abe1b429b23afd262e29" resolved "https://registry.yarnpkg.com/@types/responselike/-/responselike-1.0.0.tgz#251f4fe7d154d2bad125abe1b429b23afd262e29"
@ -106,6 +225,11 @@
dependencies: dependencies:
"@types/node" "*" "@types/node" "*"
"@types/seedrandom@^2.4.28":
version "2.4.34"
resolved "https://registry.yarnpkg.com/@types/seedrandom/-/seedrandom-2.4.34.tgz#c725cd0fc0442e2d3d0e5913af005686ffb7eb99"
integrity sha512-ytDiArvrn/3Xk6/vtylys5tlY6eo7Ane0hvcx++TKo6RxQXuVfW0AF/oeWqAj9dN29SyhtawuXstgmPlwNcv/A==
"@types/semver@^7.3.6": "@types/semver@^7.3.6":
version "7.5.1" version "7.5.1"
resolved "https://registry.yarnpkg.com/@types/semver/-/semver-7.5.1.tgz#0480eeb7221eb9bc398ad7432c9d7e14b1a5a367" resolved "https://registry.yarnpkg.com/@types/semver/-/semver-7.5.1.tgz#0480eeb7221eb9bc398ad7432c9d7e14b1a5a367"
@ -118,6 +242,35 @@
dependencies: dependencies:
"@types/node" "*" "@types/node" "*"
"@webgpu/types@0.1.38":
version "0.1.38"
resolved "https://registry.yarnpkg.com/@webgpu/types/-/types-0.1.38.tgz#6fda4b410edc753d3213c648320ebcf319669020"
integrity sha512-7LrhVKz2PRh+DD7+S+PVaFd5HxaWQvoMqBbsV9fNJO1pjUs1P8bM2vQVNfk+3URTqbuTI7gkXi0rfsN0IadoBA==
abbrev@1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/abbrev/-/abbrev-1.1.1.tgz#f8f2c887ad10bf67f634f005b6987fed3179aac8"
integrity sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==
adm-zip@^0.5.2:
version "0.5.12"
resolved "https://registry.yarnpkg.com/adm-zip/-/adm-zip-0.5.12.tgz#87786328e91d54b37358d8a50f954c4cd73ba60b"
integrity sha512-6TVU49mK6KZb4qG6xWaaM4C7sA/sgUMLy/JYMOzkcp3BvVLpW0fXDFQiIzAuxFCt/2+xD7fNIiPFAoLZPhVNLQ==
agent-base@6:
version "6.0.2"
resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-6.0.2.tgz#49fff58577cfee3f37176feab4c22e00f86d7f77"
integrity sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==
dependencies:
debug "4"
agent-base@^4.3.0:
version "4.3.0"
resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-4.3.0.tgz#8165f01c436009bccad0b1d122f05ed770efc6ee"
integrity sha512-salcGninV0nPrwpGNn4VTXBb1SOuXQBiqbrNXoeizJsHrsL6ERFM2Ne3JUSBWRE6aeNJI2ROP/WEEIDUiDe3cg==
dependencies:
es6-promisify "^5.0.0"
aggregate-error@^3.0.0: aggregate-error@^3.0.0:
version "3.1.0" version "3.1.0"
resolved "https://registry.yarnpkg.com/aggregate-error/-/aggregate-error-3.1.0.tgz#92670ff50f5359bdb7a3e0d40d0ec30c5737687a" resolved "https://registry.yarnpkg.com/aggregate-error/-/aggregate-error-3.1.0.tgz#92670ff50f5359bdb7a3e0d40d0ec30c5737687a"
@ -143,6 +296,38 @@ ajv@^8.0.0, ajv@^8.6.3:
require-from-string "^2.0.2" require-from-string "^2.0.2"
uri-js "^4.2.2" uri-js "^4.2.2"
ansi-regex@^5.0.1:
version "5.0.1"
resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.1.tgz#082cb2c89c9fe8659a311a53bd6a4dc5301db304"
integrity sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==
ansi-styles@^4.0.0, ansi-styles@^4.1.0:
version "4.3.0"
resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937"
integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==
dependencies:
color-convert "^2.0.1"
"aproba@^1.0.3 || ^2.0.0":
version "2.0.0"
resolved "https://registry.yarnpkg.com/aproba/-/aproba-2.0.0.tgz#52520b8ae5b569215b354efc0caa3fe1e45a8adc"
integrity sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==
are-we-there-yet@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/are-we-there-yet/-/are-we-there-yet-2.0.0.tgz#372e0e7bd279d8e94c653aaa1f67200884bf3e1c"
integrity sha512-Ci/qENmwHnsYo9xKIcUJN5LeDKdJ6R1Z1j9V/J5wyq8nh/mYPEpIKJbBZXtZjG04HiK7zV/p6Vs9952MrMeUIw==
dependencies:
delegates "^1.0.0"
readable-stream "^3.6.0"
argparse@^1.0.10:
version "1.0.10"
resolved "https://registry.yarnpkg.com/argparse/-/argparse-1.0.10.tgz#bcd6791ea5ae09725e17e5ad988134cd40b3d911"
integrity sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==
dependencies:
sprintf-js "~1.0.2"
argparse@^2.0.1: argparse@^2.0.1:
version "2.0.1" version "2.0.1"
resolved "https://registry.yarnpkg.com/argparse/-/argparse-2.0.1.tgz#246f50f3ca78a3240f6c997e8a9bd1eac49e4b38" resolved "https://registry.yarnpkg.com/argparse/-/argparse-2.0.1.tgz#246f50f3ca78a3240f6c997e8a9bd1eac49e4b38"
@ -228,11 +413,43 @@ cacheable-request@^7.0.2:
normalize-url "^6.0.1" normalize-url "^6.0.1"
responselike "^2.0.0" responselike "^2.0.0"
chalk@^4.1.0:
version "4.1.2"
resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.2.tgz#aac4e2b7734a740867aeb16bf02aad556a1e7a01"
integrity sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==
dependencies:
ansi-styles "^4.1.0"
supports-color "^7.1.0"
chalk@^5.3.0:
version "5.3.0"
resolved "https://registry.yarnpkg.com/chalk/-/chalk-5.3.0.tgz#67c20a7ebef70e7f3970a01f90fa210cb6860385"
integrity sha512-dLitG79d+GV1Nb/VYcCDFivJeK1hiukt9QjRNVOsUtTy1rR1YJsmpGGTZ3qJos+uw7WmWF4wUwBd9jxjocFC2w==
chownr@^1.1.4:
version "1.1.4"
resolved "https://registry.yarnpkg.com/chownr/-/chownr-1.1.4.tgz#6fc9d7b42d32a583596337666e7d08084da2cc6b"
integrity sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==
chownr@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/chownr/-/chownr-2.0.0.tgz#15bfbe53d2eab4cf70f18a8cd68ebe5b3cb1dece"
integrity sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==
clean-stack@^2.0.0: clean-stack@^2.0.0:
version "2.2.0" version "2.2.0"
resolved "https://registry.yarnpkg.com/clean-stack/-/clean-stack-2.2.0.tgz#ee8472dbb129e727b31e8a10a427dee9dfe4008b" resolved "https://registry.yarnpkg.com/clean-stack/-/clean-stack-2.2.0.tgz#ee8472dbb129e727b31e8a10a427dee9dfe4008b"
integrity sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A== integrity sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A==
cliui@^7.0.2:
version "7.0.4"
resolved "https://registry.yarnpkg.com/cliui/-/cliui-7.0.4.tgz#a0265ee655476fc807aea9df3df8df7783808b4f"
integrity sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==
dependencies:
string-width "^4.2.0"
strip-ansi "^6.0.0"
wrap-ansi "^7.0.0"
clone-response@^1.0.2: clone-response@^1.0.2:
version "1.0.3" version "1.0.3"
resolved "https://registry.yarnpkg.com/clone-response/-/clone-response-1.0.3.tgz#af2032aa47816399cf5f0a1d0db902f517abb8c3" resolved "https://registry.yarnpkg.com/clone-response/-/clone-response-1.0.3.tgz#af2032aa47816399cf5f0a1d0db902f517abb8c3"
@ -240,6 +457,23 @@ clone-response@^1.0.2:
dependencies: dependencies:
mimic-response "^1.0.0" mimic-response "^1.0.0"
color-convert@^2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3"
integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==
dependencies:
color-name "~1.1.4"
color-name@~1.1.4:
version "1.1.4"
resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2"
integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==
color-support@^1.1.2:
version "1.1.3"
resolved "https://registry.yarnpkg.com/color-support/-/color-support-1.1.3.tgz#93834379a1cc9a0c61f82f52f0d04322251bd5a2"
integrity sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==
combined-stream@^1.0.8: combined-stream@^1.0.8:
version "1.0.8" version "1.0.8"
resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f" resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f"
@ -247,6 +481,11 @@ combined-stream@^1.0.8:
dependencies: dependencies:
delayed-stream "~1.0.0" delayed-stream "~1.0.0"
commander@^12.0.0:
version "12.0.0"
resolved "https://registry.yarnpkg.com/commander/-/commander-12.0.0.tgz#b929db6df8546080adfd004ab215ed48cf6f2592"
integrity sha512-MwVNWlYjDTtOjX5PiD7o5pK0UrFU/OYgcJfjjK4RaHZETNtjJqrZa9Y9ds88+A+f+d5lv+561eZ+yCKoS3gbAA==
concat-map@0.0.1: concat-map@0.0.1:
version "0.0.1" version "0.0.1"
resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
@ -268,6 +507,16 @@ conf@^10.2.0:
pkg-up "^3.1.0" pkg-up "^3.1.0"
semver "^7.3.5" semver "^7.3.5"
console-control-strings@^1.0.0, console-control-strings@^1.1.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/console-control-strings/-/console-control-strings-1.1.0.tgz#3d7cf4464db6446ea644bf4b39507f9851008e8e"
integrity sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==
core-js@3.29.1:
version "3.29.1"
resolved "https://registry.yarnpkg.com/core-js/-/core-js-3.29.1.tgz#40ff3b41588b091aaed19ca1aa5cb111803fa9a6"
integrity sha512-+jwgnhg6cQxKYIIjGtAHq2nwUOolo9eoFZ4sHfUH09BLXBgxnH4gA0zEd+t+BO2cNB8idaBtZFcFTRjQJRJmAw==
cron@^2.4.3: cron@^2.4.3:
version "2.4.3" version "2.4.3"
resolved "https://registry.yarnpkg.com/cron/-/cron-2.4.3.tgz#4e43d8d9a6373b8f28d876c4e9a47c14422d8652" resolved "https://registry.yarnpkg.com/cron/-/cron-2.4.3.tgz#4e43d8d9a6373b8f28d876c4e9a47c14422d8652"
@ -292,13 +541,20 @@ debounce-fn@^4.0.0:
dependencies: dependencies:
mimic-fn "^3.0.0" mimic-fn "^3.0.0"
debug@^4.1.0, debug@^4.1.1, debug@^4.3.2: debug@4, debug@^4.1.0, debug@^4.1.1, debug@^4.3.2:
version "4.3.4" version "4.3.4"
resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ== integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
dependencies: dependencies:
ms "2.1.2" ms "2.1.2"
debug@^3.1.0:
version "3.2.7"
resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a"
integrity sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==
dependencies:
ms "^2.1.1"
decompress-response@^6.0.0: decompress-response@^6.0.0:
version "6.0.0" version "6.0.0"
resolved "https://registry.yarnpkg.com/decompress-response/-/decompress-response-6.0.0.tgz#ca387612ddb7e104bd16d85aab00d5ecf09c66fc" resolved "https://registry.yarnpkg.com/decompress-response/-/decompress-response-6.0.0.tgz#ca387612ddb7e104bd16d85aab00d5ecf09c66fc"
@ -338,6 +594,16 @@ delayed-stream@~1.0.0:
resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619" resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619"
integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ== integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==
delegates@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/delegates/-/delegates-1.0.0.tgz#84c6e159b81904fdca59a0ef44cd870d31250f9a"
integrity sha512-bd2L678uiWATM6m5Z1VzNCErI3jiGzt6HGY8OVICs40JQq/HALfbyNJmp0UDakEY4pMMaN0Ly5om/B1VI/+xfQ==
detect-libc@^2.0.0:
version "2.0.3"
resolved "https://registry.yarnpkg.com/detect-libc/-/detect-libc-2.0.3.tgz#f0cd503b40f9939b894697d19ad50895e30cf700"
integrity sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw==
detect-node@^2.0.4: detect-node@^2.0.4:
version "2.1.0" version "2.1.0"
resolved "https://registry.yarnpkg.com/detect-node/-/detect-node-2.1.0.tgz#c9c70775a49c3d03bc2c06d9a73be550f978f8b1" resolved "https://registry.yarnpkg.com/detect-node/-/detect-node-2.1.0.tgz#c9c70775a49c3d03bc2c06d9a73be550f978f8b1"
@ -388,6 +654,11 @@ electron@28.2.1:
"@types/node" "^18.11.18" "@types/node" "^18.11.18"
extract-zip "^2.0.1" extract-zip "^2.0.1"
emoji-regex@^8.0.0:
version "8.0.0"
resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37"
integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==
end-of-stream@^1.1.0: end-of-stream@^1.1.0:
version "1.4.4" version "1.4.4"
resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0" resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0"
@ -405,6 +676,23 @@ es6-error@^4.1.1:
resolved "https://registry.yarnpkg.com/es6-error/-/es6-error-4.1.1.tgz#9e3af407459deed47e9a91f9b885a84eb05c561d" resolved "https://registry.yarnpkg.com/es6-error/-/es6-error-4.1.1.tgz#9e3af407459deed47e9a91f9b885a84eb05c561d"
integrity sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg== integrity sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg==
es6-promise@^4.0.3:
version "4.2.8"
resolved "https://registry.yarnpkg.com/es6-promise/-/es6-promise-4.2.8.tgz#4eb21594c972bc40553d276e510539143db53e0a"
integrity sha512-HJDGx5daxeIvxdBxvG2cb9g4tEvwIk3i8+nhX0yGrYmZUzbkdg8QbDevheDB8gd0//uPj4c1EQua8Q+MViT0/w==
es6-promisify@^5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/es6-promisify/-/es6-promisify-5.0.0.tgz#5109d62f3e56ea967c4b63505aef08291c8a5203"
integrity sha512-C+d6UdsYDk0lMebHNR4S2NybQMMngAOnOwYBQjTOiv0MkoJMP0Myw2mgpDLBcpfCmRLxyFqYhS/CfOENq4SJhQ==
dependencies:
es6-promise "^4.0.3"
escalade@^3.1.1:
version "3.1.2"
resolved "https://registry.yarnpkg.com/escalade/-/escalade-3.1.2.tgz#54076e9ab29ea5bf3d8f1ed62acffbb88272df27"
integrity sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA==
escape-string-regexp@^4.0.0: escape-string-regexp@^4.0.0:
version "4.0.0" version "4.0.0"
resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34" resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34"
@ -517,21 +805,50 @@ fs-extra@^8.1.0:
jsonfile "^4.0.0" jsonfile "^4.0.0"
universalify "^0.1.0" universalify "^0.1.0"
fs-minipass@^1.2.7:
version "1.2.7"
resolved "https://registry.yarnpkg.com/fs-minipass/-/fs-minipass-1.2.7.tgz#ccff8570841e7fe4265693da88936c55aed7f7c7"
integrity sha512-GWSSJGFy4e9GUeCcbIkED+bgAoFyj7XF1mV8rma3QW4NIqX9Kyx79N/PF61H5udOV3aY1IaMLs6pGbH71nlCTA==
dependencies:
minipass "^2.6.0"
fs-minipass@^2.0.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/fs-minipass/-/fs-minipass-2.1.0.tgz#7f5036fdbf12c63c169190cbe4199c852271f9fb"
integrity sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==
dependencies:
minipass "^3.0.0"
fs.realpath@^1.0.0: fs.realpath@^1.0.0:
version "1.0.0" version "1.0.0"
resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f" resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f"
integrity sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw== integrity sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==
fs@^0.0.1-security:
version "0.0.1-security"
resolved "https://registry.yarnpkg.com/fs/-/fs-0.0.1-security.tgz#8a7bd37186b6dddf3813f23858b57ecaaf5e41d4"
integrity sha512-3XY9e1pP0CVEUCdj5BmfIZxRBTSDycnbqhIOGec9QYtmVH2fbLpj86CFWkrNOkt/Fvty4KZG5lTglL9j/gJ87w==
function-bind@^1.1.1: function-bind@^1.1.1:
version "1.1.1" version "1.1.1"
resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d" resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d"
integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A== integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==
gauge@^3.0.0:
version "3.0.2"
resolved "https://registry.yarnpkg.com/gauge/-/gauge-3.0.2.tgz#03bf4441c044383908bcfa0656ad91803259b395"
integrity sha512-+5J6MS/5XksCuXq++uFRsnUd7Ovu1XenbeuIuNRJxYWjgQbPuFhT14lAvsWfqfAmnwluf1OwMjz39HjfLPci0Q==
dependencies:
aproba "^1.0.3 || ^2.0.0"
color-support "^1.1.2"
console-control-strings "^1.0.0"
has-unicode "^2.0.1"
object-assign "^4.1.1"
signal-exit "^3.0.0"
string-width "^4.2.3"
strip-ansi "^6.0.1"
wide-align "^1.1.2"
get-caller-file@^2.0.5:
version "2.0.5"
resolved "https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-2.0.5.tgz#4f94412a82db32f36e3b0b9741f8a97feb031f7e"
integrity sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==
get-intrinsic@^1.1.1: get-intrinsic@^1.1.1:
version "1.2.1" version "1.2.1"
resolved "https://registry.yarnpkg.com/get-intrinsic/-/get-intrinsic-1.2.1.tgz#d295644fed4505fc9cde952c37ee12b477a83d82" resolved "https://registry.yarnpkg.com/get-intrinsic/-/get-intrinsic-1.2.1.tgz#d295644fed4505fc9cde952c37ee12b477a83d82"
@ -604,6 +921,11 @@ globby@^11.0.1:
merge2 "^1.4.1" merge2 "^1.4.1"
slash "^3.0.0" slash "^3.0.0"
google-protobuf@^3.9.2:
version "3.21.2"
resolved "https://registry.yarnpkg.com/google-protobuf/-/google-protobuf-3.21.2.tgz#4580a2bea8bbb291ee579d1fefb14d6fa3070ea4"
integrity sha512-3MSOYFO5U9mPGikIYCzK0SaThypfGgS6bHqrUGXG3DPHCrb+txNqeEcns1W0lkGfk0rCyNXm7xB9rMxnCiZOoA==
got@^11.8.5: got@^11.8.5:
version "11.8.6" version "11.8.6"
resolved "https://registry.yarnpkg.com/got/-/got-11.8.6.tgz#276e827ead8772eddbcfc97170590b841823233a" resolved "https://registry.yarnpkg.com/got/-/got-11.8.6.tgz#276e827ead8772eddbcfc97170590b841823233a"
@ -626,6 +948,11 @@ graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.4:
resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.11.tgz#4183e4e8bf08bb6e05bbb2f7d2e0c8f712ca40e3" resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.11.tgz#4183e4e8bf08bb6e05bbb2f7d2e0c8f712ca40e3"
integrity sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ== integrity sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==
has-flag@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b"
integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==
has-property-descriptors@^1.0.0: has-property-descriptors@^1.0.0:
version "1.0.0" version "1.0.0"
resolved "https://registry.yarnpkg.com/has-property-descriptors/-/has-property-descriptors-1.0.0.tgz#610708600606d36961ed04c196193b6a607fa861" resolved "https://registry.yarnpkg.com/has-property-descriptors/-/has-property-descriptors-1.0.0.tgz#610708600606d36961ed04c196193b6a607fa861"
@ -643,6 +970,11 @@ has-symbols@^1.0.3:
resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.3.tgz#bb7b2c4349251dce87b125f7bdf874aa7c8b39f8" resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.3.tgz#bb7b2c4349251dce87b125f7bdf874aa7c8b39f8"
integrity sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A== integrity sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==
has-unicode@^2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/has-unicode/-/has-unicode-2.0.1.tgz#e0e6fe6a28cf51138855e086d1691e771de2a8b9"
integrity sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==
has@^1.0.3: has@^1.0.3:
version "1.0.3" version "1.0.3"
resolved "https://registry.yarnpkg.com/has/-/has-1.0.3.tgz#722d7cbfc1f6aa8241f16dd814e011e1f41e8796" resolved "https://registry.yarnpkg.com/has/-/has-1.0.3.tgz#722d7cbfc1f6aa8241f16dd814e011e1f41e8796"
@ -663,6 +995,22 @@ http2-wrapper@^1.0.0-beta.5.2:
quick-lru "^5.1.1" quick-lru "^5.1.1"
resolve-alpn "^1.0.0" resolve-alpn "^1.0.0"
https-proxy-agent@^2.2.1:
version "2.2.4"
resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-2.2.4.tgz#4ee7a737abd92678a293d9b34a1af4d0d08c787b"
integrity sha512-OmvfoQ53WLjtA9HeYP9RNrWMJzzAz1JGaSFr1nijg0PVR1JaD/xbJq1mdEIIlxGpXp9eSe/O2LgU9DJmTPd0Eg==
dependencies:
agent-base "^4.3.0"
debug "^3.1.0"
https-proxy-agent@^5.0.0:
version "5.0.1"
resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz#c59ef224a04fe8b754f3db0063a25ea30d0005d6"
integrity sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==
dependencies:
agent-base "6"
debug "4"
human-signals@^2.1.0: human-signals@^2.1.0:
version "2.1.0" version "2.1.0"
resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-2.1.0.tgz#dc91fcba42e4d06e4abaed33b3e7a3c02f514ea0" resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-2.1.0.tgz#dc91fcba42e4d06e4abaed33b3e7a3c02f514ea0"
@ -686,7 +1034,7 @@ inflight@^1.0.4:
once "^1.3.0" once "^1.3.0"
wrappy "1" wrappy "1"
inherits@2: inherits@2, inherits@^2.0.3:
version "2.0.4" version "2.0.4"
resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c" resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
@ -696,6 +1044,11 @@ is-extglob@^2.1.1:
resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2" resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2"
integrity sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ== integrity sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==
is-fullwidth-code-point@^3.0.0:
version "3.0.0"
resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz#f116f8064fe90b3f7844a38997c0b75051269f1d"
integrity sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==
is-glob@^4.0.1: is-glob@^4.0.1:
version "4.0.3" version "4.0.3"
resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.3.tgz#64f61e42cbbb2eec2071a9dac0b28ba1e65d5084" resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.3.tgz#64f61e42cbbb2eec2071a9dac0b28ba1e65d5084"
@ -811,6 +1164,11 @@ lodash.once@^4.1.1:
resolved "https://registry.yarnpkg.com/lodash.once/-/lodash.once-4.1.1.tgz#0dd3971213c7c56df880977d504c88fb471a97ac" resolved "https://registry.yarnpkg.com/lodash.once/-/lodash.once-4.1.1.tgz#0dd3971213c7c56df880977d504c88fb471a97ac"
integrity sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg== integrity sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==
long@4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/long/-/long-4.0.0.tgz#9a7b71cfb7d361a194ea555241c92f7468d5bf28"
integrity sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==
lowercase-keys@^2.0.0: lowercase-keys@^2.0.0:
version "2.0.0" version "2.0.0"
resolved "https://registry.yarnpkg.com/lowercase-keys/-/lowercase-keys-2.0.0.tgz#2603e78b7b4b0006cbca2fbcc8a3202558ac9479" resolved "https://registry.yarnpkg.com/lowercase-keys/-/lowercase-keys-2.0.0.tgz#2603e78b7b4b0006cbca2fbcc8a3202558ac9479"
@ -828,6 +1186,24 @@ luxon@~3.3.0:
resolved "https://registry.yarnpkg.com/luxon/-/luxon-3.3.0.tgz#d73ab5b5d2b49a461c47cedbc7e73309b4805b48" resolved "https://registry.yarnpkg.com/luxon/-/luxon-3.3.0.tgz#d73ab5b5d2b49a461c47cedbc7e73309b4805b48"
integrity sha512-An0UCfG/rSiqtAIiBPO0Y9/zAnHUZxAMiCpTd5h2smgsj7GGmcenvrvww2cqNA8/4A5ZrD1gJpHN2mIHZQF+Mg== integrity sha512-An0UCfG/rSiqtAIiBPO0Y9/zAnHUZxAMiCpTd5h2smgsj7GGmcenvrvww2cqNA8/4A5ZrD1gJpHN2mIHZQF+Mg==
magika@^0.2.13:
version "0.2.13"
resolved "https://registry.yarnpkg.com/magika/-/magika-0.2.13.tgz#f39bc565f0d3703b592d3fc155a5d3ee056b0471"
integrity sha512-CIMx6aJ6SCX2uiAdLpghGeQPhMIOsnvmVpYjX/ao5ks0PFvJF1f9fWNGlayGfazNpJmTA1cuZG21DVVhfZkoUw==
dependencies:
"@tensorflow/tfjs" "^4.17.0"
optionalDependencies:
"@tensorflow/tfjs-node" "^4.17.0"
chalk "^5.3.0"
commander "^12.0.0"
make-dir@^3.1.0:
version "3.1.0"
resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-3.1.0.tgz#415e967046b3a7f1d185277d84aa58203726a13f"
integrity sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==
dependencies:
semver "^6.0.0"
matcher@^3.0.0: matcher@^3.0.0:
version "3.0.0" version "3.0.0"
resolved "https://registry.yarnpkg.com/matcher/-/matcher-3.0.0.tgz#bd9060f4c5b70aa8041ccc6f80368760994f30ca" resolved "https://registry.yarnpkg.com/matcher/-/matcher-3.0.0.tgz#bd9060f4c5b70aa8041ccc6f80368760994f30ca"
@ -892,11 +1268,89 @@ minimatch@^3.1.1:
dependencies: dependencies:
brace-expansion "^1.1.7" brace-expansion "^1.1.7"
minimist@^1.2.6:
version "1.2.8"
resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.8.tgz#c1a464e7693302e082a075cee0c057741ac4772c"
integrity sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==
minipass@^2.6.0, minipass@^2.9.0:
version "2.9.0"
resolved "https://registry.yarnpkg.com/minipass/-/minipass-2.9.0.tgz#e713762e7d3e32fed803115cf93e04bca9fcc9a6"
integrity sha512-wxfUjg9WebH+CUDX/CdbRlh5SmfZiy/hpkxaRI16Y9W56Pa75sWgd/rvFilSgrauD9NyFymP/+JFV3KwzIsJeg==
dependencies:
safe-buffer "^5.1.2"
yallist "^3.0.0"
minipass@^3.0.0:
version "3.3.6"
resolved "https://registry.yarnpkg.com/minipass/-/minipass-3.3.6.tgz#7bba384db3a1520d18c9c0e5251c3444e95dd94a"
integrity sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==
dependencies:
yallist "^4.0.0"
minipass@^5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/minipass/-/minipass-5.0.0.tgz#3e9788ffb90b694a5d0ec94479a45b5d8738133d"
integrity sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ==
minizlib@^1.3.3:
version "1.3.3"
resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-1.3.3.tgz#2290de96818a34c29551c8a8d301216bd65a861d"
integrity sha512-6ZYMOEnmVsdCeTJVE0W9ZD+pVnE8h9Hma/iOwwRDsdQoePpoX56/8B6z3P9VNwppJuBKNRuFDRNRqRWexT9G9Q==
dependencies:
minipass "^2.9.0"
minizlib@^2.1.1:
version "2.1.2"
resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-2.1.2.tgz#e90d3466ba209b932451508a11ce3d3632145931"
integrity sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg==
dependencies:
minipass "^3.0.0"
yallist "^4.0.0"
mkdirp@^0.5.5:
version "0.5.6"
resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.6.tgz#7def03d2432dcae4ba1d611445c48396062255f6"
integrity sha512-FP+p8RB8OWpF3YZBCrP5gtADmtXApB5AMLn+vdyA+PyxCjrCs00mjyUozssO33cwDeT3wNGdLxJ5M//YqtHAJw==
dependencies:
minimist "^1.2.6"
mkdirp@^1.0.3:
version "1.0.4"
resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e"
integrity sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==
ms@2.1.2: ms@2.1.2:
version "2.1.2" version "2.1.2"
resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009"
integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==
ms@^2.1.1:
version "2.1.3"
resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2"
integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==
node-fetch@^2.6.7:
version "2.7.0"
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.7.0.tgz#d0f0fa6e3e2dc1d27efcd8ad99d550bda94d187d"
integrity sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==
dependencies:
whatwg-url "^5.0.0"
node-fetch@~2.6.1:
version "2.6.13"
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.13.tgz#a20acbbec73c2e09f9007de5cda17104122e0010"
integrity sha512-StxNAxh15zr77QvvkmveSQ8uCQ4+v5FkvNTj0OESmiHu+VRi/gXArXtkWMElOsOUNLtUEvI4yS+rdtOHZTwlQA==
dependencies:
whatwg-url "^5.0.0"
nopt@^5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/nopt/-/nopt-5.0.0.tgz#530942bb58a512fccafe53fe210f13a25355dc88"
integrity sha512-Tbj67rffqceeLpcRXrT7vKAN8CwfPeIBgM7E6iBkmKLV7bEMwpGgYLGv0jACUsECaa/vuxP0IjEont6umdMgtQ==
dependencies:
abbrev "1"
normalize-url@^6.0.1: normalize-url@^6.0.1:
version "6.1.0" version "6.1.0"
resolved "https://registry.yarnpkg.com/normalize-url/-/normalize-url-6.1.0.tgz#40d0885b535deffe3f3147bec877d05fe4c5668a" resolved "https://registry.yarnpkg.com/normalize-url/-/normalize-url-6.1.0.tgz#40d0885b535deffe3f3147bec877d05fe4c5668a"
@ -909,6 +1363,21 @@ npm-run-path@^4.0.1:
dependencies: dependencies:
path-key "^3.0.0" path-key "^3.0.0"
npmlog@^5.0.1:
version "5.0.1"
resolved "https://registry.yarnpkg.com/npmlog/-/npmlog-5.0.1.tgz#f06678e80e29419ad67ab964e0fa69959c1eb8b0"
integrity sha512-AqZtDUWOMKs1G/8lwylVjrdYgqA4d9nu8hc+0gzRxlDb1I10+FHBGMXs6aiQHFdCUUlqH99MUMuLfzWDNDtfxw==
dependencies:
are-we-there-yet "^2.0.0"
console-control-strings "^1.1.0"
gauge "^3.0.0"
set-blocking "^2.0.0"
object-assign@^4.1.1:
version "4.1.1"
resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863"
integrity sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==
object-keys@^1.1.1: object-keys@^1.1.1:
version "1.1.1" version "1.1.1"
resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e" resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e"
@ -996,7 +1465,7 @@ pkg-up@^3.1.0:
dependencies: dependencies:
find-up "^3.0.0" find-up "^3.0.0"
progress@^2.0.3: progress@^2.0.0, progress@^2.0.3:
version "2.0.3" version "2.0.3"
resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8" resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8"
integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA== integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==
@ -1029,6 +1498,25 @@ quick-lru@^5.1.1:
resolved "https://registry.yarnpkg.com/quick-lru/-/quick-lru-5.1.1.tgz#366493e6b3e42a3a6885e2e99d18f80fb7a8c932" resolved "https://registry.yarnpkg.com/quick-lru/-/quick-lru-5.1.1.tgz#366493e6b3e42a3a6885e2e99d18f80fb7a8c932"
integrity sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA== integrity sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA==
readable-stream@^3.6.0:
version "3.6.2"
resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.2.tgz#56a9b36ea965c00c5a93ef31eb111a0f11056967"
integrity sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==
dependencies:
inherits "^2.0.3"
string_decoder "^1.1.1"
util-deprecate "^1.0.1"
regenerator-runtime@^0.13.5:
version "0.13.11"
resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz#f6dca3e7ceec20590d07ada785636a90cdca17f9"
integrity sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==
require-directory@^2.1.1:
version "2.1.1"
resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42"
integrity sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==
require-from-string@^2.0.2: require-from-string@^2.0.2:
version "2.0.2" version "2.0.2"
resolved "https://registry.yarnpkg.com/require-from-string/-/require-from-string-2.0.2.tgz#89a7fdd938261267318eafe14f9c32e598c36909" resolved "https://registry.yarnpkg.com/require-from-string/-/require-from-string-2.0.2.tgz#89a7fdd938261267318eafe14f9c32e598c36909"
@ -1051,6 +1539,13 @@ reusify@^1.0.4:
resolved "https://registry.yarnpkg.com/reusify/-/reusify-1.0.4.tgz#90da382b1e126efc02146e90845a88db12925d76" resolved "https://registry.yarnpkg.com/reusify/-/reusify-1.0.4.tgz#90da382b1e126efc02146e90845a88db12925d76"
integrity sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw== integrity sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==
rimraf@^2.6.2:
version "2.7.1"
resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-2.7.1.tgz#35797f13a7fdadc566142c29d4f07ccad483e3ec"
integrity sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w==
dependencies:
glob "^7.1.3"
rimraf@^3.0.2: rimraf@^3.0.2:
version "3.0.2" version "3.0.2"
resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a" resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a"
@ -1077,17 +1572,27 @@ run-parallel@^1.1.9:
dependencies: dependencies:
queue-microtask "^1.2.2" queue-microtask "^1.2.2"
safe-buffer@^5.1.2, safe-buffer@^5.2.1, safe-buffer@~5.2.0:
version "5.2.1"
resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"
integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
sax@^1.2.4: sax@^1.2.4:
version "1.2.4" version "1.2.4"
resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.4.tgz#2816234e2378bddc4e5354fab5caa895df7100d9" resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.4.tgz#2816234e2378bddc4e5354fab5caa895df7100d9"
integrity sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw== integrity sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==
seedrandom@^3.0.5:
version "3.0.5"
resolved "https://registry.yarnpkg.com/seedrandom/-/seedrandom-3.0.5.tgz#54edc85c95222525b0c7a6f6b3543d8e0b3aa0a7"
integrity sha512-8OwmbklUNzwezjGInmZ+2clQmExQPvomqjL7LFqOYqtmuxRgQYqOD3mHaU+MvZn5FLUeVxVfQjwLZW/n/JFuqg==
semver-compare@^1.0.0: semver-compare@^1.0.0:
version "1.0.0" version "1.0.0"
resolved "https://registry.yarnpkg.com/semver-compare/-/semver-compare-1.0.0.tgz#0dee216a1c941ab37e9efb1788f6afc5ff5537fc" resolved "https://registry.yarnpkg.com/semver-compare/-/semver-compare-1.0.0.tgz#0dee216a1c941ab37e9efb1788f6afc5ff5537fc"
integrity sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow== integrity sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow==
semver@^6.2.0: semver@^6.0.0, semver@^6.2.0:
version "6.3.1" version "6.3.1"
resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4" resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4"
integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA== integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==
@ -1106,6 +1611,11 @@ serialize-error@^7.0.1:
dependencies: dependencies:
type-fest "^0.13.1" type-fest "^0.13.1"
set-blocking@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7"
integrity sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==
shebang-command@^2.0.0: shebang-command@^2.0.0:
version "2.0.0" version "2.0.0"
resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-2.0.0.tgz#ccd0af4f8835fbdc265b82461aaf0c36663f34ea" resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-2.0.0.tgz#ccd0af4f8835fbdc265b82461aaf0c36663f34ea"
@ -1118,7 +1628,7 @@ shebang-regex@^3.0.0:
resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-3.0.0.tgz#ae16f1644d873ecad843b0307b143362d4c42172" resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-3.0.0.tgz#ae16f1644d873ecad843b0307b143362d4c42172"
integrity sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A== integrity sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==
signal-exit@^3.0.3: signal-exit@^3.0.0, signal-exit@^3.0.3:
version "3.0.7" version "3.0.7"
resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.7.tgz#a9a1767f8af84155114eaabd73f99273c8f59ad9" resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.7.tgz#a9a1767f8af84155114eaabd73f99273c8f59ad9"
integrity sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ== integrity sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==
@ -1133,6 +1643,34 @@ sprintf-js@^1.1.2:
resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.1.2.tgz#da1765262bf8c0f571749f2ad6c26300207ae673" resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.1.2.tgz#da1765262bf8c0f571749f2ad6c26300207ae673"
integrity sha512-VE0SOVEHCk7Qc8ulkWw3ntAzXuqf7S2lvwQaDLRnUeIEaKNQJzV6BwmLKhOqT61aGhfUMrXeaBk+oDGCzvhcug== integrity sha512-VE0SOVEHCk7Qc8ulkWw3ntAzXuqf7S2lvwQaDLRnUeIEaKNQJzV6BwmLKhOqT61aGhfUMrXeaBk+oDGCzvhcug==
sprintf-js@~1.0.2:
version "1.0.3"
resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c"
integrity sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==
"string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
version "4.2.3"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
dependencies:
emoji-regex "^8.0.0"
is-fullwidth-code-point "^3.0.0"
strip-ansi "^6.0.1"
string_decoder@^1.1.1, string_decoder@^1.3.0:
version "1.3.0"
resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.3.0.tgz#42f114594a46cf1a8e30b0a84f56c78c3edac21e"
integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==
dependencies:
safe-buffer "~5.2.0"
strip-ansi@^6.0.0, strip-ansi@^6.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
dependencies:
ansi-regex "^5.0.1"
strip-final-newline@^2.0.0: strip-final-newline@^2.0.0:
version "2.0.0" version "2.0.0"
resolved "https://registry.yarnpkg.com/strip-final-newline/-/strip-final-newline-2.0.0.tgz#89b852fb2fcbe936f6f4b3187afb0a12c1ab58ad" resolved "https://registry.yarnpkg.com/strip-final-newline/-/strip-final-newline-2.0.0.tgz#89b852fb2fcbe936f6f4b3187afb0a12c1ab58ad"
@ -1145,6 +1683,38 @@ sumchecker@^3.0.1:
dependencies: dependencies:
debug "^4.1.0" debug "^4.1.0"
supports-color@^7.1.0:
version "7.2.0"
resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da"
integrity sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==
dependencies:
has-flag "^4.0.0"
tar@^4.4.6:
version "4.4.19"
resolved "https://registry.yarnpkg.com/tar/-/tar-4.4.19.tgz#2e4d7263df26f2b914dee10c825ab132123742f3"
integrity sha512-a20gEsvHnWe0ygBY8JbxoM4w3SJdhc7ZAuxkLqh+nvNQN2IOt0B5lLgM490X5Hl8FF0dl0tOf2ewFYAlIFgzVA==
dependencies:
chownr "^1.1.4"
fs-minipass "^1.2.7"
minipass "^2.9.0"
minizlib "^1.3.3"
mkdirp "^0.5.5"
safe-buffer "^5.2.1"
yallist "^3.1.1"
tar@^6.1.11:
version "6.2.1"
resolved "https://registry.yarnpkg.com/tar/-/tar-6.2.1.tgz#717549c541bc3c2af15751bea94b1dd068d4b03a"
integrity sha512-DZ4yORTwrbTj/7MZYq2w+/ZFdI6OZ/f9SFHR+71gIVUZhOQPHzVCLpvRnPgyaMpfWxxk/4ONva3GQSyNIKRv6A==
dependencies:
chownr "^2.0.0"
fs-minipass "^2.0.0"
minipass "^5.0.0"
minizlib "^2.1.1"
mkdirp "^1.0.3"
yallist "^4.0.0"
to-regex-range@^5.0.1: to-regex-range@^5.0.1:
version "5.0.1" version "5.0.1"
resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4" resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4"
@ -1152,6 +1722,11 @@ to-regex-range@^5.0.1:
dependencies: dependencies:
is-number "^7.0.0" is-number "^7.0.0"
tr46@~0.0.3:
version "0.0.3"
resolved "https://registry.yarnpkg.com/tr46/-/tr46-0.0.3.tgz#8184fd347dac9cdc185992f3a6622e14b9d9ab6a"
integrity sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==
type-fest@^0.13.1: type-fest@^0.13.1:
version "0.13.1" version "0.13.1"
resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.13.1.tgz#0172cb5bce80b0bd542ea348db50c7e21834d934" resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.13.1.tgz#0172cb5bce80b0bd542ea348db50c7e21834d934"
@ -1179,6 +1754,24 @@ uri-js@^4.2.2:
dependencies: dependencies:
punycode "^2.1.0" punycode "^2.1.0"
util-deprecate@^1.0.1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
integrity sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==
webidl-conversions@^3.0.0:
version "3.0.1"
resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz#24534275e2a7bc6be7bc86611cc16ae0a5654871"
integrity sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==
whatwg-url@^5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-5.0.0.tgz#966454e8765462e37644d3626f6742ce8b70965d"
integrity sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==
dependencies:
tr46 "~0.0.3"
webidl-conversions "^3.0.0"
which@^2.0.1: which@^2.0.1:
version "2.0.2" version "2.0.2"
resolved "https://registry.yarnpkg.com/which/-/which-2.0.2.tgz#7c6a8dd0a636a0327e10b59c9286eee93f3f51b1" resolved "https://registry.yarnpkg.com/which/-/which-2.0.2.tgz#7c6a8dd0a636a0327e10b59c9286eee93f3f51b1"
@ -1186,16 +1779,60 @@ which@^2.0.1:
dependencies: dependencies:
isexe "^2.0.0" isexe "^2.0.0"
wide-align@^1.1.2:
version "1.1.5"
resolved "https://registry.yarnpkg.com/wide-align/-/wide-align-1.1.5.tgz#df1d4c206854369ecf3c9a4898f1b23fbd9d15d3"
integrity sha512-eDMORYaPNZ4sQIuuYPDHdQvf4gyCF9rEEV/yPxGfwPkRodwEgiMUUXTx/dex+Me0wxx53S+NgUHaP7y3MGlDmg==
dependencies:
string-width "^1.0.2 || 2 || 3 || 4"
wrap-ansi@^7.0.0:
version "7.0.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
dependencies:
ansi-styles "^4.0.0"
string-width "^4.1.0"
strip-ansi "^6.0.0"
wrappy@1: wrappy@1:
version "1.0.2" version "1.0.2"
resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f" resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f"
integrity sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ== integrity sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==
y18n@^5.0.5:
version "5.0.8"
resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.8.tgz#7f4934d0f7ca8c56f95314939ddcd2dd91ce1d55"
integrity sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==
yallist@^3.0.0, yallist@^3.1.1:
version "3.1.1"
resolved "https://registry.yarnpkg.com/yallist/-/yallist-3.1.1.tgz#dbb7daf9bfd8bac9ab45ebf602b8cbad0d5d08fd"
integrity sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==
yallist@^4.0.0: yallist@^4.0.0:
version "4.0.0" version "4.0.0"
resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72" resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72"
integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A== integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==
yargs-parser@^20.2.2:
version "20.2.9"
resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.9.tgz#2eb7dc3b0289718fc295f362753845c41a0c94ee"
integrity sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==
yargs@^16.0.3:
version "16.2.0"
resolved "https://registry.yarnpkg.com/yargs/-/yargs-16.2.0.tgz#1c82bf0f6b6a66eafce7ef30e376f49a12477f66"
integrity sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==
dependencies:
cliui "^7.0.2"
escalade "^3.1.1"
get-caller-file "^2.0.5"
require-directory "^2.1.1"
string-width "^4.2.0"
y18n "^5.0.5"
yargs-parser "^20.2.2"
yauzl@^2.10.0: yauzl@^2.10.0:
version "2.10.0" version "2.10.0"
resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9" resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9"

View file

@ -1,19 +1,21 @@
import logging import logging
import time import time
from datetime import datetime from typing import Any, Dict, List, Tuple
from typing import Any, Dict, List, Tuple, Union
import requests import requests
from magika import Magika
from khoj.database.models import Entry as DbEntry from khoj.database.models import Entry as DbEntry
from khoj.database.models import GithubConfig, KhojUser from khoj.database.models import GithubConfig, KhojUser
from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
from khoj.processor.content.text_to_entries import TextToEntries from khoj.processor.content.text_to_entries import TextToEntries
from khoj.utils.helpers import timer from khoj.utils.helpers import timer
from khoj.utils.rawconfig import Entry, GithubContentConfig, GithubRepoConfig from khoj.utils.rawconfig import GithubContentConfig, GithubRepoConfig
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
magika = Magika()
class GithubToEntries(TextToEntries): class GithubToEntries(TextToEntries):
@ -62,15 +64,18 @@ class GithubToEntries(TextToEntries):
repo_url = f"https://api.github.com/repos/{repo.owner}/{repo.name}" repo_url = f"https://api.github.com/repos/{repo.owner}/{repo.name}"
repo_shorthand = f"{repo.owner}/{repo.name}" repo_shorthand = f"{repo.owner}/{repo.name}"
logger.info(f"Processing github repo {repo_shorthand}") logger.info(f"Processing github repo {repo_shorthand}")
with timer("Download markdown files from github repo", logger): with timer("Download files from github repo", logger):
try: try:
markdown_files, org_files = self.get_files(repo_url, repo) markdown_files, org_files, plaintext_files = self.get_files(repo_url, repo)
except ConnectionAbortedError as e:
logger.error(f"Github rate limit reached. Skip indexing github repo {repo_shorthand}")
except Exception as e: except Exception as e:
logger.error(f"Unable to download github repo {repo_shorthand}", exc_info=True) logger.error(f"Unable to download github repo {repo_shorthand}", exc_info=True)
raise e raise e
logger.info(f"Found {len(markdown_files)} markdown files in github repo {repo_shorthand}") logger.info(
logger.info(f"Found {len(org_files)} org files in github repo {repo_shorthand}") f"Found {len(markdown_files)} md, {len(org_files)} org and {len(plaintext_files)} text files in github repo {repo_shorthand}"
)
current_entries = [] current_entries = []
with timer(f"Extract markdown entries from github repo {repo_shorthand}", logger): with timer(f"Extract markdown entries from github repo {repo_shorthand}", logger):
@ -83,14 +88,10 @@ class GithubToEntries(TextToEntries):
*GithubToEntries.extract_org_entries(org_files) *GithubToEntries.extract_org_entries(org_files)
) )
with timer(f"Extract commit messages from github repo {repo_shorthand}", logger): with timer(f"Extract plaintext entries from github repo {repo_shorthand}", logger):
current_entries += self.convert_commits_to_entries(self.get_commits(repo_url), repo) current_entries += PlaintextToEntries.convert_text_files_to_entries(
*GithubToEntries.extract_plaintext_entries(plaintext_files)
with timer(f"Extract issues from github repo {repo_shorthand}", logger):
issue_entries = GithubToEntries.convert_issues_to_entries(
*GithubToEntries.extract_github_issues(self.get_issues(repo_url))
) )
current_entries += issue_entries
with timer(f"Split entries by max token size supported by model {repo_shorthand}", logger): with timer(f"Split entries by max token size supported by model {repo_shorthand}", logger):
current_entries = TextToEntries.split_entries_by_max_tokens(current_entries, max_tokens=256) current_entries = TextToEntries.split_entries_by_max_tokens(current_entries, max_tokens=256)
@ -119,16 +120,16 @@ class GithubToEntries(TextToEntries):
response = requests.get(repo_content_url, headers=headers, params=params) response = requests.get(repo_content_url, headers=headers, params=params)
contents = response.json() contents = response.json()
# Wait for rate limit reset if needed # Raise exception if hit rate limit
result = self.wait_for_rate_limit_reset(response, self.get_files, repo_url, repo) if response.status_code != 200 and response.headers.get("X-RateLimit-Remaining") == "0":
if result is not None: raise ConnectionAbortedError("Github rate limit reached")
return result
# Extract markdown files from the repository # Extract markdown files from the repository
markdown_files: List[Any] = [] markdown_files: List[Dict[str, str]] = []
org_files: List[Any] = [] org_files: List[Dict[str, str]] = []
plaintext_files: List[Dict[str, str]] = []
if "tree" not in contents: if "tree" not in contents:
return markdown_files, org_files return markdown_files, org_files, plaintext_files
for item in contents["tree"]: for item in contents["tree"]:
# Find all markdown files in the repository # Find all markdown files in the repository
@ -147,144 +148,46 @@ class GithubToEntries(TextToEntries):
# Add org file contents and URL to list # Add org file contents and URL to list
org_files += [{"content": self.get_file_contents(item["url"]), "path": url_path}] org_files += [{"content": self.get_file_contents(item["url"]), "path": url_path}]
return markdown_files, org_files # Find, index remaining non-binary files in the repository
elif item["type"] == "blob":
url_path = f'https://github.com/{repo.owner}/{repo.name}/blob/{repo.branch}/{item["path"]}'
content_bytes = self.get_file_contents(item["url"], decode=False)
content_type, content_str = None, None
try:
content_type = magika.identify_bytes(content_bytes).output.mime_type
content_str = content_bytes.decode("utf-8")
except:
logger.error(
f"Unable to identify content type or decode content of file at {url_path}. Skip indexing it"
)
continue
def get_file_contents(self, file_url): # Add non-binary file contents and URL to list
if content_type.startswith("text/"):
plaintext_files += [{"content": content_str, "path": url_path}]
return markdown_files, org_files, plaintext_files
def get_file_contents(self, file_url, decode=True):
# Get text from each markdown file # Get text from each markdown file
headers = {"Accept": "application/vnd.github.v3.raw"} headers = {"Accept": "application/vnd.github.v3.raw"}
response = self.session.get(file_url, headers=headers, stream=True) response = self.session.get(file_url, headers=headers, stream=True)
# Wait for rate limit reset if needed # Stop indexing on hitting rate limit
result = self.wait_for_rate_limit_reset(response, self.get_file_contents, file_url) if response.status_code != 200 and response.headers.get("X-RateLimit-Remaining") == "0":
if result is not None: raise ConnectionAbortedError("Github rate limit reached")
return result
content = "" content = "" if decode else b""
for chunk in response.iter_content(chunk_size=2048): for chunk in response.iter_content(chunk_size=2048):
if chunk: if chunk:
try: try:
content += chunk.decode("utf-8") content += chunk.decode("utf-8") if decode else chunk
except Exception as e: except Exception as e:
logger.error(f"Unable to decode chunk from {file_url}") logger.error(f"Unable to decode chunk from {file_url}")
logger.error(e) logger.error(e)
return content return content
def get_commits(self, repo_url: str) -> List[Dict]:
return self._get_commits(f"{repo_url}/commits")
def _get_commits(self, commits_url: Union[str, None]) -> List[Dict]:
# Get commit messages from the repository using the Github API
params = {"per_page": 100}
commits = []
while commits_url is not None:
# Get the next page of commits
response = self.session.get(commits_url, params=params, stream=True)
# Read the streamed response into a JSON object
content = response.json()
# Wait for rate limit reset if needed
result = self.wait_for_rate_limit_reset(response, self._get_commits, commits_url)
if result is not None:
return result
# Extract commit messages from the response
for commit in content:
if "commit" in commit and "message" in commit["commit"] and "html_url" in commit:
commits += [{"content": commit["commit"]["message"], "path": commit["html_url"]}]
else:
logger.debug(f"Skipping commit with missing properties: {commit}")
# Get the URL for the next page of commits, if any
commits_url = response.links.get("next", {}).get("url")
return commits
def get_issues(self, repo_url: str) -> List[Dict]:
return self._get_issues(f"{repo_url}/issues")
def _get_issues(self, issues_url: Union[str, None]) -> List[Dict]:
issues = []
per_page = 100
params = {"per_page": per_page, "state": "all"}
while issues_url is not None:
# Get the next page of issues
response = self.session.get(issues_url, params=params, stream=True) # type: ignore
raw_issues = response.json()
# Wait for rate limit reset if needed
result = self.wait_for_rate_limit_reset(response, self._get_issues, issues_url)
if result is not None:
return result
for issue in raw_issues:
username = issue["user"]["login"]
user_url = f"[{username}]({issue['user']['html_url']})"
issue_content = {
"content": f"## [Issue {issue['number']}]({issue['html_url']}) {issue['title']}\nby {user_url}\n\n{issue['body']}",
"path": issue["html_url"],
}
issue_content["created_at"] = {issue["created_at"]}
if issue["comments"] > 0:
issue_content["comments"] = self.get_comments(issue["comments_url"])
issues += [issue_content]
issues_url = response.links.get("next", {}).get("url")
return issues
def get_comments(self, comments_url: Union[str, None]) -> List[Dict]:
# By default, the number of results per page is 30. We'll keep it as-is for now.
comments = []
per_page = 100
params = {"per_page": per_page}
while comments_url is not None:
# Get the next page of comments
response = self.session.get(comments_url, params=params, stream=True)
raw_comments = response.json()
# Wait for rate limit reset if needed
result = self.wait_for_rate_limit_reset(response, self.get_comments, comments_url)
if result is not None:
return result
for comment in raw_comments:
created_at = datetime.strptime(comment["created_at"], "%Y-%m-%dT%H:%M:%SZ").strftime("%Y-%m-%d %H:%M")
commenter = comment["user"]["login"]
commenter_url = comment["user"]["html_url"]
comment_url = comment["html_url"]
comment_url_link = f"[{created_at}]({comment_url})"
avatar_url = comment["user"]["avatar_url"]
avatar = f"![{commenter}]({avatar_url})"
comments += [
{
"content": f"### {avatar} [{commenter}]({commenter_url}) - ({comment_url_link})\n\n{comment['body']}"
}
]
comments_url = response.links.get("next", {}).get("url")
return comments
def convert_commits_to_entries(self, commits, repo: GithubRepoConfig) -> List[Entry]:
entries: List[Entry] = []
for commit in commits:
compiled = f'Commit message from {repo.owner}/{repo.name}:\n{commit["content"]}'
entries.append(
Entry(
compiled=compiled,
raw=f'### {commit["content"]}',
heading=commit["content"].split("\n")[0],
file=commit["path"],
)
)
return entries
@staticmethod @staticmethod
def extract_markdown_entries(markdown_files): def extract_markdown_entries(markdown_files):
entries = [] entries = []
@ -307,30 +210,12 @@ class GithubToEntries(TextToEntries):
return entries, dict(entry_to_file_map) return entries, dict(entry_to_file_map)
@staticmethod @staticmethod
def extract_github_issues(issues): def extract_plaintext_entries(plaintext_files):
entries = [] entries = []
entry_to_file_map = {} entry_to_file_map = []
for issue in issues:
content = issue["content"]
if "comments" in issue:
for comment in issue["comments"]:
content += "\n\n" + comment["content"]
entries.append(content)
entry_to_file_map[content] = {"path": issue["path"]}
return entries, entry_to_file_map
@staticmethod for doc in plaintext_files:
def convert_issues_to_entries(parsed_entries: List[str], entry_to_metadata_map: Dict[str, Dict]) -> List[Entry]: entries, entry_to_file_map = PlaintextToEntries.process_single_plaintext_file(
entries = [] doc["content"], doc["path"], entries, entry_to_file_map
for entry in parsed_entries:
entry_file_name = entry_to_metadata_map[entry]["path"]
entries.append(
Entry(
compiled=entry,
raw=entry,
heading=entry.split("\n")[0],
file=entry_file_name,
) )
) return entries, dict(entry_to_file_map)
return entries

View file

@ -1,7 +1,9 @@
import logging import logging
import re
from pathlib import Path from pathlib import Path
from typing import List, Tuple from typing import Dict, List, Tuple
import urllib3
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from khoj.database.models import Entry as DbEntry from khoj.database.models import Entry as DbEntry
@ -28,26 +30,13 @@ class PlaintextToEntries(TextToEntries):
else: else:
deletion_file_names = None deletion_file_names = None
with timer("Scrub plaintext files and extract text", logger):
for file in files:
try:
plaintext_content = files[file]
if file.endswith(("html", "htm", "xml")):
plaintext_content = PlaintextToEntries.extract_html_content(
plaintext_content, file.split(".")[-1]
)
files[file] = plaintext_content
except Exception as e:
logger.warning(f"Unable to read file: {file} as plaintext. Skipping file.")
logger.warning(e, exc_info=True)
# Extract Entries from specified plaintext files # Extract Entries from specified plaintext files
with timer("Parse entries from specified Plaintext files", logger): with timer("Extract entries from specified Plaintext files", logger):
current_entries = PlaintextToEntries.extract_plaintext_entries(files) current_entries = PlaintextToEntries.extract_plaintext_entries(files)
# Split entries by max tokens supported by model # Split entries by max tokens supported by model
with timer("Split entries by max token size supported by model", logger): with timer("Split entries by max token size supported by model", logger):
current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=256) current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=256, raw_is_compiled=True)
# Identify, mark and merge any new entries with previous entries # Identify, mark and merge any new entries with previous entries
with timer("Identify new or updated entries", logger): with timer("Identify new or updated entries", logger):
@ -74,16 +63,57 @@ class PlaintextToEntries(TextToEntries):
return soup.get_text(strip=True, separator="\n") return soup.get_text(strip=True, separator="\n")
@staticmethod @staticmethod
def extract_plaintext_entries(entry_to_file_map: dict[str, str]) -> List[Entry]: def extract_plaintext_entries(text_files: Dict[str, str]) -> List[Entry]:
"Convert each plaintext entries into a dictionary" entries: List[str] = []
entries = [] entry_to_file_map: List[Tuple[str, str]] = []
for file, entry in entry_to_file_map.items(): for text_file in text_files:
try:
text_content = text_files[text_file]
entries, entry_to_file_map = PlaintextToEntries.process_single_plaintext_file(
text_content, text_file, entries, entry_to_file_map
)
except Exception as e:
logger.warning(f"Unable to read file: {text_file} as plaintext. Skipping file.")
logger.warning(e, exc_info=True)
# Extract Entries from specified plaintext files
return PlaintextToEntries.convert_text_files_to_entries(entries, dict(entry_to_file_map))
@staticmethod
def process_single_plaintext_file(
text_content: str,
text_file: str,
entries: List[str],
entry_to_file_map: List[Tuple[str, str]],
) -> Tuple[List[str], List[Tuple[str, str]]]:
if text_file.endswith(("html", "htm", "xml")):
text_content = PlaintextToEntries.extract_html_content(text_content, text_file.split(".")[-1])
entry_to_file_map += [(text_content, text_file)]
entries.extend([text_content])
return entries, entry_to_file_map
@staticmethod
def convert_text_files_to_entries(parsed_entries: List[str], entry_to_file_map: dict[str, str]) -> List[Entry]:
"Convert each plaintext file into an entry"
entries: List[Entry] = []
for parsed_entry in parsed_entries:
raw_filename = entry_to_file_map[parsed_entry]
# Check if raw_filename is a URL. If so, save it as is. If not, convert it to a Path.
if type(raw_filename) == str and re.search(r"^https?://", raw_filename):
# Escape the URL to avoid issues with special characters
entry_filename = urllib3.util.parse_url(raw_filename).url
else:
entry_filename = raw_filename
# Append base filename to compiled entry for context to model
entries.append( entries.append(
Entry( Entry(
raw=entry, raw=parsed_entry,
file=file, file=f"{entry_filename}",
compiled=f"{Path(file).stem}\n{entry}", compiled=f"{entry_filename}\n{parsed_entry}",
heading=Path(file).stem, heading=entry_filename,
) )
) )
logger.debug(f"Converted {len(parsed_entries)} plaintext files to entries")
return entries return entries

View file

@ -59,7 +59,7 @@ class TextToEntries(ABC):
@staticmethod @staticmethod
def split_entries_by_max_tokens( def split_entries_by_max_tokens(
entries: List[Entry], max_tokens: int = 256, max_word_length: int = 500 entries: List[Entry], max_tokens: int = 256, max_word_length: int = 500, raw_is_compiled: bool = False
) -> List[Entry]: ) -> List[Entry]:
"Split entries if compiled entry length exceeds the max tokens supported by the ML model." "Split entries if compiled entry length exceeds the max tokens supported by the ML model."
chunked_entries: List[Entry] = [] chunked_entries: List[Entry] = []
@ -94,7 +94,7 @@ class TextToEntries(ABC):
# Clean entry of unwanted characters like \0 character # Clean entry of unwanted characters like \0 character
compiled_entry_chunk = TextToEntries.clean_field(compiled_entry_chunk) compiled_entry_chunk = TextToEntries.clean_field(compiled_entry_chunk)
entry.raw = TextToEntries.clean_field(entry.raw) entry.raw = compiled_entry_chunk if raw_is_compiled else TextToEntries.clean_field(entry.raw)
entry.heading = TextToEntries.clean_field(entry.heading) entry.heading = TextToEntries.clean_field(entry.heading)
entry.file = TextToEntries.clean_field(entry.file) entry.file = TextToEntries.clean_field(entry.file)

View file

@ -67,11 +67,10 @@ async def update(
try: try:
logger.info(f"📬 Updating content index via API call by {client} client") logger.info(f"📬 Updating content index via API call by {client} client")
for file in files: for file in files:
file_type, encoding = get_file_type(file.content_type) file_content = file.file.read()
file_type, encoding = get_file_type(file.content_type, file_content)
if file_type in index_files: if file_type in index_files:
index_files[file_type][file.filename] = ( index_files[file_type][file.filename] = file_content.decode(encoding) if encoding else file_content
file.file.read().decode("utf-8") if encoding == "utf-8" else file.file.read() # type: ignore
)
else: else:
logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file.filename}") logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file.filename}")

View file

@ -1,9 +1,11 @@
import glob import glob
import logging import logging
import os import os
from pathlib import Path
from typing import Optional from typing import Optional
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from magika import Magika
from khoj.database.models import ( from khoj.database.models import (
LocalMarkdownConfig, LocalMarkdownConfig,
@ -16,6 +18,7 @@ from khoj.utils.helpers import get_absolute_path, is_none_or_empty
from khoj.utils.rawconfig import TextContentConfig from khoj.utils.rawconfig import TextContentConfig
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
magika = Magika()
def collect_files(search_type: Optional[SearchType] = SearchType.All, user=None) -> dict: def collect_files(search_type: Optional[SearchType] = SearchType.All, user=None) -> dict:
@ -47,6 +50,11 @@ def construct_config_from_db(db_config) -> TextContentConfig:
def get_plaintext_files(config: TextContentConfig) -> dict[str, str]: def get_plaintext_files(config: TextContentConfig) -> dict[str, str]:
def is_plaintextfile(file: str): def is_plaintextfile(file: str):
"Check if file is plaintext file" "Check if file is plaintext file"
# Check if file path exists
mime_type = magika.identify_path(Path(file)).output.mime_type
if mime_type != "inode/x-empty" and mime_type != "application/unknown":
return mime_type.startswith("text/")
# Use file extension to decide plaintext if file content is not identifiable
return file.endswith(("txt", "md", "markdown", "org", "mbox", "rst", "html", "htm", "xml")) return file.endswith(("txt", "md", "markdown", "org", "mbox", "rst", "html", "htm", "xml"))
def extract_html_content(html_content: str): def extract_html_content(html_content: str):
@ -65,7 +73,7 @@ def get_plaintext_files(config: TextContentConfig) -> dict[str, str]:
logger.debug("At least one of input-files or input-file-filter is required to be specified") logger.debug("At least one of input-files or input-file-filter is required to be specified")
return {} return {}
"Get all files to process" # Get all plain text files to process
absolute_plaintext_files, filtered_plaintext_files = set(), set() absolute_plaintext_files, filtered_plaintext_files = set(), set()
if input_files: if input_files:
absolute_plaintext_files = {get_absolute_path(jsonl_file) for jsonl_file in input_files} absolute_plaintext_files = {get_absolute_path(jsonl_file) for jsonl_file in input_files}
@ -209,7 +217,7 @@ def get_pdf_files(config: TextContentConfig):
logger.debug("At least one of pdf-files or pdf-file-filter is required to be specified") logger.debug("At least one of pdf-files or pdf-file-filter is required to be specified")
return {} return {}
"Get PDF files to process" # Get PDF files to process
absolute_pdf_files, filtered_pdf_files = set(), set() absolute_pdf_files, filtered_pdf_files = set(), set()
if pdf_files: if pdf_files:
absolute_pdf_files = {get_absolute_path(pdf_file) for pdf_file in pdf_files} absolute_pdf_files = {get_absolute_path(pdf_file) for pdf_file in pdf_files}

View file

@ -19,6 +19,7 @@ from urllib.parse import urlparse
import torch import torch
from asgiref.sync import sync_to_async from asgiref.sync import sync_to_async
from magika import Magika
from khoj.utils import constants from khoj.utils import constants
@ -29,6 +30,10 @@ if TYPE_CHECKING:
from khoj.utils.rawconfig import AppConfig from khoj.utils.rawconfig import AppConfig
# Initialize Magika for file type identification
magika = Magika()
class AsyncIteratorWrapper: class AsyncIteratorWrapper:
def __init__(self, obj): def __init__(self, obj):
self._it = iter(obj) self._it = iter(obj)
@ -88,22 +93,31 @@ def merge_dicts(priority_dict: dict, default_dict: dict):
return merged_dict return merged_dict
def get_file_type(file_type: str) -> tuple[str, str]: def get_file_type(file_type: str, file_content: bytes) -> tuple[str, str]:
"Get file type from file mime type" "Get file type from file mime type"
# Extract encoding from file_type
encoding = file_type.split("=")[1].strip().lower() if ";" in file_type else None encoding = file_type.split("=")[1].strip().lower() if ";" in file_type else None
file_type = file_type.split(";")[0].strip() if ";" in file_type else file_type file_type = file_type.split(";")[0].strip() if ";" in file_type else file_type
if file_type in ["text/markdown"]:
# Infer content type from reading file content
try:
content_type = magika.identify_bytes(file_content).output.mime_type
except Exception:
# Fallback to using just file type if content type cannot be inferred
content_type = file_type
if file_type in ["text/markdown"] and content_type.startswith("text/"):
return "markdown", encoding return "markdown", encoding
elif file_type in ["text/org"]: elif file_type in ["text/org"] and content_type.startswith("text/"):
return "org", encoding return "org", encoding
elif file_type in ["application/pdf"]: elif file_type in ["application/pdf"] and content_type == "application/pdf":
return "pdf", encoding return "pdf", encoding
elif file_type in ["image/jpeg"]: elif file_type in ["image/jpeg"] and content_type == "image/jpeg":
return "jpeg", encoding return "jpeg", encoding
elif file_type in ["image/png"]: elif file_type in ["image/png"] and content_type == "image/png":
return "png", encoding return "png", encoding
elif file_type in ["text/plain", "text/html", "application/xml", "text/x-rst"]: elif content_type.startswith("text/"):
return "plaintext", encoding return "plaintext", encoding
else: else:
return "other", encoding return "other", encoding

View file

@ -1,6 +1,5 @@
# Standard Modules # Standard Modules
import os import os
from io import BytesIO
from urllib.parse import quote from urllib.parse import quote
import pytest import pytest

View file

@ -15,8 +15,6 @@ def test_plaintext_file(tmp_path):
""" """
plaintextfile = create_file(tmp_path, raw_entry) plaintextfile = create_file(tmp_path, raw_entry)
filename = plaintextfile.stem
# Act # Act
# Extract Entries from specified plaintext files # Extract Entries from specified plaintext files
@ -24,7 +22,7 @@ def test_plaintext_file(tmp_path):
f"{plaintextfile}": raw_entry, f"{plaintextfile}": raw_entry,
} }
entries = PlaintextToEntries.extract_plaintext_entries(entry_to_file_map=data) entries = PlaintextToEntries.extract_plaintext_entries(data)
# Convert each entry.file to absolute path to make them JSON serializable # Convert each entry.file to absolute path to make them JSON serializable
for entry in entries: for entry in entries:
@ -35,7 +33,7 @@ def test_plaintext_file(tmp_path):
# Ensure raw entry with no headings do not get heading prefix prepended # Ensure raw entry with no headings do not get heading prefix prepended
assert not entries[0].raw.startswith("#") assert not entries[0].raw.startswith("#")
# Ensure compiled entry has filename prepended as top level heading # Ensure compiled entry has filename prepended as top level heading
assert entries[0].compiled == f"{filename}\n{raw_entry}" assert entries[0].compiled == f"{plaintextfile}\n{raw_entry}"
def test_get_plaintext_files(tmp_path): def test_get_plaintext_files(tmp_path):
@ -100,6 +98,35 @@ def test_parse_html_plaintext_file(content_config, default_user: KhojUser):
assert "<div>" not in entries[0].raw assert "<div>" not in entries[0].raw
def test_large_plaintext_file_split_into_multiple_entries(tmp_path):
"Convert files with no heading to jsonl."
# Arrange
max_tokens = 256
normal_entry = " ".join([f"{number}" for number in range(max_tokens - 1)])
large_entry = " ".join([f"{number}" for number in range(max_tokens)])
normal_plaintextfile = create_file(tmp_path, normal_entry)
large_plaintextfile = create_file(tmp_path, large_entry)
normal_data = {f"{normal_plaintextfile}": normal_entry}
large_data = {f"{large_plaintextfile}": large_entry}
# Act
# Extract Entries from specified plaintext files
normal_entries = PlaintextToEntries.split_entries_by_max_tokens(
PlaintextToEntries.extract_plaintext_entries(normal_data),
max_tokens=max_tokens,
raw_is_compiled=True,
)
large_entries = PlaintextToEntries.split_entries_by_max_tokens(
PlaintextToEntries.extract_plaintext_entries(large_data), max_tokens=max_tokens, raw_is_compiled=True
)
# Assert
assert len(normal_entries) == 1
assert len(large_entries) == 2
# Helper Functions # Helper Functions
def create_file(tmp_path: Path, entry=None, filename="test.md"): def create_file(tmp_path: Path, entry=None, filename="test.md"):
file_ = tmp_path / filename file_ = tmp_path / filename