mirror of
https://github.com/khoj-ai/khoj.git
synced 2025-02-17 08:04:21 +00:00
Index only files with a valid text extension in folders synced by the Desktop app
This maintains a consistent set of indexable files from the Desktop app, whether indexing via file or folder filters
This commit is contained in:
parent
9a48f72041
commit
34c3f70203
1 changed file with 5 additions and 1 deletion
|
@ -117,12 +117,16 @@ async function isPlainTextFile(filePath) {
|
||||||
if (!isMagikaLoaded) {
|
if (!isMagikaLoaded) {
|
||||||
await magika.load();
|
await magika.load();
|
||||||
isMagikaLoaded = true;
|
isMagikaLoaded = true;
|
||||||
|
validFileTypes = [
|
||||||
|
"org", "md", "pdf",
|
||||||
|
// all text file extensions known to Magika
|
||||||
|
...magika.config.labels.filter(l => l.is_text == true).map(l => l.name)];
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const fileContent = fs.readFileSync(filePath);
|
const fileContent = fs.readFileSync(filePath);
|
||||||
const fileType = await magika.identifyBytes(fileContent);
|
const fileType = await magika.identifyBytes(fileContent);
|
||||||
const fileLabel = magika.config.labels.filter(l => l.name == fileType.label)?.[0]
|
const fileLabel = magika.config.labels.filter(l => l.name == fileType.label)?.[0]
|
||||||
return fileLabel?.is_text
|
return fileLabel?.is_text && validFileTypes.includes(fileType?.label);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error("Failed to identify file type: ", err);
|
console.error("Failed to identify file type: ", err);
|
||||||
return false;
|
return false;
|
||||||
|
|
Loading…
Add table
Reference in a new issue