Add new API to batch delete a list of files by filename

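- Reorder the DELETE content API route definitions to go from more
  specific to more general, so the fixed `/file` and `/files` routes are
  declared before the catch-all `/{content_source}` route
- Create batched file deletion DB adapter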
This commit is contained in:
Debanjum Singh Solanky 2024-07-29 01:37:40 +05:30
parent 9d86cb57ac
commit ccc46a09b5
2 changed files with 69 additions and 34 deletions

View file

@ -1106,6 +1106,16 @@ class EntryAdapters:
async def adelete_entry_by_file(user: KhojUser, file_path: str):
    """Asynchronously delete the user's entries whose ``file_path`` equals the given path.

    Args:
        user: Owner of the entries; only this user's entries are matched.
        file_path: Exact value compared against each entry's ``file_path``.

    Returns:
        The awaited result of the queryset's ``adelete()`` call, unchanged.
    """
    entries_for_file = Entry.objects.filter(user=user, file_path=file_path)
    return await entries_for_file.adelete()
@staticmethod
async def adelete_entries_by_filenames(user: KhojUser, filenames: List[str], batch_size=1000):
    """Delete the user's entries for the given file paths, one batch of paths at a time.

    Args:
        user: Owner of the entries; only this user's entries are matched.
        filenames: Values matched against each entry's ``file_path``.
        batch_size: Maximum number of paths placed in one ``file_path__in`` filter.

    Returns:
        Sum of the counts reported by each batch's ``adelete()`` call; 0 when
        ``filenames`` is empty.
    """
    total_deleted = 0
    # Walk the list in fixed-size windows so each query's IN clause holds at
    # most ``batch_size`` paths.
    for start in range(0, len(filenames), batch_size):
        window = filenames[start : start + batch_size]
        matching_entries = Entry.objects.filter(user=user, file_path__in=window)
        # adelete() yields a (count, details) pair; only the count is accumulated.
        removed, _ = await matching_entries.adelete()
        total_deleted += removed
    return total_deleted
@staticmethod
def get_all_filenames_by_source(user: KhojUser, file_source: str):
return (

View file

@ -236,42 +236,9 @@ async def set_content_notion(
return {"status": "ok"}
@api_content.delete("/{content_source}", status_code=200)
@requires(["authenticated"])
async def delete_content_source(
    request: Request,
    content_source: str,
    client: Optional[str] = None,
):
    """Delete the authenticated user's data for one content source.

    Removes the rows of the object mapped from ``content_source`` (unless that
    object is ``"Computer"``), deletes the user's entries for the source,
    clears the Notion or GitHub config when the source is one of those, and
    records a telemetry event.

    Args:
        request: Incoming request; the user is read from ``request.user.object``.
        content_source: Path parameter naming the source to delete.
        client: Optional client identifier forwarded to telemetry.

    Returns:
        ``{"status": "ok"}`` once all deletions have been awaited.

    Raises:
        ValueError: If ``map_config_to_object`` returns ``None`` for
            ``content_source``.
    """
    user = request.user.object

    source_model = map_config_to_object(content_source)
    if source_model is None:
        raise ValueError(f"Invalid content source: {content_source}")

    # "Computer" is skipped here — presumably it has no per-source model to
    # clear; verify against map_config_to_object.
    if source_model != "Computer":
        await source_model.objects.filter(user=user).adelete()

    await sync_to_async(EntryAdapters.delete_all_entries)(user, file_source=content_source)

    if content_source == DbEntry.EntrySource.NOTION:
        await NotionConfig.objects.filter(user=user).adelete()
    elif content_source == DbEntry.EntrySource.GITHUB:
        await GithubConfig.objects.filter(user=user).adelete()

    telemetry_metadata = {"content_source": content_source}
    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="delete_content_config",
        client=client,
        metadata=telemetry_metadata,
    )

    # NOTE(review): the result of this lookup is never used; the call is kept
    # only to preserve existing behaviour and looks safe to remove — confirm.
    await sync_to_async(EntryAdapters.get_unique_file_types)(user)
    return {"status": "ok"}
@api_content.delete("/file", status_code=201)
@requires(["authenticated"])
async def delete_content_file(
async def delete_content_files(
request: Request,
filename: str,
client: Optional[str] = None,
@ -290,6 +257,31 @@ async def delete_content_file(
return {"status": "ok"}
# Request body for the batched file deletion endpoint.
class DeleteFilesRequest(BaseModel):
    # File paths whose entries should be deleted.
    files: list[str]
@api_content.delete("/files", status_code=201)
@requires(["authenticated"])
async def delete_content_file(
    request: Request,
    files: DeleteFilesRequest,
    client: Optional[str] = None,
):
    """Delete the authenticated user's entries for every file path in the request body.

    Args:
        request: Incoming request; the user is read from ``request.user.object``.
        files: Request body carrying the list of file paths in ``files.files``.
        client: Optional client identifier forwarded to telemetry.

    Returns:
        ``{"status": "ok", "deleted_count": <int>}`` where ``deleted_count`` is
        the value returned by ``EntryAdapters.adelete_entries_by_filenames``.
    """
    # NOTE(review): this handler serves the plural "/files" route but carries
    # the singular name; the "/file" handler appears to carry the plural one.
    current_user = request.user.object

    # Telemetry is recorded before the deletion runs.
    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="delete_file",
        client=client,
    )

    removed = await EntryAdapters.adelete_entries_by_filenames(current_user, files.files)
    return {"status": "ok", "deleted_count": removed}
@api_content.get("/size", response_model=Dict[str, int])
@requires(["authenticated"])
async def get_content_size(request: Request, common: CommonQueryParams, client: Optional[str] = None):
@ -336,6 +328,39 @@ async def get_content_source(
return await sync_to_async(list)(EntryAdapters.get_all_filenames_by_source(user, content_source)) # type: ignore[call-arg]
@api_content.delete("/{content_source}", status_code=200)
@requires(["authenticated"])
async def delete_content_source(
    request: Request,
    content_source: str,
    client: Optional[str] = None,
):
    """Delete the authenticated user's data for one content source.

    Removes the rows of the object mapped from ``content_source`` (unless that
    object is ``"Computer"``), deletes the user's entries for the source,
    clears the Notion or GitHub config when the source is one of those, and
    records a telemetry event.

    Args:
        request: Incoming request; the user is read from ``request.user.object``.
        content_source: Path parameter naming the source to delete.
        client: Optional client identifier forwarded to telemetry.

    Returns:
        ``{"status": "ok"}`` once all deletions have been awaited.

    Raises:
        ValueError: If ``map_config_to_object`` returns ``None`` for
            ``content_source``.
    """
    user = request.user.object

    source_model = map_config_to_object(content_source)
    if source_model is None:
        raise ValueError(f"Invalid content source: {content_source}")

    # "Computer" is skipped here — presumably it has no per-source model to
    # clear; verify against map_config_to_object.
    if source_model != "Computer":
        await source_model.objects.filter(user=user).adelete()

    await sync_to_async(EntryAdapters.delete_all_entries)(user, file_source=content_source)

    if content_source == DbEntry.EntrySource.NOTION:
        await NotionConfig.objects.filter(user=user).adelete()
    elif content_source == DbEntry.EntrySource.GITHUB:
        await GithubConfig.objects.filter(user=user).adelete()

    telemetry_metadata = {"content_source": content_source}
    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="delete_content_config",
        client=client,
        metadata=telemetry_metadata,
    )

    # NOTE(review): the result of this lookup is never used; the call is kept
    # only to preserve existing behaviour and looks safe to remove — confirm.
    await sync_to_async(EntryAdapters.get_unique_file_types)(user)
    return {"status": "ok"}
async def indexer(
request: Request,
files: list[UploadFile],