mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-03-13 05:32:24 +00:00
dockerfile cleanup; enforce text LF line endings (#81)
This commit is contained in:
parent
3945a77290
commit
4079020de0
5 changed files with 46 additions and 52 deletions
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
* text=auto eol=lf
|
|
@ -1,39 +1,39 @@
|
|||
import requests
|
||||
import xml.etree.ElementTree as ET
|
||||
from scripts.link import parse_links
|
||||
import re
|
||||
|
||||
def parse_sitemap(url):
    """Fetch a sitemap and return the page URLs it lists.

    Downloads the XML document at ``url`` and collects the text of every
    ``<loc>`` found under a ``<url>`` element of the sitemaps.org 0.9
    namespace. Entries for which ``has_extension_to_ignore`` returns a
    true value are reported on stdout and left out of the result.

    Args:
        url: Address of the sitemap XML document.

    Returns:
        A list of URL strings in document order.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML.
    """
    namespace = '{http://www.sitemaps.org/schemas/sitemap/0.9}'

    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, timeout=30)
    # Fail on HTTP errors here instead of feeding an error page to the parser.
    response.raise_for_status()
    root = ET.fromstring(response.content)

    urls = []
    for element in root.iter(f'{namespace}url'):
        for loc in element.iter(f'{namespace}loc'):
            # An empty <loc/> has text None, and pretty-printed sitemaps pad
            # the URL with whitespace; normalise both before using the value.
            link = (loc.text or '').strip()
            if not link:
                continue
            if has_extension_to_ignore(link):
                print(f"Skipping filetype: {link}")
            else:
                urls.append(link)

    return urls
|
||||
|
||||
# Example sitemap URL https://www.nerdwallet.com/blog/wp-sitemap-news-articles-1.xml
|
||||
def sitemap():
    """Prompt for a sitemap URL and pass every URL it lists to ``parse_links``.

    The URL is read from stdin. A blank (or whitespace-only) answer prints
    an error message and terminates the process with exit status 1.
    """
    import sys

    sitemap_url = input("Enter the URL of the sitemap: ").strip()

    if not sitemap_url:
        print("No valid sitemap provided!")
        # sys.exit always exists; the bare ``exit`` helper is injected by the
        # ``site`` module and is absent under ``python -S`` or frozen builds.
        sys.exit(1)

    url_array = parse_sitemap(sitemap_url)

    # parse links from array
    parse_links(url_array)
|
||||
|
||||
def has_extension_to_ignore(string):
    """Tell whether a URL points at a file type that should not be scraped.

    Only the path component of the URL is inspected, so a query string or
    fragment after the file name does not hide the extension, and an
    extension-like fragment in the host name or in the middle of a slug
    does not trigger a false positive. The comparison is case-insensitive.

    Args:
        string: The URL to inspect.

    Returns:
        True if the URL path ends in one of the ignored extensions,
        otherwise False.
    """
    from urllib.parse import urlsplit

    ignored_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.pdf')

    path = urlsplit(string).path
    # str.endswith accepts a tuple, so one call covers every extension.
    return path.lower().endswith(ignored_extensions)
|
||||
|
||||
import requests
|
||||
import xml.etree.ElementTree as ET
|
||||
from scripts.link import parse_links
|
||||
import re
|
||||
|
||||
def parse_sitemap(url):
    """Fetch a sitemap and return the page URLs it lists.

    Downloads the XML document at ``url`` and collects the text of every
    ``<loc>`` found under a ``<url>`` element of the sitemaps.org 0.9
    namespace. Entries for which ``has_extension_to_ignore`` returns a
    true value are reported on stdout and left out of the result.

    Args:
        url: Address of the sitemap XML document.

    Returns:
        A list of URL strings in document order.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML.
    """
    namespace = '{http://www.sitemaps.org/schemas/sitemap/0.9}'

    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, timeout=30)
    # Fail on HTTP errors here instead of feeding an error page to the parser.
    response.raise_for_status()
    root = ET.fromstring(response.content)

    urls = []
    for element in root.iter(f'{namespace}url'):
        for loc in element.iter(f'{namespace}loc'):
            # An empty <loc/> has text None, and pretty-printed sitemaps pad
            # the URL with whitespace; normalise both before using the value.
            link = (loc.text or '').strip()
            if not link:
                continue
            if has_extension_to_ignore(link):
                print(f"Skipping filetype: {link}")
            else:
                urls.append(link)

    return urls
|
||||
|
||||
# Example sitemap URL https://www.nerdwallet.com/blog/wp-sitemap-news-articles-1.xml
|
||||
def sitemap():
    """Prompt for a sitemap URL and pass every URL it lists to ``parse_links``.

    The URL is read from stdin. A blank (or whitespace-only) answer prints
    an error message and terminates the process with exit status 1.
    """
    import sys

    sitemap_url = input("Enter the URL of the sitemap: ").strip()

    if not sitemap_url:
        print("No valid sitemap provided!")
        # sys.exit always exists; the bare ``exit`` helper is injected by the
        # ``site`` module and is absent under ``python -S`` or frozen builds.
        sys.exit(1)

    url_array = parse_sitemap(sitemap_url)

    # parse links from array
    parse_links(url_array)
|
||||
|
||||
def has_extension_to_ignore(string):
    """Tell whether a URL points at a file type that should not be scraped.

    Only the path component of the URL is inspected, so a query string or
    fragment after the file name does not hide the extension, and an
    extension-like fragment in the host name or in the middle of a slug
    does not trigger a false positive. The comparison is case-insensitive.

    Args:
        string: The URL to inspect.

    Returns:
        True if the URL path ends in one of the ignored extensions,
        otherwise False.
    """
    from urllib.parse import urlsplit

    ignored_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.pdf')

    path = urlsplit(string).path
    # str.endswith accepts a tuple, so one call covers every extension.
    return path.lower().endswith(ignored_extensions)
|
|
@ -34,12 +34,10 @@ RUN groupadd -g $ARG_GID anythingllm && \
|
|||
# Copy docker helper scripts
|
||||
COPY ./docker/docker-entrypoint.sh /usr/local/bin/
|
||||
COPY ./docker/docker-healthcheck.sh /usr/local/bin/
|
||||
COPY ./docker/dual_boot.sh /usr/local/bin/
|
||||
|
||||
# Ensure the scripts are executable
|
||||
RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \
|
||||
chmod +x /usr/local/bin/docker-healthcheck.sh && \
|
||||
chmod 777 /usr/local/bin/dual_boot.sh
|
||||
chmod +x /usr/local/bin/docker-healthcheck.sh
|
||||
|
||||
USER anythingllm
|
||||
|
||||
|
@ -91,6 +89,4 @@ HEALTHCHECK --interval=1m --timeout=10s --start-period=1m \
|
|||
CMD /bin/bash /usr/local/bin/docker-healthcheck.sh || exit 1
|
||||
|
||||
# Run the server
|
||||
ENTRYPOINT ["docker-entrypoint.sh"]
|
||||
|
||||
CMD /bin/bash /usr/local/bin/dual_boot.sh
|
||||
ENTRYPOINT ["/bin/bash", "/usr/local/bin/docker-entrypoint.sh"]
|
|
@ -1,3 +1,5 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
exec "$@"
|
||||
#!/bin/bash
# Start the Node server and the Python collector side by side.
node /app/server/index.js &
# Variable assignments written in front of `cd` apply to that builtin only and
# never reach gunicorn, so attach them to the gunicorn command itself.
{ cd collector && FLASK_ENV=production FLASK_APP=wsgi.py gunicorn --workers 4 --bind 0.0.0.0:8888 wsgi:api; } &
# Return as soon as either background job finishes and pass on its exit status.
wait -n
exit $?
|
|
@ -1,5 +0,0 @@
|
|||
#!/bin/bash
# Start the Node server and the Python collector side by side.
node /app/server/index.js &
# Variable assignments written in front of `cd` apply to that builtin only and
# never reach gunicorn, so attach them to the gunicorn command itself.
{ cd collector && FLASK_ENV=production FLASK_APP=wsgi.py gunicorn --workers 4 --bind 0.0.0.0:8888 wsgi:api; } &
# Return as soon as either background job finishes and pass on its exit status.
wait -n
exit $?
|
Loading…
Add table
Reference in a new issue