From 92425782637175998ac9691fa7b55d5c998aae31 Mon Sep 17 00:00:00 2001
From: Timothy Carambat <rambat1010@gmail.com>
Date: Sun, 29 Oct 2023 11:03:41 -0700
Subject: [PATCH] Aws docker fixes (#309)

* WIP AWS fixes for the latest changes

* change default in docker env

* bump storage default for docker

* Updates to docker deployment
---
 .../aws_build_from_source_no_credentials.json |  12 +-
 .../aws/cloudformation/cf_template.template   | 255 ------------------
 .../aws/cloudformation/generate.mjs           |  70 -----
 docker/.env.example                           |   4 +-
 docker/HOW_TO_USE_DOCKER.md                   |  11 +-
 docker/docker-entrypoint.sh                   |   6 +-
 6 files changed, 15 insertions(+), 343 deletions(-)
 delete mode 100644 cloud-deployments/aws/cloudformation/cf_template.template
 delete mode 100644 cloud-deployments/aws/cloudformation/generate.mjs

diff --git a/cloud-deployments/aws/cloudformation/aws_build_from_source_no_credentials.json b/cloud-deployments/aws/cloudformation/aws_build_from_source_no_credentials.json
index 86971696e..c96da5421 100644
--- a/cloud-deployments/aws/cloudformation/aws_build_from_source_no_credentials.json
+++ b/cloud-deployments/aws/cloudformation/aws_build_from_source_no_credentials.json
@@ -91,17 +91,15 @@
                 "sudo systemctl start docker\n",
                 "sudo yum install git -y\n",
                 "git clone https://github.com/Mintplex-Labs/anything-llm.git /home/ec2-user/anything-llm\n",
+                "sudo touch /home/ec2-user/anything-llm/server/storage/anythingllm.db\n",
                 "cd /home/ec2-user/anything-llm/docker\n",
                 "cat >> .env << \"END\"\n",
                 "SERVER_PORT=3001\n",
                 "OPEN_AI_KEY=\n",
                 "OPEN_MODEL_PREF='gpt-3.5-turbo'\n",
                 "CACHE_VECTORS=\"true\"\n",
-                "VECTOR_DB=\"pinecone\"\n",
-                "PINECONE_ENVIRONMENT=\n",
-                "PINECONE_API_KEY=\n",
-                "PINECONE_INDEX=\n",
-                "STORAGE_DIR=\"./server/storage\"\n",
+                "VECTOR_DB=\"lancedb\"\n",
+                "STORAGE_DIR=\"/app/server/storage\"\n",
                 "GOOGLE_APIS_KEY=\n",
                 "UID=\"1000\"\n",
                 "GID=\"1000\"\n",
@@ -116,8 +114,6 @@
                 "echo \"Container ID: $(sudo docker ps --latest --quiet)\"\n",
                 "sudo docker container exec -u 0 -t $(sudo docker ps --latest --quiet) mkdir -p /app/server/storage /app/server/storage/documents /app/server/storage/vector-cache /app/server/storage/lancedb\n",
                 "echo \"Placeholder folders in storage created.\"\n",
-                "sudo docker container exec -u 0 -t $(sudo docker ps --latest --quiet) touch /app/server/storage/anythingllm.db\n",
-                "echo \"SQLite DB placeholder set.\"\n",
                 "sudo docker container exec -u 0 -t $(sudo docker ps --latest --quiet) chown -R anythingllm:anythingllm /app/collector /app/server\n",
                 "echo \"File permissions corrected.\"\n",
                 "export ONLINE=$(curl -Is http://localhost:3001/api/ping | head -n 1|cut -d$' ' -f2)\n",
@@ -260,4 +256,4 @@
       }
     }
   }
-}
+}
\ No newline at end of file
diff --git a/cloud-deployments/aws/cloudformation/cf_template.template b/cloud-deployments/aws/cloudformation/cf_template.template
deleted file mode 100644
index 7aeb28b14..000000000
--- a/cloud-deployments/aws/cloudformation/cf_template.template
+++ /dev/null
@@ -1,255 +0,0 @@
-{
-  "AWSTemplateFormatVersion": "2010-09-09",
-  "Description": "Create a stack that runs AnythingLLM on a single instance",
-  "Parameters": {
-    "InstanceType": {
-      "Description": "EC2 instance type",
-      "Type": "String",
-      "Default": "t2.small"
-    },
-    "InstanceVolume": {
-      "Description": "Storage size of disk on Instance in GB",
-      "Type": "Number",
-      "Default": 10,
-      "MinValue": 4
-    }
-  },
-  "Resources": {
-    "AnythingLLMInstance": {
-      "Type": "AWS::EC2::Instance",
-      "Properties": {
-        "ImageId": {
-          "Fn::FindInMap": [
-            "Region2AMI",
-            {
-              "Ref": "AWS::Region"
-            },
-            "AMI"
-          ]
-        },
-        "InstanceType": {
-          "Ref": "InstanceType"
-        },
-        "SecurityGroupIds": [
-          {
-            "Ref": "AnythingLLMInstanceSecurityGroup"
-          }
-        ],
-        "BlockDeviceMappings": [
-          {
-            "DeviceName": {
-              "Fn::FindInMap": [
-                "Region2AMI",
-                {
-                  "Ref": "AWS::Region"
-                },
-                "RootDeviceName"
-              ]
-            },
-            "Ebs": {
-              "VolumeSize": {
-                "Ref": "InstanceVolume"
-              }
-            }
-          }
-        ],
-        "UserData": {
-          "Fn::Base64": {
-            "Fn::Join": [
-              "",
-              [
-                "Content-Type: multipart/mixed; boundary=\"//\"\n",
-                "MIME-Version: 1.0\n",
-                "\n",
-                "--//\n",
-                "Content-Type: text/cloud-config; charset=\"us-ascii\"\n",
-                "MIME-Version: 1.0\n",
-                "Content-Transfer-Encoding: 7bit\n",
-                "Content-Disposition: attachment; filename=\"cloud-config.txt\"\n",
-                "\n",
-                "\n",
-                "#cloud-config\n",
-                "cloud_final_modules:\n",
-                "- [scripts-user, once-per-instance]\n",
-                "\n",
-                "\n",
-                "--//\n",
-                "Content-Type: text/x-shellscript; charset=\"us-ascii\"\n",
-                "MIME-Version: 1.0\n",
-                "Content-Transfer-Encoding: 7bit\n",
-                "Content-Disposition: attachment; filename=\"userdata.txt\"\n",
-                "\n",
-                "\n",
-                "#!/bin/bash\n",
-                "# check output of userdata script with sudo tail -f /var/log/cloud-init-output.log\n",
-                "sudo yum install docker -y\n",
-                "sudo usermod -a -G docker ec2-user\n",
-                "curl -L https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose\n",
-                "sudo chmod +x /usr/local/bin/docker-compose\n",
-                "sudo ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose\n",
-                "sudo systemctl enable docker\n",
-                "sudo systemctl start docker\n",
-                "sudo yum install git -y\n",
-                "git clone https://github.com/Mintplex-Labs/anything-llm.git /home/ec2-user/anything-llm\n",
-                "cd /home/ec2-user/anything-llm/docker\n",
-                "cat >> .env << \"END\"\n",
-                "!SUB::USER::CONTENT!",
-                "UID=\"1000\"\n",
-                "GID=\"1000\"\n",
-                "NO_DEBUG=\"true\"\n",
-                "END\n",
-                "cd ../frontend\n",
-                "rm -rf .env.production\n",
-                "cat >> .env.production << \"END\"\n",
-                "GENERATE_SOURCEMAP=true\n",
-                "VITE_API_BASE=\"/api\"\n",
-                "END\n",
-                "sudo docker-compose -f /home/ec2-user/anything-llm/docker/docker-compose.yml up -d\n",
-                "echo \"Container ID: $(sudo docker ps --latest --quiet)\"\n",
-                "sudo docker container exec -u 0 -t $(sudo docker ps --latest --quiet) mkdir -p /app/server/storage /app/server/storage/documents /app/server/storage/vector-cache /app/server/storage/lancedb\n",
-                "echo \"Placeholder folders in storage created.\"\n",
-                "sudo docker container exec -u 0 -t $(sudo docker ps --latest --quiet) touch /app/server/storage/anythingllm.db\n",
-                "echo \"SQLite DB placeholder set.\"\n",
-                "sudo docker container exec -u 0 -t $(sudo docker ps --latest --quiet) chown -R anythingllm:anythingllm /app/collector /app/server\n",
-                "echo \"File permissions corrected.\"\n",
-                "export ONLINE=$(curl -Is http://localhost:3001/api/ping | head -n 1|cut -d$' ' -f2)\n",
-                "echo \"Health check: $ONLINE\"\n",
-                "if [ \"$ONLINE\" = 200 ] ; then echo \"Running migrations...\" && curl -Is http://localhost:3001/api/migrate | head -n 1|cut -d$' ' -f2; fi\n",
-                "echo \"Setup complete! AnythingLLM instance is now online!\"\n",
-                "\n",
-                "--//--\n"
-              ]
-            ]
-          }
-        }
-      }
-    },
-    "AnythingLLMInstanceSecurityGroup": {
-      "Type": "AWS::EC2::SecurityGroup",
-      "Properties": {
-        "GroupDescription": "AnythingLLm Instance Security Group",
-        "SecurityGroupIngress": [
-          {
-            "IpProtocol": "tcp",
-            "FromPort": "22",
-            "ToPort": "22",
-            "CidrIp": "0.0.0.0/0"
-          },
-          {
-            "IpProtocol": "tcp",
-            "FromPort": "3001",
-            "ToPort": "3001",
-            "CidrIp": "0.0.0.0/0"
-          },
-          {
-            "IpProtocol": "tcp",
-            "FromPort": "3001",
-            "ToPort": "3001",
-            "CidrIpv6": "::/0"
-          }
-        ]
-      }
-    }
-  },
-  "Outputs": {
-    "ServerIp": {
-      "Description": "IP address of the AnythingLLM instance",
-      "Value": {
-        "Fn::GetAtt": [
-          "AnythingLLMInstance",
-          "PublicIp"
-        ]
-      }
-    },
-    "ServerURL": {
-      "Description": "URL of the AnythingLLM server",
-      "Value": {
-        "Fn::Join": [
-          "",
-          [
-            "http://",
-            {
-              "Fn::GetAtt": [
-                "AnythingLLMInstance",
-                "PublicIp"
-              ]
-            },
-            ":3001"
-          ]
-        ]
-      }
-    }
-  },
-  "Mappings": {
-    "Region2AMI": {
-      "ap-south-1": {
-        "AMI": "ami-0e6329e222e662a52",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "eu-north-1": {
-        "AMI": "ami-08c308b1bb265e927",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "eu-west-3": {
-        "AMI": "ami-069d1ea6bc64443f0",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "eu-west-2": {
-        "AMI": "ami-06a566ca43e14780d",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "eu-west-1": {
-        "AMI": "ami-0a8dc52684ee2fee2",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "ap-northeast-3": {
-        "AMI": "ami-0c8a89b455fae8513",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "ap-northeast-2": {
-        "AMI": "ami-0ff56409a6e8ea2a0",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "ap-northeast-1": {
-        "AMI": "ami-0ab0bbbd329f565e6",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "ca-central-1": {
-        "AMI": "ami-033c256a10931f206",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "sa-east-1": {
-        "AMI": "ami-0dabf4dab6b183eef",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "ap-southeast-1": {
-        "AMI": "ami-0dc5785603ad4ff54",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "ap-southeast-2": {
-        "AMI": "ami-0c5d61202c3b9c33e",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "eu-central-1": {
-        "AMI": "ami-004359656ecac6a95",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "us-east-1": {
-        "AMI": "ami-0cff7528ff583bf9a",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "us-east-2": {
-        "AMI": "ami-02238ac43d6385ab3",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "us-west-1": {
-        "AMI": "ami-01163e76c844a2129",
-        "RootDeviceName": "/dev/xvda"
-      },
-      "us-west-2": {
-        "AMI": "ami-0ceecbb0f30a902a6",
-        "RootDeviceName": "/dev/xvda"
-      }
-    }
-  }
-}
diff --git a/cloud-deployments/aws/cloudformation/generate.mjs b/cloud-deployments/aws/cloudformation/generate.mjs
deleted file mode 100644
index 99ed03ca9..000000000
--- a/cloud-deployments/aws/cloudformation/generate.mjs
+++ /dev/null
@@ -1,70 +0,0 @@
-// Note (tcarambat) This script should be executed from root via the `yarn generate::cloudformation` command only.
-// This script will copy your current Docker .env settings being used into a slightly custom AWS CloudFormation template
-// that you can upload and deploy on AWS in a single click!
-// Recommended settings are already defined in the template but you can modify them as needed.
-// AnythingLLM can run within the free tier services of AWS (t2.micro w/10GB of storage)
-//
-// This will deploy a fully public AnythingLLM so if you do not want anyone to access it please set the AUTH_TOKEN & JWT_SECRET envs
-// before running this script. You can still run the collector scripts on AWS so no FTP or file uploads are required.
-// Your documents and data do not leave your AWS instance when you host in the cloud this way.
-
-import fs from 'fs';
-import { fileURLToPath } from 'url';
-import path, { dirname } from 'path';
-import { exit } from 'process';
-const __dirname = dirname(fileURLToPath(import.meta.url));
-const REPLACEMENT_KEY = '!SUB::USER::CONTENT!'
-
-const envPath = path.resolve(__dirname, `../../../docker/.env`)
-const envFileExists = fs.existsSync(envPath);
-
-const chalk = {
-  redBright: function (text) {
-    return `\x1b[31m${text}\x1b[0m`
-  },
-  cyan: function (text) {
-    return `\x1b[36m${text}\x1b[0m`
-  },
-  greenBright: function (text) {
-    return `\x1b[32m${text}\x1b[0m`
-  },
-  blueBright: function (text) {
-    return `\x1b[34m${text}\x1b[0m`
-  }
-}
-
-if (!envFileExists) {
-  console.log(chalk.redBright('[ABORT]'), 'You do not have an .env file in your ./docker/ folder. You need to create it first.');
-  console.log('You can start by running', chalk.cyan('cp -n ./docker/.env.example ./docker/.env'))
-  exit(1);
-}
-
-// Remove comments
-// Remove UID,GID,etc
-// Remove empty strings
-// Split into array
-const settings = fs.readFileSync(envPath, "utf8")
-  .replace(/^#.*\n?/gm, '')
-  .replace(/^UID.*\n?/gm, '')
-  .replace(/^GID.*\n?/gm, '')
-  .replace(/^CLOUD_BUILD.*\n?/gm, '')
-  .replace(/^\s*\n/gm, "")
-  .split('\n')
-  .filter((i) => !!i)
-  .map((i) => i + '\n')
-
-const templatePath = path.resolve(__dirname, `cf_template.template`);
-const templateString = fs.readFileSync(templatePath, "utf8");
-const template = JSON.parse(templateString);
-
-const cmdIdx = template.Resources.AnythingLLMInstance.Properties.UserData['Fn::Base64']['Fn::Join'][1].findIndex((cmd) => cmd === REPLACEMENT_KEY)
-template.Resources.AnythingLLMInstance.Properties.UserData['Fn::Base64']['Fn::Join'][1].splice(cmdIdx, 1, ...settings);
-
-const output = path.resolve(__dirname, `aws_cf_deploy_anything_llm.json`);
-fs.writeFileSync(output, JSON.stringify(template, null, 2), "utf8");
-
-console.log(chalk.greenBright('[SUCCESS]'), 'Deploy AnythingLLM on AWS CloudFormation using your template document.');
-console.log(chalk.greenBright('File Created:'), 'aws_cf_deploy_anything_llm.json in aws/cloudformation directory.');
-console.log(chalk.blueBright('[INFO]'), 'Refer to aws/cloudformation/DEPLOY.md for how to use this file.');
-
-exit();
\ No newline at end of file
diff --git a/docker/.env.example b/docker/.env.example
index 508b20f40..b0182cc80 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -26,7 +26,7 @@ OPEN_MODEL_PREF='gpt-3.5-turbo'
 # CHROMA_API_KEY="sk-123abc"
 
 # Enable all below if you are using vector database: Pinecone.
-VECTOR_DB="pinecone"
+VECTOR_DB="lancedb"
 PINECONE_ENVIRONMENT=
 PINECONE_API_KEY=
 PINECONE_INDEX=
@@ -47,7 +47,7 @@ PINECONE_INDEX=
 # CLOUD DEPLOYMENT VARIRABLES ONLY
 # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
 # NO_DEBUG="true"
-STORAGE_DIR="./server/storage"
+STORAGE_DIR="/app/server/storage"
 GOOGLE_APIS_KEY=
 UID='1000'
 GID='1000'
diff --git a/docker/HOW_TO_USE_DOCKER.md b/docker/HOW_TO_USE_DOCKER.md
index 12c94e72c..2cc5a41ef 100644
--- a/docker/HOW_TO_USE_DOCKER.md
+++ b/docker/HOW_TO_USE_DOCKER.md
@@ -5,11 +5,11 @@ Use the Dockerized version of AnythingLLM for a much faster and complete startup
 ## Requirements
 - Install [Docker](https://www.docker.com/) on your computer or machine.
 
-## How to install
+## How to install & run locally
 - `git clone` this repo and `cd anything-llm` to get to the root directory.
-- `yarn setup`
+- `touch server/storage/anythingllm.db` to create an empty SQLite DB file.
 - `cd docker/`
-- Edit `.env` file and update the variables
+- `cp .env.example .env`
 - `docker-compose up -d --build` to build the image - this will take a few moments.
 
 Your docker host will show the image as online once the build process is completed. This will build the app to `http://localhost:3001`.
@@ -17,7 +17,7 @@ Your docker host will show the image as online once the build process is complet
 ## How to use the user interface
 - To access the full application, visit `http://localhost:3001` in your browser.
 
-## How to add files to my system
+## How to add files to my system using the standalone scripts
 - Upload files from the UI in your Workspace settings
 
 - To run the collector scripts to grab external data (articles, URLs, etc.)
@@ -27,9 +27,6 @@ Your docker host will show the image as online once the build process is complet
   - `docker exec -it --workdir=/app/collector anything-llm python watch.py`
   - Upload [compliant files](../collector/hotdir/__HOTDIR__.md) to `./collector/hotdir` and they will be processed and made available in the UI.
 
-## How to update and rebuild the ENV?
-- Update the `./docker/.env` and run `docker-compose up -d --build` to rebuild with new environments.
-
 ## About UID and GID in the ENV
 - The UID and GID are set to 1000 by default. This is the default user in the Docker container and on most host operating systems. If there is a mismatch between your host user UID and GID and what is set in the `.env` file, you may experience permission issues.
 
diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh
index eb9285a9d..4c264e883 100755
--- a/docker/docker-entrypoint.sh
+++ b/docker/docker-entrypoint.sh
@@ -1,5 +1,9 @@
 #!/bin/bash
-node /app/server/index.js &
+{ cd /app/server/ &&\
+  npx prisma generate --schema=./prisma/schema.prisma &&\
+  npx prisma migrate deploy --schema=./prisma/schema.prisma &&\
+  node /app/server/index.js
+} &
 { FLASK_ENV=production FLASK_APP=wsgi.py cd collector && gunicorn --timeout 300 --workers 4 --bind 0.0.0.0:8888 wsgi:api; } &
 wait -n
 exit $?
\ No newline at end of file