Auto-update: Tue Jul 30 10:14:37 PDT 2024
This commit is contained in:
parent
f707aa7529
commit
a1c31a7c58
2 changed files with 217 additions and 116 deletions
|
@ -83,6 +83,7 @@ async def lifespan(app: FastAPI):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
app = FastAPI(lifespan=lifespan)
|
app = FastAPI(lifespan=lifespan)
|
||||||
|
|
||||||
app.add_middleware(
|
app.add_middleware(
|
||||||
|
|
|
@ -5,6 +5,7 @@ import yaml
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import uuid
|
||||||
import aiofiles
|
import aiofiles
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import asyncpg
|
import asyncpg
|
||||||
|
@ -315,9 +316,9 @@ class APIConfig(BaseModel):
|
||||||
err(f"Error: {str(e)}")
|
err(f"Error: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
async def initialize_sync(self):
|
async def initialize_sync(self):
|
||||||
for pool_entry in self.POOL:
|
for pool_entry in self.POOL:
|
||||||
|
try:
|
||||||
async with self.get_connection(pool_entry) as conn:
|
async with self.get_connection(pool_entry) as conn:
|
||||||
tables = await conn.fetch("""
|
tables = await conn.fetch("""
|
||||||
SELECT tablename FROM pg_tables
|
SELECT tablename FROM pg_tables
|
||||||
|
@ -326,7 +327,14 @@ class APIConfig(BaseModel):
|
||||||
|
|
||||||
for table in tables:
|
for table in tables:
|
||||||
table_name = table['tablename']
|
table_name = table['tablename']
|
||||||
# Add version and server_id columns if they don't exist
|
await self.ensure_sync_columns(conn, table_name)
|
||||||
|
await self.create_sync_trigger(conn, table_name)
|
||||||
|
|
||||||
|
info(f"Sync initialization complete for {pool_entry['ts_ip']}. All tables now have version and server_id columns with appropriate triggers.")
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Error initializing sync for {pool_entry['ts_ip']}: {str(e)}")
|
||||||
|
|
||||||
|
async def ensure_sync_columns(self, conn, table_name):
|
||||||
await conn.execute(f"""
|
await conn.execute(f"""
|
||||||
DO $$
|
DO $$
|
||||||
BEGIN
|
BEGIN
|
||||||
|
@ -341,7 +349,7 @@ class APIConfig(BaseModel):
|
||||||
END $$;
|
END $$;
|
||||||
""")
|
""")
|
||||||
|
|
||||||
# Create or replace the trigger function
|
async def create_sync_trigger(self, conn, table_name):
|
||||||
await conn.execute(f"""
|
await conn.execute(f"""
|
||||||
CREATE OR REPLACE FUNCTION update_version_and_server_id()
|
CREATE OR REPLACE FUNCTION update_version_and_server_id()
|
||||||
RETURNS TRIGGER AS $$
|
RETURNS TRIGGER AS $$
|
||||||
|
@ -351,10 +359,7 @@ class APIConfig(BaseModel):
|
||||||
RETURN NEW;
|
RETURN NEW;
|
||||||
END;
|
END;
|
||||||
$$ LANGUAGE plpgsql;
|
$$ LANGUAGE plpgsql;
|
||||||
""")
|
|
||||||
|
|
||||||
# Create the trigger if it doesn't exist
|
|
||||||
await conn.execute(f"""
|
|
||||||
DO $$
|
DO $$
|
||||||
BEGIN
|
BEGIN
|
||||||
IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'update_version_and_server_id_trigger' AND tgrelid = '{table_name}'::regclass) THEN
|
IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'update_version_and_server_id_trigger' AND tgrelid = '{table_name}'::regclass) THEN
|
||||||
|
@ -365,11 +370,6 @@ class APIConfig(BaseModel):
|
||||||
END $$;
|
END $$;
|
||||||
""")
|
""")
|
||||||
|
|
||||||
info(f"Sync initialization complete for {pool_entry['ts_ip']}. All tables now have version and server_id columns with appropriate triggers.")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def get_most_recent_source(self):
|
async def get_most_recent_source(self):
|
||||||
most_recent_source = None
|
most_recent_source = None
|
||||||
max_version = -1
|
max_version = -1
|
||||||
|
@ -394,27 +394,26 @@ class APIConfig(BaseModel):
|
||||||
|
|
||||||
return most_recent_source
|
return most_recent_source
|
||||||
|
|
||||||
|
async def pull_changes(self, source_pool_entry, batch_size=10000):
|
||||||
|
|
||||||
async def pull_changes(self, source_pool_entry):
|
|
||||||
if source_pool_entry['ts_id'] == os.environ.get('TS_ID'):
|
if source_pool_entry['ts_id'] == os.environ.get('TS_ID'):
|
||||||
info("Skipping self-sync")
|
info("Skipping self-sync")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
total_inserts = 0
|
total_changes = 0
|
||||||
total_updates = 0
|
|
||||||
table_changes = {}
|
|
||||||
|
|
||||||
source_id = source_pool_entry['ts_id']
|
source_id = source_pool_entry['ts_id']
|
||||||
source_ip = source_pool_entry['ts_ip']
|
source_ip = source_pool_entry['ts_ip']
|
||||||
dest_id = os.environ.get('TS_ID')
|
dest_id = os.environ.get('TS_ID')
|
||||||
dest_ip = self.local_db['ts_ip']
|
dest_ip = self.local_db['ts_ip']
|
||||||
|
|
||||||
info(f"Starting comprehensive sync from source {source_id} ({source_ip}) to destination {dest_id} ({dest_ip})")
|
info(f"Starting sync from source {source_id} ({source_ip}) to destination {dest_id} ({dest_ip})")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async with self.get_connection(source_pool_entry) as source_conn:
|
async with self.get_connection(source_pool_entry) as source_conn:
|
||||||
async with self.get_connection(self.local_db) as dest_conn:
|
async with self.get_connection(self.local_db) as dest_conn:
|
||||||
|
# Sync schema first
|
||||||
|
schema_changes = await self.detect_schema_changes(source_conn, dest_conn)
|
||||||
|
await self.apply_schema_changes(dest_conn, schema_changes)
|
||||||
|
|
||||||
tables = await source_conn.fetch("""
|
tables = await source_conn.fetch("""
|
||||||
SELECT tablename FROM pg_tables
|
SELECT tablename FROM pg_tables
|
||||||
WHERE schemaname = 'public'
|
WHERE schemaname = 'public'
|
||||||
|
@ -424,62 +423,92 @@ class APIConfig(BaseModel):
|
||||||
table_name = table['tablename']
|
table_name = table['tablename']
|
||||||
last_synced_version = await self.get_last_synced_version(dest_conn, table_name, source_id)
|
last_synced_version = await self.get_last_synced_version(dest_conn, table_name, source_id)
|
||||||
|
|
||||||
|
while True:
|
||||||
changes = await source_conn.fetch(f"""
|
changes = await source_conn.fetch(f"""
|
||||||
SELECT * FROM "{table_name}"
|
SELECT * FROM "{table_name}"
|
||||||
WHERE version > $1 AND server_id = $2
|
WHERE version > $1 AND server_id = $2
|
||||||
ORDER BY version ASC
|
ORDER BY version ASC
|
||||||
""", last_synced_version, source_id)
|
LIMIT $3
|
||||||
|
""", last_synced_version, source_id, batch_size)
|
||||||
|
|
||||||
inserts = 0
|
if not changes:
|
||||||
updates = 0
|
break
|
||||||
for change in changes:
|
|
||||||
columns = list(change.keys())
|
|
||||||
values = [change[col] for col in columns]
|
|
||||||
placeholders = [f'${i+1}' for i in range(len(columns))]
|
|
||||||
|
|
||||||
insert_query = f"""
|
changes_count = await self.apply_batch_changes(dest_conn, table_name, changes)
|
||||||
INSERT INTO "{table_name}" ({', '.join(columns)})
|
total_changes += changes_count
|
||||||
VALUES ({', '.join(placeholders)})
|
|
||||||
ON CONFLICT (id) DO UPDATE SET
|
|
||||||
{', '.join(f"{col} = EXCLUDED.{col}" for col in columns if col != 'id')}
|
|
||||||
"""
|
|
||||||
|
|
||||||
result = await dest_conn.execute(insert_query, *values)
|
last_synced_version = changes[-1]['version']
|
||||||
if 'UPDATE' in result:
|
await self.update_last_synced_version(dest_conn, table_name, source_id, last_synced_version)
|
||||||
updates += 1
|
|
||||||
else:
|
|
||||||
inserts += 1
|
|
||||||
|
|
||||||
if changes:
|
info(f"Synced batch for {table_name}: {changes_count} changes. Total so far: {total_changes}")
|
||||||
await self.update_last_synced_version(dest_conn, table_name, source_id, changes[-1]['version'])
|
|
||||||
|
|
||||||
total_inserts += inserts
|
info(f"Sync complete from {source_id} ({source_ip}) to {dest_id} ({dest_ip}). Total changes: {total_changes}")
|
||||||
total_updates += updates
|
|
||||||
table_changes[table_name] = {'inserts': inserts, 'updates': updates}
|
|
||||||
|
|
||||||
info(f"Synced {table_name} from {source_id} to {dest_id}: {inserts} inserts, {updates} updates")
|
|
||||||
|
|
||||||
info(f"Comprehensive sync complete from {source_id} ({source_ip}) to {dest_id} ({dest_ip})")
|
|
||||||
info(f"Total changes: {total_inserts} inserts, {total_updates} updates")
|
|
||||||
info("Changes by table:")
|
|
||||||
for table, changes in table_changes.items():
|
|
||||||
info(f" {table}: {changes['inserts']} inserts, {changes['updates']} updates")
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error during sync process: {str(e)}")
|
err(f"Error during sync process: {str(e)}")
|
||||||
err(f"Traceback: {traceback.format_exc()}")
|
err(f"Traceback: {traceback.format_exc()}")
|
||||||
|
|
||||||
return total_inserts + total_updates
|
return total_changes
|
||||||
|
|
||||||
|
async def apply_batch_changes(self, conn, table_name, changes):
    """Upsert one batch of replicated rows into ``table_name``.

    The rows are bulk-copied into a staging table and merged into the target
    with ``INSERT ... ON CONFLICT (id) DO UPDATE``. Existing rows are only
    rewritten when at least one non-``id`` column differs.

    Args:
        conn: Destination connection. It must provide ``transaction()``,
            ``execute()`` and ``copy_records_to_table()`` (the asyncpg
            ``Connection`` interface).
        table_name: Name of the target table. ``ON CONFLICT (id)`` requires a
            unique or primary-key constraint on its ``id`` column.
        changes: Sequence of row mappings that all expose the same keys.

    Returns:
        The row count parsed from the status tag of the upsert statement, or
        0 when ``changes`` is empty.
    """
    if not changes:
        return 0

    def quote(identifier):
        # Double embedded quotes so any name is a valid quoted identifier.
        return '"' + str(identifier).replace('"', '""') + '"'

    columns = list(changes[0].keys())
    staging_name = f"temp_{table_name}_{uuid.uuid4().hex[:8]}"
    target = quote(table_name)
    staging = quote(staging_name)
    column_list = ', '.join(quote(col) for col in columns)
    updatable = [col for col in columns if col != 'id']

    if updatable:
        assignments = ', '.join(
            f"{quote(col)} = EXCLUDED.{quote(col)}" for col in updatable
        )
        # Column references are qualified with the target table: inside
        # ON CONFLICT DO UPDATE a bare name is ambiguous between the existing
        # row and EXCLUDED.
        differs = [
            f"{target}.{quote(col)} IS DISTINCT FROM EXCLUDED.{quote(col)}"
            for col in updatable
            if col != 'geometry'
        ]
        # The spatial comparison is only emitted for tables that actually
        # carry a geometry column.
        if 'geometry' in updatable:
            differs.insert(0, f"""(
                CASE
                    WHEN {target}."geometry" IS NOT NULL AND EXCLUDED."geometry" IS NOT NULL
                    THEN NOT ST_Equals({target}."geometry", EXCLUDED."geometry")
                    ELSE FALSE
                END
            )""")
        conflict_action = (
            f"DO UPDATE SET {assignments} WHERE {' OR '.join(differs)}"
        )
    else:
        # A table whose only column is id has nothing to update.
        conflict_action = "DO NOTHING"

    records = [tuple(change[col] for col in columns) for change in changes]

    # ON COMMIT DROP only keeps the staging table alive until the end of the
    # enclosing transaction, so every step has to share one transaction.
    async with conn.transaction():
        await conn.execute(f"""
            CREATE TEMPORARY TABLE {staging} (LIKE {target} INCLUDING ALL)
            ON COMMIT DROP
        """)

        # Name the columns explicitly: the key order of the fetched rows is
        # not guaranteed to match the physical column order of the target.
        await conn.copy_records_to_table(
            staging_name, records=records, columns=columns
        )

        # No anti-join against the target here: rows that already exist must
        # reach the ON CONFLICT branch so that updates are propagated.
        result = await conn.execute(f"""
            INSERT INTO {target} ({column_list})
            SELECT {column_list}
            FROM {staging}
            ON CONFLICT (id) {conflict_action}
        """)

        # Explicit drop covers the case where this runs inside an outer
        # transaction and the commit (hence ON COMMIT DROP) happens later.
        await conn.execute(f"DROP TABLE IF EXISTS {staging}")

    # The status tag looks like "INSERT 0 <count>"; the last token is the count.
    return int(result.split()[-1])
|
||||||
|
|
||||||
async def push_changes_to_all(self):
    """Push local changes to every pool entry except this server.

    Entries whose ``ts_id`` equals the ``TS_ID`` environment variable are
    skipped. A failure while pushing to one entry is logged through ``err``
    and does not stop the remaining entries from being processed.
    """
    for peer in self.POOL:
        # Guard clause: never push to ourselves.
        if peer['ts_id'] == os.environ.get('TS_ID'):
            continue

        try:
            await self.push_changes_to_one(peer)
        except Exception as exc:
            err(f"Error pushing changes to {peer['ts_id']}: {str(exc)}")
|
||||||
|
|
||||||
async def push_changes_to_one(self, pool_entry):
|
async def push_changes_to_one(self, pool_entry, batch_size=10000):
|
||||||
try:
|
try:
|
||||||
async with self.get_connection() as local_conn:
|
async with self.get_connection() as local_conn:
|
||||||
async with self.get_connection(pool_entry) as remote_conn:
|
async with self.get_connection(pool_entry) as remote_conn:
|
||||||
|
# Sync schema first
|
||||||
|
schema_changes = await self.detect_schema_changes(local_conn, remote_conn)
|
||||||
|
await self.apply_schema_changes(remote_conn, schema_changes)
|
||||||
|
|
||||||
tables = await local_conn.fetch("""
|
tables = await local_conn.fetch("""
|
||||||
SELECT tablename FROM pg_tables
|
SELECT tablename FROM pg_tables
|
||||||
WHERE schemaname = 'public'
|
WHERE schemaname = 'public'
|
||||||
|
@ -489,28 +518,23 @@ class APIConfig(BaseModel):
|
||||||
table_name = table['tablename']
|
table_name = table['tablename']
|
||||||
last_synced_version = await self.get_last_synced_version(remote_conn, table_name, os.environ.get('TS_ID'))
|
last_synced_version = await self.get_last_synced_version(remote_conn, table_name, os.environ.get('TS_ID'))
|
||||||
|
|
||||||
|
while True:
|
||||||
changes = await local_conn.fetch(f"""
|
changes = await local_conn.fetch(f"""
|
||||||
SELECT * FROM "{table_name}"
|
SELECT * FROM "{table_name}"
|
||||||
WHERE version > $1 AND server_id = $2
|
WHERE version > $1 AND server_id = $2
|
||||||
ORDER BY version ASC
|
ORDER BY version ASC
|
||||||
""", last_synced_version, os.environ.get('TS_ID'))
|
LIMIT $3
|
||||||
|
""", last_synced_version, os.environ.get('TS_ID'), batch_size)
|
||||||
|
|
||||||
for change in changes:
|
if not changes:
|
||||||
columns = list(change.keys())
|
break
|
||||||
values = [change[col] for col in columns]
|
|
||||||
placeholders = [f'${i+1}' for i in range(len(columns))]
|
|
||||||
|
|
||||||
insert_query = f"""
|
changes_count = await self.apply_batch_changes(remote_conn, table_name, changes)
|
||||||
INSERT INTO "{table_name}" ({', '.join(columns)})
|
|
||||||
VALUES ({', '.join(placeholders)})
|
|
||||||
ON CONFLICT (id) DO UPDATE SET
|
|
||||||
{', '.join(f"{col} = EXCLUDED.{col}" for col in columns if col != 'id')}
|
|
||||||
"""
|
|
||||||
|
|
||||||
await remote_conn.execute(insert_query, *values)
|
last_synced_version = changes[-1]['version']
|
||||||
|
await self.update_last_synced_version(remote_conn, table_name, os.environ.get('TS_ID'), last_synced_version)
|
||||||
|
|
||||||
if changes:
|
info(f"Pushed batch for {table_name}: {changes_count} changes to {pool_entry['ts_id']}")
|
||||||
await self.update_last_synced_version(remote_conn, table_name, os.environ.get('TS_ID'), changes[-1]['version'])
|
|
||||||
|
|
||||||
info(f"Successfully pushed changes to {pool_entry['ts_id']}")
|
info(f"Successfully pushed changes to {pool_entry['ts_id']}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -552,6 +576,82 @@ class APIConfig(BaseModel):
|
||||||
END $$;
|
END $$;
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
async def detect_schema_changes(self, source_conn, dest_conn):
    """Report tables and columns present on the source but not the destination.

    Only the ``public`` schema is inspected on both sides.

    Args:
        source_conn: Connection to the database whose schema is the reference.
        dest_conn: Connection to the database that should catch up.

    Returns:
        A dict with two keys:
            ``'new_tables'``: sorted list of table names that exist only on
            the source.
            ``'new_columns'``: maps each table that exists on both sides to a
            list of ``{'name': ..., 'type': ...}`` dicts for the columns the
            destination lacks. Tables without missing columns are omitted.
    """
    tables_sql = "SELECT tablename FROM pg_tables WHERE schemaname = 'public'"
    # The table name is bound as a parameter rather than formatted into the
    # statement, and the lookup is pinned to the public schema so a same-named
    # table in another schema cannot leak columns into the comparison.
    columns_sql = """
        SELECT column_name, data_type
        FROM information_schema.columns
        WHERE table_schema = 'public' AND table_name = $1
        ORDER BY ordinal_position
    """

    source_table_names = {
        row['tablename'] for row in await source_conn.fetch(tables_sql)
    }
    dest_table_names = {
        row['tablename'] for row in await dest_conn.fetch(tables_sql)
    }

    schema_changes = {
        'new_tables': sorted(source_table_names - dest_table_names),
        'new_columns': {},
    }

    # Columns are only compared for tables both sides already have; tables
    # missing on the destination are reported whole under 'new_tables'.
    for table_name in sorted(source_table_names & dest_table_names):
        source_columns = await source_conn.fetch(columns_sql, table_name)
        dest_columns = await dest_conn.fetch(columns_sql, table_name)

        dest_column_names = {column['column_name'] for column in dest_columns}
        missing = [
            {'name': column['column_name'], 'type': column['data_type']}
            for column in source_columns
            if column['column_name'] not in dest_column_names
        ]
        if missing:
            schema_changes['new_columns'][table_name] = missing

    return schema_changes
|
||||||
|
|
||||||
|
async def apply_schema_changes(self, conn, schema_changes, source_conn=None):
    """Create the missing tables and add the missing columns on ``conn``.

    Args:
        conn: Connection to the database that receives the DDL.
        schema_changes: Dict with ``'new_tables'`` (iterable of table names)
            and ``'new_columns'`` (mapping of table name to a list of
            ``{'name': ..., 'type': ...}`` dicts).
        source_conn: Optional connection on which the CREATE TABLE statement
            of each new table is looked up. Defaults to ``conn`` to keep the
            previous call signature working.
    """
    def quote(identifier):
        # Double embedded quotes so any name is a valid quoted identifier.
        return '"' + str(identifier).replace('"', '""') + '"'

    # NOTE(review): a table listed in 'new_tables' is by definition absent on
    # the destination, so its definition can only be read from the database
    # that already has it. Callers should pass source_conn; with the default
    # the lookup runs against conn exactly as before.
    ddl_conn = conn if source_conn is None else source_conn

    for table_name in schema_changes['new_tables']:
        create_table_sql = await self.get_table_creation_sql(ddl_conn, table_name)
        await conn.execute(create_table_sql)
        info(f"Created new table: {table_name}")

    for table_name, columns in schema_changes['new_columns'].items():
        for column in columns:
            # The column name is quoted so mixed-case names and reserved words
            # are added under exactly the name reported for the source.
            # NOTE(review): when the type comes from
            # information_schema.columns.data_type it can be a placeholder
            # such as 'USER-DEFINED' or 'ARRAY', which is not valid in DDL —
            # confirm how such columns should be handled.
            await conn.execute(f"""
                ALTER TABLE {quote(table_name)}
                ADD COLUMN IF NOT EXISTS {quote(column['name'])} {column['type']}
            """)
            info(f"Added new column {column['name']} to table {table_name}")
|
||||||
|
|
||||||
|
async def get_table_creation_sql(self, conn, table_name):
    """Return the CREATE TABLE statement for ``table_name`` as seen by ``conn``.

    Args:
        conn: Connection on which the table already exists.
        table_name: Name of the table, resolved through a ``regclass`` cast.

    Returns:
        Whatever ``pg_get_tabledef`` yields for the table.
    """
    # NOTE(review): pg_get_tabledef is not among PostgreSQL's built-in
    # pg_get_*def functions; this presumably relies on a user-defined function
    # being installed on the server — confirm.
    # The name is bound as a text parameter and cast on the server instead of
    # being formatted into the statement.
    return await conn.fetchval(
        "SELECT pg_get_tabledef($1::text::regclass::oid)",
        table_name,
    )
|
||||||
|
|
||||||
|
async def table_exists(self, conn, table_name):
    """Check whether ``table_name`` is listed in the ``public`` schema.

    Args:
        conn: Connection used to run the lookup.
        table_name: Table name, bound as query parameter ``$1``.

    Returns:
        The value of the ``SELECT EXISTS (...)`` query as returned by
        ``conn.fetchval``.
    """
    lookup_sql = """
        SELECT EXISTS (
            SELECT FROM information_schema.tables
            WHERE table_schema = 'public'
            AND table_name = $1
        )
    """
    return await conn.fetchval(lookup_sql, table_name)
|
||||||
|
|
||||||
|
async def column_exists(self, conn, table_name, column_name):
    """Check whether ``table_name`` in the ``public`` schema has ``column_name``.

    Args:
        conn: Connection used to run the lookup.
        table_name: Table name, bound as query parameter ``$1``.
        column_name: Column name, bound as query parameter ``$2``.

    Returns:
        The value of the ``SELECT EXISTS (...)`` query as returned by
        ``conn.fetchval``.
    """
    lookup_sql = """
        SELECT EXISTS (
            SELECT FROM information_schema.columns
            WHERE table_schema = 'public'
            AND table_name = $1
            AND column_name = $2
        )
    """
    return await conn.fetchval(lookup_sql, table_name, column_name)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue