fix: More Backup Restore Fixes (#2859)

* refactor normalized search migration to use dummy default

* changed group slug migration to use raw SQL

* updated comment

* added tests with anonymized backups (currently failing)

* typo

* fixed LDAP enum in test data

* fix for adding label settings across groups

* add migration data fixes

* fix shopping list label settings test

* re-run db init instead of just running alembic migration, to include fixes

* intentionally broke SQLAlchemy GUID handling

* safely convert between GUID types in different databases

* restore original test data after testing backup restores

* added missing group name update to migration
This commit is contained in:
Michael Genson 2024-01-02 22:19:04 -06:00 committed by GitHub
parent b3f7f2d89f
commit 7602c67449
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 422 additions and 45 deletions

View file

@ -0,0 +1,74 @@
import json
import logging
import random
import string
from datetime import datetime
from uuid import UUID
# Module-level logger, registered under the fixed name "anonymize_backups".
logger = logging.getLogger("anonymize_backups")
def is_uuid4(value: str):
    """Report whether ``value`` can be parsed as a UUID.

    Despite the name, the UUID version is not inspected: any string that the
    ``UUID`` constructor accepts counts as a match.

    Returns:
        ``True`` when ``UUID(value)`` succeeds, ``False`` when it raises
        ``ValueError``.
    """
    try:
        UUID(value)
    except ValueError:
        return False
    else:
        return True
def is_iso_datetime(value: str):
    """Report whether ``value`` is accepted by ``datetime.fromisoformat``.

    Returns:
        ``True`` when parsing succeeds, ``False`` when ``fromisoformat``
        raises ``ValueError``.
    """
    try:
        datetime.fromisoformat(value)
    except ValueError:
        return False
    else:
        return True
def random_string(length=10):
    """Build a string of ``length`` randomly chosen lowercase ASCII letters.

    Each character is drawn independently with ``random.choice``; a
    non-positive ``length`` yields an empty string.
    """
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return "".join(picked)
def clean_value(value):
    """Return an anonymized stand-in for a single value.

    Only strings are ever replaced. A string that ``is_uuid4`` or
    ``is_iso_datetime`` accepts is returned as-is; any other string is
    swapped for the result of ``random_string()``. Every non-string value
    (numbers, booleans, ``None``, containers, ...) is returned unchanged.

    If anything raises while inspecting the value, the error is logged and
    the original value is returned untouched.
    """
    try:
        if isinstance(value, str):
            # preserve UUIDs and datetimes, randomize every other string
            keep_as_is = is_uuid4(value) or is_iso_datetime(value)
            return value if keep_as_is else random_string()
    except Exception as e:
        logger.exception(e)
        logger.error(f"Failed to anonymize value: {value}")

    # non-strings and values that failed inspection pass through untouched
    return value
def walk_data_and_anonymize(data):
    """Recursively anonymize every scalar inside ``data``, in place.

    ``data`` is expected to be decoded JSON. Dicts and lists are walked at
    any nesting depth; every other value is replaced by ``clean_value(...)``
    unless it is stored under one of the preserved keys.

    Compared with a dict-only walk, this also handles:
      * lists whose items are scalars or further lists (previously this
        raised ``AttributeError`` because ``.items()`` was called on them);
      * dicts nested directly inside other dicts (previously their contents
        were left un-anonymized).

    Args:
        data: a dict or list to mutate. Any other type is ignored, since
            there is nothing that could be updated in place.

    Returns:
        None. The container is modified in place.
    """
    # preserve alembic version number and enums
    preserved_keys = {"auth_method", "version_num"}

    if isinstance(data, dict):
        # Re-assigning existing keys while iterating is safe: the dict's
        # size never changes.
        for key, value in data.items():
            if isinstance(value, (dict, list)):
                walk_data_and_anonymize(value)
            elif key not in preserved_keys:
                data[key] = clean_value(value)
    elif isinstance(data, list):
        for index, item in enumerate(data):
            if isinstance(item, (dict, list)):
                walk_data_and_anonymize(item)
            else:
                # list items carry no key, so none of them can be preserved
                data[index] = clean_value(item)
def anonymize_database_json(input_filepath: str, output_filepath: str):
    """Read a JSON file, anonymize its contents, and write the result out.

    The decoded data is passed through ``walk_data_and_anonymize`` and then
    serialized to ``output_filepath`` (created or overwritten).

    Args:
        input_filepath: path of the JSON file to read.
        output_filepath: path the anonymized JSON is written to.

    Raises:
        OSError: if either file cannot be opened.
        json.JSONDecodeError: if the input is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # locale encoding, which differs between operating systems.
    with open(input_filepath, encoding="utf-8") as f:
        data = json.load(f)

    walk_data_and_anonymize(data)

    with open(output_filepath, "w", encoding="utf-8") as f:
        json.dump(data, f)