matrix-synapse-gdpr/user-delete.py

148 lines
5.7 KiB
Python

#!/usr/bin/python3
import psycopg2
import argparse
import json
import sys
import re
# Note:
# Not all queries are safe for SQL injection,
# please dont use this as an API for user input,
# or at least secure it for system administrators!
def main():
parser = argparse.ArgumentParser(description='GDPR 100% compilant matrix synapse user data deletion script in Python3.')
group = parser.add_mutually_exclusive_group(required=False)
group.add_argument('-t', '--dryrun', action='store_true', help='Dry run, just SELECT rows instead of DELETE')
group.add_argument('-y', '--yes', action='store_true', help='Do not ask confirmations to DELETE commands')
group2 = parser.add_argument_group('required arguments')
group2.add_argument('user_ids', metavar='UID', type=str, nargs='+', help='The user id/ids to delete')
args = parser.parse_args(sys.argv[1:])
try:
conn = psql_connect()
tables = psql_showtables(conn)
for user in args.user_ids:
ushort = re.search(r'(?<=^@)\w+', user).group(0).strip()
print('UID {} NAME {}'.format(user, ushort))
for table in tables:
cols = psql_describe(conn, table)
for col in cols:
if col in ('user_id', 'name'):
_psql_deletesafe(args, conn, 'DELETE FROM {} WHERE {} = %s'.format(table, col), (user,))
_psql_deletesafe(args, conn, 'DELETE FROM device_inbox WHERE message_json LIKE \'%{}%\''.format(user))
_psql_deletesafe(args, conn, 'DELETE FROM current_state_delta_stream WHERE state_key LIKE \'%{}%\''.format(user))
_psql_deletesafe(args, conn, 'DELETE FROM cache_invalidation_stream WHERE %s = ANY(keys)', (user,))
_psql_deletesafe(args, conn, 'DELETE FROM current_state_events WHERE state_key = %s', (user,))
_psql_deletesafe(args, conn, 'DELETE FROM event_json WHERE json LIKE \'%{}%\''.format(user))
_psql_deletesafe(args, conn, 'DELETE FROM events WHERE sender = %s', (user,))
_psql_deletesafe(args, conn, 'DELETE FROM local_invites WHERE invitee = %s OR inviter = %s', (user,user,))
_psql_deletesafe(args, conn, 'DELETE FROM profiles WHERE user_id = %s', (ushort,))
_psql_deletesafe(args, conn, 'DELETE FROM rooms WHERE creator = %s', (user,))
_psql_deletesafe(args, conn, 'DELETE FROM state_events WHERE state_key LIKE \'%{}%\' OR prev_state LIKE \'%{}%\''.format(user, user))
_psql_deletesafe(args, conn, 'DELETE FROM state_groups_state WHERE state_key LIKE \'%{}%\''.format(user))
_psql_deletesafe(args, conn, 'DELETE FROM user_filters WHERE user_id = %s', (ushort,))
_psql_deletesafe(args, conn, 'DELETE FROM room_memberships WHERE user_id = %s OR sender = %s', (user,user,))
_custom__delete_account_relations(conn, user)
except KeyboardInterrupt:
print("\nAborted.")
finally:
conn.close()
def _custom__delete_account_relations(conn, user):
print('*** [CUSTOM] UPDATE account_data "content" json')
md5s = psql_getall(conn, 'SELECT md5(content) FROM account_data WHERE content LIKE \'%{}%\''.format(user))
if len(md5s) == 0:
print('NOTHING TO UPDATE')
return False
for md5 in md5s:
print('MD5 content: {}'.format(md5))
content = psql_getone(conn, 'SELECT content FROM account_data WHERE md5(content) = %s', (md5,))
jscontent = json.loads(content); del jscontent[user]
content = json.dumps(jscontent)
rcnt = psql_update(conn, 'UPDATE account_data SET content = %s WHERE md5(content) = %s', (content, md5,))
print('UPDATED {}'.format(rcnt))
return True
# Note: to myself
# i could do this using the conn.commit() function instead
# but i wanted to make it more human fail safe and call another function
# to keep things separated
def _psql_deletesafe(pargs, conn, sql, args=None):
sqls = re.sub(r'(?i)^delete from', 'SELECT COUNT(1) FROM', sql)
count = psql_getone(conn, sqls, args)
if not args is None:
print('*** [ARGS] >> {}'.format(', '.join(args)))
if pargs.dryrun:
print('*** [DRY_RUN] >> {}'.format(sql))
print('DELETE {}'.format(count))
print()
return -1
if not pargs.yes:
y = input('{} [Y/n]: '.format(sql)).strip().lower()
if y == 'n':
return -2
print('*** [EXECUTE] >> {}'.format(sql))
rcnt = psql_delete(conn, sql, args)
print('DELETE {}'.format(rcnt))
print()
return rcnt
def psql_describe(conn, table):
cur = conn.cursor()
cur.execute('SELECT * FROM {} LIMIT 0'.format(table))
cols = [desc[0] for desc in cur.description]
cur.close()
return cols
def psql_showtables(conn):
out = psql_getall(conn, "SELECT relname FROM pg_class WHERE relkind='r' AND relname !~ '^(pg_|sql_)'")
tables = []
for row in out:
tables.append(row[0])
return tables
def psql_connect():
conf = json.loads(fread('postgres.conf'))
conn = psycopg2.connect(host=conf['hostname'], port=conf['port'],\
user=conf['username'], database=conf['database'])
return conn
def psql_update(conn, sql, args=None):
return psql_delete(conn, sql, args)
def psql_delete(conn, sql, args=None):
cur = conn.cursor()
cur.execute(sql, args)
rowcount = cur.rowcount
conn.commit()
cur.close()
return rowcount
def psql_getone(conn, sql, args=None):
cur = conn.cursor()
cur.execute(sql, args)
row = cur.fetchone()
cur.close()
return row[0]
def psql_getall(conn, sql, args=None):
cur = conn.cursor()
cur.execute(sql, args)
rows = cur.fetchall()
cur.close()
return rows
def fread(f):
r=open(f,'r');c=r.read().strip();r.close();return c
if __name__ == '__main__':
main()