purge_history.sh 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. #!/usr/bin/env bash
  2. # this script will use the api:
  3. # https://matrix-org.github.io/synapse/latest/admin_api/purge_history_api.html
  4. #
  5. # It will purge all messages in a list of rooms up to a cetrain event
  6. ###################################################################################################
  7. # define your domain and admin user
  8. ###################################################################################################
  9. # add this user as admin in your home server:
  10. DOMAIN=yourserver.tld
  11. # add this user as admin in your home server:
  12. ADMIN="@you_admin_username:$DOMAIN"
  13. API_URL="$DOMAIN:8008/_matrix/client/r0"
  14. ###################################################################################################
  15. #choose the rooms to prune old messages from (add a free comment at the end)
  16. ###################################################################################################
  17. # the room_id's you can get e.g. from your Riot clients "View Source" button on each message
  18. ROOMS_ARRAY=(
  19. '!DgvjtOljKujDBrxyHk:matrix.org#riot:matrix.org'
  20. '!QtykxKocfZaZOUrTwp:matrix.org#Matrix HQ'
  21. )
  22. # ALTERNATIVELY:
  23. # you can select all the rooms that are not encrypted and loop over the result:
  24. # SELECT room_id FROM rooms WHERE room_id NOT IN (SELECT DISTINCT room_id FROM events WHERE type ='m.room.encrypted')
  25. # or
  26. # select all rooms with at least 100 members:
  27. # SELECT q.room_id FROM (select count(*) as numberofusers, room_id FROM current_state_events WHERE type ='m.room.member'
  28. # GROUP BY room_id) AS q LEFT JOIN room_aliases a ON q.room_id=a.room_id WHERE q.numberofusers > 100 ORDER BY numberofusers desc
  29. ###################################################################################################
  30. # evaluate the EVENT_ID before which should be pruned
  31. ###################################################################################################
  32. # choose a time before which the messages should be pruned:
  33. TIME='12 months ago'
  34. # ALTERNATIVELY:
  35. # a certain time:
  36. # TIME='2016-08-31 23:59:59'
  37. # creates a timestamp from the given time string:
  38. UNIX_TIMESTAMP=$(date +%s%3N --date='TZ="UTC+2" '"$TIME")
  39. # ALTERNATIVELY:
  40. # prune all messages that are older than 1000 messages ago:
  41. # LAST_MESSAGES=1000
  42. # SQL_GET_EVENT="SELECT event_id from events WHERE type='m.room.message' AND room_id ='$ROOM' ORDER BY received_ts DESC LIMIT 1 offset $(($LAST_MESSAGES - 1))"
  43. # ALTERNATIVELY:
  44. # select the EVENT_ID manually:
  45. #EVENT_ID='$1471814088343495zpPNI:matrix.org' # an example event from 21st of Aug 2016 by Matthew
###################################################################################################
# make the admin user a server admin in the database, e.g. with:
###################################################################################################
# psql -A -t --dbname=synapse -c "UPDATE users SET admin=1 WHERE name LIKE '$ADMIN'"
###################################################################################################
  51. # database function
  52. ###################################################################################################
  53. sql (){
  54. # for sqlite3:
  55. #sqlite3 homeserver.db "pragma busy_timeout=20000;$1" | awk '{print $2}'
  56. # for postgres:
  57. psql -A -t --dbname=synapse -c "$1" | grep -v 'Pager'
  58. }
  59. ###################################################################################################
  60. # get an access token
  61. ###################################################################################################
  62. # for example externally by watching Riot in your browser's network inspector
  63. # or internally on the server locally, use this:
  64. TOKEN=$(sql "SELECT token FROM access_tokens WHERE user_id='$ADMIN' ORDER BY id DESC LIMIT 1")
  65. AUTH="Authorization: Bearer $TOKEN"
  66. ###################################################################################################
  67. # check, if your TOKEN works. For example this works:
  68. ###################################################################################################
  69. # $ curl --header "$AUTH" "$API_URL/rooms/$ROOM/state/m.room.power_levels"
  70. ###################################################################################################
  71. # finally start pruning the room:
  72. ###################################################################################################
  73. # this will really delete local events, so the messages in the room really
  74. # disappear unless they are restored by remote federation. This is because
  75. # we pass {"delete_local_events":true} to the curl invocation below.
  76. for ROOM in "${ROOMS_ARRAY[@]}"; do
  77. echo "########################################### $(date) ################# "
  78. echo "pruning room: $ROOM ..."
  79. ROOM=${ROOM%#*}
  80. #set -x
  81. echo "check for alias in db..."
  82. # for postgres:
  83. sql "SELECT * FROM room_aliases WHERE room_id='$ROOM'"
  84. echo "get event..."
  85. # for postgres:
  86. EVENT_ID=$(sql "SELECT event_id FROM events WHERE type='m.room.message' AND received_ts<'$UNIX_TIMESTAMP' AND room_id='$ROOM' ORDER BY received_ts DESC LIMIT 1;")
  87. if [ "$EVENT_ID" == "" ]; then
  88. echo "no event $TIME"
  89. else
  90. echo "event: $EVENT_ID"
  91. SLEEP=2
  92. set -x
  93. # call purge
  94. OUT=$(curl --header "$AUTH" -s -d '{"delete_local_events":true}' POST "$API_URL/admin/purge_history/$ROOM/$EVENT_ID")
  95. PURGE_ID=$(echo "$OUT" |grep purge_id|cut -d'"' -f4 )
  96. if [ "$PURGE_ID" == "" ]; then
  97. # probably the history purge is already in progress for $ROOM
  98. : "continuing with next room"
  99. else
  100. while : ; do
  101. # get status of purge and sleep longer each time if still active
  102. sleep $SLEEP
  103. STATUS=$(curl --header "$AUTH" -s GET "$API_URL/admin/purge_history_status/$PURGE_ID" |grep status|cut -d'"' -f4)
  104. : "$ROOM --> Status: $STATUS"
  105. [[ "$STATUS" == "active" ]] || break
  106. SLEEP=$((SLEEP + 1))
  107. done
  108. fi
  109. set +x
  110. sleep 1
  111. fi
  112. done
  113. ###################################################################################################
  114. # additionally
  115. ###################################################################################################
  116. # to benefit from pruning large amounts of data, you need to call VACUUM to free the unused space.
  117. # This can take a very long time (hours) and the client have to be stopped while you do so:
  118. # $ synctl stop
  119. # $ sqlite3 -line homeserver.db "vacuum;"
  120. # $ synctl start
  121. # This could be set, so you don't need to prune every time after deleting some rows:
  122. # $ sqlite3 homeserver.db "PRAGMA auto_vacuum = FULL;"
  123. # be cautious, it could make the database somewhat slow if there are a lot of deletions
  124. exit