move_remote_media_to_new_store.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # Copyright 2017 New Vector Ltd
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. """
  17. Moves a list of remote media from one media store to another.
  18. The input should be a list of media files to be moved, one per line. Each line
  19. should be formatted::
  20. <origin server>|<file id>
  21. This can be extracted from postgres with::
  22. psql --tuples-only -A -c "select media_origin, filesystem_id from
  23. matrix.remote_media_cache where ..."
  24. To use, pipe the above into::
  25. PYTHONPATH=. ./scripts/move_remote_media_to_new_store.py <source repo> <dest repo>
  26. """
  27. import argparse
  28. import logging
  29. import os
  30. import shutil
  31. import sys
  32. from synapse.rest.media.v1.filepath import MediaFilePaths
  33. logger = logging.getLogger()
  34. def main(src_repo, dest_repo):
  35. src_paths = MediaFilePaths(src_repo)
  36. dest_paths = MediaFilePaths(dest_repo)
  37. for line in sys.stdin:
  38. line = line.strip()
  39. parts = line.split("|")
  40. if len(parts) != 2:
  41. print("Unable to parse input line %s" % line, file=sys.stderr)
  42. sys.exit(1)
  43. move_media(parts[0], parts[1], src_paths, dest_paths)
  44. def move_media(origin_server, file_id, src_paths, dest_paths):
  45. """Move the given file, and any thumbnails, to the dest repo
  46. Args:
  47. origin_server (str):
  48. file_id (str):
  49. src_paths (MediaFilePaths):
  50. dest_paths (MediaFilePaths):
  51. """
  52. logger.info("%s/%s", origin_server, file_id)
  53. # check that the original exists
  54. original_file = src_paths.remote_media_filepath(origin_server, file_id)
  55. if not os.path.exists(original_file):
  56. logger.warning(
  57. "Original for %s/%s (%s) does not exist",
  58. origin_server,
  59. file_id,
  60. original_file,
  61. )
  62. else:
  63. mkdir_and_move(
  64. original_file, dest_paths.remote_media_filepath(origin_server, file_id)
  65. )
  66. # now look for thumbnails
  67. original_thumb_dir = src_paths.remote_media_thumbnail_dir(origin_server, file_id)
  68. if not os.path.exists(original_thumb_dir):
  69. return
  70. mkdir_and_move(
  71. original_thumb_dir,
  72. dest_paths.remote_media_thumbnail_dir(origin_server, file_id),
  73. )
  74. def mkdir_and_move(original_file, dest_file):
  75. dirname = os.path.dirname(dest_file)
  76. if not os.path.exists(dirname):
  77. logger.debug("mkdir %s", dirname)
  78. os.makedirs(dirname)
  79. logger.debug("mv %s %s", original_file, dest_file)
  80. shutil.move(original_file, dest_file)
  81. if __name__ == "__main__":
  82. parser = argparse.ArgumentParser(
  83. description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
  84. )
  85. parser.add_argument("-v", action="store_true", help="enable debug logging")
  86. parser.add_argument("src_repo", help="Path to source content repo")
  87. parser.add_argument("dest_repo", help="Path to source content repo")
  88. args = parser.parse_args()
  89. logging_config = {
  90. "level": logging.DEBUG if args.v else logging.INFO,
  91. "format": "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(message)s",
  92. }
  93. logging.basicConfig(**logging_config)
  94. main(args.src_repo, args.dest_repo)