test_lock.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502
  1. # Copyright 2021 The Matrix.org Foundation C.I.C.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from twisted.internet import defer, reactor
  15. from twisted.internet.base import ReactorBase
  16. from twisted.internet.defer import Deferred
  17. from twisted.test.proto_helpers import MemoryReactor
  18. from synapse.server import HomeServer
  19. from synapse.storage.databases.main.lock import _LOCK_TIMEOUT_MS
  20. from synapse.util import Clock
  21. from tests import unittest
  22. class LockTestCase(unittest.HomeserverTestCase):
  23. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  24. self.store = hs.get_datastores().main
  25. def test_acquire_contention(self) -> None:
  26. # Track the number of tasks holding the lock.
  27. # Should be at most 1.
  28. in_lock = 0
  29. max_in_lock = 0
  30. release_lock: "Deferred[None]" = Deferred()
  31. async def task() -> None:
  32. nonlocal in_lock
  33. nonlocal max_in_lock
  34. lock = await self.store.try_acquire_lock("name", "key")
  35. if not lock:
  36. return
  37. async with lock:
  38. in_lock += 1
  39. max_in_lock = max(max_in_lock, in_lock)
  40. # Block to allow other tasks to attempt to take the lock.
  41. await release_lock
  42. in_lock -= 1
  43. # Start 3 tasks.
  44. task1 = defer.ensureDeferred(task())
  45. task2 = defer.ensureDeferred(task())
  46. task3 = defer.ensureDeferred(task())
  47. # Give the reactor a kick so that the database transaction returns.
  48. self.pump()
  49. release_lock.callback(None)
  50. # Run the tasks to completion.
  51. # To work around `Linearizer`s using a different reactor to sleep when
  52. # contended (#12841), we call `runUntilCurrent` on
  53. # `twisted.internet.reactor`, which is a different reactor to that used
  54. # by the homeserver.
  55. assert isinstance(reactor, ReactorBase)
  56. self.get_success(task1)
  57. reactor.runUntilCurrent()
  58. self.get_success(task2)
  59. reactor.runUntilCurrent()
  60. self.get_success(task3)
  61. # At most one task should have held the lock at a time.
  62. self.assertEqual(max_in_lock, 1)
  63. def test_simple_lock(self) -> None:
  64. """Test that we can take out a lock and that while we hold it nobody
  65. else can take it out.
  66. """
  67. # First to acquire this lock, so it should complete
  68. lock = self.get_success(self.store.try_acquire_lock("name", "key"))
  69. assert lock is not None
  70. # Enter the context manager
  71. self.get_success(lock.__aenter__())
  72. # Attempting to acquire the lock again fails.
  73. lock2 = self.get_success(self.store.try_acquire_lock("name", "key"))
  74. self.assertIsNone(lock2)
  75. # Calling `is_still_valid` reports true.
  76. self.assertTrue(self.get_success(lock.is_still_valid()))
  77. # Drop the lock
  78. self.get_success(lock.__aexit__(None, None, None))
  79. # We can now acquire the lock again.
  80. lock3 = self.get_success(self.store.try_acquire_lock("name", "key"))
  81. assert lock3 is not None
  82. self.get_success(lock3.__aenter__())
  83. self.get_success(lock3.__aexit__(None, None, None))
  84. def test_maintain_lock(self) -> None:
  85. """Test that we don't time out locks while they're still active"""
  86. lock = self.get_success(self.store.try_acquire_lock("name", "key"))
  87. assert lock is not None
  88. self.get_success(lock.__aenter__())
  89. # Wait for ages with the lock, we should not be able to get the lock.
  90. self.reactor.advance(5 * _LOCK_TIMEOUT_MS / 1000)
  91. lock2 = self.get_success(self.store.try_acquire_lock("name", "key"))
  92. self.assertIsNone(lock2)
  93. self.get_success(lock.__aexit__(None, None, None))
  94. def test_timeout_lock(self) -> None:
  95. """Test that we time out locks if they're not updated for ages"""
  96. lock = self.get_success(self.store.try_acquire_lock("name", "key"))
  97. assert lock is not None
  98. self.get_success(lock.__aenter__())
  99. # We simulate the process getting stuck by cancelling the looping call
  100. # that keeps the lock active.
  101. lock._looping_call.stop()
  102. # Wait for the lock to timeout.
  103. self.reactor.advance(2 * _LOCK_TIMEOUT_MS / 1000)
  104. lock2 = self.get_success(self.store.try_acquire_lock("name", "key"))
  105. self.assertIsNotNone(lock2)
  106. self.assertFalse(self.get_success(lock.is_still_valid()))
  107. def test_drop(self) -> None:
  108. """Test that dropping the context manager means we stop renewing the lock"""
  109. lock = self.get_success(self.store.try_acquire_lock("name", "key"))
  110. self.assertIsNotNone(lock)
  111. del lock
  112. # Wait for the lock to timeout.
  113. self.reactor.advance(2 * _LOCK_TIMEOUT_MS / 1000)
  114. lock2 = self.get_success(self.store.try_acquire_lock("name", "key"))
  115. self.assertIsNotNone(lock2)
  116. def test_shutdown(self) -> None:
  117. """Test that shutting down Synapse releases the locks"""
  118. # Acquire two locks
  119. lock = self.get_success(self.store.try_acquire_lock("name", "key1"))
  120. self.assertIsNotNone(lock)
  121. lock2 = self.get_success(self.store.try_acquire_lock("name", "key2"))
  122. self.assertIsNotNone(lock2)
  123. # Now call the shutdown code
  124. self.get_success(self.store._on_shutdown())
  125. self.assertEqual(self.store._live_lock_tokens, {})
  126. class ReadWriteLockTestCase(unittest.HomeserverTestCase):
  127. """Test the read/write lock implementation."""
  128. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  129. self.store = hs.get_datastores().main
  130. def test_acquire_write_contention(self) -> None:
  131. """Test that we can only acquire one write lock at a time"""
  132. # Track the number of tasks holding the lock.
  133. # Should be at most 1.
  134. in_lock = 0
  135. max_in_lock = 0
  136. release_lock: "Deferred[None]" = Deferred()
  137. async def task() -> None:
  138. nonlocal in_lock
  139. nonlocal max_in_lock
  140. lock = await self.store.try_acquire_read_write_lock(
  141. "name", "key", write=True
  142. )
  143. if not lock:
  144. return
  145. async with lock:
  146. in_lock += 1
  147. max_in_lock = max(max_in_lock, in_lock)
  148. # Block to allow other tasks to attempt to take the lock.
  149. await release_lock
  150. in_lock -= 1
  151. # Start 3 tasks.
  152. task1 = defer.ensureDeferred(task())
  153. task2 = defer.ensureDeferred(task())
  154. task3 = defer.ensureDeferred(task())
  155. # Give the reactor a kick so that the database transaction returns.
  156. self.pump()
  157. release_lock.callback(None)
  158. # Run the tasks to completion.
  159. # To work around `Linearizer`s using a different reactor to sleep when
  160. # contended (#12841), we call `runUntilCurrent` on
  161. # `twisted.internet.reactor`, which is a different reactor to that used
  162. # by the homeserver.
  163. assert isinstance(reactor, ReactorBase)
  164. self.get_success(task1)
  165. reactor.runUntilCurrent()
  166. self.get_success(task2)
  167. reactor.runUntilCurrent()
  168. self.get_success(task3)
  169. # At most one task should have held the lock at a time.
  170. self.assertEqual(max_in_lock, 1)
  171. def test_acquire_multiple_reads(self) -> None:
  172. """Test that we can acquire multiple read locks at a time"""
  173. # Track the number of tasks holding the lock.
  174. in_lock = 0
  175. max_in_lock = 0
  176. release_lock: "Deferred[None]" = Deferred()
  177. async def task() -> None:
  178. nonlocal in_lock
  179. nonlocal max_in_lock
  180. lock = await self.store.try_acquire_read_write_lock(
  181. "name", "key", write=False
  182. )
  183. if not lock:
  184. return
  185. async with lock:
  186. in_lock += 1
  187. max_in_lock = max(max_in_lock, in_lock)
  188. # Block to allow other tasks to attempt to take the lock.
  189. await release_lock
  190. in_lock -= 1
  191. # Start 3 tasks.
  192. task1 = defer.ensureDeferred(task())
  193. task2 = defer.ensureDeferred(task())
  194. task3 = defer.ensureDeferred(task())
  195. # Give the reactor a kick so that the database transaction returns.
  196. self.pump()
  197. release_lock.callback(None)
  198. # Run the tasks to completion.
  199. # To work around `Linearizer`s using a different reactor to sleep when
  200. # contended (#12841), we call `runUntilCurrent` on
  201. # `twisted.internet.reactor`, which is a different reactor to that used
  202. # by the homeserver.
  203. assert isinstance(reactor, ReactorBase)
  204. self.get_success(task1)
  205. reactor.runUntilCurrent()
  206. self.get_success(task2)
  207. reactor.runUntilCurrent()
  208. self.get_success(task3)
  209. # At most one task should have held the lock at a time.
  210. self.assertEqual(max_in_lock, 3)
  211. def test_write_lock_acquired(self) -> None:
  212. """Test that we can take out a write lock and that while we hold it
  213. nobody else can take it out.
  214. """
  215. # First to acquire this lock, so it should complete
  216. lock = self.get_success(
  217. self.store.try_acquire_read_write_lock("name", "key", write=True)
  218. )
  219. assert lock is not None
  220. # Enter the context manager
  221. self.get_success(lock.__aenter__())
  222. # Attempting to acquire the lock again fails, as both read and write.
  223. lock2 = self.get_success(
  224. self.store.try_acquire_read_write_lock("name", "key", write=True)
  225. )
  226. self.assertIsNone(lock2)
  227. lock3 = self.get_success(
  228. self.store.try_acquire_read_write_lock("name", "key", write=False)
  229. )
  230. self.assertIsNone(lock3)
  231. # Calling `is_still_valid` reports true.
  232. self.assertTrue(self.get_success(lock.is_still_valid()))
  233. # Drop the lock
  234. self.get_success(lock.__aexit__(None, None, None))
  235. # We can now acquire the lock again.
  236. lock4 = self.get_success(
  237. self.store.try_acquire_read_write_lock("name", "key", write=True)
  238. )
  239. assert lock4 is not None
  240. self.get_success(lock4.__aenter__())
  241. self.get_success(lock4.__aexit__(None, None, None))
  242. def test_read_lock_acquired(self) -> None:
  243. """Test that we can take out a read lock and that while we hold it
  244. only other reads can use it.
  245. """
  246. # First to acquire this lock, so it should complete
  247. lock = self.get_success(
  248. self.store.try_acquire_read_write_lock("name", "key", write=False)
  249. )
  250. assert lock is not None
  251. # Enter the context manager
  252. self.get_success(lock.__aenter__())
  253. # Attempting to acquire the write lock fails
  254. lock2 = self.get_success(
  255. self.store.try_acquire_read_write_lock("name", "key", write=True)
  256. )
  257. self.assertIsNone(lock2)
  258. # Attempting to acquire a read lock succeeds
  259. lock3 = self.get_success(
  260. self.store.try_acquire_read_write_lock("name", "key", write=False)
  261. )
  262. assert lock3 is not None
  263. self.get_success(lock3.__aenter__())
  264. # Calling `is_still_valid` reports true.
  265. self.assertTrue(self.get_success(lock.is_still_valid()))
  266. # Drop the first lock
  267. self.get_success(lock.__aexit__(None, None, None))
  268. # Attempting to acquire the write lock still fails, as lock3 is still
  269. # active.
  270. lock4 = self.get_success(
  271. self.store.try_acquire_read_write_lock("name", "key", write=True)
  272. )
  273. self.assertIsNone(lock4)
  274. # Drop the still open third lock
  275. self.get_success(lock3.__aexit__(None, None, None))
  276. # We can now acquire the lock again.
  277. lock5 = self.get_success(
  278. self.store.try_acquire_read_write_lock("name", "key", write=True)
  279. )
  280. assert lock5 is not None
  281. self.get_success(lock5.__aenter__())
  282. self.get_success(lock5.__aexit__(None, None, None))
  283. def test_maintain_lock(self) -> None:
  284. """Test that we don't time out locks while they're still active (lock is
  285. renewed in the background if the process is still alive)"""
  286. lock = self.get_success(
  287. self.store.try_acquire_read_write_lock("name", "key", write=True)
  288. )
  289. assert lock is not None
  290. self.get_success(lock.__aenter__())
  291. # Wait for ages with the lock, we should not be able to get the lock.
  292. self.reactor.advance(5 * _LOCK_TIMEOUT_MS / 1000)
  293. self.pump()
  294. lock2 = self.get_success(
  295. self.store.try_acquire_read_write_lock("name", "key", write=True)
  296. )
  297. self.assertIsNone(lock2)
  298. self.get_success(lock.__aexit__(None, None, None))
  299. def test_timeout_lock(self) -> None:
  300. """Test that we time out locks if they're not updated for ages"""
  301. lock = self.get_success(
  302. self.store.try_acquire_read_write_lock("name", "key", write=True)
  303. )
  304. assert lock is not None
  305. self.get_success(lock.__aenter__())
  306. # We simulate the process getting stuck by cancelling the looping call
  307. # that keeps the lock active.
  308. lock._looping_call.stop()
  309. # Wait for the lock to timeout.
  310. self.reactor.advance(2 * _LOCK_TIMEOUT_MS / 1000)
  311. lock2 = self.get_success(
  312. self.store.try_acquire_read_write_lock("name", "key", write=True)
  313. )
  314. self.assertIsNotNone(lock2)
  315. self.assertFalse(self.get_success(lock.is_still_valid()))
  316. def test_drop(self) -> None:
  317. """Test that dropping the context manager means we stop renewing the lock"""
  318. lock = self.get_success(
  319. self.store.try_acquire_read_write_lock("name", "key", write=True)
  320. )
  321. self.assertIsNotNone(lock)
  322. del lock
  323. # Wait for the lock to timeout.
  324. self.reactor.advance(2 * _LOCK_TIMEOUT_MS / 1000)
  325. lock2 = self.get_success(
  326. self.store.try_acquire_read_write_lock("name", "key", write=True)
  327. )
  328. self.assertIsNotNone(lock2)
  329. def test_shutdown(self) -> None:
  330. """Test that shutting down Synapse releases the locks"""
  331. # Acquire two locks
  332. lock = self.get_success(
  333. self.store.try_acquire_read_write_lock("name", "key", write=True)
  334. )
  335. self.assertIsNotNone(lock)
  336. lock2 = self.get_success(
  337. self.store.try_acquire_read_write_lock("name", "key2", write=True)
  338. )
  339. self.assertIsNotNone(lock2)
  340. # Now call the shutdown code
  341. self.get_success(self.store._on_shutdown())
  342. self.assertEqual(self.store._live_read_write_lock_tokens, {})
  343. def test_acquire_multiple_locks(self) -> None:
  344. """Tests that acquiring multiple locks at once works."""
  345. # Take out multiple locks and ensure that we can't get those locks out
  346. # again.
  347. lock = self.get_success(
  348. self.store.try_acquire_multi_read_write_lock(
  349. [("name1", "key1"), ("name2", "key2")], write=True
  350. )
  351. )
  352. self.assertIsNotNone(lock)
  353. assert lock is not None
  354. self.get_success(lock.__aenter__())
  355. lock2 = self.get_success(
  356. self.store.try_acquire_read_write_lock("name1", "key1", write=True)
  357. )
  358. self.assertIsNone(lock2)
  359. lock3 = self.get_success(
  360. self.store.try_acquire_read_write_lock("name2", "key2", write=False)
  361. )
  362. self.assertIsNone(lock3)
  363. # Overlapping locks attempts will fail, and won't lock any locks.
  364. lock4 = self.get_success(
  365. self.store.try_acquire_multi_read_write_lock(
  366. [("name1", "key1"), ("name3", "key3")], write=True
  367. )
  368. )
  369. self.assertIsNone(lock4)
  370. lock5 = self.get_success(
  371. self.store.try_acquire_read_write_lock("name3", "key3", write=True)
  372. )
  373. self.assertIsNotNone(lock5)
  374. assert lock5 is not None
  375. self.get_success(lock5.__aenter__())
  376. self.get_success(lock5.__aexit__(None, None, None))
  377. # Once we release the lock we can take out the locks again.
  378. self.get_success(lock.__aexit__(None, None, None))
  379. lock6 = self.get_success(
  380. self.store.try_acquire_read_write_lock("name1", "key1", write=True)
  381. )
  382. self.assertIsNotNone(lock6)
  383. assert lock6 is not None
  384. self.get_success(lock6.__aenter__())
  385. self.get_success(lock6.__aexit__(None, None, None))