# tests/storage/databases/main/test_lock.py
# Copyright 2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from twisted.internet import defer, reactor
from twisted.internet.base import ReactorBase
from twisted.internet.defer import Deferred
from twisted.test.proto_helpers import MemoryReactor

from synapse.server import HomeServer
from synapse.storage.databases.main.lock import _LOCK_TIMEOUT_MS, _RENEWAL_INTERVAL_MS
from synapse.util import Clock

from tests import unittest
  22. class LockTestCase(unittest.HomeserverTestCase):
  23. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  24. self.store = hs.get_datastores().main
  25. def test_acquire_contention(self) -> None:
  26. # Track the number of tasks holding the lock.
  27. # Should be at most 1.
  28. in_lock = 0
  29. max_in_lock = 0
  30. release_lock: "Deferred[None]" = Deferred()
  31. async def task() -> None:
  32. nonlocal in_lock
  33. nonlocal max_in_lock
  34. lock = await self.store.try_acquire_lock("name", "key")
  35. if not lock:
  36. return
  37. async with lock:
  38. in_lock += 1
  39. max_in_lock = max(max_in_lock, in_lock)
  40. # Block to allow other tasks to attempt to take the lock.
  41. await release_lock
  42. in_lock -= 1
  43. # Start 3 tasks.
  44. task1 = defer.ensureDeferred(task())
  45. task2 = defer.ensureDeferred(task())
  46. task3 = defer.ensureDeferred(task())
  47. # Give the reactor a kick so that the database transaction returns.
  48. self.pump()
  49. release_lock.callback(None)
  50. # Run the tasks to completion.
  51. # To work around `Linearizer`s using a different reactor to sleep when
  52. # contended (#12841), we call `runUntilCurrent` on
  53. # `twisted.internet.reactor`, which is a different reactor to that used
  54. # by the homeserver.
  55. assert isinstance(reactor, ReactorBase)
  56. self.get_success(task1)
  57. reactor.runUntilCurrent()
  58. self.get_success(task2)
  59. reactor.runUntilCurrent()
  60. self.get_success(task3)
  61. # At most one task should have held the lock at a time.
  62. self.assertEqual(max_in_lock, 1)
  63. def test_simple_lock(self) -> None:
  64. """Test that we can take out a lock and that while we hold it nobody
  65. else can take it out.
  66. """
  67. # First to acquire this lock, so it should complete
  68. lock = self.get_success(self.store.try_acquire_lock("name", "key"))
  69. assert lock is not None
  70. # Enter the context manager
  71. self.get_success(lock.__aenter__())
  72. # Attempting to acquire the lock again fails.
  73. lock2 = self.get_success(self.store.try_acquire_lock("name", "key"))
  74. self.assertIsNone(lock2)
  75. # Calling `is_still_valid` reports true.
  76. self.assertTrue(self.get_success(lock.is_still_valid()))
  77. # Drop the lock
  78. self.get_success(lock.__aexit__(None, None, None))
  79. # We can now acquire the lock again.
  80. lock3 = self.get_success(self.store.try_acquire_lock("name", "key"))
  81. assert lock3 is not None
  82. self.get_success(lock3.__aenter__())
  83. self.get_success(lock3.__aexit__(None, None, None))
  84. def test_maintain_lock(self) -> None:
  85. """Test that we don't time out locks while they're still active"""
  86. lock = self.get_success(self.store.try_acquire_lock("name", "key"))
  87. assert lock is not None
  88. self.get_success(lock.__aenter__())
  89. # Wait for ages with the lock, we should not be able to get the lock.
  90. self.reactor.advance(5 * _LOCK_TIMEOUT_MS / 1000)
  91. lock2 = self.get_success(self.store.try_acquire_lock("name", "key"))
  92. self.assertIsNone(lock2)
  93. self.get_success(lock.__aexit__(None, None, None))
  94. def test_timeout_lock(self) -> None:
  95. """Test that we time out locks if they're not updated for ages"""
  96. lock = self.get_success(self.store.try_acquire_lock("name", "key"))
  97. assert lock is not None
  98. self.get_success(lock.__aenter__())
  99. # We simulate the process getting stuck by cancelling the looping call
  100. # that keeps the lock active.
  101. assert lock._looping_call
  102. lock._looping_call.stop()
  103. # Wait for the lock to timeout.
  104. self.reactor.advance(2 * _LOCK_TIMEOUT_MS / 1000)
  105. lock2 = self.get_success(self.store.try_acquire_lock("name", "key"))
  106. self.assertIsNotNone(lock2)
  107. self.assertFalse(self.get_success(lock.is_still_valid()))
  108. def test_drop(self) -> None:
  109. """Test that dropping the context manager means we stop renewing the lock"""
  110. lock = self.get_success(self.store.try_acquire_lock("name", "key"))
  111. self.assertIsNotNone(lock)
  112. del lock
  113. # Wait for the lock to timeout.
  114. self.reactor.advance(2 * _LOCK_TIMEOUT_MS / 1000)
  115. lock2 = self.get_success(self.store.try_acquire_lock("name", "key"))
  116. self.assertIsNotNone(lock2)
  117. def test_shutdown(self) -> None:
  118. """Test that shutting down Synapse releases the locks"""
  119. # Acquire two locks
  120. lock = self.get_success(self.store.try_acquire_lock("name", "key1"))
  121. self.assertIsNotNone(lock)
  122. lock2 = self.get_success(self.store.try_acquire_lock("name", "key2"))
  123. self.assertIsNotNone(lock2)
  124. # Now call the shutdown code
  125. self.get_success(self.store._on_shutdown())
  126. self.assertEqual(self.store._live_lock_tokens, {})
  127. class ReadWriteLockTestCase(unittest.HomeserverTestCase):
  128. """Test the read/write lock implementation."""
  129. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  130. self.store = hs.get_datastores().main
  131. def test_acquire_write_contention(self) -> None:
  132. """Test that we can only acquire one write lock at a time"""
  133. # Track the number of tasks holding the lock.
  134. # Should be at most 1.
  135. in_lock = 0
  136. max_in_lock = 0
  137. release_lock: "Deferred[None]" = Deferred()
  138. async def task() -> None:
  139. nonlocal in_lock
  140. nonlocal max_in_lock
  141. lock = await self.store.try_acquire_read_write_lock(
  142. "name", "key", write=True
  143. )
  144. if not lock:
  145. return
  146. async with lock:
  147. in_lock += 1
  148. max_in_lock = max(max_in_lock, in_lock)
  149. # Block to allow other tasks to attempt to take the lock.
  150. await release_lock
  151. in_lock -= 1
  152. # Start 3 tasks.
  153. task1 = defer.ensureDeferred(task())
  154. task2 = defer.ensureDeferred(task())
  155. task3 = defer.ensureDeferred(task())
  156. # Give the reactor a kick so that the database transaction returns.
  157. self.pump()
  158. release_lock.callback(None)
  159. # Run the tasks to completion.
  160. # To work around `Linearizer`s using a different reactor to sleep when
  161. # contended (#12841), we call `runUntilCurrent` on
  162. # `twisted.internet.reactor`, which is a different reactor to that used
  163. # by the homeserver.
  164. assert isinstance(reactor, ReactorBase)
  165. self.get_success(task1)
  166. reactor.runUntilCurrent()
  167. self.get_success(task2)
  168. reactor.runUntilCurrent()
  169. self.get_success(task3)
  170. # At most one task should have held the lock at a time.
  171. self.assertEqual(max_in_lock, 1)
  172. def test_acquire_multiple_reads(self) -> None:
  173. """Test that we can acquire multiple read locks at a time"""
  174. # Track the number of tasks holding the lock.
  175. in_lock = 0
  176. max_in_lock = 0
  177. release_lock: "Deferred[None]" = Deferred()
  178. async def task() -> None:
  179. nonlocal in_lock
  180. nonlocal max_in_lock
  181. lock = await self.store.try_acquire_read_write_lock(
  182. "name", "key", write=False
  183. )
  184. if not lock:
  185. return
  186. async with lock:
  187. in_lock += 1
  188. max_in_lock = max(max_in_lock, in_lock)
  189. # Block to allow other tasks to attempt to take the lock.
  190. await release_lock
  191. in_lock -= 1
  192. # Start 3 tasks.
  193. task1 = defer.ensureDeferred(task())
  194. task2 = defer.ensureDeferred(task())
  195. task3 = defer.ensureDeferred(task())
  196. # Give the reactor a kick so that the database transaction returns.
  197. self.pump()
  198. release_lock.callback(None)
  199. # Run the tasks to completion.
  200. # To work around `Linearizer`s using a different reactor to sleep when
  201. # contended (#12841), we call `runUntilCurrent` on
  202. # `twisted.internet.reactor`, which is a different reactor to that used
  203. # by the homeserver.
  204. assert isinstance(reactor, ReactorBase)
  205. self.get_success(task1)
  206. reactor.runUntilCurrent()
  207. self.get_success(task2)
  208. reactor.runUntilCurrent()
  209. self.get_success(task3)
  210. # At most one task should have held the lock at a time.
  211. self.assertEqual(max_in_lock, 3)
  212. def test_write_lock_acquired(self) -> None:
  213. """Test that we can take out a write lock and that while we hold it
  214. nobody else can take it out.
  215. """
  216. # First to acquire this lock, so it should complete
  217. lock = self.get_success(
  218. self.store.try_acquire_read_write_lock("name", "key", write=True)
  219. )
  220. assert lock is not None
  221. # Enter the context manager
  222. self.get_success(lock.__aenter__())
  223. # Attempting to acquire the lock again fails, as both read and write.
  224. lock2 = self.get_success(
  225. self.store.try_acquire_read_write_lock("name", "key", write=True)
  226. )
  227. self.assertIsNone(lock2)
  228. lock3 = self.get_success(
  229. self.store.try_acquire_read_write_lock("name", "key", write=False)
  230. )
  231. self.assertIsNone(lock3)
  232. # Calling `is_still_valid` reports true.
  233. self.assertTrue(self.get_success(lock.is_still_valid()))
  234. # Drop the lock
  235. self.get_success(lock.__aexit__(None, None, None))
  236. # We can now acquire the lock again.
  237. lock4 = self.get_success(
  238. self.store.try_acquire_read_write_lock("name", "key", write=True)
  239. )
  240. assert lock4 is not None
  241. self.get_success(lock4.__aenter__())
  242. self.get_success(lock4.__aexit__(None, None, None))
  243. def test_read_lock_acquired(self) -> None:
  244. """Test that we can take out a read lock and that while we hold it
  245. only other reads can use it.
  246. """
  247. # First to acquire this lock, so it should complete
  248. lock = self.get_success(
  249. self.store.try_acquire_read_write_lock("name", "key", write=False)
  250. )
  251. assert lock is not None
  252. # Enter the context manager
  253. self.get_success(lock.__aenter__())
  254. # Attempting to acquire the write lock fails
  255. lock2 = self.get_success(
  256. self.store.try_acquire_read_write_lock("name", "key", write=True)
  257. )
  258. self.assertIsNone(lock2)
  259. # Attempting to acquire a read lock succeeds
  260. lock3 = self.get_success(
  261. self.store.try_acquire_read_write_lock("name", "key", write=False)
  262. )
  263. assert lock3 is not None
  264. self.get_success(lock3.__aenter__())
  265. # Calling `is_still_valid` reports true.
  266. self.assertTrue(self.get_success(lock.is_still_valid()))
  267. # Drop the first lock
  268. self.get_success(lock.__aexit__(None, None, None))
  269. # Attempting to acquire the write lock still fails, as lock3 is still
  270. # active.
  271. lock4 = self.get_success(
  272. self.store.try_acquire_read_write_lock("name", "key", write=True)
  273. )
  274. self.assertIsNone(lock4)
  275. # Drop the still open third lock
  276. self.get_success(lock3.__aexit__(None, None, None))
  277. # We can now acquire the lock again.
  278. lock5 = self.get_success(
  279. self.store.try_acquire_read_write_lock("name", "key", write=True)
  280. )
  281. assert lock5 is not None
  282. self.get_success(lock5.__aenter__())
  283. self.get_success(lock5.__aexit__(None, None, None))
  284. def test_maintain_lock(self) -> None:
  285. """Test that we don't time out locks while they're still active (lock is
  286. renewed in the background if the process is still alive)"""
  287. lock = self.get_success(
  288. self.store.try_acquire_read_write_lock("name", "key", write=True)
  289. )
  290. assert lock is not None
  291. self.get_success(lock.__aenter__())
  292. # Wait for ages with the lock, we should not be able to get the lock.
  293. for _ in range(10):
  294. self.reactor.advance((_RENEWAL_INTERVAL_MS / 1000))
  295. lock2 = self.get_success(
  296. self.store.try_acquire_read_write_lock("name", "key", write=True)
  297. )
  298. self.assertIsNone(lock2)
  299. self.get_success(lock.__aexit__(None, None, None))
  300. def test_timeout_lock(self) -> None:
  301. """Test that we time out locks if they're not updated for ages"""
  302. lock = self.get_success(
  303. self.store.try_acquire_read_write_lock("name", "key", write=True)
  304. )
  305. assert lock is not None
  306. self.get_success(lock.__aenter__())
  307. # We simulate the process getting stuck by cancelling the looping call
  308. # that keeps the lock active.
  309. assert lock._looping_call
  310. lock._looping_call.stop()
  311. # Wait for the lock to timeout.
  312. self.reactor.advance(2 * _LOCK_TIMEOUT_MS / 1000)
  313. lock2 = self.get_success(
  314. self.store.try_acquire_read_write_lock("name", "key", write=True)
  315. )
  316. self.assertIsNotNone(lock2)
  317. self.assertFalse(self.get_success(lock.is_still_valid()))
  318. def test_drop(self) -> None:
  319. """Test that dropping the context manager means we stop renewing the lock"""
  320. lock = self.get_success(
  321. self.store.try_acquire_read_write_lock("name", "key", write=True)
  322. )
  323. self.assertIsNotNone(lock)
  324. del lock
  325. # Wait for the lock to timeout.
  326. self.reactor.advance(2 * _LOCK_TIMEOUT_MS / 1000)
  327. lock2 = self.get_success(
  328. self.store.try_acquire_read_write_lock("name", "key", write=True)
  329. )
  330. self.assertIsNotNone(lock2)
  331. def test_shutdown(self) -> None:
  332. """Test that shutting down Synapse releases the locks"""
  333. # Acquire two locks
  334. lock = self.get_success(
  335. self.store.try_acquire_read_write_lock("name", "key", write=True)
  336. )
  337. self.assertIsNotNone(lock)
  338. lock2 = self.get_success(
  339. self.store.try_acquire_read_write_lock("name", "key2", write=True)
  340. )
  341. self.assertIsNotNone(lock2)
  342. # Now call the shutdown code
  343. self.get_success(self.store._on_shutdown())
  344. self.assertEqual(self.store._live_read_write_lock_tokens, {})
  345. def test_acquire_multiple_locks(self) -> None:
  346. """Tests that acquiring multiple locks at once works."""
  347. # Take out multiple locks and ensure that we can't get those locks out
  348. # again.
  349. lock = self.get_success(
  350. self.store.try_acquire_multi_read_write_lock(
  351. [("name1", "key1"), ("name2", "key2")], write=True
  352. )
  353. )
  354. self.assertIsNotNone(lock)
  355. assert lock is not None
  356. self.get_success(lock.__aenter__())
  357. lock2 = self.get_success(
  358. self.store.try_acquire_read_write_lock("name1", "key1", write=True)
  359. )
  360. self.assertIsNone(lock2)
  361. lock3 = self.get_success(
  362. self.store.try_acquire_read_write_lock("name2", "key2", write=False)
  363. )
  364. self.assertIsNone(lock3)
  365. # Overlapping locks attempts will fail, and won't lock any locks.
  366. lock4 = self.get_success(
  367. self.store.try_acquire_multi_read_write_lock(
  368. [("name1", "key1"), ("name3", "key3")], write=True
  369. )
  370. )
  371. self.assertIsNone(lock4)
  372. lock5 = self.get_success(
  373. self.store.try_acquire_read_write_lock("name3", "key3", write=True)
  374. )
  375. self.assertIsNotNone(lock5)
  376. assert lock5 is not None
  377. self.get_success(lock5.__aenter__())
  378. self.get_success(lock5.__aexit__(None, None, None))
  379. # Once we release the lock we can take out the locks again.
  380. self.get_success(lock.__aexit__(None, None, None))
  381. lock6 = self.get_success(
  382. self.store.try_acquire_read_write_lock("name1", "key1", write=True)
  383. )
  384. self.assertIsNotNone(lock6)
  385. assert lock6 is not None
  386. self.get_success(lock6.__aenter__())
  387. self.get_success(lock6.__aexit__(None, None, None))