030-patch.4.7.25.3.patch 6.0 KB

  1. --- a/lock/lock_deadlock.c
  2. +++ b/lock/lock_deadlock.c
  3. @@ -121,7 +121,7 @@ __lock_detect(env, atype, rejectp)
  4. DB_LOCKTAB *lt;
  5. db_timespec now;
  6. locker_info *idmap;
  7. - u_int32_t *bitmap, *copymap, **deadp, **free_me, *tmpmap;
  8. + u_int32_t *bitmap, *copymap, **deadp, **deadlist, *tmpmap;
  9. u_int32_t i, cid, keeper, killid, limit, nalloc, nlockers;
  10. u_int32_t lock_max, txn_max;
  11. int ret, status;
  12. @@ -133,7 +133,8 @@ __lock_detect(env, atype, rejectp)
  13. if (IS_REP_CLIENT(env))
  14. atype = DB_LOCK_MINWRITE;
  15. - free_me = NULL;
  16. + copymap = tmpmap = NULL;
  17. + deadlist = NULL;
  18. lt = env->lk_handle;
  19. if (rejectp != NULL)
  20. @@ -179,11 +180,11 @@ __lock_detect(env, atype, rejectp)
  21. memcpy(copymap, bitmap, nlockers * sizeof(u_int32_t) * nalloc);
  22. if ((ret = __os_calloc(env, sizeof(u_int32_t), nalloc, &tmpmap)) != 0)
  23. - goto err1;
  24. + goto err;
  25. /* Find a deadlock. */
  26. if ((ret =
  27. - __dd_find(env, bitmap, idmap, nlockers, nalloc, &deadp)) != 0)
  28. + __dd_find(env, bitmap, idmap, nlockers, nalloc, &deadlist)) != 0)
  29. return (ret);
  30. /*
  31. @@ -204,8 +205,7 @@ __lock_detect(env, atype, rejectp)
  32. txn_max = TXN_MAXIMUM;
  33. killid = BAD_KILLID;
  34. - free_me = deadp;
  35. - for (; *deadp != NULL; deadp++) {
  36. + for (deadp = deadlist; *deadp != NULL; deadp++) {
  37. if (rejectp != NULL)
  38. ++*rejectp;
  39. killid = (u_int32_t)(*deadp - bitmap) / nalloc;
  40. @@ -342,11 +342,12 @@ dokill: if (killid == BAD_KILLID) {
  41. __db_msg(env,
  42. "Aborting locker %lx", (u_long)idmap[killid].id);
  43. }
  44. - __os_free(env, tmpmap);
  45. -err1: __os_free(env, copymap);
  46. -
  47. -err: if (free_me != NULL)
  48. - __os_free(env, free_me);
  49. +err: if(copymap != NULL)
  50. + __os_free(env, copymap);
  51. + if (deadlist != NULL)
  52. + __os_free(env, deadlist);
  53. + if(tmpmap != NULL)
  54. + __os_free(env, tmpmap);
  55. __os_free(env, bitmap);
  56. __os_free(env, idmap);
  57. @@ -360,6 +361,17 @@ err: if (free_me != NULL)
  58. #define DD_INVALID_ID ((u_int32_t) -1)
  59. +/*
  60. + * __dd_build --
  61. + * Build the lock dependency bit maps.
  62. + * Notes on synchronization:
  63. + * LOCK_SYSTEM_LOCK is used to hold objects locked when we have
  64. + * a single partition.
  65. + * LOCK_LOCKERS is held while we are walking the lockers list and
  66. + * to single thread the use of lockerp->dd_id.
  67. + * LOCK_DD protects the DD list of objects.
  68. + */
  69. +
  70. static int
  71. __dd_build(env, atype, bmp, nlockers, allocp, idmap, rejectp)
  72. ENV *env;
  73. @@ -393,6 +405,7 @@ __dd_build(env, atype, bmp, nlockers, al
  74. * In particular we do not build the conflict array and our caller
  75. * needs to expect this.
  76. */
  77. + LOCK_SYSTEM_LOCK(lt, region);
  78. if (atype == DB_LOCK_EXPIRE) {
  79. skip: LOCK_DD(env, region);
  80. op = SH_TAILQ_FIRST(&region->dd_objs, __db_lockobj);
  81. @@ -430,17 +443,18 @@ skip: LOCK_DD(env, region);
  82. OBJECT_UNLOCK(lt, region, indx);
  83. }
  84. UNLOCK_DD(env, region);
  85. + LOCK_SYSTEM_UNLOCK(lt, region);
  86. goto done;
  87. }
  88. /*
  89. - * We'll check how many lockers there are, add a few more in for
  90. - * good measure and then allocate all the structures. Then we'll
  91. - * verify that we have enough room when we go back in and get the
  92. - * mutex the second time.
  93. + * Allocate after locking the region
  94. + * to make sure the structures are large enough.
  95. */
  96. -retry: count = region->stat.st_nlockers;
  97. + LOCK_LOCKERS(env, region);
  98. + count = region->stat.st_nlockers;
  99. if (count == 0) {
  100. + UNLOCK_LOCKERS(env, region);
  101. *nlockers = 0;
  102. return (0);
  103. }
  104. @@ -448,50 +462,37 @@ retry: count = region->stat.st_nlockers;
  105. if (FLD_ISSET(env->dbenv->verbose, DB_VERB_DEADLOCK))
  106. __db_msg(env, "%lu lockers", (u_long)count);
  107. - count += 20;
  108. nentries = (u_int32_t)DB_ALIGN(count, 32) / 32;
  109. - /*
  110. - * Allocate enough space for a count by count bitmap matrix.
  111. - *
  112. - * XXX
  113. - * We can probably save the malloc's between iterations just
  114. - * reallocing if necessary because count grew by too much.
  115. - */
  116. + /* Allocate enough space for a count by count bitmap matrix. */
  117. if ((ret = __os_calloc(env, (size_t)count,
  118. - sizeof(u_int32_t) * nentries, &bitmap)) != 0)
  119. + sizeof(u_int32_t) * nentries, &bitmap)) != 0) {
  120. + UNLOCK_LOCKERS(env, region);
  121. return (ret);
  122. + }
  123. if ((ret = __os_calloc(env,
  124. sizeof(u_int32_t), nentries, &tmpmap)) != 0) {
  125. + UNLOCK_LOCKERS(env, region);
  126. __os_free(env, bitmap);
  127. return (ret);
  128. }
  129. if ((ret = __os_calloc(env,
  130. (size_t)count, sizeof(locker_info), &id_array)) != 0) {
  131. + UNLOCK_LOCKERS(env, region);
  132. __os_free(env, bitmap);
  133. __os_free(env, tmpmap);
  134. return (ret);
  135. }
  136. /*
  137. - * Now go back in and actually fill in the matrix.
  138. - */
  139. - if (region->stat.st_nlockers > count) {
  140. - __os_free(env, bitmap);
  141. - __os_free(env, tmpmap);
  142. - __os_free(env, id_array);
  143. - goto retry;
  144. - }
  145. -
  146. - /*
  147. * First we go through and assign each locker a deadlock detector id.
  148. */
  149. id = 0;
  150. - LOCK_LOCKERS(env, region);
  151. SH_TAILQ_FOREACH(lip, &region->lockers, ulinks, __db_locker) {
  152. if (lip->master_locker == INVALID_ROFF) {
  153. + DB_ASSERT(env, id < count);
  154. lip->dd_id = id++;
  155. id_array[lip->dd_id].id = lip->id;
  156. switch (atype) {
  157. @@ -510,7 +511,6 @@ retry: count = region->stat.st_nlockers;
  158. lip->dd_id = DD_INVALID_ID;
  159. }
  160. - UNLOCK_LOCKERS(env, region);
  161. /*
  162. * We only need consider objects that have waiters, so we use
  163. @@ -669,7 +669,6 @@ again: memset(bitmap, 0, count * sizeof
  164. * status after building the bit maps so that we will not detect
  165. * a blocked transaction without noting that it is already aborting.
  166. */
  167. - LOCK_LOCKERS(env, region);
  168. for (id = 0; id < count; id++) {
  169. if (!id_array[id].valid)
  170. continue;
  171. @@ -738,6 +737,7 @@ get_lock: id_array[id].last_lock = R_OF
  172. id_array[id].in_abort = 1;
  173. }
  174. UNLOCK_LOCKERS(env, region);
  175. + LOCK_SYSTEM_UNLOCK(lt, region);
  176. /*
  177. * Now we can release everything except the bitmap matrix that we
  178. @@ -839,6 +839,7 @@ __dd_abort(env, info, statusp)
  179. ret = 0;
  180. /* We must lock so this locker cannot go away while we abort it. */
  181. + LOCK_SYSTEM_LOCK(lt, region);
  182. LOCK_LOCKERS(env, region);
  183. /*
  184. @@ -895,6 +896,7 @@ __dd_abort(env, info, statusp)
  185. done: OBJECT_UNLOCK(lt, region, info->last_ndx);
  186. err:
  187. out: UNLOCK_LOCKERS(env, region);
  188. + LOCK_SYSTEM_UNLOCK(lt, region);
  189. return (ret);
  190. }