/* ffs_snapshot.c — FFS snapshot support (port of FreeBSD sys/ufs/ffs/ffs_snapshot.c) */
  1. /*-
  2. * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
  3. *
  4. * Further information about snapshots can be obtained from:
  5. *
  6. * Marshall Kirk McKusick http://www.mckusick.com/softdep/
  7. * 1614 Oxford Street mckusick@mckusick.com
  8. * Berkeley, CA 94709-1608 +1-510-843-9542
  9. * USA
  10. *
  11. * Redistribution and use in source and binary forms, with or without
  12. * modification, are permitted provided that the following conditions
  13. * are met:
  14. *
  15. * 1. Redistributions of source code must retain the above copyright
  16. * notice, this list of conditions and the following disclaimer.
  17. * 2. Redistributions in binary form must reproduce the above copyright
  18. * notice, this list of conditions and the following disclaimer in the
  19. * documentation and/or other materials provided with the distribution.
  20. *
  21. * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
  22. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  23. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  24. * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
  25. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31. * SUCH DAMAGE.
  32. *
  33. * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00
  34. */
  35. #include "u.h"
  36. #include "port/lib.h"
  37. #include "mem.h"
  38. #include "dat.h"
  39. #include "fns.h"
  40. #include "ufsdat.h"
  41. #include <ufs/libufsdat.h>
  42. #include "ufsfns.h"
  43. #include <ufs/freebsd_util.h>
  44. //#include <ufs/ufs/extattr.h>
  45. #include "ufs/quota.h"
  46. //#include <ufs/ufs/ufsmount.h>
  47. #include "ufs/inode.h"
  48. #include "ufs/dinode.h"
  49. #include "softdep.h"
  50. #include "ufs_extern.h"
  51. #include "ufs/fs.h"
  52. //#include <ufs/ffs/ffs_extern.h>
  53. //#define KERNCRED thread0.td_ucred
  54. //#define DEBUG 1
  55. #ifdef NO_FFS_SNAPSHOT
  56. /*int
  57. ffs_snapshot (MountPoint *mp, char *snapfile)
  58. {
  59. return (EINVAL);
  60. }*/
/*
 * Stub compiled when NO_FFS_SNAPSHOT is defined (see #ifdef above):
 * snapshot block-free processing is unavailable in this configuration,
 * so every call fails with EINVAL. All parameters are ignored; they
 * mirror the real ffs_snapblkfree() so callers link unchanged.
 *
 * Returns: EINVAL unconditionally.
 */
int
ffs_snapblkfree(Fs *fs, vnode *devvp, ufs2_daddr_t bno, long size, ino_t inum,
	Vtype vtype, struct workhead *wkhd)
{
	return (EINVAL);
}
  67. /*void
  68. ffs_snapremove (vnode *vp)
  69. {
  70. }*/
/*
 * Stub compiled when NO_FFS_SNAPSHOT is defined: with snapshot support
 * compiled out there are no snapshot inodes to activate at mount time,
 * so this is a no-op. The mount point argument is ignored.
 */
void
ffs_snapshot_mount (MountPoint *mp)
{
}
  75. /*void
  76. ffs_snapshot_unmount (MountPoint *mp)
  77. {
  78. }*/
/*
 * Stub compiled when NO_FFS_SNAPSHOT is defined: no snapshot bookkeeping
 * exists in this configuration, so notification that a snapshot inode
 * has gone away is a no-op. The inode argument is ignored.
 */
void
ffs_snapgone (inode *ip)
{
}
  83. /*int
  84. ffs_copyonwrite (vnode *devvp, buf *bp)
  85. {
  86. return (EINVAL);
  87. }
  88. void
  89. ffs_sync_snap (MountPoint *mp, int waitfor)
  90. {
  91. }*/
  92. #else
  93. #if 0
  94. FEATURE(ffs_snapshot, "FFS snapshot support");
  95. LIST_HEAD(, snapdata) snapfree;
  96. static struct mtx snapfree_lock;
  97. MTX_SYSINIT(ffs_snapfree, &snapfree_lock, "snapdata free list", MTX_DEF);
  98. static int cgaccount(int, struct vnode *, struct buf *, int);
  99. static int expunge_ufs1(struct vnode *, struct inode *, struct fs *,
  100. int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
  101. ufs_lbn_t, int), int, int);
  102. static int indiracct_ufs1(struct vnode *, struct vnode *, int,
  103. ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
  104. int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
  105. ufs_lbn_t, int), int);
  106. static int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
  107. struct fs *, ufs_lbn_t, int);
  108. static int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
  109. struct fs *, ufs_lbn_t, int);
  110. static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
  111. struct fs *, ufs_lbn_t, int);
  112. static int expunge_ufs2(struct vnode *, struct inode *, struct fs *,
  113. int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
  114. ufs_lbn_t, int), int, int);
  115. static int indiracct_ufs2(struct vnode *, struct vnode *, int,
  116. ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
  117. int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
  118. ufs_lbn_t, int), int);
  119. static int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
  120. struct fs *, ufs_lbn_t, int);
  121. static int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
  122. struct fs *, ufs_lbn_t, int);
  123. static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
  124. struct fs *, ufs_lbn_t, int);
  125. static int readblock(struct vnode *vp, struct buf *, ufs2_daddr_t);
  126. static void try_free_snapdata(struct vnode *devvp);
  127. static struct snapdata *ffs_snapdata_acquire(struct vnode *devvp);
  128. static int ffs_bp_snapblk(struct vnode *, struct buf *);
  129. /*
  130. * To ensure the consistency of snapshots across crashes, we must
  131. * synchronously write out copied blocks before allowing the
  132. * originals to be modified. Because of the rather severe speed
  133. * penalty that this imposes, the code normally only ensures
  134. * persistence for the filesystem metadata contained within a
  135. * snapshot. Setting the following flag allows this crash
  136. * persistence to be enabled for file contents.
  137. */
  138. int dopersistence = 0;
  139. #ifdef DEBUG
  140. SYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, "");
  141. static int snapdebug = 0;
  142. SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, "");
  143. int collectsnapstats = 0;
  144. SYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats,
  145. 0, "");
  146. #endif /* DEBUG */
  147. /*
  148. * Create a snapshot file and initialize it for the filesystem.
  149. */
  150. int
  151. ffs_snapshot (struct mount *mp, char *snapfile)
  152. {
  153. ufs2_daddr_t numblks, blkno, *blkp, *snapblklist;
  154. int error, cg, snaploc;
  155. int i, size, len, loc;
  156. ufs2_daddr_t blockno;
  157. uint64_t flag;
  158. struct timespec starttime = {0, 0}, endtime;
  159. char saved_nice = 0;
  160. long redo = 0, snaplistsize = 0;
  161. int32_t *lp;
  162. void *space;
  163. struct fs *copy_fs = nil, *fs;
  164. struct thread *td = curthread;
  165. struct inode *ip, *xp;
  166. struct buf *bp, *nbp, *ibp;
  167. struct nameidata nd;
  168. struct mount *wrtmp;
  169. struct vattr vat;
  170. struct vnode *vp, *xvp, *mvp, *devvp;
  171. struct uio auio;
  172. struct iovec aiov;
  173. struct snapdata *sn;
  174. struct ufsmount *ump;
  175. ump = VFSTOUFS(mp);
  176. fs = ump->um_fs;
  177. sn = nil;
  178. /*
  179. * At the moment, journaled soft updates cannot support
  180. * taking snapshots.
  181. */
  182. if (MOUNTEDSUJ(mp)) {
  183. vfs_mount_error(mp, "%s: Snapshots are not yet supported when "
  184. "running with journaled soft updates", fs->fs_fsmnt);
  185. return (EOPNOTSUPP);
  186. }
  187. MNT_ILOCK(mp);
  188. flag = mp->mnt_flag;
  189. MNT_IUNLOCK(mp);
  190. /*
  191. * Need to serialize access to snapshot code per filesystem.
  192. */
  193. /*
  194. * Assign a snapshot slot in the superblock.
  195. */
  196. UFS_LOCK(ump);
  197. for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
  198. if (fs->fs_snapinum[snaploc] == 0)
  199. break;
  200. UFS_UNLOCK(ump);
  201. if (snaploc == FSMAXSNAP)
  202. return (ENOSPC);
  203. /*
  204. * Create the snapshot file.
  205. */
  206. restart:
  207. NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF | NOCACHE, UIO_SYSSPACE,
  208. snapfile, td);
  209. if ((error = namei(&nd)) != 0)
  210. return (error);
  211. if (nd.ni_vp != nil) {
  212. vput(nd.ni_vp);
  213. error = EEXIST;
  214. }
  215. if (nd.ni_dvp->v_mount != mp)
  216. error = EXDEV;
  217. if (error) {
  218. NDFREE(&nd, NDF_ONLY_PNBUF);
  219. if (nd.ni_dvp == nd.ni_vp)
  220. vrele(nd.ni_dvp);
  221. else
  222. vput(nd.ni_dvp);
  223. return (error);
  224. }
  225. VATTR_NULL(&vat);
  226. vat.va_type = VREG;
  227. vat.va_mode = S_IRUSR;
  228. vat.va_vaflags |= VA_EXCLUSIVE;
  229. if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp))
  230. wrtmp = nil;
  231. if (wrtmp != mp)
  232. panic("ffs_snapshot: mount mismatch");
  233. vfs_rel(wrtmp);
  234. if (vn_start_write(nil, &wrtmp, V_NOWAIT) != 0) {
  235. NDFREE(&nd, NDF_ONLY_PNBUF);
  236. vput(nd.ni_dvp);
  237. if ((error = vn_start_write(nil, &wrtmp,
  238. V_XSLEEP | PCATCH)) != 0)
  239. return (error);
  240. goto restart;
  241. }
  242. error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat);
  243. VOP_UNLOCK(nd.ni_dvp, 0);
  244. if (error) {
  245. NDFREE(&nd, NDF_ONLY_PNBUF);
  246. vn_finished_write(wrtmp);
  247. vrele(nd.ni_dvp);
  248. return (error);
  249. }
  250. vp = nd.ni_vp;
  251. vp->v_vflag |= VV_SYSTEM;
  252. ip = VTOI(vp);
  253. devvp = ITODEVVP(ip);
  254. /*
  255. * Allocate and copy the last block contents so as to be able
  256. * to set size to that of the filesystem.
  257. */
  258. numblks = howmany(fs->fs_size, fs->fs_frag);
  259. error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)),
  260. fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp);
  261. if (error)
  262. goto out;
  263. ip->i_size = lblktosize(fs, (off_t)numblks);
  264. DIP_SET(ip, i_size, ip->i_size);
  265. ip->i_flag |= IN_CHANGE | IN_UPDATE;
  266. error = readblock(vp, bp, numblks - 1);
  267. bawrite(bp);
  268. if (error != 0)
  269. goto out;
  270. /*
  271. * Preallocate critical data structures so that we can copy
  272. * them in without further allocation after we suspend all
  273. * operations on the filesystem. We would like to just release
  274. * the allocated buffers without writing them since they will
  275. * be filled in below once we are ready to go, but this upsets
  276. * the soft update code, so we go ahead and write the new buffers.
  277. *
  278. * Allocate all indirect blocks and mark all of them as not
  279. * needing to be copied.
  280. */
  281. for (blkno = UFS_NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
  282. error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno),
  283. fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp);
  284. if (error)
  285. goto out;
  286. bawrite(ibp);
  287. }
  288. /*
  289. * Allocate copies for the superblock and its summary information.
  290. */
  291. error = UFS_BALLOC(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED,
  292. 0, &nbp);
  293. if (error)
  294. goto out;
  295. bawrite(nbp);
  296. blkno = fragstoblks(fs, fs->fs_csaddr);
  297. len = howmany(fs->fs_cssize, fs->fs_bsize);
  298. for (loc = 0; loc < len; loc++) {
  299. error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)),
  300. fs->fs_bsize, KERNCRED, 0, &nbp);
  301. if (error)
  302. goto out;
  303. bawrite(nbp);
  304. }
  305. /*
  306. * Allocate all cylinder group blocks.
  307. */
  308. for (cg = 0; cg < fs->fs_ncg; cg++) {
  309. error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)),
  310. fs->fs_bsize, KERNCRED, 0, &nbp);
  311. if (error)
  312. goto out;
  313. bawrite(nbp);
  314. if (cg % 10 == 0)
  315. ffs_syncvnode(vp, MNT_WAIT, 0);
  316. }
  317. /*
  318. * Copy all the cylinder group maps. Although the
  319. * filesystem is still active, we hope that only a few
  320. * cylinder groups will change between now and when we
  321. * suspend operations. Thus, we will be able to quickly
  322. * touch up the few cylinder groups that changed during
  323. * the suspension period.
  324. */
  325. len = howmany(fs->fs_ncg, NBBY);
  326. space = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
  327. UFS_LOCK(ump);
  328. fs->fs_active = space;
  329. UFS_UNLOCK(ump);
  330. for (cg = 0; cg < fs->fs_ncg; cg++) {
  331. error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)),
  332. fs->fs_bsize, KERNCRED, 0, &nbp);
  333. if (error)
  334. goto out;
  335. error = cgaccount(cg, vp, nbp, 1);
  336. bawrite(nbp);
  337. if (cg % 10 == 0)
  338. ffs_syncvnode(vp, MNT_WAIT, 0);
  339. if (error)
  340. goto out;
  341. }
  342. /*
  343. * Change inode to snapshot type file.
  344. */
  345. ip->i_flags |= SF_SNAPSHOT;
  346. DIP_SET(ip, i_flags, ip->i_flags);
  347. ip->i_flag |= IN_CHANGE | IN_UPDATE;
  348. /*
  349. * Ensure that the snapshot is completely on disk.
  350. * Since we have marked it as a snapshot it is safe to
  351. * unlock it as no process will be allowed to write to it.
  352. */
  353. if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
  354. goto out;
  355. VOP_UNLOCK(vp, 0);
  356. /*
  357. * All allocations are done, so we can now snapshot the system.
  358. *
  359. * Recind nice scheduling while running with the filesystem suspended.
  360. */
  361. if (td->td_proc->p_nice > 0) {
  362. struct proc *p;
  363. p = td->td_proc;
  364. PROC_LOCK(p);
  365. saved_nice = p->p_nice;
  366. sched_nice(p, 0);
  367. PROC_UNLOCK(p);
  368. }
  369. /*
  370. * Suspend operation on filesystem.
  371. */
  372. for (;;) {
  373. vn_finished_write(wrtmp);
  374. if ((error = vfs_write_suspend(vp->v_mount, 0)) != 0) {
  375. vn_start_write(nil, &wrtmp, V_WAIT);
  376. vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  377. goto out;
  378. }
  379. if (mp->mnt_kern_flag & MNTK_SUSPENDED)
  380. break;
  381. vn_start_write(nil, &wrtmp, V_WAIT);
  382. }
  383. vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  384. if (ip->i_effnlink == 0) {
  385. error = ENOENT; /* Snapshot file unlinked */
  386. goto out1;
  387. }
  388. if (collectsnapstats)
  389. nanotime(&starttime);
  390. /* The last block might have changed. Copy it again to be sure. */
  391. error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)),
  392. fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp);
  393. if (error != 0)
  394. goto out1;
  395. error = readblock(vp, bp, numblks - 1);
  396. bp->b_flags |= B_VALIDSUSPWRT;
  397. bawrite(bp);
  398. if (error != 0)
  399. goto out1;
  400. /*
  401. * First, copy all the cylinder group maps that have changed.
  402. */
  403. for (cg = 0; cg < fs->fs_ncg; cg++) {
  404. if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0)
  405. continue;
  406. redo++;
  407. error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)),
  408. fs->fs_bsize, KERNCRED, 0, &nbp);
  409. if (error)
  410. goto out1;
  411. error = cgaccount(cg, vp, nbp, 2);
  412. bawrite(nbp);
  413. if (error)
  414. goto out1;
  415. }
  416. /*
  417. * Grab a copy of the superblock and its summary information.
  418. * We delay writing it until the suspension is released below.
  419. */
  420. copy_fs = malloc((uint64_t)fs->fs_bsize, M_UFSMNT, M_WAITOK);
  421. bcopy(fs, copy_fs, fs->fs_sbsize);
  422. if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
  423. copy_fs->fs_clean = 1;
  424. size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE;
  425. if (fs->fs_sbsize < size)
  426. bzero(&((char *)copy_fs)[fs->fs_sbsize],
  427. size - fs->fs_sbsize);
  428. size = blkroundup(fs, fs->fs_cssize);
  429. if (fs->fs_contigsumsize > 0)
  430. size += fs->fs_ncg * sizeof(int32_t);
  431. space = malloc((uint64_t)size, M_UFSMNT, M_WAITOK);
  432. copy_fs->fs_csp = space;
  433. bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize);
  434. space = (char *)space + fs->fs_cssize;
  435. loc = howmany(fs->fs_cssize, fs->fs_fsize);
  436. i = fs->fs_frag - loc % fs->fs_frag;
  437. len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize;
  438. if (len > 0) {
  439. if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc),
  440. len, KERNCRED, &bp)) != 0) {
  441. brelse(bp);
  442. free(copy_fs->fs_csp, M_UFSMNT);
  443. free(copy_fs, M_UFSMNT);
  444. copy_fs = nil;
  445. goto out1;
  446. }
  447. bcopy(bp->b_data, space, (uint)len);
  448. space = (char *)space + len;
  449. bp->b_flags |= B_INVAL | B_NOCACHE;
  450. brelse(bp);
  451. }
  452. if (fs->fs_contigsumsize > 0) {
  453. copy_fs->fs_maxcluster = lp = space;
  454. for (i = 0; i < fs->fs_ncg; i++)
  455. *lp++ = fs->fs_contigsumsize;
  456. }
  457. /*
  458. * We must check for active files that have been unlinked
  459. * (e.g., with a zero link count). We have to expunge all
  460. * trace of these files from the snapshot so that they are
  461. * not reclaimed prematurely by fsck or unnecessarily dumped.
  462. * We turn off the MNTK_SUSPENDED flag to avoid a panic from
  463. * spec_strategy about writing on a suspended filesystem.
  464. * Note that we skip unlinked snapshot files as they will
  465. * be handled separately below.
  466. *
  467. * We also calculate the needed size for the snapshot list.
  468. */
  469. snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
  470. FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */;
  471. MNT_ILOCK(mp);
  472. mp->mnt_kern_flag &= ~MNTK_SUSPENDED;
  473. MNT_IUNLOCK(mp);
  474. loop:
  475. MNT_VNODE_FOREACH_ALL(xvp, mp, mvp) {
  476. if ((xvp->v_usecount == 0 &&
  477. (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) ||
  478. xvp->v_type == VNON ||
  479. IS_SNAPSHOT(VTOI(xvp))) {
  480. VI_UNLOCK(xvp);
  481. continue;
  482. }
  483. /*
  484. * We can skip parent directory vnode because it must have
  485. * this snapshot file in it.
  486. */
  487. if (xvp == nd.ni_dvp) {
  488. VI_UNLOCK(xvp);
  489. continue;
  490. }
  491. vholdl(xvp);
  492. if (vn_lock(xvp, LK_EXCLUSIVE | LK_INTERLOCK) != 0) {
  493. MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
  494. vdrop(xvp);
  495. goto loop;
  496. }
  497. VI_LOCK(xvp);
  498. if (xvp->v_usecount == 0 &&
  499. (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) {
  500. VI_UNLOCK(xvp);
  501. VOP_UNLOCK(xvp, 0);
  502. vdrop(xvp);
  503. continue;
  504. }
  505. VI_UNLOCK(xvp);
  506. if (snapdebug)
  507. vn_printf(xvp, "ffs_snapshot: busy vnode ");
  508. if (VOP_GETATTR(xvp, &vat, td->td_ucred) == 0 &&
  509. vat.va_nlink > 0) {
  510. VOP_UNLOCK(xvp, 0);
  511. vdrop(xvp);
  512. continue;
  513. }
  514. xp = VTOI(xvp);
  515. if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) {
  516. VOP_UNLOCK(xvp, 0);
  517. vdrop(xvp);
  518. continue;
  519. }
  520. /*
  521. * If there is a fragment, clear it here.
  522. */
  523. blkno = 0;
  524. loc = howmany(xp->i_size, fs->fs_bsize) - 1;
  525. if (loc < UFS_NDADDR) {
  526. len = fragroundup(fs, blkoff(fs, xp->i_size));
  527. if (len != 0 && len < fs->fs_bsize) {
  528. ffs_blkfree(ump, copy_fs, vp,
  529. DIP(xp, i_db[loc]), len, xp->i_number,
  530. xvp->v_type, nil);
  531. blkno = DIP(xp, i_db[loc]);
  532. DIP_SET(xp, i_db[loc], 0);
  533. }
  534. }
  535. snaplistsize += 1;
  536. if (I_IS_UFS1(xp))
  537. error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
  538. BLK_NOCOPY, 1);
  539. else
  540. error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
  541. BLK_NOCOPY, 1);
  542. if (blkno)
  543. DIP_SET(xp, i_db[loc], blkno);
  544. if (!error)
  545. error = ffs_freefile(ump, copy_fs, vp, xp->i_number,
  546. xp->i_mode, nil);
  547. VOP_UNLOCK(xvp, 0);
  548. vdrop(xvp);
  549. if (error) {
  550. free(copy_fs->fs_csp, M_UFSMNT);
  551. free(copy_fs, M_UFSMNT);
  552. copy_fs = nil;
  553. MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
  554. goto out1;
  555. }
  556. }
  557. /*
  558. * Erase the journal file from the snapshot.
  559. */
  560. if (fs->fs_flags & FS_SUJ) {
  561. error = softdep_journal_lookup(mp, &xvp);
  562. if (error) {
  563. free(copy_fs->fs_csp, M_UFSMNT);
  564. free(copy_fs, M_UFSMNT);
  565. copy_fs = nil;
  566. goto out1;
  567. }
  568. xp = VTOI(xvp);
  569. if (I_IS_UFS1(xp))
  570. error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
  571. BLK_NOCOPY, 0);
  572. else
  573. error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
  574. BLK_NOCOPY, 0);
  575. vput(xvp);
  576. }
  577. /*
  578. * Acquire a lock on the snapdata structure, creating it if necessary.
  579. */
  580. sn = ffs_snapdata_acquire(devvp);
  581. /*
  582. * Change vnode to use shared snapshot lock instead of the original
  583. * private lock.
  584. */
  585. vp->v_vnlock = &sn->sn_lock;
  586. lockmgr(&vp->v_lock, LK_RELEASE, nil);
  587. xp = TAILQ_FIRST(&sn->sn_head);
  588. /*
  589. * If this is the first snapshot on this filesystem, then we need
  590. * to allocate the space for the list of preallocated snapshot blocks.
  591. * This list will be refined below, but this preliminary one will
  592. * keep us out of deadlock until the full one is ready.
  593. */
  594. if (xp == nil) {
  595. snapblklist = malloc(snaplistsize * sizeof(daddr_t),
  596. M_UFSMNT, M_WAITOK);
  597. blkp = &snapblklist[1];
  598. *blkp++ = lblkno(fs, fs->fs_sblockloc);
  599. blkno = fragstoblks(fs, fs->fs_csaddr);
  600. for (cg = 0; cg < fs->fs_ncg; cg++) {
  601. if (fragstoblks(fs, cgtod(fs, cg) > blkno))
  602. break;
  603. *blkp++ = fragstoblks(fs, cgtod(fs, cg));
  604. }
  605. len = howmany(fs->fs_cssize, fs->fs_bsize);
  606. for (loc = 0; loc < len; loc++)
  607. *blkp++ = blkno + loc;
  608. for (; cg < fs->fs_ncg; cg++)
  609. *blkp++ = fragstoblks(fs, cgtod(fs, cg));
  610. snapblklist[0] = blkp - snapblklist;
  611. VI_LOCK(devvp);
  612. if (sn->sn_blklist != nil)
  613. panic("ffs_snapshot: non-empty list");
  614. sn->sn_blklist = snapblklist;
  615. sn->sn_listsize = blkp - snapblklist;
  616. VI_UNLOCK(devvp);
  617. }
  618. /*
  619. * Record snapshot inode. Since this is the newest snapshot,
  620. * it must be placed at the end of the list.
  621. */
  622. VI_LOCK(devvp);
  623. fs->fs_snapinum[snaploc] = ip->i_number;
  624. if (ip->i_nextsnap.tqe_prev != 0)
  625. panic("ffs_snapshot: %ju already on list",
  626. (uintmax_t)ip->i_number);
  627. TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap);
  628. devvp->v_vflag |= VV_COPYONWRITE;
  629. VI_UNLOCK(devvp);
  630. ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp");
  631. out1:
  632. KASSERT((sn != nil && copy_fs != nil && error == 0) ||
  633. (sn == nil && copy_fs == nil && error != 0),
  634. ("email phk@ and mckusick@"));
  635. /*
  636. * Resume operation on filesystem.
  637. */
  638. vfs_write_resume(vp->v_mount, VR_START_WRITE | VR_NO_SUSPCLR);
  639. if (collectsnapstats && starttime.tv_sec > 0) {
  640. nanotime(&endtime);
  641. timespecsub(&endtime, &starttime);
  642. printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n",
  643. vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec,
  644. endtime.tv_nsec / 1000000, redo, fs->fs_ncg);
  645. }
  646. if (copy_fs == nil)
  647. goto out;
  648. /*
  649. * Copy allocation information from all the snapshots in
  650. * this snapshot and then expunge them from its view.
  651. */
  652. TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) {
  653. if (xp == ip)
  654. break;
  655. if (I_IS_UFS1(xp))
  656. error = expunge_ufs1(vp, xp, fs, snapacct_ufs1,
  657. BLK_SNAP, 0);
  658. else
  659. error = expunge_ufs2(vp, xp, fs, snapacct_ufs2,
  660. BLK_SNAP, 0);
  661. if (error == 0 && xp->i_effnlink == 0) {
  662. error = ffs_freefile(ump,
  663. copy_fs,
  664. vp,
  665. xp->i_number,
  666. xp->i_mode, nil);
  667. }
  668. if (error) {
  669. fs->fs_snapinum[snaploc] = 0;
  670. goto done;
  671. }
  672. }
  673. /*
  674. * Allocate space for the full list of preallocated snapshot blocks.
  675. */
  676. snapblklist = malloc(snaplistsize * sizeof(daddr_t),
  677. M_UFSMNT, M_WAITOK);
  678. ip->i_snapblklist = &snapblklist[1];
  679. /*
  680. * Expunge the blocks used by the snapshots from the set of
  681. * blocks marked as used in the snapshot bitmaps. Also, collect
  682. * the list of allocated blocks in i_snapblklist.
  683. */
  684. if (I_IS_UFS1(ip))
  685. error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1,
  686. BLK_SNAP, 0);
  687. else
  688. error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2,
  689. BLK_SNAP, 0);
  690. if (error) {
  691. fs->fs_snapinum[snaploc] = 0;
  692. free(snapblklist, M_UFSMNT);
  693. goto done;
  694. }
  695. if (snaplistsize < ip->i_snapblklist - snapblklist)
  696. panic("ffs_snapshot: list too small");
  697. snaplistsize = ip->i_snapblklist - snapblklist;
  698. snapblklist[0] = snaplistsize;
  699. ip->i_snapblklist = 0;
  700. /*
  701. * Write out the list of allocated blocks to the end of the snapshot.
  702. */
  703. auio.uio_iov = &aiov;
  704. auio.uio_iovcnt = 1;
  705. aiov.iov_base = (void *)snapblklist;
  706. aiov.iov_len = snaplistsize * sizeof(daddr_t);
  707. auio.uio_resid = aiov.iov_len;
  708. auio.uio_offset = ip->i_size;
  709. auio.uio_segflg = UIO_SYSSPACE;
  710. auio.uio_rw = UIO_WRITE;
  711. auio.uio_td = td;
  712. if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) {
  713. fs->fs_snapinum[snaploc] = 0;
  714. free(snapblklist, M_UFSMNT);
  715. goto done;
  716. }
  717. /*
  718. * Write the superblock and its summary information
  719. * to the snapshot.
  720. */
  721. blkno = fragstoblks(fs, fs->fs_csaddr);
  722. len = howmany(fs->fs_cssize, fs->fs_bsize);
  723. space = copy_fs->fs_csp;
  724. for (loc = 0; loc < len; loc++) {
  725. error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp);
  726. if (error) {
  727. brelse(nbp);
  728. fs->fs_snapinum[snaploc] = 0;
  729. free(snapblklist, M_UFSMNT);
  730. goto done;
  731. }
  732. bcopy(space, nbp->b_data, fs->fs_bsize);
  733. space = (char *)space + fs->fs_bsize;
  734. bawrite(nbp);
  735. }
  736. error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize,
  737. KERNCRED, &nbp);
  738. if (error) {
  739. brelse(nbp);
  740. } else {
  741. loc = blkoff(fs, fs->fs_sblockloc);
  742. bcopy((char *)copy_fs, &nbp->b_data[loc], (uint)fs->fs_sbsize);
  743. bawrite(nbp);
  744. }
  745. /*
  746. * As this is the newest list, it is the most inclusive, so
  747. * should replace the previous list.
  748. */
  749. VI_LOCK(devvp);
  750. space = sn->sn_blklist;
  751. sn->sn_blklist = snapblklist;
  752. sn->sn_listsize = snaplistsize;
  753. VI_UNLOCK(devvp);
  754. if (space != nil)
  755. free(space, M_UFSMNT);
  756. /*
  757. * Preallocate all the direct blocks in the snapshot inode so
  758. * that we never have to write the inode itself to commit an
  759. * update to the contents of the snapshot. Note that once
  760. * created, the size of the snapshot will never change, so
  761. * there will never be a need to write the inode except to
  762. * update the non-integrity-critical time fields and
  763. * allocated-block count.
  764. */
  765. for (blockno = 0; blockno < UFS_NDADDR; blockno++) {
  766. if (DIP(ip, i_db[blockno]) != 0)
  767. continue;
  768. error = UFS_BALLOC(vp, lblktosize(fs, blockno),
  769. fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp);
  770. if (error)
  771. break;
  772. error = readblock(vp, bp, blockno);
  773. bawrite(bp);
  774. if (error != 0)
  775. break;
  776. }
  777. done:
  778. free(copy_fs->fs_csp, M_UFSMNT);
  779. free(copy_fs, M_UFSMNT);
  780. copy_fs = nil;
  781. out:
  782. NDFREE(&nd, NDF_ONLY_PNBUF);
  783. if (saved_nice > 0) {
  784. struct proc *p;
  785. p = td->td_proc;
  786. PROC_LOCK(p);
  787. sched_nice(td->td_proc, saved_nice);
  788. PROC_UNLOCK(td->td_proc);
  789. }
  790. UFS_LOCK(ump);
  791. if (fs->fs_active != 0) {
  792. free(fs->fs_active, M_DEVBUF);
  793. fs->fs_active = 0;
  794. }
  795. UFS_UNLOCK(ump);
  796. MNT_ILOCK(mp);
  797. mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA);
  798. MNT_IUNLOCK(mp);
  799. if (error)
  800. (void) ffs_truncate(vp, (off_t)0, 0, NOCRED);
  801. (void) ffs_syncvnode(vp, MNT_WAIT, 0);
  802. if (error)
  803. vput(vp);
  804. else
  805. VOP_UNLOCK(vp, 0);
  806. vrele(nd.ni_dvp);
  807. vn_finished_write(wrtmp);
  808. process_deferred_inactive(mp);
  809. return (error);
  810. }
  811. /*
  812. * Copy a cylinder group map. All the unallocated blocks are marked
  813. * BLK_NOCOPY so that the snapshot knows that it need not copy them
  814. * if they are later written. If passno is one, then this is a first
  815. * pass, so only setting needs to be done. If passno is 2, then this
  816. * is a revision to a previous pass which must be undone as the
  817. * replacement pass is done.
  818. */
static int
cgaccount (int cg, struct vnode *vp, struct buf *nbp, int passno)
{
	struct buf *bp, *ibp;
	struct inode *ip;
	struct cg *cgp;
	struct fs *fs;
	ufs2_daddr_t base, numblks;
	int error, len, loc, indiroff;

	ip = VTOI(vp);
	fs = ITOFS(ip);
	/* Read the on-disk cylinder group map for cg. */
	error = bread(ITODEVVP(ip), fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, KERNCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		/* On-disk cylinder group is corrupt. */
		brelse(bp);
		return (EIO);
	}
	UFS_LOCK(ITOUMP(ip));
	/* Mark this cylinder group as processed for the snapshot. */
	ACTIVESET(fs, cg);
	/*
	 * Recomputation of summary information might not have been performed
	 * at mount time. Sync up summary information for current cylinder
	 * group while data is in memory to ensure that result of background
	 * fsck is slightly more consistent.
	 */
	fs->fs_cs(fs, cg) = cgp->cg_cs;
	UFS_UNLOCK(ITOUMP(ip));
	/*
	 * Copy the map into the snapshot's buffer, zero-filling the tail
	 * when the cg map is smaller than a full filesystem block.
	 */
	bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize);
	if (fs->fs_cgsize < fs->fs_bsize)
		bzero(&nbp->b_data[fs->fs_cgsize],
		    fs->fs_bsize - fs->fs_cgsize);
	cgp = (struct cg *)nbp->b_data;
	bqrelse(bp);
	if (passno == 2)
		nbp->b_flags |= B_VALIDSUSPWRT;
	/* Range of logical snapshot blocks covered by this cylinder group. */
	numblks = howmany(fs->fs_size, fs->fs_frag);
	len = howmany(fs->fs_fpg, fs->fs_frag);
	base = cgbase(fs, cg) / fs->fs_frag;
	if (base + len >= numblks)
		len = numblks - base - 1;
	loc = 0;
	if (base < UFS_NDADDR) {
		/*
		 * Blocks that fall within the snapshot inode's direct
		 * pointers: mark free blocks BLK_NOCOPY, or undo a prior
		 * pass-one marking on pass two.
		 */
		for ( ; loc < UFS_NDADDR; loc++) {
			if (ffs_isblock(fs, cg_blksfree(cgp), loc))
				DIP_SET(ip, i_db[loc], BLK_NOCOPY);
			else if (passno == 2 && DIP(ip, i_db[loc])== BLK_NOCOPY)
				DIP_SET(ip, i_db[loc], 0);
			else if (passno == 1 && DIP(ip, i_db[loc])== BLK_NOCOPY)
				panic("ffs_snapshot: lost direct block");
		}
	}
	/* Remaining blocks live behind indirect pointers. */
	error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)),
	    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
	if (error) {
		return (error);
	}
	indiroff = (base + loc - UFS_NDADDR) % NINDIR(fs);
	for ( ; loc < len; loc++, indiroff++) {
		if (indiroff >= NINDIR(fs)) {
			/* Crossed into the next indirect block. */
			if (passno == 2)
				ibp->b_flags |= B_VALIDSUSPWRT;
			bawrite(ibp);
			error = UFS_BALLOC(vp,
			    lblktosize(fs, (off_t)(base + loc)),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error) {
				return (error);
			}
			indiroff = 0;
		}
		if (I_IS_UFS1(ip)) {
			/* UFS1 entries are 32-bit; same marking rules. */
			if (ffs_isblock(fs, cg_blksfree(cgp), loc))
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] =
				    BLK_NOCOPY;
			else if (passno == 2 && ((ufs1_daddr_t *)(ibp->b_data))
			    [indiroff] == BLK_NOCOPY)
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 0;
			else if (passno == 1 && ((ufs1_daddr_t *)(ibp->b_data))
			    [indiroff] == BLK_NOCOPY)
				panic("ffs_snapshot: lost indirect block");
			continue;
		}
		/* UFS2: 64-bit entries. */
		if (ffs_isblock(fs, cg_blksfree(cgp), loc))
			((ufs2_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY;
		else if (passno == 2 &&
		    ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY)
			((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 0;
		else if (passno == 1 &&
		    ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY)
			panic("ffs_snapshot: lost indirect block");
	}
	if (passno == 2)
		ibp->b_flags |= B_VALIDSUSPWRT;
	bdwrite(ibp);
	return (0);
}
  920. /*
  921. * Before expunging a snapshot inode, note all the
  922. * blocks that it claims with BLK_SNAP so that fsck will
  923. * be able to account for those blocks properly and so
  924. * that this snapshot knows that it need not copy them
  925. * if the other snapshot holding them is freed. This code
  926. * is reproduced once each for UFS1 and UFS2.
  927. */
static int
expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype, clearmode)
	struct vnode *snapvp;
	struct inode *cancelip;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
	int clearmode;
{
	int i, error, indiroff;
	ufs_lbn_t lbn, rlbn;
	ufs2_daddr_t len, blkno, numblks, blksperindir;
	struct ufs1_dinode *dip;
	struct thread *td = curthread;
	struct buf *bp;

	/*
	 * Prepare to expunge the inode. If its inode block has not
	 * yet been copied, then allocate and fill the copy.
	 */
	lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
	blkno = 0;
	if (lbn < UFS_NDADDR) {
		blkno = VTOI(snapvp)->i_din1->di_db[lbn];
	} else {
		if (DOINGSOFTDEP(snapvp))
			softdep_prealloc(snapvp, MNT_WAIT);
		/*
		 * NOTE(review): TDP_COWINPROGRESS is set around the
		 * allocation, presumably to keep copy-on-write from
		 * recursing while allocating in the snapshot itself --
		 * confirm against ffs_balloc/ffs_copyonwrite.
		 */
		td->td_pflags |= TDP_COWINPROGRESS;
		error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, BA_METAONLY, &bp);
		td->td_pflags &= ~TDP_COWINPROGRESS;
		if (error)
			return (error);
		indiroff = (lbn - UFS_NDADDR) % NINDIR(fs);
		blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff];
		bqrelse(bp);
	}
	if (blkno != 0) {
		/* Inode block already copied into the snapshot; read it. */
		if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp)))
			return (error);
	} else {
		/* Allocate the copy and fill it from the underlying device. */
		error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, 0, &bp);
		if (error)
			return (error);
		if ((error = readblock(snapvp, bp, lbn)) != 0)
			return (error);
	}
	/*
	 * Set a snapshot inode to be a zero length file, regular files
	 * or unlinked snapshots to be completely unallocated.
	 */
	dip = (struct ufs1_dinode *)bp->b_data +
	    ino_to_fsbo(fs, cancelip->i_number);
	if (clearmode || cancelip->i_effnlink == 0)
		dip->di_mode = 0;
	dip->di_size = 0;
	dip->di_blocks = 0;
	dip->di_flags &= ~SF_SNAPSHOT;
	bzero(&dip->di_db[0], (UFS_NDADDR + UFS_NIADDR) * sizeof(ufs1_daddr_t));
	bdwrite(bp);
	/*
	 * Now go through and expunge all the blocks in the file
	 * using the function requested.
	 */
	numblks = howmany(cancelip->i_size, fs->fs_bsize);
	/* Direct block pointers. */
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0],
	    &cancelip->i_din1->di_db[UFS_NDADDR], fs, 0, expungetype)))
		return (error);
	/*
	 * The indirect pointers themselves; a starting lbn of -1 tells
	 * the mapacct functions not to record logical block numbers.
	 */
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_ib[0],
	    &cancelip->i_din1->di_ib[UFS_NIADDR], fs, -1, expungetype)))
		return (error);
	blksperindir = 1;
	lbn = -UFS_NDADDR;
	len = numblks - UFS_NDADDR;
	rlbn = UFS_NDADDR;
	/* Descend each level of indirection the file's size requires. */
	for (i = 0; len > 0 && i < UFS_NIADDR; i++) {
		error = indiracct_ufs1(snapvp, ITOV(cancelip), i,
		    cancelip->i_din1->di_ib[i], lbn, rlbn, len,
		    blksperindir, fs, acctfunc, expungetype);
		if (error)
			return (error);
		blksperindir *= NINDIR(fs);
		lbn -= blksperindir + 1;
		len -= blksperindir;
		rlbn += blksperindir;
	}
	return (0);
}
  1017. /*
  1018. * Descend an indirect block chain for vnode cancelvp accounting for all
  1019. * its indirect blocks in snapvp.
  1020. */
static int
indiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
	blksperindir, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct vnode *cancelvp;
	int level;
	ufs1_daddr_t blkno;
	ufs_lbn_t lbn;
	ufs_lbn_t rlbn;
	ufs_lbn_t remblks;
	ufs_lbn_t blksperindir;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int error, num, i;
	ufs_lbn_t subblksperindir;
	struct indir indirs[UFS_NIADDR + 2];
	ufs1_daddr_t last, *bap;
	struct buf *bp;

	if (blkno == 0) {
		/* An unallocated indirect is fine when cancelling NOCOPY. */
		if (expungetype == BLK_NOCOPY)
			return (0);
		panic("indiracct_ufs1: missing indir");
	}
	/* Sanity-check the caller-supplied lbn against the indir chain. */
	if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
		return (error);
	if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
		panic("indiracct_ufs1: botched params");
	/*
	 * We have to expand bread here since it will deadlock looking
	 * up the block number for any blocks that are not in the cache.
	 */
	bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0);
	bp->b_blkno = fsbtodb(fs, blkno);
	if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
	    (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) {
		brelse(bp);
		return (error);
	}
	/*
	 * Account for the block pointers in this indirect block.
	 */
	last = howmany(remblks, blksperindir);
	if (last > NINDIR(fs))
		last = NINDIR(fs);
	/* Work from a private copy so the buffer can be released early. */
	bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK);
	bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
	bqrelse(bp);
	error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
	    level == 0 ? rlbn : -1, expungetype);
	if (error || level == 0)
		goto out;
	/*
	 * Account for the block pointers in each of the indirect blocks
	 * in the levels below us.
	 */
	subblksperindir = blksperindir / NINDIR(fs);
	for (lbn++, level--, i = 0; i < last; i++) {
		error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn,
		    rlbn, remblks, subblksperindir, fs, acctfunc, expungetype);
		if (error)
			goto out;
		rlbn += blksperindir;
		lbn -= blksperindir;
		remblks -= blksperindir;
	}
out:
	free(bap, M_DEVBUF);
	return (error);
}
  1093. /*
  1094. * Do both snap accounting and map accounting.
  1095. */
  1096. static int
  1097. fullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)
  1098. struct vnode *vp;
  1099. ufs1_daddr_t *oldblkp, *lastblkp;
  1100. struct fs *fs;
  1101. ufs_lbn_t lblkno;
  1102. int exptype; /* BLK_SNAP or BLK_NOCOPY */
  1103. {
  1104. int error;
  1105. if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
  1106. return (error);
  1107. return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype));
  1108. }
  1109. /*
  1110. * Identify a set of blocks allocated in a snapshot inode.
  1111. */
static int
snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs1_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;	/* BLK_SNAP or BLK_NOCOPY */
{
	struct inode *ip = VTOI(vp);
	ufs1_daddr_t blkno, *blkp;
	ufs_lbn_t lbn;
	struct buf *ibp;
	int error;

	/* Walk [oldblkp, lastblkp) marking each claimed block. */
	for ( ; oldblkp < lastblkp; oldblkp++) {
		blkno = *oldblkp;
		/* Skip holes and blocks already carrying a magic marker. */
		if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
			continue;
		lbn = fragstoblks(fs, blkno);
		if (lbn < UFS_NDADDR) {
			/* Direct pointer: update the inode in place. */
			blkp = &ip->i_din1->di_db[lbn];
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			/* Indirect: locate the entry in its indirect block. */
			error = ffs_balloc_ufs1(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error)
				return (error);
			blkp = &((ufs1_daddr_t *)(ibp->b_data))
			    [(lbn - UFS_NDADDR) % NINDIR(fs)];
		}
		/*
		 * If we are expunging a snapshot vnode and we
		 * find a block marked BLK_NOCOPY, then it is
		 * one that has been allocated to this snapshot after
		 * we took our current snapshot and can be ignored.
		 */
		if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) {
			if (lbn >= UFS_NDADDR)
				brelse(ibp);
		} else {
			if (*blkp != 0)
				panic("snapacct_ufs1: bad block");
			*blkp = expungetype;
			if (lbn >= UFS_NDADDR)
				bdwrite(ibp);
		}
	}
	return (0);
}
  1160. /*
  1161. * Account for a set of blocks allocated in a snapshot inode.
  1162. */
  1163. static int
  1164. mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
  1165. struct vnode *vp;
  1166. ufs1_daddr_t *oldblkp, *lastblkp;
  1167. struct fs *fs;
  1168. ufs_lbn_t lblkno;
  1169. int expungetype;
  1170. {
  1171. ufs1_daddr_t blkno;
  1172. struct inode *ip;
  1173. ino_t inum;
  1174. int acctit;
  1175. ip = VTOI(vp);
  1176. inum = ip->i_number;
  1177. if (lblkno == -1)
  1178. acctit = 0;
  1179. else
  1180. acctit = 1;
  1181. for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
  1182. blkno = *oldblkp;
  1183. if (blkno == 0 || blkno == BLK_NOCOPY)
  1184. continue;
  1185. if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
  1186. *ip->i_snapblklist++ = lblkno;
  1187. if (blkno == BLK_SNAP)
  1188. blkno = blkstofrags(fs, lblkno);
  1189. ffs_blkfree(ITOUMP(ip), fs, vp, blkno, fs->fs_bsize, inum,
  1190. vp->v_type, nil);
  1191. }
  1192. return (0);
  1193. }
  1194. /*
  1195. * Before expunging a snapshot inode, note all the
  1196. * blocks that it claims with BLK_SNAP so that fsck will
  1197. * be able to account for those blocks properly and so
  1198. * that this snapshot knows that it need not copy them
  1199. * if the other snapshot holding them is freed. This code
  1200. * is reproduced once each for UFS1 and UFS2.
  1201. */
static int
expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype, clearmode)
	struct vnode *snapvp;
	struct inode *cancelip;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
	int clearmode;
{
	int i, error, indiroff;
	ufs_lbn_t lbn, rlbn;
	ufs2_daddr_t len, blkno, numblks, blksperindir;
	struct ufs2_dinode *dip;
	struct thread *td = curthread;
	struct buf *bp;

	/*
	 * Prepare to expunge the inode. If its inode block has not
	 * yet been copied, then allocate and fill the copy.
	 */
	lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
	blkno = 0;
	if (lbn < UFS_NDADDR) {
		blkno = VTOI(snapvp)->i_din2->di_db[lbn];
	} else {
		if (DOINGSOFTDEP(snapvp))
			softdep_prealloc(snapvp, MNT_WAIT);
		/*
		 * NOTE(review): TDP_COWINPROGRESS is set around the
		 * allocation, presumably to keep copy-on-write from
		 * recursing while allocating in the snapshot itself --
		 * confirm against ffs_balloc/ffs_copyonwrite.
		 */
		td->td_pflags |= TDP_COWINPROGRESS;
		error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, BA_METAONLY, &bp);
		td->td_pflags &= ~TDP_COWINPROGRESS;
		if (error)
			return (error);
		indiroff = (lbn - UFS_NDADDR) % NINDIR(fs);
		blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff];
		bqrelse(bp);
	}
	if (blkno != 0) {
		/* Inode block already copied into the snapshot; read it. */
		if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp)))
			return (error);
	} else {
		/* Allocate the copy and fill it from the underlying device. */
		error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, 0, &bp);
		if (error)
			return (error);
		if ((error = readblock(snapvp, bp, lbn)) != 0)
			return (error);
	}
	/*
	 * Set a snapshot inode to be a zero length file, regular files
	 * to be completely unallocated.
	 */
	dip = (struct ufs2_dinode *)bp->b_data +
	    ino_to_fsbo(fs, cancelip->i_number);
	if (clearmode || cancelip->i_effnlink == 0)
		dip->di_mode = 0;
	dip->di_size = 0;
	dip->di_blocks = 0;
	dip->di_flags &= ~SF_SNAPSHOT;
	bzero(&dip->di_db[0], (UFS_NDADDR + UFS_NIADDR) * sizeof(ufs2_daddr_t));
	bdwrite(bp);
	/*
	 * Now go through and expunge all the blocks in the file
	 * using the function requested.
	 */
	numblks = howmany(cancelip->i_size, fs->fs_bsize);
	/* Direct block pointers. */
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0],
	    &cancelip->i_din2->di_db[UFS_NDADDR], fs, 0, expungetype)))
		return (error);
	/*
	 * The indirect pointers themselves; a starting lbn of -1 tells
	 * the mapacct functions not to record logical block numbers.
	 */
	if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_ib[0],
	    &cancelip->i_din2->di_ib[UFS_NIADDR], fs, -1, expungetype)))
		return (error);
	blksperindir = 1;
	lbn = -UFS_NDADDR;
	len = numblks - UFS_NDADDR;
	rlbn = UFS_NDADDR;
	/* Descend each level of indirection the file's size requires. */
	for (i = 0; len > 0 && i < UFS_NIADDR; i++) {
		error = indiracct_ufs2(snapvp, ITOV(cancelip), i,
		    cancelip->i_din2->di_ib[i], lbn, rlbn, len,
		    blksperindir, fs, acctfunc, expungetype);
		if (error)
			return (error);
		blksperindir *= NINDIR(fs);
		lbn -= blksperindir + 1;
		len -= blksperindir;
		rlbn += blksperindir;
	}
	return (0);
}
  1291. /*
  1292. * Descend an indirect block chain for vnode cancelvp accounting for all
  1293. * its indirect blocks in snapvp.
  1294. */
static int
indiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
	blksperindir, fs, acctfunc, expungetype)
	struct vnode *snapvp;
	struct vnode *cancelvp;
	int level;
	ufs2_daddr_t blkno;
	ufs_lbn_t lbn;
	ufs_lbn_t rlbn;
	ufs_lbn_t remblks;
	ufs_lbn_t blksperindir;
	struct fs *fs;
	int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
	    struct fs *, ufs_lbn_t, int);
	int expungetype;
{
	int error, num, i;
	ufs_lbn_t subblksperindir;
	struct indir indirs[UFS_NIADDR + 2];
	ufs2_daddr_t last, *bap;
	struct buf *bp;

	if (blkno == 0) {
		/* An unallocated indirect is fine when cancelling NOCOPY. */
		if (expungetype == BLK_NOCOPY)
			return (0);
		panic("indiracct_ufs2: missing indir");
	}
	/* Sanity-check the caller-supplied lbn against the indir chain. */
	if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
		return (error);
	if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
		panic("indiracct_ufs2: botched params");
	/*
	 * We have to expand bread here since it will deadlock looking
	 * up the block number for any blocks that are not in the cache.
	 */
	bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0);
	bp->b_blkno = fsbtodb(fs, blkno);
	if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
	    (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) {
		brelse(bp);
		return (error);
	}
	/*
	 * Account for the block pointers in this indirect block.
	 */
	last = howmany(remblks, blksperindir);
	if (last > NINDIR(fs))
		last = NINDIR(fs);
	/* Work from a private copy so the buffer can be released early. */
	bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK);
	bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
	bqrelse(bp);
	error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
	    level == 0 ? rlbn : -1, expungetype);
	if (error || level == 0)
		goto out;
	/*
	 * Account for the block pointers in each of the indirect blocks
	 * in the levels below us.
	 */
	subblksperindir = blksperindir / NINDIR(fs);
	for (lbn++, level--, i = 0; i < last; i++) {
		error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn,
		    rlbn, remblks, subblksperindir, fs, acctfunc, expungetype);
		if (error)
			goto out;
		rlbn += blksperindir;
		lbn -= blksperindir;
		remblks -= blksperindir;
	}
out:
	free(bap, M_DEVBUF);
	return (error);
}
  1367. /*
  1368. * Do both snap accounting and map accounting.
  1369. */
  1370. static int
  1371. fullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)
  1372. struct vnode *vp;
  1373. ufs2_daddr_t *oldblkp, *lastblkp;
  1374. struct fs *fs;
  1375. ufs_lbn_t lblkno;
  1376. int exptype; /* BLK_SNAP or BLK_NOCOPY */
  1377. {
  1378. int error;
  1379. if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
  1380. return (error);
  1381. return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype));
  1382. }
  1383. /*
  1384. * Identify a set of blocks allocated in a snapshot inode.
  1385. */
static int
snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
	struct vnode *vp;
	ufs2_daddr_t *oldblkp, *lastblkp;
	struct fs *fs;
	ufs_lbn_t lblkno;
	int expungetype;	/* BLK_SNAP or BLK_NOCOPY */
{
	struct inode *ip = VTOI(vp);
	ufs2_daddr_t blkno, *blkp;
	ufs_lbn_t lbn;
	struct buf *ibp;
	int error;

	/* Walk [oldblkp, lastblkp) marking each claimed block. */
	for ( ; oldblkp < lastblkp; oldblkp++) {
		blkno = *oldblkp;
		/* Skip holes and blocks already carrying a magic marker. */
		if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
			continue;
		lbn = fragstoblks(fs, blkno);
		if (lbn < UFS_NDADDR) {
			/* Direct pointer: update the inode in place. */
			blkp = &ip->i_din2->di_db[lbn];
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			/* Indirect: locate the entry in its indirect block. */
			error = ffs_balloc_ufs2(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			if (error)
				return (error);
			blkp = &((ufs2_daddr_t *)(ibp->b_data))
			    [(lbn - UFS_NDADDR) % NINDIR(fs)];
		}
		/*
		 * If we are expunging a snapshot vnode and we
		 * find a block marked BLK_NOCOPY, then it is
		 * one that has been allocated to this snapshot after
		 * we took our current snapshot and can be ignored.
		 */
		if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) {
			if (lbn >= UFS_NDADDR)
				brelse(ibp);
		} else {
			if (*blkp != 0)
				panic("snapacct_ufs2: bad block");
			*blkp = expungetype;
			if (lbn >= UFS_NDADDR)
				bdwrite(ibp);
		}
	}
	return (0);
}
  1434. /*
  1435. * Account for a set of blocks allocated in a snapshot inode.
  1436. */
  1437. static int
  1438. mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
  1439. struct vnode *vp;
  1440. ufs2_daddr_t *oldblkp, *lastblkp;
  1441. struct fs *fs;
  1442. ufs_lbn_t lblkno;
  1443. int expungetype;
  1444. {
  1445. ufs2_daddr_t blkno;
  1446. struct inode *ip;
  1447. ino_t inum;
  1448. int acctit;
  1449. ip = VTOI(vp);
  1450. inum = ip->i_number;
  1451. if (lblkno == -1)
  1452. acctit = 0;
  1453. else
  1454. acctit = 1;
  1455. for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
  1456. blkno = *oldblkp;
  1457. if (blkno == 0 || blkno == BLK_NOCOPY)
  1458. continue;
  1459. if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
  1460. *ip->i_snapblklist++ = lblkno;
  1461. if (blkno == BLK_SNAP)
  1462. blkno = blkstofrags(fs, lblkno);
  1463. ffs_blkfree(ITOUMP(ip), fs, vp, blkno, fs->fs_bsize, inum,
  1464. vp->v_type, nil);
  1465. }
  1466. return (0);
  1467. }
  1468. #endif // 0
  1469. /*
  1470. * Decrement extra reference on snapshot when last name is removed.
  1471. * It will not be freed until the last open reference goes away.
  1472. */
void
ffs_snapgone (inode *ip)
{
	/*
	 * Not yet implemented in this port; the original FreeBSD
	 * implementation is kept below under "#if 0" for reference.
	 */
	print("HARVEY TODO: %s\n", __func__);
#if 0
	struct inode *xp;
	struct fs *fs;
	int snaploc;
	struct snapdata *sn;
	struct ufsmount *ump;

	/*
	 * Find snapshot in incore list.
	 */
	xp = nil;
	sn = ITODEVVP(ip)->v_rdev->si_snapdata;
	if (sn != nil)
		TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap)
			if (xp == ip)
				break;
	if (xp != nil)
		vrele(ITOV(ip));
	else if (snapdebug)
		printf("ffs_snapgone: lost snapshot vnode %ju\n",
		    (uintmax_t)ip->i_number);
	/*
	 * Delete snapshot inode from superblock. Keep list dense.
	 */
	ump = ITOUMP(ip);
	fs = ump->um_fs;
	UFS_LOCK(ump);
	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
		if (fs->fs_snapinum[snaploc] == ip->i_number)
			break;
	if (snaploc < FSMAXSNAP) {
		/* Shift later entries down over the removed slot. */
		for (snaploc++; snaploc < FSMAXSNAP; snaploc++) {
			if (fs->fs_snapinum[snaploc] == 0)
				break;
			fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc];
		}
		fs->fs_snapinum[snaploc - 1] = 0;
	}
	UFS_UNLOCK(ump);
#endif // 0
}
  1517. #if 0
  1518. /*
  1519. * Prepare a snapshot file for being removed.
  1520. */
/*
 * Prepare a snapshot file for being removed: detach it from the
 * device's active snapshot list, release or hand off the blocks it
 * claimed, and clear the snapshot flag on the inode.
 */
void
ffs_snapremove (struct vnode *vp)
{
	struct inode *ip;
	struct vnode *devvp;
	struct buf *ibp;
	struct fs *fs;
	ufs2_daddr_t numblks, blkno, dblk;
	int error, loc, last;
	struct snapdata *sn;

	ip = VTOI(vp);
	fs = ITOFS(ip);
	devvp = ITODEVVP(ip);
	/*
	 * If active, delete from incore list (this snapshot may
	 * already have been in the process of being deleted, so
	 * would not have been active).
	 *
	 * Clear copy-on-write flag if last snapshot.
	 */
	VI_LOCK(devvp);
	if (ip->i_nextsnap.tqe_prev != 0) {
		sn = devvp->v_rdev->si_snapdata;
		TAILQ_REMOVE(&sn->sn_head, ip, i_nextsnap);
		ip->i_nextsnap.tqe_prev = 0;
		VI_UNLOCK(devvp);
		/* Revert the vnode from the shared snaplk to its own lock. */
		lockmgr(&vp->v_lock, LK_EXCLUSIVE, nil);
		KASSERT(vp->v_vnlock == &sn->sn_lock,
		    ("ffs_snapremove: lost lock mutation"));
		vp->v_vnlock = &vp->v_lock;
		VI_LOCK(devvp);
		lockmgr(&sn->sn_lock, LK_RELEASE, nil);
		/* try_free_snapdata() releases the devvp interlock. */
		try_free_snapdata(devvp);
	} else
		VI_UNLOCK(devvp);
	/*
	 * Clear all BLK_NOCOPY fields. Pass any block claims to other
	 * snapshots that want them (see ffs_snapblkfree below).
	 */
	for (blkno = 1; blkno < UFS_NDADDR; blkno++) {
		dblk = DIP(ip, i_db[blkno]);
		if (dblk == 0)
			continue;
		if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
			DIP_SET(ip, i_db[blkno], 0);
		/*
		 * A claimed block maps to its own logical block number
		 * (see the ffs_snapblkfree comment); offer it to the
		 * other snapshots before dropping it.
		 */
		else if ((dblk == blkstofrags(fs, blkno) &&
		     ffs_snapblkfree(fs, ITODEVVP(ip), dblk, fs->fs_bsize,
		     ip->i_number, vp->v_type, nil))) {
			DIP_SET(ip, i_blocks, DIP(ip, i_blocks) -
			    btodb(fs->fs_bsize));
			DIP_SET(ip, i_db[blkno], 0);
		}
	}
	/* Walk every indirect block and do the same for its pointers. */
	numblks = howmany(ip->i_size, fs->fs_bsize);
	for (blkno = UFS_NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno),
		    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
		if (error)
			continue;
		if (fs->fs_size - blkno > NINDIR(fs))
			last = NINDIR(fs);
		else
			last = fs->fs_size - blkno;
		for (loc = 0; loc < last; loc++) {
			if (I_IS_UFS1(ip)) {
				dblk = ((ufs1_daddr_t *)(ibp->b_data))[loc];
				if (dblk == 0)
					continue;
				if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
					((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
				else if ((dblk == blkstofrags(fs, blkno) &&
				     ffs_snapblkfree(fs, ITODEVVP(ip), dblk,
				     fs->fs_bsize, ip->i_number, vp->v_type,
				     nil))) {
					ip->i_din1->di_blocks -=
					    btodb(fs->fs_bsize);
					((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
				}
				continue;
			}
			dblk = ((ufs2_daddr_t *)(ibp->b_data))[loc];
			if (dblk == 0)
				continue;
			if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
				((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
			else if ((dblk == blkstofrags(fs, blkno) &&
			     ffs_snapblkfree(fs, ITODEVVP(ip), dblk,
			     fs->fs_bsize, ip->i_number, vp->v_type, nil))) {
				ip->i_din2->di_blocks -= btodb(fs->fs_bsize);
				((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
			}
		}
		bawrite(ibp);
	}
	/*
	 * Clear snapshot flag and drop reference.
	 */
	ip->i_flags &= ~SF_SNAPSHOT;
	DIP_SET(ip, i_flags, ip->i_flags);
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	/*
	 * The dirtied indirects must be written out before
	 * softdep_setup_freeblocks() is called.  Otherwise indir_trunc()
	 * may find indirect pointers using the magic BLK_* values.
	 */
	if (DOINGSOFTDEP(vp))
		ffs_syncvnode(vp, MNT_WAIT, 0);
#ifdef QUOTA
	/*
	 * Reenable disk quotas for ex-snapshot file.
	 */
	if (!getinoquota(ip))
		(void) chkdq(ip, DIP(ip, i_blocks), KERNCRED, FORCE);
#endif
}
  1636. /*
  1637. * Notification that a block is being freed. Return zero if the free
  1638. * should be allowed to proceed. Return non-zero if the snapshot file
  1639. * wants to claim the block. The block will be claimed if it is an
  1640. * uncopied part of one of the snapshots. It will be freed if it is
  1641. * either a BLK_NOCOPY or has already been copied in all of the snapshots.
  1642. * If a fragment is being freed, then all snapshots that care about
  1643. * it must make a copy since a snapshot file can only claim full sized
  1644. * blocks. Note that if more than one snapshot file maps the block,
  1645. * we can pick one at random to claim it. Since none of the snapshots
  1646. * can change, we are assurred that they will all see the same unmodified
  1647. * image. When deleting a snapshot file (see ffs_snapremove above), we
  1648. * must push any of these claimed blocks to one of the other snapshots
  1649. * that maps it. These claimed blocks are easily identified as they will
  1650. * have a block number equal to their logical block number within the
  1651. * snapshot. A copied block can never have this property because they
  1652. * must always have been allocated from a BLK_NOCOPY location.
  1653. */
int
ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, wkhd)
	struct fs *fs;		/* filesystem containing the block */
	struct vnode *devvp;	/* device vnode of the filesystem */
	ufs2_daddr_t bno;	/* first fragment of the block being freed */
	long size;		/* size being freed, in bytes */
	ino_t inum;		/* inode that owned the block */
	enum vtype vtype;	/* vnode type of that inode */
	struct workhead *wkhd;	/* softdep journal work list, or nil */
{
	struct buf *ibp, *cbp, *savedcbp = nil;
	struct thread *td = curthread;
	struct inode *ip;
	struct vnode *vp = nil;
	ufs_lbn_t lbn;
	ufs2_daddr_t blkno;
	int indiroff = 0, error = 0, claimedblk = 0;
	struct snapdata *sn;

	lbn = fragstoblks(fs, bno);
retry:
	VI_LOCK(devvp);
	sn = devvp->v_rdev->si_snapdata;
	if (sn == nil) {
		VI_UNLOCK(devvp);
		return (0);
	}
	/*
	 * LK_SLEEPFAIL: if we had to sleep, the snapdata may have been
	 * replaced, so re-fetch it and try again.
	 */
	if (lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
	    VI_MTX(devvp)) != 0)
		goto retry;
	TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) {
		vp = ITOV(ip);
		if (DOINGSOFTDEP(vp))
			softdep_prealloc(vp, MNT_WAIT);
		/*
		 * Lookup block being written.
		 */
		if (lbn < UFS_NDADDR) {
			blkno = DIP(ip, i_db[lbn]);
		} else {
			td->td_pflags |= TDP_COWINPROGRESS;
			error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			td->td_pflags &= ~TDP_COWINPROGRESS;
			if (error)
				break;
			indiroff = (lbn - UFS_NDADDR) % NINDIR(fs);
			if (I_IS_UFS1(ip))
				blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff];
			else
				blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff];
		}
		/*
		 * Check to see if block needs to be copied.
		 */
		if (blkno == 0) {
			/*
			 * A block that we map is being freed. If it has not
			 * been claimed yet, we will claim or copy it (below).
			 */
			claimedblk = 1;
		} else if (blkno == BLK_SNAP) {
			/*
			 * No previous snapshot claimed the block,
			 * so it will be freed and become a BLK_NOCOPY
			 * (don't care) for us.
			 */
			if (claimedblk)
				panic("snapblkfree: inconsistent block type");
			if (lbn < UFS_NDADDR) {
				DIP_SET(ip, i_db[lbn], BLK_NOCOPY);
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
			} else if (I_IS_UFS1(ip)) {
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] =
				    BLK_NOCOPY;
				bdwrite(ibp);
			} else {
				((ufs2_daddr_t *)(ibp->b_data))[indiroff] =
				    BLK_NOCOPY;
				bdwrite(ibp);
			}
			continue;
		} else /* BLK_NOCOPY or default */ {
			/*
			 * If the snapshot has already copied the block
			 * (default), or does not care about the block,
			 * it is not needed.
			 */
			if (lbn >= UFS_NDADDR)
				bqrelse(ibp);
			continue;
		}
		/*
		 * If this is a full size block, we will just grab it
		 * and assign it to the snapshot inode. Otherwise we
		 * will proceed to copy it. See explanation for this
		 * routine as to why only a single snapshot needs to
		 * claim this block.
		 */
		if (size == fs->fs_bsize) {
#ifdef DEBUG
			if (snapdebug)
				printf("%s %ju lbn %jd from inum %ju\n",
				    "Grabonremove: snapino",
				    (uintmax_t)ip->i_number,
				    (intmax_t)lbn, (uintmax_t)inum);
#endif
			/*
			 * If journaling is tracking this write we must add
			 * the work to the inode or indirect being written.
			 */
			if (wkhd != nil) {
				if (lbn < UFS_NDADDR)
					softdep_inode_append(ip,
					    curthread->td_ucred, wkhd);
				else
					softdep_buf_append(ibp, wkhd);
			}
			if (lbn < UFS_NDADDR) {
				DIP_SET(ip, i_db[lbn], bno);
			} else if (I_IS_UFS1(ip)) {
				((ufs1_daddr_t *)(ibp->b_data))[indiroff] = bno;
				bdwrite(ibp);
			} else {
				((ufs2_daddr_t *)(ibp->b_data))[indiroff] = bno;
				bdwrite(ibp);
			}
			DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(size));
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			/* Claimed: tell the caller not to free the block. */
			lockmgr(vp->v_vnlock, LK_RELEASE, nil);
			return (1);
		}
		if (lbn >= UFS_NDADDR)
			bqrelse(ibp);
		/*
		 * Allocate the block into which to do the copy. Note that this
		 * allocation will never require any additional allocations for
		 * the snapshot inode.
		 */
		td->td_pflags |= TDP_COWINPROGRESS;
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, 0, &cbp);
		td->td_pflags &= ~TDP_COWINPROGRESS;
		if (error)
			break;
#ifdef DEBUG
		if (snapdebug)
			printf("%s%ju lbn %jd %s %ju size %ld to blkno %jd\n",
			    "Copyonremove: snapino ", (uintmax_t)ip->i_number,
			    (intmax_t)lbn, "for inum", (uintmax_t)inum, size,
			    (intmax_t)cbp->b_blkno);
#endif
		/*
		 * If we have already read the old block contents, then
		 * simply copy them to the new block. Note that we need
		 * to synchronously write snapshots that have not been
		 * unlinked, and hence will be visible after a crash,
		 * to ensure their integrity. At a minimum we ensure the
		 * integrity of the filesystem metadata, but use the
		 * dopersistence sysctl-setable flag to decide on the
		 * persistence needed for file content data.
		 */
		if (savedcbp != nil) {
			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if ((vtype == VDIR || dopersistence) &&
			    ip->i_effnlink > 0)
				(void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT);
			continue;
		}
		/*
		 * Otherwise, read the old block contents into the buffer.
		 */
		if ((error = readblock(vp, cbp, lbn)) != 0) {
			bzero(cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if ((vtype == VDIR || dopersistence) &&
			    ip->i_effnlink > 0)
				(void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT);
			break;
		}
		savedcbp = cbp;
	}
	/*
	 * Note that we need to synchronously write snapshots that
	 * have not been unlinked, and hence will be visible after
	 * a crash, to ensure their integrity. At a minimum we
	 * ensure the integrity of the filesystem metadata, but
	 * use the dopersistence sysctl-setable flag to decide on
	 * the persistence needed for file content data.
	 */
	if (savedcbp) {
		vp = savedcbp->b_vp;
		bawrite(savedcbp);
		if ((vtype == VDIR || dopersistence) &&
		    VTOI(vp)->i_effnlink > 0)
			(void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT);
	}
	/*
	 * If we have been unable to allocate a block in which to do
	 * the copy, then return non-zero so that the fragment will
	 * not be freed. Although space will be lost, the snapshot
	 * will stay consistent.
	 */
	if (error != 0 && wkhd != nil)
		softdep_freework(wkhd);
	lockmgr(vp->v_vnlock, LK_RELEASE, nil);
	return (error);
}
  1862. /*
  1863. * Associate snapshot files when mounting.
  1864. */
void
ffs_snapshot_mount (MountPoint *mp)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct vnode *devvp = ump->um_devvp;
	struct fs *fs = ump->um_fs;
	struct thread *td = curthread;
	struct snapdata *sn;
	struct vnode *vp;
	struct vnode *lastvp;
	struct inode *ip;
	struct uio auio;
	struct iovec aiov;
	void *snapblklist;
	char *reason;
	daddr_t snaplistsize;
	int error, snaploc, loc;

	/*
	 * XXX The following needs to be set before ffs_truncate or
	 * VOP_READ can be called.
	 */
	mp->mnt_stat.f_iosize = fs->fs_bsize;
	/*
	 * Process each snapshot listed in the superblock.
	 */
	vp = nil;
	lastvp = nil;
	sn = nil;
	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) {
		if (fs->fs_snapinum[snaploc] == 0)
			break;
		if ((error = ffs_vget(mp, fs->fs_snapinum[snaploc],
		    LK_EXCLUSIVE, &vp)) != 0){
			printf("ffs_snapshot_mount: vget failed %d\n", error);
			continue;
		}
		ip = VTOI(vp);
		/*
		 * Discard non-snapshot inodes and old-format snapshots
		 * (recognizable by having no block hint list appended:
		 * their size is exactly the filesystem size).
		 */
		if (!IS_SNAPSHOT(ip) || ip->i_size ==
		    lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) {
			if (!IS_SNAPSHOT(ip)) {
				reason = "non-snapshot";
			} else {
				reason = "old format snapshot";
				(void)ffs_truncate(vp, (off_t)0, 0, NOCRED);
				(void)ffs_syncvnode(vp, MNT_WAIT, 0);
			}
			printf("ffs_snapshot_mount: %s inode %d\n",
			    reason, fs->fs_snapinum[snaploc]);
			vput(vp);
			vp = nil;
			/* Shift the remaining entries down over this slot. */
			for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) {
				if (fs->fs_snapinum[loc] == 0)
					break;
				fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc];
			}
			fs->fs_snapinum[loc - 1] = 0;
			snaploc--;
			continue;
		}
		/*
		 * Acquire a lock on the snapdata structure, creating it if
		 * necessary.
		 */
		sn = ffs_snapdata_acquire(devvp);
		/*
		 * Change vnode to use shared snapshot lock instead of the
		 * original private lock.
		 */
		vp->v_vnlock = &sn->sn_lock;
		lockmgr(&vp->v_lock, LK_RELEASE, nil);
		/*
		 * Link it onto the active snapshot list.
		 */
		VI_LOCK(devvp);
		if (ip->i_nextsnap.tqe_prev != 0)
			panic("ffs_snapshot_mount: %ju already on list",
			    (uintmax_t)ip->i_number);
		else
			TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap);
		vp->v_vflag |= VV_SYSTEM;
		VI_UNLOCK(devvp);
		VOP_UNLOCK(vp, 0);
		lastvp = vp;
	}
	vp = lastvp;
	/*
	 * No usable snapshots found.
	 */
	if (sn == nil || vp == nil)
		return;
	/*
	 * Allocate the space for the block hints list. We always want to
	 * use the list from the newest snapshot.
	 */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = (void *)&snaplistsize;
	aiov.iov_len = sizeof(snaplistsize);
	auio.uio_resid = aiov.iov_len;
	/* The hint list is stored just past the last filesystem block. */
	auio.uio_offset =
	    lblktosize(fs, howmany(fs->fs_size, fs->fs_frag));
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) {
		printf("ffs_snapshot_mount: read_1 failed %d\n", error);
		VOP_UNLOCK(vp, 0);
		return;
	}
	snapblklist = malloc(snaplistsize * sizeof(daddr_t),
	    M_UFSMNT, M_WAITOK);
	auio.uio_iovcnt = 1;
	aiov.iov_base = snapblklist;
	aiov.iov_len = snaplistsize * sizeof (daddr_t);
	auio.uio_resid = aiov.iov_len;
	/* Re-read from the start of the list, size word included. */
	auio.uio_offset -= sizeof(snaplistsize);
	if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) {
		printf("ffs_snapshot_mount: read_2 failed %d\n", error);
		VOP_UNLOCK(vp, 0);
		free(snapblklist, M_UFSMNT);
		return;
	}
	VOP_UNLOCK(vp, 0);
	VI_LOCK(devvp);
	ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount");
	sn->sn_listsize = snaplistsize;
	sn->sn_blklist = (daddr_t *)snapblklist;
	devvp->v_vflag |= VV_COPYONWRITE;
	VI_UNLOCK(devvp);
}
  1996. /*
  1997. * Disassociate snapshot files when unmounting.
  1998. */
void
ffs_snapshot_unmount (struct mount *mp)
{
	struct vnode *devvp = VFSTOUFS(mp)->um_devvp;
	struct snapdata *sn;
	struct inode *xp;
	struct vnode *vp;

	VI_LOCK(devvp);
	sn = devvp->v_rdev->si_snapdata;
	/* Detach each active snapshot, re-fetching sn each iteration. */
	while (sn != nil && (xp = TAILQ_FIRST(&sn->sn_head)) != nil) {
		vp = ITOV(xp);
		TAILQ_REMOVE(&sn->sn_head, xp, i_nextsnap);
		xp->i_nextsnap.tqe_prev = 0;
		lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE,
		    VI_MTX(devvp));
		/* Revert the vnode from the shared snaplk to its own lock. */
		lockmgr(&vp->v_lock, LK_EXCLUSIVE, nil);
		KASSERT(vp->v_vnlock == &sn->sn_lock,
		    ("ffs_snapshot_unmount: lost lock mutation"));
		vp->v_vnlock = &vp->v_lock;
		lockmgr(&vp->v_lock, LK_RELEASE, nil);
		lockmgr(&sn->sn_lock, LK_RELEASE, nil);
		/* Drop the hold kept while the snapshot was active. */
		if (xp->i_effnlink > 0)
			vrele(vp);
		VI_LOCK(devvp);
		sn = devvp->v_rdev->si_snapdata;
	}
	/* try_free_snapdata() releases the devvp interlock. */
	try_free_snapdata(devvp);
	ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount");
}
  2028. /*
  2029. * Check the buffer block to be belong to device buffer that shall be
  2030. * locked after snaplk. devvp shall be locked on entry, and will be
  2031. * leaved locked upon exit.
  2032. */
  2033. static int
  2034. ffs_bp_snapblk (struct vnode *devvp, struct buf *bp)
  2035. {
  2036. struct snapdata *sn;
  2037. struct fs *fs;
  2038. ufs2_daddr_t lbn, *snapblklist;
  2039. int lower, upper, mid;
  2040. ASSERT_VI_LOCKED(devvp, "ffs_bp_snapblk");
  2041. KASSERT(devvp->v_type == VCHR, ("Not a device %p", devvp));
  2042. sn = devvp->v_rdev->si_snapdata;
  2043. if (sn == nil || TAILQ_FIRST(&sn->sn_head) == nil)
  2044. return (0);
  2045. fs = ITOFS(TAILQ_FIRST(&sn->sn_head));
  2046. lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
  2047. snapblklist = sn->sn_blklist;
  2048. upper = sn->sn_listsize - 1;
  2049. lower = 1;
  2050. while (lower <= upper) {
  2051. mid = (lower + upper) / 2;
  2052. if (snapblklist[mid] == lbn)
  2053. break;
  2054. if (snapblklist[mid] < lbn)
  2055. lower = mid + 1;
  2056. else
  2057. upper = mid - 1;
  2058. }
  2059. if (lower <= upper)
  2060. return (1);
  2061. return (0);
  2062. }
/*
 * Flush dirty device buffers once the dirty threshold is exceeded,
 * deferring buffers that ffs_bp_snapblk() flags (their blocks appear
 * in the snapshot hint list, so per the comment above they must only
 * be written after snaplk).
 */
void
ffs_bdflush (struct bufobj *bo, struct buf *bp)
{
	struct thread *td;
	struct vnode *vp, *devvp;
	struct buf *nbp;
	int bp_bdskip;

	if (bo->bo_dirty.bv_cnt <= dirtybufthresh)
		return;
	td = curthread;
	vp = bp->b_vp;
	devvp = bo2vnode(bo);
	KASSERT(vp == devvp, ("devvp != vp %p %p", bo, bp));
	VI_LOCK(devvp);
	bp_bdskip = ffs_bp_snapblk(devvp, bp);
	if (bp_bdskip)
		bdwriteskip++;
	VI_UNLOCK(devvp);
	/* Far over threshold and bp unconstrained: sync the whole device. */
	if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10 && !bp_bdskip) {
		(void) VOP_FSYNC(vp, MNT_NOWAIT, td);
		altbufferflushes++;
	} else {
		BO_LOCK(bo);
		/*
		 * Try to find a buffer to flush.
		 */
		TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) {
			if ((nbp->b_vflags & BV_BKGRDINPROG) ||
			    BUF_LOCK(nbp,
			    LK_EXCLUSIVE | LK_NOWAIT, nil))
				continue;
			if (bp == nbp)
				panic("bdwrite: found ourselves");
			BO_UNLOCK(bo);
			/*
			 * Don't countdeps with the bo lock
			 * held.
			 */
			if (buf_countdeps(nbp, 0)) {
				BO_LOCK(bo);
				BUF_UNLOCK(nbp);
				continue;
			}
			/*
			 * When bp itself is constrained, only flush
			 * buffers with the same constraint.
			 */
			if (bp_bdskip) {
				VI_LOCK(devvp);
				if (!ffs_bp_snapblk(vp, nbp)) {
					VI_UNLOCK(devvp);
					BO_LOCK(bo);
					BUF_UNLOCK(nbp);
					continue;
				}
				VI_UNLOCK(devvp);
			}
			if (nbp->b_flags & B_CLUSTEROK) {
				vfs_bio_awrite(nbp);
			} else {
				bremfree(nbp);
				bawrite(nbp);
			}
			dirtybufferflushes++;
			break;
		}
		if (nbp == nil)
			BO_UNLOCK(bo);
	}
}
  2129. /*
  2130. * Check for need to copy block that is about to be written,
  2131. * copying the block if necessary.
  2132. */
int
ffs_copyonwrite (struct vnode *devvp, struct buf *bp)
{
	struct snapdata *sn;
	struct buf *ibp, *cbp, *savedcbp = nil;
	struct thread *td = curthread;
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp = nil;
	ufs2_daddr_t lbn, blkno, *snapblklist;
	int lower, upper, mid, indiroff, error = 0;
	int launched_async_io, prev_norunningbuf;
	long saved_runningbufspace;

	if (devvp != bp->b_vp && IS_SNAPSHOT(VTOI(bp->b_vp)))
		return (0);		/* Update on a snapshot file */
	if (td->td_pflags & TDP_COWINPROGRESS)
		panic("ffs_copyonwrite: recursive call");
	/*
	 * First check to see if it is in the preallocated list.
	 * By doing this check we avoid several potential deadlocks.
	 */
	VI_LOCK(devvp);
	sn = devvp->v_rdev->si_snapdata;
	if (sn == nil ||
	    TAILQ_EMPTY(&sn->sn_head)) {
		VI_UNLOCK(devvp);
		return (0);		/* No snapshot */
	}
	ip = TAILQ_FIRST(&sn->sn_head);
	fs = ITOFS(ip);
	lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
	/*
	 * Binary search the sorted preallocated-block list.  Entry 0
	 * holds the list size, so real entries start at index 1.
	 */
	snapblklist = sn->sn_blklist;
	upper = sn->sn_listsize - 1;
	lower = 1;
	while (lower <= upper) {
		mid = (lower + upper) / 2;
		if (snapblklist[mid] == lbn)
			break;
		if (snapblklist[mid] < lbn)
			lower = mid + 1;
		else
			upper = mid - 1;
	}
	if (lower <= upper) {
		VI_UNLOCK(devvp);
		return (0);
	}
	launched_async_io = 0;
	prev_norunningbuf = td->td_pflags & TDP_NORUNNINGBUF;
	/*
	 * Since I/O on bp isn't yet in progress and it may be blocked
	 * for a long time waiting on snaplk, back it out of
	 * runningbufspace, possibly waking other threads waiting for space.
	 */
	saved_runningbufspace = bp->b_runningbufspace;
	if (saved_runningbufspace != 0)
		runningbufwakeup(bp);
	/*
	 * Not in the precomputed list, so check the snapshots.
	 */
	while (lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
	    VI_MTX(devvp)) != 0) {
		VI_LOCK(devvp);
		sn = devvp->v_rdev->si_snapdata;
		if (sn == nil ||
		    TAILQ_EMPTY(&sn->sn_head)) {
			VI_UNLOCK(devvp);
			/* Restore bp's runningbufspace accounting. */
			if (saved_runningbufspace != 0) {
				bp->b_runningbufspace = saved_runningbufspace;
				atomic_add_long(&runningbufspace,
				    bp->b_runningbufspace);
			}
			return (0);		/* Snapshot gone */
		}
	}
	TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) {
		vp = ITOV(ip);
		if (DOINGSOFTDEP(vp))
			softdep_prealloc(vp, MNT_WAIT);
		/*
		 * We ensure that everything of our own that needs to be
		 * copied will be done at the time that ffs_snapshot is
		 * called. Thus we can skip the check here which can
		 * deadlock in doing the lookup in UFS_BALLOC.
		 */
		if (bp->b_vp == vp)
			continue;
		/*
		 * Check to see if block needs to be copied. We do not have
		 * to hold the snapshot lock while doing this lookup as it
		 * will never require any additional allocations for the
		 * snapshot inode.
		 */
		if (lbn < UFS_NDADDR) {
			blkno = DIP(ip, i_db[lbn]);
		} else {
			td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF;
			error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
			    fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
			td->td_pflags &= ~TDP_COWINPROGRESS;
			if (error)
				break;
			indiroff = (lbn - UFS_NDADDR) % NINDIR(fs);
			if (I_IS_UFS1(ip))
				blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff];
			else
				blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff];
			bqrelse(ibp);
		}
#ifdef INVARIANTS
		if (blkno == BLK_SNAP && bp->b_lblkno >= 0)
			panic("ffs_copyonwrite: bad copy block");
#endif
		if (blkno != 0)
			continue;
		/*
		 * Allocate the block into which to do the copy. Since
		 * multiple processes may all try to copy the same block,
		 * we have to recheck our need to do a copy if we sleep
		 * waiting for the lock.
		 *
		 * Because all snapshots on a filesystem share a single
		 * lock, we ensure that we will never be in competition
		 * with another process to allocate a block.
		 */
		td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF;
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
		    fs->fs_bsize, KERNCRED, 0, &cbp);
		td->td_pflags &= ~TDP_COWINPROGRESS;
		if (error)
			break;
#ifdef DEBUG
		if (snapdebug) {
			printf("Copyonwrite: snapino %ju lbn %jd for ",
			    (uintmax_t)ip->i_number, (intmax_t)lbn);
			if (bp->b_vp == devvp)
				printf("fs metadata");
			else
				printf("inum %ju",
				    (uintmax_t)VTOI(bp->b_vp)->i_number);
			printf(" lblkno %jd to blkno %jd\n",
			    (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno);
		}
#endif
		/*
		 * If we have already read the old block contents, then
		 * simply copy them to the new block. Note that we need
		 * to synchronously write snapshots that have not been
		 * unlinked, and hence will be visible after a crash,
		 * to ensure their integrity. At a minimum we ensure the
		 * integrity of the filesystem metadata, but use the
		 * dopersistence sysctl-setable flag to decide on the
		 * persistence needed for file content data.
		 */
		if (savedcbp != nil) {
			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
			    dopersistence) && ip->i_effnlink > 0)
				(void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT);
			else
				launched_async_io = 1;
			continue;
		}
		/*
		 * Otherwise, read the old block contents into the buffer.
		 */
		if ((error = readblock(vp, cbp, lbn)) != 0) {
			bzero(cbp->b_data, fs->fs_bsize);
			bawrite(cbp);
			if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
			    dopersistence) && ip->i_effnlink > 0)
				(void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT);
			else
				launched_async_io = 1;
			break;
		}
		savedcbp = cbp;
	}
	/*
	 * Note that we need to synchronously write snapshots that
	 * have not been unlinked, and hence will be visible after
	 * a crash, to ensure their integrity. At a minimum we
	 * ensure the integrity of the filesystem metadata, but
	 * use the dopersistence sysctl-setable flag to decide on
	 * the persistence needed for file content data.
	 */
	if (savedcbp) {
		vp = savedcbp->b_vp;
		bawrite(savedcbp);
		if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
		    dopersistence) && VTOI(vp)->i_effnlink > 0)
			(void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT);
		else
			launched_async_io = 1;
	}
	lockmgr(vp->v_vnlock, LK_RELEASE, nil);
	td->td_pflags = (td->td_pflags & ~TDP_NORUNNINGBUF) |
	    prev_norunningbuf;
	if (launched_async_io && (td->td_pflags & TDP_NORUNNINGBUF) == 0)
		waitrunningbufspace();
	/*
	 * I/O on bp will now be started, so count it in runningbufspace.
	 */
	if (saved_runningbufspace != 0) {
		bp->b_runningbufspace = saved_runningbufspace;
		atomic_add_long(&runningbufspace, bp->b_runningbufspace);
	}
	return (error);
}
  2343. /*
  2344. * sync snapshots to force freework records waiting on snapshots to claim
  2345. * blocks to free.
  2346. */
  2347. void
  2348. ffs_sync_snap (struct mount *mp, int waitfor)
  2349. {
  2350. struct snapdata *sn;
  2351. struct vnode *devvp;
  2352. struct vnode *vp;
  2353. struct inode *ip;
  2354. devvp = VFSTOUFS(mp)->um_devvp;
  2355. if ((devvp->v_vflag & VV_COPYONWRITE) == 0)
  2356. return;
  2357. for (;;) {
  2358. VI_LOCK(devvp);
  2359. sn = devvp->v_rdev->si_snapdata;
  2360. if (sn == nil) {
  2361. VI_UNLOCK(devvp);
  2362. return;
  2363. }
  2364. if (lockmgr(&sn->sn_lock,
  2365. LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
  2366. VI_MTX(devvp)) == 0)
  2367. break;
  2368. }
  2369. TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) {
  2370. vp = ITOV(ip);
  2371. ffs_syncvnode(vp, waitfor, NO_INO_UPDT);
  2372. }
  2373. lockmgr(&sn->sn_lock, LK_RELEASE, nil);
  2374. }
  2375. /*
  2376. * Read the specified block into the given buffer.
  2377. * Much of this boiler-plate comes from bwrite().
  2378. */
  2379. static int
  2380. readblock(vp, bp, lbn)
  2381. struct vnode *vp;
  2382. struct buf *bp;
  2383. ufs2_daddr_t lbn;
  2384. {
  2385. struct inode *ip = VTOI(vp);
  2386. struct bio *bip;
  2387. struct fs *fs;
  2388. ip = VTOI(vp);
  2389. fs = ITOFS(ip);
  2390. bip = g_alloc_bio();
  2391. bip->bio_cmd = BIO_READ;
  2392. bip->bio_offset = dbtob(fsbtodb(fs, blkstofrags(fs, lbn)));
  2393. bip->bio_data = bp->b_data;
  2394. bip->bio_length = bp->b_bcount;
  2395. bip->bio_done = nil;
  2396. g_io_request(bip, ITODEVVP(ip)->v_bufobj.bo_private);
  2397. bp->b_error = biowait(bip, "snaprdb");
  2398. g_destroy_bio(bip);
  2399. return (bp->b_error);
  2400. }
  2401. #endif // 0
  2402. #endif
  2403. #if 0
  2404. /*
  2405. * Process file deletes that were deferred by ufs_inactive() due to
  2406. * the file system being suspended. Transfer IN_LAZYACCESS into
  2407. * IN_MODIFIED for vnodes that were accessed during suspension.
  2408. */
void
process_deferred_inactive(struct mount *mp)
{
	struct vnode *vp, *mvp;
	struct inode *ip;
	struct thread *td;
	int error;

	td = curthread;
	(void) vn_start_secondary_write(nil, &mp, V_WAIT);
loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		/*
		 * IN_LAZYACCESS is checked here without holding any
		 * vnode lock, but this flag is set only while holding
		 * vnode interlock.
		 */
		if (vp->v_type == VNON ||
		    ((VTOI(vp)->i_flag & IN_LAZYACCESS) == 0 &&
		    ((vp->v_iflag & VI_OWEINACT) == 0 || vp->v_usecount > 0))) {
			VI_UNLOCK(vp);
			continue;
		}
		vholdl(vp);
		error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK);
		if (error != 0) {
			vdrop(vp);
			if (error == ENOENT)
				continue;	/* vnode recycled */
			/* Lock failed for another reason: restart the scan. */
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			goto loop;
		}
		ip = VTOI(vp);
		/* Promote a lazy access timestamp into a real update. */
		if ((ip->i_flag & IN_LAZYACCESS) != 0) {
			ip->i_flag &= ~IN_LAZYACCESS;
			ip->i_flag |= IN_MODIFIED;
		}
		VI_LOCK(vp);
		/* Recheck under the interlock; a new reference may exist. */
		if ((vp->v_iflag & VI_OWEINACT) == 0 || vp->v_usecount > 0) {
			VI_UNLOCK(vp);
			VOP_UNLOCK(vp, 0);
			vdrop(vp);
			continue;
		}
		vinactive(vp, td);
		VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
		    ("process_deferred_inactive: got VI_OWEINACT"));
		VI_UNLOCK(vp);
		VOP_UNLOCK(vp, 0);
		vdrop(vp);
	}
	vn_finished_secondary_write(mp);
}
  2461. #ifndef NO_FFS_SNAPSHOT
  2462. static struct snapdata *
  2463. ffs_snapdata_alloc(void)
  2464. {
  2465. struct snapdata *sn;
  2466. /*
  2467. * Fetch a snapdata from the free list if there is one available.
  2468. */
  2469. mtx_lock(&snapfree_lock);
  2470. sn = LIST_FIRST(&snapfree);
  2471. if (sn != nil)
  2472. LIST_REMOVE(sn, sn_link);
  2473. mtx_unlock(&snapfree_lock);
  2474. if (sn != nil)
  2475. return (sn);
  2476. /*
  2477. * If there were no free snapdatas allocate one.
  2478. */
  2479. sn = malloc(sizeof *sn, M_UFSMNT, M_WAITOK | M_ZERO);
  2480. TAILQ_INIT(&sn->sn_head);
  2481. lockinit(&sn->sn_lock, PVFS, "snaplk", VLKTIMEOUT,
  2482. LK_CANRECURSE | LK_NOSHARE);
  2483. return (sn);
  2484. }
/*
 * The snapdata is never freed because we can not be certain that
 * there are no threads sleeping on the snap lock.  Persisting
 * them permanently avoids costly synchronization in ffs_lock().
 */
static void
ffs_snapdata_free(struct snapdata *sn)
{
	/* Park the structure on the global free list for later reuse. */
	mtx_lock(&snapfree_lock);
	LIST_INSERT_HEAD(&snapfree, sn, sn_link);
	mtx_unlock(&snapfree_lock);
}
/*
 * Try to free the snapdata associated with devvp.  Called with the
 * devvp interlock held; always releases it before returning.
 */
static void
try_free_snapdata(struct vnode *devvp)
{
	struct snapdata *sn;
	ufs2_daddr_t *snapblklist;

	ASSERT_VI_LOCKED(devvp, "try_free_snapdata");
	sn = devvp->v_rdev->si_snapdata;
	/* Nothing to do while snapshots remain active on the device. */
	if (sn == nil || TAILQ_FIRST(&sn->sn_head) != nil ||
	    (devvp->v_vflag & VV_COPYONWRITE) == 0) {
		VI_UNLOCK(devvp);
		return;
	}
	devvp->v_rdev->si_snapdata = nil;
	devvp->v_vflag &= ~VV_COPYONWRITE;
	/* LK_DRAIN waits out all other lock holders; drops the interlock. */
	lockmgr(&sn->sn_lock, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp));
	snapblklist = sn->sn_blklist;
	sn->sn_blklist = nil;
	sn->sn_listsize = 0;
	lockmgr(&sn->sn_lock, LK_RELEASE, nil);
	if (snapblklist != nil)
		free(snapblklist, M_UFSMNT);
	ffs_snapdata_free(sn);
}
/*
 * Return devvp's snapdata with its shared snapshot lock held
 * exclusively, installing a freshly allocated snapdata if the device
 * has none yet.
 */
static struct snapdata *
ffs_snapdata_acquire(struct vnode *devvp)
{
	struct snapdata *nsn;
	struct snapdata *sn;

	/*
	 * Allocate a free snapdata.  This is done before acquiring the
	 * devvp lock to avoid allocation while the devvp interlock is
	 * held.
	 */
	nsn = ffs_snapdata_alloc();
	/*
	 * If snapshots already exist on this filesystem grab a
	 * reference to the shared lock.  Otherwise this is the first
	 * snapshot on this filesystem and we need to use our
	 * pre-allocated snapdata.
	 */
	VI_LOCK(devvp);
	if (devvp->v_rdev->si_snapdata == nil) {
		devvp->v_rdev->si_snapdata = nsn;
		nsn = nil;
	}
	sn = devvp->v_rdev->si_snapdata;
	/*
	 * Acquire the snapshot lock.  LK_INTERLOCK hands off the devvp
	 * interlock to lockmgr, which releases it.
	 */
	lockmgr(&sn->sn_lock,
	    LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, VI_MTX(devvp));
	/*
	 * Free any unused snapdata.
	 */
	if (nsn != nil)
		ffs_snapdata_free(nsn);
	return (sn);
}
  2556. #endif
  2557. #endif // 0