100-overlayfs_v12.patch 80 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232
  1. --- a/Documentation/filesystems/Locking
  2. +++ b/Documentation/filesystems/Locking
  3. @@ -62,6 +62,7 @@ ata *);
  4. int (*removexattr) (struct dentry *, const char *);
  5. void (*truncate_range)(struct inode *, loff_t, loff_t);
  6. int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
  7. + struct file *(*open)(struct dentry *,struct file *,const struct cred *);
  8. locking rules:
  9. all may block
  10. @@ -89,6 +90,7 @@ listxattr: no
  11. removexattr: yes
  12. truncate_range: yes
  13. fiemap: no
  14. +open: no
  15. Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
  16. victim.
  17. cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
  18. --- /dev/null
  19. +++ b/Documentation/filesystems/overlayfs.txt
  20. @@ -0,0 +1,199 @@
  21. +Written by: Neil Brown <neilb@suse.de>
  22. +
  23. +Overlay Filesystem
  24. +==================
  25. +
  26. +This document describes a prototype for a new approach to providing
  27. +overlay-filesystem functionality in Linux (sometimes referred to as
  28. +union-filesystems). An overlay-filesystem tries to present a
  29. +filesystem which is the result of overlaying one filesystem on top
  30. +of the other.
  31. +
  32. +The result will inevitably fail to look exactly like a normal
  33. +filesystem for various technical reasons. The expectation is that
  34. +many use cases will be able to ignore these differences.
  35. +
  36. +This approach is 'hybrid' because the objects that appear in the
  37. +filesystem do not all appear to belong to that filesystem. In many
  38. +cases an object accessed in the union will be indistinguishable
  39. +from accessing the corresponding object from the original filesystem.
  40. +This is most obvious from the 'st_dev' field returned by stat(2).
  41. +
  42. +While directories will report an st_dev from the overlay-filesystem,
  43. +all non-directory objects will report an st_dev from the lower or
  44. +upper filesystem that is providing the object. Similarly st_ino will
  45. +only be unique when combined with st_dev, and both of these can change
  46. +over the lifetime of a non-directory object. Many applications and
  47. +tools ignore these values and will not be affected.
  48. +
  49. +Upper and Lower
  50. +---------------
  51. +
  52. +An overlay filesystem combines two filesystems - an 'upper' filesystem
  53. +and a 'lower' filesystem. When a name exists in both filesystems, the
  54. +object in the 'upper' filesystem is visible while the object in the
  55. +'lower' filesystem is either hidden or, in the case of directories,
  56. +merged with the 'upper' object.
  57. +
  58. +It would be more correct to refer to an upper and lower 'directory
  59. +tree' rather than 'filesystem' as it is quite possible for both
  60. +directory trees to be in the same filesystem and there is no
  61. +requirement that the root of a filesystem be given for either upper or
  62. +lower.
  63. +
  64. +The lower filesystem can be any filesystem supported by Linux and does
  65. +not need to be writable. The lower filesystem can even be another
  66. +overlayfs. The upper filesystem will normally be writable and if it
  67. +is it must support the creation of trusted.* extended attributes, and
  68. +must provide valid d_type in readdir responses, at least for symbolic
  69. +links - so NFS is not suitable.
  70. +
  71. +A read-only overlay of two read-only filesystems may use any
  72. +filesystem type.
  73. +
  74. +Directories
  75. +-----------
  76. +
  77. +Overlaying mainly involves directories. If a given name appears in both
  78. +upper and lower filesystems and refers to a non-directory in either,
  79. +then the lower object is hidden - the name refers only to the upper
  80. +object.
  81. +
  82. +Where both upper and lower objects are directories, a merged directory
  83. +is formed.
  84. +
  85. +At mount time, the two directories given as mount options are combined
  86. +into a merged directory:
  87. +
  88. + mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper /overlay
  89. +
  90. +Then whenever a lookup is requested in such a merged directory, the
  91. +lookup is performed in each actual directory and the combined result
  92. +is cached in the dentry belonging to the overlay filesystem. If both
  93. +actual lookups find directories, both are stored and a merged
  94. +directory is created, otherwise only one is stored: the upper if it
  95. +exists, else the lower.
  96. +
  97. +Only the lists of names from directories are merged. Other content
  98. +such as metadata and extended attributes are reported for the upper
  99. +directory only. These attributes of the lower directory are hidden.
  100. +
  101. +whiteouts and opaque directories
  102. +--------------------------------
  103. +
  104. +In order to support rm and rmdir without changing the lower
  105. +filesystem, an overlay filesystem needs to record in the upper filesystem
  106. +that files have been removed. This is done using whiteouts and opaque
  107. +directories (non-directories are always opaque).
  108. +
  109. +The overlay filesystem uses extended attributes with a
  110. +"trusted.overlay." prefix to record these details.
  111. +
  112. +A whiteout is created as a symbolic link with target
  113. +"(overlay-whiteout)" and with xattr "trusted.overlay.whiteout" set to "y".
  114. +When a whiteout is found in the upper level of a merged directory, any
  115. +matching name in the lower level is ignored, and the whiteout itself
  116. +is also hidden.
  117. +
  118. +A directory is made opaque by setting the xattr "trusted.overlay.opaque"
  119. +to "y". Where the upper filesystem contains an opaque directory, any
  120. +directory in the lower filesystem with the same name is ignored.
  121. +
  122. +readdir
  123. +-------
  124. +
  125. +When a 'readdir' request is made on a merged directory, the upper and
  126. +lower directories are each read and the name lists merged in the
  127. +obvious way (upper is read first, then lower - entries that already
  128. +exist are not re-added). This merged name list is cached in the
  129. +'struct file' and so remains as long as the file is kept open. If the
  130. +directory is opened and read by two processes at the same time, they
  131. +will each have separate caches. A seekdir to the start of the
  132. +directory (offset 0) followed by a readdir will cause the cache to be
  133. +discarded and rebuilt.
  134. +
  135. +This means that changes to the merged directory do not appear while a
  136. +directory is being read. This is unlikely to be noticed by many
  137. +programs.
  138. +
  139. +seek offsets are assigned sequentially when the directories are read.
  140. +Thus if a program
  141. + - reads part of a directory
  142. + - remembers an offset, and closes the directory
  143. + - re-opens the directory some time later
  144. + - seeks to the remembered offset
  145. +
  146. +there may be little correlation between the old and new locations in
  147. +the list of filenames, particularly if anything has changed in the
  148. +directory.
  149. +
  150. +Readdir on directories that are not merged is simply handled by the
  151. +underlying directory (upper or lower).
  152. +
  153. +
  154. +Non-directories
  155. +---------------
  156. +
  157. +Objects that are not directories (files, symlinks, device-special
  158. +files etc.) are presented either from the upper or lower filesystem as
  159. +appropriate. When a file in the lower filesystem is accessed in a way
  160. +that requires write-access, such as opening for write access, changing
  161. +some metadata etc., the file is first copied from the lower filesystem
  162. +to the upper filesystem (copy_up). Note that creating a hard-link
  163. +also requires copy_up, though of course creation of a symlink does
  164. +not.
  165. +
  166. +The copy_up may turn out to be unnecessary, for example if the file is
  167. +opened for read-write but the data is not modified.
  168. +
  169. +The copy_up process first makes sure that the containing directory
  170. +exists in the upper filesystem - creating it and any parents as
  171. +necessary. It then creates the object with the same metadata (owner,
  172. +mode, mtime, symlink-target etc.) and then if the object is a file, the
  173. +data is copied from the lower to the upper filesystem. Finally any
  174. +extended attributes are copied up.
  175. +
  176. +Once the copy_up is complete, the overlay filesystem simply
  177. +provides direct access to the newly created file in the upper
  178. +filesystem - future operations on the file are barely noticed by the
  179. +overlay filesystem (though an operation on the name of the file such as
  180. +rename or unlink will of course be noticed and handled).
  181. +
  182. +
  183. +Non-standard behavior
  184. +---------------------
  185. +
  186. +The copy_up operation essentially creates a new, identical file and
  187. +moves it over to the old name. The new file may be on a different
  188. +filesystem, so both st_dev and st_ino of the file may change.
  189. +
  190. +Any open files referring to this inode will access the old data and
  191. +metadata. Similarly any file locks obtained before copy_up will not
  192. +apply to the copied up file.
  193. +
  194. +On a file opened with O_RDONLY, fchmod(2), fchown(2), futimesat(2)
  195. +and fsetxattr(2) will fail with EROFS.
  196. +
  197. +If a file with multiple hard links is copied up, then this will
  198. +"break" the link. Changes will not be propagated to other names
  199. +referring to the same inode.
  200. +
  201. +Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory
  202. +object in overlayfs will not contain valid absolute paths, only
  203. +relative paths leading up to the filesystem's root. This will be
  204. +fixed in the future.
  205. +
  206. +Some operations are not atomic, for example a crash during copy_up or
  207. +rename will leave the filesystem in an inconsistent state. This will
  208. +be addressed in the future.
  209. +
  210. +Changes to underlying filesystems
  211. +---------------------------------
  212. +
  213. +Offline changes, when the overlay is not mounted, are allowed to either
  214. +the upper or the lower trees.
  215. +
  216. +Changes to the underlying filesystems while part of a mounted overlay
  217. +filesystem are not allowed. If the underlying filesystem is changed,
  218. +the behavior of the overlay is undefined, though it will not result in
  219. +a crash or deadlock.
  220. --- a/Documentation/filesystems/vfs.txt
  221. +++ b/Documentation/filesystems/vfs.txt
  222. @@ -364,6 +364,8 @@ struct inode_operations {
  223. ssize_t (*listxattr) (struct dentry *, char *, size_t);
  224. int (*removexattr) (struct dentry *, const char *);
  225. void (*truncate_range)(struct inode *, loff_t, loff_t);
  226. + struct file *(*open) (struct dentry *, struct file *,
  227. + const struct cred *);
  228. };
  229. Again, all methods are called without any locks being held, unless
  230. @@ -475,6 +477,12 @@ otherwise noted.
  231. truncate_range: a method provided by the underlying filesystem to truncate a
  232. range of blocks , i.e. punch a hole somewhere in a file.
  233. + open: this is an alternative to f_op->open(), the difference is that this
  234. + method may return any open file, not necessarily originating from the
  235. + same filesystem as the one i_op->open() was called on. It may be useful
  236. + for stacking filesystems which want to allow native I/O directly on
  237. + underlying files.
  238. +
  239. The Address Space Object
  240. ========================
  241. --- a/MAINTAINERS
  242. +++ b/MAINTAINERS
  243. @@ -4955,6 +4955,13 @@ F: drivers/scsi/osd/
  244. F: include/scsi/osd_*
  245. F: fs/exofs/
  246. +OVERLAYFS FILESYSTEM
  247. +M: Miklos Szeredi <miklos@szeredi.hu>
  248. +L: linux-fsdevel@vger.kernel.org
  249. +S: Supported
  250. +F: fs/overlayfs/*
  251. +F: Documentation/filesystems/overlayfs.txt
  252. +
  253. P54 WIRELESS DRIVER
  254. M: Christian Lamparter <chunkeey@googlemail.com>
  255. L: linux-wireless@vger.kernel.org
  256. --- a/fs/Kconfig
  257. +++ b/fs/Kconfig
  258. @@ -63,6 +63,7 @@ source "fs/quota/Kconfig"
  259. source "fs/autofs4/Kconfig"
  260. source "fs/fuse/Kconfig"
  261. +source "fs/overlayfs/Kconfig"
  262. config CUSE
  263. tristate "Character device in Userspace support"
  264. --- a/fs/Makefile
  265. +++ b/fs/Makefile
  266. @@ -105,6 +105,7 @@ obj-$(CONFIG_QNX4FS_FS) += qnx4/
  267. obj-$(CONFIG_AUTOFS4_FS) += autofs4/
  268. obj-$(CONFIG_ADFS_FS) += adfs/
  269. obj-$(CONFIG_FUSE_FS) += fuse/
  270. +obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/
  271. obj-$(CONFIG_UDF_FS) += udf/
  272. obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
  273. obj-$(CONFIG_OMFS_FS) += omfs/
  274. --- a/fs/ecryptfs/main.c
  275. +++ b/fs/ecryptfs/main.c
  276. @@ -544,6 +544,13 @@ static struct dentry *ecryptfs_mount(str
  277. s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
  278. s->s_blocksize = path.dentry->d_sb->s_blocksize;
  279. s->s_magic = ECRYPTFS_SUPER_MAGIC;
  280. + s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1;
  281. +
  282. + rc = -EINVAL;
  283. + if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
  284. + printk(KERN_ERR "eCryptfs: maximum fs stacking depth exceeded\n");
  285. + goto out_free;
  286. + }
  287. inode = ecryptfs_get_inode(path.dentry->d_inode, s);
  288. rc = PTR_ERR(inode);
  289. --- a/fs/namespace.c
  290. +++ b/fs/namespace.c
  291. @@ -1325,6 +1325,24 @@ void drop_collected_mounts(struct vfsmou
  292. release_mounts(&umount_list);
  293. }
  294. +struct vfsmount *clone_private_mount(struct path *path) /* clone the mount at @path with CL_PRIVATE; yields the new vfsmount or an ERR_PTR */
  295. +{
  296. + struct mount *old_mnt = real_mount(path->mnt);
  297. + struct mount *new_mnt;
  298. +
  299. + if (IS_MNT_UNBINDABLE(old_mnt))
  300. + return ERR_PTR(-EINVAL); /* unbindable mounts are refused */
  301. +
  302. + down_read(&namespace_sem); /* clone_mnt() runs with namespace_sem held for read */
  303. + new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
  304. + up_read(&namespace_sem);
  305. + if (!new_mnt)
  306. + return ERR_PTR(-ENOMEM); /* a NULL result from clone_mnt() is reported as -ENOMEM */
  307. +
  308. + return &new_mnt->mnt; /* hand back the vfsmount embedded in the new struct mount */
  309. +}
  310. +EXPORT_SYMBOL_GPL(clone_private_mount);
  311. +
  312. int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
  313. struct vfsmount *root)
  314. {
  315. --- a/fs/open.c
  316. +++ b/fs/open.c
  317. @@ -644,24 +644,24 @@ static inline int __get_file_write_acces
  318. return error;
  319. }
  320. -static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
  321. - struct file *f,
  322. - int (*open)(struct inode *, struct file *),
  323. - const struct cred *cred)
  324. +static struct file *__dentry_open(struct path *path, struct file *f,
  325. + int (*open)(struct inode *, struct file *),
  326. + const struct cred *cred)
  327. {
  328. static const struct file_operations empty_fops = {};
  329. struct inode *inode;
  330. int error;
  331. + path_get(path);
  332. f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
  333. FMODE_PREAD | FMODE_PWRITE;
  334. if (unlikely(f->f_flags & O_PATH))
  335. f->f_mode = FMODE_PATH;
  336. - inode = dentry->d_inode;
  337. + inode = path->dentry->d_inode;
  338. if (f->f_mode & FMODE_WRITE) {
  339. - error = __get_file_write_access(inode, mnt);
  340. + error = __get_file_write_access(inode, path->mnt);
  341. if (error)
  342. goto cleanup_file;
  343. if (!special_file(inode->i_mode))
  344. @@ -669,8 +669,7 @@ static struct file *__dentry_open(struct
  345. }
  346. f->f_mapping = inode->i_mapping;
  347. - f->f_path.dentry = dentry;
  348. - f->f_path.mnt = mnt;
  349. + f->f_path = *path;
  350. f->f_pos = 0;
  351. file_sb_list_add(f, inode->i_sb);
  352. @@ -727,7 +726,7 @@ cleanup_all:
  353. * here, so just reset the state.
  354. */
  355. file_reset_write(f);
  356. - mnt_drop_write(mnt);
  357. + mnt_drop_write(path->mnt);
  358. }
  359. }
  360. file_sb_list_del(f);
  361. @@ -735,8 +734,7 @@ cleanup_all:
  362. f->f_path.mnt = NULL;
  363. cleanup_file:
  364. put_filp(f);
  365. - dput(dentry);
  366. - mntput(mnt);
  367. + path_put(path);
  368. return ERR_PTR(error);
  369. }
  370. @@ -762,14 +760,14 @@ cleanup_file:
  371. struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
  372. int (*open)(struct inode *, struct file *))
  373. {
  374. + struct path path = { .dentry = dentry, .mnt = nd->path.mnt };
  375. const struct cred *cred = current_cred();
  376. if (IS_ERR(nd->intent.open.file))
  377. goto out;
  378. if (IS_ERR(dentry))
  379. goto out_err;
  380. - nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
  381. - nd->intent.open.file,
  382. + nd->intent.open.file = __dentry_open(&path, nd->intent.open.file,
  383. open, cred);
  384. out:
  385. return nd->intent.open.file;
  386. @@ -797,11 +795,9 @@ struct file *nameidata_to_filp(struct na
  387. nd->intent.open.file = NULL;
  388. /* Has the filesystem initialised the file for us? */
  389. - if (filp->f_path.dentry == NULL) {
  390. - path_get(&nd->path);
  391. - filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
  392. - NULL, cred);
  393. - }
  394. + if (filp->f_path.dentry == NULL)
  395. + filp = vfs_open(&nd->path, filp, cred);
  396. +
  397. return filp;
  398. }
  399. @@ -812,27 +808,48 @@ struct file *nameidata_to_filp(struct na
  400. struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
  401. const struct cred *cred)
  402. {
  403. - int error;
  404. struct file *f;
  405. + struct file *ret;
  406. + struct path path = { .dentry = dentry, .mnt = mnt };
  407. validate_creds(cred);
  408. /* We must always pass in a valid mount pointer. */
  409. BUG_ON(!mnt);
  410. - error = -ENFILE;
  411. + ret = ERR_PTR(-ENFILE);
  412. f = get_empty_filp();
  413. - if (f == NULL) {
  414. - dput(dentry);
  415. - mntput(mnt);
  416. - return ERR_PTR(error);
  417. + if (f != NULL) {
  418. + f->f_flags = flags;
  419. + ret = vfs_open(&path, f, cred);
  420. }
  421. + path_put(&path);
  422. - f->f_flags = flags;
  423. - return __dentry_open(dentry, mnt, f, NULL, cred);
  424. + return ret;
  425. }
  426. EXPORT_SYMBOL(dentry_open);
  427. +/**
  428. + * vfs_open - open the file at the given path
  429. + * @path: path to open
  430. + * @filp: newly allocated file with f_flags initialized
  431. + * @cred: credentials to use
  432. + *
  433. + * Open the file. If successful, the returned file will have acquired
  434. + * an additional reference for path.
  435. + */
  436. +struct file *vfs_open(struct path *path, struct file *filp,
  437. + const struct cred *cred)
  438. +{
  439. + struct inode *inode = path->dentry->d_inode;
  440. +
  441. + if (inode->i_op->open)
  442. + return inode->i_op->open(path->dentry, filp, cred); /* the inode's own ->open() method takes precedence when provided */
  443. + else
  444. + return __dentry_open(path, filp, NULL, cred); /* otherwise fall back to __dentry_open() with no open callback */
  445. +}
  446. +EXPORT_SYMBOL(vfs_open);
  447. +
  448. static void __put_unused_fd(struct files_struct *files, unsigned int fd)
  449. {
  450. struct fdtable *fdt = files_fdtable(files);
  451. --- /dev/null
  452. +++ b/fs/overlayfs/Kconfig
  453. @@ -0,0 +1,4 @@
  454. +config OVERLAYFS_FS
  455. + tristate "Overlay filesystem support"
  456. + help
  457. + Add support for overlay filesystem.
  458. --- /dev/null
  459. +++ b/fs/overlayfs/Makefile
  460. @@ -0,0 +1,7 @@
  461. +#
  462. +# Makefile for the overlay filesystem.
  463. +#
  464. +
  465. +obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o
  466. +
  467. +overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o
  468. --- /dev/null
  469. +++ b/fs/overlayfs/copy_up.c
  470. @@ -0,0 +1,384 @@
  471. +/*
  472. + *
  473. + * Copyright (C) 2011 Novell Inc.
  474. + *
  475. + * This program is free software; you can redistribute it and/or modify it
  476. + * under the terms of the GNU General Public License version 2 as published by
  477. + * the Free Software Foundation.
  478. + */
  479. +
  480. +#include <linux/fs.h>
  481. +#include <linux/slab.h>
  482. +#include <linux/file.h>
  483. +#include <linux/splice.h>
  484. +#include <linux/xattr.h>
  485. +#include <linux/security.h>
  486. +#include <linux/uaccess.h>
  487. +#include "overlayfs.h"
  488. +
  489. +#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
  490. +
  491. +static int ovl_copy_up_xattr(struct dentry *old, struct dentry *new)
  492. +{
  493. + ssize_t list_size, size;
  494. + char *buf, *name, *value;
  495. + int error;
  496. +
  497. + if (!old->d_inode->i_op->getxattr ||
  498. + !new->d_inode->i_op->getxattr)
  499. + return 0;
  500. +
  501. + list_size = vfs_listxattr(old, NULL, 0);
  502. + if (list_size <= 0) {
  503. + if (list_size == -EOPNOTSUPP)
  504. + return 0;
  505. + return list_size;
  506. + }
  507. +
  508. + buf = kzalloc(list_size, GFP_KERNEL);
  509. + if (!buf)
  510. + return -ENOMEM;
  511. +
  512. + error = -ENOMEM;
  513. + value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
  514. + if (!value)
  515. + goto out;
  516. +
  517. + list_size = vfs_listxattr(old, buf, list_size);
  518. + if (list_size <= 0) {
  519. + error = list_size;
  520. + goto out_free_value;
  521. + }
  522. +
  523. + for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
  524. + size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
  525. + if (size <= 0) {
  526. + error = size;
  527. + goto out_free_value;
  528. + }
  529. + error = vfs_setxattr(new, name, value, size, 0);
  530. + if (error)
  531. + goto out_free_value;
  532. + }
  533. +
  534. +out_free_value:
  535. + kfree(value);
  536. +out:
  537. + kfree(buf);
  538. + return error;
  539. +}
  540. +
  541. +static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
  542. +{
  543. + struct file *old_file;
  544. + struct file *new_file;
  545. + int error = 0;
  546. +
  547. + if (len == 0)
  548. + return 0;
  549. +
  550. + old_file = ovl_path_open(old, O_RDONLY);
  551. + if (IS_ERR(old_file))
  552. + return PTR_ERR(old_file);
  553. +
  554. + new_file = ovl_path_open(new, O_WRONLY);
  555. + if (IS_ERR(new_file)) {
  556. + error = PTR_ERR(new_file);
  557. + goto out_fput;
  558. + }
  559. +
  560. + /* FIXME: copy up sparse files efficiently */
  561. + while (len) {
  562. + loff_t offset = new_file->f_pos;
  563. + size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
  564. + long bytes;
  565. +
  566. + if (len < this_len)
  567. + this_len = len;
  568. +
  569. + if (signal_pending_state(TASK_KILLABLE, current)) {
  570. + error = -EINTR;
  571. + break;
  572. + }
  573. +
  574. + bytes = do_splice_direct(old_file, &offset, new_file, this_len,
  575. + SPLICE_F_MOVE);
  576. + if (bytes <= 0) {
  577. + error = bytes;
  578. + break;
  579. + }
  580. +
  581. + len -= bytes;
  582. + }
  583. +
  584. + fput(new_file);
  585. +out_fput:
  586. + fput(old_file);
  587. + return error;
  588. +}
  589. +
  590. +static char *ovl_read_symlink(struct dentry *realdentry)
  591. +{
  592. + int res;
  593. + char *buf;
  594. + struct inode *inode = realdentry->d_inode;
  595. + mm_segment_t old_fs;
  596. +
  597. + res = -EINVAL;
  598. + if (!inode->i_op->readlink)
  599. + goto err;
  600. +
  601. + res = -ENOMEM;
  602. + buf = (char *) __get_free_page(GFP_KERNEL);
  603. + if (!buf)
  604. + goto err;
  605. +
  606. + old_fs = get_fs();
  607. + set_fs(get_ds());
  608. + /* The cast to a user pointer is valid due to the set_fs() */
  609. + res = inode->i_op->readlink(realdentry,
  610. + (char __user *)buf, PAGE_SIZE - 1);
  611. + set_fs(old_fs);
  612. + if (res < 0) {
  613. + free_page((unsigned long) buf);
  614. + goto err;
  615. + }
  616. + buf[res] = '\0';
  617. +
  618. + return buf;
  619. +
  620. +err:
  621. + return ERR_PTR(res);
  622. +}
  623. +
  624. +static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
  625. +{
  626. + struct iattr attr = {
  627. + .ia_valid =
  628. + ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
  629. + .ia_atime = stat->atime,
  630. + .ia_mtime = stat->mtime,
  631. + };
  632. +
  633. + return notify_change(upperdentry, &attr);
  634. +}
  635. +
  636. +static int ovl_set_mode(struct dentry *upperdentry, umode_t mode)
  637. +{
  638. + struct iattr attr = {
  639. + .ia_valid = ATTR_MODE,
  640. + .ia_mode = mode,
  641. + };
  642. +
  643. + return notify_change(upperdentry, &attr);
  644. +}
  645. +
  646. +static int ovl_copy_up_locked(struct dentry *upperdir, struct dentry *dentry,
  647. + struct path *lowerpath, struct kstat *stat,
  648. + const char *link)
  649. +{
  650. + int err;
  651. + struct path newpath;
  652. + umode_t mode = stat->mode;
  653. +
  654. + /* Can't properly set mode on creation because of the umask */
  655. + stat->mode &= S_IFMT;
  656. +
  657. + ovl_path_upper(dentry, &newpath);
  658. + WARN_ON(newpath.dentry);
  659. + newpath.dentry = ovl_upper_create(upperdir, dentry, stat, link);
  660. + if (IS_ERR(newpath.dentry))
  661. + return PTR_ERR(newpath.dentry);
  662. +
  663. + if (S_ISREG(stat->mode)) {
  664. + err = ovl_copy_up_data(lowerpath, &newpath, stat->size);
  665. + if (err)
  666. + goto err_remove;
  667. + }
  668. +
  669. + err = ovl_copy_up_xattr(lowerpath->dentry, newpath.dentry);
  670. + if (err)
  671. + goto err_remove;
  672. +
  673. + mutex_lock(&newpath.dentry->d_inode->i_mutex);
  674. + if (!S_ISLNK(stat->mode))
  675. + err = ovl_set_mode(newpath.dentry, mode);
  676. + if (!err)
  677. + err = ovl_set_timestamps(newpath.dentry, stat);
  678. + mutex_unlock(&newpath.dentry->d_inode->i_mutex);
  679. + if (err)
  680. + goto err_remove;
  681. +
  682. + ovl_dentry_update(dentry, newpath.dentry);
  683. +
  684. + /*
  685. + * Easiest way to get rid of the lower dentry reference is to
  686. + * drop this dentry. This is neither needed nor possible for
  687. + * directories.
  688. + */
  689. + if (!S_ISDIR(stat->mode))
  690. + d_drop(dentry);
  691. +
  692. + return 0;
  693. +
  694. +err_remove:
  695. + if (S_ISDIR(stat->mode))
  696. + vfs_rmdir(upperdir->d_inode, newpath.dentry);
  697. + else
  698. + vfs_unlink(upperdir->d_inode, newpath.dentry);
  699. +
  700. + dput(newpath.dentry);
  701. +
  702. + return err;
  703. +}
  704. +
  705. +/*
  706. + * Copy up a single dentry
  707. + *
  708. + * Directory renames are only allowed on "pure upper" (already created on
  709. + * upper filesystem, never copied up). Directories which are on lower or
  710. + * are merged may not be renamed. For these -EXDEV is returned and
  711. + * userspace has to deal with it. This means, when copying up a
  712. + * directory we can rely on it and ancestors being stable.
  713. + *
  714. + * Non-directory renames start with copy up of source if necessary. The
  715. + * actual rename will only proceed once the copy up was successful. Copy
  716. + * up uses upper parent i_mutex for exclusion. Since rename can change
  717. + * d_parent it is possible that the copy up will lock the old parent. At
  718. + * that point the file will have already been copied up anyway.
  719. + */
  720. +static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
  721. + struct path *lowerpath, struct kstat *stat)
  722. +{
  723. + int err;
  724. + struct kstat pstat;
  725. + struct path parentpath;
  726. + struct dentry *upperdir;
  727. + const struct cred *old_cred;
  728. + struct cred *override_cred;
  729. + char *link = NULL;
  730. +
  731. + ovl_path_upper(parent, &parentpath);
  732. + upperdir = parentpath.dentry;
  733. +
  734. + err = vfs_getattr(parentpath.mnt, parentpath.dentry, &pstat);
  735. + if (err)
  736. + return err;
  737. +
  738. + if (S_ISLNK(stat->mode)) {
  739. + link = ovl_read_symlink(lowerpath->dentry);
  740. + if (IS_ERR(link))
  741. + return PTR_ERR(link);
  742. + }
  743. +
  744. + err = -ENOMEM;
  745. + override_cred = prepare_creds();
  746. + if (!override_cred)
  747. + goto out_free_link;
  748. +
  749. + override_cred->fsuid = stat->uid;
  750. + override_cred->fsgid = stat->gid;
  751. + /*
  752. + * CAP_SYS_ADMIN for copying up extended attributes
  753. + * CAP_DAC_OVERRIDE for create
  754. + * CAP_FOWNER for chmod, timestamp update
  755. + * CAP_FSETID for chmod
  756. + * CAP_MKNOD for mknod
  757. + */
  758. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  759. + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
  760. + cap_raise(override_cred->cap_effective, CAP_FOWNER);
  761. + cap_raise(override_cred->cap_effective, CAP_FSETID);
  762. + cap_raise(override_cred->cap_effective, CAP_MKNOD);
  763. + old_cred = override_creds(override_cred);
  764. +
  765. + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  766. + if (ovl_path_type(dentry) != OVL_PATH_LOWER) {
  767. + err = 0;
  768. + } else {
  769. + err = ovl_copy_up_locked(upperdir, dentry, lowerpath,
  770. + stat, link);
  771. + if (!err) {
  772. + /* Restore timestamps on parent (best effort) */
  773. + ovl_set_timestamps(upperdir, &pstat);
  774. + }
  775. + }
  776. +
  777. + mutex_unlock(&upperdir->d_inode->i_mutex);
  778. +
  779. + revert_creds(old_cred);
  780. + put_cred(override_cred);
  781. +
  782. +out_free_link:
  783. + if (link)
  784. + free_page((unsigned long) link);
  785. +
  786. + return err;
  787. +}
  788. +
  789. +int ovl_copy_up(struct dentry *dentry)
  790. +{
  791. + int err;
  792. +
  793. + err = 0;
  794. + while (!err) {
  795. + struct dentry *next;
  796. + struct dentry *parent;
  797. + struct path lowerpath;
  798. + struct kstat stat;
  799. + enum ovl_path_type type = ovl_path_type(dentry);
  800. +
  801. + if (type != OVL_PATH_LOWER)
  802. + break;
  803. +
  804. + next = dget(dentry);
  805. + /* find the topmost dentry not yet copied up */
  806. + for (;;) {
  807. + parent = dget_parent(next);
  808. +
  809. + type = ovl_path_type(parent);
  810. + if (type != OVL_PATH_LOWER)
  811. + break;
  812. +
  813. + dput(next);
  814. + next = parent;
  815. + }
  816. +
  817. + ovl_path_lower(next, &lowerpath);
  818. + err = vfs_getattr(lowerpath.mnt, lowerpath.dentry, &stat);
  819. + if (!err)
  820. + err = ovl_copy_up_one(parent, next, &lowerpath, &stat);
  821. +
  822. + dput(parent);
  823. + dput(next);
  824. + }
  825. +
  826. + return err;
  827. +}
  828. +
  829. +/* Optimize by not copying up the file first and truncating later */
  830. +int ovl_copy_up_truncate(struct dentry *dentry, loff_t size)
  831. +{
  832. + int err;
  833. + struct kstat stat;
  834. + struct path lowerpath;
  835. + struct dentry *parent = dget_parent(dentry);
  836. +
  837. + err = ovl_copy_up(parent);
  838. + if (err)
  839. + goto out_dput_parent;
  840. +
  841. + ovl_path_lower(dentry, &lowerpath);
  842. + err = vfs_getattr(lowerpath.mnt, lowerpath.dentry, &stat);
  843. + if (err)
  844. + goto out_dput_parent;
  845. +
  846. + if (size < stat.size)
  847. + stat.size = size;
  848. +
  849. + err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);
  850. +
  851. +out_dput_parent:
  852. + dput(parent);
  853. + return err;
  854. +}
  855. --- /dev/null
  856. +++ b/fs/overlayfs/dir.c
  857. @@ -0,0 +1,596 @@
  858. +/*
  859. + *
  860. + * Copyright (C) 2011 Novell Inc.
  861. + *
  862. + * This program is free software; you can redistribute it and/or modify it
  863. + * under the terms of the GNU General Public License version 2 as published by
  864. + * the Free Software Foundation.
  865. + */
  866. +
  867. +#include <linux/fs.h>
  868. +#include <linux/namei.h>
  869. +#include <linux/xattr.h>
  870. +#include <linux/security.h>
  871. +#include "overlayfs.h"
  872. +
  873. +static const char *ovl_whiteout_symlink = "(overlay-whiteout)";
  874. +
  875. +static int ovl_whiteout(struct dentry *upperdir, struct dentry *dentry)
  876. +{
  877. + int err;
  878. + struct dentry *newdentry;
  879. + const struct cred *old_cred;
  880. + struct cred *override_cred;
  881. +
  882. + /* FIXME: recheck lower dentry to see if whiteout is really needed */
  883. +
  884. + err = -ENOMEM;
  885. + override_cred = prepare_creds();
  886. + if (!override_cred)
  887. + goto out;
  888. +
  889. + /*
  890. + * CAP_SYS_ADMIN for setxattr
  891. + * CAP_DAC_OVERRIDE for symlink creation
  892. + * CAP_FOWNER for unlink in sticky directory
  893. + */
  894. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  895. + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
  896. + cap_raise(override_cred->cap_effective, CAP_FOWNER);
  897. + override_cred->fsuid = 0;
  898. + override_cred->fsgid = 0;
  899. + old_cred = override_creds(override_cred);
  900. +
  901. + newdentry = lookup_one_len(dentry->d_name.name, upperdir,
  902. + dentry->d_name.len);
  903. + err = PTR_ERR(newdentry);
  904. + if (IS_ERR(newdentry))
  905. + goto out_put_cred;
  906. +
  907. + /* Just been removed within the same locked region */
  908. + WARN_ON(newdentry->d_inode);
  909. +
  910. + err = vfs_symlink(upperdir->d_inode, newdentry, ovl_whiteout_symlink);
  911. + if (err)
  912. + goto out_dput;
  913. +
  914. + ovl_dentry_version_inc(dentry->d_parent);
  915. +
  916. + err = vfs_setxattr(newdentry, ovl_whiteout_xattr, "y", 1, 0);
  917. + if (err)
  918. + vfs_unlink(upperdir->d_inode, newdentry);
  919. +
  920. +out_dput:
  921. + dput(newdentry);
  922. +out_put_cred:
  923. + revert_creds(old_cred);
  924. + put_cred(override_cred);
  925. +out:
  926. + if (err) {
  927. + /*
  928. + * There's no way to recover from failure to whiteout.
  929. + * What should we do? Log a big fat error and... ?
  930. + */
  931. + printk(KERN_ERR "overlayfs: ERROR - failed to whiteout '%s'\n",
  932. + dentry->d_name.name);
  933. + }
  934. +
  935. + return err;
  936. +}
  937. +
  938. +static struct dentry *ovl_lookup_create(struct dentry *upperdir,
  939. + struct dentry *template)
  940. +{
  941. + int err;
  942. + struct dentry *newdentry;
  943. + struct qstr *name = &template->d_name;
  944. +
  945. + newdentry = lookup_one_len(name->name, upperdir, name->len);
  946. + if (IS_ERR(newdentry))
  947. + return newdentry;
  948. +
  949. + if (newdentry->d_inode) {
  950. + const struct cred *old_cred;
  951. + struct cred *override_cred;
  952. +
  953. + /* No need to check whiteout if lower parent is non-existent */
  954. + err = -EEXIST;
  955. + if (!ovl_dentry_lower(template->d_parent))
  956. + goto out_dput;
  957. +
  958. + if (!S_ISLNK(newdentry->d_inode->i_mode))
  959. + goto out_dput;
  960. +
  961. + err = -ENOMEM;
  962. + override_cred = prepare_creds();
  963. + if (!override_cred)
  964. + goto out_dput;
  965. +
  966. + /*
  967. + * CAP_SYS_ADMIN for getxattr
  968. + * CAP_FOWNER for unlink in sticky directory
  969. + */
  970. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  971. + cap_raise(override_cred->cap_effective, CAP_FOWNER);
  972. + old_cred = override_creds(override_cred);
  973. +
  974. + err = -EEXIST;
  975. + if (ovl_is_whiteout(newdentry))
  976. + err = vfs_unlink(upperdir->d_inode, newdentry);
  977. +
  978. + revert_creds(old_cred);
  979. + put_cred(override_cred);
  980. + if (err)
  981. + goto out_dput;
  982. +
  983. + dput(newdentry);
  984. + newdentry = lookup_one_len(name->name, upperdir, name->len);
  985. + if (IS_ERR(newdentry)) {
  986. + ovl_whiteout(upperdir, template);
  987. + return newdentry;
  988. + }
  989. +
  990. + /*
  991. + * Whiteout has just been successfully removed, parent
  992. + * i_mutex is still held, there's no way the lookup
  993. + * could return positive.
  994. + */
  995. + WARN_ON(newdentry->d_inode);
  996. + }
  997. +
  998. + return newdentry;
  999. +
  1000. +out_dput:
  1001. + dput(newdentry);
  1002. + return ERR_PTR(err);
  1003. +}
  1004. +
  1005. +struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
  1006. + struct kstat *stat, const char *link)
  1007. +{
  1008. + int err;
  1009. + struct dentry *newdentry;
  1010. + struct inode *dir = upperdir->d_inode;
  1011. +
  1012. + newdentry = ovl_lookup_create(upperdir, dentry);
  1013. + if (IS_ERR(newdentry))
  1014. + goto out;
  1015. +
  1016. + switch (stat->mode & S_IFMT) {
  1017. + case S_IFREG:
  1018. + err = vfs_create(dir, newdentry, stat->mode, NULL);
  1019. + break;
  1020. +
  1021. + case S_IFDIR:
  1022. + err = vfs_mkdir(dir, newdentry, stat->mode);
  1023. + break;
  1024. +
  1025. + case S_IFCHR:
  1026. + case S_IFBLK:
  1027. + case S_IFIFO:
  1028. + case S_IFSOCK:
  1029. + err = vfs_mknod(dir, newdentry, stat->mode, stat->rdev);
  1030. + break;
  1031. +
  1032. + case S_IFLNK:
  1033. + err = vfs_symlink(dir, newdentry, link);
  1034. + break;
  1035. +
  1036. + default:
  1037. + err = -EPERM;
  1038. + }
  1039. + if (err) {
  1040. + if (ovl_dentry_is_opaque(dentry))
  1041. + ovl_whiteout(upperdir, dentry);
  1042. + dput(newdentry);
  1043. + newdentry = ERR_PTR(err);
  1044. + } else if (WARN_ON(!newdentry->d_inode)) {
  1045. + /*
  1046. + * Not quite sure if non-instantiated dentry is legal or not.
  1047. + * VFS doesn't seem to care so check and warn here.
  1048. + */
  1049. + dput(newdentry);
  1050. + newdentry = ERR_PTR(-ENOENT);
  1051. + }
  1052. +
  1053. +out:
  1054. + return newdentry;
  1055. +
  1056. +}
  1057. +
  1058. +static int ovl_set_opaque(struct dentry *upperdentry)
  1059. +{
  1060. + int err;
  1061. + const struct cred *old_cred;
  1062. + struct cred *override_cred;
  1063. +
  1064. + override_cred = prepare_creds();
  1065. + if (!override_cred)
  1066. + return -ENOMEM;
  1067. +
  1068. + /* CAP_SYS_ADMIN for setxattr of "trusted" namespace */
  1069. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  1070. + old_cred = override_creds(override_cred);
  1071. + err = vfs_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0);
  1072. + revert_creds(old_cred);
  1073. + put_cred(override_cred);
  1074. +
  1075. + return err;
  1076. +}
  1077. +
  1078. +static int ovl_remove_opaque(struct dentry *upperdentry)
  1079. +{
  1080. + int err;
  1081. + const struct cred *old_cred;
  1082. + struct cred *override_cred;
  1083. +
  1084. + override_cred = prepare_creds();
  1085. + if (!override_cred)
  1086. + return -ENOMEM;
  1087. +
  1088. + /* CAP_SYS_ADMIN for removexattr of "trusted" namespace */
  1089. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  1090. + old_cred = override_creds(override_cred);
  1091. + err = vfs_removexattr(upperdentry, ovl_opaque_xattr);
  1092. + revert_creds(old_cred);
  1093. + put_cred(override_cred);
  1094. +
  1095. + return err;
  1096. +}
  1097. +
  1098. +static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
  1099. + struct kstat *stat)
  1100. +{
  1101. + int err;
  1102. + enum ovl_path_type type;
  1103. + struct path realpath;
  1104. +
  1105. + type = ovl_path_real(dentry, &realpath);
  1106. + err = vfs_getattr(realpath.mnt, realpath.dentry, stat);
  1107. + if (err)
  1108. + return err;
  1109. +
  1110. + stat->dev = dentry->d_sb->s_dev;
  1111. + stat->ino = dentry->d_inode->i_ino;
  1112. +
  1113. + /*
  1114. + * It's probably not worth it to count subdirs to get the
  1115. + * correct link count. nlink=1 seems to pacify 'find' and
  1116. + * other utilities.
  1117. + */
  1118. + if (type == OVL_PATH_MERGE)
  1119. + stat->nlink = 1;
  1120. +
  1121. + return 0;
  1122. +}
  1123. +
  1124. +static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
  1125. + const char *link)
  1126. +{
  1127. + int err;
  1128. + struct dentry *newdentry;
  1129. + struct dentry *upperdir;
  1130. + struct inode *inode;
  1131. + struct kstat stat = {
  1132. + .mode = mode,
  1133. + .rdev = rdev,
  1134. + };
  1135. +
  1136. + err = -ENOMEM;
  1137. + inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
  1138. + if (!inode)
  1139. + goto out;
  1140. +
  1141. + err = ovl_copy_up(dentry->d_parent);
  1142. + if (err)
  1143. + goto out_iput;
  1144. +
  1145. + upperdir = ovl_dentry_upper(dentry->d_parent);
  1146. + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  1147. +
  1148. + newdentry = ovl_upper_create(upperdir, dentry, &stat, link);
  1149. + err = PTR_ERR(newdentry);
  1150. + if (IS_ERR(newdentry))
  1151. + goto out_unlock;
  1152. +
  1153. + ovl_dentry_version_inc(dentry->d_parent);
  1154. + if (ovl_dentry_is_opaque(dentry) && S_ISDIR(mode)) {
  1155. + err = ovl_set_opaque(newdentry);
  1156. + if (err) {
  1157. + vfs_rmdir(upperdir->d_inode, newdentry);
  1158. + ovl_whiteout(upperdir, dentry);
  1159. + goto out_dput;
  1160. + }
  1161. + }
  1162. + ovl_dentry_update(dentry, newdentry);
  1163. + d_instantiate(dentry, inode);
  1164. + inode = NULL;
  1165. + newdentry = NULL;
  1166. + err = 0;
  1167. +
  1168. +out_dput:
  1169. + dput(newdentry);
  1170. +out_unlock:
  1171. + mutex_unlock(&upperdir->d_inode->i_mutex);
  1172. +out_iput:
  1173. + iput(inode);
  1174. +out:
  1175. + return err;
  1176. +}
  1177. +
  1178. +static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
  1179. + struct nameidata *nd)
  1180. +{
  1181. + return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
  1182. +}
  1183. +
  1184. +static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  1185. +{
  1186. + return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
  1187. +}
  1188. +
  1189. +static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
  1190. + dev_t rdev)
  1191. +{
  1192. + return ovl_create_object(dentry, mode, rdev, NULL);
  1193. +}
  1194. +
  1195. +static int ovl_symlink(struct inode *dir, struct dentry *dentry,
  1196. + const char *link)
  1197. +{
  1198. + return ovl_create_object(dentry, S_IFLNK, 0, link);
  1199. +}
  1200. +
  1201. +static int ovl_do_remove(struct dentry *dentry, bool is_dir)
  1202. +{
  1203. + int err;
  1204. + enum ovl_path_type type;
  1205. + struct path realpath;
  1206. + struct dentry *upperdir;
  1207. +
  1208. + err = ovl_copy_up(dentry->d_parent);
  1209. + if (err)
  1210. + return err;
  1211. +
  1212. + upperdir = ovl_dentry_upper(dentry->d_parent);
  1213. + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  1214. + type = ovl_path_real(dentry, &realpath);
  1215. + if (type != OVL_PATH_LOWER) {
  1216. + err = -ESTALE;
  1217. + if (realpath.dentry->d_parent != upperdir)
  1218. + goto out_d_drop;
  1219. +
  1220. + /* FIXME: create whiteout up front and rename to target */
  1221. +
  1222. + if (is_dir)
  1223. + err = vfs_rmdir(upperdir->d_inode, realpath.dentry);
  1224. + else
  1225. + err = vfs_unlink(upperdir->d_inode, realpath.dentry);
  1226. + if (err)
  1227. + goto out_d_drop;
  1228. +
  1229. + ovl_dentry_version_inc(dentry->d_parent);
  1230. + }
  1231. +
  1232. + if (type != OVL_PATH_UPPER || ovl_dentry_is_opaque(dentry))
  1233. + err = ovl_whiteout(upperdir, dentry);
  1234. +
  1235. + /*
  1236. + * Keeping this dentry hashed would mean having to release
  1237. + * upperpath/lowerpath, which could only be done if we are the
  1238. + * sole user of this dentry. Too tricky... Just unhash for
  1239. + * now.
  1240. + */
  1241. +out_d_drop:
  1242. + d_drop(dentry);
  1243. + mutex_unlock(&upperdir->d_inode->i_mutex);
  1244. +
  1245. + return err;
  1246. +}
  1247. +
  1248. +static int ovl_unlink(struct inode *dir, struct dentry *dentry)
  1249. +{
  1250. + return ovl_do_remove(dentry, false);
  1251. +}
  1252. +
  1253. +
  1254. +static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
  1255. +{
  1256. + int err;
  1257. + enum ovl_path_type type;
  1258. +
  1259. + type = ovl_path_type(dentry);
  1260. + if (type != OVL_PATH_UPPER) {
  1261. + err = ovl_check_empty_and_clear(dentry, type);
  1262. + if (err)
  1263. + return err;
  1264. + }
  1265. +
  1266. + return ovl_do_remove(dentry, true);
  1267. +}
  1268. +
  1269. +static int ovl_link(struct dentry *old, struct inode *newdir,
  1270. + struct dentry *new)
  1271. +{
  1272. + int err;
  1273. + struct dentry *olddentry;
  1274. + struct dentry *newdentry;
  1275. + struct dentry *upperdir;
  1276. +
  1277. + err = ovl_copy_up(old);
  1278. + if (err)
  1279. + goto out;
  1280. +
  1281. + err = ovl_copy_up(new->d_parent);
  1282. + if (err)
  1283. + goto out;
  1284. +
  1285. + upperdir = ovl_dentry_upper(new->d_parent);
  1286. + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  1287. + newdentry = ovl_lookup_create(upperdir, new);
  1288. + err = PTR_ERR(newdentry);
  1289. + if (IS_ERR(newdentry))
  1290. + goto out_unlock;
  1291. +
  1292. + olddentry = ovl_dentry_upper(old);
  1293. + err = vfs_link(olddentry, upperdir->d_inode, newdentry);
  1294. + if (!err) {
  1295. + if (WARN_ON(!newdentry->d_inode)) {
  1296. + dput(newdentry);
  1297. + err = -ENOENT;
  1298. + goto out_unlock;
  1299. + }
  1300. +
  1301. + ovl_dentry_version_inc(new->d_parent);
  1302. + ovl_dentry_update(new, newdentry);
  1303. +
  1304. + ihold(old->d_inode);
  1305. + d_instantiate(new, old->d_inode);
  1306. + } else {
  1307. + if (ovl_dentry_is_opaque(new))
  1308. + ovl_whiteout(upperdir, new);
  1309. + dput(newdentry);
  1310. + }
  1311. +out_unlock:
  1312. + mutex_unlock(&upperdir->d_inode->i_mutex);
  1313. +out:
  1314. + return err;
  1315. +
  1316. +}
  1317. +
  1318. +static int ovl_rename(struct inode *olddir, struct dentry *old,
  1319. + struct inode *newdir, struct dentry *new)
  1320. +{
  1321. + int err;
  1322. + enum ovl_path_type old_type;
  1323. + enum ovl_path_type new_type;
  1324. + struct dentry *old_upperdir;
  1325. + struct dentry *new_upperdir;
  1326. + struct dentry *olddentry;
  1327. + struct dentry *newdentry;
  1328. + struct dentry *trap;
  1329. + bool old_opaque;
  1330. + bool new_opaque;
  1331. + bool new_create = false;
  1332. + bool is_dir = S_ISDIR(old->d_inode->i_mode);
  1333. +
  1334. + /* Don't copy up directory trees */
  1335. + old_type = ovl_path_type(old);
  1336. + if (old_type != OVL_PATH_UPPER && is_dir)
  1337. + return -EXDEV;
  1338. +
  1339. + if (new->d_inode) {
  1340. + new_type = ovl_path_type(new);
  1341. +
  1342. + if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) {
  1343. + if (ovl_dentry_lower(old)->d_inode ==
  1344. + ovl_dentry_lower(new)->d_inode)
  1345. + return 0;
  1346. + }
  1347. + if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) {
  1348. + if (ovl_dentry_upper(old)->d_inode ==
  1349. + ovl_dentry_upper(new)->d_inode)
  1350. + return 0;
  1351. + }
  1352. +
  1353. + if (new_type != OVL_PATH_UPPER &&
  1354. + S_ISDIR(new->d_inode->i_mode)) {
  1355. + err = ovl_check_empty_and_clear(new, new_type);
  1356. + if (err)
  1357. + return err;
  1358. + }
  1359. + } else {
  1360. + new_type = OVL_PATH_UPPER;
  1361. + }
  1362. +
  1363. + err = ovl_copy_up(old);
  1364. + if (err)
  1365. + return err;
  1366. +
  1367. + err = ovl_copy_up(new->d_parent);
  1368. + if (err)
  1369. + return err;
  1370. +
  1371. + old_upperdir = ovl_dentry_upper(old->d_parent);
  1372. + new_upperdir = ovl_dentry_upper(new->d_parent);
  1373. +
  1374. + trap = lock_rename(new_upperdir, old_upperdir);
  1375. +
  1376. + olddentry = ovl_dentry_upper(old);
  1377. + newdentry = ovl_dentry_upper(new);
  1378. + if (newdentry) {
  1379. + dget(newdentry);
  1380. + } else {
  1381. + new_create = true;
  1382. + newdentry = ovl_lookup_create(new_upperdir, new);
  1383. + err = PTR_ERR(newdentry);
  1384. + if (IS_ERR(newdentry))
  1385. + goto out_unlock;
  1386. + }
  1387. +
  1388. + err = -ESTALE;
  1389. + if (olddentry->d_parent != old_upperdir)
  1390. + goto out_dput;
  1391. + if (newdentry->d_parent != new_upperdir)
  1392. + goto out_dput;
  1393. + if (olddentry == trap)
  1394. + goto out_dput;
  1395. + if (newdentry == trap)
  1396. + goto out_dput;
  1397. +
  1398. + old_opaque = ovl_dentry_is_opaque(old);
  1399. + new_opaque = ovl_dentry_is_opaque(new) || new_type != OVL_PATH_UPPER;
  1400. +
  1401. + if (is_dir && !old_opaque && new_opaque) {
  1402. + err = ovl_set_opaque(olddentry);
  1403. + if (err)
  1404. + goto out_dput;
  1405. + }
  1406. +
  1407. + err = vfs_rename(old_upperdir->d_inode, olddentry,
  1408. + new_upperdir->d_inode, newdentry);
  1409. +
  1410. + if (err) {
  1411. + if (new_create && ovl_dentry_is_opaque(new))
  1412. + ovl_whiteout(new_upperdir, new);
  1413. + if (is_dir && !old_opaque && new_opaque)
  1414. + ovl_remove_opaque(olddentry);
  1415. + goto out_dput;
  1416. + }
  1417. +
  1418. + if (old_type != OVL_PATH_UPPER || old_opaque)
  1419. + err = ovl_whiteout(old_upperdir, old);
  1420. + if (is_dir && old_opaque && !new_opaque)
  1421. + ovl_remove_opaque(olddentry);
  1422. +
  1423. + if (old_opaque != new_opaque)
  1424. + ovl_dentry_set_opaque(old, new_opaque);
  1425. +
  1426. + ovl_dentry_version_inc(old->d_parent);
  1427. + ovl_dentry_version_inc(new->d_parent);
  1428. +
  1429. +out_dput:
  1430. + dput(newdentry);
  1431. +out_unlock:
  1432. + unlock_rename(new_upperdir, old_upperdir);
  1433. + return err;
  1434. +}
  1435. +
  1436. +const struct inode_operations ovl_dir_inode_operations = {
  1437. + .lookup = ovl_lookup,
  1438. + .mkdir = ovl_mkdir,
  1439. + .symlink = ovl_symlink,
  1440. + .unlink = ovl_unlink,
  1441. + .rmdir = ovl_rmdir,
  1442. + .rename = ovl_rename,
  1443. + .link = ovl_link,
  1444. + .setattr = ovl_setattr,
  1445. + .create = ovl_create,
  1446. + .mknod = ovl_mknod,
  1447. + .permission = ovl_permission,
  1448. + .getattr = ovl_dir_getattr,
  1449. + .setxattr = ovl_setxattr,
  1450. + .getxattr = ovl_getxattr,
  1451. + .listxattr = ovl_listxattr,
  1452. + .removexattr = ovl_removexattr,
  1453. +};
  1454. --- /dev/null
  1455. +++ b/fs/overlayfs/inode.c
  1456. @@ -0,0 +1,384 @@
  1457. +/*
  1458. + *
  1459. + * Copyright (C) 2011 Novell Inc.
  1460. + *
  1461. + * This program is free software; you can redistribute it and/or modify it
  1462. + * under the terms of the GNU General Public License version 2 as published by
  1463. + * the Free Software Foundation.
  1464. + */
  1465. +
  1466. +#include <linux/fs.h>
  1467. +#include <linux/slab.h>
  1468. +#include <linux/xattr.h>
  1469. +#include "overlayfs.h"
  1470. +
  1471. +int ovl_setattr(struct dentry *dentry, struct iattr *attr)
  1472. +{
  1473. + struct dentry *upperdentry;
  1474. + int err;
  1475. +
  1476. + if ((attr->ia_valid & ATTR_SIZE) && !ovl_dentry_upper(dentry))
  1477. + err = ovl_copy_up_truncate(dentry, attr->ia_size);
  1478. + else
  1479. + err = ovl_copy_up(dentry);
  1480. + if (err)
  1481. + return err;
  1482. +
  1483. + upperdentry = ovl_dentry_upper(dentry);
  1484. +
  1485. + if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
  1486. + attr->ia_valid &= ~ATTR_MODE;
  1487. +
  1488. + mutex_lock(&upperdentry->d_inode->i_mutex);
  1489. + err = notify_change(upperdentry, attr);
  1490. + mutex_unlock(&upperdentry->d_inode->i_mutex);
  1491. +
  1492. + return err;
  1493. +}
  1494. +
  1495. +static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
  1496. + struct kstat *stat)
  1497. +{
  1498. + struct path realpath;
  1499. +
  1500. + ovl_path_real(dentry, &realpath);
  1501. + return vfs_getattr(realpath.mnt, realpath.dentry, stat);
  1502. +}
  1503. +
  1504. +int ovl_permission(struct inode *inode, int mask)
  1505. +{
  1506. + struct ovl_entry *oe;
  1507. + struct dentry *alias = NULL;
  1508. + struct inode *realinode;
  1509. + struct dentry *realdentry;
  1510. + bool is_upper;
  1511. + int err;
  1512. +
  1513. + if (S_ISDIR(inode->i_mode)) {
  1514. + oe = inode->i_private;
  1515. + } else if (mask & MAY_NOT_BLOCK) {
  1516. + return -ECHILD;
  1517. + } else {
  1518. + /*
  1519. + * For non-directories find an alias and get the info
  1520. + * from there.
  1521. + */
  1522. + spin_lock(&inode->i_lock);
  1523. + if (WARN_ON(list_empty(&inode->i_dentry))) {
  1524. + spin_unlock(&inode->i_lock);
  1525. + return -ENOENT;
  1526. + }
  1527. + alias = list_entry(inode->i_dentry.next,
  1528. + struct dentry, d_alias);
  1529. + dget(alias);
  1530. + spin_unlock(&inode->i_lock);
  1531. + oe = alias->d_fsdata;
  1532. + }
  1533. +
  1534. + realdentry = ovl_entry_real(oe, &is_upper);
  1535. +
  1536. + /* Careful in RCU walk mode */
  1537. + realinode = ACCESS_ONCE(realdentry->d_inode);
  1538. + if (!realinode) {
  1539. + WARN_ON(!(mask & MAY_NOT_BLOCK));
  1540. + err = -ENOENT;
  1541. + goto out_dput;
  1542. + }
  1543. +
  1544. + if (mask & MAY_WRITE) {
  1545. + umode_t mode = realinode->i_mode;
  1546. +
  1547. + /*
  1548. + * Writes will always be redirected to upper layer, so
  1549. + * ignore lower layer being read-only.
  1550. + *
  1551. + * If the overlay itself is read-only then proceed
  1552. + * with the permission check, don't return EROFS.
  1553. + * This will only happen if this is the lower layer of
  1554. + * another overlayfs.
  1555. + *
  1556. + * If upper fs becomes read-only after the overlay was
  1557. + * constructed return EROFS to prevent modification of
  1558. + * upper layer.
  1559. + */
  1560. + err = -EROFS;
  1561. + if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
  1562. + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
  1563. + goto out_dput;
  1564. +
  1565. + /*
  1566. + * Nobody gets write access to an immutable file.
  1567. + */
  1568. + err = -EACCES;
  1569. + if (IS_IMMUTABLE(realinode))
  1570. + goto out_dput;
  1571. + }
  1572. +
  1573. + if (realinode->i_op->permission)
  1574. + err = realinode->i_op->permission(realinode, mask);
  1575. + else
  1576. + err = generic_permission(realinode, mask);
  1577. +out_dput:
  1578. + dput(alias);
  1579. + return err;
  1580. +}
  1581. +
  1582. +
  1583. +struct ovl_link_data {
  1584. + struct dentry *realdentry;
  1585. + void *cookie;
  1586. +};
  1587. +
  1588. +static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
  1589. +{
  1590. + void *ret;
  1591. + struct dentry *realdentry;
  1592. + struct inode *realinode;
  1593. +
  1594. + realdentry = ovl_dentry_real(dentry);
  1595. + realinode = realdentry->d_inode;
  1596. +
  1597. + if (WARN_ON(!realinode->i_op->follow_link))
  1598. + return ERR_PTR(-EPERM);
  1599. +
  1600. + ret = realinode->i_op->follow_link(realdentry, nd);
  1601. + if (IS_ERR(ret))
  1602. + return ret;
  1603. +
  1604. + if (realinode->i_op->put_link) {
  1605. + struct ovl_link_data *data;
  1606. +
  1607. + data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
  1608. + if (!data) {
  1609. + realinode->i_op->put_link(realdentry, nd, ret);
  1610. + return ERR_PTR(-ENOMEM);
  1611. + }
  1612. + data->realdentry = realdentry;
  1613. + data->cookie = ret;
  1614. +
  1615. + return data;
  1616. + } else {
  1617. + return NULL;
  1618. + }
  1619. +}
  1620. +
  1621. +static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
  1622. +{
  1623. + struct inode *realinode;
  1624. + struct ovl_link_data *data = c;
  1625. +
  1626. + if (!data)
  1627. + return;
  1628. +
  1629. + realinode = data->realdentry->d_inode;
  1630. + realinode->i_op->put_link(data->realdentry, nd, data->cookie);
  1631. + kfree(data);
  1632. +}
  1633. +
  1634. +static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
  1635. +{
  1636. + struct path realpath;
  1637. + struct inode *realinode;
  1638. +
  1639. + ovl_path_real(dentry, &realpath);
  1640. + realinode = realpath.dentry->d_inode;
  1641. +
  1642. + if (!realinode->i_op->readlink)
  1643. + return -EINVAL;
  1644. +
  1645. + touch_atime(realpath.mnt, realpath.dentry);
  1646. +
  1647. + return realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
  1648. +}
  1649. +
  1650. +/* True if @name starts with the whole overlay-private "trusted.overlay." prefix. */
  1651. +static bool ovl_is_private_xattr(const char *name)
  1652. +{
  1653. + return !strncmp(name, "trusted.overlay.", sizeof("trusted.overlay.") - 1);
  1654. +}
  1655. +
  1656. +int ovl_setxattr(struct dentry *dentry, const char *name,
  1657. + const void *value, size_t size, int flags)
  1658. +{
  1659. + int err;
  1660. + struct dentry *upperdentry;
  1661. +
  1662. + if (ovl_is_private_xattr(name))
  1663. + return -EPERM;
  1664. +
  1665. + err = ovl_copy_up(dentry);
  1666. + if (err)
  1667. + return err;
  1668. +
  1669. + upperdentry = ovl_dentry_upper(dentry);
  1670. + return vfs_setxattr(upperdentry, name, value, size, flags);
  1671. +}
  1672. +
  1673. +ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
  1674. + void *value, size_t size)
  1675. +{
  1676. + if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
  1677. + ovl_is_private_xattr(name))
  1678. + return -ENODATA;
  1679. +
  1680. + return vfs_getxattr(ovl_dentry_real(dentry), name, value, size);
  1681. +}
  1682. +
  1683. +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
  1684. +{
  1685. + ssize_t res;
  1686. + int off;
  1687. +
  1688. + res = vfs_listxattr(ovl_dentry_real(dentry), list, size);
  1689. + if (res <= 0 || size == 0)
  1690. + return res;
  1691. +
  1692. + if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE)
  1693. + return res;
  1694. +
  1695. + /* filter out private xattrs */
  1696. + for (off = 0; off < res;) {
  1697. + char *s = list + off;
  1698. + size_t slen = strlen(s) + 1;
  1699. +
  1700. + BUG_ON(off + slen > res);
  1701. +
  1702. + if (ovl_is_private_xattr(s)) {
  1703. + res -= slen;
  1704. + memmove(s, s + slen, res - off);
  1705. + } else {
  1706. + off += slen;
  1707. + }
  1708. + }
  1709. +
  1710. + return res;
  1711. +}
  1712. +
  1713. +int ovl_removexattr(struct dentry *dentry, const char *name)
  1714. +{
  1715. + int err;
  1716. + struct path realpath;
  1717. + enum ovl_path_type type;
  1718. +
  1719. + if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
  1720. + ovl_is_private_xattr(name))
  1721. + return -ENODATA;
  1722. +
  1723. + type = ovl_path_real(dentry, &realpath);
  1724. + if (type == OVL_PATH_LOWER) {
  1725. + err = vfs_getxattr(realpath.dentry, name, NULL, 0);
  1726. + if (err < 0)
  1727. + return err;
  1728. +
  1729. + err = ovl_copy_up(dentry);
  1730. + if (err)
  1731. + return err;
  1732. +
  1733. + ovl_path_upper(dentry, &realpath);
  1734. + }
  1735. +
  1736. + return vfs_removexattr(realpath.dentry, name);
  1737. +}
  1738. +
  1739. +static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
  1740. + struct dentry *realdentry)
  1741. +{
  1742. + if (type != OVL_PATH_LOWER)
  1743. + return false;
  1744. +
  1745. + if (special_file(realdentry->d_inode->i_mode))
  1746. + return false;
  1747. +
  1748. + if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
  1749. + return false;
  1750. +
  1751. + return true;
  1752. +}
  1753. +
  1754. +static struct file *ovl_open(struct dentry *dentry, struct file *file,
  1755. + const struct cred *cred)
  1756. +{
  1757. + int err;
  1758. + struct path realpath;
  1759. + enum ovl_path_type type;
  1760. +
  1761. + type = ovl_path_real(dentry, &realpath);
  1762. + if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) {
  1763. + if (file->f_flags & O_TRUNC)
  1764. + err = ovl_copy_up_truncate(dentry, 0);
  1765. + else
  1766. + err = ovl_copy_up(dentry);
  1767. + if (err)
  1768. + return ERR_PTR(err);
  1769. +
  1770. + ovl_path_upper(dentry, &realpath);
  1771. + }
  1772. +
  1773. + return vfs_open(&realpath, file, cred);
  1774. +}
  1775. +
  1776. +static const struct inode_operations ovl_file_inode_operations = {
  1777. + .setattr = ovl_setattr,
  1778. + .permission = ovl_permission,
  1779. + .getattr = ovl_getattr,
  1780. + .setxattr = ovl_setxattr,
  1781. + .getxattr = ovl_getxattr,
  1782. + .listxattr = ovl_listxattr,
  1783. + .removexattr = ovl_removexattr,
  1784. + .open = ovl_open,
  1785. +};
  1786. +
  1787. +static const struct inode_operations ovl_symlink_inode_operations = {
  1788. + .setattr = ovl_setattr,
  1789. + .follow_link = ovl_follow_link,
  1790. + .put_link = ovl_put_link,
  1791. + .readlink = ovl_readlink,
  1792. + .getattr = ovl_getattr,
  1793. + .setxattr = ovl_setxattr,
  1794. + .getxattr = ovl_getxattr,
  1795. + .listxattr = ovl_listxattr,
  1796. + .removexattr = ovl_removexattr,
  1797. +};
  1798. +
  1799. +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
  1800. + struct ovl_entry *oe)
  1801. +{
  1802. + struct inode *inode;
  1803. +
  1804. + inode = new_inode(sb);
  1805. + if (!inode)
  1806. + return NULL;
  1807. +
  1808. + /* Only the file type bits of @mode are stored in the overlay inode. */
  1809. + mode &= S_IFMT;
  1810. + inode->i_ino = get_next_ino();
  1811. + inode->i_mode = mode;
  1812. + inode->i_flags |= S_NOATIME | S_NOCMTIME;
  1813. +
  1814. + switch (mode) {
  1815. + case S_IFDIR:
  1816. + inode->i_private = oe;
  1817. + inode->i_op = &ovl_dir_inode_operations;
  1818. + inode->i_fop = &ovl_dir_operations;
  1819. + break;
  1820. +
  1821. + case S_IFLNK:
  1822. + inode->i_op = &ovl_symlink_inode_operations;
  1823. + break;
  1824. +
  1825. + case S_IFREG:
  1826. + case S_IFSOCK:
  1827. + case S_IFBLK:
  1828. + case S_IFCHR:
  1829. + case S_IFIFO:
  1830. + inode->i_op = &ovl_file_inode_operations;
  1831. + break;
  1832. +
  1833. + default:
  1834. + WARN(1, "illegal file type: %i\n", mode);
  1835. + iput(inode); /* release the inode from new_inode() instead of leaking it */
  1836. + inode = NULL;
  1837. + }
  1838. +
  1839. + return inode;
  1840. +}
  1841. --- /dev/null
  1842. +++ b/fs/overlayfs/overlayfs.h
  1843. @@ -0,0 +1,64 @@
  1844. +/*
  1845. + *
  1846. + * Copyright (C) 2011 Novell Inc.
  1847. + *
  1848. + * This program is free software; you can redistribute it and/or modify it
  1849. + * under the terms of the GNU General Public License version 2 as published by
  1850. + * the Free Software Foundation.
  1851. + */
  1852. +
  1853. +struct ovl_entry;
  1854. +
  1855. +enum ovl_path_type {
  1856. + OVL_PATH_UPPER,
  1857. + OVL_PATH_MERGE,
  1858. + OVL_PATH_LOWER,
  1859. +};
  1860. +
  1861. +extern const char *ovl_opaque_xattr;
  1862. +extern const char *ovl_whiteout_xattr;
  1863. +extern const struct dentry_operations ovl_dentry_operations;
  1864. +
  1865. +enum ovl_path_type ovl_path_type(struct dentry *dentry);
  1866. +u64 ovl_dentry_version_get(struct dentry *dentry);
  1867. +void ovl_dentry_version_inc(struct dentry *dentry);
  1868. +void ovl_path_upper(struct dentry *dentry, struct path *path);
  1869. +void ovl_path_lower(struct dentry *dentry, struct path *path);
  1870. +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
  1871. +struct dentry *ovl_dentry_upper(struct dentry *dentry);
  1872. +struct dentry *ovl_dentry_lower(struct dentry *dentry);
  1873. +struct dentry *ovl_dentry_real(struct dentry *dentry);
  1874. +struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
  1875. +bool ovl_dentry_is_opaque(struct dentry *dentry);
  1876. +void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
  1877. +bool ovl_is_whiteout(struct dentry *dentry);
  1878. +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
  1879. +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
  1880. + struct nameidata *nd);
  1881. +struct file *ovl_path_open(struct path *path, int flags);
  1882. +
  1883. +struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
  1884. + struct kstat *stat, const char *link);
  1885. +
  1886. +/* readdir.c */
  1887. +extern const struct file_operations ovl_dir_operations;
  1888. +int ovl_check_empty_and_clear(struct dentry *dentry, enum ovl_path_type type);
  1889. +
  1890. +/* inode.c */
  1891. +int ovl_setattr(struct dentry *dentry, struct iattr *attr);
  1892. +int ovl_permission(struct inode *inode, int mask);
  1893. +int ovl_setxattr(struct dentry *dentry, const char *name,
  1894. + const void *value, size_t size, int flags);
  1895. +ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
  1896. + void *value, size_t size);
  1897. +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
  1898. +int ovl_removexattr(struct dentry *dentry, const char *name);
  1899. +
  1900. +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
  1901. + struct ovl_entry *oe);
  1902. +/* dir.c */
  1903. +extern const struct inode_operations ovl_dir_inode_operations;
  1904. +
  1905. +/* copy_up.c */
  1906. +int ovl_copy_up(struct dentry *dentry);
  1907. +int ovl_copy_up_truncate(struct dentry *dentry, loff_t size);
  1908. --- /dev/null
  1909. +++ b/fs/overlayfs/readdir.c
  1910. @@ -0,0 +1,565 @@
  1911. +/*
  1912. + *
  1913. + * Copyright (C) 2011 Novell Inc.
  1914. + *
  1915. + * This program is free software; you can redistribute it and/or modify it
  1916. + * under the terms of the GNU General Public License version 2 as published by
  1917. + * the Free Software Foundation.
  1918. + */
  1919. +
  1920. +#include <linux/fs.h>
  1921. +#include <linux/slab.h>
  1922. +#include <linux/namei.h>
  1923. +#include <linux/file.h>
  1924. +#include <linux/xattr.h>
  1925. +#include <linux/rbtree.h>
  1926. +#include <linux/security.h>
  1927. +#include "overlayfs.h"
  1928. +
  1929. +struct ovl_cache_entry {
  1930. + const char *name;
  1931. + unsigned int len;
  1932. + unsigned int type;
  1933. + u64 ino;
  1934. + bool is_whiteout;
  1935. + struct list_head l_node;
  1936. + struct rb_node node;
  1937. +};
  1938. +
  1939. +struct ovl_readdir_data {
  1940. + struct rb_root *root;
  1941. + struct list_head *list;
  1942. + struct list_head *middle;
  1943. + struct dentry *dir;
  1944. + int count;
  1945. + int err;
  1946. +};
  1947. +
  1948. +struct ovl_dir_file {
  1949. + bool is_real;
  1950. + bool is_cached;
  1951. + struct list_head cursor;
  1952. + u64 cache_version;
  1953. + struct list_head cache;
  1954. + struct file *realfile;
  1955. +};
  1956. +
  1957. +static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
  1958. +{
  1959. + return container_of(n, struct ovl_cache_entry, node);
  1960. +}
  1961. +
  1962. +static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
  1963. + const char *name, int len)
  1964. +{
  1965. + struct rb_node *node = root->rb_node;
  1966. + int cmp;
  1967. +
  1968. + while (node) {
  1969. + struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
  1970. +
  1971. + cmp = strncmp(name, p->name, len);
  1972. + if (cmp > 0)
  1973. + node = p->node.rb_right;
  1974. + else if (cmp < 0 || len < p->len)
  1975. + node = p->node.rb_left;
  1976. + else
  1977. + return p;
  1978. + }
  1979. +
  1980. + return NULL;
  1981. +}
  1982. +
  1983. +static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len,
  1984. + u64 ino, unsigned int d_type)
  1985. +{
  1986. + struct ovl_cache_entry *p;
  1987. +
  1988. + p = kmalloc(sizeof(*p) + len + 1, GFP_KERNEL);
  1989. + if (p) {
  1990. + char *name_copy = (char *) (p + 1);
  1991. + memcpy(name_copy, name, len);
  1992. + name_copy[len] = '\0';
  1993. + p->name = name_copy;
  1994. + p->len = len;
  1995. + p->type = d_type;
  1996. + p->ino = ino;
  1997. + p->is_whiteout = false;
  1998. + }
  1999. +
  2000. + return p;
  2001. +}
  2002. +
  2003. +static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
  2004. + const char *name, int len, u64 ino,
  2005. + unsigned int d_type)
  2006. +{
  2007. + struct rb_node **newp = &rdd->root->rb_node;
  2008. + struct rb_node *parent = NULL;
  2009. + struct ovl_cache_entry *p;
  2010. +
  2011. + while (*newp) {
  2012. + int cmp;
  2013. + struct ovl_cache_entry *tmp;
  2014. +
  2015. + parent = *newp;
  2016. + tmp = ovl_cache_entry_from_node(*newp);
  2017. + cmp = strncmp(name, tmp->name, len);
  2018. + if (cmp > 0)
  2019. + newp = &tmp->node.rb_right;
  2020. + else if (cmp < 0 || len < tmp->len)
  2021. + newp = &tmp->node.rb_left;
  2022. + else
  2023. + return 0;
  2024. + }
  2025. +
  2026. + p = ovl_cache_entry_new(name, len, ino, d_type);
  2027. + if (p == NULL)
  2028. + return -ENOMEM;
  2029. +
  2030. + list_add_tail(&p->l_node, rdd->list);
  2031. + rb_link_node(&p->node, parent, newp);
  2032. + rb_insert_color(&p->node, rdd->root);
  2033. +
  2034. + return 0;
  2035. +}
  2036. +
  2037. +static int ovl_fill_lower(void *buf, const char *name, int namelen,
  2038. + loff_t offset, u64 ino, unsigned int d_type)
  2039. +{
  2040. + struct ovl_readdir_data *rdd = buf;
  2041. + struct ovl_cache_entry *p;
  2042. +
  2043. + rdd->count++;
  2044. + p = ovl_cache_entry_find(rdd->root, name, namelen);
  2045. + if (p) {
  2046. + list_move_tail(&p->l_node, rdd->middle);
  2047. + } else {
  2048. + p = ovl_cache_entry_new(name, namelen, ino, d_type);
  2049. + if (p == NULL)
  2050. + rdd->err = -ENOMEM;
  2051. + else
  2052. + list_add_tail(&p->l_node, rdd->middle);
  2053. + }
  2054. +
  2055. + return rdd->err;
  2056. +}
  2057. +
  2058. +static void ovl_cache_free(struct list_head *list)
  2059. +{
  2060. + struct ovl_cache_entry *p;
  2061. + struct ovl_cache_entry *n;
  2062. +
  2063. + list_for_each_entry_safe(p, n, list, l_node)
  2064. + kfree(p);
  2065. +
  2066. + INIT_LIST_HEAD(list);
  2067. +}
  2068. +
  2069. +static int ovl_fill_upper(void *buf, const char *name, int namelen,
  2070. + loff_t offset, u64 ino, unsigned int d_type)
  2071. +{
  2072. + struct ovl_readdir_data *rdd = buf;
  2073. +
  2074. + rdd->count++;
  2075. + return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
  2076. +}
  2077. +
  2078. +static inline int ovl_dir_read(struct path *realpath,
  2079. + struct ovl_readdir_data *rdd, filldir_t filler)
  2080. +{
  2081. + struct file *realfile;
  2082. + int err;
  2083. +
  2084. + realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
  2085. + if (IS_ERR(realfile))
  2086. + return PTR_ERR(realfile);
  2087. + /* Repeat until a pass adds no entries or either readdir or @filler fails. */
  2088. + do {
  2089. + rdd->count = 0;
  2090. + rdd->err = 0;
  2091. + err = vfs_readdir(realfile, filler, rdd);
  2092. + if (err >= 0)
  2093. + err = rdd->err;
  2094. + } while (!err && rdd->count);
  2095. + fput(realfile);
  2096. +
  2097. + return err; /* report read/fill failures to the caller rather than masking them */
  2098. +}
  2099. +
  2100. +static void ovl_dir_reset(struct file *file)
  2101. +{
  2102. + struct ovl_dir_file *od = file->private_data;
  2103. + enum ovl_path_type type = ovl_path_type(file->f_path.dentry);
  2104. +
  2105. + if (ovl_dentry_version_get(file->f_path.dentry) != od->cache_version) {
  2106. + list_del_init(&od->cursor);
  2107. + ovl_cache_free(&od->cache);
  2108. + od->is_cached = false;
  2109. + }
  2110. + WARN_ON(!od->is_real && type != OVL_PATH_MERGE);
  2111. + if (od->is_real && type == OVL_PATH_MERGE) {
  2112. + fput(od->realfile);
  2113. + od->realfile = NULL;
  2114. + od->is_real = false;
  2115. + }
  2116. +}
  2117. +
  2118. +static int ovl_dir_mark_whiteouts(struct ovl_readdir_data *rdd)
  2119. +{
  2120. + struct ovl_cache_entry *p;
  2121. + struct dentry *dentry;
  2122. + const struct cred *old_cred;
  2123. + struct cred *override_cred;
  2124. +
  2125. + override_cred = prepare_creds();
  2126. + if (!override_cred) {
  2127. + ovl_cache_free(rdd->list);
  2128. + return -ENOMEM;
  2129. + }
  2130. +
  2131. + /*
  2132. + * CAP_SYS_ADMIN for getxattr
  2133. + * CAP_DAC_OVERRIDE for lookup
  2134. + */
  2135. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  2136. + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
  2137. + old_cred = override_creds(override_cred);
  2138. +
  2139. + mutex_lock(&rdd->dir->d_inode->i_mutex);
  2140. + list_for_each_entry(p, rdd->list, l_node) {
  2141. + if (p->type != DT_LNK)
  2142. + continue;
  2143. +
  2144. + dentry = lookup_one_len(p->name, rdd->dir, p->len);
  2145. + if (IS_ERR(dentry))
  2146. + continue;
  2147. +
  2148. + p->is_whiteout = ovl_is_whiteout(dentry);
  2149. + dput(dentry);
  2150. + }
  2151. + mutex_unlock(&rdd->dir->d_inode->i_mutex);
  2152. +
  2153. + revert_creds(old_cred);
  2154. + put_cred(override_cred);
  2155. +
  2156. + return 0;
  2157. +}
  2158. +
  2159. +static inline int ovl_dir_read_merged(struct path *upperpath,
  2160. + struct path *lowerpath,
  2161. + struct ovl_readdir_data *rdd)
  2162. +{
  2163. + int err;
  2164. + struct rb_root root = RB_ROOT;
  2165. + struct list_head middle;
  2166. +
  2167. + rdd->root = &root;
  2168. + if (upperpath->dentry) {
  2169. + rdd->dir = upperpath->dentry;
  2170. + err = ovl_dir_read(upperpath, rdd, ovl_fill_upper);
  2171. + if (err)
  2172. + goto out;
  2173. +
  2174. + err = ovl_dir_mark_whiteouts(rdd);
  2175. + if (err)
  2176. + goto out;
  2177. + }
  2178. + /*
  2179. + * Insert lowerpath entries before upperpath ones, this allows
  2180. + * offsets to be reasonably constant
  2181. + */
  2182. + list_add(&middle, rdd->list);
  2183. + rdd->middle = &middle;
  2184. + err = ovl_dir_read(lowerpath, rdd, ovl_fill_lower);
  2185. + list_del(&middle);
  2186. +out:
  2187. + rdd->root = NULL;
  2188. +
  2189. + return err;
  2190. +}
  2191. +
  2192. +static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
  2193. +{
  2194. + struct list_head *l;
  2195. + loff_t off;
  2196. +
  2197. + l = od->cache.next;
  2198. + for (off = 0; off < pos; off++) {
  2199. + if (l == &od->cache)
  2200. + break;
  2201. + l = l->next;
  2202. + }
  2203. + list_move_tail(&od->cursor, l);
  2204. +}
  2205. +
  2206. +static int ovl_readdir(struct file *file, void *buf, filldir_t filler)
  2207. +{
  2208. + struct ovl_dir_file *od = file->private_data;
  2209. + int res;
  2210. +
  2211. + if (!file->f_pos)
  2212. + ovl_dir_reset(file);
  2213. +
  2214. + if (od->is_real) {
  2215. + res = vfs_readdir(od->realfile, filler, buf);
  2216. + file->f_pos = od->realfile->f_pos;
  2217. +
  2218. + return res;
  2219. + }
  2220. +
  2221. + if (!od->is_cached) {
  2222. + struct path lowerpath;
  2223. + struct path upperpath;
  2224. + struct ovl_readdir_data rdd = { .list = &od->cache };
  2225. +
  2226. + ovl_path_lower(file->f_path.dentry, &lowerpath);
  2227. + ovl_path_upper(file->f_path.dentry, &upperpath);
  2228. +
  2229. + res = ovl_dir_read_merged(&upperpath, &lowerpath, &rdd);
  2230. + if (res) {
  2231. + ovl_cache_free(rdd.list);
  2232. + return res;
  2233. + }
  2234. +
  2235. + od->cache_version = ovl_dentry_version_get(file->f_path.dentry);
  2236. + od->is_cached = true;
  2237. +
  2238. + ovl_seek_cursor(od, file->f_pos);
  2239. + }
  2240. +
  2241. + while (od->cursor.next != &od->cache) {
  2242. + int over;
  2243. + loff_t off;
  2244. + struct ovl_cache_entry *p;
  2245. +
  2246. + p = list_entry(od->cursor.next, struct ovl_cache_entry, l_node);
  2247. + off = file->f_pos;
  2248. + if (!p->is_whiteout) {
  2249. + over = filler(buf, p->name, p->len, off, p->ino,
  2250. + p->type);
  2251. + if (over)
  2252. + break;
  2253. + }
  2254. + file->f_pos++;
  2255. + list_move(&od->cursor, &p->l_node);
  2256. + }
  2257. +
  2258. + return 0;
  2259. +}
  2260. +
  2261. +static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
  2262. +{
  2263. + loff_t res;
  2264. + struct ovl_dir_file *od = file->private_data;
  2265. +
  2266. + mutex_lock(&file->f_dentry->d_inode->i_mutex);
  2267. + if (!file->f_pos)
  2268. + ovl_dir_reset(file);
  2269. +
  2270. + if (od->is_real) {
  2271. + res = vfs_llseek(od->realfile, offset, origin);
  2272. + file->f_pos = od->realfile->f_pos;
  2273. + } else {
  2274. + res = -EINVAL;
  2275. +
  2276. + switch (origin) {
  2277. + case SEEK_CUR:
  2278. + offset += file->f_pos;
  2279. + break;
  2280. + case SEEK_SET:
  2281. + break;
  2282. + default:
  2283. + goto out_unlock;
  2284. + }
  2285. + if (offset < 0)
  2286. + goto out_unlock;
  2287. +
  2288. + if (offset != file->f_pos) {
  2289. + file->f_pos = offset;
  2290. + if (od->is_cached)
  2291. + ovl_seek_cursor(od, offset);
  2292. + }
  2293. + res = offset;
  2294. + }
  2295. +out_unlock:
  2296. + mutex_unlock(&file->f_dentry->d_inode->i_mutex);
  2297. +
  2298. + return res;
  2299. +}
  2300. +
  2301. +static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
  2302. + int datasync)
  2303. +{
  2304. + struct ovl_dir_file *od = file->private_data;
  2305. + struct file *realfile = od->realfile;
  2306. + struct path upperpath;
  2307. +
  2308. + /* May need to reopen directory if it got copied up */
  2309. + if (!realfile) {
  2310. + ovl_path_upper(file->f_path.dentry, &upperpath);
  2311. + realfile = ovl_path_open(&upperpath, O_RDONLY);
  2312. + if (IS_ERR(realfile))
  2313. + return PTR_ERR(realfile);
  2314. + od->realfile = realfile; /* store only a valid file, never an ERR_PTR */
  2315. + }
  2316. + return vfs_fsync_range(realfile, start, end, datasync);
  2317. +}
  2318. +
  2319. +static int ovl_dir_release(struct inode *inode, struct file *file)
  2320. +{
  2321. + struct ovl_dir_file *od = file->private_data;
  2322. +
  2323. + list_del(&od->cursor);
  2324. + ovl_cache_free(&od->cache);
  2325. + if (od->realfile)
  2326. + fput(od->realfile);
  2327. + kfree(od);
  2328. +
  2329. + return 0;
  2330. +}
  2331. +
  2332. +static int ovl_dir_open(struct inode *inode, struct file *file)
  2333. +{
  2334. + struct path realpath;
  2335. + struct file *realfile;
  2336. + struct ovl_dir_file *od;
  2337. + enum ovl_path_type type;
  2338. +
  2339. + od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
  2340. + if (!od)
  2341. + return -ENOMEM;
  2342. +
  2343. + type = ovl_path_real(file->f_path.dentry, &realpath);
  2344. + realfile = ovl_path_open(&realpath, file->f_flags);
  2345. + if (IS_ERR(realfile)) {
  2346. + kfree(od);
  2347. + return PTR_ERR(realfile);
  2348. + }
  2349. + INIT_LIST_HEAD(&od->cache);
  2350. + INIT_LIST_HEAD(&od->cursor);
  2351. + od->is_cached = false;
  2352. + od->realfile = realfile;
  2353. + od->is_real = (type != OVL_PATH_MERGE);
  2354. + file->private_data = od;
  2355. +
  2356. + return 0;
  2357. +}
  2358. +
  2359. +const struct file_operations ovl_dir_operations = {
  2360. + .read = generic_read_dir,
  2361. + .open = ovl_dir_open,
  2362. + .readdir = ovl_readdir,
  2363. + .llseek = ovl_dir_llseek,
  2364. + .fsync = ovl_dir_fsync,
  2365. + .release = ovl_dir_release,
  2366. +};
  2367. +
  2368. +static int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
  2369. +{
  2370. + int err;
  2371. + struct path lowerpath;
  2372. + struct path upperpath;
  2373. + struct ovl_cache_entry *p;
  2374. + struct ovl_readdir_data rdd = { .list = list };
  2375. +
  2376. + ovl_path_upper(dentry, &upperpath);
  2377. + ovl_path_lower(dentry, &lowerpath);
  2378. +
  2379. + err = ovl_dir_read_merged(&upperpath, &lowerpath, &rdd);
  2380. + if (err)
  2381. + return err;
  2382. +
  2383. + err = 0;
  2384. +
  2385. + list_for_each_entry(p, list, l_node) {
  2386. + if (p->is_whiteout)
  2387. + continue;
  2388. +
  2389. + if (p->name[0] == '.') {
  2390. + if (p->len == 1)
  2391. + continue;
  2392. + if (p->len == 2 && p->name[1] == '.')
  2393. + continue;
  2394. + }
  2395. + err = -ENOTEMPTY;
  2396. + break;
  2397. + }
  2398. +
  2399. + return err;
  2400. +}
  2401. +
  2402. +static int ovl_remove_whiteouts(struct dentry *dir, struct list_head *list)
  2403. +{
  2404. + struct path upperpath;
  2405. + struct dentry *upperdir;
  2406. + struct ovl_cache_entry *p;
  2407. + const struct cred *old_cred;
  2408. + struct cred *override_cred;
  2409. + int err;
  2410. +
  2411. + ovl_path_upper(dir, &upperpath);
  2412. + upperdir = upperpath.dentry;
  2413. +
  2414. + override_cred = prepare_creds();
  2415. + if (!override_cred)
  2416. + return -ENOMEM;
  2417. +
  2418. + /*
  2419. + * CAP_DAC_OVERRIDE for lookup and unlink
  2420. + * CAP_SYS_ADMIN for setxattr of "trusted" namespace
  2421. + * CAP_FOWNER for unlink in sticky directory
  2422. + */
  2423. + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
  2424. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  2425. + cap_raise(override_cred->cap_effective, CAP_FOWNER);
  2426. + old_cred = override_creds(override_cred);
  2427. +
  2428. + err = vfs_setxattr(upperdir, ovl_opaque_xattr, "y", 1, 0);
  2429. + if (err)
  2430. + goto out_revert_creds;
  2431. +
  2432. + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
  2433. + list_for_each_entry(p, list, l_node) {
  2434. + struct dentry *dentry;
  2435. + int ret;
  2436. +
  2437. + if (!p->is_whiteout)
  2438. + continue;
  2439. +
  2440. + dentry = lookup_one_len(p->name, upperdir, p->len);
  2441. + if (IS_ERR(dentry)) {
  2442. + printk(KERN_WARNING
  2443. + "overlayfs: failed to lookup whiteout %.*s: %li\n",
  2444. + p->len, p->name, PTR_ERR(dentry));
  2445. + continue;
  2446. + }
  2447. + ret = vfs_unlink(upperdir->d_inode, dentry);
  2448. + dput(dentry);
  2449. + if (ret)
  2450. + printk(KERN_WARNING
  2451. + "overlayfs: failed to unlink whiteout %.*s: %i\n",
  2452. + p->len, p->name, ret);
  2453. + }
  2454. + mutex_unlock(&upperdir->d_inode->i_mutex);
  2455. +
  2456. +out_revert_creds:
  2457. + revert_creds(old_cred);
  2458. + put_cred(override_cred);
  2459. +
  2460. + return err;
  2461. +}
  2462. +
  2463. +int ovl_check_empty_and_clear(struct dentry *dentry, enum ovl_path_type type)
  2464. +{
  2465. + int err;
  2466. + LIST_HEAD(list);
  2467. +
  2468. + err = ovl_check_empty_dir(dentry, &list);
  2469. + if (!err && type == OVL_PATH_MERGE)
  2470. + err = ovl_remove_whiteouts(dentry, &list);
  2471. +
  2472. + ovl_cache_free(&list);
  2473. +
  2474. + return err;
  2475. +}
  2476. --- /dev/null
  2477. +++ b/fs/overlayfs/super.c
  2478. @@ -0,0 +1,664 @@
  2479. +/*
  2480. + *
  2481. + * Copyright (C) 2011 Novell Inc.
  2482. + *
  2483. + * This program is free software; you can redistribute it and/or modify it
  2484. + * under the terms of the GNU General Public License version 2 as published by
  2485. + * the Free Software Foundation.
  2486. + */
  2487. +
  2488. +#include <linux/fs.h>
  2489. +#include <linux/namei.h>
  2490. +#include <linux/xattr.h>
  2491. +#include <linux/security.h>
  2492. +#include <linux/mount.h>
  2493. +#include <linux/slab.h>
  2494. +#include <linux/parser.h>
  2495. +#include <linux/module.h>
  2496. +#include <linux/seq_file.h>
  2497. +#include "overlayfs.h"
  2498. +
  2499. +MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
  2500. +MODULE_DESCRIPTION("Overlay filesystem");
  2501. +MODULE_LICENSE("GPL");
  2502. +
  2503. +struct ovl_config {
  2504. + char *lowerdir;
  2505. + char *upperdir;
  2506. +};
  2507. +
  2508. +/* private information held for overlayfs's superblock */
  2509. +struct ovl_fs {
  2510. + struct vfsmount *upper_mnt;
  2511. + struct vfsmount *lower_mnt;
  2512. + /* pathnames of lower and upper dirs, for show_options */
  2513. + struct ovl_config config;
  2514. +};
  2515. +
  2516. +/* private information held for every overlayfs dentry */
  2517. +struct ovl_entry {
  2518. + /*
  2519. + * Keep "double reference" on upper dentries, so that
  2520. + * d_delete() doesn't think it's OK to reset d_inode to NULL.
  2521. + */
  2522. + struct dentry *__upperdentry;
  2523. + struct dentry *lowerdentry;
  2524. + union {
  2525. + struct {
  2526. + u64 version;
  2527. + bool opaque;
  2528. + };
  2529. + struct rcu_head rcu;
  2530. + };
  2531. +};
  2532. +
  2533. +const char *ovl_whiteout_xattr = "trusted.overlay.whiteout";
  2534. +const char *ovl_opaque_xattr = "trusted.overlay.opaque";
  2535. +
  2536. +
  2537. +enum ovl_path_type ovl_path_type(struct dentry *dentry)
  2538. +{
  2539. +	struct ovl_entry *entry = dentry->d_fsdata;
  2540. +
  2541. +	/* Without an upper dentry the object lives on the lower layer only. */
  2542. +	if (!entry->__upperdentry)
  2543. +		return OVL_PATH_LOWER;
  2544. +
  2545. +	/* Only a directory that exists on both layers is a merged one. */
  2546. +	if (entry->lowerdentry && S_ISDIR(dentry->d_inode->i_mode))
  2547. +		return OVL_PATH_MERGE;
  2548. +	return OVL_PATH_UPPER;
  2549. +}
  2550. +
  2551. +static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
  2552. +{
  2553. + struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); /* single load; may be NULL */
  2554. + smp_read_barrier_depends(); /* NOTE(review): presumably pairs with smp_wmb() in ovl_dentry_update() - confirm */
  2555. + return upperdentry;
  2556. +}
  2557. +
  2558. +void ovl_path_upper(struct dentry *dentry, struct path *path)
  2559. +{
  2560. +	struct ovl_fs *fs_info = dentry->d_sb->s_fs_info;
  2561. +
  2562. +	/* Fills *path in place; no reference is taken on either member. */
  2563. +	path->dentry = ovl_upperdentry_dereference(dentry->d_fsdata);
  2564. +	path->mnt = fs_info->upper_mnt;
  2565. +}
  2566. +
  2567. +void ovl_path_lower(struct dentry *dentry, struct path *path)
  2568. +{
  2569. +	struct ovl_entry *entry = dentry->d_fsdata;
  2570. +
  2571. +	/* Fills *path in place; no reference is taken on either member. */
  2572. +	path->dentry = entry->lowerdentry;
  2573. +	path->mnt = ((struct ovl_fs *)dentry->d_sb->s_fs_info)->lower_mnt;
  2574. +}
  2575. +
  2576. +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
  2577. +{
  2578. +	enum ovl_path_type kind = ovl_path_type(dentry);
  2579. +
  2580. +	/* UPPER and MERGE both resolve to the upper layer's path. */
  2581. +	if (kind != OVL_PATH_LOWER)
  2582. +		ovl_path_upper(dentry, path);
  2583. +	else
  2584. +		ovl_path_lower(dentry, path);
  2585. +	/* Report which kind of path was stored in *path. */
  2586. +	return kind;
  2587. +}
  2588. +
  2589. +struct dentry *ovl_dentry_upper(struct dentry *dentry)
  2590. +{
  2591. +	/* Borrowed pointer: no additional reference is taken here;
  2592. +	 * the result is NULL when the entry has no upper dentry. */
  2593. +	return ovl_upperdentry_dereference(dentry->d_fsdata);
  2594. +}
  2595. +
  2596. +struct dentry *ovl_dentry_lower(struct dentry *dentry)
  2597. +{
  2598. +	/* Borrowed pointer: no additional reference is taken here;
  2599. +	 * the result is NULL when the entry has no lower dentry. */
  2600. +	return ((struct ovl_entry *)dentry->d_fsdata)->lowerdentry;
  2601. +}
  2602. +
  2603. +struct dentry *ovl_dentry_real(struct dentry *dentry)
  2604. +{
  2605. +	struct ovl_entry *entry = dentry->d_fsdata;
  2606. +	struct dentry *upper = ovl_upperdentry_dereference(entry);
  2607. +
  2608. +	/* Prefer the upper layer; fall back to lower when it is absent. */
  2609. +	if (upper)
  2610. +		return upper;
  2611. +
  2612. +	return entry->lowerdentry;
  2613. +}
  2614. +
  2615. +struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper)
  2616. +{
  2617. +	struct dentry *upper = ovl_upperdentry_dereference(oe);
  2618. +
  2619. +	/* *is_upper reports which layer the returned dentry belongs to. */
  2620. +	*is_upper = (upper != NULL);
  2621. +	if (upper)
  2622. +		return upper;
  2623. +
  2624. +	/* No upper dentry: whatever the lower slot holds (possibly NULL)
  2625. +	 * is the real one. */
  2626. +	return oe->lowerdentry;
  2627. +}
  2628. +
  2629. +bool ovl_dentry_is_opaque(struct dentry *dentry)
  2630. +{
  2631. +	/* The flag lives in the overlay-private data hung off d_fsdata. */
  2632. +	return ((struct ovl_entry *)dentry->d_fsdata)->opaque;
  2633. +}
  2634. +
  2635. +void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque)
  2636. +{
  2637. +	/* Plain store into the overlay-private data; no locking is done here. */
  2638. +	((struct ovl_entry *)dentry->d_fsdata)->opaque = opaque;
  2639. +}
  2640. +
  2641. +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
  2642. +{
  2643. + struct ovl_entry *oe = dentry->d_fsdata;
  2644. + /* Attach a positive upper dentry to an overlay entry that has none yet. */
  2645. + WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex)); /* upper parent is expected to be locked */
  2646. + WARN_ON(oe->__upperdentry); /* an existing upper dentry is not expected to be replaced */
  2647. + BUG_ON(!upperdentry->d_inode);
  2648. + smp_wmb(); /* NOTE(review): presumably orders earlier stores before the pointer is published - confirm */
  2649. + oe->__upperdentry = dget(upperdentry);
  2650. +}
  2651. +
  2652. +void ovl_dentry_version_inc(struct dentry *dentry)
  2653. +{
  2654. +	/* Callers are expected to hold the overlay inode's i_mutex. */
  2655. +	WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
  2656. +	/* Bump the per-dentry version counter. */
  2657. +	((struct ovl_entry *)dentry->d_fsdata)->version += 1;
  2658. +}
  2659. +
  2660. +u64 ovl_dentry_version_get(struct dentry *dentry)
  2661. +{
  2662. +	/* Callers are expected to hold the overlay inode's i_mutex. */
  2663. +	WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
  2664. +
  2665. +	return ((struct ovl_entry *)dentry->d_fsdata)->version;
  2666. +}
  2667. +
  2668. +bool ovl_is_whiteout(struct dentry *dentry)
  2669. +{
  2670. +	char flag;
  2671. +
  2672. +	/* Only an existing symlink can be a whiteout marker. */
  2673. +	if (!dentry || !dentry->d_inode)
  2674. +		return false;
  2675. +	if (!S_ISLNK(dentry->d_inode->i_mode))
  2676. +		return false;
  2677. +
  2678. +	/*
  2679. +	 * The marker is a one byte xattr holding 'y'; any other size,
  2680. +	 * value or error means "not a whiteout".
  2681. +	 */
  2682. +	if (vfs_getxattr(dentry, ovl_whiteout_xattr, &flag, 1) != 1)
  2683. +		return false;
  2684. +	return flag == 'y';
  2685. +}
  2686. +
  2687. +static bool ovl_is_opaquedir(struct dentry *dentry)
  2688. +{
  2689. +	char flag;
  2690. +
  2691. +	/* Only directories are ever reported as opaque. */
  2692. +	if (!S_ISDIR(dentry->d_inode->i_mode))
  2693. +		return false;
  2694. +
  2695. +	/* A one byte xattr value of 'y' marks the directory opaque. */
  2696. +	if (vfs_getxattr(dentry, ovl_opaque_xattr, &flag, 1) != 1)
  2697. +		return false;
  2698. +
  2699. +	return flag == 'y';
  2700. +}
  2701. +
  2702. +static void ovl_entry_free(struct rcu_head *head)
  2703. +{
  2704. +	/* RCU callback: "head" is the rcu member embedded in an ovl_entry. */
  2705. +	kfree(container_of(head, struct ovl_entry, rcu));
  2706. +}
  2707. +
  2708. +static void ovl_dentry_release(struct dentry *dentry)
  2709. +{
  2710. +	struct ovl_entry *entry = dentry->d_fsdata;
  2711. +
  2712. +	if (!entry)
  2713. +		return;
  2714. +	dput(entry->__upperdentry);	/* upper holds a double reference ... */
  2715. +	dput(entry->__upperdentry);	/* ... so it is dropped twice */
  2716. +	dput(entry->lowerdentry);
  2717. +	call_rcu(&entry->rcu, ovl_entry_free);
  2718. +}
  2719. +
  2720. +const struct dentry_operations ovl_dentry_operations = {
  2721. + .d_release = ovl_dentry_release, /* drops the layer dentry references and frees d_fsdata */
  2722. +};
  2723. +
  2724. +static struct ovl_entry *ovl_alloc_entry(void)
  2725. +{
  2726. + return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL); /* zero-filled entry; callers check for NULL */
  2727. +}
  2728. +
  2729. +static inline struct dentry *ovl_lookup_real(struct dentry *dir,
  2730. +					     struct qstr *name)
  2731. +{
  2732. +	struct dentry *found;
  2733. +
  2734. +	mutex_lock(&dir->d_inode->i_mutex);
  2735. +	found = lookup_one_len(name->name, dir, name->len);
  2736. +	mutex_unlock(&dir->d_inode->i_mutex);
  2737. +
  2738. +	/* -ENOENT and a negative dentry both mean "no such entry" (NULL);
  2739. +	 * any other error is handed back as an ERR_PTR. */
  2740. +	if (IS_ERR(found))
  2741. +		return PTR_ERR(found) == -ENOENT ? NULL : found;
  2742. +	if (found->d_inode)
  2743. +		return found;
  2744. +	dput(found);
  2745. +	return NULL;
  2746. +}
  2747. +
  2748. +static int ovl_do_lookup(struct dentry *dentry)
  2749. +{
  2750. + struct ovl_entry *oe;
  2751. + struct dentry *upperdir;
  2752. + struct dentry *lowerdir;
  2753. + struct dentry *upperdentry = NULL;
  2754. + struct dentry *lowerdentry = NULL;
  2755. + struct inode *inode = NULL;
  2756. + int err;
  2757. + /* Look the name up in the parent's upper and lower dirs and d_add() the result; returns 0 or -errno. */
  2758. + err = -ENOMEM;
  2759. + oe = ovl_alloc_entry();
  2760. + if (!oe)
  2761. + goto out;
  2762. +
  2763. + upperdir = ovl_dentry_upper(dentry->d_parent);
  2764. + lowerdir = ovl_dentry_lower(dentry->d_parent);
  2765. +
  2766. + if (upperdir) {
  2767. + upperdentry = ovl_lookup_real(upperdir, &dentry->d_name);
  2768. + err = PTR_ERR(upperdentry);
  2769. + if (IS_ERR(upperdentry))
  2770. + goto out_put_dir;
  2771. + /* Only an upper symlink or directory is checked for the whiteout/opaque xattrs. */
  2772. + if (lowerdir && upperdentry &&
  2773. + (S_ISLNK(upperdentry->d_inode->i_mode) ||
  2774. + S_ISDIR(upperdentry->d_inode->i_mode))) {
  2775. + const struct cred *old_cred;
  2776. + struct cred *override_cred;
  2777. +
  2778. + err = -ENOMEM;
  2779. + override_cred = prepare_creds();
  2780. + if (!override_cred)
  2781. + goto out_dput_upper;
  2782. +
  2783. + /* CAP_SYS_ADMIN needed for getxattr */
  2784. + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  2785. + old_cred = override_creds(override_cred);
  2786. +
  2787. + if (ovl_is_opaquedir(upperdentry)) {
  2788. + oe->opaque = true;
  2789. + } else if (ovl_is_whiteout(upperdentry)) {
  2790. + dput(upperdentry); /* a whiteout hides the name: continue as if upper were absent */
  2791. + upperdentry = NULL;
  2792. + oe->opaque = true;
  2793. + }
  2794. + revert_creds(old_cred);
  2795. + put_cred(override_cred);
  2796. + }
  2797. + }
  2798. + if (lowerdir && !oe->opaque) { /* opaque dir or whiteout: the lower layer is not consulted */
  2799. + lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name);
  2800. + err = PTR_ERR(lowerdentry);
  2801. + if (IS_ERR(lowerdentry))
  2802. + goto out_dput_upper;
  2803. + }
  2804. + /* Unless both objects are directories, the upper one hides the lower one. */
  2805. + if (lowerdentry && upperdentry &&
  2806. + (!S_ISDIR(upperdentry->d_inode->i_mode) ||
  2807. + !S_ISDIR(lowerdentry->d_inode->i_mode))) {
  2808. + dput(lowerdentry);
  2809. + lowerdentry = NULL;
  2810. + oe->opaque = true;
  2811. + }
  2812. +
  2813. + if (lowerdentry || upperdentry) {
  2814. + struct dentry *realdentry;
  2815. +
  2816. + realdentry = upperdentry ? upperdentry : lowerdentry;
  2817. + err = -ENOMEM;
  2818. + inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode,
  2819. + oe);
  2820. + if (!inode)
  2821. + goto out_dput;
  2822. + }
  2823. +
  2824. + if (upperdentry)
  2825. + oe->__upperdentry = dget(upperdentry); /* second reference; the first came from the lookup */
  2826. +
  2827. + if (lowerdentry)
  2828. + oe->lowerdentry = lowerdentry;
  2829. +
  2830. + dentry->d_fsdata = oe;
  2831. + dentry->d_op = &ovl_dentry_operations;
  2832. + d_add(dentry, inode); /* inode is still NULL when the name was found on neither layer */
  2833. +
  2834. + return 0;
  2835. +
  2836. +out_dput:
  2837. + dput(lowerdentry);
  2838. +out_dput_upper:
  2839. + dput(upperdentry);
  2840. +out_put_dir:
  2841. + kfree(oe);
  2842. +out:
  2843. + return err;
  2844. +}
  2845. +
  2846. +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
  2847. +			  struct nameidata *nd)
  2848. +{
  2849. +	/* "dir" and "nd" are unused: ovl_do_lookup() works from d_parent. */
  2850. +	int status = ovl_do_lookup(dentry);
  2851. +
  2852. +	/* On success ovl_do_lookup() has already d_add()ed the dentry,
  2853. +	 * so there is no alternative dentry to return. */
  2854. +	return status ? ERR_PTR(status) : NULL;
  2855. +}
  2856. +
  2857. +struct file *ovl_path_open(struct path *path, int flags)
  2858. +{
  2859. + path_get(path); /* NOTE(review): presumably the reference that dentry_open() takes over - confirm */
  2860. + return dentry_open(path->dentry, path->mnt, flags, current_cred()); /* opened with the caller's credentials */
  2861. +}
  2862. +
  2863. +static void ovl_put_super(struct super_block *sb)
  2864. +{
  2865. +	struct ovl_fs *fs_info = sb->s_fs_info;
  2866. +
  2867. +	/* A read-write overlay holds write access on the upper mount. */
  2868. +	if (!(sb->s_flags & MS_RDONLY))
  2869. +		mnt_drop_write(fs_info->upper_mnt);
  2870. +	mntput(fs_info->upper_mnt);
  2871. +	mntput(fs_info->lower_mnt);
  2872. +
  2873. +	kfree(fs_info->config.upperdir);
  2874. +	kfree(fs_info->config.lowerdir);
  2875. +	kfree(fs_info);
  2876. +}
  2877. +
  2878. +static int ovl_remount_fs(struct super_block *sb, int *flagsp, char *data)
  2879. +{
  2880. +	struct ovl_fs *fs_info = sb->s_fs_info;
  2881. +	bool want_ro = *flagsp & MS_RDONLY;
  2882. +	bool is_ro = sb->s_flags & MS_RDONLY;
  2883. +
  2884. +	/* Only a ro <-> rw transition changes anything on the upper fs. */
  2885. +	if (want_ro == is_ro)
  2886. +		return 0;
  2887. +
  2888. +	/* rw -> ro: give back the write access held on the upper mount. */
  2889. +	if (want_ro) {
  2890. +		mnt_drop_write(fs_info->upper_mnt);
  2891. +		return 0;
  2892. +	}
  2893. +	/* ro -> rw: write access on the upper mount is needed again. */
  2894. +	return mnt_want_write(fs_info->upper_mnt);
  2895. +}
  2896. +
  2897. +/**
  2898. + * ovl_statfs
  2899. + * @dentry: A dentry of the overlayfs super block
  2900. + * @buf: The struct kstatfs to fill in with stats
  2901. + *
  2902. + * Get the filesystem statistics. As writes always target the upper layer
  2903. + * filesystem, the request is passed on to that filesystem.
  2904. + */
  2905. +static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
  2906. +{
  2907. +	struct path upper_root;
  2908. +
  2909. +	/* Resolve the upper-layer counterpart of the overlay root. */
  2910. +	ovl_path_upper(dentry->d_sb->s_root, &upper_root);
  2911. +	if (upper_root.dentry->d_sb->s_op->statfs == NULL)
  2912. +		return -ENOSYS;
  2913. +	return upper_root.dentry->d_sb->s_op->statfs(upper_root.dentry, buf);
  2914. +}
  2915. +
  2916. +/**
  2917. + * ovl_show_options
  2918. + * @m: seq_file the options are written to
  2919. + * @dentry: A dentry of the overlayfs super block
  2920. + *
  2921. + * Prints the lowerdir and upperdir mount options. Always returns zero.
  2922. + */
  2923. +static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
  2924. +{
  2925. +	struct ovl_fs *fs_info = dentry->d_sb->s_fs_info;
  2926. +
  2927. +	seq_printf(m, ",lowerdir=%s", fs_info->config.lowerdir);
  2928. +	seq_printf(m, ",upperdir=%s", fs_info->config.upperdir);
  2929. +	return 0;
  2930. +}
  2931. +
  2932. +static const struct super_operations ovl_super_operations = {
  2933. + .put_super = ovl_put_super, /* drops both layer mounts and frees the ovl_fs */
  2934. + .remount_fs = ovl_remount_fs, /* adjusts upper write access on a ro/rw change */
  2935. + .statfs = ovl_statfs, /* forwarded to the upper filesystem */
  2936. + .show_options = ovl_show_options, /* prints lowerdir= and upperdir= */
  2937. +};
  2938. +
  2939. +enum {
  2940. + Opt_lowerdir, /* "lowerdir=<path>" */
  2941. + Opt_upperdir, /* "upperdir=<path>" */
  2942. + Opt_err, /* any other option; ovl_parse_opt() answers -EINVAL */
  2943. +};
  2944. +
  2945. +static const match_table_t ovl_tokens = {
  2946. + {Opt_lowerdir, "lowerdir=%s"},
  2947. + {Opt_upperdir, "upperdir=%s"},
  2948. + {Opt_err, NULL} /* NOTE(review): presumably the table terminator / catch-all for match_token() */
  2949. +};
  2950. +
  2951. +static int ovl_parse_opt(char *opt, struct ovl_config *config)
  2952. +{
  2953. +	char *word;
  2954. +
  2955. +	config->upperdir = NULL;
  2956. +	config->lowerdir = NULL;
  2957. +
  2958. +	/* Walk the comma separated list; strsep() consumes "opt" in place. */
  2959. +	for (word = strsep(&opt, ","); word; word = strsep(&opt, ",")) {
  2960. +		substring_t args[MAX_OPT_ARGS];
  2961. +		char **slot;
  2962. +
  2963. +		/* Empty items (as in ",,") are skipped silently. */
  2964. +		if (*word == '\0')
  2965. +			continue;
  2966. +
  2967. +		switch (match_token(word, ovl_tokens, args)) {
  2968. +		case Opt_upperdir:
  2969. +			slot = &config->upperdir;
  2970. +			break;
  2971. +		case Opt_lowerdir:
  2972. +			slot = &config->lowerdir;
  2973. +			break;
  2974. +		default:
  2975. +			return -EINVAL;
  2976. +		}
  2977. +
  2978. +		/* A repeated option replaces the value stored earlier. */
  2979. +		kfree(*slot);
  2980. +		*slot = match_strdup(&args[0]);
  2981. +		if (*slot == NULL)
  2982. +			return -ENOMEM;
  2983. +	}
  2984. +
  2985. +	return 0;
  2986. +}
  2987. +
  2988. +/* Set up the overlay super block from the "lowerdir=" and "upperdir="
  2989. + * options in @data.  Returns 0, or a negative errno after undoing all work.
  2990. + */
  2991. +static int ovl_fill_super(struct super_block *sb, void *data, int silent)
  2992. +{
  2993. +	struct path upperpath, lowerpath;
  2994. +	struct inode *root_inode;
  2995. +	struct dentry *root_dentry;
  2996. +	struct ovl_entry *oe;
  2997. +	struct ovl_fs *ufs;
  2998. +	int err;
  2999. +
  3000. +	err = -ENOMEM;
  3001. +	ufs = kmalloc(sizeof(struct ovl_fs), GFP_KERNEL);
  3002. +	if (!ufs)
  3003. +		goto out;
  3004. +
  3005. +	err = ovl_parse_opt((char *) data, &ufs->config);
  3006. +	if (err)
  3007. +		goto out_free_config;	/* an option may already be duplicated */
  3008. +
  3009. +	err = -EINVAL;
  3010. +	if (!ufs->config.upperdir || !ufs->config.lowerdir) {
  3011. +		printk(KERN_ERR "overlayfs: missing upperdir or lowerdir\n");
  3012. +		goto out_free_config;
  3013. +	}
  3014. +
  3015. +	err = -ENOMEM;
  3016. +	oe = ovl_alloc_entry();
  3017. +	if (oe == NULL)
  3018. +		goto out_free_config;
  3019. +
  3020. +	root_inode = ovl_new_inode(sb, S_IFDIR, oe);
  3021. +	if (!root_inode)
  3022. +		goto out_free_oe;
  3023. +
  3024. +	err = kern_path(ufs->config.upperdir, LOOKUP_FOLLOW, &upperpath);
  3025. +	if (err)
  3026. +		goto out_put_root;
  3027. +
  3028. +	err = kern_path(ufs->config.lowerdir, LOOKUP_FOLLOW, &lowerpath);
  3029. +	if (err)
  3030. +		goto out_put_upperpath;
  3031. +
  3032. +	err = -ENOTDIR;
  3033. +	if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) ||
  3034. +	    !S_ISDIR(lowerpath.dentry->d_inode->i_mode))
  3035. +		goto out_put_lowerpath;
  3036. +
  3037. +	sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth,
  3038. +				lowerpath.mnt->mnt_sb->s_stack_depth) + 1;
  3039. +
  3040. +	err = -EINVAL;
  3041. +	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
  3042. +		printk(KERN_ERR "overlayfs: maximum fs stacking depth exceeded\n");
  3043. +		goto out_put_lowerpath;
  3044. +	}
  3045. +
  3046. +	ufs->upper_mnt = clone_private_mount(&upperpath);
  3047. +	err = PTR_ERR(ufs->upper_mnt);
  3048. +	if (IS_ERR(ufs->upper_mnt)) {
  3049. +		printk(KERN_ERR "overlayfs: failed to clone upperpath\n");
  3050. +		goto out_put_lowerpath;
  3051. +	}
  3052. +
  3053. +	ufs->lower_mnt = clone_private_mount(&lowerpath);
  3054. +	err = PTR_ERR(ufs->lower_mnt);
  3055. +	if (IS_ERR(ufs->lower_mnt)) {
  3056. +		printk(KERN_ERR "overlayfs: failed to clone lowerpath\n");
  3057. +		goto out_put_upper_mnt;
  3058. +	}
  3059. +
  3060. +	/*
  3061. +	 * Make lower_mnt R/O. That way fchmod/fchown on lower file
  3062. +	 * will fail instead of modifying lower fs.
  3063. +	 */
  3064. +	ufs->lower_mnt->mnt_flags |= MNT_READONLY;
  3065. +
  3066. +	/* If the upper fs is r/o, we mark overlayfs r/o too */
  3067. +	if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)
  3068. +		sb->s_flags |= MS_RDONLY;
  3069. +
  3070. +	if (!(sb->s_flags & MS_RDONLY)) {
  3071. +		err = mnt_want_write(ufs->upper_mnt);
  3072. +		if (err)
  3073. +			goto out_put_lower_mnt;
  3074. +	}
  3075. +
  3076. +	err = -ENOMEM;
  3077. +	root_dentry = d_alloc_root(root_inode);
  3078. +	if (!root_dentry)
  3079. +		goto out_drop_write;
  3080. +
  3081. +	mntput(upperpath.mnt);	/* only the dentries from kern_path() are kept */
  3082. +	mntput(lowerpath.mnt);
  3083. +
  3084. +	oe->__upperdentry = dget(upperpath.dentry);	/* double reference on upper */
  3085. +	oe->lowerdentry = lowerpath.dentry;
  3086. +	root_dentry->d_fsdata = oe;
  3087. +	root_dentry->d_op = &ovl_dentry_operations;
  3088. +
  3089. +	sb->s_op = &ovl_super_operations;
  3090. +	sb->s_root = root_dentry;
  3091. +	sb->s_fs_info = ufs;
  3092. +
  3093. +	return 0;
  3094. +
  3095. +out_drop_write:
  3096. +	if (!(sb->s_flags & MS_RDONLY))
  3097. +		mnt_drop_write(ufs->upper_mnt);
  3098. +out_put_lower_mnt:
  3099. +	mntput(ufs->lower_mnt);
  3100. +out_put_upper_mnt:
  3101. +	mntput(ufs->upper_mnt);
  3102. +out_put_lowerpath:
  3103. +	path_put(&lowerpath);
  3104. +out_put_upperpath:
  3105. +	path_put(&upperpath);
  3106. +out_put_root:
  3107. +	iput(root_inode);
  3108. +out_free_oe:
  3109. +	kfree(oe);
  3110. +out_free_config:
  3111. +	kfree(ufs->config.lowerdir);
  3112. +	kfree(ufs->config.upperdir);
  3113. +	kfree(ufs);
  3114. +out:
  3115. +	return err;
  3116. +}
  3117. +
  3118. +static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
  3119. + const char *dev_name, void *raw_data)
  3120. +{
  3121. + return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); /* dev_name is not used */
  3122. +}
  3123. +
  3124. +static struct file_system_type ovl_fs_type = {
  3125. + .owner = THIS_MODULE,
  3126. + .name = "overlayfs", /* filesystem type name registered by ovl_init() */
  3127. + .mount = ovl_mount,
  3128. + .kill_sb = kill_anon_super,
  3129. +};
  3130. +
  3131. +static int __init ovl_init(void)
  3132. +{
  3133. + return register_filesystem(&ovl_fs_type); /* the registration result is the module init status */
  3134. +}
  3135. +
  3136. +static void __exit ovl_exit(void)
  3137. +{
  3138. + unregister_filesystem(&ovl_fs_type); /* undoes the registration made in ovl_init() */
  3139. +}
  3140. +
  3141. +module_init(ovl_init);
  3142. +module_exit(ovl_exit);
  3143. --- a/fs/splice.c
  3144. +++ b/fs/splice.c
  3145. @@ -1302,6 +1302,7 @@ long do_splice_direct(struct file *in, l
  3146. return ret;
  3147. }
  3148. +EXPORT_SYMBOL(do_splice_direct);
  3149. static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
  3150. struct pipe_inode_info *opipe,
  3151. --- a/include/linux/fs.h
  3152. +++ b/include/linux/fs.h
  3153. @@ -484,6 +484,12 @@ struct iattr {
  3154. */
  3155. #include <linux/quota.h>
  3156. +/*
  3157. + * Maximum number of stacked filesystem layers. This needs to be limited
  3158. + * to prevent kernel stack overflow.
  3159. + */
  3160. +#define FILESYSTEM_MAX_STACK_DEPTH 2
  3161. +
  3162. /**
  3163. * enum positive_aop_returns - aop return codes with specific semantics
  3164. *
  3165. @@ -1501,6 +1507,11 @@ struct super_block {
  3166. /* Being remounted read-only */
  3167. int s_readonly_remount;
  3168. +
  3169. + /*
  3170. + * Indicates how deep in a filesystem stack this SB is
  3171. + */
  3172. + int s_stack_depth;
  3173. };
  3174. /* superblock cache pruning functions */
  3175. @@ -1658,6 +1669,8 @@ struct inode_operations {
  3176. void (*truncate_range)(struct inode *, loff_t, loff_t);
  3177. int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
  3178. u64 len);
  3179. + struct file *(*open) (struct dentry *, struct file *,
  3180. + const struct cred *);
  3181. } ____cacheline_aligned;
  3182. struct seq_file;
  3183. @@ -2028,6 +2041,7 @@ extern long do_sys_open(int dfd, const c
  3184. extern struct file *filp_open(const char *, int, umode_t);
  3185. extern struct file *file_open_root(struct dentry *, struct vfsmount *,
  3186. const char *, int);
  3187. +extern struct file *vfs_open(struct path *, struct file *, const struct cred *);
  3188. extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
  3189. const struct cred *);
  3190. extern int filp_close(struct file *, fl_owner_t id);
  3191. --- a/include/linux/mount.h
  3192. +++ b/include/linux/mount.h
  3193. @@ -66,6 +66,9 @@ extern void mnt_pin(struct vfsmount *mnt
  3194. extern void mnt_unpin(struct vfsmount *mnt);
  3195. extern int __mnt_is_readonly(struct vfsmount *mnt);
  3196. +struct path;
  3197. +extern struct vfsmount *clone_private_mount(struct path *path);
  3198. +
  3199. struct file_system_type;
  3200. extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
  3201. int flags, const char *name,