├── .gitignore ├── README ├── overlayfs.v13-3.4-rc7.patch ├── overlayfs.v14-3.6-rc1.patch ├── overlayfs.v15-3.6-rc4.patch ├── overlayfs.v16-3.9-rc2.patch ├── overlayfs.v17-3.9-rc2.patch ├── overlayfs.v18-3.10-rc7.patch ├── overlayfs.v19-3.11-rc2.patch ├── overlayfs.v20-3.12-rc2.patch ├── overlayfs.v21-3.13-rc7.patch ├── overlayfs.v22-3.15-rc6.patch └── overlayfs.v23-3.16-rc4.patch /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore Mac OS X 2 | .DS_Store 3 | 4 | # Ignore build directories and custom configuration 5 | build 6 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | OverlayFS Linux kernel patches 2 | ============================== 3 | 4 | Note: I am NOT an OverlayFS developer. However, I want to use 5 | OverlayFS and needed kernel patches that apply to my kernel sources. 6 | 7 | The official OverlayFS sources are at 8 | http://git.kernel.org/cgit/linux/kernel/git/mszeredi/vfs.git/ 9 | 10 | I was unable to find any patches to apply OverlayFS to an existing kernel 11 | source tree. It appears that OverlayFS is developed as branches in a git 12 | repo of the entire Linux kernel source tree. I extracted patches for each 13 | of the OverlayFS versions using the following procedure: 14 | 15 | http://adis.ca/entry/2014/overlayfs-patch/ 16 | 17 | Linux 3.12-rc2 overlayfs.v20 18 | Linux 3.11-rc2 overlayfs.v19 19 | Linux 3.10-rc7 overlayfs.v18 20 | Linux 3.9-rc2 overlayfs.v17 21 | Linux 3.9-rc2 overlayfs.v16 22 | Linux 3.6-rc4 overlayfs.v15 23 | Linux 3.6-rc1 overlayfs.v14 24 | Linux 3.4-rc7 overlayfs.v13 25 | 26 | I was unable to create patches for overlayfs.v11 and overlayfs.v12. 
27 | 28 | -------------------------------------------------------------------------------- /overlayfs.v14-3.6-rc1.patch: -------------------------------------------------------------------------------- 1 | diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking 2 | index 0f103e3..d222b6a 100644 3 | --- a/Documentation/filesystems/Locking 4 | +++ b/Documentation/filesystems/Locking 5 | @@ -64,6 +64,7 @@ prototypes: 6 | int (*atomic_open)(struct inode *, struct dentry *, 7 | struct file *, unsigned open_flag, 8 | umode_t create_mode, int *opened); 9 | + int (*dentry_open)(struct dentry *, struct file *, const struct cred *); 10 | 11 | locking rules: 12 | all may block 13 | @@ -92,6 +93,7 @@ removexattr: yes 14 | fiemap: no 15 | update_time: no 16 | atomic_open: yes 17 | +dentry_open: no 18 | 19 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on 20 | victim. 21 | diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt 22 | new file mode 100644 23 | index 0000000..7161dc3 24 | --- /dev/null 25 | +++ b/Documentation/filesystems/overlayfs.txt 26 | @@ -0,0 +1,199 @@ 27 | +Written by: Neil Brown 28 | + 29 | +Overlay Filesystem 30 | +================== 31 | + 32 | +This document describes a prototype for a new approach to providing 33 | +overlay-filesystem functionality in Linux (sometimes referred to as 34 | +union-filesystems). An overlay-filesystem tries to present a 35 | +filesystem which is the result of overlaying one filesystem on top 36 | +of the other. 37 | + 38 | +The result will inevitably fail to look exactly like a normal 39 | +filesystem for various technical reasons. The expectation is that 40 | +many use cases will be able to ignore these differences. 41 | + 42 | +This approach is 'hybrid' because the objects that appear in the 43 | +filesystem do not all appear to belong to that filesystem. 
In many 44 | +cases an object accessed in the union will be indistinguishable 45 | +from accessing the corresponding object from the original filesystem. 46 | +This is most obvious from the 'st_dev' field returned by stat(2). 47 | + 48 | +While directories will report an st_dev from the overlay-filesystem, 49 | +all non-directory objects will report an st_dev from the lower or 50 | +upper filesystem that is providing the object. Similarly st_ino will 51 | +only be unique when combined with st_dev, and both of these can change 52 | +over the lifetime of a non-directory object. Many applications and 53 | +tools ignore these values and will not be affected. 54 | + 55 | +Upper and Lower 56 | +--------------- 57 | + 58 | +An overlay filesystem combines two filesystems - an 'upper' filesystem 59 | +and a 'lower' filesystem. When a name exists in both filesystems, the 60 | +object in the 'upper' filesystem is visible while the object in the 61 | +'lower' filesystem is either hidden or, in the case of directories, 62 | +merged with the 'upper' object. 63 | + 64 | +It would be more correct to refer to an upper and lower 'directory 65 | +tree' rather than 'filesystem' as it is quite possible for both 66 | +directory trees to be in the same filesystem and there is no 67 | +requirement that the root of a filesystem be given for either upper or 68 | +lower. 69 | + 70 | +The lower filesystem can be any filesystem supported by Linux and does 71 | +not need to be writable. The lower filesystem can even be another 72 | +overlayfs. The upper filesystem will normally be writable and if it 73 | +is it must support the creation of trusted.* extended attributes, and 74 | +must provide valid d_type in readdir responses, at least for symbolic 75 | +links - so NFS is not suitable. 76 | + 77 | +A read-only overlay of two read-only filesystems may use any 78 | +filesystem type. 79 | + 80 | +Directories 81 | +----------- 82 | + 83 | +Overlaying mainly involves directories. 
If a given name appears in both 84 | +upper and lower filesystems and refers to a non-directory in either, 85 | +then the lower object is hidden - the name refers only to the upper 86 | +object. 87 | + 88 | +Where both upper and lower objects are directories, a merged directory 89 | +is formed. 90 | + 91 | +At mount time, the two directories given as mount options are combined 92 | +into a merged directory: 93 | + 94 | + mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper /overlay 95 | + 96 | +Then whenever a lookup is requested in such a merged directory, the 97 | +lookup is performed in each actual directory and the combined result 98 | +is cached in the dentry belonging to the overlay filesystem. If both 99 | +actual lookups find directories, both are stored and a merged 100 | +directory is created, otherwise only one is stored: the upper if it 101 | +exists, else the lower. 102 | + 103 | +Only the lists of names from directories are merged. Other content 104 | +such as metadata and extended attributes are reported for the upper 105 | +directory only. These attributes of the lower directory are hidden. 106 | + 107 | +whiteouts and opaque directories 108 | +-------------------------------- 109 | + 110 | +In order to support rm and rmdir without changing the lower 111 | +filesystem, an overlay filesystem needs to record in the upper filesystem 112 | +that files have been removed. This is done using whiteouts and opaque 113 | +directories (non-directories are always opaque). 114 | + 115 | +The overlay filesystem uses extended attributes with a 116 | +"trusted.overlay." prefix to record these details. 117 | + 118 | +A whiteout is created as a symbolic link with target 119 | +"(overlay-whiteout)" and with xattr "trusted.overlay.whiteout" set to "y". 120 | +When a whiteout is found in the upper level of a merged directory, any 121 | +matching name in the lower level is ignored, and the whiteout itself 122 | +is also hidden. 
123 | + 124 | +A directory is made opaque by setting the xattr "trusted.overlay.opaque" 125 | +to "y". Where the upper filesystem contains an opaque directory, any 126 | +directory in the lower filesystem with the same name is ignored. 127 | + 128 | +readdir 129 | +------- 130 | + 131 | +When a 'readdir' request is made on a merged directory, the upper and 132 | +lower directories are each read and the name lists merged in the 133 | +obvious way (upper is read first, then lower - entries that already 134 | +exist are not re-added). This merged name list is cached in the 135 | +'struct file' and so remains as long as the file is kept open. If the 136 | +directory is opened and read by two processes at the same time, they 137 | +will each have separate caches. A seekdir to the start of the 138 | +directory (offset 0) followed by a readdir will cause the cache to be 139 | +discarded and rebuilt. 140 | + 141 | +This means that changes to the merged directory do not appear while a 142 | +directory is being read. This is unlikely to be noticed by many 143 | +programs. 144 | + 145 | +seek offsets are assigned sequentially when the directories are read. 146 | +Thus if 147 | + - read part of a directory 148 | + - remember an offset, and close the directory 149 | + - re-open the directory some time later 150 | + - seek to the remembered offset 151 | + 152 | +there may be little correlation between the old and new locations in 153 | +the list of filenames, particularly if anything has changed in the 154 | +directory. 155 | + 156 | +Readdir on directories that are not merged is simply handled by the 157 | +underlying directory (upper or lower). 158 | + 159 | + 160 | +Non-directories 161 | +--------------- 162 | + 163 | +Objects that are not directories (files, symlinks, device-special 164 | +files etc.) are presented either from the upper or lower filesystem as 165 | +appropriate. 
When a file in the lower filesystem is accessed in a way 166 | +that requires write-access, such as opening for write access, changing 167 | +some metadata etc., the file is first copied from the lower filesystem 168 | +to the upper filesystem (copy_up). Note that creating a hard-link 169 | +also requires copy_up, though of course creation of a symlink does 170 | +not. 171 | + 172 | +The copy_up may turn out to be unnecessary, for example if the file is 173 | +opened for read-write but the data is not modified. 174 | + 175 | +The copy_up process first makes sure that the containing directory 176 | +exists in the upper filesystem - creating it and any parents as 177 | +necessary. It then creates the object with the same metadata (owner, 178 | +mode, mtime, symlink-target etc.) and then if the object is a file, the 179 | +data is copied from the lower to the upper filesystem. Finally any 180 | +extended attributes are copied up. 181 | + 182 | +Once the copy_up is complete, the overlay filesystem simply 183 | +provides direct access to the newly created file in the upper 184 | +filesystem - future operations on the file are barely noticed by the 185 | +overlay filesystem (though an operation on the name of the file such as 186 | +rename or unlink will of course be noticed and handled). 187 | + 188 | + 189 | +Non-standard behavior 190 | +--------------------- 191 | + 192 | +The copy_up operation essentially creates a new, identical file and 193 | +moves it over to the old name. The new file may be on a different 194 | +filesystem, so both st_dev and st_ino of the file may change. 195 | + 196 | +Any open files referring to this inode will access the old data and 197 | +metadata. Similarly any file locks obtained before copy_up will not 198 | +apply to the copied up file. 199 | + 200 | +On a file opened with O_RDONLY, fchmod(2), fchown(2), futimesat(2) 201 | +and fsetxattr(2) will fail with EROFS. 
202 | + 203 | +If a file with multiple hard links is copied up, then this will 204 | +"break" the link. Changes will not be propagated to other names 205 | +referring to the same inode. 206 | + 207 | +Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory 208 | +object in overlayfs will not contain valid absolute paths, only 209 | +relative paths leading up to the filesystem's root. This will be 210 | +fixed in the future. 211 | + 212 | +Some operations are not atomic, for example a crash during copy_up or 213 | +rename will leave the filesystem in an inconsistent state. This will 214 | +be addressed in the future. 215 | + 216 | +Changes to underlying filesystems 217 | +--------------------------------- 218 | + 219 | +Offline changes, when the overlay is not mounted, are allowed to either 220 | +the upper or the lower trees. 221 | + 222 | +Changes to the underlying filesystems while part of a mounted overlay 223 | +filesystem are not allowed. If the underlying filesystem is changed, 224 | +the behavior of the overlay is undefined, though it will not result in 225 | +a crash or deadlock. 226 | diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt 227 | index 065aa2d..f53d93c 100644 228 | --- a/Documentation/filesystems/vfs.txt 229 | +++ b/Documentation/filesystems/vfs.txt 230 | @@ -367,6 +367,7 @@ struct inode_operations { 231 | int (*atomic_open)(struct inode *, struct dentry *, 232 | struct file *, unsigned open_flag, 233 | umode_t create_mode, int *opened); 234 | + int (*dentry_open)(struct dentry *, struct file *, const struct cred *); 235 | }; 236 | 237 | Again, all methods are called without any locks being held, unless 238 | @@ -696,6 +697,12 @@ struct address_space_operations { 239 | but instead uses bmap to find out where the blocks in the file 240 | are and uses those addresses directly. 
241 | 242 | + dentry_open: this is an alternative to f_op->open(); the difference is that 243 | + this method may open a file not necessarily originating from the same 244 | + filesystem as the one i_op->dentry_open() was called on. It may be 245 | + useful for stacking filesystems which want to allow native I/O directly 246 | + on underlying files. 247 | + 248 | 249 | invalidatepage: If a page has PagePrivate set, then invalidatepage 250 | will be called when part or all of the page is to be removed 251 | diff --git a/MAINTAINERS b/MAINTAINERS 252 | index 94b823f..d843a51 100644 253 | --- a/MAINTAINERS 254 | +++ b/MAINTAINERS 255 | @@ -5099,6 +5099,13 @@ F: drivers/scsi/osd/ 256 | F: include/scsi/osd_* 257 | F: fs/exofs/ 258 | 259 | +OVERLAYFS FILESYSTEM 260 | +M: Miklos Szeredi 261 | +L: linux-fsdevel@vger.kernel.org 262 | +S: Supported 263 | +F: fs/overlayfs/* 264 | +F: Documentation/filesystems/overlayfs.txt 265 | + 266 | P54 WIRELESS DRIVER 267 | M: Christian Lamparter 268 | L: linux-wireless@vger.kernel.org 269 | diff --git a/fs/Kconfig b/fs/Kconfig 270 | index f95ae3a..e0c5d43 100644 271 | --- a/fs/Kconfig 272 | +++ b/fs/Kconfig 273 | @@ -67,6 +67,7 @@ source "fs/quota/Kconfig" 274 | 275 | source "fs/autofs4/Kconfig" 276 | source "fs/fuse/Kconfig" 277 | +source "fs/overlayfs/Kconfig" 278 | 279 | config CUSE 280 | tristate "Character device in Userspace support" 281 | diff --git a/fs/Makefile b/fs/Makefile 282 | index 2fb9779..fcd9788 100644 283 | --- a/fs/Makefile 284 | +++ b/fs/Makefile 285 | @@ -106,6 +106,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ 286 | obj-$(CONFIG_AUTOFS4_FS) += autofs4/ 287 | obj-$(CONFIG_ADFS_FS) += adfs/ 288 | obj-$(CONFIG_FUSE_FS) += fuse/ 289 | +obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ 290 | obj-$(CONFIG_UDF_FS) += udf/ 291 | obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ 292 | obj-$(CONFIG_OMFS_FS) += omfs/ 293 | diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c 294 | index 2768138..344fb2c 100644 295 | --- a/fs/ecryptfs/main.c 296 | +++ 
b/fs/ecryptfs/main.c 297 | @@ -565,6 +565,13 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags 298 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; 299 | s->s_blocksize = path.dentry->d_sb->s_blocksize; 300 | s->s_magic = ECRYPTFS_SUPER_MAGIC; 301 | + s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1; 302 | + 303 | + rc = -EINVAL; 304 | + if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 305 | + printk(KERN_ERR "eCryptfs: maximum fs stacking depth exceeded\n"); 306 | + goto out_free; 307 | + } 308 | 309 | inode = ecryptfs_get_inode(path.dentry->d_inode, s); 310 | rc = PTR_ERR(inode); 311 | diff --git a/fs/internal.h b/fs/internal.h 312 | index 371bcc4..8578209 100644 313 | --- a/fs/internal.h 314 | +++ b/fs/internal.h 315 | @@ -42,11 +42,6 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait) 316 | extern void __init chrdev_init(void); 317 | 318 | /* 319 | - * namei.c 320 | - */ 321 | -extern int __inode_permission(struct inode *, int); 322 | - 323 | -/* 324 | * namespace.c 325 | */ 326 | extern int copy_mount_options(const void __user *, unsigned long *); 327 | diff --git a/fs/namei.c b/fs/namei.c 328 | index 1b46439..9be439a 100644 329 | --- a/fs/namei.c 330 | +++ b/fs/namei.c 331 | @@ -348,6 +348,7 @@ int __inode_permission(struct inode *inode, int mask) 332 | 333 | return security_inode_permission(inode, mask); 334 | } 335 | +EXPORT_SYMBOL(__inode_permission); 336 | 337 | /** 338 | * sb_permission - Check superblock-level permissions 339 | @@ -2816,9 +2817,12 @@ finish_open_created: 340 | error = may_open(&nd->path, acc_mode, open_flag); 341 | if (error) 342 | goto out; 343 | - file->f_path.mnt = nd->path.mnt; 344 | - error = finish_open(file, nd->path.dentry, NULL, opened); 345 | - if (error) { 346 | + 347 | + BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ 348 | + error = vfs_open(&nd->path, file, current_cred()); 349 | + if (!error) { 350 | + *opened |= FILE_OPENED; 351 | + } 
else { 352 | if (error == -EOPENSTALE) 353 | goto stale_open; 354 | goto out; 355 | diff --git a/fs/namespace.c b/fs/namespace.c 356 | index 4d31f73..b4712ea 100644 357 | --- a/fs/namespace.c 358 | +++ b/fs/namespace.c 359 | @@ -1387,6 +1387,24 @@ void drop_collected_mounts(struct vfsmount *mnt) 360 | release_mounts(&umount_list); 361 | } 362 | 363 | +struct vfsmount *clone_private_mount(struct path *path) 364 | +{ 365 | + struct mount *old_mnt = real_mount(path->mnt); 366 | + struct mount *new_mnt; 367 | + 368 | + if (IS_MNT_UNBINDABLE(old_mnt)) 369 | + return ERR_PTR(-EINVAL); 370 | + 371 | + down_read(&namespace_sem); 372 | + new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); 373 | + up_read(&namespace_sem); 374 | + if (!new_mnt) 375 | + return ERR_PTR(-ENOMEM); 376 | + 377 | + return &new_mnt->mnt; 378 | +} 379 | +EXPORT_SYMBOL_GPL(clone_private_mount); 380 | + 381 | int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, 382 | struct vfsmount *root) 383 | { 384 | diff --git a/fs/open.c b/fs/open.c 385 | index f3d96e7..c5a8cac 100644 386 | --- a/fs/open.c 387 | +++ b/fs/open.c 388 | @@ -787,8 +787,7 @@ struct file *dentry_open(const struct path *path, int flags, 389 | return ERR_PTR(error); 390 | 391 | f->f_flags = flags; 392 | - f->f_path = *path; 393 | - error = do_dentry_open(f, NULL, cred); 394 | + error = vfs_open(path, f, cred); 395 | if (!error) { 396 | error = open_check_o_direct(f); 397 | if (error) { 398 | @@ -803,6 +802,26 @@ struct file *dentry_open(const struct path *path, int flags, 399 | } 400 | EXPORT_SYMBOL(dentry_open); 401 | 402 | +/** 403 | + * vfs_open - open the file at the given path 404 | + * @path: path to open 405 | + * @filp: newly allocated file with f_flag initialized 406 | + * @cred: credentials to use 407 | + */ 408 | +int vfs_open(const struct path *path, struct file *filp, 409 | + const struct cred *cred) 410 | +{ 411 | + struct inode *inode = path->dentry->d_inode; 412 | + 413 | + if (inode->i_op->dentry_open) 
414 | + return inode->i_op->dentry_open(path->dentry, filp, cred); 415 | + else { 416 | + filp->f_path = *path; 417 | + return do_dentry_open(filp, NULL, cred); 418 | + } 419 | +} 420 | +EXPORT_SYMBOL(vfs_open); 421 | + 422 | static void __put_unused_fd(struct files_struct *files, unsigned int fd) 423 | { 424 | struct fdtable *fdt = files_fdtable(files); 425 | diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig 426 | new file mode 100644 427 | index 0000000..c4517da 428 | --- /dev/null 429 | +++ b/fs/overlayfs/Kconfig 430 | @@ -0,0 +1,4 @@ 431 | +config OVERLAYFS_FS 432 | + tristate "Overlay filesystem support" 433 | + help 434 | + Add support for overlay filesystem. 435 | diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile 436 | new file mode 100644 437 | index 0000000..8f91889 438 | --- /dev/null 439 | +++ b/fs/overlayfs/Makefile 440 | @@ -0,0 +1,7 @@ 441 | +# 442 | +# Makefile for the overlay filesystem. 443 | +# 444 | + 445 | +obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o 446 | + 447 | +overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o 448 | diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c 449 | new file mode 100644 450 | index 0000000..87dbeee 451 | --- /dev/null 452 | +++ b/fs/overlayfs/copy_up.c 453 | @@ -0,0 +1,385 @@ 454 | +/* 455 | + * 456 | + * Copyright (C) 2011 Novell Inc. 457 | + * 458 | + * This program is free software; you can redistribute it and/or modify it 459 | + * under the terms of the GNU General Public License version 2 as published by 460 | + * the Free Software Foundation. 
461 | + */ 462 | + 463 | +#include 464 | +#include 465 | +#include 466 | +#include 467 | +#include 468 | +#include 469 | +#include 470 | +#include 471 | +#include "overlayfs.h" 472 | + 473 | +#define OVL_COPY_UP_CHUNK_SIZE (1 << 20) 474 | + 475 | +static int ovl_copy_up_xattr(struct dentry *old, struct dentry *new) 476 | +{ 477 | + ssize_t list_size, size; 478 | + char *buf, *name, *value; 479 | + int error; 480 | + 481 | + if (!old->d_inode->i_op->getxattr || 482 | + !new->d_inode->i_op->getxattr) 483 | + return 0; 484 | + 485 | + list_size = vfs_listxattr(old, NULL, 0); 486 | + if (list_size <= 0) { 487 | + if (list_size == -EOPNOTSUPP) 488 | + return 0; 489 | + return list_size; 490 | + } 491 | + 492 | + buf = kzalloc(list_size, GFP_KERNEL); 493 | + if (!buf) 494 | + return -ENOMEM; 495 | + 496 | + error = -ENOMEM; 497 | + value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); 498 | + if (!value) 499 | + goto out; 500 | + 501 | + list_size = vfs_listxattr(old, buf, list_size); 502 | + if (list_size <= 0) { 503 | + error = list_size; 504 | + goto out_free_value; 505 | + } 506 | + 507 | + for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { 508 | + size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); 509 | + if (size <= 0) { 510 | + error = size; 511 | + goto out_free_value; 512 | + } 513 | + error = vfs_setxattr(new, name, value, size, 0); 514 | + if (error) 515 | + goto out_free_value; 516 | + } 517 | + 518 | +out_free_value: 519 | + kfree(value); 520 | +out: 521 | + kfree(buf); 522 | + return error; 523 | +} 524 | + 525 | +static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) 526 | +{ 527 | + struct file *old_file; 528 | + struct file *new_file; 529 | + int error = 0; 530 | + 531 | + if (len == 0) 532 | + return 0; 533 | + 534 | + old_file = ovl_path_open(old, O_RDONLY); 535 | + if (IS_ERR(old_file)) 536 | + return PTR_ERR(old_file); 537 | + 538 | + new_file = ovl_path_open(new, O_WRONLY); 539 | + if (IS_ERR(new_file)) { 540 | + 
error = PTR_ERR(new_file); 541 | + goto out_fput; 542 | + } 543 | + 544 | + /* FIXME: copy up sparse files efficiently */ 545 | + while (len) { 546 | + loff_t offset = new_file->f_pos; 547 | + size_t this_len = OVL_COPY_UP_CHUNK_SIZE; 548 | + long bytes; 549 | + 550 | + if (len < this_len) 551 | + this_len = len; 552 | + 553 | + if (signal_pending_state(TASK_KILLABLE, current)) { 554 | + error = -EINTR; 555 | + break; 556 | + } 557 | + 558 | + bytes = do_splice_direct(old_file, &offset, new_file, this_len, 559 | + SPLICE_F_MOVE); 560 | + if (bytes <= 0) { 561 | + error = bytes; 562 | + break; 563 | + } 564 | + 565 | + len -= bytes; 566 | + } 567 | + 568 | + fput(new_file); 569 | +out_fput: 570 | + fput(old_file); 571 | + return error; 572 | +} 573 | + 574 | +static char *ovl_read_symlink(struct dentry *realdentry) 575 | +{ 576 | + int res; 577 | + char *buf; 578 | + struct inode *inode = realdentry->d_inode; 579 | + mm_segment_t old_fs; 580 | + 581 | + res = -EINVAL; 582 | + if (!inode->i_op->readlink) 583 | + goto err; 584 | + 585 | + res = -ENOMEM; 586 | + buf = (char *) __get_free_page(GFP_KERNEL); 587 | + if (!buf) 588 | + goto err; 589 | + 590 | + old_fs = get_fs(); 591 | + set_fs(get_ds()); 592 | + /* The cast to a user pointer is valid due to the set_fs() */ 593 | + res = inode->i_op->readlink(realdentry, 594 | + (char __user *)buf, PAGE_SIZE - 1); 595 | + set_fs(old_fs); 596 | + if (res < 0) { 597 | + free_page((unsigned long) buf); 598 | + goto err; 599 | + } 600 | + buf[res] = '\0'; 601 | + 602 | + return buf; 603 | + 604 | +err: 605 | + return ERR_PTR(res); 606 | +} 607 | + 608 | +static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) 609 | +{ 610 | + struct iattr attr = { 611 | + .ia_valid = 612 | + ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, 613 | + .ia_atime = stat->atime, 614 | + .ia_mtime = stat->mtime, 615 | + }; 616 | + 617 | + return notify_change(upperdentry, &attr); 618 | +} 619 | + 620 | +static int 
ovl_set_mode(struct dentry *upperdentry, umode_t mode) 621 | +{ 622 | + struct iattr attr = { 623 | + .ia_valid = ATTR_MODE, 624 | + .ia_mode = mode, 625 | + }; 626 | + 627 | + return notify_change(upperdentry, &attr); 628 | +} 629 | + 630 | +static int ovl_copy_up_locked(struct dentry *upperdir, struct dentry *dentry, 631 | + struct path *lowerpath, struct kstat *stat, 632 | + const char *link) 633 | +{ 634 | + int err; 635 | + struct path newpath; 636 | + umode_t mode = stat->mode; 637 | + 638 | + /* Can't properly set mode on creation because of the umask */ 639 | + stat->mode &= S_IFMT; 640 | + 641 | + ovl_path_upper(dentry, &newpath); 642 | + WARN_ON(newpath.dentry); 643 | + newpath.dentry = ovl_upper_create(upperdir, dentry, stat, link); 644 | + if (IS_ERR(newpath.dentry)) 645 | + return PTR_ERR(newpath.dentry); 646 | + 647 | + if (S_ISREG(stat->mode)) { 648 | + err = ovl_copy_up_data(lowerpath, &newpath, stat->size); 649 | + if (err) 650 | + goto err_remove; 651 | + } 652 | + 653 | + err = ovl_copy_up_xattr(lowerpath->dentry, newpath.dentry); 654 | + if (err) 655 | + goto err_remove; 656 | + 657 | + mutex_lock(&newpath.dentry->d_inode->i_mutex); 658 | + if (!S_ISLNK(stat->mode)) 659 | + err = ovl_set_mode(newpath.dentry, mode); 660 | + if (!err) 661 | + err = ovl_set_timestamps(newpath.dentry, stat); 662 | + mutex_unlock(&newpath.dentry->d_inode->i_mutex); 663 | + if (err) 664 | + goto err_remove; 665 | + 666 | + ovl_dentry_update(dentry, newpath.dentry); 667 | + 668 | + /* 669 | + * Easiest way to get rid of the lower dentry reference is to 670 | + * drop this dentry. This is neither needed nor possible for 671 | + * directories. 
672 | + */ 673 | + if (!S_ISDIR(stat->mode)) 674 | + d_drop(dentry); 675 | + 676 | + return 0; 677 | + 678 | +err_remove: 679 | + if (S_ISDIR(stat->mode)) 680 | + vfs_rmdir(upperdir->d_inode, newpath.dentry); 681 | + else 682 | + vfs_unlink(upperdir->d_inode, newpath.dentry); 683 | + 684 | + dput(newpath.dentry); 685 | + 686 | + return err; 687 | +} 688 | + 689 | +/* 690 | + * Copy up a single dentry 691 | + * 692 | + * Directory renames only allowed on "pure upper" (already created on 693 | + * upper filesystem, never copied up). Directories which are on lower or 694 | + * are merged may not be renamed. For these -EXDEV is returned and 695 | + * userspace has to deal with it. This means, when copying up a 696 | + * directory we can rely on it and ancestors being stable. 697 | + * 698 | + * Non-directory renames start with copy up of source if necessary. The 699 | + * actual rename will only proceed once the copy up was successful. Copy 700 | + * up uses upper parent i_mutex for exclusion. Since rename can change 701 | + * d_parent it is possible that the copy up will lock the old parent. At 702 | + * that point the file will have already been copied up anyway. 
703 | + */ 704 | +static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, 705 | + struct path *lowerpath, struct kstat *stat) 706 | +{ 707 | + int err; 708 | + struct kstat pstat; 709 | + struct path parentpath; 710 | + struct dentry *upperdir; 711 | + const struct cred *old_cred; 712 | + struct cred *override_cred; 713 | + char *link = NULL; 714 | + 715 | + ovl_path_upper(parent, &parentpath); 716 | + upperdir = parentpath.dentry; 717 | + 718 | + err = vfs_getattr(parentpath.mnt, parentpath.dentry, &pstat); 719 | + if (err) 720 | + return err; 721 | + 722 | + if (S_ISLNK(stat->mode)) { 723 | + link = ovl_read_symlink(lowerpath->dentry); 724 | + if (IS_ERR(link)) 725 | + return PTR_ERR(link); 726 | + } 727 | + 728 | + err = -ENOMEM; 729 | + override_cred = prepare_creds(); 730 | + if (!override_cred) 731 | + goto out_free_link; 732 | + 733 | + override_cred->fsuid = stat->uid; 734 | + override_cred->fsgid = stat->gid; 735 | + /* 736 | + * CAP_SYS_ADMIN for copying up extended attributes 737 | + * CAP_DAC_OVERRIDE for create 738 | + * CAP_FOWNER for chmod, timestamp update 739 | + * CAP_FSETID for chmod 740 | + * CAP_MKNOD for mknod 741 | + */ 742 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 743 | + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 744 | + cap_raise(override_cred->cap_effective, CAP_FOWNER); 745 | + cap_raise(override_cred->cap_effective, CAP_FSETID); 746 | + cap_raise(override_cred->cap_effective, CAP_MKNOD); 747 | + old_cred = override_creds(override_cred); 748 | + 749 | + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT); 750 | + if (ovl_path_type(dentry) != OVL_PATH_LOWER) { 751 | + err = 0; 752 | + } else { 753 | + err = ovl_copy_up_locked(upperdir, dentry, lowerpath, 754 | + stat, link); 755 | + if (!err) { 756 | + /* Restore timestamps on parent (best effort) */ 757 | + ovl_set_timestamps(upperdir, &pstat); 758 | + } 759 | + } 760 | + 761 | + mutex_unlock(&upperdir->d_inode->i_mutex); 762 
| + 763 | + revert_creds(old_cred); 764 | + put_cred(override_cred); 765 | + 766 | +out_free_link: 767 | + if (link) 768 | + free_page((unsigned long) link); 769 | + 770 | + return err; 771 | +} 772 | + 773 | +int ovl_copy_up(struct dentry *dentry) 774 | +{ 775 | + int err; 776 | + 777 | + err = 0; 778 | + while (!err) { 779 | + struct dentry *next; 780 | + struct dentry *parent; 781 | + struct path lowerpath; 782 | + struct kstat stat; 783 | + enum ovl_path_type type = ovl_path_type(dentry); 784 | + 785 | + if (type != OVL_PATH_LOWER) 786 | + break; 787 | + 788 | + next = dget(dentry); 789 | + /* find the topmost dentry not yet copied up */ 790 | + for (;;) { 791 | + parent = dget_parent(next); 792 | + 793 | + type = ovl_path_type(parent); 794 | + if (type != OVL_PATH_LOWER) 795 | + break; 796 | + 797 | + dput(next); 798 | + next = parent; 799 | + } 800 | + 801 | + ovl_path_lower(next, &lowerpath); 802 | + err = vfs_getattr(lowerpath.mnt, lowerpath.dentry, &stat); 803 | + if (!err) 804 | + err = ovl_copy_up_one(parent, next, &lowerpath, &stat); 805 | + 806 | + dput(parent); 807 | + dput(next); 808 | + } 809 | + 810 | + return err; 811 | +} 812 | + 813 | +/* Optimize by not copying up the file first and truncating later */ 814 | +int ovl_copy_up_truncate(struct dentry *dentry, loff_t size) 815 | +{ 816 | + int err; 817 | + struct kstat stat; 818 | + struct path lowerpath; 819 | + struct dentry *parent = dget_parent(dentry); 820 | + 821 | + err = ovl_copy_up(parent); 822 | + if (err) 823 | + goto out_dput_parent; 824 | + 825 | + ovl_path_lower(dentry, &lowerpath); 826 | + err = vfs_getattr(lowerpath.mnt, lowerpath.dentry, &stat); 827 | + if (err) 828 | + goto out_dput_parent; 829 | + 830 | + if (size < stat.size) 831 | + stat.size = size; 832 | + 833 | + err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat); 834 | + 835 | +out_dput_parent: 836 | + dput(parent); 837 | + return err; 838 | +} 839 | diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c 840 | new file 
mode 100644 841 | index 0000000..c4446c4 842 | --- /dev/null 843 | +++ b/fs/overlayfs/dir.c 844 | @@ -0,0 +1,604 @@ 845 | +/* 846 | + * 847 | + * Copyright (C) 2011 Novell Inc. 848 | + * 849 | + * This program is free software; you can redistribute it and/or modify it 850 | + * under the terms of the GNU General Public License version 2 as published by 851 | + * the Free Software Foundation. 852 | + */ 853 | + 854 | +#include 855 | +#include 856 | +#include 857 | +#include 858 | +#include 859 | +#include "overlayfs.h" 860 | + 861 | +static const char *ovl_whiteout_symlink = "(overlay-whiteout)"; 862 | + 863 | +static int ovl_whiteout(struct dentry *upperdir, struct dentry *dentry) 864 | +{ 865 | + int err; 866 | + struct dentry *newdentry; 867 | + const struct cred *old_cred; 868 | + struct cred *override_cred; 869 | + 870 | + /* FIXME: recheck lower dentry to see if whiteout is really needed */ 871 | + 872 | + err = -ENOMEM; 873 | + override_cred = prepare_creds(); 874 | + if (!override_cred) 875 | + goto out; 876 | + 877 | + /* 878 | + * CAP_SYS_ADMIN for setxattr 879 | + * CAP_DAC_OVERRIDE for symlink creation 880 | + * CAP_FOWNER for unlink in sticky directory 881 | + */ 882 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 883 | + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 884 | + cap_raise(override_cred->cap_effective, CAP_FOWNER); 885 | + override_cred->fsuid = 0; 886 | + override_cred->fsgid = 0; 887 | + old_cred = override_creds(override_cred); 888 | + 889 | + newdentry = lookup_one_len(dentry->d_name.name, upperdir, 890 | + dentry->d_name.len); 891 | + err = PTR_ERR(newdentry); 892 | + if (IS_ERR(newdentry)) 893 | + goto out_put_cred; 894 | + 895 | + /* Just been removed within the same locked region */ 896 | + WARN_ON(newdentry->d_inode); 897 | + 898 | + err = vfs_symlink(upperdir->d_inode, newdentry, ovl_whiteout_symlink); 899 | + if (err) 900 | + goto out_dput; 901 | + 902 | + ovl_dentry_version_inc(dentry->d_parent); 903 | + 
904 | + err = vfs_setxattr(newdentry, ovl_whiteout_xattr, "y", 1, 0); 905 | + if (err) 906 | + vfs_unlink(upperdir->d_inode, newdentry); 907 | + 908 | +out_dput: 909 | + dput(newdentry); 910 | +out_put_cred: 911 | + revert_creds(old_cred); 912 | + put_cred(override_cred); 913 | +out: 914 | + if (err) { 915 | + /* 916 | + * There's no way to recover from failure to whiteout. 917 | + * What should we do? Log a big fat error and... ? 918 | + */ 919 | + printk(KERN_ERR "overlayfs: ERROR - failed to whiteout '%s'\n", 920 | + dentry->d_name.name); 921 | + } 922 | + 923 | + return err; 924 | +} 925 | + 926 | +static struct dentry *ovl_lookup_create(struct dentry *upperdir, 927 | + struct dentry *template) 928 | +{ 929 | + int err; 930 | + struct dentry *newdentry; 931 | + struct qstr *name = &template->d_name; 932 | + 933 | + newdentry = lookup_one_len(name->name, upperdir, name->len); 934 | + if (IS_ERR(newdentry)) 935 | + return newdentry; 936 | + 937 | + if (newdentry->d_inode) { 938 | + const struct cred *old_cred; 939 | + struct cred *override_cred; 940 | + 941 | + /* No need to check whiteout if lower parent is non-existent */ 942 | + err = -EEXIST; 943 | + if (!ovl_dentry_lower(template->d_parent)) 944 | + goto out_dput; 945 | + 946 | + if (!S_ISLNK(newdentry->d_inode->i_mode)) 947 | + goto out_dput; 948 | + 949 | + err = -ENOMEM; 950 | + override_cred = prepare_creds(); 951 | + if (!override_cred) 952 | + goto out_dput; 953 | + 954 | + /* 955 | + * CAP_SYS_ADMIN for getxattr 956 | + * CAP_FOWNER for unlink in sticky directory 957 | + */ 958 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 959 | + cap_raise(override_cred->cap_effective, CAP_FOWNER); 960 | + old_cred = override_creds(override_cred); 961 | + 962 | + err = -EEXIST; 963 | + if (ovl_is_whiteout(newdentry)) 964 | + err = vfs_unlink(upperdir->d_inode, newdentry); 965 | + 966 | + revert_creds(old_cred); 967 | + put_cred(override_cred); 968 | + if (err) 969 | + goto out_dput; 970 | + 971 | + 
dput(newdentry); 972 | + newdentry = lookup_one_len(name->name, upperdir, name->len); 973 | + if (IS_ERR(newdentry)) { 974 | + ovl_whiteout(upperdir, template); 975 | + return newdentry; 976 | + } 977 | + 978 | + /* 979 | + * Whiteout just been successfully removed, parent 980 | + * i_mutex is still held, there's no way the lookup 981 | + * could return positive. 982 | + */ 983 | + WARN_ON(newdentry->d_inode); 984 | + } 985 | + 986 | + return newdentry; 987 | + 988 | +out_dput: 989 | + dput(newdentry); 990 | + return ERR_PTR(err); 991 | +} 992 | + 993 | +struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry, 994 | + struct kstat *stat, const char *link) 995 | +{ 996 | + int err; 997 | + struct dentry *newdentry; 998 | + struct inode *dir = upperdir->d_inode; 999 | + 1000 | + newdentry = ovl_lookup_create(upperdir, dentry); 1001 | + if (IS_ERR(newdentry)) 1002 | + goto out; 1003 | + 1004 | + switch (stat->mode & S_IFMT) { 1005 | + case S_IFREG: 1006 | + err = vfs_create(dir, newdentry, stat->mode, NULL); 1007 | + break; 1008 | + 1009 | + case S_IFDIR: 1010 | + err = vfs_mkdir(dir, newdentry, stat->mode); 1011 | + break; 1012 | + 1013 | + case S_IFCHR: 1014 | + case S_IFBLK: 1015 | + case S_IFIFO: 1016 | + case S_IFSOCK: 1017 | + err = vfs_mknod(dir, newdentry, stat->mode, stat->rdev); 1018 | + break; 1019 | + 1020 | + case S_IFLNK: 1021 | + err = vfs_symlink(dir, newdentry, link); 1022 | + break; 1023 | + 1024 | + default: 1025 | + err = -EPERM; 1026 | + } 1027 | + if (err) { 1028 | + if (ovl_dentry_is_opaque(dentry)) 1029 | + ovl_whiteout(upperdir, dentry); 1030 | + dput(newdentry); 1031 | + newdentry = ERR_PTR(err); 1032 | + } else if (WARN_ON(!newdentry->d_inode)) { 1033 | + /* 1034 | + * Not quite sure if non-instantiated dentry is legal or not. 1035 | + * VFS doesn't seem to care so check and warn here. 
1036 | + */ 1037 | + dput(newdentry); 1038 | + newdentry = ERR_PTR(-ENOENT); 1039 | + } 1040 | + 1041 | +out: 1042 | + return newdentry; 1043 | + 1044 | +} 1045 | + 1046 | +static int ovl_set_opaque(struct dentry *upperdentry) 1047 | +{ 1048 | + int err; 1049 | + const struct cred *old_cred; 1050 | + struct cred *override_cred; 1051 | + 1052 | + override_cred = prepare_creds(); 1053 | + if (!override_cred) 1054 | + return -ENOMEM; 1055 | + 1056 | + /* CAP_SYS_ADMIN for setxattr of "trusted" namespace */ 1057 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 1058 | + old_cred = override_creds(override_cred); 1059 | + err = vfs_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0); 1060 | + revert_creds(old_cred); 1061 | + put_cred(override_cred); 1062 | + 1063 | + return err; 1064 | +} 1065 | + 1066 | +static int ovl_remove_opaque(struct dentry *upperdentry) 1067 | +{ 1068 | + int err; 1069 | + const struct cred *old_cred; 1070 | + struct cred *override_cred; 1071 | + 1072 | + override_cred = prepare_creds(); 1073 | + if (!override_cred) 1074 | + return -ENOMEM; 1075 | + 1076 | + /* CAP_SYS_ADMIN for removexattr of "trusted" namespace */ 1077 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 1078 | + old_cred = override_creds(override_cred); 1079 | + err = vfs_removexattr(upperdentry, ovl_opaque_xattr); 1080 | + revert_creds(old_cred); 1081 | + put_cred(override_cred); 1082 | + 1083 | + return err; 1084 | +} 1085 | + 1086 | +static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, 1087 | + struct kstat *stat) 1088 | +{ 1089 | + int err; 1090 | + enum ovl_path_type type; 1091 | + struct path realpath; 1092 | + 1093 | + type = ovl_path_real(dentry, &realpath); 1094 | + err = vfs_getattr(realpath.mnt, realpath.dentry, stat); 1095 | + if (err) 1096 | + return err; 1097 | + 1098 | + stat->dev = dentry->d_sb->s_dev; 1099 | + stat->ino = dentry->d_inode->i_ino; 1100 | + 1101 | + /* 1102 | + * It's probably not worth it to count subdirs to 
get the 1103 | + * correct link count. nlink=1 seems to pacify 'find' and 1104 | + * other utilities. 1105 | + */ 1106 | + if (type == OVL_PATH_MERGE) 1107 | + stat->nlink = 1; 1108 | + 1109 | + return 0; 1110 | +} 1111 | + 1112 | +static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, 1113 | + const char *link) 1114 | +{ 1115 | + int err; 1116 | + struct dentry *newdentry; 1117 | + struct dentry *upperdir; 1118 | + struct inode *inode; 1119 | + struct kstat stat = { 1120 | + .mode = mode, 1121 | + .rdev = rdev, 1122 | + }; 1123 | + 1124 | + err = -ENOMEM; 1125 | + inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata); 1126 | + if (!inode) 1127 | + goto out; 1128 | + 1129 | + err = ovl_copy_up(dentry->d_parent); 1130 | + if (err) 1131 | + goto out_iput; 1132 | + 1133 | + upperdir = ovl_dentry_upper(dentry->d_parent); 1134 | + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT); 1135 | + 1136 | + newdentry = ovl_upper_create(upperdir, dentry, &stat, link); 1137 | + err = PTR_ERR(newdentry); 1138 | + if (IS_ERR(newdentry)) 1139 | + goto out_unlock; 1140 | + 1141 | + ovl_dentry_version_inc(dentry->d_parent); 1142 | + if (ovl_dentry_is_opaque(dentry) && S_ISDIR(mode)) { 1143 | + err = ovl_set_opaque(newdentry); 1144 | + if (err) { 1145 | + vfs_rmdir(upperdir->d_inode, newdentry); 1146 | + ovl_whiteout(upperdir, dentry); 1147 | + goto out_dput; 1148 | + } 1149 | + } 1150 | + ovl_dentry_update(dentry, newdentry); 1151 | + ovl_copyattr(newdentry->d_inode, inode); 1152 | + d_instantiate(dentry, inode); 1153 | + inode = NULL; 1154 | + newdentry = NULL; 1155 | + err = 0; 1156 | + 1157 | +out_dput: 1158 | + dput(newdentry); 1159 | +out_unlock: 1160 | + mutex_unlock(&upperdir->d_inode->i_mutex); 1161 | +out_iput: 1162 | + iput(inode); 1163 | +out: 1164 | + return err; 1165 | +} 1166 | + 1167 | +static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode, 1168 | + bool excl) 1169 | +{ 1170 | + return ovl_create_object(dentry, 
(mode & 07777) | S_IFREG, 0, NULL); 1171 | +} 1172 | + 1173 | +static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 1174 | +{ 1175 | + return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL); 1176 | +} 1177 | + 1178 | +static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, 1179 | + dev_t rdev) 1180 | +{ 1181 | + return ovl_create_object(dentry, mode, rdev, NULL); 1182 | +} 1183 | + 1184 | +static int ovl_symlink(struct inode *dir, struct dentry *dentry, 1185 | + const char *link) 1186 | +{ 1187 | + return ovl_create_object(dentry, S_IFLNK, 0, link); 1188 | +} 1189 | + 1190 | +static int ovl_do_remove(struct dentry *dentry, bool is_dir) 1191 | +{ 1192 | + int err; 1193 | + enum ovl_path_type type; 1194 | + struct path realpath; 1195 | + struct dentry *upperdir; 1196 | + 1197 | + err = ovl_copy_up(dentry->d_parent); 1198 | + if (err) 1199 | + return err; 1200 | + 1201 | + upperdir = ovl_dentry_upper(dentry->d_parent); 1202 | + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT); 1203 | + type = ovl_path_real(dentry, &realpath); 1204 | + if (type != OVL_PATH_LOWER) { 1205 | + err = -ESTALE; 1206 | + if (realpath.dentry->d_parent != upperdir) 1207 | + goto out_d_drop; 1208 | + 1209 | + /* FIXME: create whiteout up front and rename to target */ 1210 | + 1211 | + if (is_dir) 1212 | + err = vfs_rmdir(upperdir->d_inode, realpath.dentry); 1213 | + else 1214 | + err = vfs_unlink(upperdir->d_inode, realpath.dentry); 1215 | + if (err) 1216 | + goto out_d_drop; 1217 | + 1218 | + ovl_dentry_version_inc(dentry->d_parent); 1219 | + } 1220 | + 1221 | + if (type != OVL_PATH_UPPER || ovl_dentry_is_opaque(dentry)) 1222 | + err = ovl_whiteout(upperdir, dentry); 1223 | + 1224 | + /* 1225 | + * Keeping this dentry hashed would mean having to release 1226 | + * upperpath/lowerpath, which could only be done if we are the 1227 | + * sole user of this dentry. Too tricky... Just unhash for 1228 | + * now. 
1229 | + */ 1230 | +out_d_drop: 1231 | + d_drop(dentry); 1232 | + mutex_unlock(&upperdir->d_inode->i_mutex); 1233 | + 1234 | + return err; 1235 | +} 1236 | + 1237 | +static int ovl_unlink(struct inode *dir, struct dentry *dentry) 1238 | +{ 1239 | + return ovl_do_remove(dentry, false); 1240 | +} 1241 | + 1242 | + 1243 | +static int ovl_rmdir(struct inode *dir, struct dentry *dentry) 1244 | +{ 1245 | + int err; 1246 | + enum ovl_path_type type; 1247 | + 1248 | + type = ovl_path_type(dentry); 1249 | + if (type != OVL_PATH_UPPER) { 1250 | + err = ovl_check_empty_and_clear(dentry, type); 1251 | + if (err) 1252 | + return err; 1253 | + } 1254 | + 1255 | + return ovl_do_remove(dentry, true); 1256 | +} 1257 | + 1258 | +static int ovl_link(struct dentry *old, struct inode *newdir, 1259 | + struct dentry *new) 1260 | +{ 1261 | + int err; 1262 | + struct dentry *olddentry; 1263 | + struct dentry *newdentry; 1264 | + struct dentry *upperdir; 1265 | + struct inode *newinode; 1266 | + 1267 | + err = ovl_copy_up(old); 1268 | + if (err) 1269 | + goto out; 1270 | + 1271 | + err = ovl_copy_up(new->d_parent); 1272 | + if (err) 1273 | + goto out; 1274 | + 1275 | + upperdir = ovl_dentry_upper(new->d_parent); 1276 | + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT); 1277 | + newdentry = ovl_lookup_create(upperdir, new); 1278 | + err = PTR_ERR(newdentry); 1279 | + if (IS_ERR(newdentry)) 1280 | + goto out_unlock; 1281 | + 1282 | + olddentry = ovl_dentry_upper(old); 1283 | + err = vfs_link(olddentry, upperdir->d_inode, newdentry); 1284 | + if (!err) { 1285 | + if (WARN_ON(!newdentry->d_inode)) { 1286 | + dput(newdentry); 1287 | + err = -ENOENT; 1288 | + goto out_unlock; 1289 | + } 1290 | + newinode = ovl_new_inode(old->d_sb, newdentry->d_inode->i_mode, 1291 | + new->d_fsdata); 1292 | + if (!newinode) { 1293 | + vfs_unlink(upperdir->d_inode, newdentry); /* best-effort rollback of the upper link */ 1294 | + err = -ENOMEM; /* err still holds 0 from vfs_link() at this point */ 1295 | + goto link_fail; 1296 | + } 1297 | + ovl_copyattr(newdentry->d_inode, newinode); /* uid/gid of the linked inode, as in ovl_create_object() */
1298 | + ovl_dentry_version_inc(new->d_parent); 1299 | + ovl_dentry_update(new, newdentry); 1300 | + d_instantiate(new, newinode); 1301 | + } else { 1302 | +link_fail: 1303 | + if (ovl_dentry_is_opaque(new)) 1304 | + ovl_whiteout(upperdir, new); 1305 | + dput(newdentry); 1306 | + } 1307 | +out_unlock: 1308 | + mutex_unlock(&upperdir->d_inode->i_mutex); 1309 | +out: 1310 | + return err; 1311 | +} 1312 | + 1313 | +static int ovl_rename(struct inode *olddir, struct dentry *old, 1314 | + struct inode *newdir, struct dentry *new) 1315 | +{ 1316 | + int err; 1317 | + enum ovl_path_type old_type; 1318 | + enum ovl_path_type new_type; 1319 | + struct dentry *old_upperdir; 1320 | + struct dentry *new_upperdir; 1321 | + struct dentry *olddentry; 1322 | + struct dentry *newdentry; 1323 | + struct dentry *trap; 1324 | + bool old_opaque; 1325 | + bool new_opaque; 1326 | + bool new_create = false; 1327 | + bool is_dir = S_ISDIR(old->d_inode->i_mode); 1328 | + 1329 | + /* Don't copy up directory trees */ 1330 | + old_type = ovl_path_type(old); 1331 | + if (old_type != OVL_PATH_UPPER && is_dir) 1332 | + return -EXDEV; 1333 | + 1334 | + if (new->d_inode) { 1335 | + new_type = ovl_path_type(new); 1336 | + 1337 | + if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) { 1338 | + if (ovl_dentry_lower(old)->d_inode == 1339 | + ovl_dentry_lower(new)->d_inode) 1340 | + return 0; 1341 | + } 1342 | + if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) { 1343 | + if (ovl_dentry_upper(old)->d_inode == 1344 | + ovl_dentry_upper(new)->d_inode) 1345 | + return 0; 1346 | + } 1347 | + 1348 | + if (new_type != OVL_PATH_UPPER && 1349 | + S_ISDIR(new->d_inode->i_mode)) { 1350 | + err = ovl_check_empty_and_clear(new, new_type); 1351 | + if (err) 1352 | + return err; 1353 | + } 1354 | + } else { 1355 | + new_type = OVL_PATH_UPPER; 1356 | + } 1357 | + 1358 | + err = ovl_copy_up(old); 1359 | + if (err) 1360 | + return err; 1361 | + 1362 | + err = ovl_copy_up(new->d_parent); 1363 | + if (err) 1364 | + return err; 1365 | + 1366 | + old_upperdir =
ovl_dentry_upper(old->d_parent); 1367 | + new_upperdir = ovl_dentry_upper(new->d_parent); 1368 | + 1369 | + trap = lock_rename(new_upperdir, old_upperdir); 1370 | + 1371 | + olddentry = ovl_dentry_upper(old); 1372 | + newdentry = ovl_dentry_upper(new); 1373 | + if (newdentry) { 1374 | + dget(newdentry); 1375 | + } else { 1376 | + new_create = true; 1377 | + newdentry = ovl_lookup_create(new_upperdir, new); 1378 | + err = PTR_ERR(newdentry); 1379 | + if (IS_ERR(newdentry)) 1380 | + goto out_unlock; 1381 | + } 1382 | + 1383 | + err = -ESTALE; 1384 | + if (olddentry->d_parent != old_upperdir) 1385 | + goto out_dput; 1386 | + if (newdentry->d_parent != new_upperdir) 1387 | + goto out_dput; 1388 | + if (olddentry == trap) 1389 | + goto out_dput; 1390 | + if (newdentry == trap) 1391 | + goto out_dput; 1392 | + 1393 | + old_opaque = ovl_dentry_is_opaque(old); 1394 | + new_opaque = ovl_dentry_is_opaque(new) || new_type != OVL_PATH_UPPER; 1395 | + 1396 | + if (is_dir && !old_opaque && new_opaque) { 1397 | + err = ovl_set_opaque(olddentry); 1398 | + if (err) 1399 | + goto out_dput; 1400 | + } 1401 | + 1402 | + err = vfs_rename(old_upperdir->d_inode, olddentry, 1403 | + new_upperdir->d_inode, newdentry); 1404 | + 1405 | + if (err) { 1406 | + if (new_create && ovl_dentry_is_opaque(new)) 1407 | + ovl_whiteout(new_upperdir, new); 1408 | + if (is_dir && !old_opaque && new_opaque) 1409 | + ovl_remove_opaque(olddentry); 1410 | + goto out_dput; 1411 | + } 1412 | + 1413 | + if (old_type != OVL_PATH_UPPER || old_opaque) 1414 | + err = ovl_whiteout(old_upperdir, old); 1415 | + if (is_dir && old_opaque && !new_opaque) 1416 | + ovl_remove_opaque(olddentry); 1417 | + 1418 | + if (old_opaque != new_opaque) 1419 | + ovl_dentry_set_opaque(old, new_opaque); 1420 | + 1421 | + ovl_dentry_version_inc(old->d_parent); 1422 | + ovl_dentry_version_inc(new->d_parent); 1423 | + 1424 | +out_dput: 1425 | + dput(newdentry); 1426 | +out_unlock: 1427 | + unlock_rename(new_upperdir, old_upperdir); 1428 | + 
return err; 1429 | +} 1430 | + 1431 | +const struct inode_operations ovl_dir_inode_operations = { 1432 | + .lookup = ovl_lookup, 1433 | + .mkdir = ovl_mkdir, 1434 | + .symlink = ovl_symlink, 1435 | + .unlink = ovl_unlink, 1436 | + .rmdir = ovl_rmdir, 1437 | + .rename = ovl_rename, 1438 | + .link = ovl_link, 1439 | + .setattr = ovl_setattr, 1440 | + .create = ovl_create, 1441 | + .mknod = ovl_mknod, 1442 | + .permission = ovl_permission, 1443 | + .getattr = ovl_dir_getattr, 1444 | + .setxattr = ovl_setxattr, 1445 | + .getxattr = ovl_getxattr, 1446 | + .listxattr = ovl_listxattr, 1447 | + .removexattr = ovl_removexattr, 1448 | +}; 1449 | diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c 1450 | new file mode 100644 1451 | index 0000000..e7ab09b 1452 | --- /dev/null 1453 | +++ b/fs/overlayfs/inode.c 1454 | @@ -0,0 +1,372 @@ 1455 | +/* 1456 | + * 1457 | + * Copyright (C) 2011 Novell Inc. 1458 | + * 1459 | + * This program is free software; you can redistribute it and/or modify it 1460 | + * under the terms of the GNU General Public License version 2 as published by 1461 | + * the Free Software Foundation. 
1462 | + */ 1463 | + 1464 | +#include <linux/fs.h> 1465 | +#include <linux/slab.h> 1466 | +#include <linux/xattr.h> 1467 | +#include "overlayfs.h" 1468 | + 1469 | +int ovl_setattr(struct dentry *dentry, struct iattr *attr) 1470 | +{ 1471 | + struct dentry *upperdentry; 1472 | + int err; 1473 | + 1474 | + if ((attr->ia_valid & ATTR_SIZE) && !ovl_dentry_upper(dentry)) 1475 | + err = ovl_copy_up_truncate(dentry, attr->ia_size); 1476 | + else 1477 | + err = ovl_copy_up(dentry); 1478 | + if (err) 1479 | + return err; 1480 | + 1481 | + upperdentry = ovl_dentry_upper(dentry); 1482 | + 1483 | + if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) 1484 | + attr->ia_valid &= ~ATTR_MODE; 1485 | + 1486 | + mutex_lock(&upperdentry->d_inode->i_mutex); 1487 | + err = notify_change(upperdentry, attr); 1488 | + if (!err) 1489 | + ovl_copyattr(upperdentry->d_inode, dentry->d_inode); 1490 | + mutex_unlock(&upperdentry->d_inode->i_mutex); 1491 | + 1492 | + return err; 1493 | +} 1494 | + 1495 | +static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry, 1496 | + struct kstat *stat) 1497 | +{ 1498 | + struct path realpath; 1499 | + 1500 | + ovl_path_real(dentry, &realpath); 1501 | + return vfs_getattr(realpath.mnt, realpath.dentry, stat); 1502 | +} 1503 | + 1504 | +int ovl_permission(struct inode *inode, int mask) 1505 | +{ 1506 | + struct ovl_entry *oe; 1507 | + struct dentry *alias = NULL; 1508 | + struct inode *realinode; 1509 | + struct dentry *realdentry; 1510 | + bool is_upper; 1511 | + int err; 1512 | + 1513 | + if (S_ISDIR(inode->i_mode)) { 1514 | + oe = inode->i_private; 1515 | + } else if (mask & MAY_NOT_BLOCK) { 1516 | + return -ECHILD; 1517 | + } else { 1518 | + /* 1519 | + * For non-directories find an alias and get the info 1520 | + * from there.
1521 | + */ 1522 | + alias = d_find_any_alias(inode); 1523 | + if (WARN_ON(!alias)) 1524 | + return -ENOENT; 1525 | + 1526 | + oe = alias->d_fsdata; 1527 | + } 1528 | + 1529 | + realdentry = ovl_entry_real(oe, &is_upper); 1530 | + 1531 | + /* Careful in RCU walk mode */ 1532 | + realinode = ACCESS_ONCE(realdentry->d_inode); 1533 | + if (!realinode) { 1534 | + WARN_ON(!(mask & MAY_NOT_BLOCK)); 1535 | + err = -ENOENT; 1536 | + goto out_dput; 1537 | + } 1538 | + 1539 | + if (mask & MAY_WRITE) { 1540 | + umode_t mode = realinode->i_mode; 1541 | + 1542 | + /* 1543 | + * Writes will always be redirected to upper layer, so 1544 | + * ignore lower layer being read-only. 1545 | + * 1546 | + * If the overlay itself is read-only then proceed 1547 | + * with the permission check, don't return EROFS. 1548 | + * This will only happen if this is the lower layer of 1549 | + * another overlayfs. 1550 | + * 1551 | + * If upper fs becomes read-only after the overlay was 1552 | + * constructed return EROFS to prevent modification of 1553 | + * upper layer. 
1554 | + */ 1555 | + err = -EROFS; 1556 | + if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) && 1557 | + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) 1558 | + goto out_dput; 1559 | + } 1560 | + 1561 | + err = __inode_permission(realinode, mask); 1562 | +out_dput: 1563 | + dput(alias); 1564 | + return err; 1565 | +} 1566 | + 1567 | + 1568 | +struct ovl_link_data { 1569 | + struct dentry *realdentry; 1570 | + void *cookie; 1571 | +}; 1572 | + 1573 | +static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd) 1574 | +{ 1575 | + void *ret; 1576 | + struct dentry *realdentry; 1577 | + struct inode *realinode; 1578 | + 1579 | + realdentry = ovl_dentry_real(dentry); 1580 | + realinode = realdentry->d_inode; 1581 | + 1582 | + if (WARN_ON(!realinode->i_op->follow_link)) 1583 | + return ERR_PTR(-EPERM); 1584 | + 1585 | + ret = realinode->i_op->follow_link(realdentry, nd); 1586 | + if (IS_ERR(ret)) 1587 | + return ret; 1588 | + 1589 | + if (realinode->i_op->put_link) { 1590 | + struct ovl_link_data *data; 1591 | + 1592 | + data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL); 1593 | + if (!data) { 1594 | + realinode->i_op->put_link(realdentry, nd, ret); 1595 | + return ERR_PTR(-ENOMEM); 1596 | + } 1597 | + data->realdentry = realdentry; 1598 | + data->cookie = ret; 1599 | + 1600 | + return data; 1601 | + } else { 1602 | + return NULL; 1603 | + } 1604 | +} 1605 | + 1606 | +static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c) 1607 | +{ 1608 | + struct inode *realinode; 1609 | + struct ovl_link_data *data = c; 1610 | + 1611 | + if (!data) 1612 | + return; 1613 | + 1614 | + realinode = data->realdentry->d_inode; 1615 | + realinode->i_op->put_link(data->realdentry, nd, data->cookie); 1616 | + kfree(data); 1617 | +} 1618 | + 1619 | +static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) 1620 | +{ 1621 | + struct path realpath; 1622 | + struct inode *realinode; 1623 | + 1624 | + ovl_path_real(dentry, 
&realpath); 1625 | + realinode = realpath.dentry->d_inode; 1626 | + 1627 | + if (!realinode->i_op->readlink) 1628 | + return -EINVAL; 1629 | + 1630 | + touch_atime(&realpath); 1631 | + 1632 | + return realinode->i_op->readlink(realpath.dentry, buf, bufsiz); 1633 | +} 1634 | + 1635 | +/* True when name starts with the "trusted.overlay." prefix that overlayfs keeps for itself */ 1636 | +static bool ovl_is_private_xattr(const char *name) 1637 | +{ 1638 | + return strncmp(name, "trusted.overlay.", 16) == 0; /* 16 == strlen of the prefix, trailing dot included */ 1639 | +} 1640 | + 1641 | +int ovl_setxattr(struct dentry *dentry, const char *name, 1642 | + const void *value, size_t size, int flags) 1643 | +{ 1644 | + int err; 1645 | + struct dentry *upperdentry; 1646 | + 1647 | + if (ovl_is_private_xattr(name)) 1648 | + return -EPERM; 1649 | + 1650 | + err = ovl_copy_up(dentry); 1651 | + if (err) 1652 | + return err; 1653 | + 1654 | + upperdentry = ovl_dentry_upper(dentry); 1655 | + return vfs_setxattr(upperdentry, name, value, size, flags); 1656 | +} 1657 | + 1658 | +ssize_t ovl_getxattr(struct dentry *dentry, const char *name, 1659 | + void *value, size_t size) 1660 | +{ 1661 | + if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && 1662 | + ovl_is_private_xattr(name)) 1663 | + return -ENODATA; 1664 | + 1665 | + return vfs_getxattr(ovl_dentry_real(dentry), name, value, size); 1666 | +} 1667 | + 1668 | +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) 1669 | +{ 1670 | + ssize_t res; 1671 | + int off; 1672 | + 1673 | + res = vfs_listxattr(ovl_dentry_real(dentry), list, size); 1674 | + if (res <= 0 || size == 0) 1675 | + return res; 1676 | + 1677 | + if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE) 1678 | + return res; 1679 | + 1680 | + /* filter out private xattrs */ 1681 | + for (off = 0; off < res;) { 1682 | + char *s = list + off; 1683 | + size_t slen = strlen(s) + 1; 1684 | + 1685 | + BUG_ON(off + slen > res); 1686 | + 1687 | + if (ovl_is_private_xattr(s)) { 1688 | + res -= slen; 1689 | + memmove(s, s + slen, res - off); 1690 | + } else { 1691 | + off += slen; 1692 | + } 1693 | + }
1694 | + 1695 | + return res; 1696 | +} 1697 | + 1698 | +int ovl_removexattr(struct dentry *dentry, const char *name) 1699 | +{ 1700 | + int err; 1701 | + struct path realpath; 1702 | + enum ovl_path_type type; 1703 | + 1704 | + if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && 1705 | + ovl_is_private_xattr(name)) 1706 | + return -ENODATA; 1707 | + 1708 | + type = ovl_path_real(dentry, &realpath); 1709 | + if (type == OVL_PATH_LOWER) { 1710 | + err = vfs_getxattr(realpath.dentry, name, NULL, 0); 1711 | + if (err < 0) 1712 | + return err; 1713 | + 1714 | + err = ovl_copy_up(dentry); 1715 | + if (err) 1716 | + return err; 1717 | + 1718 | + ovl_path_upper(dentry, &realpath); 1719 | + } 1720 | + 1721 | + return vfs_removexattr(realpath.dentry, name); 1722 | +} 1723 | + 1724 | +static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, 1725 | + struct dentry *realdentry) 1726 | +{ 1727 | + if (type != OVL_PATH_LOWER) 1728 | + return false; 1729 | + 1730 | + if (special_file(realdentry->d_inode->i_mode)) 1731 | + return false; 1732 | + 1733 | + if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC)) 1734 | + return false; 1735 | + 1736 | + return true; 1737 | +} 1738 | + 1739 | +static int ovl_dentry_open(struct dentry *dentry, struct file *file, 1740 | + const struct cred *cred) 1741 | +{ 1742 | + int err; 1743 | + struct path realpath; 1744 | + enum ovl_path_type type; 1745 | + 1746 | + type = ovl_path_real(dentry, &realpath); 1747 | + if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) { 1748 | + if (file->f_flags & O_TRUNC) 1749 | + err = ovl_copy_up_truncate(dentry, 0); 1750 | + else 1751 | + err = ovl_copy_up(dentry); 1752 | + if (err) 1753 | + return err; 1754 | + 1755 | + ovl_path_upper(dentry, &realpath); 1756 | + } 1757 | + 1758 | + return vfs_open(&realpath, file, cred); 1759 | +} 1760 | + 1761 | +static const struct inode_operations ovl_file_inode_operations = { 1762 | + .setattr = ovl_setattr, 1763 | + .permission = 
ovl_permission, 1764 | + .getattr = ovl_getattr, 1765 | + .setxattr = ovl_setxattr, 1766 | + .getxattr = ovl_getxattr, 1767 | + .listxattr = ovl_listxattr, 1768 | + .removexattr = ovl_removexattr, 1769 | + .dentry_open = ovl_dentry_open, 1770 | +}; 1771 | + 1772 | +static const struct inode_operations ovl_symlink_inode_operations = { 1773 | + .setattr = ovl_setattr, 1774 | + .follow_link = ovl_follow_link, 1775 | + .put_link = ovl_put_link, 1776 | + .readlink = ovl_readlink, 1777 | + .getattr = ovl_getattr, 1778 | + .setxattr = ovl_setxattr, 1779 | + .getxattr = ovl_getxattr, 1780 | + .listxattr = ovl_listxattr, 1781 | + .removexattr = ovl_removexattr, 1782 | +}; 1783 | + 1784 | +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, 1785 | + struct ovl_entry *oe) 1786 | +{ 1787 | + struct inode *inode; 1788 | + 1789 | + inode = new_inode(sb); 1790 | + if (!inode) 1791 | + return NULL; 1792 | + 1793 | + mode &= S_IFMT; 1794 | + 1795 | + inode->i_ino = get_next_ino(); 1796 | + inode->i_mode = mode; 1797 | + inode->i_flags |= S_NOATIME | S_NOCMTIME; 1798 | + 1799 | + switch (mode) { 1800 | + case S_IFDIR: 1801 | + inode->i_private = oe; 1802 | + inode->i_op = &ovl_dir_inode_operations; 1803 | + inode->i_fop = &ovl_dir_operations; 1804 | + break; 1805 | + 1806 | + case S_IFLNK: 1807 | + inode->i_op = &ovl_symlink_inode_operations; 1808 | + break; 1809 | + 1810 | + case S_IFREG: 1811 | + case S_IFSOCK: 1812 | + case S_IFBLK: 1813 | + case S_IFCHR: 1814 | + case S_IFIFO: 1815 | + inode->i_op = &ovl_file_inode_operations; 1816 | + break; 1817 | + 1818 | + default: 1819 | + WARN(1, "illegal file type: %i\n", mode); 1820 | + iput(inode); 1821 | + inode = NULL; 1822 | + } 1823 | + 1824 | + return inode; 1825 | + 1826 | +} 1827 | diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h 1828 | new file mode 100644 1829 | index 0000000..1cba38f 1830 | --- /dev/null 1831 | +++ b/fs/overlayfs/overlayfs.h 1832 | @@ -0,0 +1,70 @@ 1833 | +/* 1834 | + * 1835 | + * 
Copyright (C) 2011 Novell Inc. 1836 | + * 1837 | + * This program is free software; you can redistribute it and/or modify it 1838 | + * under the terms of the GNU General Public License version 2 as published by 1839 | + * the Free Software Foundation. 1840 | + */ 1841 | + 1842 | +struct ovl_entry; 1843 | + 1844 | +enum ovl_path_type { 1845 | + OVL_PATH_UPPER, 1846 | + OVL_PATH_MERGE, 1847 | + OVL_PATH_LOWER, 1848 | +}; 1849 | + 1850 | +extern const char *ovl_opaque_xattr; 1851 | +extern const char *ovl_whiteout_xattr; 1852 | +extern const struct dentry_operations ovl_dentry_operations; 1853 | + 1854 | +enum ovl_path_type ovl_path_type(struct dentry *dentry); 1855 | +u64 ovl_dentry_version_get(struct dentry *dentry); 1856 | +void ovl_dentry_version_inc(struct dentry *dentry); 1857 | +void ovl_path_upper(struct dentry *dentry, struct path *path); 1858 | +void ovl_path_lower(struct dentry *dentry, struct path *path); 1859 | +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); 1860 | +struct dentry *ovl_dentry_upper(struct dentry *dentry); 1861 | +struct dentry *ovl_dentry_lower(struct dentry *dentry); 1862 | +struct dentry *ovl_dentry_real(struct dentry *dentry); 1863 | +struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper); 1864 | +bool ovl_dentry_is_opaque(struct dentry *dentry); 1865 | +void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque); 1866 | +bool ovl_is_whiteout(struct dentry *dentry); 1867 | +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); 1868 | +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, 1869 | + unsigned int flags); 1870 | +struct file *ovl_path_open(struct path *path, int flags); 1871 | + 1872 | +struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry, 1873 | + struct kstat *stat, const char *link); 1874 | + 1875 | +/* readdir.c */ 1876 | +extern const struct file_operations ovl_dir_operations; 1877 | +int 
ovl_check_empty_and_clear(struct dentry *dentry, enum ovl_path_type type); 1878 | + 1879 | +/* inode.c */ 1880 | +int ovl_setattr(struct dentry *dentry, struct iattr *attr); 1881 | +int ovl_permission(struct inode *inode, int mask); 1882 | +int ovl_setxattr(struct dentry *dentry, const char *name, 1883 | + const void *value, size_t size, int flags); 1884 | +ssize_t ovl_getxattr(struct dentry *dentry, const char *name, 1885 | + void *value, size_t size); 1886 | +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); 1887 | +int ovl_removexattr(struct dentry *dentry, const char *name); 1888 | + 1889 | +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, 1890 | + struct ovl_entry *oe); 1891 | +static inline void ovl_copyattr(struct inode *from, struct inode *to) 1892 | +{ 1893 | + to->i_uid = from->i_uid; 1894 | + to->i_gid = from->i_gid; 1895 | +} 1896 | + 1897 | +/* dir.c */ 1898 | +extern const struct inode_operations ovl_dir_inode_operations; 1899 | + 1900 | +/* copy_up.c */ 1901 | +int ovl_copy_up(struct dentry *dentry); 1902 | +int ovl_copy_up_truncate(struct dentry *dentry, loff_t size); 1903 | diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c 1904 | new file mode 100644 1905 | index 0000000..0797efb 1906 | --- /dev/null 1907 | +++ b/fs/overlayfs/readdir.c 1908 | @@ -0,0 +1,566 @@ 1909 | +/* 1910 | + * 1911 | + * Copyright (C) 2011 Novell Inc. 1912 | + * 1913 | + * This program is free software; you can redistribute it and/or modify it 1914 | + * under the terms of the GNU General Public License version 2 as published by 1915 | + * the Free Software Foundation. 
1916 | + */ 1917 | + 1918 | +#include <linux/fs.h> 1919 | +#include <linux/slab.h> 1920 | +#include <linux/namei.h> 1921 | +#include <linux/file.h> 1922 | +#include <linux/xattr.h> 1923 | +#include <linux/rbtree.h> 1924 | +#include <linux/security.h> 1925 | +#include <linux/cred.h> 1926 | +#include "overlayfs.h" 1927 | + 1928 | +struct ovl_cache_entry { 1929 | + const char *name; 1930 | + unsigned int len; 1931 | + unsigned int type; 1932 | + u64 ino; 1933 | + bool is_whiteout; 1934 | + struct list_head l_node; 1935 | + struct rb_node node; 1936 | +}; 1937 | + 1938 | +struct ovl_readdir_data { 1939 | + struct rb_root *root; 1940 | + struct list_head *list; 1941 | + struct list_head *middle; 1942 | + struct dentry *dir; 1943 | + int count; 1944 | + int err; 1945 | +}; 1946 | + 1947 | +struct ovl_dir_file { 1948 | + bool is_real; 1949 | + bool is_cached; 1950 | + struct list_head cursor; 1951 | + u64 cache_version; 1952 | + struct list_head cache; 1953 | + struct file *realfile; 1954 | +}; 1955 | + 1956 | +static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) 1957 | +{ 1958 | + return container_of(n, struct ovl_cache_entry, node); 1959 | +} 1960 | + 1961 | +static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, 1962 | + const char *name, int len) 1963 | +{ 1964 | + struct rb_node *node = root->rb_node; 1965 | + int cmp; 1966 | + 1967 | + while (node) { 1968 | + struct ovl_cache_entry *p = ovl_cache_entry_from_node(node); 1969 | + 1970 | + cmp = strncmp(name, p->name, len); 1971 | + if (cmp > 0) 1972 | + node = p->node.rb_right; 1973 | + else if (cmp < 0 || len < p->len) 1974 | + node = p->node.rb_left; 1975 | + else 1976 | + return p; 1977 | + } 1978 | + 1979 | + return NULL; 1980 | +} 1981 | + 1982 | +static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, 1983 | + u64 ino, unsigned int d_type) 1984 | +{ 1985 | + struct ovl_cache_entry *p; 1986 | + 1987 | + p = kmalloc(sizeof(*p) + len + 1, GFP_KERNEL); 1988 | + if (p) { 1989 | + char *name_copy = (char *) (p + 1); 1990 | + memcpy(name_copy, name, len); 1991 | + name_copy[len]
= '\0'; 1992 | + p->name = name_copy; 1993 | + p->len = len; 1994 | + p->type = d_type; 1995 | + p->ino = ino; 1996 | + p->is_whiteout = false; 1997 | + } 1998 | + 1999 | + return p; 2000 | +} 2001 | + 2002 | +static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, 2003 | + const char *name, int len, u64 ino, 2004 | + unsigned int d_type) 2005 | +{ 2006 | + struct rb_node **newp = &rdd->root->rb_node; 2007 | + struct rb_node *parent = NULL; 2008 | + struct ovl_cache_entry *p; 2009 | + 2010 | + while (*newp) { 2011 | + int cmp; 2012 | + struct ovl_cache_entry *tmp; 2013 | + 2014 | + parent = *newp; 2015 | + tmp = ovl_cache_entry_from_node(*newp); 2016 | + cmp = strncmp(name, tmp->name, len); 2017 | + if (cmp > 0) 2018 | + newp = &tmp->node.rb_right; 2019 | + else if (cmp < 0 || len < tmp->len) 2020 | + newp = &tmp->node.rb_left; 2021 | + else 2022 | + return 0; 2023 | + } 2024 | + 2025 | + p = ovl_cache_entry_new(name, len, ino, d_type); 2026 | + if (p == NULL) 2027 | + return -ENOMEM; 2028 | + 2029 | + list_add_tail(&p->l_node, rdd->list); 2030 | + rb_link_node(&p->node, parent, newp); 2031 | + rb_insert_color(&p->node, rdd->root); 2032 | + 2033 | + return 0; 2034 | +} 2035 | + 2036 | +static int ovl_fill_lower(void *buf, const char *name, int namelen, 2037 | + loff_t offset, u64 ino, unsigned int d_type) 2038 | +{ 2039 | + struct ovl_readdir_data *rdd = buf; 2040 | + struct ovl_cache_entry *p; 2041 | + 2042 | + rdd->count++; 2043 | + p = ovl_cache_entry_find(rdd->root, name, namelen); 2044 | + if (p) { 2045 | + list_move_tail(&p->l_node, rdd->middle); 2046 | + } else { 2047 | + p = ovl_cache_entry_new(name, namelen, ino, d_type); 2048 | + if (p == NULL) 2049 | + rdd->err = -ENOMEM; 2050 | + else 2051 | + list_add_tail(&p->l_node, rdd->middle); 2052 | + } 2053 | + 2054 | + return rdd->err; 2055 | +} 2056 | + 2057 | +static void ovl_cache_free(struct list_head *list) 2058 | +{ 2059 | + struct ovl_cache_entry *p; 2060 | + struct ovl_cache_entry *n; 2061 | + 
2062 | + list_for_each_entry_safe(p, n, list, l_node) 2063 | + kfree(p); 2064 | + 2065 | + INIT_LIST_HEAD(list); 2066 | +} 2067 | + 2068 | +static int ovl_fill_upper(void *buf, const char *name, int namelen, 2069 | + loff_t offset, u64 ino, unsigned int d_type) 2070 | +{ 2071 | + struct ovl_readdir_data *rdd = buf; 2072 | + 2073 | + rdd->count++; 2074 | + return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type); 2075 | +} 2076 | + 2077 | +static inline int ovl_dir_read(struct path *realpath, 2078 | + struct ovl_readdir_data *rdd, filldir_t filler) 2079 | +{ 2080 | + struct file *realfile; 2081 | + int err; 2082 | + 2083 | + realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY); 2084 | + if (IS_ERR(realfile)) 2085 | + return PTR_ERR(realfile); 2086 | + 2087 | + do { 2088 | + rdd->count = 0; 2089 | + rdd->err = 0; 2090 | + err = vfs_readdir(realfile, filler, rdd); 2091 | + if (err >= 0) 2092 | + err = rdd->err; 2093 | + } while (!err && rdd->count); 2094 | + fput(realfile); 2095 | + 2096 | + return 0; 2097 | +} 2098 | + 2099 | +static void ovl_dir_reset(struct file *file) 2100 | +{ 2101 | + struct ovl_dir_file *od = file->private_data; 2102 | + enum ovl_path_type type = ovl_path_type(file->f_path.dentry); 2103 | + 2104 | + if (ovl_dentry_version_get(file->f_path.dentry) != od->cache_version) { 2105 | + list_del_init(&od->cursor); 2106 | + ovl_cache_free(&od->cache); 2107 | + od->is_cached = false; 2108 | + } 2109 | + WARN_ON(!od->is_real && type != OVL_PATH_MERGE); 2110 | + if (od->is_real && type == OVL_PATH_MERGE) { 2111 | + fput(od->realfile); 2112 | + od->realfile = NULL; 2113 | + od->is_real = false; 2114 | + } 2115 | +} 2116 | + 2117 | +static int ovl_dir_mark_whiteouts(struct ovl_readdir_data *rdd) 2118 | +{ 2119 | + struct ovl_cache_entry *p; 2120 | + struct dentry *dentry; 2121 | + const struct cred *old_cred; 2122 | + struct cred *override_cred; 2123 | + 2124 | + override_cred = prepare_creds(); 2125 | + if (!override_cred) { 2126 | + 
ovl_cache_free(rdd->list); 2127 | + return -ENOMEM; 2128 | + } 2129 | + 2130 | + /* 2131 | + * CAP_SYS_ADMIN for getxattr 2132 | + * CAP_DAC_OVERRIDE for lookup 2133 | + */ 2134 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 2135 | + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 2136 | + old_cred = override_creds(override_cred); 2137 | + 2138 | + mutex_lock(&rdd->dir->d_inode->i_mutex); 2139 | + list_for_each_entry(p, rdd->list, l_node) { 2140 | + if (p->type != DT_LNK) 2141 | + continue; 2142 | + 2143 | + dentry = lookup_one_len(p->name, rdd->dir, p->len); 2144 | + if (IS_ERR(dentry)) 2145 | + continue; 2146 | + 2147 | + p->is_whiteout = ovl_is_whiteout(dentry); 2148 | + dput(dentry); 2149 | + } 2150 | + mutex_unlock(&rdd->dir->d_inode->i_mutex); 2151 | + 2152 | + revert_creds(old_cred); 2153 | + put_cred(override_cred); 2154 | + 2155 | + return 0; 2156 | +} 2157 | + 2158 | +static inline int ovl_dir_read_merged(struct path *upperpath, 2159 | + struct path *lowerpath, 2160 | + struct ovl_readdir_data *rdd) 2161 | +{ 2162 | + int err; 2163 | + struct rb_root root = RB_ROOT; 2164 | + struct list_head middle; 2165 | + 2166 | + rdd->root = &root; 2167 | + if (upperpath->dentry) { 2168 | + rdd->dir = upperpath->dentry; 2169 | + err = ovl_dir_read(upperpath, rdd, ovl_fill_upper); 2170 | + if (err) 2171 | + goto out; 2172 | + 2173 | + err = ovl_dir_mark_whiteouts(rdd); 2174 | + if (err) 2175 | + goto out; 2176 | + } 2177 | + /* 2178 | + * Insert lowerpath entries before upperpath ones, this allows 2179 | + * offsets to be reasonably constant 2180 | + */ 2181 | + list_add(&middle, rdd->list); 2182 | + rdd->middle = &middle; 2183 | + err = ovl_dir_read(lowerpath, rdd, ovl_fill_lower); 2184 | + list_del(&middle); 2185 | +out: 2186 | + rdd->root = NULL; 2187 | + 2188 | + return err; 2189 | +} 2190 | + 2191 | +static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) 2192 | +{ 2193 | + struct list_head *l; 2194 | + loff_t off; 2195 | + 2196 | 
+ l = od->cache.next; 2197 | + for (off = 0; off < pos; off++) { 2198 | + if (l == &od->cache) 2199 | + break; 2200 | + l = l->next; 2201 | + } 2202 | + list_move_tail(&od->cursor, l); 2203 | +} 2204 | + 2205 | +static int ovl_readdir(struct file *file, void *buf, filldir_t filler) 2206 | +{ 2207 | + struct ovl_dir_file *od = file->private_data; 2208 | + int res; 2209 | + 2210 | + if (!file->f_pos) 2211 | + ovl_dir_reset(file); 2212 | + 2213 | + if (od->is_real) { 2214 | + res = vfs_readdir(od->realfile, filler, buf); 2215 | + file->f_pos = od->realfile->f_pos; 2216 | + 2217 | + return res; 2218 | + } 2219 | + 2220 | + if (!od->is_cached) { 2221 | + struct path lowerpath; 2222 | + struct path upperpath; 2223 | + struct ovl_readdir_data rdd = { .list = &od->cache }; 2224 | + 2225 | + ovl_path_lower(file->f_path.dentry, &lowerpath); 2226 | + ovl_path_upper(file->f_path.dentry, &upperpath); 2227 | + 2228 | + res = ovl_dir_read_merged(&upperpath, &lowerpath, &rdd); 2229 | + if (res) { 2230 | + ovl_cache_free(rdd.list); 2231 | + return res; 2232 | + } 2233 | + 2234 | + od->cache_version = ovl_dentry_version_get(file->f_path.dentry); 2235 | + od->is_cached = true; 2236 | + 2237 | + ovl_seek_cursor(od, file->f_pos); 2238 | + } 2239 | + 2240 | + while (od->cursor.next != &od->cache) { 2241 | + int over; 2242 | + loff_t off; 2243 | + struct ovl_cache_entry *p; 2244 | + 2245 | + p = list_entry(od->cursor.next, struct ovl_cache_entry, l_node); 2246 | + off = file->f_pos; 2247 | + if (!p->is_whiteout) { 2248 | + over = filler(buf, p->name, p->len, off, p->ino, 2249 | + p->type); 2250 | + if (over) 2251 | + break; 2252 | + } 2253 | + file->f_pos++; 2254 | + list_move(&od->cursor, &p->l_node); 2255 | + } 2256 | + 2257 | + return 0; 2258 | +} 2259 | + 2260 | +static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) 2261 | +{ 2262 | + loff_t res; 2263 | + struct ovl_dir_file *od = file->private_data; 2264 | + 2265 | + 
mutex_lock(&file->f_dentry->d_inode->i_mutex); 2266 | + if (!file->f_pos) 2267 | + ovl_dir_reset(file); 2268 | + 2269 | + if (od->is_real) { 2270 | + res = vfs_llseek(od->realfile, offset, origin); 2271 | + file->f_pos = od->realfile->f_pos; 2272 | + } else { 2273 | + res = -EINVAL; 2274 | + 2275 | + switch (origin) { 2276 | + case SEEK_CUR: 2277 | + offset += file->f_pos; 2278 | + break; 2279 | + case SEEK_SET: 2280 | + break; 2281 | + default: 2282 | + goto out_unlock; 2283 | + } 2284 | + if (offset < 0) 2285 | + goto out_unlock; 2286 | + 2287 | + if (offset != file->f_pos) { 2288 | + file->f_pos = offset; 2289 | + if (od->is_cached) 2290 | + ovl_seek_cursor(od, offset); 2291 | + } 2292 | + res = offset; 2293 | + } 2294 | +out_unlock: 2295 | + mutex_unlock(&file->f_dentry->d_inode->i_mutex); 2296 | + 2297 | + return res; 2298 | +} 2299 | + 2300 | +static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, 2301 | + int datasync) 2302 | +{ 2303 | + struct ovl_dir_file *od = file->private_data; 2304 | + 2305 | + /* May need to reopen directory if it got copied up */ 2306 | + if (!od->realfile) { 2307 | + struct path upperpath; 2308 | + struct file *realfile; 2309 | + ovl_path_upper(file->f_path.dentry, &upperpath); 2310 | + realfile = ovl_path_open(&upperpath, O_RDONLY); 2311 | + if (IS_ERR(realfile)) 2312 | + return PTR_ERR(realfile); 2313 | + od->realfile = realfile; /* never store an ERR_PTR: release fput()s any non-NULL realfile */ 2314 | + } 2315 | + return vfs_fsync_range(od->realfile, start, end, datasync); 2316 | +} 2317 | + 2318 | +static int ovl_dir_release(struct inode *inode, struct file *file) 2319 | +{ 2320 | + struct ovl_dir_file *od = file->private_data; 2321 | + 2322 | + list_del(&od->cursor); 2323 | + ovl_cache_free(&od->cache); 2324 | + if (od->realfile) 2325 | + fput(od->realfile); 2326 | + kfree(od); 2327 | + 2328 | + return 0; 2329 | +} 2330 | + 2331 | +static int ovl_dir_open(struct inode *inode, struct file *file) 2332 | +{ 2333 | + struct path realpath; 2334 | + struct file *realfile; 2335 | + struct ovl_dir_file *od; 2336 | + enum
ovl_path_type type; 2337 | + 2338 | + od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL); 2339 | + if (!od) 2340 | + return -ENOMEM; 2341 | + 2342 | + type = ovl_path_real(file->f_path.dentry, &realpath); 2343 | + realfile = ovl_path_open(&realpath, file->f_flags); 2344 | + if (IS_ERR(realfile)) { 2345 | + kfree(od); 2346 | + return PTR_ERR(realfile); 2347 | + } 2348 | + INIT_LIST_HEAD(&od->cache); 2349 | + INIT_LIST_HEAD(&od->cursor); 2350 | + od->is_cached = false; 2351 | + od->realfile = realfile; 2352 | + od->is_real = (type != OVL_PATH_MERGE); 2353 | + file->private_data = od; 2354 | + 2355 | + return 0; 2356 | +} 2357 | + 2358 | +const struct file_operations ovl_dir_operations = { 2359 | + .read = generic_read_dir, 2360 | + .open = ovl_dir_open, 2361 | + .readdir = ovl_readdir, 2362 | + .llseek = ovl_dir_llseek, 2363 | + .fsync = ovl_dir_fsync, 2364 | + .release = ovl_dir_release, 2365 | +}; 2366 | + 2367 | +static int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) 2368 | +{ 2369 | + int err; 2370 | + struct path lowerpath; 2371 | + struct path upperpath; 2372 | + struct ovl_cache_entry *p; 2373 | + struct ovl_readdir_data rdd = { .list = list }; 2374 | + 2375 | + ovl_path_upper(dentry, &upperpath); 2376 | + ovl_path_lower(dentry, &lowerpath); 2377 | + 2378 | + err = ovl_dir_read_merged(&upperpath, &lowerpath, &rdd); 2379 | + if (err) 2380 | + return err; 2381 | + 2382 | + err = 0; 2383 | + 2384 | + list_for_each_entry(p, list, l_node) { 2385 | + if (p->is_whiteout) 2386 | + continue; 2387 | + 2388 | + if (p->name[0] == '.') { 2389 | + if (p->len == 1) 2390 | + continue; 2391 | + if (p->len == 2 && p->name[1] == '.') 2392 | + continue; 2393 | + } 2394 | + err = -ENOTEMPTY; 2395 | + break; 2396 | + } 2397 | + 2398 | + return err; 2399 | +} 2400 | + 2401 | +static int ovl_remove_whiteouts(struct dentry *dir, struct list_head *list) 2402 | +{ 2403 | + struct path upperpath; 2404 | + struct dentry *upperdir; 2405 | + struct ovl_cache_entry 
*p; 2406 | + const struct cred *old_cred; 2407 | + struct cred *override_cred; 2408 | + int err; 2409 | + 2410 | + ovl_path_upper(dir, &upperpath); 2411 | + upperdir = upperpath.dentry; 2412 | + 2413 | + override_cred = prepare_creds(); 2414 | + if (!override_cred) 2415 | + return -ENOMEM; 2416 | + 2417 | + /* 2418 | + * CAP_DAC_OVERRIDE for lookup and unlink 2419 | + * CAP_SYS_ADMIN for setxattr of "trusted" namespace 2420 | + * CAP_FOWNER for unlink in sticky directory 2421 | + */ 2422 | + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 2423 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 2424 | + cap_raise(override_cred->cap_effective, CAP_FOWNER); 2425 | + old_cred = override_creds(override_cred); 2426 | + 2427 | + err = vfs_setxattr(upperdir, ovl_opaque_xattr, "y", 1, 0); 2428 | + if (err) 2429 | + goto out_revert_creds; 2430 | + 2431 | + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT); 2432 | + list_for_each_entry(p, list, l_node) { 2433 | + struct dentry *dentry; 2434 | + int ret; 2435 | + 2436 | + if (!p->is_whiteout) 2437 | + continue; 2438 | + 2439 | + dentry = lookup_one_len(p->name, upperdir, p->len); 2440 | + if (IS_ERR(dentry)) { 2441 | + printk(KERN_WARNING 2442 | + "overlayfs: failed to lookup whiteout %.*s: %li\n", 2443 | + p->len, p->name, PTR_ERR(dentry)); 2444 | + continue; 2445 | + } 2446 | + ret = vfs_unlink(upperdir->d_inode, dentry); 2447 | + dput(dentry); 2448 | + if (ret) 2449 | + printk(KERN_WARNING 2450 | + "overlayfs: failed to unlink whiteout %.*s: %i\n", 2451 | + p->len, p->name, ret); 2452 | + } 2453 | + mutex_unlock(&upperdir->d_inode->i_mutex); 2454 | + 2455 | +out_revert_creds: 2456 | + revert_creds(old_cred); 2457 | + put_cred(override_cred); 2458 | + 2459 | + return err; 2460 | +} 2461 | + 2462 | +int ovl_check_empty_and_clear(struct dentry *dentry, enum ovl_path_type type) 2463 | +{ 2464 | + int err; 2465 | + LIST_HEAD(list); 2466 | + 2467 | + err = ovl_check_empty_dir(dentry, &list); 2468 
| + if (!err && type == OVL_PATH_MERGE) 2469 | + err = ovl_remove_whiteouts(dentry, &list); 2470 | + 2471 | + ovl_cache_free(&list); 2472 | + 2473 | + return err; 2474 | +} 2475 | diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c 2476 | new file mode 100644 2477 | index 0000000..9808408 2478 | --- /dev/null 2479 | +++ b/fs/overlayfs/super.c 2480 | @@ -0,0 +1,665 @@ 2481 | +/* 2482 | + * 2483 | + * Copyright (C) 2011 Novell Inc. 2484 | + * 2485 | + * This program is free software; you can redistribute it and/or modify it 2486 | + * under the terms of the GNU General Public License version 2 as published by 2487 | + * the Free Software Foundation. 2488 | + */ 2489 | + 2490 | +#include 2491 | +#include 2492 | +#include 2493 | +#include 2494 | +#include 2495 | +#include 2496 | +#include 2497 | +#include 2498 | +#include 2499 | +#include 2500 | +#include 2501 | +#include "overlayfs.h" 2502 | + 2503 | +MODULE_AUTHOR("Miklos Szeredi "); 2504 | +MODULE_DESCRIPTION("Overlay filesystem"); 2505 | +MODULE_LICENSE("GPL"); 2506 | + 2507 | +struct ovl_config { 2508 | + char *lowerdir; 2509 | + char *upperdir; 2510 | +}; 2511 | + 2512 | +/* private information held for overlayfs's superblock */ 2513 | +struct ovl_fs { 2514 | + struct vfsmount *upper_mnt; 2515 | + struct vfsmount *lower_mnt; 2516 | + /* pathnames of lower and upper dirs, for show_options */ 2517 | + struct ovl_config config; 2518 | +}; 2519 | + 2520 | +/* private information held for every overlayfs dentry */ 2521 | +struct ovl_entry { 2522 | + /* 2523 | + * Keep "double reference" on upper dentries, so that 2524 | + * d_delete() doesn't think it's OK to reset d_inode to NULL. 
2525 | + */ 2526 | + struct dentry *__upperdentry; 2527 | + struct dentry *lowerdentry; 2528 | + union { 2529 | + struct { 2530 | + u64 version; 2531 | + bool opaque; 2532 | + }; 2533 | + struct rcu_head rcu; 2534 | + }; 2535 | +}; 2536 | + 2537 | +const char *ovl_whiteout_xattr = "trusted.overlay.whiteout"; 2538 | +const char *ovl_opaque_xattr = "trusted.overlay.opaque"; 2539 | + 2540 | + 2541 | +enum ovl_path_type ovl_path_type(struct dentry *dentry) 2542 | +{ 2543 | + struct ovl_entry *oe = dentry->d_fsdata; 2544 | + 2545 | + if (oe->__upperdentry) { 2546 | + if (oe->lowerdentry && S_ISDIR(dentry->d_inode->i_mode)) 2547 | + return OVL_PATH_MERGE; 2548 | + else 2549 | + return OVL_PATH_UPPER; 2550 | + } else { 2551 | + return OVL_PATH_LOWER; 2552 | + } 2553 | +} 2554 | + 2555 | +static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) 2556 | +{ 2557 | + struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); 2558 | + smp_read_barrier_depends(); 2559 | + return upperdentry; 2560 | +} 2561 | + 2562 | +void ovl_path_upper(struct dentry *dentry, struct path *path) 2563 | +{ 2564 | + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 2565 | + struct ovl_entry *oe = dentry->d_fsdata; 2566 | + 2567 | + path->mnt = ofs->upper_mnt; 2568 | + path->dentry = ovl_upperdentry_dereference(oe); 2569 | +} 2570 | + 2571 | +void ovl_path_lower(struct dentry *dentry, struct path *path) 2572 | +{ 2573 | + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 2574 | + struct ovl_entry *oe = dentry->d_fsdata; 2575 | + 2576 | + path->mnt = ofs->lower_mnt; 2577 | + path->dentry = oe->lowerdentry; 2578 | +} 2579 | + 2580 | +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) 2581 | +{ 2582 | + 2583 | + enum ovl_path_type type = ovl_path_type(dentry); 2584 | + 2585 | + if (type == OVL_PATH_LOWER) 2586 | + ovl_path_lower(dentry, path); 2587 | + else 2588 | + ovl_path_upper(dentry, path); 2589 | + 2590 | + return type; 2591 | +} 2592 | + 2593 | +struct dentry 
*ovl_dentry_upper(struct dentry *dentry) 2594 | +{ 2595 | + struct ovl_entry *oe = dentry->d_fsdata; 2596 | + 2597 | + return ovl_upperdentry_dereference(oe); 2598 | +} 2599 | + 2600 | +struct dentry *ovl_dentry_lower(struct dentry *dentry) 2601 | +{ 2602 | + struct ovl_entry *oe = dentry->d_fsdata; 2603 | + 2604 | + return oe->lowerdentry; 2605 | +} 2606 | + 2607 | +struct dentry *ovl_dentry_real(struct dentry *dentry) 2608 | +{ 2609 | + struct ovl_entry *oe = dentry->d_fsdata; 2610 | + struct dentry *realdentry; 2611 | + 2612 | + realdentry = ovl_upperdentry_dereference(oe); 2613 | + if (!realdentry) 2614 | + realdentry = oe->lowerdentry; 2615 | + 2616 | + return realdentry; 2617 | +} 2618 | + 2619 | +struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper) 2620 | +{ 2621 | + struct dentry *realdentry; 2622 | + 2623 | + realdentry = ovl_upperdentry_dereference(oe); 2624 | + if (realdentry) { 2625 | + *is_upper = true; 2626 | + } else { 2627 | + realdentry = oe->lowerdentry; 2628 | + *is_upper = false; 2629 | + } 2630 | + return realdentry; 2631 | +} 2632 | + 2633 | +bool ovl_dentry_is_opaque(struct dentry *dentry) 2634 | +{ 2635 | + struct ovl_entry *oe = dentry->d_fsdata; 2636 | + return oe->opaque; 2637 | +} 2638 | + 2639 | +void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque) 2640 | +{ 2641 | + struct ovl_entry *oe = dentry->d_fsdata; 2642 | + oe->opaque = opaque; 2643 | +} 2644 | + 2645 | +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) 2646 | +{ 2647 | + struct ovl_entry *oe = dentry->d_fsdata; 2648 | + 2649 | + WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex)); 2650 | + WARN_ON(oe->__upperdentry); 2651 | + BUG_ON(!upperdentry->d_inode); 2652 | + smp_wmb(); 2653 | + oe->__upperdentry = dget(upperdentry); 2654 | +} 2655 | + 2656 | +void ovl_dentry_version_inc(struct dentry *dentry) 2657 | +{ 2658 | + struct ovl_entry *oe = dentry->d_fsdata; 2659 | + 2660 | + 
WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); 2661 | + oe->version++; 2662 | +} 2663 | + 2664 | +u64 ovl_dentry_version_get(struct dentry *dentry) 2665 | +{ 2666 | + struct ovl_entry *oe = dentry->d_fsdata; 2667 | + 2668 | + WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); 2669 | + return oe->version; 2670 | +} 2671 | + 2672 | +bool ovl_is_whiteout(struct dentry *dentry) 2673 | +{ 2674 | + int res; 2675 | + char val; 2676 | + 2677 | + if (!dentry) 2678 | + return false; 2679 | + if (!dentry->d_inode) 2680 | + return false; 2681 | + if (!S_ISLNK(dentry->d_inode->i_mode)) 2682 | + return false; 2683 | + 2684 | + res = vfs_getxattr(dentry, ovl_whiteout_xattr, &val, 1); 2685 | + if (res == 1 && val == 'y') 2686 | + return true; 2687 | + 2688 | + return false; 2689 | +} 2690 | + 2691 | +static bool ovl_is_opaquedir(struct dentry *dentry) 2692 | +{ 2693 | + int res; 2694 | + char val; 2695 | + 2696 | + if (!S_ISDIR(dentry->d_inode->i_mode)) 2697 | + return false; 2698 | + 2699 | + res = vfs_getxattr(dentry, ovl_opaque_xattr, &val, 1); 2700 | + if (res == 1 && val == 'y') 2701 | + return true; 2702 | + 2703 | + return false; 2704 | +} 2705 | + 2706 | +static void ovl_entry_free(struct rcu_head *head) 2707 | +{ 2708 | + struct ovl_entry *oe = container_of(head, struct ovl_entry, rcu); 2709 | + kfree(oe); 2710 | +} 2711 | + 2712 | +static void ovl_dentry_release(struct dentry *dentry) 2713 | +{ 2714 | + struct ovl_entry *oe = dentry->d_fsdata; 2715 | + 2716 | + if (oe) { 2717 | + dput(oe->__upperdentry); 2718 | + dput(oe->__upperdentry); 2719 | + dput(oe->lowerdentry); 2720 | + call_rcu(&oe->rcu, ovl_entry_free); 2721 | + } 2722 | +} 2723 | + 2724 | +const struct dentry_operations ovl_dentry_operations = { 2725 | + .d_release = ovl_dentry_release, 2726 | +}; 2727 | + 2728 | +static struct ovl_entry *ovl_alloc_entry(void) 2729 | +{ 2730 | + return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL); 2731 | +} 2732 | + 2733 | +static inline struct dentry 
*ovl_lookup_real(struct dentry *dir, 2734 | + struct qstr *name) 2735 | +{ 2736 | + struct dentry *dentry; 2737 | + 2738 | + mutex_lock(&dir->d_inode->i_mutex); 2739 | + dentry = lookup_one_len(name->name, dir, name->len); 2740 | + mutex_unlock(&dir->d_inode->i_mutex); 2741 | + 2742 | + if (IS_ERR(dentry)) { 2743 | + if (PTR_ERR(dentry) == -ENOENT) 2744 | + dentry = NULL; 2745 | + } else if (!dentry->d_inode) { 2746 | + dput(dentry); 2747 | + dentry = NULL; 2748 | + } 2749 | + return dentry; 2750 | +} 2751 | + 2752 | +static int ovl_do_lookup(struct dentry *dentry) 2753 | +{ 2754 | + struct ovl_entry *oe; 2755 | + struct dentry *upperdir; 2756 | + struct dentry *lowerdir; 2757 | + struct dentry *upperdentry = NULL; 2758 | + struct dentry *lowerdentry = NULL; 2759 | + struct inode *inode = NULL; 2760 | + int err; 2761 | + 2762 | + err = -ENOMEM; 2763 | + oe = ovl_alloc_entry(); 2764 | + if (!oe) 2765 | + goto out; 2766 | + 2767 | + upperdir = ovl_dentry_upper(dentry->d_parent); 2768 | + lowerdir = ovl_dentry_lower(dentry->d_parent); 2769 | + 2770 | + if (upperdir) { 2771 | + upperdentry = ovl_lookup_real(upperdir, &dentry->d_name); 2772 | + err = PTR_ERR(upperdentry); 2773 | + if (IS_ERR(upperdentry)) 2774 | + goto out_put_dir; 2775 | + 2776 | + if (lowerdir && upperdentry && 2777 | + (S_ISLNK(upperdentry->d_inode->i_mode) || 2778 | + S_ISDIR(upperdentry->d_inode->i_mode))) { 2779 | + const struct cred *old_cred; 2780 | + struct cred *override_cred; 2781 | + 2782 | + err = -ENOMEM; 2783 | + override_cred = prepare_creds(); 2784 | + if (!override_cred) 2785 | + goto out_dput_upper; 2786 | + 2787 | + /* CAP_SYS_ADMIN needed for getxattr */ 2788 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 2789 | + old_cred = override_creds(override_cred); 2790 | + 2791 | + if (ovl_is_opaquedir(upperdentry)) { 2792 | + oe->opaque = true; 2793 | + } else if (ovl_is_whiteout(upperdentry)) { 2794 | + dput(upperdentry); 2795 | + upperdentry = NULL; 2796 | + oe->opaque = 
true; 2797 | + } 2798 | + revert_creds(old_cred); 2799 | + put_cred(override_cred); 2800 | + } 2801 | + } 2802 | + if (lowerdir && !oe->opaque) { 2803 | + lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name); 2804 | + err = PTR_ERR(lowerdentry); 2805 | + if (IS_ERR(lowerdentry)) 2806 | + goto out_dput_upper; 2807 | + } 2808 | + 2809 | + if (lowerdentry && upperdentry && 2810 | + (!S_ISDIR(upperdentry->d_inode->i_mode) || 2811 | + !S_ISDIR(lowerdentry->d_inode->i_mode))) { 2812 | + dput(lowerdentry); 2813 | + lowerdentry = NULL; 2814 | + oe->opaque = true; 2815 | + } 2816 | + 2817 | + if (lowerdentry || upperdentry) { 2818 | + struct dentry *realdentry; 2819 | + 2820 | + realdentry = upperdentry ? upperdentry : lowerdentry; 2821 | + err = -ENOMEM; 2822 | + inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode, 2823 | + oe); 2824 | + if (!inode) 2825 | + goto out_dput; 2826 | + ovl_copyattr(realdentry->d_inode, inode); 2827 | + } 2828 | + 2829 | + if (upperdentry) 2830 | + oe->__upperdentry = dget(upperdentry); 2831 | + 2832 | + if (lowerdentry) 2833 | + oe->lowerdentry = lowerdentry; 2834 | + 2835 | + dentry->d_fsdata = oe; 2836 | + dentry->d_op = &ovl_dentry_operations; 2837 | + d_add(dentry, inode); 2838 | + 2839 | + return 0; 2840 | + 2841 | +out_dput: 2842 | + dput(lowerdentry); 2843 | +out_dput_upper: 2844 | + dput(upperdentry); 2845 | +out_put_dir: 2846 | + kfree(oe); 2847 | +out: 2848 | + return err; 2849 | +} 2850 | + 2851 | +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, 2852 | + unsigned int flags) 2853 | +{ 2854 | + int err = ovl_do_lookup(dentry); 2855 | + 2856 | + if (err) 2857 | + return ERR_PTR(err); 2858 | + 2859 | + return NULL; 2860 | +} 2861 | + 2862 | +struct file *ovl_path_open(struct path *path, int flags) 2863 | +{ 2864 | + path_get(path); 2865 | + return dentry_open(path, flags, current_cred()); 2866 | +} 2867 | + 2868 | +static void ovl_put_super(struct super_block *sb) 2869 | +{ 2870 | + struct ovl_fs *ufs 
= sb->s_fs_info; 2871 | + 2872 | + if (!(sb->s_flags & MS_RDONLY)) 2873 | + mnt_drop_write(ufs->upper_mnt); 2874 | + 2875 | + mntput(ufs->upper_mnt); 2876 | + mntput(ufs->lower_mnt); 2877 | + 2878 | + kfree(ufs->config.lowerdir); 2879 | + kfree(ufs->config.upperdir); 2880 | + kfree(ufs); 2881 | +} 2882 | + 2883 | +static int ovl_remount_fs(struct super_block *sb, int *flagsp, char *data) 2884 | +{ 2885 | + int flags = *flagsp; 2886 | + struct ovl_fs *ufs = sb->s_fs_info; 2887 | + 2888 | + /* When remounting rw or ro, we need to adjust the write access to the 2889 | + * upper fs. 2890 | + */ 2891 | + if (((flags ^ sb->s_flags) & MS_RDONLY) == 0) 2892 | + /* No change to readonly status */ 2893 | + return 0; 2894 | + 2895 | + if (flags & MS_RDONLY) { 2896 | + mnt_drop_write(ufs->upper_mnt); 2897 | + return 0; 2898 | + } else 2899 | + return mnt_want_write(ufs->upper_mnt); 2900 | +} 2901 | + 2902 | +/** 2903 | + * ovl_statfs 2904 | + * @sb: The overlayfs super block 2905 | + * @buf: The struct kstatfs to fill in with stats 2906 | + * 2907 | + * Get the filesystem statistics. As writes always target the upper layer 2908 | + * filesystem pass the statfs to the same filesystem. 2909 | + */ 2910 | +static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) 2911 | +{ 2912 | + struct dentry *root_dentry = dentry->d_sb->s_root; 2913 | + struct path path; 2914 | + ovl_path_upper(root_dentry, &path); 2915 | + 2916 | + if (!path.dentry->d_sb->s_op->statfs) 2917 | + return -ENOSYS; 2918 | + return path.dentry->d_sb->s_op->statfs(path.dentry, buf); 2919 | +} 2920 | + 2921 | +/** 2922 | + * ovl_show_options 2923 | + * 2924 | + * Prints the mount options for a given superblock. 2925 | + * Returns zero; does not fail. 
2926 | + */ 2927 | +static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 2928 | +{ 2929 | + struct super_block *sb = dentry->d_sb; 2930 | + struct ovl_fs *ufs = sb->s_fs_info; 2931 | + 2932 | + seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir); 2933 | + seq_printf(m, ",upperdir=%s", ufs->config.upperdir); 2934 | + return 0; 2935 | +} 2936 | + 2937 | +static const struct super_operations ovl_super_operations = { 2938 | + .put_super = ovl_put_super, 2939 | + .remount_fs = ovl_remount_fs, 2940 | + .statfs = ovl_statfs, 2941 | + .show_options = ovl_show_options, 2942 | +}; 2943 | + 2944 | +enum { 2945 | + Opt_lowerdir, 2946 | + Opt_upperdir, 2947 | + Opt_err, 2948 | +}; 2949 | + 2950 | +static const match_table_t ovl_tokens = { 2951 | + {Opt_lowerdir, "lowerdir=%s"}, 2952 | + {Opt_upperdir, "upperdir=%s"}, 2953 | + {Opt_err, NULL} 2954 | +}; 2955 | + 2956 | +static int ovl_parse_opt(char *opt, struct ovl_config *config) 2957 | +{ 2958 | + char *p; 2959 | + 2960 | + config->upperdir = NULL; 2961 | + config->lowerdir = NULL; 2962 | + 2963 | + while ((p = strsep(&opt, ",")) != NULL) { 2964 | + int token; 2965 | + substring_t args[MAX_OPT_ARGS]; 2966 | + 2967 | + if (!*p) 2968 | + continue; 2969 | + 2970 | + token = match_token(p, ovl_tokens, args); 2971 | + switch (token) { 2972 | + case Opt_upperdir: 2973 | + kfree(config->upperdir); 2974 | + config->upperdir = match_strdup(&args[0]); 2975 | + if (!config->upperdir) 2976 | + return -ENOMEM; 2977 | + break; 2978 | + 2979 | + case Opt_lowerdir: 2980 | + kfree(config->lowerdir); 2981 | + config->lowerdir = match_strdup(&args[0]); 2982 | + if (!config->lowerdir) 2983 | + return -ENOMEM; 2984 | + break; 2985 | + 2986 | + default: 2987 | + return -EINVAL; 2988 | + } 2989 | + } 2990 | + return 0; 2991 | +} 2992 | + 2993 | +static int ovl_fill_super(struct super_block *sb, void *data, int silent) 2994 | +{ 2995 | + struct path lowerpath; 2996 | + struct path upperpath; 2997 | + struct inode *root_inode; 2998
| + struct dentry *root_dentry; 2999 | + struct ovl_entry *oe; 3000 | + struct ovl_fs *ufs; 3001 | + int err; 3002 | + 3003 | + err = -ENOMEM; 3004 | + ufs = kmalloc(sizeof(struct ovl_fs), GFP_KERNEL); 3005 | + if (!ufs) 3006 | + goto out; 3007 | + 3008 | + err = ovl_parse_opt((char *) data, &ufs->config); 3009 | + if (err) 3010 | + goto out_free_config; /* a failed parse may already have duplicated one option string */ 3011 | + 3012 | + err = -EINVAL; 3013 | + if (!ufs->config.upperdir || !ufs->config.lowerdir) { 3014 | + printk(KERN_ERR "overlayfs: missing upperdir or lowerdir\n"); 3015 | + goto out_free_config; 3016 | + } 3017 | + err = -ENOMEM; 3018 | + oe = ovl_alloc_entry(); 3019 | + if (oe == NULL) 3020 | + goto out_free_config; 3021 | + 3022 | + err = kern_path(ufs->config.upperdir, LOOKUP_FOLLOW, &upperpath); 3023 | + if (err) 3024 | + goto out_free_oe; 3025 | + 3026 | + err = kern_path(ufs->config.lowerdir, LOOKUP_FOLLOW, &lowerpath); 3027 | + if (err) 3028 | + goto out_put_upperpath; 3029 | + 3030 | + err = -ENOTDIR; 3031 | + if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) || 3032 | + !S_ISDIR(lowerpath.dentry->d_inode->i_mode)) 3033 | + goto out_put_lowerpath; 3034 | + 3035 | + sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth, 3036 | + lowerpath.mnt->mnt_sb->s_stack_depth) + 1; 3037 | + 3038 | + err = -EINVAL; 3039 | + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 3040 | + printk(KERN_ERR "overlayfs: maximum fs stacking depth exceeded\n"); 3041 | + goto out_put_lowerpath; 3042 | + } 3043 | + 3044 | + 3045 | + ufs->upper_mnt = clone_private_mount(&upperpath); 3046 | + err = PTR_ERR(ufs->upper_mnt); 3047 | + if (IS_ERR(ufs->upper_mnt)) { 3048 | + printk(KERN_ERR "overlayfs: failed to clone upperpath\n"); 3049 | + goto out_put_lowerpath; 3050 | + } 3051 | + 3052 | + ufs->lower_mnt = clone_private_mount(&lowerpath); 3053 | + err = PTR_ERR(ufs->lower_mnt); 3054 | + if (IS_ERR(ufs->lower_mnt)) { 3055 | + printk(KERN_ERR "overlayfs: failed to clone lowerpath\n"); 3056 | + goto out_put_upper_mnt; 3057 | + } 3058 | + 3059 | +
/* 3060 | + * Make lower_mnt R/O. That way fchmod/fchown on lower file 3061 | + * will fail instead of modifying lower fs. 3062 | + */ 3063 | + ufs->lower_mnt->mnt_flags |= MNT_READONLY; 3064 | + 3065 | + /* If the upper fs is r/o, we mark overlayfs r/o too */ 3066 | + if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY) 3067 | + sb->s_flags |= MS_RDONLY; 3068 | + 3069 | + if (!(sb->s_flags & MS_RDONLY)) { 3070 | + err = mnt_want_write(ufs->upper_mnt); 3071 | + if (err) 3072 | + goto out_put_lower_mnt; 3073 | + } 3074 | + 3075 | + err = -ENOMEM; 3076 | + root_inode = ovl_new_inode(sb, S_IFDIR, oe); 3077 | + if (!root_inode) 3078 | + goto out_drop_write; 3079 | + 3080 | + root_dentry = d_make_root(root_inode); 3081 | + if (!root_dentry) 3082 | + goto out_drop_write; 3083 | + 3084 | + mntput(upperpath.mnt); 3085 | + mntput(lowerpath.mnt); 3086 | + 3087 | + oe->__upperdentry = dget(upperpath.dentry); 3088 | + oe->lowerdentry = lowerpath.dentry; 3089 | + 3090 | + root_dentry->d_fsdata = oe; 3091 | + root_dentry->d_op = &ovl_dentry_operations; 3092 | + 3093 | + sb->s_op = &ovl_super_operations; 3094 | + sb->s_root = root_dentry; 3095 | + sb->s_fs_info = ufs; 3096 | + 3097 | + return 0; 3098 | + 3099 | +out_drop_write: 3100 | + if (!(sb->s_flags & MS_RDONLY)) 3101 | + mnt_drop_write(ufs->upper_mnt); 3102 | +out_put_lower_mnt: 3103 | + mntput(ufs->lower_mnt); 3104 | +out_put_upper_mnt: 3105 | + mntput(ufs->upper_mnt); 3106 | +out_put_lowerpath: 3107 | + path_put(&lowerpath); 3108 | +out_put_upperpath: 3109 | + path_put(&upperpath); 3110 | +out_free_oe: 3111 | + kfree(oe); 3112 | +out_free_config: 3113 | + kfree(ufs->config.lowerdir); 3114 | + kfree(ufs->config.upperdir); 3115 | + /* ovl_parse_opt() NULLs both strings first, so the kfree()s above are safe after any parse failure */ 3116 | + kfree(ufs); 3117 | +out: 3118 | + return err; 3119 | +} 3120 | + 3121 | +static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 3122 | + const char *dev_name, void *raw_data) 3123 | +{ 3124 | + return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
3125 | +} 3126 | + 3127 | +static struct file_system_type ovl_fs_type = { 3128 | + .owner = THIS_MODULE, 3129 | + .name = "overlayfs", 3130 | + .mount = ovl_mount, 3131 | + .kill_sb = kill_anon_super, 3132 | +}; 3133 | + 3134 | +static int __init ovl_init(void) 3135 | +{ 3136 | + return register_filesystem(&ovl_fs_type); 3137 | +} 3138 | + 3139 | +static void __exit ovl_exit(void) 3140 | +{ 3141 | + unregister_filesystem(&ovl_fs_type); 3142 | +} 3143 | + 3144 | +module_init(ovl_init); 3145 | +module_exit(ovl_exit); 3146 | diff --git a/fs/splice.c b/fs/splice.c 3147 | index 41514dd..2695a60 100644 3148 | --- a/fs/splice.c 3149 | +++ b/fs/splice.c 3150 | @@ -1308,6 +1308,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 3151 | 3152 | return ret; 3153 | } 3154 | +EXPORT_SYMBOL(do_splice_direct); 3155 | 3156 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, 3157 | struct pipe_inode_info *opipe, 3158 | diff --git a/include/linux/fs.h b/include/linux/fs.h 3159 | index 38dba16..d573703 100644 3160 | --- a/include/linux/fs.h 3161 | +++ b/include/linux/fs.h 3162 | @@ -505,6 +505,12 @@ struct iattr { 3163 | */ 3164 | #include 3165 | 3166 | +/* 3167 | + * Maximum number of layers of fs stack. 
Needs to be limited to 3168 | + * prevent kernel stack overflow 3169 | + */ 3170 | +#define FILESYSTEM_MAX_STACK_DEPTH 2 3171 | + 3172 | /** 3173 | * enum positive_aop_returns - aop return codes with specific semantics 3174 | * 3175 | @@ -1579,6 +1585,11 @@ struct super_block { 3176 | 3177 | /* Being remounted read-only */ 3178 | int s_readonly_remount; 3179 | + 3180 | + /* 3181 | + * Indicates how deep in a filesystem stack this SB is 3182 | + */ 3183 | + int s_stack_depth; 3184 | }; 3185 | 3186 | /* superblock cache pruning functions */ 3187 | @@ -1836,6 +1847,7 @@ struct inode_operations { 3188 | int (*atomic_open)(struct inode *, struct dentry *, 3189 | struct file *, unsigned open_flag, 3190 | umode_t create_mode, int *opened); 3191 | + int (*dentry_open)(struct dentry *, struct file *, const struct cred *); 3192 | } ____cacheline_aligned; 3193 | 3194 | struct seq_file; 3195 | @@ -2201,6 +2213,7 @@ extern long do_sys_open(int dfd, const char __user *filename, int flags, 3196 | extern struct file *filp_open(const char *, int, umode_t); 3197 | extern struct file *file_open_root(struct dentry *, struct vfsmount *, 3198 | const char *, int); 3199 | +extern int vfs_open(const struct path *, struct file *, const struct cred *); 3200 | extern struct file * dentry_open(const struct path *, int, const struct cred *); 3201 | extern int filp_close(struct file *, fl_owner_t id); 3202 | extern char * getname(const char __user *); 3203 | @@ -2405,6 +2418,7 @@ extern sector_t bmap(struct inode *, sector_t); 3204 | #endif 3205 | extern int notify_change(struct dentry *, struct iattr *); 3206 | extern int inode_permission(struct inode *, int); 3207 | +extern int __inode_permission(struct inode *, int); 3208 | extern int generic_permission(struct inode *, int); 3209 | 3210 | static inline bool execute_ok(struct inode *inode) 3211 | diff --git a/include/linux/mount.h b/include/linux/mount.h 3212 | index d7029f4..344a262 100644 3213 | --- a/include/linux/mount.h 3214 | +++ 
b/include/linux/mount.h 3215 | @@ -66,6 +66,9 @@ extern void mnt_pin(struct vfsmount *mnt); 3216 | extern void mnt_unpin(struct vfsmount *mnt); 3217 | extern int __mnt_is_readonly(struct vfsmount *mnt); 3218 | 3219 | +struct path; 3220 | +extern struct vfsmount *clone_private_mount(struct path *path); 3221 | + 3222 | struct file_system_type; 3223 | extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, 3224 | int flags, const char *name, 3225 | -------------------------------------------------------------------------------- /overlayfs.v16-3.9-rc2.patch: -------------------------------------------------------------------------------- 1 | diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking 2 | index 0706d32..4331290 100644 3 | --- a/Documentation/filesystems/Locking 4 | +++ b/Documentation/filesystems/Locking 5 | @@ -66,6 +66,7 @@ prototypes: 6 | int (*atomic_open)(struct inode *, struct dentry *, 7 | struct file *, unsigned open_flag, 8 | umode_t create_mode, int *opened); 9 | + int (*dentry_open)(struct dentry *, struct file *, const struct cred *); 10 | 11 | locking rules: 12 | all may block 13 | @@ -93,6 +94,7 @@ removexattr: yes 14 | fiemap: no 15 | update_time: no 16 | atomic_open: yes 17 | +dentry_open: no 18 | 19 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on 20 | victim. 21 | diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt 22 | new file mode 100644 23 | index 0000000..00dbab0 24 | --- /dev/null 25 | +++ b/Documentation/filesystems/overlayfs.txt 26 | @@ -0,0 +1,199 @@ 27 | +Written by: Neil Brown 28 | + 29 | +Overlay Filesystem 30 | +================== 31 | + 32 | +This document describes a prototype for a new approach to providing 33 | +overlay-filesystem functionality in Linux (sometimes referred to as 34 | +union-filesystems). 
An overlay-filesystem tries to present a 35 | +filesystem which is the result of overlaying one filesystem on top 36 | +of the other. 37 | + 38 | +The result will inevitably fail to look exactly like a normal 39 | +filesystem for various technical reasons. The expectation is that 40 | +many use cases will be able to ignore these differences. 41 | + 42 | +This approach is 'hybrid' because the objects that appear in the 43 | +filesystem do not all appear to belong to that filesystem. In many 44 | +cases an object accessed in the union will be indistinguishable 45 | +from accessing the corresponding object from the original filesystem. 46 | +This is most obvious from the 'st_dev' field returned by stat(2). 47 | + 48 | +While directories will report an st_dev from the overlay-filesystem, 49 | +all non-directory objects will report an st_dev from the lower or 50 | +upper filesystem that is providing the object. Similarly st_ino will 51 | +only be unique when combined with st_dev, and both of these can change 52 | +over the lifetime of a non-directory object. Many applications and 53 | +tools ignore these values and will not be affected. 54 | + 55 | +Upper and Lower 56 | +--------------- 57 | + 58 | +An overlay filesystem combines two filesystems - an 'upper' filesystem 59 | +and a 'lower' filesystem. When a name exists in both filesystems, the 60 | +object in the 'upper' filesystem is visible while the object in the 61 | +'lower' filesystem is either hidden or, in the case of directories, 62 | +merged with the 'upper' object. 63 | + 64 | +It would be more correct to refer to an upper and lower 'directory 65 | +tree' rather than 'filesystem' as it is quite possible for both 66 | +directory trees to be in the same filesystem and there is no 67 | +requirement that the root of a filesystem be given for either upper or 68 | +lower. 69 | + 70 | +The lower filesystem can be any filesystem supported by Linux and does 71 | +not need to be writable.
The lower filesystem can even be another 72 | +overlayfs. The upper filesystem will normally be writable and if it 73 | +is it must support the creation of trusted.* extended attributes, and 74 | +must provide valid d_type in readdir responses, at least for symbolic 75 | +links - so NFS is not suitable. 76 | + 77 | +A read-only overlay of two read-only filesystems may use any 78 | +filesystem type. 79 | + 80 | +Directories 81 | +----------- 82 | + 83 | +Overlaying mainly involves directories. If a given name appears in both 84 | +upper and lower filesystems and refers to a non-directory in either, 85 | +then the lower object is hidden - the name refers only to the upper 86 | +object. 87 | + 88 | +Where both upper and lower objects are directories, a merged directory 89 | +is formed. 90 | + 91 | +At mount time, the two directories given as mount options are combined 92 | +into a merged directory: 93 | + 94 | + mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper /overlay 95 | + 96 | +Then whenever a lookup is requested in such a merged directory, the 97 | +lookup is performed in each actual directory and the combined result 98 | +is cached in the dentry belonging to the overlay filesystem. If both 99 | +actual lookups find directories, both are stored and a merged 100 | +directory is created, otherwise only one is stored: the upper if it 101 | +exists, else the lower. 102 | + 103 | +Only the lists of names from directories are merged. Other content 104 | +such as metadata and extended attributes are reported for the upper 105 | +directory only. These attributes of the lower directory are hidden. 106 | + 107 | +whiteouts and opaque directories 108 | +-------------------------------- 109 | + 110 | +In order to support rm and rmdir without changing the lower 111 | +filesystem, an overlay filesystem needs to record in the upper filesystem 112 | +that files have been removed. 
This is done using whiteouts and opaque 113 | +directories (non-directories are always opaque). 114 | + 115 | +The overlay filesystem uses extended attributes with a 116 | +"trusted.overlay." prefix to record these details. 117 | + 118 | +A whiteout is created as a symbolic link with target 119 | +"(overlay-whiteout)" and with xattr "trusted.overlay.whiteout" set to "y". 120 | +When a whiteout is found in the upper level of a merged directory, any 121 | +matching name in the lower level is ignored, and the whiteout itself 122 | +is also hidden. 123 | + 124 | +A directory is made opaque by setting the xattr "trusted.overlay.opaque" 125 | +to "y". Where the upper filesystem contains an opaque directory, any 126 | +directory in the lower filesystem with the same name is ignored. 127 | + 128 | +readdir 129 | +------- 130 | + 131 | +When a 'readdir' request is made on a merged directory, the upper and 132 | +lower directories are each read and the name lists merged in the 133 | +obvious way (upper is read first, then lower - entries that already 134 | +exist are not re-added). This merged name list is cached in the 135 | +'struct file' and so remains as long as the file is kept open. If the 136 | +directory is opened and read by two processes at the same time, they 137 | +will each have separate caches. A seekdir to the start of the 138 | +directory (offset 0) followed by a readdir will cause the cache to be 139 | +discarded and rebuilt. 140 | + 141 | +This means that changes to the merged directory do not appear while a 142 | +directory is being read. This is unlikely to be noticed by many 143 | +programs. 144 | + 145 | +seek offsets are assigned sequentially when the directories are read. 
146 | +Thus if 147 | + - read part of a directory 148 | + - remember an offset, and close the directory 149 | + - re-open the directory some time later 150 | + - seek to the remembered offset 151 | + 152 | +there may be little correlation between the old and new locations in 153 | +the list of filenames, particularly if anything has changed in the 154 | +directory. 155 | + 156 | +Readdir on directories that are not merged is simply handled by the 157 | +underlying directory (upper or lower). 158 | + 159 | + 160 | +Non-directories 161 | +--------------- 162 | + 163 | +Objects that are not directories (files, symlinks, device-special 164 | +files etc.) are presented either from the upper or lower filesystem as 165 | +appropriate. When a file in the lower filesystem is accessed in a way 166 | +that requires write-access, such as opening for write access, changing 167 | +some metadata etc., the file is first copied from the lower filesystem 168 | +to the upper filesystem (copy_up). Note that creating a hard-link 169 | +also requires copy_up, though of course creation of a symlink does 170 | +not. 171 | + 172 | +The copy_up may turn out to be unnecessary, for example if the file is 173 | +opened for read-write but the data is not modified. 174 | + 175 | +The copy_up process first makes sure that the containing directory 176 | +exists in the upper filesystem - creating it and any parents as 177 | +necessary. It then creates the object with the same metadata (owner, 178 | +mode, mtime, symlink-target etc.) and then if the object is a file, the 179 | +data is copied from the lower to the upper filesystem. Finally any 180 | +extended attributes are copied up.
181 | + 182 | +Once the copy_up is complete, the overlay filesystem simply 183 | +provides direct access to the newly created file in the upper 184 | +filesystem - future operations on the file are barely noticed by the 185 | +overlay filesystem (though an operation on the name of the file such as 186 | +rename or unlink will of course be noticed and handled). 187 | + 188 | + 189 | +Non-standard behavior 190 | +--------------------- 191 | + 192 | +The copy_up operation essentially creates a new, identical file and 193 | +moves it over to the old name. The new file may be on a different 194 | +filesystem, so both st_dev and st_ino of the file may change. 195 | + 196 | +Any open files referring to this inode will access the old data and 197 | +metadata. Similarly any file locks obtained before copy_up will not 198 | +apply to the copied up file. 199 | + 200 | +On a file opened with O_RDONLY fchmod(2), fchown(2), futimesat(2) and 201 | +fsetxattr(2) will fail with EROFS. 202 | + 203 | +If a file with multiple hard links is copied up, then this will 204 | +"break" the link. Changes will not be propagated to other names 205 | +referring to the same inode. 206 | + 207 | +Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory 208 | +object in overlayfs will not contain valid absolute paths, only 209 | +relative paths leading up to the filesystem's root. This will be 210 | +fixed in the future. 211 | + 212 | +Some operations are not atomic, for example a crash during copy_up or 213 | +rename will leave the filesystem in an inconsistent state. This will 214 | +be addressed in the future. 215 | + 216 | +Changes to underlying filesystems 217 | +--------------------------------- 218 | + 219 | +Offline changes, when the overlay is not mounted, are allowed to either 220 | +the upper or the lower trees. 221 | + 222 | +Changes to the underlying filesystems while part of a mounted overlay 223 | +filesystem are not allowed. 
If the underlying filesystem is changed, 224 | +the behavior of the overlay is undefined, though it will not result in 225 | +a crash or deadlock. 226 | diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt 227 | index bc4b06b..f64a4d1 100644 228 | --- a/Documentation/filesystems/vfs.txt 229 | +++ b/Documentation/filesystems/vfs.txt 230 | @@ -362,6 +362,7 @@ struct inode_operations { 231 | int (*atomic_open)(struct inode *, struct dentry *, 232 | struct file *, unsigned open_flag, 233 | umode_t create_mode, int *opened); 234 | + int (*dentry_open)(struct dentry *, struct file *, const struct cred *); 235 | }; 236 | 237 | Again, all methods are called without any locks being held, unless 238 | @@ -681,6 +682,12 @@ struct address_space_operations { 239 | but instead uses bmap to find out where the blocks in the file 240 | are and uses those addresses directly. 241 | 242 | + dentry_open: this is an alternative to f_op->open(), the difference is that 243 | + this method may open a file not necessarily originating from the same 244 | + filesystem as the one i_op->dentry_open() was called on. It may be 245 | + useful for stacking filesystems which want to allow native I/O directly 246 | + on underlying files.
247 | + 248 | 249 | invalidatepage: If a page has PagePrivate set, then invalidatepage 250 | will be called when part or all of the page is to be removed 251 | diff --git a/MAINTAINERS b/MAINTAINERS 252 | index 9561658..9ea89b8 100644 253 | --- a/MAINTAINERS 254 | +++ b/MAINTAINERS 255 | @@ -5872,6 +5872,13 @@ F: drivers/scsi/osd/ 256 | F: include/scsi/osd_* 257 | F: fs/exofs/ 258 | 259 | +OVERLAYFS FILESYSTEM 260 | +M: Miklos Szeredi 261 | +L: linux-fsdevel@vger.kernel.org 262 | +S: Supported 263 | +F: fs/overlayfs/* 264 | +F: Documentation/filesystems/overlayfs.txt 265 | + 266 | P54 WIRELESS DRIVER 267 | M: Christian Lamparter 268 | L: linux-wireless@vger.kernel.org 269 | diff --git a/fs/Kconfig b/fs/Kconfig 270 | index 780725a..9e2ccd5 100644 271 | --- a/fs/Kconfig 272 | +++ b/fs/Kconfig 273 | @@ -67,6 +67,7 @@ source "fs/quota/Kconfig" 274 | 275 | source "fs/autofs4/Kconfig" 276 | source "fs/fuse/Kconfig" 277 | +source "fs/overlayfs/Kconfig" 278 | 279 | config GENERIC_ACL 280 | bool 281 | diff --git a/fs/Makefile b/fs/Makefile 282 | index 9d53192..479a720 100644 283 | --- a/fs/Makefile 284 | +++ b/fs/Makefile 285 | @@ -107,6 +107,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ 286 | obj-$(CONFIG_AUTOFS4_FS) += autofs4/ 287 | obj-$(CONFIG_ADFS_FS) += adfs/ 288 | obj-$(CONFIG_FUSE_FS) += fuse/ 289 | +obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ 290 | obj-$(CONFIG_UDF_FS) += udf/ 291 | obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ 292 | obj-$(CONFIG_OMFS_FS) += omfs/ 293 | diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c 294 | index e924cf4..8f7551e 100644 295 | --- a/fs/ecryptfs/main.c 296 | +++ b/fs/ecryptfs/main.c 297 | @@ -567,6 +567,13 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags 298 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; 299 | s->s_blocksize = path.dentry->d_sb->s_blocksize; 300 | s->s_magic = ECRYPTFS_SUPER_MAGIC; 301 | + s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1; 302 | + 303 | + rc = -EINVAL; 304 | + if 
(s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 305 | + printk(KERN_ERR "eCryptfs: maximum fs stacking depth exceeded\n"); 306 | + goto out_free; 307 | + } 308 | 309 | inode = ecryptfs_get_inode(path.dentry->d_inode, s); 310 | rc = PTR_ERR(inode); 311 | diff --git a/fs/internal.h b/fs/internal.h 312 | index 507141f..89481ac 100644 313 | --- a/fs/internal.h 314 | +++ b/fs/internal.h 315 | @@ -42,11 +42,6 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait) 316 | extern void __init chrdev_init(void); 317 | 318 | /* 319 | - * namei.c 320 | - */ 321 | -extern int __inode_permission(struct inode *, int); 322 | - 323 | -/* 324 | * namespace.c 325 | */ 326 | extern int copy_mount_options(const void __user *, unsigned long *); 327 | diff --git a/fs/namei.c b/fs/namei.c 328 | index 57ae9c8..e1cffbd 100644 329 | --- a/fs/namei.c 330 | +++ b/fs/namei.c 331 | @@ -402,6 +402,7 @@ int __inode_permission(struct inode *inode, int mask) 332 | 333 | return security_inode_permission(inode, mask); 334 | } 335 | +EXPORT_SYMBOL(__inode_permission); 336 | 337 | /** 338 | * sb_permission - Check superblock-level permissions 339 | @@ -2867,9 +2868,12 @@ finish_open_created: 340 | error = may_open(&nd->path, acc_mode, open_flag); 341 | if (error) 342 | goto out; 343 | - file->f_path.mnt = nd->path.mnt; 344 | - error = finish_open(file, nd->path.dentry, NULL, opened); 345 | - if (error) { 346 | + 347 | + BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ 348 | + error = vfs_open(&nd->path, file, current_cred()); 349 | + if (!error) { 350 | + *opened |= FILE_OPENED; 351 | + } else { 352 | if (error == -EOPENSTALE) 353 | goto stale_open; 354 | goto out; 355 | diff --git a/fs/namespace.c b/fs/namespace.c 356 | index 50ca17d..9791b4e 100644 357 | --- a/fs/namespace.c 358 | +++ b/fs/namespace.c 359 | @@ -1399,6 +1399,24 @@ void drop_collected_mounts(struct vfsmount *mnt) 360 | release_mounts(&umount_list); 361 | } 362 | 363 | +struct vfsmount 
*clone_private_mount(struct path *path) 364 | +{ 365 | + struct mount *old_mnt = real_mount(path->mnt); 366 | + struct mount *new_mnt; 367 | + 368 | + if (IS_MNT_UNBINDABLE(old_mnt)) 369 | + return ERR_PTR(-EINVAL); 370 | + 371 | + down_read(&namespace_sem); 372 | + new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); 373 | + up_read(&namespace_sem); 374 | + if (!new_mnt) 375 | + return ERR_PTR(-ENOMEM); 376 | + 377 | + return &new_mnt->mnt; 378 | +} 379 | +EXPORT_SYMBOL_GPL(clone_private_mount); 380 | + 381 | int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, 382 | struct vfsmount *root) 383 | { 384 | diff --git a/fs/open.c b/fs/open.c 385 | index 6835446..b9d9f9e 100644 386 | --- a/fs/open.c 387 | +++ b/fs/open.c 388 | @@ -828,8 +828,7 @@ struct file *dentry_open(const struct path *path, int flags, 389 | f = get_empty_filp(); 390 | if (!IS_ERR(f)) { 391 | f->f_flags = flags; 392 | - f->f_path = *path; 393 | - error = do_dentry_open(f, NULL, cred); 394 | + error = vfs_open(path, f, cred); 395 | if (!error) { 396 | /* from now on we need fput() to dispose of f */ 397 | error = open_check_o_direct(f); 398 | @@ -846,6 +845,26 @@ struct file *dentry_open(const struct path *path, int flags, 399 | } 400 | EXPORT_SYMBOL(dentry_open); 401 | 402 | +/** 403 | + * vfs_open - open the file at the given path 404 | + * @path: path to open 405 | + * @filp: newly allocated file with f_flag initialized 406 | + * @cred: credentials to use 407 | + */ 408 | +int vfs_open(const struct path *path, struct file *filp, 409 | + const struct cred *cred) 410 | +{ 411 | + struct inode *inode = path->dentry->d_inode; 412 | + 413 | + if (inode->i_op->dentry_open) 414 | + return inode->i_op->dentry_open(path->dentry, filp, cred); 415 | + else { 416 | + filp->f_path = *path; 417 | + return do_dentry_open(filp, NULL, cred); 418 | + } 419 | +} 420 | +EXPORT_SYMBOL(vfs_open); 421 | + 422 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) 423 | { 
424 | int lookup_flags = 0; 425 | diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig 426 | new file mode 100644 427 | index 0000000..c4517da 428 | --- /dev/null 429 | +++ b/fs/overlayfs/Kconfig 430 | @@ -0,0 +1,4 @@ 431 | +config OVERLAYFS_FS 432 | + tristate "Overlay filesystem support" 433 | + help 434 | + Add support for overlay filesystem. 435 | diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile 436 | new file mode 100644 437 | index 0000000..8f91889 438 | --- /dev/null 439 | +++ b/fs/overlayfs/Makefile 440 | @@ -0,0 +1,7 @@ 441 | +# 442 | +# Makefile for the overlay filesystem. 443 | +# 444 | + 445 | +obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o 446 | + 447 | +overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o 448 | diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c 449 | new file mode 100644 450 | index 0000000..eef85e0 451 | --- /dev/null 452 | +++ b/fs/overlayfs/copy_up.c 453 | @@ -0,0 +1,385 @@ 454 | +/* 455 | + * 456 | + * Copyright (C) 2011 Novell Inc. 457 | + * 458 | + * This program is free software; you can redistribute it and/or modify it 459 | + * under the terms of the GNU General Public License version 2 as published by 460 | + * the Free Software Foundation. 
461 | + */ 462 | + 463 | +#include 464 | +#include 465 | +#include 466 | +#include 467 | +#include 468 | +#include 469 | +#include 470 | +#include 471 | +#include "overlayfs.h" 472 | + 473 | +#define OVL_COPY_UP_CHUNK_SIZE (1 << 20) 474 | + 475 | +static int ovl_copy_up_xattr(struct dentry *old, struct dentry *new) 476 | +{ 477 | + ssize_t list_size, size; 478 | + char *buf, *name, *value; 479 | + int error; 480 | + 481 | + if (!old->d_inode->i_op->getxattr || 482 | + !new->d_inode->i_op->getxattr) 483 | + return 0; 484 | + 485 | + list_size = vfs_listxattr(old, NULL, 0); 486 | + if (list_size <= 0) { 487 | + if (list_size == -EOPNOTSUPP) 488 | + return 0; 489 | + return list_size; 490 | + } 491 | + 492 | + buf = kzalloc(list_size, GFP_KERNEL); 493 | + if (!buf) 494 | + return -ENOMEM; 495 | + 496 | + error = -ENOMEM; 497 | + value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); 498 | + if (!value) 499 | + goto out; 500 | + 501 | + list_size = vfs_listxattr(old, buf, list_size); 502 | + if (list_size <= 0) { 503 | + error = list_size; 504 | + goto out_free_value; 505 | + } 506 | + 507 | + for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { 508 | + size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); 509 | + if (size <= 0) { 510 | + error = size; 511 | + goto out_free_value; 512 | + } 513 | + error = vfs_setxattr(new, name, value, size, 0); 514 | + if (error) 515 | + goto out_free_value; 516 | + } 517 | + 518 | +out_free_value: 519 | + kfree(value); 520 | +out: 521 | + kfree(buf); 522 | + return error; 523 | +} 524 | + 525 | +static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) 526 | +{ 527 | + struct file *old_file; 528 | + struct file *new_file; 529 | + int error = 0; 530 | + 531 | + if (len == 0) 532 | + return 0; 533 | + 534 | + old_file = ovl_path_open(old, O_RDONLY); 535 | + if (IS_ERR(old_file)) 536 | + return PTR_ERR(old_file); 537 | + 538 | + new_file = ovl_path_open(new, O_WRONLY); 539 | + if (IS_ERR(new_file)) { 540 | + 
error = PTR_ERR(new_file); 541 | + goto out_fput; 542 | + } 543 | + 544 | + /* FIXME: copy up sparse files efficiently */ 545 | + while (len) { 546 | + loff_t offset = new_file->f_pos; 547 | + size_t this_len = OVL_COPY_UP_CHUNK_SIZE; 548 | + long bytes; 549 | + 550 | + if (len < this_len) 551 | + this_len = len; 552 | + 553 | + if (signal_pending_state(TASK_KILLABLE, current)) { 554 | + error = -EINTR; 555 | + break; 556 | + } 557 | + 558 | + bytes = do_splice_direct(old_file, &offset, new_file, this_len, 559 | + SPLICE_F_MOVE); 560 | + if (bytes <= 0) { 561 | + error = bytes; 562 | + break; 563 | + } 564 | + 565 | + len -= bytes; 566 | + } 567 | + 568 | + fput(new_file); 569 | +out_fput: 570 | + fput(old_file); 571 | + return error; 572 | +} 573 | + 574 | +static char *ovl_read_symlink(struct dentry *realdentry) 575 | +{ 576 | + int res; 577 | + char *buf; 578 | + struct inode *inode = realdentry->d_inode; 579 | + mm_segment_t old_fs; 580 | + 581 | + res = -EINVAL; 582 | + if (!inode->i_op->readlink) 583 | + goto err; 584 | + 585 | + res = -ENOMEM; 586 | + buf = (char *) __get_free_page(GFP_KERNEL); 587 | + if (!buf) 588 | + goto err; 589 | + 590 | + old_fs = get_fs(); 591 | + set_fs(get_ds()); 592 | + /* The cast to a user pointer is valid due to the set_fs() */ 593 | + res = inode->i_op->readlink(realdentry, 594 | + (char __user *)buf, PAGE_SIZE - 1); 595 | + set_fs(old_fs); 596 | + if (res < 0) { 597 | + free_page((unsigned long) buf); 598 | + goto err; 599 | + } 600 | + buf[res] = '\0'; 601 | + 602 | + return buf; 603 | + 604 | +err: 605 | + return ERR_PTR(res); 606 | +} 607 | + 608 | +static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) 609 | +{ 610 | + struct iattr attr = { 611 | + .ia_valid = 612 | + ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, 613 | + .ia_atime = stat->atime, 614 | + .ia_mtime = stat->mtime, 615 | + }; 616 | + 617 | + return notify_change(upperdentry, &attr); 618 | +} 619 | + 620 | +static int 
ovl_set_mode(struct dentry *upperdentry, umode_t mode) 621 | +{ 622 | + struct iattr attr = { 623 | + .ia_valid = ATTR_MODE, 624 | + .ia_mode = mode, 625 | + }; 626 | + 627 | + return notify_change(upperdentry, &attr); 628 | +} 629 | + 630 | +static int ovl_copy_up_locked(struct dentry *upperdir, struct dentry *dentry, 631 | + struct path *lowerpath, struct kstat *stat, 632 | + const char *link) 633 | +{ 634 | + int err; 635 | + struct path newpath; 636 | + umode_t mode = stat->mode; 637 | + 638 | + /* Can't properly set mode on creation because of the umask */ 639 | + stat->mode &= S_IFMT; 640 | + 641 | + ovl_path_upper(dentry, &newpath); 642 | + WARN_ON(newpath.dentry); 643 | + newpath.dentry = ovl_upper_create(upperdir, dentry, stat, link); 644 | + if (IS_ERR(newpath.dentry)) 645 | + return PTR_ERR(newpath.dentry); 646 | + 647 | + if (S_ISREG(stat->mode)) { 648 | + err = ovl_copy_up_data(lowerpath, &newpath, stat->size); 649 | + if (err) 650 | + goto err_remove; 651 | + } 652 | + 653 | + err = ovl_copy_up_xattr(lowerpath->dentry, newpath.dentry); 654 | + if (err) 655 | + goto err_remove; 656 | + 657 | + mutex_lock(&newpath.dentry->d_inode->i_mutex); 658 | + if (!S_ISLNK(stat->mode)) 659 | + err = ovl_set_mode(newpath.dentry, mode); 660 | + if (!err) 661 | + err = ovl_set_timestamps(newpath.dentry, stat); 662 | + mutex_unlock(&newpath.dentry->d_inode->i_mutex); 663 | + if (err) 664 | + goto err_remove; 665 | + 666 | + ovl_dentry_update(dentry, newpath.dentry); 667 | + 668 | + /* 669 | + * Easiest way to get rid of the lower dentry reference is to 670 | + * drop this dentry. This is neither needed nor possible for 671 | + * directories. 
672 | + */ 673 | + if (!S_ISDIR(stat->mode)) 674 | + d_drop(dentry); 675 | + 676 | + return 0; 677 | + 678 | +err_remove: 679 | + if (S_ISDIR(stat->mode)) 680 | + vfs_rmdir(upperdir->d_inode, newpath.dentry); 681 | + else 682 | + vfs_unlink(upperdir->d_inode, newpath.dentry); 683 | + 684 | + dput(newpath.dentry); 685 | + 686 | + return err; 687 | +} 688 | + 689 | +/* 690 | + * Copy up a single dentry 691 | + * 692 | + * Directory renames only allowed on "pure upper" (already created on 693 | + * upper filesystem, never copied up). Directories which are on lower or 694 | + * are merged may not be renamed. For these -EXDEV is returned and 695 | + * userspace has to deal with it. This means, when copying up a 696 | + * directory we can rely on it and ancestors being stable. 697 | + * 698 | + * Non-directory renames start with copy up of source if necessary. The 699 | + * actual rename will only proceed once the copy up was successful. Copy 700 | + * up uses upper parent i_mutex for exclusion. Since rename can change 701 | + * d_parent it is possible that the copy up will lock the old parent. At 702 | + * that point the file will have already been copied up anyway. 
703 | + */ 704 | +static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, 705 | + struct path *lowerpath, struct kstat *stat) 706 | +{ 707 | + int err; 708 | + struct kstat pstat; 709 | + struct path parentpath; 710 | + struct dentry *upperdir; 711 | + const struct cred *old_cred; 712 | + struct cred *override_cred; 713 | + char *link = NULL; 714 | + 715 | + ovl_path_upper(parent, &parentpath); 716 | + upperdir = parentpath.dentry; 717 | + 718 | + err = vfs_getattr(&parentpath, &pstat); 719 | + if (err) 720 | + return err; 721 | + 722 | + if (S_ISLNK(stat->mode)) { 723 | + link = ovl_read_symlink(lowerpath->dentry); 724 | + if (IS_ERR(link)) 725 | + return PTR_ERR(link); 726 | + } 727 | + 728 | + err = -ENOMEM; 729 | + override_cred = prepare_creds(); 730 | + if (!override_cred) 731 | + goto out_free_link; 732 | + 733 | + override_cred->fsuid = stat->uid; 734 | + override_cred->fsgid = stat->gid; 735 | + /* 736 | + * CAP_SYS_ADMIN for copying up extended attributes 737 | + * CAP_DAC_OVERRIDE for create 738 | + * CAP_FOWNER for chmod, timestamp update 739 | + * CAP_FSETID for chmod 740 | + * CAP_MKNOD for mknod 741 | + */ 742 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 743 | + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 744 | + cap_raise(override_cred->cap_effective, CAP_FOWNER); 745 | + cap_raise(override_cred->cap_effective, CAP_FSETID); 746 | + cap_raise(override_cred->cap_effective, CAP_MKNOD); 747 | + old_cred = override_creds(override_cred); 748 | + 749 | + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT); 750 | + if (ovl_path_type(dentry) != OVL_PATH_LOWER) { 751 | + err = 0; 752 | + } else { 753 | + err = ovl_copy_up_locked(upperdir, dentry, lowerpath, 754 | + stat, link); 755 | + if (!err) { 756 | + /* Restore timestamps on parent (best effort) */ 757 | + ovl_set_timestamps(upperdir, &pstat); 758 | + } 759 | + } 760 | + 761 | + mutex_unlock(&upperdir->d_inode->i_mutex); 762 | + 763 | + 
revert_creds(old_cred); 764 | + put_cred(override_cred); 765 | + 766 | +out_free_link: 767 | + if (link) 768 | + free_page((unsigned long) link); 769 | + 770 | + return err; 771 | +} 772 | + 773 | +int ovl_copy_up(struct dentry *dentry) 774 | +{ 775 | + int err; 776 | + 777 | + err = 0; 778 | + while (!err) { 779 | + struct dentry *next; 780 | + struct dentry *parent; 781 | + struct path lowerpath; 782 | + struct kstat stat; 783 | + enum ovl_path_type type = ovl_path_type(dentry); 784 | + 785 | + if (type != OVL_PATH_LOWER) 786 | + break; 787 | + 788 | + next = dget(dentry); 789 | + /* find the topmost dentry not yet copied up */ 790 | + for (;;) { 791 | + parent = dget_parent(next); 792 | + 793 | + type = ovl_path_type(parent); 794 | + if (type != OVL_PATH_LOWER) 795 | + break; 796 | + 797 | + dput(next); 798 | + next = parent; 799 | + } 800 | + 801 | + ovl_path_lower(next, &lowerpath); 802 | + err = vfs_getattr(&lowerpath, &stat); 803 | + if (!err) 804 | + err = ovl_copy_up_one(parent, next, &lowerpath, &stat); 805 | + 806 | + dput(parent); 807 | + dput(next); 808 | + } 809 | + 810 | + return err; 811 | +} 812 | + 813 | +/* Optimize by not copying up the file first and truncating later */ 814 | +int ovl_copy_up_truncate(struct dentry *dentry, loff_t size) 815 | +{ 816 | + int err; 817 | + struct kstat stat; 818 | + struct path lowerpath; 819 | + struct dentry *parent = dget_parent(dentry); 820 | + 821 | + err = ovl_copy_up(parent); 822 | + if (err) 823 | + goto out_dput_parent; 824 | + 825 | + ovl_path_lower(dentry, &lowerpath); 826 | + err = vfs_getattr(&lowerpath, &stat); 827 | + if (err) 828 | + goto out_dput_parent; 829 | + 830 | + if (size < stat.size) 831 | + stat.size = size; 832 | + 833 | + err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat); 834 | + 835 | +out_dput_parent: 836 | + dput(parent); 837 | + return err; 838 | +} 839 | diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c 840 | new file mode 100644 841 | index 0000000..b530e56 842 | --- 
/dev/null 843 | +++ b/fs/overlayfs/dir.c 844 | @@ -0,0 +1,604 @@ 845 | +/* 846 | + * 847 | + * Copyright (C) 2011 Novell Inc. 848 | + * 849 | + * This program is free software; you can redistribute it and/or modify it 850 | + * under the terms of the GNU General Public License version 2 as published by 851 | + * the Free Software Foundation. 852 | + */ 853 | + 854 | +#include 855 | +#include 856 | +#include 857 | +#include 858 | +#include 859 | +#include "overlayfs.h" 860 | + 861 | +static const char *ovl_whiteout_symlink = "(overlay-whiteout)"; 862 | + 863 | +static int ovl_whiteout(struct dentry *upperdir, struct dentry *dentry) 864 | +{ 865 | + int err; 866 | + struct dentry *newdentry; 867 | + const struct cred *old_cred; 868 | + struct cred *override_cred; 869 | + 870 | + /* FIXME: recheck lower dentry to see if whiteout is really needed */ 871 | + 872 | + err = -ENOMEM; 873 | + override_cred = prepare_creds(); 874 | + if (!override_cred) 875 | + goto out; 876 | + 877 | + /* 878 | + * CAP_SYS_ADMIN for setxattr 879 | + * CAP_DAC_OVERRIDE for symlink creation 880 | + * CAP_FOWNER for unlink in sticky directory 881 | + */ 882 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 883 | + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 884 | + cap_raise(override_cred->cap_effective, CAP_FOWNER); 885 | + override_cred->fsuid = GLOBAL_ROOT_UID; 886 | + override_cred->fsgid = GLOBAL_ROOT_GID; 887 | + old_cred = override_creds(override_cred); 888 | + 889 | + newdentry = lookup_one_len(dentry->d_name.name, upperdir, 890 | + dentry->d_name.len); 891 | + err = PTR_ERR(newdentry); 892 | + if (IS_ERR(newdentry)) 893 | + goto out_put_cred; 894 | + 895 | + /* Just been removed within the same locked region */ 896 | + WARN_ON(newdentry->d_inode); 897 | + 898 | + err = vfs_symlink(upperdir->d_inode, newdentry, ovl_whiteout_symlink); 899 | + if (err) 900 | + goto out_dput; 901 | + 902 | + ovl_dentry_version_inc(dentry->d_parent); 903 | + 904 | + err = 
vfs_setxattr(newdentry, ovl_whiteout_xattr, "y", 1, 0); 905 | + if (err) 906 | + vfs_unlink(upperdir->d_inode, newdentry); 907 | + 908 | +out_dput: 909 | + dput(newdentry); 910 | +out_put_cred: 911 | + revert_creds(old_cred); 912 | + put_cred(override_cred); 913 | +out: 914 | + if (err) { 915 | + /* 916 | + * There's no way to recover from failure to whiteout. 917 | + * What should we do? Log a big fat error and... ? 918 | + */ 919 | + printk(KERN_ERR "overlayfs: ERROR - failed to whiteout '%s'\n", 920 | + dentry->d_name.name); 921 | + } 922 | + 923 | + return err; 924 | +} 925 | + 926 | +static struct dentry *ovl_lookup_create(struct dentry *upperdir, 927 | + struct dentry *template) 928 | +{ 929 | + int err; 930 | + struct dentry *newdentry; 931 | + struct qstr *name = &template->d_name; 932 | + 933 | + newdentry = lookup_one_len(name->name, upperdir, name->len); 934 | + if (IS_ERR(newdentry)) 935 | + return newdentry; 936 | + 937 | + if (newdentry->d_inode) { 938 | + const struct cred *old_cred; 939 | + struct cred *override_cred; 940 | + 941 | + /* No need to check whiteout if lower parent is non-existent */ 942 | + err = -EEXIST; 943 | + if (!ovl_dentry_lower(template->d_parent)) 944 | + goto out_dput; 945 | + 946 | + if (!S_ISLNK(newdentry->d_inode->i_mode)) 947 | + goto out_dput; 948 | + 949 | + err = -ENOMEM; 950 | + override_cred = prepare_creds(); 951 | + if (!override_cred) 952 | + goto out_dput; 953 | + 954 | + /* 955 | + * CAP_SYS_ADMIN for getxattr 956 | + * CAP_FOWNER for unlink in sticky directory 957 | + */ 958 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 959 | + cap_raise(override_cred->cap_effective, CAP_FOWNER); 960 | + old_cred = override_creds(override_cred); 961 | + 962 | + err = -EEXIST; 963 | + if (ovl_is_whiteout(newdentry)) 964 | + err = vfs_unlink(upperdir->d_inode, newdentry); 965 | + 966 | + revert_creds(old_cred); 967 | + put_cred(override_cred); 968 | + if (err) 969 | + goto out_dput; 970 | + 971 | + dput(newdentry); 
972 | + newdentry = lookup_one_len(name->name, upperdir, name->len); 973 | + if (IS_ERR(newdentry)) { 974 | + ovl_whiteout(upperdir, template); 975 | + return newdentry; 976 | + } 977 | + 978 | + /* 979 | + * Whiteout just been successfully removed, parent 980 | + * i_mutex is still held, there's no way the lookup 981 | + * could return positive. 982 | + */ 983 | + WARN_ON(newdentry->d_inode); 984 | + } 985 | + 986 | + return newdentry; 987 | + 988 | +out_dput: 989 | + dput(newdentry); 990 | + return ERR_PTR(err); 991 | +} 992 | + 993 | +struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry, 994 | + struct kstat *stat, const char *link) 995 | +{ 996 | + int err; 997 | + struct dentry *newdentry; 998 | + struct inode *dir = upperdir->d_inode; 999 | + 1000 | + newdentry = ovl_lookup_create(upperdir, dentry); 1001 | + if (IS_ERR(newdentry)) 1002 | + goto out; 1003 | + 1004 | + switch (stat->mode & S_IFMT) { 1005 | + case S_IFREG: 1006 | + err = vfs_create(dir, newdentry, stat->mode, NULL); 1007 | + break; 1008 | + 1009 | + case S_IFDIR: 1010 | + err = vfs_mkdir(dir, newdentry, stat->mode); 1011 | + break; 1012 | + 1013 | + case S_IFCHR: 1014 | + case S_IFBLK: 1015 | + case S_IFIFO: 1016 | + case S_IFSOCK: 1017 | + err = vfs_mknod(dir, newdentry, stat->mode, stat->rdev); 1018 | + break; 1019 | + 1020 | + case S_IFLNK: 1021 | + err = vfs_symlink(dir, newdentry, link); 1022 | + break; 1023 | + 1024 | + default: 1025 | + err = -EPERM; 1026 | + } 1027 | + if (err) { 1028 | + if (ovl_dentry_is_opaque(dentry)) 1029 | + ovl_whiteout(upperdir, dentry); 1030 | + dput(newdentry); 1031 | + newdentry = ERR_PTR(err); 1032 | + } else if (WARN_ON(!newdentry->d_inode)) { 1033 | + /* 1034 | + * Not quite sure if non-instantiated dentry is legal or not. 1035 | + * VFS doesn't seem to care so check and warn here. 
1036 | + */ 1037 | + dput(newdentry); 1038 | + newdentry = ERR_PTR(-ENOENT); 1039 | + } 1040 | + 1041 | +out: 1042 | + return newdentry; 1043 | + 1044 | +} 1045 | + 1046 | +static int ovl_set_opaque(struct dentry *upperdentry) 1047 | +{ 1048 | + int err; 1049 | + const struct cred *old_cred; 1050 | + struct cred *override_cred; 1051 | + 1052 | + override_cred = prepare_creds(); 1053 | + if (!override_cred) 1054 | + return -ENOMEM; 1055 | + 1056 | + /* CAP_SYS_ADMIN for setxattr of "trusted" namespace */ 1057 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 1058 | + old_cred = override_creds(override_cred); 1059 | + err = vfs_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0); 1060 | + revert_creds(old_cred); 1061 | + put_cred(override_cred); 1062 | + 1063 | + return err; 1064 | +} 1065 | + 1066 | +static int ovl_remove_opaque(struct dentry *upperdentry) 1067 | +{ 1068 | + int err; 1069 | + const struct cred *old_cred; 1070 | + struct cred *override_cred; 1071 | + 1072 | + override_cred = prepare_creds(); 1073 | + if (!override_cred) 1074 | + return -ENOMEM; 1075 | + 1076 | + /* CAP_SYS_ADMIN for removexattr of "trusted" namespace */ 1077 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 1078 | + old_cred = override_creds(override_cred); 1079 | + err = vfs_removexattr(upperdentry, ovl_opaque_xattr); 1080 | + revert_creds(old_cred); 1081 | + put_cred(override_cred); 1082 | + 1083 | + return err; 1084 | +} 1085 | + 1086 | +static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, 1087 | + struct kstat *stat) 1088 | +{ 1089 | + int err; 1090 | + enum ovl_path_type type; 1091 | + struct path realpath; 1092 | + 1093 | + type = ovl_path_real(dentry, &realpath); 1094 | + err = vfs_getattr(&realpath, stat); 1095 | + if (err) 1096 | + return err; 1097 | + 1098 | + stat->dev = dentry->d_sb->s_dev; 1099 | + stat->ino = dentry->d_inode->i_ino; 1100 | + 1101 | + /* 1102 | + * It's probably not worth it to count subdirs to get the 1103 | + * 
correct link count. nlink=1 seems to pacify 'find' and 1104 | + * other utilities. 1105 | + */ 1106 | + if (type == OVL_PATH_MERGE) 1107 | + stat->nlink = 1; 1108 | + 1109 | + return 0; 1110 | +} 1111 | + 1112 | +static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, 1113 | + const char *link) 1114 | +{ 1115 | + int err; 1116 | + struct dentry *newdentry; 1117 | + struct dentry *upperdir; 1118 | + struct inode *inode; 1119 | + struct kstat stat = { 1120 | + .mode = mode, 1121 | + .rdev = rdev, 1122 | + }; 1123 | + 1124 | + err = -ENOMEM; 1125 | + inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata); 1126 | + if (!inode) 1127 | + goto out; 1128 | + 1129 | + err = ovl_copy_up(dentry->d_parent); 1130 | + if (err) 1131 | + goto out_iput; 1132 | + 1133 | + upperdir = ovl_dentry_upper(dentry->d_parent); 1134 | + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT); 1135 | + 1136 | + newdentry = ovl_upper_create(upperdir, dentry, &stat, link); 1137 | + err = PTR_ERR(newdentry); 1138 | + if (IS_ERR(newdentry)) 1139 | + goto out_unlock; 1140 | + 1141 | + ovl_dentry_version_inc(dentry->d_parent); 1142 | + if (ovl_dentry_is_opaque(dentry) && S_ISDIR(mode)) { 1143 | + err = ovl_set_opaque(newdentry); 1144 | + if (err) { 1145 | + vfs_rmdir(upperdir->d_inode, newdentry); 1146 | + ovl_whiteout(upperdir, dentry); 1147 | + goto out_dput; 1148 | + } 1149 | + } 1150 | + ovl_dentry_update(dentry, newdentry); 1151 | + ovl_copyattr(newdentry->d_inode, inode); 1152 | + d_instantiate(dentry, inode); 1153 | + inode = NULL; 1154 | + newdentry = NULL; 1155 | + err = 0; 1156 | + 1157 | +out_dput: 1158 | + dput(newdentry); 1159 | +out_unlock: 1160 | + mutex_unlock(&upperdir->d_inode->i_mutex); 1161 | +out_iput: 1162 | + iput(inode); 1163 | +out: 1164 | + return err; 1165 | +} 1166 | + 1167 | +static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode, 1168 | + bool excl) 1169 | +{ 1170 | + return ovl_create_object(dentry, (mode & 07777) | 
S_IFREG, 0, NULL); 1171 | +} 1172 | + 1173 | +static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 1174 | +{ 1175 | + return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL); 1176 | +} 1177 | + 1178 | +static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, 1179 | + dev_t rdev) 1180 | +{ 1181 | + return ovl_create_object(dentry, mode, rdev, NULL); 1182 | +} 1183 | + 1184 | +static int ovl_symlink(struct inode *dir, struct dentry *dentry, 1185 | + const char *link) 1186 | +{ 1187 | + return ovl_create_object(dentry, S_IFLNK, 0, link); 1188 | +} 1189 | + 1190 | +static int ovl_do_remove(struct dentry *dentry, bool is_dir) 1191 | +{ 1192 | + int err; 1193 | + enum ovl_path_type type; 1194 | + struct path realpath; 1195 | + struct dentry *upperdir; 1196 | + 1197 | + err = ovl_copy_up(dentry->d_parent); 1198 | + if (err) 1199 | + return err; 1200 | + 1201 | + upperdir = ovl_dentry_upper(dentry->d_parent); 1202 | + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT); 1203 | + type = ovl_path_real(dentry, &realpath); 1204 | + if (type != OVL_PATH_LOWER) { 1205 | + err = -ESTALE; 1206 | + if (realpath.dentry->d_parent != upperdir) 1207 | + goto out_d_drop; 1208 | + 1209 | + /* FIXME: create whiteout up front and rename to target */ 1210 | + 1211 | + if (is_dir) 1212 | + err = vfs_rmdir(upperdir->d_inode, realpath.dentry); 1213 | + else 1214 | + err = vfs_unlink(upperdir->d_inode, realpath.dentry); 1215 | + if (err) 1216 | + goto out_d_drop; 1217 | + 1218 | + ovl_dentry_version_inc(dentry->d_parent); 1219 | + } 1220 | + 1221 | + if (type != OVL_PATH_UPPER || ovl_dentry_is_opaque(dentry)) 1222 | + err = ovl_whiteout(upperdir, dentry); 1223 | + 1224 | + /* 1225 | + * Keeping this dentry hashed would mean having to release 1226 | + * upperpath/lowerpath, which could only be done if we are the 1227 | + * sole user of this dentry. Too tricky... Just unhash for 1228 | + * now. 
1229 | + */ 1230 | +out_d_drop: 1231 | + d_drop(dentry); 1232 | + mutex_unlock(&upperdir->d_inode->i_mutex); 1233 | + 1234 | + return err; 1235 | +} 1236 | + 1237 | +static int ovl_unlink(struct inode *dir, struct dentry *dentry) 1238 | +{ 1239 | + return ovl_do_remove(dentry, false); 1240 | +} 1241 | + 1242 | + 1243 | +static int ovl_rmdir(struct inode *dir, struct dentry *dentry) 1244 | +{ 1245 | + int err; 1246 | + enum ovl_path_type type; 1247 | + 1248 | + type = ovl_path_type(dentry); 1249 | + if (type != OVL_PATH_UPPER) { 1250 | + err = ovl_check_empty_and_clear(dentry, type); 1251 | + if (err) 1252 | + return err; 1253 | + } 1254 | + 1255 | + return ovl_do_remove(dentry, true); 1256 | +} 1257 | + 1258 | +static int ovl_link(struct dentry *old, struct inode *newdir, 1259 | + struct dentry *new) 1260 | +{ 1261 | + int err; 1262 | + struct dentry *olddentry; 1263 | + struct dentry *newdentry; 1264 | + struct dentry *upperdir; 1265 | + struct inode *newinode; 1266 | + 1267 | + err = ovl_copy_up(old); 1268 | + if (err) 1269 | + goto out; 1270 | + 1271 | + err = ovl_copy_up(new->d_parent); 1272 | + if (err) 1273 | + goto out; 1274 | + 1275 | + upperdir = ovl_dentry_upper(new->d_parent); 1276 | + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT); 1277 | + newdentry = ovl_lookup_create(upperdir, new); 1278 | + err = PTR_ERR(newdentry); 1279 | + if (IS_ERR(newdentry)) 1280 | + goto out_unlock; 1281 | + 1282 | + olddentry = ovl_dentry_upper(old); 1283 | + err = vfs_link(olddentry, upperdir->d_inode, newdentry); 1284 | + if (!err) { 1285 | + if (WARN_ON(!newdentry->d_inode)) { 1286 | + dput(newdentry); 1287 | + err = -ENOENT; 1288 | + goto out_unlock; 1289 | + } 1290 | + newinode = ovl_new_inode(old->d_sb, newdentry->d_inode->i_mode, 1291 | + new->d_fsdata); 1292 | + if (!newinode) 1293 | + goto link_fail; 1294 | + ovl_copyattr(upperdir->d_inode, newinode); 1295 | + 1296 | + ovl_dentry_version_inc(new->d_parent); 1297 | + ovl_dentry_update(new, newdentry); 
1298 | + 1299 | + d_instantiate(new, newinode); 1300 | + } else { 1301 | +link_fail: 1302 | + if (ovl_dentry_is_opaque(new)) 1303 | + ovl_whiteout(upperdir, new); 1304 | + dput(newdentry); 1305 | + } 1306 | +out_unlock: 1307 | + mutex_unlock(&upperdir->d_inode->i_mutex); 1308 | +out: 1309 | + return err; 1310 | + 1311 | +} 1312 | + 1313 | +static int ovl_rename(struct inode *olddir, struct dentry *old, 1314 | + struct inode *newdir, struct dentry *new) 1315 | +{ 1316 | + int err; 1317 | + enum ovl_path_type old_type; 1318 | + enum ovl_path_type new_type; 1319 | + struct dentry *old_upperdir; 1320 | + struct dentry *new_upperdir; 1321 | + struct dentry *olddentry; 1322 | + struct dentry *newdentry; 1323 | + struct dentry *trap; 1324 | + bool old_opaque; 1325 | + bool new_opaque; 1326 | + bool new_create = false; 1327 | + bool is_dir = S_ISDIR(old->d_inode->i_mode); 1328 | + 1329 | + /* Don't copy up directory trees */ 1330 | + old_type = ovl_path_type(old); 1331 | + if (old_type != OVL_PATH_UPPER && is_dir) 1332 | + return -EXDEV; 1333 | + 1334 | + if (new->d_inode) { 1335 | + new_type = ovl_path_type(new); 1336 | + 1337 | + if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) { 1338 | + if (ovl_dentry_lower(old)->d_inode == 1339 | + ovl_dentry_lower(new)->d_inode) 1340 | + return 0; 1341 | + } 1342 | + if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) { 1343 | + if (ovl_dentry_upper(old)->d_inode == 1344 | + ovl_dentry_upper(new)->d_inode) 1345 | + return 0; 1346 | + } 1347 | + 1348 | + if (new_type != OVL_PATH_UPPER && 1349 | + S_ISDIR(new->d_inode->i_mode)) { 1350 | + err = ovl_check_empty_and_clear(new, new_type); 1351 | + if (err) 1352 | + return err; 1353 | + } 1354 | + } else { 1355 | + new_type = OVL_PATH_UPPER; 1356 | + } 1357 | + 1358 | + err = ovl_copy_up(old); 1359 | + if (err) 1360 | + return err; 1361 | + 1362 | + err = ovl_copy_up(new->d_parent); 1363 | + if (err) 1364 | + return err; 1365 | + 1366 | + old_upperdir = 
ovl_dentry_upper(old->d_parent); 1367 | + new_upperdir = ovl_dentry_upper(new->d_parent); 1368 | + 1369 | + trap = lock_rename(new_upperdir, old_upperdir); 1370 | + 1371 | + olddentry = ovl_dentry_upper(old); 1372 | + newdentry = ovl_dentry_upper(new); 1373 | + if (newdentry) { 1374 | + dget(newdentry); 1375 | + } else { 1376 | + new_create = true; 1377 | + newdentry = ovl_lookup_create(new_upperdir, new); 1378 | + err = PTR_ERR(newdentry); 1379 | + if (IS_ERR(newdentry)) 1380 | + goto out_unlock; 1381 | + } 1382 | + 1383 | + err = -ESTALE; 1384 | + if (olddentry->d_parent != old_upperdir) 1385 | + goto out_dput; 1386 | + if (newdentry->d_parent != new_upperdir) 1387 | + goto out_dput; 1388 | + if (olddentry == trap) 1389 | + goto out_dput; 1390 | + if (newdentry == trap) 1391 | + goto out_dput; 1392 | + 1393 | + old_opaque = ovl_dentry_is_opaque(old); 1394 | + new_opaque = ovl_dentry_is_opaque(new) || new_type != OVL_PATH_UPPER; 1395 | + 1396 | + if (is_dir && !old_opaque && new_opaque) { 1397 | + err = ovl_set_opaque(olddentry); 1398 | + if (err) 1399 | + goto out_dput; 1400 | + } 1401 | + 1402 | + err = vfs_rename(old_upperdir->d_inode, olddentry, 1403 | + new_upperdir->d_inode, newdentry); 1404 | + 1405 | + if (err) { 1406 | + if (new_create && ovl_dentry_is_opaque(new)) 1407 | + ovl_whiteout(new_upperdir, new); 1408 | + if (is_dir && !old_opaque && new_opaque) 1409 | + ovl_remove_opaque(olddentry); 1410 | + goto out_dput; 1411 | + } 1412 | + 1413 | + if (old_type != OVL_PATH_UPPER || old_opaque) 1414 | + err = ovl_whiteout(old_upperdir, old); 1415 | + if (is_dir && old_opaque && !new_opaque) 1416 | + ovl_remove_opaque(olddentry); 1417 | + 1418 | + if (old_opaque != new_opaque) 1419 | + ovl_dentry_set_opaque(old, new_opaque); 1420 | + 1421 | + ovl_dentry_version_inc(old->d_parent); 1422 | + ovl_dentry_version_inc(new->d_parent); 1423 | + 1424 | +out_dput: 1425 | + dput(newdentry); 1426 | +out_unlock: 1427 | + unlock_rename(new_upperdir, old_upperdir); 1428 | + 
return err; 1429 | +} 1430 | + 1431 | +const struct inode_operations ovl_dir_inode_operations = { 1432 | + .lookup = ovl_lookup, 1433 | + .mkdir = ovl_mkdir, 1434 | + .symlink = ovl_symlink, 1435 | + .unlink = ovl_unlink, 1436 | + .rmdir = ovl_rmdir, 1437 | + .rename = ovl_rename, 1438 | + .link = ovl_link, 1439 | + .setattr = ovl_setattr, 1440 | + .create = ovl_create, 1441 | + .mknod = ovl_mknod, 1442 | + .permission = ovl_permission, 1443 | + .getattr = ovl_dir_getattr, 1444 | + .setxattr = ovl_setxattr, 1445 | + .getxattr = ovl_getxattr, 1446 | + .listxattr = ovl_listxattr, 1447 | + .removexattr = ovl_removexattr, 1448 | +}; 1449 | diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c 1450 | new file mode 100644 1451 | index 0000000..ee37e92 1452 | --- /dev/null 1453 | +++ b/fs/overlayfs/inode.c 1454 | @@ -0,0 +1,372 @@ 1455 | +/* 1456 | + * 1457 | + * Copyright (C) 2011 Novell Inc. 1458 | + * 1459 | + * This program is free software; you can redistribute it and/or modify it 1460 | + * under the terms of the GNU General Public License version 2 as published by 1461 | + * the Free Software Foundation. 
1462 | + */ 1463 | + 1464 | +#include 1465 | +#include 1466 | +#include 1467 | +#include "overlayfs.h" 1468 | + 1469 | +int ovl_setattr(struct dentry *dentry, struct iattr *attr) 1470 | +{ 1471 | + struct dentry *upperdentry; 1472 | + int err; 1473 | + 1474 | + if ((attr->ia_valid & ATTR_SIZE) && !ovl_dentry_upper(dentry)) 1475 | + err = ovl_copy_up_truncate(dentry, attr->ia_size); 1476 | + else 1477 | + err = ovl_copy_up(dentry); 1478 | + if (err) 1479 | + return err; 1480 | + 1481 | + upperdentry = ovl_dentry_upper(dentry); 1482 | + 1483 | + if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) 1484 | + attr->ia_valid &= ~ATTR_MODE; 1485 | + 1486 | + mutex_lock(&upperdentry->d_inode->i_mutex); 1487 | + err = notify_change(upperdentry, attr); 1488 | + if (!err) 1489 | + ovl_copyattr(upperdentry->d_inode, dentry->d_inode); 1490 | + mutex_unlock(&upperdentry->d_inode->i_mutex); 1491 | + 1492 | + return err; 1493 | +} 1494 | + 1495 | +static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry, 1496 | + struct kstat *stat) 1497 | +{ 1498 | + struct path realpath; 1499 | + 1500 | + ovl_path_real(dentry, &realpath); 1501 | + return vfs_getattr(&realpath, stat); 1502 | +} 1503 | + 1504 | +int ovl_permission(struct inode *inode, int mask) 1505 | +{ 1506 | + struct ovl_entry *oe; 1507 | + struct dentry *alias = NULL; 1508 | + struct inode *realinode; 1509 | + struct dentry *realdentry; 1510 | + bool is_upper; 1511 | + int err; 1512 | + 1513 | + if (S_ISDIR(inode->i_mode)) { 1514 | + oe = inode->i_private; 1515 | + } else if (mask & MAY_NOT_BLOCK) { 1516 | + return -ECHILD; 1517 | + } else { 1518 | + /* 1519 | + * For non-directories find an alias and get the info 1520 | + * from there. 
1521 | + */ 1522 | + alias = d_find_any_alias(inode); 1523 | + if (WARN_ON(!alias)) 1524 | + return -ENOENT; 1525 | + 1526 | + oe = alias->d_fsdata; 1527 | + } 1528 | + 1529 | + realdentry = ovl_entry_real(oe, &is_upper); 1530 | + 1531 | + /* Careful in RCU walk mode */ 1532 | + realinode = ACCESS_ONCE(realdentry->d_inode); 1533 | + if (!realinode) { 1534 | + WARN_ON(!(mask & MAY_NOT_BLOCK)); 1535 | + err = -ENOENT; 1536 | + goto out_dput; 1537 | + } 1538 | + 1539 | + if (mask & MAY_WRITE) { 1540 | + umode_t mode = realinode->i_mode; 1541 | + 1542 | + /* 1543 | + * Writes will always be redirected to upper layer, so 1544 | + * ignore lower layer being read-only. 1545 | + * 1546 | + * If the overlay itself is read-only then proceed 1547 | + * with the permission check, don't return EROFS. 1548 | + * This will only happen if this is the lower layer of 1549 | + * another overlayfs. 1550 | + * 1551 | + * If upper fs becomes read-only after the overlay was 1552 | + * constructed return EROFS to prevent modification of 1553 | + * upper layer. 
1554 | + */ 1555 | + err = -EROFS; 1556 | + if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) && 1557 | + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) 1558 | + goto out_dput; 1559 | + } 1560 | + 1561 | + err = __inode_permission(realinode, mask); 1562 | +out_dput: 1563 | + dput(alias); 1564 | + return err; 1565 | +} 1566 | + 1567 | + 1568 | +struct ovl_link_data { 1569 | + struct dentry *realdentry; 1570 | + void *cookie; 1571 | +}; 1572 | + 1573 | +static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd) 1574 | +{ 1575 | + void *ret; 1576 | + struct dentry *realdentry; 1577 | + struct inode *realinode; 1578 | + 1579 | + realdentry = ovl_dentry_real(dentry); 1580 | + realinode = realdentry->d_inode; 1581 | + 1582 | + if (WARN_ON(!realinode->i_op->follow_link)) 1583 | + return ERR_PTR(-EPERM); 1584 | + 1585 | + ret = realinode->i_op->follow_link(realdentry, nd); 1586 | + if (IS_ERR(ret)) 1587 | + return ret; 1588 | + 1589 | + if (realinode->i_op->put_link) { 1590 | + struct ovl_link_data *data; 1591 | + 1592 | + data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL); 1593 | + if (!data) { 1594 | + realinode->i_op->put_link(realdentry, nd, ret); 1595 | + return ERR_PTR(-ENOMEM); 1596 | + } 1597 | + data->realdentry = realdentry; 1598 | + data->cookie = ret; 1599 | + 1600 | + return data; 1601 | + } else { 1602 | + return NULL; 1603 | + } 1604 | +} 1605 | + 1606 | +static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c) 1607 | +{ 1608 | + struct inode *realinode; 1609 | + struct ovl_link_data *data = c; 1610 | + 1611 | + if (!data) 1612 | + return; 1613 | + 1614 | + realinode = data->realdentry->d_inode; 1615 | + realinode->i_op->put_link(data->realdentry, nd, data->cookie); 1616 | + kfree(data); 1617 | +} 1618 | + 1619 | +static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) 1620 | +{ 1621 | + struct path realpath; 1622 | + struct inode *realinode; 1623 | + 1624 | + ovl_path_real(dentry, 
&realpath); 1625 | + realinode = realpath.dentry->d_inode; 1626 | + 1627 | + if (!realinode->i_op->readlink) 1628 | + return -EINVAL; 1629 | + 1630 | + touch_atime(&realpath); 1631 | + 1632 | + return realinode->i_op->readlink(realpath.dentry, buf, bufsiz); 1633 | +} 1634 | + 1635 | + 1636 | +static bool ovl_is_private_xattr(const char *name) 1637 | +{ 1638 | + return strncmp(name, "trusted.overlay.", 16) == 0; /* 16 == strlen("trusted.overlay."): compare the whole prefix, not just "trusted.overla" */ 1639 | +} 1640 | + 1641 | +int ovl_setxattr(struct dentry *dentry, const char *name, 1642 | + const void *value, size_t size, int flags) 1643 | +{ 1644 | + int err; 1645 | + struct dentry *upperdentry; 1646 | + 1647 | + if (ovl_is_private_xattr(name)) 1648 | + return -EPERM; 1649 | + 1650 | + err = ovl_copy_up(dentry); 1651 | + if (err) 1652 | + return err; 1653 | + 1654 | + upperdentry = ovl_dentry_upper(dentry); 1655 | + return vfs_setxattr(upperdentry, name, value, size, flags); 1656 | +} 1657 | + 1658 | +ssize_t ovl_getxattr(struct dentry *dentry, const char *name, 1659 | + void *value, size_t size) 1660 | +{ 1661 | + if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && 1662 | + ovl_is_private_xattr(name)) 1663 | + return -ENODATA; 1664 | + 1665 | + return vfs_getxattr(ovl_dentry_real(dentry), name, value, size); 1666 | +} 1667 | + 1668 | +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) 1669 | +{ 1670 | + ssize_t res; 1671 | + int off; 1672 | + 1673 | + res = vfs_listxattr(ovl_dentry_real(dentry), list, size); 1674 | + if (res <= 0 || size == 0) 1675 | + return res; 1676 | + 1677 | + if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE) 1678 | + return res; 1679 | + 1680 | + /* filter out private xattrs */ 1681 | + for (off = 0; off < res;) { 1682 | + char *s = list + off; 1683 | + size_t slen = strlen(s) + 1; 1684 | + 1685 | + BUG_ON(off + slen > res); 1686 | + 1687 | + if (ovl_is_private_xattr(s)) { 1688 | + res -= slen; 1689 | + memmove(s, s + slen, res - off); 1690 | + } else { 1691 | + off += slen; 1692 | + } 1693 | + }
1694 | + 1695 | + return res; 1696 | +} 1697 | + 1698 | +int ovl_removexattr(struct dentry *dentry, const char *name) 1699 | +{ 1700 | + int err; 1701 | + struct path realpath; 1702 | + enum ovl_path_type type; 1703 | + 1704 | + if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && 1705 | + ovl_is_private_xattr(name)) 1706 | + return -ENODATA; 1707 | + 1708 | + type = ovl_path_real(dentry, &realpath); 1709 | + if (type == OVL_PATH_LOWER) { 1710 | + err = vfs_getxattr(realpath.dentry, name, NULL, 0); 1711 | + if (err < 0) 1712 | + return err; 1713 | + 1714 | + err = ovl_copy_up(dentry); 1715 | + if (err) 1716 | + return err; 1717 | + 1718 | + ovl_path_upper(dentry, &realpath); 1719 | + } 1720 | + 1721 | + return vfs_removexattr(realpath.dentry, name); 1722 | +} 1723 | + 1724 | +static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, 1725 | + struct dentry *realdentry) 1726 | +{ 1727 | + if (type != OVL_PATH_LOWER) 1728 | + return false; 1729 | + 1730 | + if (special_file(realdentry->d_inode->i_mode)) 1731 | + return false; 1732 | + 1733 | + if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC)) 1734 | + return false; 1735 | + 1736 | + return true; 1737 | +} 1738 | + 1739 | +static int ovl_dentry_open(struct dentry *dentry, struct file *file, 1740 | + const struct cred *cred) 1741 | +{ 1742 | + int err; 1743 | + struct path realpath; 1744 | + enum ovl_path_type type; 1745 | + 1746 | + type = ovl_path_real(dentry, &realpath); 1747 | + if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) { 1748 | + if (file->f_flags & O_TRUNC) 1749 | + err = ovl_copy_up_truncate(dentry, 0); 1750 | + else 1751 | + err = ovl_copy_up(dentry); 1752 | + if (err) 1753 | + return err; 1754 | + 1755 | + ovl_path_upper(dentry, &realpath); 1756 | + } 1757 | + 1758 | + return vfs_open(&realpath, file, cred); 1759 | +} 1760 | + 1761 | +static const struct inode_operations ovl_file_inode_operations = { 1762 | + .setattr = ovl_setattr, 1763 | + .permission = 
ovl_permission, 1764 | + .getattr = ovl_getattr, 1765 | + .setxattr = ovl_setxattr, 1766 | + .getxattr = ovl_getxattr, 1767 | + .listxattr = ovl_listxattr, 1768 | + .removexattr = ovl_removexattr, 1769 | + .dentry_open = ovl_dentry_open, 1770 | +}; 1771 | + 1772 | +static const struct inode_operations ovl_symlink_inode_operations = { 1773 | + .setattr = ovl_setattr, 1774 | + .follow_link = ovl_follow_link, 1775 | + .put_link = ovl_put_link, 1776 | + .readlink = ovl_readlink, 1777 | + .getattr = ovl_getattr, 1778 | + .setxattr = ovl_setxattr, 1779 | + .getxattr = ovl_getxattr, 1780 | + .listxattr = ovl_listxattr, 1781 | + .removexattr = ovl_removexattr, 1782 | +}; 1783 | + 1784 | +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, 1785 | + struct ovl_entry *oe) 1786 | +{ 1787 | + struct inode *inode; 1788 | + 1789 | + inode = new_inode(sb); 1790 | + if (!inode) 1791 | + return NULL; 1792 | + 1793 | + mode &= S_IFMT; 1794 | + 1795 | + inode->i_ino = get_next_ino(); 1796 | + inode->i_mode = mode; 1797 | + inode->i_flags |= S_NOATIME | S_NOCMTIME; 1798 | + 1799 | + switch (mode) { 1800 | + case S_IFDIR: 1801 | + inode->i_private = oe; 1802 | + inode->i_op = &ovl_dir_inode_operations; 1803 | + inode->i_fop = &ovl_dir_operations; 1804 | + break; 1805 | + 1806 | + case S_IFLNK: 1807 | + inode->i_op = &ovl_symlink_inode_operations; 1808 | + break; 1809 | + 1810 | + case S_IFREG: 1811 | + case S_IFSOCK: 1812 | + case S_IFBLK: 1813 | + case S_IFCHR: 1814 | + case S_IFIFO: 1815 | + inode->i_op = &ovl_file_inode_operations; 1816 | + break; 1817 | + 1818 | + default: 1819 | + WARN(1, "illegal file type: %i\n", mode); 1820 | + iput(inode); 1821 | + inode = NULL; 1822 | + } 1823 | + 1824 | + return inode; 1825 | + 1826 | +} 1827 | diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h 1828 | new file mode 100644 1829 | index 0000000..1cba38f 1830 | --- /dev/null 1831 | +++ b/fs/overlayfs/overlayfs.h 1832 | @@ -0,0 +1,70 @@ 1833 | +/* 1834 | + * 1835 | + * 
Copyright (C) 2011 Novell Inc. 1836 | + * 1837 | + * This program is free software; you can redistribute it and/or modify it 1838 | + * under the terms of the GNU General Public License version 2 as published by 1839 | + * the Free Software Foundation. 1840 | + */ 1841 | + 1842 | +struct ovl_entry; 1843 | + 1844 | +enum ovl_path_type { 1845 | + OVL_PATH_UPPER, 1846 | + OVL_PATH_MERGE, 1847 | + OVL_PATH_LOWER, 1848 | +}; 1849 | + 1850 | +extern const char *ovl_opaque_xattr; 1851 | +extern const char *ovl_whiteout_xattr; 1852 | +extern const struct dentry_operations ovl_dentry_operations; 1853 | + 1854 | +enum ovl_path_type ovl_path_type(struct dentry *dentry); 1855 | +u64 ovl_dentry_version_get(struct dentry *dentry); 1856 | +void ovl_dentry_version_inc(struct dentry *dentry); 1857 | +void ovl_path_upper(struct dentry *dentry, struct path *path); 1858 | +void ovl_path_lower(struct dentry *dentry, struct path *path); 1859 | +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); 1860 | +struct dentry *ovl_dentry_upper(struct dentry *dentry); 1861 | +struct dentry *ovl_dentry_lower(struct dentry *dentry); 1862 | +struct dentry *ovl_dentry_real(struct dentry *dentry); 1863 | +struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper); 1864 | +bool ovl_dentry_is_opaque(struct dentry *dentry); 1865 | +void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque); 1866 | +bool ovl_is_whiteout(struct dentry *dentry); 1867 | +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); 1868 | +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, 1869 | + unsigned int flags); 1870 | +struct file *ovl_path_open(struct path *path, int flags); 1871 | + 1872 | +struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry, 1873 | + struct kstat *stat, const char *link); 1874 | + 1875 | +/* readdir.c */ 1876 | +extern const struct file_operations ovl_dir_operations; 1877 | +int 
ovl_check_empty_and_clear(struct dentry *dentry, enum ovl_path_type type); 1878 | + 1879 | +/* inode.c */ 1880 | +int ovl_setattr(struct dentry *dentry, struct iattr *attr); 1881 | +int ovl_permission(struct inode *inode, int mask); 1882 | +int ovl_setxattr(struct dentry *dentry, const char *name, 1883 | + const void *value, size_t size, int flags); 1884 | +ssize_t ovl_getxattr(struct dentry *dentry, const char *name, 1885 | + void *value, size_t size); 1886 | +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); 1887 | +int ovl_removexattr(struct dentry *dentry, const char *name); 1888 | + 1889 | +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, 1890 | + struct ovl_entry *oe); 1891 | +static inline void ovl_copyattr(struct inode *from, struct inode *to) 1892 | +{ 1893 | + to->i_uid = from->i_uid; 1894 | + to->i_gid = from->i_gid; 1895 | +} 1896 | + 1897 | +/* dir.c */ 1898 | +extern const struct inode_operations ovl_dir_inode_operations; 1899 | + 1900 | +/* copy_up.c */ 1901 | +int ovl_copy_up(struct dentry *dentry); 1902 | +int ovl_copy_up_truncate(struct dentry *dentry, loff_t size); 1903 | diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c 1904 | new file mode 100644 1905 | index 0000000..0797efb 1906 | --- /dev/null 1907 | +++ b/fs/overlayfs/readdir.c 1908 | @@ -0,0 +1,566 @@ 1909 | +/* 1910 | + * 1911 | + * Copyright (C) 2011 Novell Inc. 1912 | + * 1913 | + * This program is free software; you can redistribute it and/or modify it 1914 | + * under the terms of the GNU General Public License version 2 as published by 1915 | + * the Free Software Foundation. 
1916 | + */ 1917 | + 1918 | +#include 1919 | +#include 1920 | +#include 1921 | +#include 1922 | +#include 1923 | +#include 1924 | +#include 1925 | +#include 1926 | +#include "overlayfs.h" 1927 | + 1928 | +struct ovl_cache_entry { 1929 | + const char *name; 1930 | + unsigned int len; 1931 | + unsigned int type; 1932 | + u64 ino; 1933 | + bool is_whiteout; 1934 | + struct list_head l_node; 1935 | + struct rb_node node; 1936 | +}; 1937 | + 1938 | +struct ovl_readdir_data { 1939 | + struct rb_root *root; 1940 | + struct list_head *list; 1941 | + struct list_head *middle; 1942 | + struct dentry *dir; 1943 | + int count; 1944 | + int err; 1945 | +}; 1946 | + 1947 | +struct ovl_dir_file { 1948 | + bool is_real; 1949 | + bool is_cached; 1950 | + struct list_head cursor; 1951 | + u64 cache_version; 1952 | + struct list_head cache; 1953 | + struct file *realfile; 1954 | +}; 1955 | + 1956 | +static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) 1957 | +{ 1958 | + return container_of(n, struct ovl_cache_entry, node); 1959 | +} 1960 | + 1961 | +static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, 1962 | + const char *name, int len) 1963 | +{ 1964 | + struct rb_node *node = root->rb_node; 1965 | + int cmp; 1966 | + 1967 | + while (node) { 1968 | + struct ovl_cache_entry *p = ovl_cache_entry_from_node(node); 1969 | + 1970 | + cmp = strncmp(name, p->name, len); 1971 | + if (cmp > 0) 1972 | + node = p->node.rb_right; 1973 | + else if (cmp < 0 || len < p->len) 1974 | + node = p->node.rb_left; 1975 | + else 1976 | + return p; 1977 | + } 1978 | + 1979 | + return NULL; 1980 | +} 1981 | + 1982 | +static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, 1983 | + u64 ino, unsigned int d_type) 1984 | +{ 1985 | + struct ovl_cache_entry *p; 1986 | + 1987 | + p = kmalloc(sizeof(*p) + len + 1, GFP_KERNEL); 1988 | + if (p) { 1989 | + char *name_copy = (char *) (p + 1); 1990 | + memcpy(name_copy, name, len); 1991 | + name_copy[len] 
= '\0'; 1992 | + p->name = name_copy; 1993 | + p->len = len; 1994 | + p->type = d_type; 1995 | + p->ino = ino; 1996 | + p->is_whiteout = false; 1997 | + } 1998 | + 1999 | + return p; 2000 | +} 2001 | + 2002 | +static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, 2003 | + const char *name, int len, u64 ino, 2004 | + unsigned int d_type) 2005 | +{ 2006 | + struct rb_node **newp = &rdd->root->rb_node; 2007 | + struct rb_node *parent = NULL; 2008 | + struct ovl_cache_entry *p; 2009 | + 2010 | + while (*newp) { 2011 | + int cmp; 2012 | + struct ovl_cache_entry *tmp; 2013 | + 2014 | + parent = *newp; 2015 | + tmp = ovl_cache_entry_from_node(*newp); 2016 | + cmp = strncmp(name, tmp->name, len); 2017 | + if (cmp > 0) 2018 | + newp = &tmp->node.rb_right; 2019 | + else if (cmp < 0 || len < tmp->len) 2020 | + newp = &tmp->node.rb_left; 2021 | + else 2022 | + return 0; 2023 | + } 2024 | + 2025 | + p = ovl_cache_entry_new(name, len, ino, d_type); 2026 | + if (p == NULL) 2027 | + return -ENOMEM; 2028 | + 2029 | + list_add_tail(&p->l_node, rdd->list); 2030 | + rb_link_node(&p->node, parent, newp); 2031 | + rb_insert_color(&p->node, rdd->root); 2032 | + 2033 | + return 0; 2034 | +} 2035 | + 2036 | +static int ovl_fill_lower(void *buf, const char *name, int namelen, 2037 | + loff_t offset, u64 ino, unsigned int d_type) 2038 | +{ 2039 | + struct ovl_readdir_data *rdd = buf; 2040 | + struct ovl_cache_entry *p; 2041 | + 2042 | + rdd->count++; 2043 | + p = ovl_cache_entry_find(rdd->root, name, namelen); 2044 | + if (p) { 2045 | + list_move_tail(&p->l_node, rdd->middle); 2046 | + } else { 2047 | + p = ovl_cache_entry_new(name, namelen, ino, d_type); 2048 | + if (p == NULL) 2049 | + rdd->err = -ENOMEM; 2050 | + else 2051 | + list_add_tail(&p->l_node, rdd->middle); 2052 | + } 2053 | + 2054 | + return rdd->err; 2055 | +} 2056 | + 2057 | +static void ovl_cache_free(struct list_head *list) 2058 | +{ 2059 | + struct ovl_cache_entry *p; 2060 | + struct ovl_cache_entry *n; 2061 | + 
2062 | + list_for_each_entry_safe(p, n, list, l_node) 2063 | + kfree(p); 2064 | + 2065 | + INIT_LIST_HEAD(list); 2066 | +} 2067 | + 2068 | +static int ovl_fill_upper(void *buf, const char *name, int namelen, 2069 | + loff_t offset, u64 ino, unsigned int d_type) 2070 | +{ 2071 | + struct ovl_readdir_data *rdd = buf; 2072 | + 2073 | + rdd->count++; 2074 | + return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type); 2075 | +} 2076 | + 2077 | +static inline int ovl_dir_read(struct path *realpath, 2078 | + struct ovl_readdir_data *rdd, filldir_t filler) 2079 | +{ 2080 | + struct file *realfile; 2081 | + int err; 2082 | + 2083 | + realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY); 2084 | + if (IS_ERR(realfile)) 2085 | + return PTR_ERR(realfile); 2086 | + 2087 | + do { 2088 | + rdd->count = 0; 2089 | + rdd->err = 0; 2090 | + err = vfs_readdir(realfile, filler, rdd); 2091 | + if (err >= 0) 2092 | + err = rdd->err; 2093 | + } while (!err && rdd->count); 2094 | + fput(realfile); 2095 | + 2096 | + return 0; 2097 | +} 2098 | + 2099 | +static void ovl_dir_reset(struct file *file) 2100 | +{ 2101 | + struct ovl_dir_file *od = file->private_data; 2102 | + enum ovl_path_type type = ovl_path_type(file->f_path.dentry); 2103 | + 2104 | + if (ovl_dentry_version_get(file->f_path.dentry) != od->cache_version) { 2105 | + list_del_init(&od->cursor); 2106 | + ovl_cache_free(&od->cache); 2107 | + od->is_cached = false; 2108 | + } 2109 | + WARN_ON(!od->is_real && type != OVL_PATH_MERGE); 2110 | + if (od->is_real && type == OVL_PATH_MERGE) { 2111 | + fput(od->realfile); 2112 | + od->realfile = NULL; 2113 | + od->is_real = false; 2114 | + } 2115 | +} 2116 | + 2117 | +static int ovl_dir_mark_whiteouts(struct ovl_readdir_data *rdd) 2118 | +{ 2119 | + struct ovl_cache_entry *p; 2120 | + struct dentry *dentry; 2121 | + const struct cred *old_cred; 2122 | + struct cred *override_cred; 2123 | + 2124 | + override_cred = prepare_creds(); 2125 | + if (!override_cred) { 2126 | + 
ovl_cache_free(rdd->list); 2127 | + return -ENOMEM; 2128 | + } 2129 | + 2130 | + /* 2131 | + * CAP_SYS_ADMIN for getxattr 2132 | + * CAP_DAC_OVERRIDE for lookup 2133 | + */ 2134 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 2135 | + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 2136 | + old_cred = override_creds(override_cred); 2137 | + 2138 | + mutex_lock(&rdd->dir->d_inode->i_mutex); 2139 | + list_for_each_entry(p, rdd->list, l_node) { 2140 | + if (p->type != DT_LNK) 2141 | + continue; 2142 | + 2143 | + dentry = lookup_one_len(p->name, rdd->dir, p->len); 2144 | + if (IS_ERR(dentry)) 2145 | + continue; 2146 | + 2147 | + p->is_whiteout = ovl_is_whiteout(dentry); 2148 | + dput(dentry); 2149 | + } 2150 | + mutex_unlock(&rdd->dir->d_inode->i_mutex); 2151 | + 2152 | + revert_creds(old_cred); 2153 | + put_cred(override_cred); 2154 | + 2155 | + return 0; 2156 | +} 2157 | + 2158 | +static inline int ovl_dir_read_merged(struct path *upperpath, 2159 | + struct path *lowerpath, 2160 | + struct ovl_readdir_data *rdd) 2161 | +{ 2162 | + int err; 2163 | + struct rb_root root = RB_ROOT; 2164 | + struct list_head middle; 2165 | + 2166 | + rdd->root = &root; 2167 | + if (upperpath->dentry) { 2168 | + rdd->dir = upperpath->dentry; 2169 | + err = ovl_dir_read(upperpath, rdd, ovl_fill_upper); 2170 | + if (err) 2171 | + goto out; 2172 | + 2173 | + err = ovl_dir_mark_whiteouts(rdd); 2174 | + if (err) 2175 | + goto out; 2176 | + } 2177 | + /* 2178 | + * Insert lowerpath entries before upperpath ones, this allows 2179 | + * offsets to be reasonably constant 2180 | + */ 2181 | + list_add(&middle, rdd->list); 2182 | + rdd->middle = &middle; 2183 | + err = ovl_dir_read(lowerpath, rdd, ovl_fill_lower); 2184 | + list_del(&middle); 2185 | +out: 2186 | + rdd->root = NULL; 2187 | + 2188 | + return err; 2189 | +} 2190 | + 2191 | +static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) 2192 | +{ 2193 | + struct list_head *l; 2194 | + loff_t off; 2195 | + 2196 | 
+ l = od->cache.next; 2197 | + for (off = 0; off < pos; off++) { 2198 | + if (l == &od->cache) 2199 | + break; 2200 | + l = l->next; 2201 | + } 2202 | + list_move_tail(&od->cursor, l); 2203 | +} 2204 | + 2205 | +static int ovl_readdir(struct file *file, void *buf, filldir_t filler) 2206 | +{ 2207 | + struct ovl_dir_file *od = file->private_data; 2208 | + int res; 2209 | + 2210 | + if (!file->f_pos) 2211 | + ovl_dir_reset(file); 2212 | + 2213 | + if (od->is_real) { 2214 | + res = vfs_readdir(od->realfile, filler, buf); 2215 | + file->f_pos = od->realfile->f_pos; 2216 | + 2217 | + return res; 2218 | + } 2219 | + 2220 | + if (!od->is_cached) { 2221 | + struct path lowerpath; 2222 | + struct path upperpath; 2223 | + struct ovl_readdir_data rdd = { .list = &od->cache }; 2224 | + 2225 | + ovl_path_lower(file->f_path.dentry, &lowerpath); 2226 | + ovl_path_upper(file->f_path.dentry, &upperpath); 2227 | + 2228 | + res = ovl_dir_read_merged(&upperpath, &lowerpath, &rdd); 2229 | + if (res) { 2230 | + ovl_cache_free(rdd.list); 2231 | + return res; 2232 | + } 2233 | + 2234 | + od->cache_version = ovl_dentry_version_get(file->f_path.dentry); 2235 | + od->is_cached = true; 2236 | + 2237 | + ovl_seek_cursor(od, file->f_pos); 2238 | + } 2239 | + 2240 | + while (od->cursor.next != &od->cache) { 2241 | + int over; 2242 | + loff_t off; 2243 | + struct ovl_cache_entry *p; 2244 | + 2245 | + p = list_entry(od->cursor.next, struct ovl_cache_entry, l_node); 2246 | + off = file->f_pos; 2247 | + if (!p->is_whiteout) { 2248 | + over = filler(buf, p->name, p->len, off, p->ino, 2249 | + p->type); 2250 | + if (over) 2251 | + break; 2252 | + } 2253 | + file->f_pos++; 2254 | + list_move(&od->cursor, &p->l_node); 2255 | + } 2256 | + 2257 | + return 0; 2258 | +} 2259 | + 2260 | +static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) 2261 | +{ 2262 | + loff_t res; 2263 | + struct ovl_dir_file *od = file->private_data; 2264 | + 2265 | + 
mutex_lock(&file->f_dentry->d_inode->i_mutex); 2266 | + if (!file->f_pos) 2267 | + ovl_dir_reset(file); 2268 | + 2269 | + if (od->is_real) { 2270 | + res = vfs_llseek(od->realfile, offset, origin); 2271 | + file->f_pos = od->realfile->f_pos; 2272 | + } else { 2273 | + res = -EINVAL; 2274 | + 2275 | + switch (origin) { 2276 | + case SEEK_CUR: 2277 | + offset += file->f_pos; 2278 | + break; 2279 | + case SEEK_SET: 2280 | + break; 2281 | + default: 2282 | + goto out_unlock; 2283 | + } 2284 | + if (offset < 0) 2285 | + goto out_unlock; 2286 | + 2287 | + if (offset != file->f_pos) { 2288 | + file->f_pos = offset; 2289 | + if (od->is_cached) 2290 | + ovl_seek_cursor(od, offset); 2291 | + } 2292 | + res = offset; 2293 | + } 2294 | +out_unlock: 2295 | + mutex_unlock(&file->f_dentry->d_inode->i_mutex); 2296 | + 2297 | + return res; 2298 | +} 2299 | + 2300 | +static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, 2301 | + int datasync) 2302 | +{ 2303 | + struct ovl_dir_file *od = file->private_data; 2304 | + 2305 | + /* May need to reopen directory if it got copied up */ 2306 | + if (!od->realfile) { 2307 | + struct path upperpath; 2308 | + 2309 | + ovl_path_upper(file->f_path.dentry, &upperpath); 2310 | + od->realfile = ovl_path_open(&upperpath, O_RDONLY); 2311 | + if (IS_ERR(od->realfile)) 2312 | + return PTR_ERR(od->realfile); 2313 | + } 2314 | + 2315 | + return vfs_fsync_range(od->realfile, start, end, datasync); 2316 | +} 2317 | + 2318 | +static int ovl_dir_release(struct inode *inode, struct file *file) 2319 | +{ 2320 | + struct ovl_dir_file *od = file->private_data; 2321 | + 2322 | + list_del(&od->cursor); 2323 | + ovl_cache_free(&od->cache); 2324 | + if (od->realfile) 2325 | + fput(od->realfile); 2326 | + kfree(od); 2327 | + 2328 | + return 0; 2329 | +} 2330 | + 2331 | +static int ovl_dir_open(struct inode *inode, struct file *file) 2332 | +{ 2333 | + struct path realpath; 2334 | + struct file *realfile; 2335 | + struct ovl_dir_file *od; 2336 | + enum 
ovl_path_type type; 2337 | + 2338 | + od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL); 2339 | + if (!od) 2340 | + return -ENOMEM; 2341 | + 2342 | + type = ovl_path_real(file->f_path.dentry, &realpath); 2343 | + realfile = ovl_path_open(&realpath, file->f_flags); 2344 | + if (IS_ERR(realfile)) { 2345 | + kfree(od); 2346 | + return PTR_ERR(realfile); 2347 | + } 2348 | + INIT_LIST_HEAD(&od->cache); 2349 | + INIT_LIST_HEAD(&od->cursor); 2350 | + od->is_cached = false; 2351 | + od->realfile = realfile; 2352 | + od->is_real = (type != OVL_PATH_MERGE); 2353 | + file->private_data = od; 2354 | + 2355 | + return 0; 2356 | +} 2357 | + 2358 | +const struct file_operations ovl_dir_operations = { 2359 | + .read = generic_read_dir, 2360 | + .open = ovl_dir_open, 2361 | + .readdir = ovl_readdir, 2362 | + .llseek = ovl_dir_llseek, 2363 | + .fsync = ovl_dir_fsync, 2364 | + .release = ovl_dir_release, 2365 | +}; 2366 | + 2367 | +static int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) 2368 | +{ 2369 | + int err; 2370 | + struct path lowerpath; 2371 | + struct path upperpath; 2372 | + struct ovl_cache_entry *p; 2373 | + struct ovl_readdir_data rdd = { .list = list }; 2374 | + 2375 | + ovl_path_upper(dentry, &upperpath); 2376 | + ovl_path_lower(dentry, &lowerpath); 2377 | + 2378 | + err = ovl_dir_read_merged(&upperpath, &lowerpath, &rdd); 2379 | + if (err) 2380 | + return err; 2381 | + 2382 | + err = 0; 2383 | + 2384 | + list_for_each_entry(p, list, l_node) { 2385 | + if (p->is_whiteout) 2386 | + continue; 2387 | + 2388 | + if (p->name[0] == '.') { 2389 | + if (p->len == 1) 2390 | + continue; 2391 | + if (p->len == 2 && p->name[1] == '.') 2392 | + continue; 2393 | + } 2394 | + err = -ENOTEMPTY; 2395 | + break; 2396 | + } 2397 | + 2398 | + return err; 2399 | +} 2400 | + 2401 | +static int ovl_remove_whiteouts(struct dentry *dir, struct list_head *list) 2402 | +{ 2403 | + struct path upperpath; 2404 | + struct dentry *upperdir; 2405 | + struct ovl_cache_entry 
*p; 2406 | + const struct cred *old_cred; 2407 | + struct cred *override_cred; 2408 | + int err; 2409 | + 2410 | + ovl_path_upper(dir, &upperpath); 2411 | + upperdir = upperpath.dentry; 2412 | + 2413 | + override_cred = prepare_creds(); 2414 | + if (!override_cred) 2415 | + return -ENOMEM; 2416 | + 2417 | + /* 2418 | + * CAP_DAC_OVERRIDE for lookup and unlink 2419 | + * CAP_SYS_ADMIN for setxattr of "trusted" namespace 2420 | + * CAP_FOWNER for unlink in sticky directory 2421 | + */ 2422 | + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 2423 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 2424 | + cap_raise(override_cred->cap_effective, CAP_FOWNER); 2425 | + old_cred = override_creds(override_cred); 2426 | + 2427 | + err = vfs_setxattr(upperdir, ovl_opaque_xattr, "y", 1, 0); 2428 | + if (err) 2429 | + goto out_revert_creds; 2430 | + 2431 | + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT); 2432 | + list_for_each_entry(p, list, l_node) { 2433 | + struct dentry *dentry; 2434 | + int ret; 2435 | + 2436 | + if (!p->is_whiteout) 2437 | + continue; 2438 | + 2439 | + dentry = lookup_one_len(p->name, upperdir, p->len); 2440 | + if (IS_ERR(dentry)) { 2441 | + printk(KERN_WARNING 2442 | + "overlayfs: failed to lookup whiteout %.*s: %li\n", 2443 | + p->len, p->name, PTR_ERR(dentry)); 2444 | + continue; 2445 | + } 2446 | + ret = vfs_unlink(upperdir->d_inode, dentry); 2447 | + dput(dentry); 2448 | + if (ret) 2449 | + printk(KERN_WARNING 2450 | + "overlayfs: failed to unlink whiteout %.*s: %i\n", 2451 | + p->len, p->name, ret); 2452 | + } 2453 | + mutex_unlock(&upperdir->d_inode->i_mutex); 2454 | + 2455 | +out_revert_creds: 2456 | + revert_creds(old_cred); 2457 | + put_cred(override_cred); 2458 | + 2459 | + return err; 2460 | +} 2461 | + 2462 | +int ovl_check_empty_and_clear(struct dentry *dentry, enum ovl_path_type type) 2463 | +{ 2464 | + int err; 2465 | + LIST_HEAD(list); 2466 | + 2467 | + err = ovl_check_empty_dir(dentry, &list); 2468 
| + if (!err && type == OVL_PATH_MERGE) 2469 | + err = ovl_remove_whiteouts(dentry, &list); 2470 | + 2471 | + ovl_cache_free(&list); 2472 | + 2473 | + return err; 2474 | +} 2475 | diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c 2476 | new file mode 100644 2477 | index 0000000..357d6e8 2478 | --- /dev/null 2479 | +++ b/fs/overlayfs/super.c 2480 | @@ -0,0 +1,685 @@ 2481 | +/* 2482 | + * 2483 | + * Copyright (C) 2011 Novell Inc. 2484 | + * 2485 | + * This program is free software; you can redistribute it and/or modify it 2486 | + * under the terms of the GNU General Public License version 2 as published by 2487 | + * the Free Software Foundation. 2488 | + */ 2489 | + 2490 | +#include 2491 | +#include 2492 | +#include 2493 | +#include 2494 | +#include 2495 | +#include 2496 | +#include 2497 | +#include 2498 | +#include 2499 | +#include 2500 | +#include 2501 | +#include 2502 | +#include "overlayfs.h" 2503 | + 2504 | +MODULE_AUTHOR("Miklos Szeredi "); 2505 | +MODULE_DESCRIPTION("Overlay filesystem"); 2506 | +MODULE_LICENSE("GPL"); 2507 | + 2508 | +#define OVERLAYFS_SUPER_MAGIC 0x794c764f 2509 | + 2510 | +struct ovl_config { 2511 | + char *lowerdir; 2512 | + char *upperdir; 2513 | +}; 2514 | + 2515 | +/* private information held for overlayfs's superblock */ 2516 | +struct ovl_fs { 2517 | + struct vfsmount *upper_mnt; 2518 | + struct vfsmount *lower_mnt; 2519 | + long lower_namelen; 2520 | + /* pathnames of lower and upper dirs, for show_options */ 2521 | + struct ovl_config config; 2522 | +}; 2523 | + 2524 | +/* private information held for every overlayfs dentry */ 2525 | +struct ovl_entry { 2526 | + /* 2527 | + * Keep "double reference" on upper dentries, so that 2528 | + * d_delete() doesn't think it's OK to reset d_inode to NULL. 
2529 | + */ 2530 | + struct dentry *__upperdentry; 2531 | + struct dentry *lowerdentry; 2532 | + union { 2533 | + struct { 2534 | + u64 version; 2535 | + bool opaque; 2536 | + }; 2537 | + struct rcu_head rcu; 2538 | + }; 2539 | +}; 2540 | + 2541 | +const char *ovl_whiteout_xattr = "trusted.overlay.whiteout"; 2542 | +const char *ovl_opaque_xattr = "trusted.overlay.opaque"; 2543 | + 2544 | + 2545 | +enum ovl_path_type ovl_path_type(struct dentry *dentry) 2546 | +{ 2547 | + struct ovl_entry *oe = dentry->d_fsdata; 2548 | + 2549 | + if (oe->__upperdentry) { 2550 | + if (oe->lowerdentry && S_ISDIR(dentry->d_inode->i_mode)) 2551 | + return OVL_PATH_MERGE; 2552 | + else 2553 | + return OVL_PATH_UPPER; 2554 | + } else { 2555 | + return OVL_PATH_LOWER; 2556 | + } 2557 | +} 2558 | + 2559 | +static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) 2560 | +{ 2561 | + struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); 2562 | + smp_read_barrier_depends(); 2563 | + return upperdentry; 2564 | +} 2565 | + 2566 | +void ovl_path_upper(struct dentry *dentry, struct path *path) 2567 | +{ 2568 | + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 2569 | + struct ovl_entry *oe = dentry->d_fsdata; 2570 | + 2571 | + path->mnt = ofs->upper_mnt; 2572 | + path->dentry = ovl_upperdentry_dereference(oe); 2573 | +} 2574 | + 2575 | +void ovl_path_lower(struct dentry *dentry, struct path *path) 2576 | +{ 2577 | + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 2578 | + struct ovl_entry *oe = dentry->d_fsdata; 2579 | + 2580 | + path->mnt = ofs->lower_mnt; 2581 | + path->dentry = oe->lowerdentry; 2582 | +} 2583 | + 2584 | +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) 2585 | +{ 2586 | + 2587 | + enum ovl_path_type type = ovl_path_type(dentry); 2588 | + 2589 | + if (type == OVL_PATH_LOWER) 2590 | + ovl_path_lower(dentry, path); 2591 | + else 2592 | + ovl_path_upper(dentry, path); 2593 | + 2594 | + return type; 2595 | +} 2596 | + 2597 | +struct dentry 
*ovl_dentry_upper(struct dentry *dentry) 2598 | +{ 2599 | + struct ovl_entry *oe = dentry->d_fsdata; 2600 | + 2601 | + return ovl_upperdentry_dereference(oe); 2602 | +} 2603 | + 2604 | +struct dentry *ovl_dentry_lower(struct dentry *dentry) 2605 | +{ 2606 | + struct ovl_entry *oe = dentry->d_fsdata; 2607 | + 2608 | + return oe->lowerdentry; 2609 | +} 2610 | + 2611 | +struct dentry *ovl_dentry_real(struct dentry *dentry) 2612 | +{ 2613 | + struct ovl_entry *oe = dentry->d_fsdata; 2614 | + struct dentry *realdentry; 2615 | + 2616 | + realdentry = ovl_upperdentry_dereference(oe); 2617 | + if (!realdentry) 2618 | + realdentry = oe->lowerdentry; 2619 | + 2620 | + return realdentry; 2621 | +} 2622 | + 2623 | +struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper) 2624 | +{ 2625 | + struct dentry *realdentry; 2626 | + 2627 | + realdentry = ovl_upperdentry_dereference(oe); 2628 | + if (realdentry) { 2629 | + *is_upper = true; 2630 | + } else { 2631 | + realdentry = oe->lowerdentry; 2632 | + *is_upper = false; 2633 | + } 2634 | + return realdentry; 2635 | +} 2636 | + 2637 | +bool ovl_dentry_is_opaque(struct dentry *dentry) 2638 | +{ 2639 | + struct ovl_entry *oe = dentry->d_fsdata; 2640 | + return oe->opaque; 2641 | +} 2642 | + 2643 | +void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque) 2644 | +{ 2645 | + struct ovl_entry *oe = dentry->d_fsdata; 2646 | + oe->opaque = opaque; 2647 | +} 2648 | + 2649 | +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) 2650 | +{ 2651 | + struct ovl_entry *oe = dentry->d_fsdata; 2652 | + 2653 | + WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex)); 2654 | + WARN_ON(oe->__upperdentry); 2655 | + BUG_ON(!upperdentry->d_inode); 2656 | + smp_wmb(); 2657 | + oe->__upperdentry = dget(upperdentry); 2658 | +} 2659 | + 2660 | +void ovl_dentry_version_inc(struct dentry *dentry) 2661 | +{ 2662 | + struct ovl_entry *oe = dentry->d_fsdata; 2663 | + 2664 | + 
WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); 2665 | + oe->version++; 2666 | +} 2667 | + 2668 | +u64 ovl_dentry_version_get(struct dentry *dentry) 2669 | +{ 2670 | + struct ovl_entry *oe = dentry->d_fsdata; 2671 | + 2672 | + WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); 2673 | + return oe->version; 2674 | +} 2675 | + 2676 | +bool ovl_is_whiteout(struct dentry *dentry) 2677 | +{ 2678 | + int res; 2679 | + char val; 2680 | + 2681 | + if (!dentry) 2682 | + return false; 2683 | + if (!dentry->d_inode) 2684 | + return false; 2685 | + if (!S_ISLNK(dentry->d_inode->i_mode)) 2686 | + return false; 2687 | + 2688 | + res = vfs_getxattr(dentry, ovl_whiteout_xattr, &val, 1); 2689 | + if (res == 1 && val == 'y') 2690 | + return true; 2691 | + 2692 | + return false; 2693 | +} 2694 | + 2695 | +static bool ovl_is_opaquedir(struct dentry *dentry) 2696 | +{ 2697 | + int res; 2698 | + char val; 2699 | + 2700 | + if (!S_ISDIR(dentry->d_inode->i_mode)) 2701 | + return false; 2702 | + 2703 | + res = vfs_getxattr(dentry, ovl_opaque_xattr, &val, 1); 2704 | + if (res == 1 && val == 'y') 2705 | + return true; 2706 | + 2707 | + return false; 2708 | +} 2709 | + 2710 | +static void ovl_entry_free(struct rcu_head *head) 2711 | +{ 2712 | + struct ovl_entry *oe = container_of(head, struct ovl_entry, rcu); 2713 | + kfree(oe); 2714 | +} 2715 | + 2716 | +static void ovl_dentry_release(struct dentry *dentry) 2717 | +{ 2718 | + struct ovl_entry *oe = dentry->d_fsdata; 2719 | + 2720 | + if (oe) { 2721 | + dput(oe->__upperdentry); 2722 | + dput(oe->__upperdentry); 2723 | + dput(oe->lowerdentry); 2724 | + call_rcu(&oe->rcu, ovl_entry_free); 2725 | + } 2726 | +} 2727 | + 2728 | +const struct dentry_operations ovl_dentry_operations = { 2729 | + .d_release = ovl_dentry_release, 2730 | +}; 2731 | + 2732 | +static struct ovl_entry *ovl_alloc_entry(void) 2733 | +{ 2734 | + return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL); 2735 | +} 2736 | + 2737 | +static inline struct dentry 
*ovl_lookup_real(struct dentry *dir, 2738 | + struct qstr *name) 2739 | +{ 2740 | + struct dentry *dentry; 2741 | + 2742 | + mutex_lock(&dir->d_inode->i_mutex); 2743 | + dentry = lookup_one_len(name->name, dir, name->len); 2744 | + mutex_unlock(&dir->d_inode->i_mutex); 2745 | + 2746 | + if (IS_ERR(dentry)) { 2747 | + if (PTR_ERR(dentry) == -ENOENT) 2748 | + dentry = NULL; 2749 | + } else if (!dentry->d_inode) { 2750 | + dput(dentry); 2751 | + dentry = NULL; 2752 | + } 2753 | + return dentry; 2754 | +} 2755 | + 2756 | +static int ovl_do_lookup(struct dentry *dentry) 2757 | +{ 2758 | + struct ovl_entry *oe; 2759 | + struct dentry *upperdir; 2760 | + struct dentry *lowerdir; 2761 | + struct dentry *upperdentry = NULL; 2762 | + struct dentry *lowerdentry = NULL; 2763 | + struct inode *inode = NULL; 2764 | + int err; 2765 | + 2766 | + err = -ENOMEM; 2767 | + oe = ovl_alloc_entry(); 2768 | + if (!oe) 2769 | + goto out; 2770 | + 2771 | + upperdir = ovl_dentry_upper(dentry->d_parent); 2772 | + lowerdir = ovl_dentry_lower(dentry->d_parent); 2773 | + 2774 | + if (upperdir) { 2775 | + upperdentry = ovl_lookup_real(upperdir, &dentry->d_name); 2776 | + err = PTR_ERR(upperdentry); 2777 | + if (IS_ERR(upperdentry)) 2778 | + goto out_put_dir; 2779 | + 2780 | + if (lowerdir && upperdentry && 2781 | + (S_ISLNK(upperdentry->d_inode->i_mode) || 2782 | + S_ISDIR(upperdentry->d_inode->i_mode))) { 2783 | + const struct cred *old_cred; 2784 | + struct cred *override_cred; 2785 | + 2786 | + err = -ENOMEM; 2787 | + override_cred = prepare_creds(); 2788 | + if (!override_cred) 2789 | + goto out_dput_upper; 2790 | + 2791 | + /* CAP_SYS_ADMIN needed for getxattr */ 2792 | + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 2793 | + old_cred = override_creds(override_cred); 2794 | + 2795 | + if (ovl_is_opaquedir(upperdentry)) { 2796 | + oe->opaque = true; 2797 | + } else if (ovl_is_whiteout(upperdentry)) { 2798 | + dput(upperdentry); 2799 | + upperdentry = NULL; 2800 | + oe->opaque = 
true; 2801 | + } 2802 | + revert_creds(old_cred); 2803 | + put_cred(override_cred); 2804 | + } 2805 | + } 2806 | + if (lowerdir && !oe->opaque) { 2807 | + lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name); 2808 | + err = PTR_ERR(lowerdentry); 2809 | + if (IS_ERR(lowerdentry)) 2810 | + goto out_dput_upper; 2811 | + } 2812 | + 2813 | + if (lowerdentry && upperdentry && 2814 | + (!S_ISDIR(upperdentry->d_inode->i_mode) || 2815 | + !S_ISDIR(lowerdentry->d_inode->i_mode))) { 2816 | + dput(lowerdentry); 2817 | + lowerdentry = NULL; 2818 | + oe->opaque = true; 2819 | + } 2820 | + 2821 | + if (lowerdentry || upperdentry) { 2822 | + struct dentry *realdentry; 2823 | + 2824 | + realdentry = upperdentry ? upperdentry : lowerdentry; 2825 | + err = -ENOMEM; 2826 | + inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode, 2827 | + oe); 2828 | + if (!inode) 2829 | + goto out_dput; 2830 | + ovl_copyattr(realdentry->d_inode, inode); 2831 | + } 2832 | + 2833 | + if (upperdentry) 2834 | + oe->__upperdentry = dget(upperdentry); 2835 | + 2836 | + if (lowerdentry) 2837 | + oe->lowerdentry = lowerdentry; 2838 | + 2839 | + dentry->d_fsdata = oe; 2840 | + dentry->d_op = &ovl_dentry_operations; 2841 | + d_add(dentry, inode); 2842 | + 2843 | + return 0; 2844 | + 2845 | +out_dput: 2846 | + dput(lowerdentry); 2847 | +out_dput_upper: 2848 | + dput(upperdentry); 2849 | +out_put_dir: 2850 | + kfree(oe); 2851 | +out: 2852 | + return err; 2853 | +} 2854 | + 2855 | +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, 2856 | + unsigned int flags) 2857 | +{ 2858 | + int err = ovl_do_lookup(dentry); 2859 | + 2860 | + if (err) 2861 | + return ERR_PTR(err); 2862 | + 2863 | + return NULL; 2864 | +} 2865 | + 2866 | +struct file *ovl_path_open(struct path *path, int flags) 2867 | +{ 2868 | + path_get(path); 2869 | + return dentry_open(path, flags, current_cred()); 2870 | +} 2871 | + 2872 | +static void ovl_put_super(struct super_block *sb) 2873 | +{ 2874 | + struct ovl_fs *ufs 
= sb->s_fs_info; 2875 | + 2876 | + if (!(sb->s_flags & MS_RDONLY)) 2877 | + mnt_drop_write(ufs->upper_mnt); 2878 | + 2879 | + mntput(ufs->upper_mnt); 2880 | + mntput(ufs->lower_mnt); 2881 | + 2882 | + kfree(ufs->config.lowerdir); 2883 | + kfree(ufs->config.upperdir); 2884 | + kfree(ufs); 2885 | +} 2886 | + 2887 | +static int ovl_remount_fs(struct super_block *sb, int *flagsp, char *data) 2888 | +{ 2889 | + int flags = *flagsp; 2890 | + struct ovl_fs *ufs = sb->s_fs_info; 2891 | + 2892 | + /* When remounting rw or ro, we need to adjust the write access to the 2893 | + * upper fs. 2894 | + */ 2895 | + if (((flags ^ sb->s_flags) & MS_RDONLY) == 0) 2896 | + /* No change to readonly status */ 2897 | + return 0; 2898 | + 2899 | + if (flags & MS_RDONLY) { 2900 | + mnt_drop_write(ufs->upper_mnt); 2901 | + return 0; 2902 | + } else 2903 | + return mnt_want_write(ufs->upper_mnt); 2904 | +} 2905 | + 2906 | +/** 2907 | + * ovl_statfs 2908 | + * @sb: The overlayfs super block 2909 | + * @buf: The struct kstatfs to fill in with stats 2910 | + * 2911 | + * Get the filesystem statistics. As writes always target the upper layer 2912 | + * filesystem pass the statfs to the same filesystem. 2913 | + */ 2914 | +static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) 2915 | +{ 2916 | + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 2917 | + struct dentry *root_dentry = dentry->d_sb->s_root; 2918 | + struct path path; 2919 | + int err; 2920 | + 2921 | + ovl_path_upper(root_dentry, &path); 2922 | + 2923 | + err = vfs_statfs(&path, buf); 2924 | + if (!err) { 2925 | + buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen); 2926 | + buf->f_type = OVERLAYFS_SUPER_MAGIC; 2927 | + } 2928 | + 2929 | + return err; 2930 | +} 2931 | + 2932 | +/** 2933 | + * ovl_show_options 2934 | + * 2935 | + * Prints the mount options for a given superblock. 2936 | + * Returns zero; does not fail. 
2937 | + */ 2938 | +static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 2939 | +{ 2940 | + struct super_block *sb = dentry->d_sb; 2941 | + struct ovl_fs *ufs = sb->s_fs_info; 2942 | + 2943 | + seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir); 2944 | + seq_printf(m, ",upperdir=%s", ufs->config.upperdir); 2945 | + return 0; 2946 | +} 2947 | + 2948 | +static const struct super_operations ovl_super_operations = { 2949 | + .put_super = ovl_put_super, 2950 | + .remount_fs = ovl_remount_fs, 2951 | + .statfs = ovl_statfs, 2952 | + .show_options = ovl_show_options, 2953 | +}; 2954 | + 2955 | +enum { 2956 | + Opt_lowerdir, 2957 | + Opt_upperdir, 2958 | + Opt_err, 2959 | +}; 2960 | + 2961 | +static const match_table_t ovl_tokens = { 2962 | + {Opt_lowerdir, "lowerdir=%s"}, 2963 | + {Opt_upperdir, "upperdir=%s"}, 2964 | + {Opt_err, NULL} 2965 | +}; 2966 | + 2967 | +static int ovl_parse_opt(char *opt, struct ovl_config *config) 2968 | +{ 2969 | + char *p; 2970 | + 2971 | + config->upperdir = NULL; 2972 | + config->lowerdir = NULL; 2973 | + 2974 | + while ((p = strsep(&opt, ",")) != NULL) { 2975 | + int token; 2976 | + substring_t args[MAX_OPT_ARGS]; 2977 | + 2978 | + if (!*p) 2979 | + continue; 2980 | + 2981 | + token = match_token(p, ovl_tokens, args); 2982 | + switch (token) { 2983 | + case Opt_upperdir: 2984 | + kfree(config->upperdir); 2985 | + config->upperdir = match_strdup(&args[0]); 2986 | + if (!config->upperdir) 2987 | + return -ENOMEM; 2988 | + break; 2989 | + 2990 | + case Opt_lowerdir: 2991 | + kfree(config->lowerdir); 2992 | + config->lowerdir = match_strdup(&args[0]); 2993 | + if (!config->lowerdir) 2994 | + return -ENOMEM; 2995 | + break; 2996 | + 2997 | + default: 2998 | + return -EINVAL; 2999 | + } 3000 | + } 3001 | + return 0; 3002 | +} 3003 | + 3004 | +static int ovl_fill_super(struct super_block *sb, void *data, int silent) 3005 | +{ 3006 | + struct path lowerpath; 3007 | + struct path upperpath; 3008 | + struct inode *root_inode; 3009 
| + struct dentry *root_dentry; 3010 | + struct ovl_entry *oe; 3011 | + struct ovl_fs *ufs; 3012 | + struct kstatfs statfs; 3013 | + int err; 3014 | + 3015 | + err = -ENOMEM; 3016 | + ufs = kmalloc(sizeof(struct ovl_fs), GFP_KERNEL); 3017 | + if (!ufs) 3018 | + goto out; 3019 | + 3020 | + err = ovl_parse_opt((char *) data, &ufs->config); 3021 | + if (err) 3022 | + goto out_free_ufs; 3023 | + 3024 | + err = -EINVAL; 3025 | + if (!ufs->config.upperdir || !ufs->config.lowerdir) { 3026 | + printk(KERN_ERR "overlayfs: missing upperdir or lowerdir\n"); 3027 | + goto out_free_config; 3028 | + } 3029 | + 3030 | + oe = ovl_alloc_entry(); 3031 | + if (oe == NULL) 3032 | + goto out_free_config; 3033 | + 3034 | + err = kern_path(ufs->config.upperdir, LOOKUP_FOLLOW, &upperpath); 3035 | + if (err) 3036 | + goto out_free_oe; 3037 | + 3038 | + err = kern_path(ufs->config.lowerdir, LOOKUP_FOLLOW, &lowerpath); 3039 | + if (err) 3040 | + goto out_put_upperpath; 3041 | + 3042 | + err = -ENOTDIR; 3043 | + if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) || 3044 | + !S_ISDIR(lowerpath.dentry->d_inode->i_mode)) 3045 | + goto out_put_lowerpath; 3046 | + 3047 | + err = vfs_statfs(&lowerpath, &statfs); 3048 | + if (err) { 3049 | + printk(KERN_ERR "overlayfs: statfs failed on lowerpath\n"); 3050 | + goto out_put_lowerpath; 3051 | + } 3052 | + ufs->lower_namelen = statfs.f_namelen; 3053 | + 3054 | + sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth, 3055 | + lowerpath.mnt->mnt_sb->s_stack_depth) + 1; 3056 | + 3057 | + err = -EINVAL; 3058 | + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 3059 | + printk(KERN_ERR "overlayfs: maximum fs stacking depth exceeded\n"); 3060 | + goto out_put_lowerpath; 3061 | + } 3062 | + 3063 | + 3064 | + ufs->upper_mnt = clone_private_mount(&upperpath); 3065 | + err = PTR_ERR(ufs->upper_mnt); 3066 | + if (IS_ERR(ufs->upper_mnt)) { 3067 | + printk(KERN_ERR "overlayfs: failed to clone upperpath\n"); 3068 | + goto out_put_lowerpath; 3069 | + } 3070 | 
+ 3071 | + ufs->lower_mnt = clone_private_mount(&lowerpath); 3072 | + err = PTR_ERR(ufs->lower_mnt); 3073 | + if (IS_ERR(ufs->lower_mnt)) { 3074 | + printk(KERN_ERR "overlayfs: failed to clone lowerpath\n"); 3075 | + goto out_put_upper_mnt; 3076 | + } 3077 | + 3078 | + /* 3079 | + * Make lower_mnt R/O. That way fchmod/fchown on lower file 3080 | + * will fail instead of modifying lower fs. 3081 | + */ 3082 | + ufs->lower_mnt->mnt_flags |= MNT_READONLY; 3083 | + 3084 | + /* If the upper fs is r/o, we mark overlayfs r/o too */ 3085 | + if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY) 3086 | + sb->s_flags |= MS_RDONLY; 3087 | + 3088 | + if (!(sb->s_flags & MS_RDONLY)) { 3089 | + err = mnt_want_write(ufs->upper_mnt); 3090 | + if (err) 3091 | + goto out_put_lower_mnt; 3092 | + } 3093 | + 3094 | + err = -ENOMEM; 3095 | + root_inode = ovl_new_inode(sb, S_IFDIR, oe); 3096 | + if (!root_inode) 3097 | + goto out_drop_write; 3098 | + 3099 | + root_dentry = d_make_root(root_inode); 3100 | + if (!root_dentry) 3101 | + goto out_drop_write; 3102 | + 3103 | + mntput(upperpath.mnt); 3104 | + mntput(lowerpath.mnt); 3105 | + 3106 | + oe->__upperdentry = dget(upperpath.dentry); 3107 | + oe->lowerdentry = lowerpath.dentry; 3108 | + 3109 | + root_dentry->d_fsdata = oe; 3110 | + root_dentry->d_op = &ovl_dentry_operations; 3111 | + 3112 | + sb->s_magic = OVERLAYFS_SUPER_MAGIC; 3113 | + sb->s_op = &ovl_super_operations; 3114 | + sb->s_root = root_dentry; 3115 | + sb->s_fs_info = ufs; 3116 | + 3117 | + return 0; 3118 | + 3119 | +out_drop_write: 3120 | + if (!(sb->s_flags & MS_RDONLY)) 3121 | + mnt_drop_write(ufs->upper_mnt); 3122 | +out_put_lower_mnt: 3123 | + mntput(ufs->lower_mnt); 3124 | +out_put_upper_mnt: 3125 | + mntput(ufs->upper_mnt); 3126 | +out_put_lowerpath: 3127 | + path_put(&lowerpath); 3128 | +out_put_upperpath: 3129 | + path_put(&upperpath); 3130 | +out_free_oe: 3131 | + kfree(oe); 3132 | +out_free_config: 3133 | + kfree(ufs->config.lowerdir); 3134 | + 
kfree(ufs->config.upperdir); 3135 | +out_free_ufs: 3136 | + kfree(ufs); 3137 | +out: 3138 | + return err; 3139 | +} 3140 | + 3141 | +static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 3142 | + const char *dev_name, void *raw_data) 3143 | +{ 3144 | + return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); 3145 | +} 3146 | + 3147 | +static struct file_system_type ovl_fs_type = { 3148 | + .owner = THIS_MODULE, 3149 | + .name = "overlayfs", 3150 | + .mount = ovl_mount, 3151 | + .kill_sb = kill_anon_super, 3152 | +}; 3153 | + 3154 | +static int __init ovl_init(void) 3155 | +{ 3156 | + return register_filesystem(&ovl_fs_type); 3157 | +} 3158 | + 3159 | +static void __exit ovl_exit(void) 3160 | +{ 3161 | + unregister_filesystem(&ovl_fs_type); 3162 | +} 3163 | + 3164 | +module_init(ovl_init); 3165 | +module_exit(ovl_exit); 3166 | diff --git a/fs/splice.c b/fs/splice.c 3167 | index 718bd00..0e8f44a 100644 3168 | --- a/fs/splice.c 3169 | +++ b/fs/splice.c 3170 | @@ -1308,6 +1308,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 3171 | 3172 | return ret; 3173 | } 3174 | +EXPORT_SYMBOL(do_splice_direct); 3175 | 3176 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, 3177 | struct pipe_inode_info *opipe, 3178 | diff --git a/include/linux/fs.h b/include/linux/fs.h 3179 | index 2c28271..3353de6 100644 3180 | --- a/include/linux/fs.h 3181 | +++ b/include/linux/fs.h 3182 | @@ -244,6 +244,12 @@ struct iattr { 3183 | */ 3184 | #include 3185 | 3186 | +/* 3187 | + * Maximum number of layers of fs stack. 
Needs to be limited to 3188 | + * prevent kernel stack overflow 3189 | + */ 3190 | +#define FILESYSTEM_MAX_STACK_DEPTH 2 3191 | + 3192 | /** 3193 | * enum positive_aop_returns - aop return codes with specific semantics 3194 | * 3195 | @@ -1320,6 +1326,11 @@ struct super_block { 3196 | 3197 | /* Being remounted read-only */ 3198 | int s_readonly_remount; 3199 | + 3200 | + /* 3201 | + * Indicates how deep in a filesystem stack this SB is 3202 | + */ 3203 | + int s_stack_depth; 3204 | }; 3205 | 3206 | /* superblock cache pruning functions */ 3207 | @@ -1573,6 +1584,7 @@ struct inode_operations { 3208 | int (*atomic_open)(struct inode *, struct dentry *, 3209 | struct file *, unsigned open_flag, 3210 | umode_t create_mode, int *opened); 3211 | + int (*dentry_open)(struct dentry *, struct file *, const struct cred *); 3212 | } ____cacheline_aligned; 3213 | 3214 | ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 3215 | @@ -2006,6 +2018,7 @@ extern struct file *file_open_name(struct filename *, int, umode_t); 3216 | extern struct file *filp_open(const char *, int, umode_t); 3217 | extern struct file *file_open_root(struct dentry *, struct vfsmount *, 3218 | const char *, int); 3219 | +extern int vfs_open(const struct path *, struct file *, const struct cred *); 3220 | extern struct file * dentry_open(const struct path *, int, const struct cred *); 3221 | extern int filp_close(struct file *, fl_owner_t id); 3222 | 3223 | @@ -2211,6 +2224,7 @@ extern sector_t bmap(struct inode *, sector_t); 3224 | #endif 3225 | extern int notify_change(struct dentry *, struct iattr *); 3226 | extern int inode_permission(struct inode *, int); 3227 | +extern int __inode_permission(struct inode *, int); 3228 | extern int generic_permission(struct inode *, int); 3229 | 3230 | static inline bool execute_ok(struct inode *inode) 3231 | diff --git a/include/linux/mount.h b/include/linux/mount.h 3232 | index d7029f4..344a262 100644 3233 | --- a/include/linux/mount.h 3234 
| +++ b/include/linux/mount.h 3235 | @@ -66,6 +66,9 @@ extern void mnt_pin(struct vfsmount *mnt); 3236 | extern void mnt_unpin(struct vfsmount *mnt); 3237 | extern int __mnt_is_readonly(struct vfsmount *mnt); 3238 | 3239 | +struct path; 3240 | +extern struct vfsmount *clone_private_mount(struct path *path); 3241 | + 3242 | struct file_system_type; 3243 | extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, 3244 | int flags, const char *name, 3245 | --------------------------------------------------------------------------------