├── bootstrap ├── .gitignore ├── lib ├── container_of.h ├── get_stats.c ├── ext2fs-extra.h ├── inode_async.c ├── rbtree.h ├── block_async.c ├── rbtree.c ├── list.h └── aio_manager.c ├── Makefile.am ├── lester.spec ├── configure.ac ├── README ├── lustre_lov.h ├── lester.h ├── action-namei.c ├── CC0-License ├── action-lsost.c ├── attr.c ├── lester.c ├── action-fslist.c ├── dtree.c ├── iscan.c └── dscan.c /bootstrap: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | aclocal && automake --foreign --add-missing && autoconf 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | .*.dep 3 | .*.swp 4 | lester 5 | 6 | # Autotools debri 7 | .deps/ 8 | lib/.deps/ 9 | lib/.dirstamp 10 | Makefile 11 | Makefile.in 12 | aclocal.m4 13 | autom4te.cache/ 14 | compile 15 | config.log 16 | config.status 17 | configure 18 | depcomp 19 | install-sh 20 | missing 21 | -------------------------------------------------------------------------------- /lib/container_of.h: -------------------------------------------------------------------------------- 1 | /* From Rusty Russell's CCAN, License CC0 (Public Domain) 2 | * See CC0-License in top level directory 3 | */ 4 | 5 | #ifndef _CONTAINER_OF_H 6 | #define _CONTAINER_OF_H 7 | 8 | #define check_types_match(expr1, expr2) \ 9 | ((typeof(expr1) *)0 != (typeof(expr2) *)0) 10 | 11 | #define container_of(member_ptr, containing_type, member) \ 12 | ((containing_type *) \ 13 | ((char *)(member_ptr) - offsetof(containing_type, member)) \ 14 | - check_types_match(*(member_ptr), ((containing_type *)0)->member)) 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | AUTOMAKE_OPTIONS = -Wall foreign 3 | ACLOCAL_AMFLAGS = 
${ACLOCAL_FLAGS} 4 | 5 | AM_CPPFLAGS = -Ilib 6 | AM_CFLAGS = -Wall -Werror 7 | 8 | bin_PROGRAMS = lester 9 | 10 | lester_SOURCES = lester.c iscan.c dscan.c dtree.c attr.c \ 11 | lib/aio_manager.c lib/block_async.c lib/inode_async.c \ 12 | lib/get_stats.c lib/rbtree.c action-fslist.c action-namei.c \ 13 | action-lsost.c 14 | lester_CFLAGS = -Ilib 15 | 16 | # Clean up all the generated files that are ignored in the source repo 17 | # 18 | mrproper: maintainer-clean 19 | rm -f Makefile.in aclocal.m4 configure 20 | rm -f compile depcomp install-sh missing 21 | -------------------------------------------------------------------------------- /lib/get_stats.c: -------------------------------------------------------------------------------- 1 | /* get_stats.c -- API to get per-backend stats from the IO manager 2 | * 3 | * Copyright (C) 2013 UT-Battelle. 4 | * 5 | * This file may be redistributed under the terms of the GNU Library General 6 | * Public License version 2; see COPYING for details. 7 | */ 8 | #include <ext2fs/ext2fs.h> 9 | #include "ext2fs-extra.h" 10 | 11 | errcode_t io_channel_get_stats(io_channel channel, io_stats *stats) 12 | { 13 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 14 | 15 | if (!channel->manager->get_stats) 16 | return EXT2_ET_UNIMPLEMENTED; 17 | 18 | return channel->manager->get_stats(channel, stats); 19 | } 20 | -------------------------------------------------------------------------------- /lester.spec: -------------------------------------------------------------------------------- 1 | Name: lester 2 | Version: 1.0 3 | Release: 1%{?dist} 4 | Summary: Lester, the Lustre lister 5 | 6 | #Group: 7 | License: GPLv2 8 | URL: https://github.com/ORNL-TechInt/lester 9 | Source0: https://github.com/ORNL-TechInt/lester-1.0.tar.gz 10 | Packager: Blake Caldwell 11 | 12 | BuildRequires: e2fsprogs-devel >= 1.42.7 13 | BuildRequires: libcom_err-devel >= 1.42.7 14 | BuildRequires: libaio-devel 15 | BuildRequires: autoconf 16 | BuildRequires: automake 17 | Requires: 
e2fsprogs-libs >= 1.42.7 18 | Requires: libcom_err >= 1.42.7 19 | Requires: libaio 20 | 21 | %description 22 | Lester is an extension of e2scan for generating lists of files (and potentially 23 | their attributes) from an ext2/ext3/ext4/ldiskfs filesystem. 24 | 25 | %prep 26 | %setup -q 27 | 28 | 29 | %build 30 | ./bootstrap 31 | %configure 32 | make %{?_smp_mflags} 33 | 34 | 35 | %install 36 | rm -rf $RPM_BUILD_ROOT 37 | make install DESTDIR=$RPM_BUILD_ROOT 38 | 39 | 40 | %clean 41 | rm -rf $RPM_BUILD_ROOT 42 | 43 | 44 | %files 45 | %defattr(-,root,root,-) 46 | %doc README 47 | /usr/bin/lester 48 | 49 | 50 | 51 | %changelog 52 | * Tue Dec 31 2013 Blake Caldwell - 1.0 53 | - Initial RPM packaging of lester 54 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_INIT([lester], [1.0], [help@nccs.gov], [lester]) 2 | AC_PREREQ([2.59]) 3 | AM_INIT_AUTOMAKE([1.11 -Wall no-define foreign silent-rules subdir-objects]) 4 | AM_SILENT_RULES([yes]) 5 | 6 | AC_PROG_CC 7 | AM_PROG_CC_C_O 8 | 9 | PKG_PROG_PKG_CONFIG() 10 | 11 | dnl We may be able to use older versions, but I have not verified that 12 | PKG_CHECK_MODULES([ext2fs], [ext2fs >= 1.42.7 com_err >= 1.42.7]) 13 | 14 | dnl Pull the needed libraries into LIBS (needed for the AC_LINK_IFELSE below) 15 | dnl These should never fail if the PKG_CHECK above passes 16 | AC_SEARCH_LIBS([com_err], [com_err]) 17 | AC_SEARCH_LIBS([ext2fs_open2], [ext2fs]) 18 | AC_SEARCH_LIBS([io_queue_init], [aio], [], 19 | AC_MSG_ERROR([unable to find io_queue_init() -- install libaio-devel?])) 20 | 21 | dnl io_channel_readahead() is actually a #define, so we cannot just use 22 | dnl AC_CHECK_LIB here 23 | AC_MSG_CHECKING([if we're using Lustre's libext2fs]) 24 | AC_LINK_IFELSE([ 25 | AC_LANG_SOURCE([[#include <ext2fs/ext2fs.h> 26 | int main() { 27 | ext2_filsys fs; 28 | return io_channel_readahead(fs->io, 0, 0); 29 | } 30 | ]])], 31 | 
[AC_MSG_RESULT(yes) 32 | AC_DEFINE([HAVE_LUSTRE_EXTFS2], [1], 33 | [Define non-zero if using Lustre e2fsprogs])], 34 | [AC_MSG_RESULT(no) 35 | AC_DEFINE([HAVE_LUSTRE_EXTFS2], [0], 36 | [Define non-zero if using Lustre e2fsprogs])]) 37 | 38 | AC_CONFIG_FILES([Makefile]) 39 | AC_OUTPUT 40 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Lester, the Lustre lister 2 | 3 | Lester is an extension of e2scan for generating lists of files (and potentially 4 | their attributes) from an ext2/ext3/ext4/ldiskfs filesystem. We primarily use it 5 | for generating a purge candidate list, but it is also useful for generating a 6 | list of files affected by an OST outage or providing a name for an inode. 7 | 8 | For example, to list files that have not been accessed in two weeks and 9 | put the output in ne2scan format in $OUTFILE: 10 | 11 | touch -d 'now - 2 weeks' /tmp/flag 12 | lester -A fslist -a before=/tmp/flag -o $OUTFILE $BLOCKDEV 13 | 14 | To do the same thing, but generate a full listing of the filesystem in 15 | parallel: 16 | 17 | touch -d 'now - 2 weeks' /tmp/flag 18 | lester -A fslist -a before=/tmp/flag -a genhit=$UNACCESSED_LIST \ 19 | -o $FULL_LIST $BLOCKDEV 20 | 21 | To name inodes to stdout (when not using Lustre 2.4's LINKEA): 22 | 23 | lester -A namei -a $INODE1 -a $INODE2 ... $BLOCKDEV 24 | 25 | To get a list of files with objects on OSTs 999 and 1000: 26 | 27 | lester -A lsost -a 999 -a 1000 -o $OUTFILE $BLOCKDEV 28 | 29 | To get a list of options and actions, use 'lester -h'; to get a list of 30 | options for a given action, use 'lester -A $ACTION -a help'. 31 | 32 | Lester uses its own AIO-based IO engine by default, which is usually much 33 | faster than the default Unix engine for large filesystems on high-performance 34 | devices. 
The number of requests in flight, request size, cache size, and 35 | read-ahead settings for various phases of the scan are all configurable. I 36 | recommend experimenting with the settings to find a balance between speed and 37 | resource usage for your situation. 38 | 39 | -- David Dillow 40 | 41 | 42 | DEPENDENCIES 43 | 44 | libext2fs-devel 1.42.7 or newer (Lustre version preferred) 45 | libcom_err-devel 1.42.7 or newer (Lustre version preferred) 46 | libaio-devel 47 | 48 | Older e2fsprogs libraries may work, but have not been tested 49 | 50 | BUILDING 51 | 52 | ./bootstrap (if building from git repo) 53 | ./configure 54 | make 55 | -------------------------------------------------------------------------------- /lib/ext2fs-extra.h: -------------------------------------------------------------------------------- 1 | /* ext2fs-extra.h -- extensions to libext2fs for async operations 2 | * 3 | * Copyright (C) 2013 UT-Battelle. 4 | * 5 | * This file may be redistributed under the terms of the GNU Library General 6 | * Public License version 2; see COPYING for details. 
7 | */ 8 | #ifndef __ext2fs_extra_h__ 9 | #define __ext2fs_extra_h__ 10 | 11 | struct struct_aio_stats { 12 | struct struct_io_stats base; 13 | unsigned long async_instream; 14 | unsigned long max_async; 15 | unsigned long total_async; 16 | unsigned long issued_requests; 17 | unsigned long completed_requests; 18 | unsigned long merged_async_issued; 19 | unsigned long merged_async; 20 | unsigned long long merged_gap_bytes; 21 | }; 22 | 23 | typedef struct struct_aio_stats *aio_stats; 24 | extern io_manager aio_io_manager; 25 | 26 | 27 | errcode_t io_channel_get_stats(io_channel channel, io_stats *stats); 28 | 29 | errcode_t io_channel_async_read(io_channel channel, unsigned long block, 30 | int count, int (*cb)(ext2_loff_t offset, 31 | ssize_t size, 32 | void *priv1, 33 | unsigned long priv2, 34 | void *data), 35 | void *priv1, unsigned long priv2); 36 | 37 | /* max_async is the number of async requests allowed to remain after this call 38 | */ 39 | errcode_t io_channel_finish_async(io_channel channel, unsigned long max_async); 40 | errcode_t io_channel_async_count(io_channel channel, unsigned long *count); 41 | 42 | errcode_t ext2fs_block_iterate_async(ext2_filsys fs, ext2_ino_t ino, 43 | struct ext2_inode *inode, 44 | int (*func)(ext2_filsys fs, 45 | blk64_t blocknr, 46 | e2_blkcnt_t blockcnt, 47 | void *priv_data), 48 | void (*end)(ext2_filsys fs, 49 | errcode_t errcode, 50 | void *priv_data), 51 | void *priv_data); 52 | 53 | struct ext2fs_inode_async { 54 | ext2_filsys fs; 55 | ext2_ino_t ino; 56 | int (*func)(ext2_filsys fs, ext2_ino_t ino, struct ext2_inode *inode, void *priv); 57 | void *priv; 58 | int allocated; 59 | }; 60 | 61 | errcode_t ext2fs_read_inode_async(ext2_filsys fs, ext2_ino_t ino, 62 | struct ext2fs_inode_async *async, 63 | int (*func)(ext2_filsys fs, 64 | ext2_ino_t ino, 65 | struct ext2_inode *inode, 66 | void *priv), 67 | void *priv); 68 | 69 | #if !HAVE_LUSTRE_EXTFS2 70 | /* Already in Lustre's libe2fs */ 71 | errcode_t 
io_channel_readahead(io_channel channel, unsigned long block, 72 | int count); 73 | #endif 74 | 75 | #endif /* __ext2fs_extra_h__ */ 76 | -------------------------------------------------------------------------------- /lib/inode_async.c: -------------------------------------------------------------------------------- 1 | /* inode_async.c --- utility routines to asynchronously read inodes 2 | * 3 | * Copyright (C) 1993, 1994, 1995, 1996, 1997 Theodore Ts'o. 4 | * Copyright (C) 2013 UT-Battelle. 5 | * 6 | * This file may be redistributed under the terms of the GNU Library Public 7 | * License, version 2. 8 | */ 9 | #include <ext2fs/ext2fs.h> 10 | #include "ext2fs-extra.h" 11 | 12 | int ext2fs_read_inode_async_helper(ext2_loff_t offset, ssize_t size, 13 | void *priv1, unsigned long priv2, 14 | void *data) 15 | { 16 | struct ext2fs_inode_async * async = priv1; 17 | struct ext2_inode_large *inode = data + priv2; 18 | int rc; 19 | 20 | #ifdef WORDS_BIGENDIAN 21 | ext2fs_swap_inode_full(async->fs, (struct ext2_inode_large *) inode, 22 | (struct ext2_inode_large *) inode, 23 | 0, EXT2_INODE_SIZE(async->fs->super)); 24 | #endif 25 | 26 | rc = async->func(async->fs, async->ino,(struct ext2_inode *) inode, 27 | async->priv); 28 | if (async->allocated) 29 | ext2fs_free_mem(&async); 30 | return rc; 31 | } 32 | 33 | errcode_t ext2fs_read_inode_async(ext2_filsys fs, ext2_ino_t ino, 34 | struct ext2fs_inode_async *async, 35 | int (*func)(ext2_filsys fs, 36 | ext2_ino_t ino, 37 | struct ext2_inode *inode, 38 | void *priv), 39 | void *priv) 40 | { 41 | unsigned long group, block, block_nr, offset; 42 | errcode_t rc; 43 | int blocks; 44 | 45 | EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); 46 | 47 | if ((ino == 0) || (ino > fs->super->s_inodes_count)) 48 | return EXT2_ET_BAD_INODE_NUM; 49 | 50 | if (!async) { 51 | rc = ext2fs_get_mem(sizeof(struct ext2fs_inode_async), &async); 52 | if (rc) 53 | return rc; 54 | async->allocated = 1; 55 | } else 56 | async->allocated = 0; 57 | 58 | async->fs = fs; 
59 | async->ino = ino; 60 | async->func = func; 61 | async->priv = priv; 62 | 63 | group = (ino - 1) / EXT2_INODES_PER_GROUP(fs->super); 64 | rc = EXT2_ET_BAD_INODE_NUM; 65 | if (group > fs->group_desc_count) 66 | goto err; 67 | offset = ((ino - 1) % EXT2_INODES_PER_GROUP(fs->super)) * 68 | EXT2_INODE_SIZE(fs->super); 69 | block = offset >> EXT2_BLOCK_SIZE_BITS(fs->super); 70 | rc = EXT2_ET_MISSING_INODE_TABLE; 71 | if (!ext2fs_inode_table_loc(fs, group)) 72 | goto err; 73 | block_nr = ext2fs_inode_table_loc(fs, group) + block; 74 | 75 | offset &= (EXT2_BLOCK_SIZE(fs->super) - 1); 76 | 77 | blocks = (EXT2_INODE_SIZE(fs->super) + EXT2_BLOCK_SIZE(fs->super) - 1); 78 | blocks /= EXT2_BLOCK_SIZE(fs->super); 79 | 80 | rc = io_channel_async_read(fs->io, block_nr, blocks, 81 | ext2fs_read_inode_async_helper, async, 82 | offset); 83 | err: 84 | if (rc && async->allocated) 85 | ext2fs_free_mem(&async); 86 | return rc; 87 | } 88 | -------------------------------------------------------------------------------- /lustre_lov.h: -------------------------------------------------------------------------------- 1 | /* lustre_lov.h -- Lustre structures 2 | * 3 | * Derived from Lustre headers 4 | * 5 | * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 6 | * Use is subject to license terms. 7 | * 8 | * Copyright (c) 2011, 2013, Intel Corporation. 9 | * 10 | * This file may be redistributed under the terms of the GNU General 11 | * Public License version 2; see COPYING for details. 12 | */ 13 | #ifndef __lustre_lov_h 14 | #define __lustre_lov_h 1 15 | 16 | /* Should use Lustre's headers, if possible. This is here for build 17 | * testing against plain libext2fs. 
18 | */ 19 | #ifndef EXT2_XATTR_INDEX_TRUSTED 20 | #define EXT2_XATTR_INDEX_TRUSTED 4 21 | #endif 22 | #ifndef EXT2_XATTR_INDEX_LUSTRE 23 | #define EXT2_XATTR_INDEX_LUSTRE 5 24 | #endif 25 | 26 | /* From lustre_idl.h */ 27 | #define LOV_MAGIC_V1 0x0BD10BD0 28 | #define LOV_MAGIC_V3 0x0BD30BD0 29 | #define MAXPOOLNAME 16 30 | 31 | struct lov_ost_data_v1 { /* per-stripe data structure (little-endian)*/ 32 | __u64 l_object_id; /* OST object ID */ 33 | __u64 l_object_gr; /* OST object group (creating MDS number) */ 34 | __u32 l_ost_gen; /* generation of this l_ost_idx */ 35 | __u32 l_ost_idx; /* OST index in LOV (lov_tgt_desc->tgts) */ 36 | }; 37 | 38 | struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ 39 | __u32 lmm_magic; /* magic number = LOV_MAGIC_V1 */ 40 | __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */ 41 | __u64 lmm_object_id; /* LOV object ID */ 42 | __u64 lmm_object_gr; /* LOV object group */ 43 | __u32 lmm_stripe_size; /* size of stripe in bytes */ 44 | __u32 lmm_stripe_count; /* num stripes in use for this object */ 45 | struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */ 46 | }; 47 | 48 | struct lov_mds_md_v3 { /* LOV EA mds/wire data (little-endian) */ 49 | __u32 lmm_magic; /* magic number = LOV_MAGIC_V3 */ 50 | __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */ 51 | __u64 lmm_object_id; /* LOV object ID */ 52 | __u64 lmm_object_gr; /* LOV object group */ 53 | __u32 lmm_stripe_size; /* size of stripe in bytes */ 54 | __u32 lmm_stripe_count; /* num stripes in use for this object */ 55 | char lmm_pool_name[MAXPOOLNAME]; /* must be 32bit aligned */ 56 | struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */ 57 | }; 58 | 59 | struct lu_fid { 60 | /** 61 | * FID sequence. Sequence is a unit of migration: all files (objects) 62 | * with FIDs from a given sequence are stored on the same server. 
63 | * Lustre should support 2^64 objects, so even if each sequence 64 | * has only a single object we can still enumerate 2^64 objects. 65 | */ 66 | __u64 f_seq; 67 | /** FID number within sequence. */ 68 | __u32 f_oid; 69 | /** 70 | * FID version, used to distinguish different versions (in the sense 71 | * of snapshots, etc.) of the same file system object. Not currently 72 | * used. 73 | */ 74 | __u32 f_ver; 75 | }; 76 | 77 | struct lustre_mdt_attrs { 78 | /** 79 | * Bitfield for supported data in this structure. From enum lma_compat. 80 | * lma_self_fid and lma_flags are always available. 81 | */ 82 | __u32 lma_compat; 83 | /** 84 | * Per-file incompat feature list. Lustre version should support all 85 | * flags set in this field. The supported feature mask is available in 86 | * LMA_INCOMPAT_SUPP. 87 | */ 88 | __u32 lma_incompat; 89 | /** FID of this inode */ 90 | struct lu_fid lma_self_fid; 91 | /** mdt/ost type, others */ 92 | __u64 lma_flags; 93 | /* IO Epoch SOM attributes belongs to */ 94 | __u64 lma_ioepoch; 95 | /** total file size in objects */ 96 | __u64 lma_som_size; 97 | /** total fs blocks in objects */ 98 | __u64 lma_som_blocks; 99 | /** mds mount id the size is valid for */ 100 | __u64 lma_som_mountid; 101 | }; 102 | 103 | #endif /* __lustre_lov_h */ 104 | -------------------------------------------------------------------------------- /lester.h: -------------------------------------------------------------------------------- 1 | /* lester.h -- Lester, the Lustre lister 2 | * 3 | * Copyright (C) 2013 UT-Battelle. 4 | * 5 | * This file may be redistributed under the terms of the GNU General 6 | * Public License version 2; see COPYING for details. 
7 | */ 8 | #ifndef __lester_h 9 | #define __lester_h 1 10 | 11 | #include <ext2fs/ext2fs.h> 12 | #include "ext2fs-extra.h" 13 | #include "rbtree.h" 14 | #include "list.h" 15 | 16 | struct dentry { 17 | struct rb_node tree; 18 | struct dentry *d_parent; 19 | char *name; 20 | struct list_head d_children; /* a list of my children */ 21 | struct list_head list; /* My location on parent's child list */ 22 | ext2_ino_t ino; 23 | unsigned namelen:8; 24 | unsigned connected_to_root:1; 25 | unsigned is_file:1; 26 | unsigned is_dir:1; 27 | unsigned not_in_root:1; 28 | unsigned is_printed:1; 29 | unsigned refs; 30 | }; 31 | 32 | struct ea_info { 33 | /* These fields should be considered constant by users */ 34 | /* End of the EAs is indicated by name == NULL */ 35 | char *name; 36 | void *value; 37 | ext2_ino_t ext_ino; 38 | unsigned int value_len; 39 | unsigned char name_len; 40 | unsigned char index; 41 | int allocated; 42 | 43 | /* State info for async read of the external value file */ 44 | struct ext2_inode *inode; 45 | void (*done)(void *); 46 | void *data; 47 | unsigned int pending; 48 | 49 | /* Users can change these fields to request the value */ 50 | int requested; 51 | }; 52 | 53 | struct action_ops { 54 | const char *name; 55 | 56 | int (*init)(const char *dev, int argc, const char **argv); 57 | void (*help)(void); 58 | int (*iscan_begin)(void); 59 | int (*iscan)(ext2_ino_t, struct ext2_inode *, struct ea_info *ea); 60 | int (*iscan_end)(void); 61 | int (*dscan_begin)(void); 62 | int (*dscan)(ext2_ino_t, struct ext2_inode *, struct dentry *parent, 63 | const char *name, int namelen, struct ea_info *ea); 64 | int (*dscan_end)(void); 65 | 66 | unsigned int flags; 67 | }; 68 | 69 | enum { 70 | ACTION_COMPLETE = 0, 71 | ACTION_END_SCAN = 1, 72 | ACTION_WANT_PATH = 2, 73 | ACTION_WANT_INODE = 4, 74 | ACTION_WANT_ATTRS = 8, 75 | ACTION_WANT_READ_ATTRS = 16, 76 | ACTION_IGNORE_FILE = 32, 77 | }; 78 | 79 | #define ACTION_FLAG_ISCAN_NO_EAS 1 80 | 81 | /* From lester.c */ 82 | int 
enforce_async_limit(void); 83 | void diff_timevals(struct timeval *start, struct timeval *end, 84 | struct timeval *out); 85 | 86 | 87 | /* From dtree.c */ 88 | void ignore_file(ext2_ino_t ino); 89 | int is_file_interesting(ext2_ino_t ino); 90 | int create_root_dentries(char *root); 91 | struct dentry *create_dentry(ext2_ino_t ino); 92 | void dtree_add_dir(ext2_ino_t ino); 93 | void dtree_get_ino(ext2_ino_t ino); 94 | void dtree_put_ino(ext2_ino_t ino); 95 | void get_dentry(struct dentry *dentry); 96 | void put_dentry(struct dentry *dentry); 97 | struct dentry *find_dentry(ext2_ino_t ino); 98 | void dentry_attach_name(struct dentry *dentry, int namelen, const char *name); 99 | int dtree_name_dir(struct dentry *parent, ext2_ino_t ino, 100 | const char *name, int namelen); 101 | int dtree_name_file(struct dentry *parent, ext2_ino_t ino, 102 | const char *name, int namelen); 103 | int build_path(struct dentry *dentry, int len); 104 | 105 | /* From iscan.c */ 106 | int scan_inodes(const char *dev); 107 | 108 | /* From dscan.c */ 109 | int resolve_paths(void); 110 | int path_resolved(ext2_ino_t ino, struct dentry *parent, const char *name, 111 | int namelen, struct dentry *entry); 112 | 113 | /* From attr.c */ 114 | struct ea_info *build_ea_info(struct ext2_inode *in, void *ext_attr); 115 | struct ea_info *ea_memory_change(struct ea_info *orig, struct ext2_inode *in, 116 | void *ext_attr); 117 | void release_ea_info(struct ea_info *ea); 118 | void async_read_ea_value(struct ea_info *eas, void (*done)(void *), void *data); 119 | extern unsigned long ea_ext_value_read, ea_ext_block_read; 120 | 121 | /* Config params, defined in lester.c */ 122 | extern ext2_filsys fs; 123 | extern FILE *outfile; 124 | 125 | extern char *root_path; 126 | extern unsigned int verbosity; 127 | extern int use_unix; 128 | extern unsigned long grp_readahead; 129 | extern unsigned long dir_readahead; 130 | extern struct action_ops *scan_action; 131 | 132 | /* Action structures */ 133 | extern 
struct action_ops fslist_action; 134 | extern struct action_ops namei_action; 135 | extern struct action_ops lsost_action; 136 | 137 | extern char *path_buffer; 138 | extern unsigned long dentries_freed; 139 | extern unsigned long dentries_created; 140 | 141 | #endif /* __lester_h */ 142 | -------------------------------------------------------------------------------- /action-namei.c: -------------------------------------------------------------------------------- 1 | /* action-namei.c -- given a list of inodes, find their path name(s) 2 | * 3 | * Copyright (C) 2013 UT-Battelle. 4 | * 5 | * This file may be redistributed under the terms of the GNU General 6 | * Public License version 2; see COPYING for details. 7 | */ 8 | #include "lester.h" 9 | #include "rbtree.h" 10 | 11 | struct target_inode { 12 | struct rb_node rb_node; 13 | ext2_ino_t ino; 14 | unsigned int nlinks; 15 | }; 16 | 17 | static struct rb_root namei_targets = RB_ROOT; 18 | static int namei_all_names; 19 | 20 | static void namei_help(void) 21 | { 22 | fprintf(stderr, "Action arguments for namei:\n"); 23 | fprintf(stderr, " file=FILE\t\tRead list of inodes from FILE\n"); 24 | fprintf(stderr, " all_names\t\tList all names for a file\n"); 25 | fprintf(stderr, " NUMBER\t\tInode number to name\n"); 26 | fprintf(stderr, "\nAs many inode numbers as needed may be listed\n"); 27 | } 28 | 29 | static void namei_add_inode(ext2_ino_t ino) 30 | { 31 | struct target_inode *t, *n; 32 | struct rb_node **p = &namei_targets.rb_node; 33 | struct rb_node *parent = NULL; 34 | 35 | n = malloc(sizeof(*n)); 36 | if (!n) { 37 | fprintf(stderr, "Unable to allocate space for inode\n"); 38 | exit(1); 39 | } 40 | 41 | RB_CLEAR_NODE(&n->rb_node); 42 | n->ino = ino; 43 | n->nlinks = 1; 44 | 45 | while (*p) { 46 | parent = *p; 47 | t = rb_entry(parent, struct target_inode, rb_node); 48 | 49 | if (ino < t->ino) 50 | p = &(*p)->rb_left; 51 | else if (ino > t->ino) 52 | p = &(*p)->rb_right; 53 | else 54 | return; 55 | } 56 | 57 | 
rb_link_node(&n->rb_node, parent, p); 58 | rb_insert_color(&n->rb_node, &namei_targets); 59 | } 60 | 61 | static struct target_inode *namei_find_inode(ext2_ino_t ino) 62 | { 63 | struct rb_node *n = namei_targets.rb_node; 64 | struct target_inode *t; 65 | 66 | while (n) { 67 | t = rb_entry(n, struct target_inode, rb_node); 68 | 69 | if (ino < t->ino) 70 | n = n->rb_left; 71 | else if (ino > t->ino) 72 | n = n->rb_right; 73 | else 74 | return t; 75 | } 76 | 77 | return NULL; 78 | } 79 | 80 | static int namei_init(const char *device, int argc, const char **argv) 81 | { 82 | unsigned long ino; 83 | FILE *file; 84 | int rc; 85 | 86 | while (argc--) { 87 | if (!strcmp(*argv, "all_names")) { 88 | namei_all_names = 1; 89 | } else if (!strncmp(*argv, "file=", 5)) { 90 | file = fopen(*argv + 5, "r"); 91 | if (!file) { 92 | int e = errno; 93 | fprintf(stderr, "Unable to open "); 94 | errno = e; 95 | perror(*argv + 5); 96 | return 1; 97 | } 98 | 99 | while (!feof(file)) { 100 | rc = fscanf(file, "%lu", &ino); 101 | if (rc == 1) 102 | namei_add_inode(ino); 103 | else if (rc != EOF) { 104 | fprintf(stderr, "Bad read from %s\n", 105 | *argv + 5); 106 | fclose(file); 107 | return 1; 108 | } 109 | } 110 | fclose(file); 111 | } else { 112 | char *end; 113 | if (!**argv) { 114 | fprintf(stderr, "Unable to parse empty action " 115 | "arg\n"); 116 | return 1; 117 | } 118 | ino = strtoul(*argv, &end, 0); 119 | if (*end || end == *argv) { 120 | fprintf(stderr, "Invalid action argument " 121 | "'%s'\n", *argv); 122 | return 1; 123 | } 124 | namei_add_inode(ino); 125 | } 126 | 127 | argv++; 128 | } 129 | 130 | if (RB_EMPTY_ROOT(&namei_targets)) { 131 | fprintf(stderr, "No inodes given to name\n"); 132 | return 1; 133 | } 134 | 135 | return 0; 136 | } 137 | 138 | static int namei_iscan(ext2_ino_t ino, struct ext2_inode *inode, 139 | struct ea_info *eas) 140 | { 141 | struct target_inode *t = namei_find_inode(ino); 142 | 143 | /* If it isn't in our tree, we don't care about it */ 144 | if 
(!t) 145 | return ACTION_COMPLETE; 146 | 147 | if (namei_all_names && !LINUX_S_ISDIR(inode->i_mode)) 148 | t->nlinks = inode->i_links_count; 149 | else 150 | t->nlinks = 1; 151 | 152 | return ACTION_WANT_PATH; 153 | } 154 | 155 | static int namei_dscan(ext2_ino_t ino, struct ext2_inode *inode, 156 | struct dentry *parent, const char *name, int namelen, 157 | struct ea_info *eas) 158 | { 159 | struct target_inode *t = namei_find_inode(ino); 160 | int offset; 161 | 162 | /* We may have already printed a name for this inode, and no longer 163 | * care about it. 164 | */ 165 | if (!t) 166 | return ACTION_COMPLETE; 167 | 168 | if (--t->nlinks == 0) 169 | rb_erase(&t->rb_node, &namei_targets); 170 | 171 | offset = build_path(parent, 0); 172 | fprintf(outfile, "%lu %.*s%.*s\n", ino, offset, path_buffer, 173 | namelen, name); 174 | 175 | if (RB_EMPTY_ROOT(&namei_targets)) 176 | return ACTION_END_SCAN; 177 | 178 | return ACTION_COMPLETE; 179 | } 180 | 181 | struct action_ops namei_action = { 182 | .name = "namei", 183 | .init = namei_init, 184 | .help = namei_help, 185 | .iscan = namei_iscan, 186 | .dscan = namei_dscan, 187 | .flags = ACTION_FLAG_ISCAN_NO_EAS, 188 | }; 189 | -------------------------------------------------------------------------------- /lib/rbtree.h: -------------------------------------------------------------------------------- 1 | /* 2 | Red Black Trees 3 | (C) 1999 Andrea Arcangeli 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 2 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 
14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program; if not, write to the Free Software 17 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 | 19 | linux/include/linux/rbtree.h 20 | 21 | To use rbtrees you'll have to implement your own insert and search cores. 22 | This will avoid us to use callbacks and to drop drammatically performances. 23 | I know it's not the cleaner way, but in C (not in C++) to get 24 | performances and genericity... 25 | 26 | Some example of insert and search follows here. The search is a plain 27 | normal search over an ordered tree. The insert instead must be implemented 28 | in two steps: First, the code must insert the element in order as a red leaf 29 | in the tree, and then the support library function rb_insert_color() must 30 | be called. Such function will do the not trivial work to rebalance the 31 | rbtree, if necessary. 32 | 33 | ----------------------------------------------------------------------- 34 | static inline struct page * rb_search_page_cache(struct inode * inode, 35 | unsigned long offset) 36 | { 37 | struct rb_node * n = inode->i_rb_page_cache.rb_node; 38 | struct page * page; 39 | 40 | while (n) 41 | { 42 | page = rb_entry(n, struct page, rb_page_cache); 43 | 44 | if (offset < page->offset) 45 | n = n->rb_left; 46 | else if (offset > page->offset) 47 | n = n->rb_right; 48 | else 49 | return page; 50 | } 51 | return NULL; 52 | } 53 | 54 | static inline struct page * __rb_insert_page_cache(struct inode * inode, 55 | unsigned long offset, 56 | struct rb_node * node) 57 | { 58 | struct rb_node ** p = &inode->i_rb_page_cache.rb_node; 59 | struct rb_node * parent = NULL; 60 | struct page * page; 61 | 62 | while (*p) 63 | { 64 | parent = *p; 65 | page = rb_entry(parent, struct page, rb_page_cache); 66 | 67 | if (offset < page->offset) 68 | p = &(*p)->rb_left; 69 | else if (offset > page->offset) 70 | p = &(*p)->rb_right; 71 | else 72 | return 
page; 73 | } 74 | 75 | rb_link_node(node, parent, p); 76 | 77 | return NULL; 78 | } 79 | 80 | static inline struct page * rb_insert_page_cache(struct inode * inode, 81 | unsigned long offset, 82 | struct rb_node * node) 83 | { 84 | struct page * ret; 85 | if ((ret = __rb_insert_page_cache(inode, offset, node))) 86 | goto out; 87 | rb_insert_color(node, &inode->i_rb_page_cache); 88 | out: 89 | return ret; 90 | } 91 | ----------------------------------------------------------------------- 92 | */ 93 | 94 | #ifndef _LINUX_RBTREE_H 95 | #define _LINUX_RBTREE_H 96 | 97 | #include <stddef.h> 98 | #include "container_of.h" 99 | 100 | struct rb_node 101 | { 102 | unsigned long rb_parent_color; 103 | #define RB_RED 0 104 | #define RB_BLACK 1 105 | struct rb_node *rb_right; 106 | struct rb_node *rb_left; 107 | } __attribute__((aligned(sizeof(long)))); 108 | /* The alignment might seem pointless, but allegedly CRIS needs it */ 109 | 110 | struct rb_root 111 | { 112 | struct rb_node *rb_node; 113 | }; 114 | 115 | 116 | #define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3)) 117 | #define rb_color(r) ((r)->rb_parent_color & 1) 118 | #define rb_is_red(r) (!rb_color(r)) 119 | #define rb_is_black(r) rb_color(r) 120 | #define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0) 121 | #define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0) 122 | 123 | static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) 124 | { 125 | rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p; 126 | } 127 | static inline void rb_set_color(struct rb_node *rb, int color) 128 | { 129 | rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; 130 | } 131 | 132 | #define RB_ROOT (struct rb_root) { NULL, } 133 | #define rb_entry(ptr, type, member) container_of(ptr, type, member) 134 | 135 | #define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) 136 | #define RB_EMPTY_NODE(node) (rb_parent(node) == node) 137 | #define RB_CLEAR_NODE(node) (rb_set_parent(node, 
node)) 138 | 139 | extern void rb_insert_color(struct rb_node *, struct rb_root *); 140 | extern void rb_erase(struct rb_node *, struct rb_root *); 141 | 142 | typedef void (*rb_augment_f)(struct rb_node *node, void *data); 143 | 144 | extern void rb_augment_insert(struct rb_node *node, 145 | rb_augment_f func, void *data); 146 | extern struct rb_node *rb_augment_erase_begin(struct rb_node *node); 147 | extern void rb_augment_erase_end(struct rb_node *node, 148 | rb_augment_f func, void *data); 149 | 150 | /* Find logical next and previous nodes in a tree */ 151 | extern struct rb_node *rb_next(const struct rb_node *); 152 | extern struct rb_node *rb_prev(const struct rb_node *); 153 | extern struct rb_node *rb_first(const struct rb_root *); 154 | extern struct rb_node *rb_last(const struct rb_root *); 155 | 156 | /* Fast replacement of a single node without remove/rebalance/add/rebalance */ 157 | extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, 158 | struct rb_root *root); 159 | 160 | static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, 161 | struct rb_node ** rb_link) 162 | { 163 | node->rb_parent_color = (unsigned long )parent; 164 | node->rb_left = node->rb_right = NULL; 165 | 166 | *rb_link = node; 167 | } 168 | 169 | #endif /* _LINUX_RBTREE_H */ 170 | -------------------------------------------------------------------------------- /CC0-License: -------------------------------------------------------------------------------- 1 | Statement of Purpose 2 | 3 | The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). 
4 | 5 | Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. 6 | 7 | For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. 8 | 9 | 1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). 
Copyright and Related Rights include, but are not limited to, the following: 10 | 11 | the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; 12 | moral rights retained by the original author(s) and/or performer(s); 13 | publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; 14 | rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; 15 | rights protecting the extraction, dissemination, use and reuse of data in a Work; 16 | database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and 17 | other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. 18 | 19 | 2. Waiver. To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). 
Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. 20 | 21 | 3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. 22 | 23 | 4. Limitations and Disclaimers. 
24 | 25 | No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. 26 | Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. 27 | Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. 28 | Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. 29 | -------------------------------------------------------------------------------- /action-lsost.c: -------------------------------------------------------------------------------- 1 | /* action-lsost.c -- find files with objects on specified OSTs 2 | * 3 | * Copyright (C) 2013 UT-Battelle. 4 | * 5 | * This file may be redistributed under the terms of the GNU General 6 | * Public License version 2; see COPYING for details. 
7 | */ 8 | #include "lester.h" 9 | #include "lustre_lov.h" 10 | 11 | #define BITS_PER_ENTRY (sizeof(unsigned long) * 8) 12 | static unsigned int lsost_max_ost; 13 | static unsigned long *lsost_interesting_osts; 14 | static unsigned long lsost_work_remaining; 15 | static int lsost_show_osts; 16 | 17 | static void lsost_help(void) 18 | { 19 | fprintf(stderr, "Action arguments for lsost:\n"); 20 | fprintf(stderr, " show_osts\t\tShow OST numbers (default if " 21 | "multiple OSTs requested)\n"); 22 | fprintf(stderr, " hide_osts\t\tDo not show OST numbers\n"); 23 | fprintf(stderr, " file=FILE\t\tRead list of OSTs from FILE\n"); 24 | fprintf(stderr, " NUMBER\t\tOST number to list files for\n"); 25 | fprintf(stderr, "\nAs many OSTs as needed may be listed\n"); 26 | } 27 | 28 | static void lsost_add_ost(unsigned int ost) 29 | { 30 | unsigned int entry = ost / BITS_PER_ENTRY; 31 | unsigned int bit = ost % BITS_PER_ENTRY; 32 | unsigned int new_max; 33 | 34 | if (ost < lsost_max_ost) { 35 | lsost_interesting_osts[entry] |= (1UL << bit); 36 | return; 37 | } 38 | 39 | new_max = 2 * ost; 40 | if (new_max < 1024) 41 | new_max = 1024; 42 | 43 | /* Make sure we allocate whole entries */ 44 | new_max *= BITS_PER_ENTRY; 45 | new_max--; 46 | new_max /= BITS_PER_ENTRY; 47 | 48 | lsost_interesting_osts = realloc(lsost_interesting_osts, new_max); 49 | if (!lsost_interesting_osts) { 50 | fprintf(stderr, "Unable to allocate memory for OST bitmap\n"); 51 | exit(1); 52 | } 53 | 54 | memset(lsost_interesting_osts + (lsost_max_ost / BITS_PER_ENTRY), 0, 55 | ((new_max - lsost_max_ost) / BITS_PER_ENTRY) * 56 | sizeof(unsigned long)); 57 | lsost_interesting_osts[entry] |= (1UL << bit); 58 | lsost_max_ost = new_max; 59 | } 60 | 61 | static int lsost_interesting(unsigned int ost) 62 | { 63 | unsigned int entry = ost / BITS_PER_ENTRY; 64 | unsigned int bit = ost % BITS_PER_ENTRY; 65 | 66 | if (ost >= lsost_max_ost) 67 | return 0; 68 | 69 | return !!(lsost_interesting_osts[entry] & (1UL << bit)); 70 | 
} 71 | 72 | static int lsost_file_interesting(struct ea_info *lov) 73 | { 74 | struct lov_mds_md_v1 *lov1; 75 | struct lov_ost_data_v1 *ost; 76 | int cnt; 77 | 78 | lov1 = lov->value; 79 | if (lov1->lmm_magic == LOV_MAGIC_V1) { 80 | cnt = lov1->lmm_stripe_count; 81 | ost = lov1->lmm_objects; 82 | } else if (lov1->lmm_magic == LOV_MAGIC_V3) { 83 | struct lov_mds_md_v3 *lov3 = lov->value; 84 | cnt = lov3->lmm_stripe_count; 85 | ost = lov3->lmm_objects; 86 | } else 87 | return 0; 88 | 89 | for (; cnt; ost++, cnt--) { 90 | if (lsost_interesting(ost->l_ost_idx)) 91 | return 1; 92 | } 93 | 94 | return 0; 95 | } 96 | 97 | static void lsost_print_interesting(struct ea_info *lov) 98 | { 99 | struct lov_mds_md_v1 *lov1; 100 | struct lov_ost_data_v1 *ost; 101 | char *sep = ""; 102 | int cnt; 103 | 104 | lov1 = lov->value; 105 | if (lov1->lmm_magic == LOV_MAGIC_V1) { 106 | cnt = lov1->lmm_stripe_count; 107 | ost = lov1->lmm_objects; 108 | } else if (lov1->lmm_magic == LOV_MAGIC_V3) { 109 | struct lov_mds_md_v3 *lov3 = lov->value; 110 | cnt = lov3->lmm_stripe_count; 111 | ost = lov3->lmm_objects; 112 | } else 113 | return; 114 | 115 | for (; cnt; ost++, cnt--) { 116 | if (lsost_interesting(ost->l_ost_idx)) { 117 | fprintf(outfile, "%s%u", sep, ost->l_ost_idx); 118 | sep = ","; 119 | } 120 | } 121 | 122 | fprintf(outfile, " "); 123 | } 124 | 125 | static struct ea_info *lsost_find_lov(struct ea_info *eas) 126 | { 127 | struct ea_info *ea; 128 | 129 | for (ea = eas; ea->name; ea++) { 130 | if (ea->index != EXT2_XATTR_INDEX_TRUSTED && 131 | ea->index != EXT2_XATTR_INDEX_LUSTRE) 132 | continue; 133 | 134 | if (ea->name_len != 3 || strncmp(ea->name, "lov", 3)) 135 | continue; 136 | 137 | return ea; 138 | } 139 | 140 | return NULL; 141 | } 142 | 143 | static int lsost_init(const char *device, int argc, const char **argv) 144 | { 145 | unsigned long ost; 146 | unsigned int count = 0; 147 | int hide_osts = 0; 148 | FILE *file; 149 | int rc; 150 | 151 | while (argc--) { 152 | if 
(!strcmp(*argv, "show_osts")) { 153 | lsost_show_osts = 1; 154 | } else if (!strcmp(*argv, "hide_osts")) { 155 | hide_osts = 1; 156 | } else if (!strncmp(*argv, "file=", 5)) { 157 | file = fopen(*argv + 5, "r"); 158 | if (!file) { 159 | int e = errno; 160 | fprintf(stderr, "Unable to open "); 161 | errno = e; 162 | perror(*argv + 5); 163 | return 1; 164 | } 165 | 166 | while (!feof(file)) { 167 | rc = fscanf(file, "%lu", &ost); 168 | if (rc == 1) { 169 | if (ost > ~0U) { 170 | fprintf(stderr, "OST %lu too " 171 | "large\n", ost); 172 | return 1; 173 | } 174 | lsost_add_ost(ost); 175 | count++; 176 | } else if (rc != EOF) { 177 | fprintf(stderr, "Bad read from %s\n", 178 | *argv + 5); 179 | fclose(file); 180 | return 1; 181 | } 182 | } 183 | fclose(file); 184 | } else { 185 | char *end; 186 | if (!**argv) { 187 | fprintf(stderr, "Unable to parse empty action " 188 | "arg\n"); 189 | return 1; 190 | } 191 | ost = strtoul(*argv, &end, 0); 192 | if (*end || end == *argv) { 193 | fprintf(stderr, "Invalid action argument " 194 | "'%s'\n", *argv); 195 | return 1; 196 | } 197 | if (ost > ~0U) { 198 | fprintf(stderr, "OST %lu too large\n", ost); 199 | return 1; 200 | } 201 | lsost_add_ost(ost); 202 | count++; 203 | } 204 | 205 | argv++; 206 | } 207 | 208 | if (!count) { 209 | fprintf(stderr, "No OSTs given for lsost action\n"); 210 | return 1; 211 | } 212 | 213 | if (count > 1 && !hide_osts) 214 | lsost_show_osts = 1; 215 | 216 | return 0; 217 | } 218 | 219 | static int lsost_iscan(ext2_ino_t ino, struct ext2_inode *inode, 220 | struct ea_info *eas) 221 | { 222 | struct ea_info *lov = NULL; 223 | 224 | if (!LINUX_S_ISREG(inode->i_mode)) 225 | return ACTION_COMPLETE; 226 | 227 | lov = lsost_find_lov(eas); 228 | if (lov && !lov->value) { 229 | lov->requested = 1; 230 | return ACTION_WANT_READ_ATTRS; 231 | } 232 | 233 | if (!lov || !lsost_file_interesting(lov)) 234 | return ACTION_COMPLETE; 235 | 236 | lsost_work_remaining++; 237 | return ACTION_WANT_PATH; 238 | } 239 | 240 | 
static int lsost_dscan(ext2_ino_t ino, struct ext2_inode *inode, 241 | struct dentry *parent, const char *name, int namelen, 242 | struct ea_info *eas) 243 | { 244 | int offset; 245 | 246 | if (lsost_show_osts) { 247 | struct ea_info *lov; 248 | 249 | if (!inode) 250 | return ACTION_WANT_INODE | ACTION_WANT_ATTRS; 251 | 252 | /* Get our LOV attribute; if we cannot find one, we've 253 | * been deleted. 254 | */ 255 | lov = lsost_find_lov(eas); 256 | if (!lov) 257 | return ACTION_COMPLETE; 258 | 259 | if (!lov->value) { 260 | lov->requested = 1; 261 | return ACTION_WANT_READ_ATTRS; 262 | } 263 | 264 | lsost_print_interesting(lov); 265 | } 266 | 267 | offset = build_path(parent, 0); 268 | fprintf(outfile, "%.*s%.*s\n", offset, path_buffer, namelen, name); 269 | 270 | if (--lsost_work_remaining) 271 | return ACTION_COMPLETE; 272 | 273 | return ACTION_END_SCAN; 274 | } 275 | 276 | struct action_ops lsost_action = { 277 | .name = "lsost", 278 | .init = lsost_init, 279 | .help = lsost_help, 280 | .iscan = lsost_iscan, 281 | .dscan = lsost_dscan, 282 | }; 283 | -------------------------------------------------------------------------------- /attr.c: -------------------------------------------------------------------------------- 1 | /* attr.c -- extended attribute handling 2 | * 3 | * Copyright (C) 2013 UT-Battelle. 4 | * 5 | * This file may be redistributed under the terms of the GNU General 6 | * Public License version 2; see COPYING for details. 
7 | */ 8 | #define _GNU_SOURCE 9 | #define _FILE_OFFSET_BITS 64 10 | #include "lester.h" 11 | 12 | unsigned long ea_ext_value_read = 0; 13 | unsigned long ea_ext_block_read = 0; 14 | 15 | static void ea_complete_read(struct ea_info *eas) 16 | { 17 | eas->pending--; 18 | if (!eas->pending) 19 | eas->done(eas->data); 20 | } 21 | 22 | static int ea_block_copy(ext2_loff_t offset, ssize_t size, void *priv1, 23 | unsigned long file_block, void *data) 24 | { 25 | /* We've read a portion of the EA, so copy it into our storage 26 | * as we're not guaranteed this is all of it. 27 | */ 28 | struct ea_info *eas = priv1; 29 | void *dst = eas->value + (file_block * fs->blocksize); 30 | 31 | /* If this is the last block of the value, we may only do a partial 32 | * copy. 33 | */ 34 | if ((eas->value_len / fs->blocksize) == file_block) 35 | size = eas->value_len % fs->blocksize; 36 | 37 | memcpy(dst, data, size); 38 | ea_complete_read(eas); 39 | return 0; 40 | } 41 | 42 | static int ea_block_iter_cb(ext2_filsys fs, blk64_t blocknr, 43 | e2_blkcnt_t blockcnt, void *priv) 44 | { 45 | struct ea_info *eas = priv; 46 | errcode_t rc; 47 | 48 | if (blocknr < 0) 49 | return 0; 50 | 51 | eas->pending++; 52 | rc = io_channel_async_read(fs->io, blocknr, 1, ea_block_copy, 53 | eas, blockcnt); 54 | if (rc) { 55 | com_err("ea_block_iter_cb", rc, "during async_read launch\n"); 56 | return BLOCK_ABORT; 57 | } 58 | return 0; 59 | } 60 | 61 | static void ea_block_iter_end(ext2_filsys fs, errcode_t error, void *priv) 62 | { 63 | struct ea_info *eas = priv; 64 | 65 | if (error) { 66 | com_err("ea_block_iter_end", error, "during iteration\n"); 67 | exit(1); 68 | } 69 | 70 | /* Drop the ref we held during the iteration; we've submitted all 71 | * of our requests, so we're safe to do the callback once all of 72 | * them complete. 
73 | */ 74 | ea_complete_read(eas); 75 | } 76 | 77 | static int ea_block_iter_cb_sync(ext2_filsys fs, blk64_t *block_nr, 78 | e2_blkcnt_t blockcnt, blk64_t ref_block, 79 | int ref_offset, void *priv_data) 80 | { 81 | return ea_block_iter_cb(fs, *block_nr, blockcnt, priv_data); 82 | } 83 | 84 | static int ea_read_inode_cb(ext2_filsys fs, ext2_ino_t ino, 85 | struct ext2_inode *inode, void *priv) 86 | { 87 | struct ea_info *eas = priv; 88 | errcode_t rc; 89 | 90 | if (EXT2_I_SIZE(inode) != eas->value_len) { 91 | fprintf(stderr, "inode size does not match EA size\n"); 92 | exit(1); 93 | } 94 | 95 | /* Squirrel away the inode, as the block iteration may need it -- 96 | * this is somewhat future-proofing, as it currently won't need 97 | * it after the inital call returns, but properly handling extents 98 | * in an async manner may. 99 | * 100 | * To ensure we don't prematurely complete the EA read request 101 | * before all of the async IO completes -- ie, one read completes 102 | * before we can submit the second -- we hold an extra reference 103 | * while the iteration proceeds. 
104 | */ 105 | memcpy(eas->inode, inode, EXT2_INODE_SIZE(fs->super)); 106 | eas->pending++; 107 | if (use_unix) { 108 | rc = ext2fs_block_iterate3(fs, eas->ext_ino, 0, NULL, 109 | ea_block_iter_cb_sync, eas); 110 | if (rc) { 111 | com_err("ext2fs_block_iterate2", rc, 112 | "failed during ea block iteration\n"); 113 | return 1; 114 | } 115 | 116 | /* The iteration was synchronous, so we can drop our ref now */ 117 | ea_complete_read(eas); 118 | } else { 119 | rc = ext2fs_block_iterate_async(fs, eas->ext_ino, eas->inode, 120 | ea_block_iter_cb, 121 | ea_block_iter_end, eas); 122 | if (rc) { 123 | com_err("ext2fs_block_iterate_async", rc, 124 | "failed to initiate ea async iteration"); 125 | return 1; 126 | } 127 | } 128 | 129 | return 0; 130 | } 131 | 132 | static void iterate_ea_entries(void *ea_data, 133 | void (*cb)(struct ext2_ext_attr_entry *, void *, void *), 134 | void *base, void *data) 135 | { 136 | struct ext2_ext_attr_entry *entry; 137 | for (entry = (struct ext2_ext_attr_entry *) ea_data; 138 | !EXT2_EXT_IS_LAST_ENTRY(entry); 139 | entry = EXT2_EXT_ATTR_NEXT(entry)) { 140 | cb(entry, base, data); 141 | } 142 | } 143 | 144 | static void count_entries(struct ext2_ext_attr_entry *entry, void *base, 145 | void *data) 146 | { 147 | unsigned int *n = data; 148 | *n += 1; 149 | } 150 | 151 | static void parse_entry(struct ext2_ext_attr_entry *entry, void *base, 152 | void *data) 153 | { 154 | struct ea_info **ea_iter = data; 155 | struct ea_info *ea = *ea_iter; 156 | 157 | /* Ugh; some installed libe2fs headers don't have e_name, so 158 | * we have to hardcode the offset here. Similarly for e_value_inum; 159 | * it is also known as e_value_block, but points to an inode that 160 | * holds the value. 
161 | */ 162 | ea->name = (char *) entry + 16; 163 | ea->ext_ino = *((unsigned int *) entry + 1); 164 | ea->index = entry->e_name_index; 165 | ea->name_len = entry->e_name_len; 166 | ea->value_len = entry->e_value_size; 167 | if (!ea->ext_ino) 168 | ea->value = (char *) base + entry->e_value_offs; 169 | 170 | *ea_iter += 1; 171 | } 172 | 173 | struct ea_info *build_ea_info(struct ext2_inode *in, void *ext_attr) 174 | { 175 | /* NOTE: the returned ea_info chain points into the memory given 176 | * to this function; if you reuse that memory, you must copy 177 | * the old contents elsewhere and call ea_memory_change() to 178 | * reparse and copy the requests over. 179 | */ 180 | struct ext2_inode_large *inode = (struct ext2_inode_large *) in; 181 | struct ext2_ext_attr_header *hdr; 182 | struct ea_info *ea, *ea_iter; 183 | char *start; 184 | unsigned int count = 1; 185 | 186 | start = (char *)inode + EXT2_GOOD_OLD_INODE_SIZE + 187 | inode->i_extra_isize + sizeof(__u32); 188 | iterate_ea_entries(start, count_entries, start, &count); 189 | 190 | if (ext_attr) { 191 | /* Check that the external attribute block is still valid */ 192 | hdr = (struct ext2_ext_attr_header *) ext_attr; 193 | if (hdr->h_magic == EXT2_EXT_ATTR_MAGIC && hdr->h_blocks == 1) 194 | iterate_ea_entries(hdr + 1, count_entries, hdr, &count); 195 | else 196 | ext_attr = NULL; 197 | } 198 | 199 | ea = calloc(count, sizeof(*ea)); 200 | if (!ea) { 201 | fprintf(stderr, "unable to allocate EA info storage\n"); 202 | exit(1); 203 | } 204 | 205 | ea_iter = ea; 206 | iterate_ea_entries(start, parse_entry, start, &ea_iter); 207 | if (ext_attr) 208 | iterate_ea_entries(hdr + 1, parse_entry, hdr, &ea_iter); 209 | 210 | return ea; 211 | } 212 | 213 | void release_ea_info(struct ea_info *ea) 214 | { 215 | struct ea_info *entry; 216 | 217 | if (ea == NULL) 218 | return; 219 | 220 | for (entry = ea; entry->name; entry++) { 221 | if (entry->allocated) 222 | free(entry->value); 223 | } 224 | if (ea->inode) 225 | 
free(ea->inode); 226 | free(ea); 227 | } 228 | 229 | struct ea_info *ea_memory_change(struct ea_info *orig, struct ext2_inode *in, 230 | void *ext_attr) 231 | { 232 | /* The memory for the inode or external attribute block changed, 233 | * so we need to reindex the EA info structure -- build a new one 234 | * and copy the requests over from the old one. Both lists will 235 | * be in the same order. 236 | */ 237 | struct ea_info *o_ea, *n_ea, *eas; 238 | 239 | eas = build_ea_info(in, ext_attr); 240 | for (o_ea = orig, n_ea = eas; o_ea->name; o_ea++, n_ea++) { 241 | n_ea->requested = o_ea->requested; 242 | if (o_ea->allocated) { 243 | n_ea->allocated = o_ea->allocated; 244 | n_ea->value = o_ea->value; 245 | } 246 | } 247 | 248 | if (orig->inode) 249 | free(orig->inode); 250 | free(orig); 251 | 252 | return eas; 253 | } 254 | 255 | void async_read_ea_value(struct ea_info *eas, void (*done)(void *), void *data) 256 | { 257 | /* Read the value of an external EA value from the blocks associated 258 | * with the inode in the descriptor. We need to set aside space for 259 | * the EA value, and read the inode. From there, we'll iterate its 260 | * blocks and read into the appropriate place in the buffer. 261 | */ 262 | errcode_t rc; 263 | 264 | if (eas->value) { 265 | fprintf(stderr, "BUG: async_read_ea_value with non-NULL val\n"); 266 | exit(1); 267 | } 268 | if (!eas->ext_ino) { 269 | fprintf(stderr, "BUG: async_read_ea_value with inode 0\n"); 270 | exit(1); 271 | } 272 | 273 | /* Store our callback info for later use. 
*/ 274 | eas->done = done; 275 | eas->data = data; 276 | eas->allocated = 1; 277 | eas->value = malloc(eas->value_len); 278 | eas->inode = malloc(EXT2_INODE_SIZE(fs->super)); 279 | if (!eas->value || !eas->inode) { 280 | fprintf(stderr, "unable to allocate external attr data\n"); 281 | exit(1); 282 | } 283 | 284 | rc = ext2fs_read_inode_async(fs, eas->ext_ino, NULL, 285 | ea_read_inode_cb, eas); 286 | if (rc) { 287 | com_err("ext2fs_read_inode_async", rc, 288 | "initiating ea inode read"); 289 | exit(1); 290 | } 291 | 292 | ea_ext_value_read++; 293 | } 294 | -------------------------------------------------------------------------------- /lib/block_async.c: -------------------------------------------------------------------------------- 1 | /* block_async.c --- async read-only iteration over all blocks in an inode 2 | * 3 | * Copyright (C) 1993, 1994, 1995, 1996 Theodore Ts'o. 4 | * Copyright (C) 2013 UT-Battelle. 5 | * 6 | * This file may be redistributed under the terms of the GNU Library Public 7 | * License, version 2. 
 */
/* NOTE(review): the #include targets on the lines below were lost when this
 * file was extracted (angle-bracket header names stripped); restore the
 * original system header names before building. */
#include
#include
#if HAVE_UNISTD_H
#include
#endif

#include
#include

#include "ext2fs-extra.h"

/*
 * Shared state for one asynchronous block-iteration request.  One context is
 * allocated per ext2fs_block_iterate_async() call; use_count counts the
 * submitter plus every async read still in flight, and whichever path drops
 * it to zero invokes ->end and frees the context.
 */
struct block_async_context {
	ext2_filsys fs;
	/* Per-block user callback; a return value containing BLOCK_ABORT
	 * stops iteration of the current block's entries. */
	int (*func)(ext2_filsys fs,
		    blk64_t blocknr,
		    e2_blkcnt_t count,
		    void *priv_data);
	/* Completion callback, run exactly once when all reads finish. */
	void (*end)(ext2_filsys fs,
		    errcode_t errcode,
		    void *priv_data);
	errcode_t errcode;		/* first error seen (sticky) */
	e2_blkcnt_t count;
	void *priv_data;
	unsigned long use_count;	/* submitter + in-flight async reads */
};

/*
 * Async-read completion for a single indirect block: walk its 32-bit block
 * pointers and hand each non-hole entry to the user callback.  priv2 carries
 * the logical block count of the first entry.
 */
static int block_iterate_async_ind(ext2_loff_t offset, ssize_t size,
				   void *private, unsigned long priv2,
				   void *data)
{
	struct block_async_context *ctx = private;
	blk_t *block_nr = data;
	e2_blkcnt_t bcount = priv2;
	int i, limit, flags;

	limit = ctx->fs->blocksize / sizeof(blk_t);
	for (i = 0; i < limit; i++, bcount++, block_nr++) {
		if (*block_nr == 0)
			continue;
#ifdef WORDS_BIGENDIAN
		/* On-disk block numbers are little-endian; swap in place. */
		*block_nr = ext2fs_swab32(*block_nr);
#endif
		flags = (*ctx->func)(ctx->fs, *block_nr, bcount, ctx->priv_data);
		if (flags & BLOCK_ABORT)
			break;
	}

	/* Drop our reference; the last one out reports completion and
	 * frees the context. */
	if (!--ctx->use_count) {
		if (ctx->end)
			(*ctx->end)(ctx->fs, ctx->errcode, ctx->priv_data);
		ext2fs_free_mem(&ctx);
	}

	return 0;
}

/*
 * Async-read completion for a double-indirect block: range-check each
 * referenced indirect block and queue an async read of it.  Each entry maps
 * `limit` logical blocks, hence bcount advances by limit per slot.
 */
static int block_iterate_async_dind(ext2_loff_t offset, ssize_t size,
				    void *private, unsigned long priv2,
				    void *data)
{
	struct block_async_context *ctx = private;
	blk_t *block_nr = data;
	e2_blkcnt_t bcount = priv2;
	int i, limit;
	errcode_t rc;

	limit = ctx->fs->blocksize / sizeof(blk_t);
	for (i = 0; i < limit; i++, block_nr++, bcount += limit) {
		if (*block_nr == 0)
			continue;
#ifdef WORDS_BIGENDIAN
		*block_nr = ext2fs_swab32(*block_nr);
#endif
		if (*block_nr >= ctx->fs->super->s_blocks_count ||
		    *block_nr < ctx->fs->super->s_first_data_block) {
			if (!ctx->errcode)
				ctx->errcode = EXT2_ET_BAD_IND_BLOCK;
			break;
		}
		rc = io_channel_async_read(ctx->fs->io, *block_nr, 1,
					   block_iterate_async_ind,
					   ctx, bcount);
		if (rc) {
			if (!ctx->errcode)
				ctx->errcode = rc;
			break;
		}
		/* Queued read now owns a reference on the context. */
		ctx->use_count++;
	}

	if (!--ctx->use_count) {
		if (ctx->end)
			(*ctx->end)(ctx->fs, ctx->errcode, ctx->priv_data);
		ext2fs_free_mem(&ctx);
	}

	return 0;
}

/*
 * Async-read completion for a triple-indirect block: queue an async read of
 * each referenced double-indirect block.  Each entry maps limit*limit
 * logical blocks.
 */
static int block_iterate_async_tind(ext2_loff_t offset, ssize_t size,
				    void *private, unsigned long priv2,
				    void *data)
{
	struct block_async_context *ctx = private;
	blk_t *block_nr = data;
	e2_blkcnt_t bcount = priv2;
	int i, limit;
	errcode_t rc;

	limit = ctx->fs->blocksize / sizeof(blk_t);
	for (i = 0; i < limit; i++, block_nr++, bcount += limit * limit) {
		if (*block_nr == 0)
			continue;
#ifdef WORDS_BIGENDIAN
		*block_nr = ext2fs_swab32(*block_nr);
#endif
		if (*block_nr >= ctx->fs->super->s_blocks_count ||
		    *block_nr < ctx->fs->super->s_first_data_block) {
			if (!ctx->errcode)
				ctx->errcode = EXT2_ET_BAD_DIND_BLOCK;
			break;
		}
		rc = io_channel_async_read(ctx->fs->io, *block_nr, 1,
					   block_iterate_async_dind,
					   ctx, bcount);
		if (rc) {
			if (!ctx->errcode)
				ctx->errcode = rc;
			break;
		}
		ctx->use_count++;
	}

	if (!--ctx->use_count) {
		if (ctx->end)
			(*ctx->end)(ctx->fs, ctx->errcode, ctx->priv_data);
		ext2fs_free_mem(&ctx);
	}

	return 0;
}

/*
 * Iterate all data blocks of @ino, invoking @func for each.  Direct and
 * extent-mapped blocks are visited synchronously; indirect trees are read
 * asynchronously through the IO manager.  @end (if set) fires after the last
 * outstanding read completes, but only if @func was invoked at least once.
 */
errcode_t ext2fs_block_iterate_async(ext2_filsys fs,
				     ext2_ino_t ino,
				     struct ext2_inode *inode,
				     int (*func)(ext2_filsys fs,
						 blk64_t blocknr,
						 e2_blkcnt_t count,
						 void *priv_data),
				     void (*end)(ext2_filsys fs,
						 errcode_t errcode,
						 void *priv_data),
				     void *priv_data)
{
	int i;
	int ret = 0;
	struct ext2_inode local_inode;
	errcode_t retval;
	struct block_async_context *ctx;
	int limit;
	blk64_t block_nr;
	int called = 0;		/* did the user callback ever run? */
	e2_blkcnt_t blockcnt = 0;

	EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS);

	/* TODO To make this interface truely asynchronous, we need
	 * to ensure that we are passed in an inode to work with, or
	 * add some state tracking info to our context.
	 *
	 * ext2fs_extent_open2() will not do any IO if it is passed in
	 * the contents of the inode, but we will need to import
	 * ext2fs_extent_get() and modify it into ext2fs_extent_get_async(),
	 * along with the tracking info (and block storage) required to
	 * avoid blocking requests.
	 *
	 * For now, we'll punt since we expect to run on the MDS and most of
	 * our extents should fit in the inode.
	 */
	if (!inode) {
		/* NOTE: this read is synchronous; see TODO above. */
		inode = &local_inode;
		retval = ext2fs_read_inode(fs, ino, inode);
		if (retval)
			return retval;
	}

	retval = ext2fs_get_mem(sizeof(struct block_async_context), &ctx);
	if (retval)
		return retval;

	/* Block pointers per indirect block (blocksize / 4). */
	limit = fs->blocksize >> 2;

	ctx->fs = fs;
	ctx->func = func;
	ctx->end = end;
	ctx->errcode = 0;
	ctx->priv_data = priv_data;
	ctx->use_count = 1;	/* the submitter's reference */

	if (inode->i_flags & EXT4_EXTENTS_FL) {
		ext2_extent_handle_t handle;
		struct ext2fs_extent extent;
		blk64_t blk, new_blk;
		int op = EXT2_EXTENT_ROOT;
		unsigned int j;

		retval = ext2fs_extent_open2(fs, ino, inode, &handle);
		if (retval)
			goto errout;

		while (1) {
			retval = ext2fs_extent_get(handle, op, &extent);
			if (retval) {
				if (retval == EXT2_ET_EXTENT_NO_NEXT)
					retval = 0;
				break;
			}

			op = EXT2_EXTENT_NEXT;
			blk = extent.e_pblk;

			/* Interior nodes carry no data blocks of their own */
			if (!(extent.e_flags & EXT2_EXTENT_FLAGS_LEAF))
				continue;

			for (blockcnt = extent.e_lblk, j = 0;
			     j < extent.e_len;
			     blk++, blockcnt++, j++) {
				new_blk = blk;
				called = 1;
				ret = (*ctx->func)(fs, new_blk, blockcnt,
						   priv_data);
				if (ret & BLOCK_ABORT)
					goto extent_errout;
			}
		}

extent_errout:
		ext2fs_extent_free(handle);
		ctx->errcode = retval;
		goto errout;
	}

	/*
	 * Iterate over normal data blocks
	 */
	for (i = 0; i < EXT2_NDIR_BLOCKS ; i++, blockcnt++) {
		if (inode->i_block[i]) {
			called = 1;
			ret |= (*ctx->func)(fs, inode->i_block[i], blockcnt,
					    priv_data);
			if (ret & BLOCK_ABORT)
				goto errout;
		}
	}

	if (inode->i_block[EXT2_IND_BLOCK]) {
		block_nr = inode->i_block[EXT2_IND_BLOCK];
		if (block_nr >= ctx->fs->super->s_blocks_count ||
		    block_nr < ctx->fs->super->s_first_data_block) {
			ctx->errcode = EXT2_ET_BAD_IND_BLOCK;
			goto errout;
		}
		retval = io_channel_async_read(fs->io, block_nr, 1,
					       block_iterate_async_ind,
					       ctx, blockcnt);
		if (retval) {
			ctx->errcode = retval;
			goto errout;
		}
		ctx->use_count++;
		called = 1;
	}
	/* Account for the logical span of the indirect tree even when the
	 * pointer is a hole, so later trees get correct block counts. */
	blockcnt += limit;

	if (inode->i_block[EXT2_DIND_BLOCK]) {
		block_nr = inode->i_block[EXT2_DIND_BLOCK];
		if (block_nr >= ctx->fs->super->s_blocks_count ||
		    block_nr < ctx->fs->super->s_first_data_block) {
			ctx->errcode = EXT2_ET_BAD_DIND_BLOCK;
			goto errout;
		}
		retval = io_channel_async_read(fs->io, block_nr, 1,
					       block_iterate_async_dind,
					       ctx, blockcnt);
		if (retval) {
			ctx->errcode = retval;
			goto errout;
		}
		ctx->use_count++;
		called = 1;
	}
	blockcnt += limit * limit;

	if (inode->i_block[EXT2_TIND_BLOCK]) {
		block_nr = inode->i_block[EXT2_TIND_BLOCK];
		if (block_nr >= ctx->fs->super->s_blocks_count ||
		    block_nr < ctx->fs->super->s_first_data_block) {
			ctx->errcode = EXT2_ET_BAD_TIND_BLOCK;
			goto errout;
		}
		retval = io_channel_async_read(fs->io, block_nr, 1,
					       block_iterate_async_tind,
					       ctx, blockcnt);
		if (retval) {
			ctx->errcode = retval;
			goto errout;
		}
		ctx->use_count++;
		called = 1;
	}

errout:
	/* Drop the submitter's reference; if no reads remain (or none were
	 * issued) report completion here.  ->end only fires if the user
	 * callback actually ran. */
	if (!--ctx->use_count) {
		if (called && ctx->end)
			(*ctx->end)(fs, ctx->errcode, ctx->priv_data);
		ext2fs_free_mem(&ctx);
	}

	/* Once any callback ran, errors are reported via ->end instead. */
	if (called)
		retval = 0;
	return retval;
}
--------------------------------------------------------------------------------
/lester.c:
--------------------------------------------------------------------------------
/* lester.c -- the Lustre lister (also works for ext2+)
 *
 * Copyright (C) 2013 UT-Battelle.
 *
 * This file may be redistributed under the terms of the GNU General
 * Public License version 2; see COPYING for details.
7 | */ 8 | #define _GNU_SOURCE 9 | #define _FILE_OFFSET_BITS 64 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "lester.h" 15 | 16 | ext2_filsys fs; 17 | FILE *outfile; 18 | 19 | char *root_path = "/"; 20 | unsigned int verbosity = 0; 21 | int use_unix = 0; 22 | 23 | static unsigned long max_async = 128 * 1024; 24 | struct action_ops *scan_action = NULL; 25 | 26 | void diff_timevals(struct timeval *start, struct timeval *end, 27 | struct timeval *out) 28 | { 29 | out->tv_sec = end->tv_sec - start->tv_sec; 30 | out->tv_usec = end->tv_usec - start->tv_usec; 31 | if (start->tv_usec > end->tv_usec) { 32 | out->tv_sec--; 33 | out->tv_usec += 1000000; 34 | } 35 | } 36 | 37 | int enforce_async_limit(void) 38 | { 39 | unsigned long async_count; 40 | errcode_t rc; 41 | 42 | rc = io_channel_async_count(fs->io, &async_count); 43 | if (rc) { 44 | com_err("io_channel_async_count", rc, 45 | "failed to get async count"); 46 | return 1; 47 | } 48 | 49 | if (async_count > max_async) { 50 | rc = io_channel_finish_async(fs->io, 0); 51 | if (rc) { 52 | com_err("io_channel_finish_async", rc, 53 | "failed to finish async"); 54 | return 1; 55 | } 56 | } 57 | 58 | return 0; 59 | } 60 | 61 | static int read_bitmaps(const char *dev) 62 | { 63 | struct timeval start, end, diff; 64 | errcode_t rc; 65 | int i; 66 | 67 | if (verbosity) 68 | fprintf(stdout, "Starting bitmaps\n"); 69 | 70 | gettimeofday(&start, NULL); 71 | for (i = 0; i < fs->group_desc_count; i++) { 72 | if (!(ext2fs_bg_flags(fs, i) & EXT2_BG_INODE_UNINIT)) 73 | io_channel_readahead(fs->io, 74 | ext2fs_inode_table_loc(fs, i), 1); 75 | } 76 | 77 | rc = ext2fs_read_inode_bitmap(fs); 78 | if (rc) { 79 | com_err("ext2fs_read_inode_bitmap", rc, 80 | "opening inode bitmap on %s\n", dev); 81 | return 1; 82 | } 83 | 84 | if (verbosity) { 85 | gettimeofday(&end, NULL); 86 | diff_timevals(&start, &end, &diff); 87 | fprintf(stdout, "Finished bitmaps in %d.%06u seconds\n", 88 | (int) diff.tv_sec, (unsigned int) 
diff.tv_usec); 89 | } 90 | return 0; 91 | } 92 | 93 | static int run_scan(const char *dev, const char *io_opts) 94 | { 95 | struct timeval start, now, diff; 96 | errcode_t rc; 97 | 98 | gettimeofday(&start, NULL); 99 | 100 | rc = ext2fs_open2(dev, io_opts, EXT2_FLAG_SOFTSUPP_FEATURES, 0, 0, 101 | use_unix ? unix_io_manager : aio_io_manager, &fs); 102 | if (rc) { 103 | com_err("ext2fs_open", rc, "opening %s\n", dev); 104 | return 1; 105 | } 106 | 107 | if (read_bitmaps(dev)) 108 | return 1; 109 | 110 | rc = ext2fs_init_dblist(fs, NULL); 111 | if (rc) { 112 | com_err("ext2fs_init_dblist", rc, "initializing dblist\n"); 113 | return 1; 114 | } 115 | 116 | if (scan_inodes(dev)) 117 | return 1; 118 | 119 | if (resolve_paths()) 120 | return 1; 121 | 122 | ext2fs_close(fs); 123 | 124 | if (verbosity) { 125 | gettimeofday(&now, NULL); 126 | diff_timevals(&start, &now, &diff); 127 | fprintf(stdout, "Success! Finished in %d.%06u seconds\n", 128 | (int) diff.tv_sec, (unsigned int) diff.tv_usec); 129 | } 130 | 131 | return 0; 132 | } 133 | 134 | static void usage(const char *pname) 135 | { 136 | fprintf(stderr, "Lester, the Lustre lister (version %s)\n\n", 137 | PACKAGE_VERSION); 138 | fprintf(stderr, "usage: %s [OPTIONS] BLOCKDEV\n", pname); 139 | fprintf(stderr, "Options:\n"); 140 | fprintf(stderr, " -h,--help\t\t\t\tThis message\n"); 141 | fprintf(stderr, " -v,--verbose\t\t\tIncrease verbosity level\n"); 142 | fprintf(stderr, " -u,--unix\t\t\t\tUse the Unix IO manager\n"); 143 | fprintf(stderr, " -o=FILE,--output=FILE\t\t" 144 | "Direct result of scan to FILE\n"); 145 | fprintf(stderr, " -A=NAME,--action=NAME\t\tScan action to " 146 | "perform (default fslist)\n"); 147 | fprintf(stderr, "\t\t\t\t\t fslist:\n"); 148 | fprintf(stderr, "\t\t\t\t\t\tList files matching criteria\n"); 149 | fprintf(stderr, "\t\t\t\t\t namei:\n"); 150 | fprintf(stderr, "\t\t\t\t\t\tFind names for inodes\n"); 151 | fprintf(stderr, "\t\t\t\t\t lsost:\n"); 152 | fprintf(stderr, "\t\t\t\t\t\tFind files 
on given OSTs\n"); 153 | fprintf(stderr, " -a=ARG,--action-arg=ARG\t\tPass argument to scan " 154 | "action\n"); 155 | fprintf(stderr, "\t\t\t\t\t Use \"-a help\" to get list\n"); 156 | fprintf(stderr, " -r=PATH,--root=PATH\t\t\tHide files not under " 157 | "PATH\n"); 158 | fprintf(stderr, " -g=NUM,--group-readahead=NUM\t" 159 | "Readahead NUM groups in the inode table\n"); 160 | fprintf(stderr, "\t\t\t\t\t Default 1 for Unix manager\n"); 161 | fprintf(stderr, "\t\t\t\t\t Default 8 for AIO manager\n"); 162 | fprintf(stderr, " -d=NUM,--dir-readahead=NUM\t\t" 163 | "Readahead NUM chunks in the dir scan\n"); 164 | fprintf(stderr, "\t\t\t\t\t Default 2 for Unix manager\n"); 165 | fprintf(stderr, "\t\t\t\t\t Default 64 for AIO manager\n"); 166 | fprintf(stderr, " -m,=NUM,--max-async=NUM\t\t" 167 | "Max number of outstanding async\n"); 168 | fprintf(stderr, "\t\t\t\t\t requests allowed\n"); 169 | fprintf(stderr, " -O=ARG,--io-options=ARG\t\t" 170 | "Pass options to IO manager\n"); 171 | fprintf(stderr, "\tAIO manager supported options (separated by &):\n"); 172 | fprintf(stderr, "\t\tmaxsize=KB\t\tMaximum request size in KB\n"); 173 | fprintf(stderr, "\t\t(qd|queuedepth)=INT\tMaximum queue depth\n"); 174 | fprintf(stderr, "\t\tcache_entries=INT\tNumber of cache blocks " 175 | "to allocate\n"); 176 | fprintf(stderr, "\t\treserved_entries=INT\tCache blocks reserved " 177 | "for sync IO\n"); 178 | fprintf(stderr, "\t\treq_preallocate=INT\tPreallocate queue entries\n"); 179 | fprintf(stderr, "\t\tmerge_gap=KB\t\tAllowed gap between " 180 | "merged async reqs\n"); 181 | 182 | exit(2); 183 | } 184 | 185 | int main(int argc, char **argv) 186 | { 187 | const char *output = NULL; 188 | const char *device = NULL; 189 | const char *io_opts = NULL; 190 | const char *action = NULL; 191 | const char **action_argv; 192 | int action_argc = 0; 193 | int action_help = 0; 194 | 195 | static struct option options[] = { 196 | { "help", no_argument, NULL, 'h' }, 197 | { "io-options", 
required_argument, NULL, 'O' }, 198 | { "unix", required_argument, NULL, 'u' }, 199 | { "group-readahead", required_argument, NULL, 'g' }, 200 | { "dir-readahead", required_argument, NULL, 'd' }, 201 | { "output", required_argument, NULL, 'p' }, 202 | { "max-async", required_argument, NULL, 'm' }, 203 | { "action", required_argument, NULL, 'A' }, 204 | { "action-arg", required_argument, NULL, 'a' }, 205 | { "verbose", no_argument, NULL, 'v' }, 206 | { "root", required_argument, NULL, 'r' }, 207 | { NULL } 208 | }; 209 | 210 | action_argv = calloc(argc, sizeof(char *)); 211 | if (!action_argv) { 212 | fprintf(stderr, "unable to allocate memory for args\n"); 213 | exit(1); 214 | } 215 | 216 | for (;;) { 217 | int opt = getopt_long(argc, argv, "hO:ug:d:o:m:A:a:vr:", 218 | options, NULL); 219 | 220 | if (opt == -1) 221 | break; 222 | 223 | switch (opt) { 224 | case 'o': 225 | output = optarg; 226 | break; 227 | case 'O': 228 | io_opts = optarg; 229 | break; 230 | case 'u': 231 | use_unix = 1; 232 | break; 233 | case 'g': 234 | grp_readahead = strtoul(optarg, NULL, 0); 235 | break; 236 | case 'd': 237 | dir_readahead = strtoul(optarg, NULL, 0); 238 | break; 239 | case 'm': 240 | max_async = strtoul(optarg, NULL, 0); 241 | break; 242 | case 'A': 243 | if (action) { 244 | fprintf(stderr, "Only specify one action\n"); 245 | exit(1); 246 | } 247 | action = optarg; 248 | break; 249 | case 'a': 250 | action_argv[action_argc++] = optarg; 251 | if (!strcmp(optarg, "help")) 252 | action_help = 1; 253 | break; 254 | case 'r': 255 | root_path = optarg; 256 | break; 257 | case 'v': 258 | verbosity++; 259 | break; 260 | case 'h': 261 | case '?': 262 | default: 263 | usage(argv[0]); 264 | break; 265 | } 266 | } 267 | 268 | if (!action) 269 | action = "fslist"; 270 | 271 | if (!action_help && optind == argc) { 272 | fprintf(stderr, "%s: missing block device\n", argv[0]); 273 | usage(argv[0]); 274 | } 275 | device = argv[optind]; 276 | 277 | if (grp_readahead == 0) 278 | grp_readahead 
= use_unix ? 1 : 8; 279 | 280 | if (dir_readahead == 0) 281 | dir_readahead = use_unix ? 2 : 64; 282 | 283 | add_error_table(&et_ext2_error_table); 284 | 285 | if (!strcmp(action, "fslist")) 286 | scan_action = &fslist_action; 287 | else if (!strcmp(action, "namei")) 288 | scan_action = &namei_action; 289 | else if (!strcmp(action, "lsost")) 290 | scan_action = &lsost_action; 291 | else { 292 | fprintf(stderr, "%s: unknown action \"%s\"\n", argv[0], action); 293 | return 1; 294 | } 295 | 296 | if (action_help) { 297 | scan_action->help(); 298 | return 2; 299 | } 300 | 301 | if (scan_action->init(device, action_argc, action_argv)) 302 | return 1; 303 | 304 | if (output) { 305 | outfile = fopen(output, "w"); 306 | if (!outfile) { 307 | com_err("fopen", errno, "opening output file\n"); 308 | return 1; 309 | } 310 | } else 311 | outfile = stdout; 312 | 313 | return run_scan(device, io_opts); 314 | } 315 | -------------------------------------------------------------------------------- /action-fslist.c: -------------------------------------------------------------------------------- 1 | /* action-fslist.c -- ne2scan-style listing for Lester 2 | * 3 | * Copyright (C) 2013 UT-Battelle. 4 | * 5 | * This file may be redistributed under the terms of the GNU General 6 | * Public License version 2; see COPYING for details. 
7 | */ 8 | #include "lester.h" 9 | #include "lustre_lov.h" 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | enum { 18 | FORMAT_NAME, 19 | FORMAT_INUM, 20 | FORMAT_EXTENDED, 21 | FORMAT_LUSTRE, 22 | FORMAT_NE2SCAN, 23 | }; 24 | 25 | static int fslist_format = FORMAT_NE2SCAN; 26 | static const char *fsname = "UNKNOWN"; 27 | static const char *user_note = ""; 28 | static int show_dirs = 0; 29 | static int show_fid = 0; 30 | static int show_one = 0; 31 | static const char *target_dev; 32 | static time_t cutoff_time; 33 | static int accessed_before; 34 | static int newer_than; 35 | static FILE *genhit; 36 | 37 | static void report_fid(FILE *f, struct ea_info *lov) 38 | { 39 | struct lustre_mdt_attrs *lma = lov->value; 40 | fprintf(f, "0x%lx:0x%x:0x%x", lma->lma_self_fid.f_seq, 41 | lma->lma_self_fid.f_oid, lma->lma_self_fid.f_ver); 42 | } 43 | 44 | static void report_osts(FILE *f, struct ea_info *lov) 45 | { 46 | struct lov_mds_md_v1 *lov1; 47 | struct lov_ost_data_v1 *ost; 48 | int cnt; 49 | 50 | lov1 = lov->value; 51 | if (lov1->lmm_magic == LOV_MAGIC_V1) { 52 | cnt = lov1->lmm_stripe_count; 53 | ost = lov1->lmm_objects; 54 | } else if (lov1->lmm_magic == LOV_MAGIC_V3) { 55 | struct lov_mds_md_v3 *lov3 = lov->value; 56 | cnt = lov3->lmm_stripe_count; 57 | ost = lov3->lmm_objects; 58 | } else { 59 | fprintf(f, "UNKNOWN LOV %x", lov1->lmm_magic); 60 | return; 61 | } 62 | 63 | if (!cnt) 64 | return; 65 | 66 | /* Print in reverse order to keep compatibility with ne2scan output; 67 | * make ost[] start at index 1. 
68 | */ 69 | ost--; 70 | fprintf(f, "%u:%lx", ost[cnt].l_ost_idx, 71 | (unsigned long) ost[cnt].l_object_id); 72 | while (--cnt) { 73 | fprintf(f, ",%u:%lx", ost[cnt].l_ost_idx, 74 | (unsigned long) ost[cnt].l_object_id); 75 | } 76 | } 77 | 78 | static int get_timestamp(const char *path, time_t *atime, time_t *mctime) 79 | { 80 | struct stat stats; 81 | 82 | if (stat(path, &stats)) { 83 | fprintf(stderr, "Unable to stat '%s': %s\n", path, 84 | strerror(errno)); 85 | return 1; 86 | } 87 | 88 | if (atime) 89 | *atime = stats.st_atime; 90 | if (mctime) { 91 | *mctime = stats.st_ctime; 92 | if (stats.st_ctime < stats.st_mtime) 93 | *mctime = stats.st_mtime; 94 | } 95 | 96 | return 0; 97 | } 98 | 99 | static void fslist_help(void) 100 | { 101 | fprintf(stderr, "Action arguments for fslist:\n"); 102 | fprintf(stderr, " format=FORMAT\tOutput format\n"); 103 | fprintf(stderr, "\tne2scan\t\t Full ne2scan output (default)\n"); 104 | fprintf(stderr, "\tlustre\t\t Include inode attributes and Lustre " 105 | "objects\n"); 106 | fprintf(stderr, "\textended\t Include inode attributes\n"); 107 | fprintf(stderr, "\tinum\t\t Include inode number\n"); 108 | fprintf(stderr, "\tname\t\t Only name of matching files\n"); 109 | fprintf(stderr, " show_one\t\tShow one name for hardlinked files " 110 | "(default all names)\n"); 111 | fprintf(stderr, " show_dirs\t\tAlso show directory names\n"); 112 | fprintf(stderr, " show_fid\t\tAlso show FID in lov, ne2scan " 113 | "format\n"); 114 | fprintf(stderr, " note=MSG\t\tAdd MSG to ne2scan header\n"); 115 | fprintf(stderr, " fs=NAME\t\tName filesystem for ne2scan output\n"); 116 | fprintf(stderr, " newer=FILE\t\tFiles newer than FILE\n"); 117 | fprintf(stderr, " before=FILE\t\tFiles not accessed since FILE\n"); 118 | fprintf(stderr, " genhit=FILE\t\tCopy entries matching newer/before " 119 | "options to FILE\n"); 120 | fprintf(stderr, "\t\t\t (Main output will get all files, matching " 121 | "or not)\n"); 122 | } 123 | 124 | static int 
fslist_init(const char *dev, int argc, const char **argv) 125 | { 126 | const char *genhit_name = NULL; 127 | target_dev = dev; 128 | 129 | while (argc--) { 130 | if (!strcmp(*argv, "show_dirs")) 131 | show_dirs = 1; 132 | else if (!strcmp(*argv, "show_fid")) 133 | show_fid = 1; 134 | else if (!strcmp(*argv, "show_one")) 135 | show_one = 1; 136 | else if (!strncmp(*argv, "fs=", 3)) 137 | fsname = *argv + 3; 138 | else if (!strncmp(*argv, "note=", 5)) 139 | user_note = *argv + 5; 140 | else if (!strncmp(*argv, "newer=", 6)) { 141 | if (newer_than || accessed_before) { 142 | fprintf(stderr, "Only one newer= or before= " 143 | "option allowed\n"); 144 | return 1; 145 | } 146 | if (get_timestamp(*argv + 6, NULL, &cutoff_time)) 147 | return 1; 148 | newer_than = 1; 149 | } else if (!strncmp(*argv, "before=", 7)) { 150 | if (newer_than || accessed_before) { 151 | fprintf(stderr, "Only one newer= or before= " 152 | "option allowed\n"); 153 | return 1; 154 | } 155 | if (get_timestamp(*argv + 7, &cutoff_time, NULL)) 156 | return 1; 157 | accessed_before = 1; 158 | } else if (!strncmp(*argv, "format=", 7)) { 159 | if (!strcmp(*argv, "format=ne2scan")) 160 | fslist_format = FORMAT_NE2SCAN; 161 | else if (!strcmp(*argv, "format=lustre")) 162 | fslist_format = FORMAT_LUSTRE; 163 | else if (!strcmp(*argv, "format=extended")) 164 | fslist_format = FORMAT_EXTENDED; 165 | else if (!strcmp(*argv, "format=inum")) 166 | fslist_format = FORMAT_INUM; 167 | else if (!strcmp(*argv, "format=name")) 168 | fslist_format = FORMAT_NAME; 169 | else { 170 | fprintf(stderr, "Unknown fslist format: %s\n", 171 | *argv + 7); 172 | return 1; 173 | } 174 | } else if (!strncmp(*argv, "genhit=", 7)) { 175 | genhit_name = *argv + 7; 176 | } else { 177 | fprintf(stderr, "Unknown fslist arg: %s\n", *argv); 178 | return 1; 179 | } 180 | 181 | argv++; 182 | } 183 | 184 | if (genhit_name) { 185 | if (!(newer_than || accessed_before)) { 186 | fprintf(stderr, "genhit only makes sense with newer= " 187 | "or 
before=\n"); 188 | return 1; 189 | } 190 | 191 | genhit = fopen(genhit_name, "w"); 192 | if (!genhit) { 193 | fprintf(stderr, "Unable to open genhit output " 194 | "file: %s\n", genhit_name); 195 | return 1; 196 | } 197 | } 198 | return 0; 199 | } 200 | 201 | static int fslist_iscan(ext2_ino_t ino, struct ext2_inode *inode, 202 | struct ea_info *eas) 203 | { 204 | /* We only show directories if asked, otherwise we'll want a 205 | * path name and inode info in the directory scan. 206 | */ 207 | if (!show_dirs && LINUX_S_ISDIR(inode->i_mode)) 208 | return ACTION_COMPLETE; 209 | 210 | /* Are we pruning the list based on a timestamp? 211 | * When looking for files accessed before the cutoff, we'll prune 212 | * it if any of the times are after the cutoff. For files newer 213 | * than the cutoff, we only care if the have been changed since then. 214 | * 215 | * Note, if we're sending data to a separate genhit file, then 216 | * we actually want everything for the main file. 217 | */ 218 | if (!genhit && accessed_before && (inode->i_atime >= cutoff_time || 219 | inode->i_mtime >= cutoff_time || 220 | inode->i_ctime >= cutoff_time)) 221 | return ACTION_COMPLETE; 222 | if (!genhit && newer_than && inode->i_ctime < cutoff_time && 223 | inode->i_mtime < cutoff_time) 224 | return ACTION_COMPLETE; 225 | 226 | return ACTION_WANT_PATH | ACTION_WANT_INODE; 227 | } 228 | 229 | static int fslist_dscan_begin(void) 230 | { 231 | time_t start = time(NULL); 232 | const char *e2ver; 233 | char host[256]; 234 | char stime[90]; 235 | 236 | /* We only put a header in for ne2scan compatibility */ 237 | if (fslist_format < FORMAT_NE2SCAN) 238 | return 0; 239 | 240 | ext2fs_get_library_version(&e2ver, NULL); 241 | gethostname(host, sizeof(host)); 242 | strftime(stime, sizeof(stime), "%a %b %d %X %Z %Y", gmtime(&start)); 243 | fprintf(outfile, "#IDENT#|%s|%s|%d|%s|%s|%s|0|0|0|0|%s|%s\n", 244 | PACKAGE_VERSION, e2ver, start, stime, host, target_dev, 245 | fsname, user_note); 246 | 247 | if 
(genhit) { 248 | fprintf(genhit, "#IDENT#|%s|%s|%d|%s|%s|%s|0|0|0|0|%s|%s\n", 249 | PACKAGE_VERSION, e2ver, start, stime, host, target_dev, 250 | fsname, user_note); 251 | } 252 | 253 | return 0; 254 | } 255 | 256 | static int fslist_output(FILE *f, ext2_ino_t ino, struct ext2_inode *inode, 257 | int offset, const char *name, int namelen, 258 | struct ea_info *lov, struct ea_info *lma) 259 | { 260 | if (fslist_format > FORMAT_INUM) { 261 | fprintf(f, "%u|%u|%u|%u|%u|%o|%lu|%u", inode->i_atime, 262 | inode->i_ctime, inode->i_mtime, inode_uid(*inode), 263 | inode_gid(*inode), inode->i_mode, 264 | (unsigned long) EXT2_I_SIZE(inode), ino); 265 | 266 | if (fslist_format >= FORMAT_LUSTRE) { 267 | /* TODO deal with default stripe info on dirs */ 268 | fprintf(f, "|"); 269 | if (lov && LINUX_S_ISREG(inode->i_mode)) 270 | report_osts(f, lov); 271 | } 272 | 273 | if (show_fid) { 274 | fprintf(f, "|"); 275 | if (lma) 276 | report_fid(f, lma); 277 | } 278 | 279 | fprintf(f, "|"); 280 | } else if (fslist_format == FORMAT_INUM) { 281 | fprintf(f, "%lu ", ino); 282 | } 283 | 284 | fprintf(f, "%.*s%.*s\n", offset, path_buffer, namelen, name); 285 | } 286 | 287 | static int fslist_dscan(ext2_ino_t ino, struct ext2_inode *inode, 288 | struct dentry *parent, const char *name, int namelen, 289 | struct ea_info *eas) 290 | { 291 | struct ea_info *lov = NULL; 292 | struct ea_info *lma = NULL; 293 | struct ea_info *ea; 294 | int requested = 0; 295 | int offset; 296 | 297 | if (!inode) 298 | return ACTION_WANT_INODE | ACTION_WANT_ATTRS; 299 | 300 | if (fslist_format >= FORMAT_LUSTRE) { 301 | for (ea = eas; ea->name; ea++) { 302 | if (ea->index != EXT2_XATTR_INDEX_TRUSTED && 303 | ea->index != EXT2_XATTR_INDEX_LUSTRE) 304 | continue; 305 | 306 | if (ea->name_len == 3 && !strncmp(ea->name, "lov", 3)) { 307 | lov = ea; 308 | /* Request the EA value if it isn't loaded */ 309 | if (!ea->value) { 310 | ea->requested = 1; 311 | requested++; 312 | } 313 | } 314 | 315 | if (show_fid && 
ea->name_len == 3 && 316 | !strncmp(ea->name, "lma", 3)) { 317 | lma = ea; 318 | if (!ea->value) { 319 | ea->requested = 1; 320 | requested++; 321 | } 322 | } 323 | } 324 | } 325 | 326 | if (requested) 327 | return ACTION_WANT_READ_ATTRS; 328 | 329 | offset = build_path(parent, 0); 330 | fslist_output(outfile, ino, inode, offset, name, namelen, lov, lma); 331 | 332 | if (genhit) { 333 | if (accessed_before && (inode->i_atime < cutoff_time && 334 | inode->i_mtime < cutoff_time && 335 | inode->i_ctime < cutoff_time)) { 336 | fslist_output(genhit, ino, inode, offset, name, 337 | namelen, lov, lma); 338 | } else if (newer_than && (inode->i_ctime >= cutoff_time || 339 | inode->i_mtime >= cutoff_time)) { 340 | fslist_output(genhit, ino, inode, offset, name, 341 | namelen, lov, lma); 342 | } 343 | } 344 | 345 | if (show_one) 346 | return ACTION_COMPLETE | ACTION_IGNORE_FILE; 347 | 348 | return ACTION_COMPLETE; 349 | } 350 | 351 | static int fslist_dscan_end(void) 352 | { 353 | /* We only put a footer in for ne2scan compatibility */ 354 | if (fslist_format == FORMAT_NE2SCAN) { 355 | fprintf(outfile, "#complete#%ld\n", time(NULL)); 356 | if (genhit) 357 | fprintf(genhit, "#complete#%ld\n", time(NULL)); 358 | } 359 | 360 | if (genhit) 361 | fclose(genhit); 362 | 363 | return 0; 364 | } 365 | 366 | struct action_ops fslist_action = { 367 | .name = "fslist", 368 | .init = fslist_init, 369 | .help = fslist_help, 370 | .iscan = fslist_iscan, 371 | .dscan_begin = fslist_dscan_begin, 372 | .dscan = fslist_dscan, 373 | .dscan_end = fslist_dscan_end, 374 | .flags = ACTION_FLAG_ISCAN_NO_EAS, 375 | }; 376 | -------------------------------------------------------------------------------- /lib/rbtree.c: -------------------------------------------------------------------------------- 1 | /* 2 | Red Black Trees 3 | (C) 1999 Andrea Arcangeli 4 | (C) 2002 David Woodhouse 5 | 6 | This program is free software; you can redistribute it and/or modify 7 | it under the terms of the GNU General 
  Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

  linux/lib/rbtree.c
*/

#include "rbtree.h"

/* Rotate the subtree rooted at @node to the left, updating parent links
 * and (if @node was the root) the tree root. */
static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
{
	struct rb_node *right = node->rb_right;
	struct rb_node *parent = rb_parent(node);

	if ((node->rb_right = right->rb_left))
		rb_set_parent(right->rb_left, node);
	right->rb_left = node;

	rb_set_parent(right, parent);

	if (parent)
	{
		if (node == parent->rb_left)
			parent->rb_left = right;
		else
			parent->rb_right = right;
	}
	else
		root->rb_node = right;
	rb_set_parent(node, right);
}

/* Mirror image of __rb_rotate_left(). */
static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
{
	struct rb_node *left = node->rb_left;
	struct rb_node *parent = rb_parent(node);

	if ((node->rb_left = left->rb_right))
		rb_set_parent(left->rb_right, node);
	left->rb_right = node;

	rb_set_parent(left, parent);

	if (parent)
	{
		if (node == parent->rb_right)
			parent->rb_right = left;
		else
			parent->rb_left = left;
	}
	else
		root->rb_node = left;
	rb_set_parent(node, left);
}

/* Rebalance the tree after @node has been linked in as a red leaf,
 * restoring the red-black invariants. */
void rb_insert_color(struct rb_node *node, struct rb_root *root)
{
	struct rb_node *parent, *gparent;

	while ((parent = rb_parent(node)) && rb_is_red(parent))
	{
		gparent = rb_parent(parent);

		if (parent == gparent->rb_left)
		{
			{
				register struct rb_node *uncle = gparent->rb_right;
				if (uncle && rb_is_red(uncle))
				{
					/* Case 1: red uncle -- recolor and
					 * continue from the grandparent. */
					rb_set_black(uncle);
					rb_set_black(parent);
					rb_set_red(gparent);
					node = gparent;
					continue;
				}
			}

			if (parent->rb_right == node)
			{
				/* Case 2: inner child -- rotate to case 3. */
				register struct rb_node *tmp;
				__rb_rotate_left(parent, root);
				tmp = parent;
				parent = node;
				node = tmp;
			}

			/* Case 3: outer child -- recolor and rotate. */
			rb_set_black(parent);
			rb_set_red(gparent);
			__rb_rotate_right(gparent, root);
		} else {
			{
				register struct rb_node *uncle = gparent->rb_left;
				if (uncle && rb_is_red(uncle))
				{
					rb_set_black(uncle);
					rb_set_black(parent);
					rb_set_red(gparent);
					node = gparent;
					continue;
				}
			}

			if (parent->rb_left == node)
			{
				register struct rb_node *tmp;
				__rb_rotate_right(parent, root);
				tmp = parent;
				parent = node;
				node = tmp;
			}

			rb_set_black(parent);
			rb_set_red(gparent);
			__rb_rotate_left(gparent, root);
		}
	}

	rb_set_black(root->rb_node);
}

/* Rebalance after removing a black node; @node (possibly NULL) replaced
 * the removed node and @parent is its parent. */
static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
			     struct rb_root *root)
{
	struct rb_node *other;

	while ((!node || rb_is_black(node)) && node != root->rb_node)
	{
		if (parent->rb_left == node)
		{
			other = parent->rb_right;
			if (rb_is_red(other))
			{
				rb_set_black(other);
				rb_set_red(parent);
				__rb_rotate_left(parent, root);
				other = parent->rb_right;
			}
			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
			    (!other->rb_right || rb_is_black(other->rb_right)))
			{
				rb_set_red(other);
				node = parent;
				parent = rb_parent(node);
			}
			else
			{
				if (!other->rb_right || rb_is_black(other->rb_right))
				{
					rb_set_black(other->rb_left);
					rb_set_red(other);
					__rb_rotate_right(other, root);
					other = parent->rb_right;
				}
				rb_set_color(other, rb_color(parent));
				rb_set_black(parent);
				rb_set_black(other->rb_right);
				__rb_rotate_left(parent, root);
				node = root->rb_node;
				break;
			}
		}
		else
		{
			other = parent->rb_left;
			if (rb_is_red(other))
			{
				rb_set_black(other);
				rb_set_red(parent);
				__rb_rotate_right(parent, root);
				other = parent->rb_left;
			}
			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
			    (!other->rb_right || rb_is_black(other->rb_right)))
			{
				rb_set_red(other);
				node = parent;
				parent = rb_parent(node);
			}
			else
			{
				if (!other->rb_left || rb_is_black(other->rb_left))
				{
					rb_set_black(other->rb_right);
					rb_set_red(other);
					__rb_rotate_left(other, root);
					other = parent->rb_left;
				}
				rb_set_color(other, rb_color(parent));
				rb_set_black(parent);
				rb_set_black(other->rb_left);
				__rb_rotate_right(parent, root);
				node = root->rb_node;
				break;
			}
		}
	}
	if (node)
		rb_set_black(node);
}

/* Unlink @node from the tree, splicing in its successor when it has two
 * children, then rebalance if a black node was removed. */
void rb_erase(struct rb_node *node, struct rb_root *root)
{
	struct rb_node *child, *parent;
	int color;

	if (!node->rb_left)
		child = node->rb_right;
	else if (!node->rb_right)
		child = node->rb_left;
	else
	{
		struct rb_node *old = node, *left;

		/* Two children: find the in-order successor (leftmost node
		 * of the right subtree) and move it into old's place. */
		node = node->rb_right;
		while ((left = node->rb_left) != NULL)
			node = left;

		if (rb_parent(old)) {
			if (rb_parent(old)->rb_left == old)
				rb_parent(old)->rb_left = node;
			else
				rb_parent(old)->rb_right = node;
		} else
			root->rb_node = node;

		child = node->rb_right;
		parent = rb_parent(node);
		color = rb_color(node);

		if (parent == old) {
			parent = node;
		} else {
			if (child)
				rb_set_parent(child, parent);
			parent->rb_left = child;

			node->rb_right = old->rb_right;
			rb_set_parent(old->rb_right, node);
		}

		node->rb_parent_color = old->rb_parent_color;
		node->rb_left = old->rb_left;
		rb_set_parent(old->rb_left, node);

		goto color;
	}

	parent = rb_parent(node);
	color = rb_color(node);

	if (child)
		rb_set_parent(child, parent);
	if (parent)
	{
		if (parent->rb_left == node)
			parent->rb_left = child;
		else
			parent->rb_right = child;
	}
	else
		root->rb_node = child;

 color:
	if (color == RB_BLACK)
		__rb_erase_color(child, parent, root);
}

/* Walk from @node to the root, invoking @func on every node along the
 * rebalance path (and on the affected sibling at each step). */
static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data)
{
up:
	func(node, data);
	{
		struct rb_node *parent;
		parent = rb_parent(node);
		if (!parent)
			return;

		if (node == parent->rb_left && parent->rb_right)
			func(parent->rb_right, data);
		else if (parent->rb_left)
			func(parent->rb_left, data);

		node = parent;
	}
	goto up;
}

/*
 * after inserting @node into the tree, update the tree to account for
 * both the new entry and any damage done by rebalance
 */
void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data)
{
	if (node->rb_left)
		node = node->rb_left;
	else if (node->rb_right)
		node = node->rb_right;

	rb_augment_path(node, func, data);
}

/*
 * before removing the node, find the deepest node on the rebalance path
 * that will still be there after @node gets removed
 */
struct rb_node *rb_augment_erase_begin(struct rb_node *node)
{
	struct rb_node *deepest;

	if (!node->rb_right && !node->rb_left)
		deepest = rb_parent(node);
	else if (!node->rb_right)
		deepest = node->rb_left;
	else if (!node->rb_left)
		deepest = node->rb_right;
	else {
		deepest = rb_next(node);
		if (deepest->rb_right)
			deepest = deepest->rb_right;
		else if (rb_parent(deepest) != node)
			deepest = rb_parent(deepest);
	}

	return deepest;
}

/*
 * after removal, update the tree to account for the removed entry
 * and any rebalance damage.
 */
void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data)
{
	if (node)
		rb_augment_path(node, func, data);
}

/*
 * This function returns the first node (in sort order) of the tree.
 */
struct rb_node *rb_first(const struct rb_root *root)
{
	struct rb_node *n;

	n = root->rb_node;
	if (!n)
		return NULL;
	while (n->rb_left)
		n = n->rb_left;
	return n;
}

/* Return the last node (in sort order) of the tree, or NULL if empty. */
struct rb_node *rb_last(const struct rb_root *root)
{
	struct rb_node *n;

	n = root->rb_node;
	if (!n)
		return NULL;
	while (n->rb_right)
		n = n->rb_right;
	return n;
}

/* Return the in-order successor of @node, or NULL at the end. */
struct rb_node *rb_next(const struct rb_node *node)
{
	struct rb_node *parent;

	/* Self-parented nodes are "empty" (not in any tree). */
	if (rb_parent(node) == node)
		return NULL;

	/* If we have a right-hand child, go down and then left as far
	   as we can. */
	if (node->rb_right) {
		node = node->rb_right;
		while (node->rb_left)
			node=node->rb_left;
		return (struct rb_node *)node;
	}

	/* No right-hand children.  Everything down and left is
	   smaller than us, so any 'next' node must be in the general
	   direction of our parent. Go up the tree; any time the
	   ancestor is a right-hand child of its parent, keep going
	   up. First time it's a left-hand child of its parent, said
	   parent is our 'next' node. */
	while ((parent = rb_parent(node)) && node == parent->rb_right)
		node = parent;

	return parent;
}

/* Return the in-order predecessor of @node, or NULL at the beginning. */
struct rb_node *rb_prev(const struct rb_node *node)
{
	struct rb_node *parent;

	if (rb_parent(node) == node)
		return NULL;

	/* If we have a left-hand child, go down and then right as far
	   as we can. */
	if (node->rb_left) {
		node = node->rb_left;
		while (node->rb_right)
			node=node->rb_right;
		return (struct rb_node *)node;
	}

	/* No left-hand children. Go up till we find an ancestor which
	   is a right-hand child of its parent */
	while ((parent = rb_parent(node)) && node == parent->rb_left)
		node = parent;

	return parent;
}

/* Replace @victim with @new in the tree without rebalancing; @new takes
 * over victim's links and color.  The caller guarantees @new sorts into
 * the same position. */
void rb_replace_node(struct rb_node *victim, struct rb_node *new,
		     struct rb_root *root)
{
	struct rb_node *parent = rb_parent(victim);

	/* Set the surrounding nodes to point to the replacement */
	if (parent) {
		if (victim == parent->rb_left)
			parent->rb_left = new;
		else
			parent->rb_right = new;
	} else {
		root->rb_node = new;
	}
	if (victim->rb_left)
		rb_set_parent(victim->rb_left, new);
	if (victim->rb_right)
		rb_set_parent(victim->rb_right, new);

	/* Copy the pointers/colour from the victim to the replacement */
	*new = *victim;
}
--------------------------------------------------------------------------------
/dtree.c:
--------------------------------------------------------------------------------
/* dtree.c -- Directory naming code
 *
 * Copyright (C) 2013 UT-Battelle.
 *
 * Much of this code is derived from Lustre e2fsprogs/e2scan; this file may
 * be redistributed under the terms of the GNU General Public License version
 * 2; see COPYING for details.
8 | */ 9 | #define _GNU_SOURCE 10 | #define _FILE_OFFSET_BITS 64 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "lester.h" 18 | 19 | #define DEBUG_REFS 1 20 | 21 | static ext2_ino_t visible_root_ino; 22 | 23 | /* 24 | create root dentry 25 | root->connected_to_root = 1 26 | root->d_path = "/" 27 | for each directory block: 28 | if (directory is not in memory) 29 | create new directory dentry 30 | set directory->connected_to_root = 0 31 | for each entry found in directory block: 32 | if (entry is a subdirectory) 33 | if (subdir is in memory) 34 | subdir->d_parent = directory 35 | if (directory->connected_to_root) 36 | recurse for each subsubdir 37 | subsubdir->connected_to_root = 1 38 | subsubdir->d_parent = subdir 39 | subsubdir->d_path = subdir->d_path + name 40 | for each non-directory entry on subdir 41 | generate full pathname and output 42 | drop filename entry from RAM 43 | else 44 | create new subdir dentry 45 | subdir->connected_to_root = directory->connected_to_root 46 | subdir->d_parent = directory 47 | if (directory->connected_to_root) 48 | subdir->d_path = directory->d_path + name 49 | else if (file is interesting) 50 | if (directory->connected_to_root) 51 | generate full pathname and output 52 | else 53 | create filename entry 54 | attach filename to directory 55 | */ 56 | 57 | char *path_buffer; 58 | 59 | static int max_path_size; 60 | static struct dentry *path_last_dentry; 61 | static int path_last_offset; 62 | 63 | static struct rb_root dentry_tree = RB_ROOT; 64 | 65 | void ignore_file(ext2_ino_t ino) 66 | { 67 | ext2fs_fast_unmark_inode_bitmap2(fs->inode_map, ino); 68 | } 69 | 70 | int is_file_interesting(ext2_ino_t ino) 71 | { 72 | return ext2fs_fast_test_inode_bitmap2(fs->inode_map, ino); 73 | } 74 | 75 | struct dentry *find_dentry(ext2_ino_t ino) 76 | { 77 | struct rb_node *n = dentry_tree.rb_node; 78 | struct dentry *dentry; 79 | 80 | while (n) { 81 | dentry = rb_entry(n, struct dentry, tree); 
82 | if (ino < dentry->ino) 83 | n = n->rb_left; 84 | else if (ino > dentry->ino) 85 | n = n->rb_right; 86 | else 87 | return dentry; 88 | } 89 | return NULL; 90 | } 91 | 92 | static struct dentry *__find_dentry_or_parent(ext2_ino_t ino, 93 | struct rb_node **parent, 94 | struct rb_node ***pparent) 95 | { 96 | struct rb_node **p = &dentry_tree.rb_node; 97 | struct dentry *dentry; 98 | 99 | *parent = NULL; 100 | while (*p) { 101 | *parent = *p; 102 | dentry = rb_entry(*parent, struct dentry, tree); 103 | 104 | if (ino < dentry->ino) 105 | p = &(*p)->rb_left; 106 | else if (ino > dentry->ino) 107 | p = &(*p)->rb_right; 108 | else 109 | return dentry; 110 | } 111 | 112 | *pparent = p; 113 | return NULL; 114 | } 115 | 116 | static struct dentry *find_or_create_dentry(ext2_ino_t ino, int *created) 117 | { 118 | struct rb_node *parent, **pparent; 119 | struct dentry *dentry; 120 | 121 | *created = 0; 122 | dentry = __find_dentry_or_parent(ino, &parent, &pparent); 123 | if (!dentry) { 124 | dentry = create_dentry(ino); 125 | rb_link_node(&dentry->tree, parent, pparent); 126 | rb_insert_color(&dentry->tree, &dentry_tree); 127 | *created = 1; 128 | } 129 | return dentry; 130 | } 131 | 132 | static void link_to_parent(struct dentry *parent, struct dentry *child) 133 | { 134 | list_add(&child->list, &parent->d_children); 135 | child->d_parent = parent; 136 | get_dentry(parent); 137 | } 138 | 139 | void dentry_attach_name(struct dentry *dentry, int namelen, const char *name) 140 | { 141 | if (dentry->name) { 142 | if (namelen == 1 && (!strcmp(name, ".") || !strcmp(name, "/"))) 143 | return; 144 | fprintf(stderr, "BUG: dentry has name: %s, adding name %.*s\n", 145 | dentry->name, namelen, name); 146 | exit(1); 147 | } 148 | asprintf(&dentry->name, "%.*s", namelen, name); 149 | dentry->namelen = namelen; 150 | } 151 | 152 | /* create_root_dentries() 153 | * - look up $ROOT in the filesystem 154 | * - build dentry for each component of the path, starting at / 155 | * - for each 
component of the path except the last, mark dentry "not_in_root" 156 | */ 157 | int create_root_dentries(char *root) 158 | { 159 | int created; 160 | char *name; 161 | ext2_ino_t ino; 162 | struct dentry *child, *parent; 163 | struct ext2_inode inode; 164 | char *copy, *p; 165 | 166 | copy = p = strdup(root); 167 | 168 | ino = EXT2_ROOT_INO; 169 | name = "/"; 170 | parent = NULL; 171 | do { 172 | child = find_or_create_dentry(ino, &created); 173 | dentry_attach_name(child, strlen(name), name); 174 | child->connected_to_root = 1; 175 | child->not_in_root = 1; 176 | if (parent != NULL) 177 | link_to_parent(parent, child); 178 | parent = child; 179 | 180 | name = strtok(copy, "/"); 181 | if (name == NULL) 182 | break; 183 | copy = NULL; 184 | 185 | if (ext2fs_lookup(fs, ino, name, strlen(name), NULL, &ino)) 186 | return ENOENT; 187 | } while (1); 188 | 189 | if (ext2fs_read_inode(fs, ino, &inode)) 190 | return EIO; 191 | 192 | if (!LINUX_S_ISDIR(inode.i_mode)) { 193 | return ENOTDIR; 194 | } 195 | child->not_in_root = 0; 196 | visible_root_ino = ino; 197 | 198 | if (verbosity) 199 | fprintf(stdout, "visible root: \"%s\"\n", root); 200 | 201 | free(p); 202 | 203 | return 0; 204 | } 205 | 206 | static void check_path_size(int len) 207 | { 208 | if (len < max_path_size) 209 | return; 210 | 211 | if (!max_path_size) 212 | max_path_size = 8192; 213 | 214 | while (max_path_size <= len) 215 | max_path_size *= 2; 216 | 217 | if (path_buffer) 218 | free(path_buffer); 219 | path_buffer = malloc(max_path_size); 220 | if (!path_buffer) { 221 | fprintf(stderr, "unable able allocate path buffer\n"); 222 | exit(1); 223 | } 224 | } 225 | 226 | static int __build_path(struct dentry *dentry, int len) 227 | { 228 | /* On the descent of the tree to root, len accumulates the 229 | * length of the path. At the root, we ensure we have a large 230 | * enough buffer, and then we'll use our return value to let the 231 | * caller know where to put their path component. 
232 | * 233 | * We return -1 if this path is not actually part of the visible 234 | * tree. 235 | */ 236 | int offset; 237 | 238 | if (dentry->ino == visible_root_ino) { 239 | /* Account for the root and trailing NULL */ 240 | len += 2; 241 | check_path_size(len); 242 | path_buffer[0] = '/'; 243 | return 1; 244 | } 245 | 246 | if (dentry->ino == EXT2_ROOT_INO) { 247 | /* This path is not visible from the designated root */ 248 | return -1; 249 | } 250 | 251 | /* Account for our name, plus the directory seperator */ 252 | len += dentry->namelen + 1; 253 | offset = __build_path(dentry->d_parent, len); 254 | if (offset == -1) 255 | return -1; 256 | 257 | memcpy(path_buffer + offset, dentry->name, dentry->namelen); 258 | offset += dentry->namelen; 259 | path_buffer[offset++] = '/'; 260 | 261 | return offset; 262 | } 263 | 264 | int build_path(struct dentry *dentry, int len) 265 | { 266 | if (path_last_dentry != dentry) { 267 | path_last_offset = __build_path(dentry, 0); 268 | path_last_dentry = dentry; 269 | } 270 | 271 | return path_last_offset; 272 | } 273 | 274 | static int path_is_visible(struct dentry *dentry) 275 | { 276 | static struct dentry *last_dentry; 277 | static int visible; 278 | 279 | if (path_last_dentry == dentry) 280 | return path_last_offset != -1; 281 | 282 | if (last_dentry != dentry) { 283 | last_dentry = dentry; 284 | visible = 1; 285 | while (dentry->ino != visible_root_ino) { 286 | if (dentry->ino == EXT2_ROOT_INO) { 287 | visible = 0; 288 | break; 289 | } 290 | dentry = dentry->d_parent; 291 | } 292 | } 293 | 294 | return visible; 295 | } 296 | 297 | static void connect_subtree_to_root(struct dentry *parent, int not_in_root) 298 | { 299 | struct dentry *child, *p; 300 | 301 | assert(!parent->is_file); 302 | parent->connected_to_root = 1; 303 | parent->not_in_root = not_in_root; 304 | 305 | /* Force our parent dentry to stick around until we're done */ 306 | get_dentry(parent); 307 | 308 | /* We try to print out the parent directory before its 
children, 309 | * but if printing the directory requires async requests then 310 | * it may be delayed. 311 | */ 312 | if (!parent->is_printed) { 313 | parent->is_printed = 1; 314 | if (is_file_interesting(parent->ino) && 315 | path_is_visible(parent)) { 316 | path_resolved(parent->ino, parent->d_parent, 317 | parent->name, parent->namelen, 318 | parent); 319 | } 320 | 321 | /* We held a reference from creation until we could try 322 | * to print it; we've done our part -- path_resolved() 323 | * must have its own reference if it needs to do async IO. 324 | */ 325 | put_dentry(parent); 326 | } 327 | 328 | list_for_each_entry_safe(child, p, &parent->d_children, list) { 329 | if (child->is_file) { 330 | if (is_file_interesting(child->ino) && 331 | path_is_visible(child)) { 332 | path_resolved(child->ino, parent, 333 | child->name, child->namelen, 334 | child); 335 | } 336 | 337 | /* As above, we've held our reference until we tried 338 | * to print the path name. 339 | */ 340 | put_dentry(child); 341 | continue; 342 | } 343 | 344 | connect_subtree_to_root(child, not_in_root); 345 | } 346 | 347 | put_dentry(parent); 348 | } 349 | 350 | struct dentry *create_dentry(ext2_ino_t ino) 351 | { 352 | struct dentry *dentry; 353 | 354 | dentry = calloc(1, sizeof(struct dentry)); 355 | if (!dentry) { 356 | fprintf(stderr, "malloc failed"); 357 | exit(1); 358 | } 359 | dentry->ino = ino; 360 | INIT_LIST_HEAD(&dentry->d_children); 361 | INIT_LIST_HEAD(&dentry->list); 362 | RB_CLEAR_NODE(&dentry->tree); 363 | 364 | dentries_created++; 365 | 366 | /* We hold a reference for each dentry until we get a chance to 367 | * try to print it. 
368 | */ 369 | dentry->refs = 1; 370 | 371 | return dentry; 372 | } 373 | 374 | void get_dentry(struct dentry *dentry) 375 | { 376 | if (DEBUG_REFS && dentry->refs == 0) { 377 | fprintf(stderr, "ERROR get_dentry(ino %u)\n", dentry->ino); 378 | return; 379 | } 380 | 381 | dentry->refs++; 382 | } 383 | 384 | void put_dentry(struct dentry *dentry) 385 | { 386 | if (DEBUG_REFS && dentry->refs == 0) { 387 | fprintf(stderr, "ERROR put_dentry(ino %u)\n", dentry->ino); 388 | return; 389 | } 390 | 391 | if (--dentry->refs) 392 | return; 393 | 394 | /* Refcount hit zero, free the dentry */ 395 | rb_erase(&dentry->tree, &dentry_tree); 396 | list_del(&dentry->list); 397 | 398 | if (dentry->d_parent) 399 | put_dentry(dentry->d_parent); 400 | 401 | assert(list_empty(&dentry->d_children)); 402 | 403 | free(dentry->name); 404 | free(dentry); 405 | 406 | dentries_freed++; 407 | } 408 | 409 | void dtree_add_dir(ext2_ino_t ino) 410 | { 411 | struct dentry *dentry; 412 | int created; 413 | 414 | dentry = find_or_create_dentry(ino, &created); 415 | dentry->is_dir = 1; 416 | } 417 | 418 | void dtree_get_ino(ext2_ino_t ino) 419 | { 420 | struct dentry *dentry; 421 | 422 | dentry = find_dentry(ino); 423 | get_dentry(dentry); 424 | } 425 | 426 | void dtree_put_ino(ext2_ino_t ino) 427 | { 428 | struct dentry *dentry; 429 | 430 | dentry = find_dentry(ino); 431 | put_dentry(dentry); 432 | } 433 | 434 | int dtree_name_dir(struct dentry *parent, ext2_ino_t ino, 435 | const char *name, int namelen) 436 | { 437 | struct dentry *subdir; 438 | 439 | subdir = find_dentry(ino); 440 | if (!subdir) { 441 | /* This is a new subdirectory, so ignore it */ 442 | return 0; 443 | } 444 | 445 | if (subdir->d_parent) { 446 | /* We've been connected into the tree, but we must have been 447 | * renamed since then (active filesystem). Just keep the 448 | * old name for consistency. 
449 | */ 450 | return 0; 451 | } 452 | 453 | dentry_attach_name(subdir, namelen, name); 454 | link_to_parent(parent, subdir); 455 | if (parent->connected_to_root) 456 | connect_subtree_to_root(subdir, parent->not_in_root); 457 | 458 | return 0; 459 | } 460 | 461 | int dtree_name_file(struct dentry *parent, ext2_ino_t ino, 462 | const char *name, int namelen) 463 | { 464 | struct dentry *file; 465 | 466 | if (!is_file_interesting(ino)) 467 | return 0; 468 | 469 | if (parent->connected_to_root) { 470 | if (path_is_visible(parent)) 471 | path_resolved(ino, parent, name, namelen, NULL); 472 | 473 | /* Since we never created a dentry for this name, we don't 474 | * have a reference to our parent. 475 | */ 476 | return 0; 477 | } 478 | 479 | /* We cannot name this inode just yet, so create a dentry for it 480 | */ 481 | file = create_dentry(ino); 482 | file->is_file = 1; 483 | dentry_attach_name(file, namelen, name); 484 | link_to_parent(parent, file); 485 | 486 | return 0; 487 | } 488 | -------------------------------------------------------------------------------- /iscan.c: -------------------------------------------------------------------------------- 1 | /* iscan.c -- Inode scan phase of Lester 2 | * 3 | * Copyright (C) 2013 UT-Battelle. 4 | * 5 | * Much of this code is derived from Lustre e2fsprogs/e2scan; this file may 6 | * may be redistributed under the terms of the GNU General Public License 7 | * version 2; see COPYING for details. 
8 | */ 9 | #define _GNU_SOURCE 10 | #define _FILE_OFFSET_BITS 64 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "lester.h" 16 | 17 | struct iscan_state { 18 | ext2_ino_t ino; 19 | struct ea_info *eas; 20 | void *ext_attr_block; 21 | unsigned int pending_ios; 22 | struct ext2_inode inode[0]; 23 | }; 24 | 25 | unsigned long grp_readahead = 0; 26 | 27 | static unsigned long inode_scanned; 28 | static unsigned long inode_candidate; 29 | static unsigned long inode_path_requested; 30 | static int request_end_iscan; 31 | 32 | static char *block_iter_buf; 33 | 34 | static struct iscan_state *iscan_save_state(ext2_ino_t ino, void *inode) 35 | { 36 | size_t inode_size = EXT2_INODE_SIZE(fs->super); 37 | struct iscan_state *state; 38 | 39 | state = calloc(1, sizeof(*state) + inode_size); 40 | if (!state) { 41 | fprintf(stderr, "Unable to allocate iscan state\n"); 42 | exit(1); 43 | } 44 | 45 | memcpy(state->inode, inode, inode_size); 46 | state->ino = ino; 47 | return state; 48 | } 49 | 50 | static void release_iscan_state(struct iscan_state *state) 51 | { 52 | release_ea_info(state->eas); 53 | if (state->ext_attr_block) 54 | free(state->ext_attr_block); 55 | free(state); 56 | } 57 | 58 | static errcode_t group_done(ext2_filsys fs, ext2_inode_scan scan, dgrp_t group, 59 | void *vp) 60 | { 61 | int ra_blks; 62 | static dgrp_t ra_group; 63 | unsigned int inodes, inode_size; 64 | blk64_t ra_start; 65 | 66 | /* Start read ahead on the next block group descriptor, taking 67 | * care to not run on after the last one, and to only read the 68 | * blocks with active data in them. 
69 | */ 70 | if (ra_group >= fs->group_desc_count) 71 | return 0; 72 | 73 | /* if we skipped readahead on this one, then don't get too far ahead */ 74 | if (ext2fs_bg_flags(fs, group) & EXT2_BG_INODE_UNINIT) 75 | return 0; 76 | 77 | while (ext2fs_bg_flags(fs, ra_group) & EXT2_BG_INODE_UNINIT) { 78 | ra_group++; 79 | if (ra_group >= fs->group_desc_count) 80 | return 0; 81 | } 82 | 83 | ra_start = ext2fs_inode_table_loc(fs, ra_group); 84 | inodes = EXT2_INODES_PER_GROUP(fs->super); 85 | inodes -= ext2fs_bg_itable_unused(fs, ra_group); 86 | inode_size = EXT2_INODE_SIZE(fs->super); 87 | ra_blks = (inodes + (fs->blocksize / inode_size - 1)) * 88 | inode_size / fs->blocksize; 89 | 90 | io_channel_readahead(fs->io, ra_start, ra_blks); 91 | ra_group++; 92 | return 0; 93 | } 94 | 95 | 96 | /* Main worker for directory block iteration; add this block to the 97 | * filesystem's dblist. 98 | */ 99 | static int block_async_iter_cb(ext2_filsys fs, blk64_t blocknr, 100 | e2_blkcnt_t blockcnt, void *priv) 101 | { 102 | ext2_ino_t ino = (ext2_ino_t) (unsigned long) priv; 103 | 104 | /* blockcnt is u64, but the constant for indirect blocks are given 105 | * as ints... 106 | */ 107 | if ((int) blockcnt < 0) 108 | return 0; 109 | 110 | if (ext2fs_add_dir_block2(fs->dblist, ino, blocknr, blockcnt)) 111 | return BLOCK_ABORT; 112 | 113 | /* Hold a reference to the dentry for this inode; we created the 114 | * entry during the inode scan, and we need to keep it in place 115 | * until we iterate every block of the directory contents. The 116 | * ref gets released in dscan.c:dblist_iterate_cb(). 
117 | */ 118 | dtree_get_ino(ino); 119 | 120 | return 0; 121 | } 122 | 123 | static void block_async_iter_end(ext2_filsys fs, errcode_t error, void *priv) 124 | { 125 | if (error) { 126 | com_err("block_iterate_async", error, "during iteration\n"); 127 | exit(1); 128 | } 129 | } 130 | 131 | static int block_iterate_cb(ext2_filsys fs, blk64_t *block_nr, 132 | e2_blkcnt_t blockcnt, blk64_t ref_block, 133 | int ref_offset, void *priv_data) 134 | { 135 | return block_async_iter_cb(fs, *block_nr, blockcnt, priv_data); 136 | } 137 | 138 | static int add_directory(ext2_ino_t ino, struct ext2_inode *inode) 139 | { 140 | errcode_t rc; 141 | 142 | dtree_add_dir(ino); 143 | 144 | if (use_unix) { 145 | rc = ext2fs_block_iterate3(fs, ino, 0, block_iter_buf, 146 | block_iterate_cb, 147 | (void *)(unsigned long) ino); 148 | if (rc) { 149 | com_err("ext2fs_block_iterate2", rc, 150 | "failed during block iteration\n"); 151 | return 1; 152 | } 153 | } else { 154 | rc = ext2fs_block_iterate_async(fs, ino, inode, 155 | block_async_iter_cb, 156 | block_async_iter_end, 157 | (void *)(unsigned long) ino); 158 | if (rc) { 159 | com_err("ext2fs_block_iterate_async", rc, 160 | "failed to initiate async iteration"); 161 | return 1; 162 | } 163 | 164 | if (enforce_async_limit()) 165 | return 1; 166 | } 167 | 168 | return 0; 169 | } 170 | 171 | static void iscan_ea_done(void *data) 172 | { 173 | /* We read in a EA requested by the action; see if we've got all of 174 | * them and are ready to retry the iscan action. 
175 | */ 176 | struct iscan_state *state = data; 177 | int action; 178 | 179 | state->pending_ios--; 180 | if (state->pending_ios) 181 | return; 182 | 183 | action = scan_action->iscan(state->ino, state->inode, state->eas); 184 | if (action & ACTION_END_SCAN) 185 | request_end_iscan = 1; 186 | else if (action & (ACTION_WANT_READ_ATTRS | ACTION_WANT_ATTRS)) { 187 | fprintf(stderr, "BUG: iscan action final attempt did not " 188 | "complete\n"); 189 | exit(1); 190 | } else if (action & ACTION_WANT_PATH) { 191 | inode_path_requested++; 192 | } else { 193 | /* We don't want a path, so we're done with this one. */ 194 | ext2fs_fast_unmark_inode_bitmap2(fs->inode_map, state->ino); 195 | } 196 | 197 | release_iscan_state(state); 198 | } 199 | 200 | static void iscan_read_attrs(struct iscan_state *state) 201 | { 202 | unsigned int ea_reqs = 0; 203 | struct ea_info *ea; 204 | 205 | if (!state->eas) { 206 | fprintf(stderr, "BUG: iscan action wants to read " 207 | "EAs, but none stored\n"); 208 | exit(1); 209 | } 210 | 211 | for (ea = state->eas; ea->name; ea++) { 212 | if (!ea->requested) 213 | continue; 214 | 215 | ea_reqs++; 216 | if (!ea->ext_ino) { 217 | fprintf(stderr, "BUG: iscan action requested " 218 | "read of non-external attr.\n"); 219 | exit(1); 220 | } 221 | } 222 | 223 | if (!ea_reqs) { 224 | fprintf(stderr, "BUG: iscan action requested extern " 225 | "attr read, but flagged none.\n"); 226 | exit(1); 227 | } 228 | 229 | /* initiate a file read for each external EA requested; keep 230 | * an extra reference while submitting the requests to avoid 231 | * an early completion callback. 
232 | */ 233 | state->pending_ios = ea_reqs + 1; 234 | for (ea = state->eas; ea->name; ea++) { 235 | if (ea->requested) 236 | async_read_ea_value(ea, iscan_ea_done, state); 237 | } 238 | 239 | iscan_ea_done(state); 240 | return; 241 | } 242 | 243 | static int iscan_read_attr_cb(ext2_loff_t offset, ssize_t size, void *priv1, 244 | unsigned long priv2, void *xattr) 245 | { 246 | /* We just read in the external xattr block for the inode; parse 247 | * the EA chain and hand it to the iscan action. We may have more 248 | * IO to do if any of the EA values are stored in an external file. 249 | */ 250 | struct iscan_state *state = priv1; 251 | struct ea_info *eas; 252 | int action; 253 | 254 | ea_ext_block_read++; 255 | eas = build_ea_info(state->inode, xattr); 256 | action = scan_action->iscan(state->ino, state->inode, eas); 257 | 258 | /* Don't try to start new IO if we're ending, but we still need 259 | * to clean up our state below. 260 | */ 261 | if (action & ACTION_END_SCAN) { 262 | request_end_iscan = 1; 263 | } else if (action & ACTION_WANT_READ_ATTRS) { 264 | /* We need to save the external attribute block, but ea_info 265 | * points into it; copy the data, then reparse it. We can 266 | * then walk the chains (they will be in the same order) 267 | * and transfer the requests over. 268 | */ 269 | state->ext_attr_block = malloc(fs->blocksize); 270 | if (!state->ext_attr_block) { 271 | fprintf(stderr, "Unable to allocate attr block\n"); 272 | exit(1); 273 | } 274 | memcpy(state->ext_attr_block, xattr, fs->blocksize); 275 | state->eas = ea_memory_change(eas, state->inode, 276 | state->ext_attr_block); 277 | iscan_read_attrs(state); 278 | /* Don't release state just yet; we still have IO pending */ 279 | return 0; 280 | } else if (action & ACTION_WANT_PATH) { 281 | inode_path_requested++; 282 | } else { 283 | /* We don't want a path nor the xattr, so this inode 284 | * is no longer interesting... 
285 | */ 286 | ext2fs_fast_unmark_inode_bitmap2(fs->inode_map, state->ino); 287 | } 288 | 289 | release_ea_info(eas); 290 | release_iscan_state(state); 291 | return 0; 292 | } 293 | 294 | int scan_inodes(const char *dev) 295 | { 296 | struct timeval scan_start, scan_end, async_end, diff; 297 | struct ea_info *eas = NULL; 298 | struct iscan_state *state; 299 | struct ext2_inode *inode; 300 | size_t inode_size; 301 | ext2_inode_scan scan; 302 | ext2_ino_t ino; 303 | errcode_t rc; 304 | int i, action; 305 | 306 | if (use_unix) { 307 | rc = ext2fs_get_mem(fs->blocksize * 3, &block_iter_buf); 308 | if (rc) { 309 | com_err("ext2fs_get_mem", rc, 310 | "allocating iter buff\n"); 311 | return 1; 312 | } 313 | } 314 | 315 | inode_size = EXT2_INODE_SIZE(fs->super); 316 | inode = malloc(inode_size); 317 | if (!inode) { 318 | fprintf(stderr, "Could not allocate inode storage for scan\n"); 319 | return 1; 320 | } 321 | 322 | rc = create_root_dentries(root_path); 323 | if (rc) { 324 | com_err("create_root_dentries", rc, 325 | "creating root dentries\n"); 326 | return 1; 327 | } 328 | 329 | if (scan_action->iscan_begin && scan_action->iscan_begin()) 330 | return 1; 331 | 332 | gettimeofday(&scan_start, NULL); 333 | 334 | rc = ext2fs_open_inode_scan(fs, fs->inode_blocks_per_group, &scan); 335 | if (rc) { 336 | com_err("ext2fs_open_inode_scan", rc, 337 | "opening inode scan on %s\n", dev); 338 | fprintf(stderr, "failed to open inode scan\n"); 339 | return 1; 340 | } 341 | ext2fs_set_inode_callback(scan, group_done, NULL); 342 | 343 | for (i = 0; i < grp_readahead; i++) 344 | group_done(fs, scan, 0, NULL); 345 | 346 | while (!ext2fs_get_next_inode_full(scan, &ino, inode, inode_size)) { 347 | if (request_end_iscan || !ino) 348 | break; 349 | 350 | inode_scanned++; 351 | 352 | /* Deleted inode? 
*/ 353 | if (!ext2fs_fast_test_inode_bitmap2(fs->inode_map, ino)) 354 | continue; 355 | 356 | /* Ignore inodes that hold the external EA values */ 357 | if (inode->i_flags & EXT4_EA_INODE_FL) 358 | continue; 359 | 360 | inode_candidate++; 361 | 362 | if (LINUX_S_ISDIR(inode->i_mode) && add_directory(ino, inode)) 363 | return 1; 364 | 365 | if (scan_action->flags & ACTION_FLAG_ISCAN_NO_EAS) { 366 | action = scan_action->iscan(ino, inode, NULL); 367 | } else if (!inode->i_file_acl) { 368 | eas = build_ea_info(inode, NULL); 369 | action = scan_action->iscan(ino, inode, eas); 370 | } else { 371 | /* We need to come back to this inode as it has 372 | * an external attribute block... 373 | */ 374 | state = iscan_save_state(ino, inode); 375 | rc = io_channel_async_read(fs->io, inode->i_file_acl, 376 | 1, iscan_read_attr_cb, state, 377 | 0); 378 | if (rc) { 379 | com_err("io_channel_async_read", rc, 380 | "failed to start IO"); 381 | return 1; 382 | } 383 | continue; 384 | } 385 | 386 | if (action & ACTION_WANT_READ_ATTRS) { 387 | /* We asked for the externally stored EAs to be read 388 | * in, so we need to set up for async IO. 389 | */ 390 | state = iscan_save_state(ino, inode); 391 | state->eas = ea_memory_change(eas, state->inode, NULL); 392 | iscan_read_attrs(state); 393 | 394 | /* ea_memory_change() releases the EA info, so forget 395 | * about it. 396 | */ 397 | eas = NULL; 398 | } else if (action & ACTION_WANT_PATH) { 399 | inode_path_requested++; 400 | } else { 401 | /* We don't want a path nor the xattr, so this inode 402 | * is no longer interesting... 
403 | */ 404 | ext2fs_fast_unmark_inode_bitmap2(fs->inode_map, ino); 405 | } 406 | 407 | release_ea_info(eas); 408 | 409 | if (action & ACTION_END_SCAN) 410 | request_end_iscan = 1; 411 | } 412 | 413 | gettimeofday(&scan_end, NULL); 414 | 415 | ext2fs_close_inode_scan(scan); 416 | 417 | if (!use_unix) { 418 | rc = io_channel_finish_async(fs->io, 0); 419 | if (rc) { 420 | com_err("io_channel_finish_async", rc, 421 | "failed to complete IO"); 422 | return 1; 423 | } 424 | 425 | gettimeofday(&async_end, NULL); 426 | } 427 | 428 | if (verbosity) { 429 | diff_timevals(&scan_start, &scan_end, &diff); 430 | fprintf(stdout, "counted %lu inodes (%lu non-deleted) in " 431 | "%d.%06u\n", inode_scanned, inode_candidate, 432 | (int) diff.tv_sec, (unsigned int) diff.tv_usec); 433 | } 434 | 435 | if (verbosity && !use_unix) { 436 | aio_stats stats; 437 | 438 | diff_timevals(&scan_end, &async_end, &diff); 439 | fprintf(stdout, "finished remaining inode scan async work in " 440 | "%d.%06u seconds\n", 441 | (int) diff.tv_sec, (unsigned int) diff.tv_usec); 442 | 443 | rc = io_channel_get_stats(fs->io, (io_stats *) &stats); 444 | if (rc) { 445 | com_err("io_channel_get_stats", rc, 446 | "failed to get stats"); 447 | return 1; 448 | } 449 | 450 | fprintf(stdout, "Had total %lu async requests\n", 451 | stats->total_async); 452 | fprintf(stdout, "Had max %lu async requests outstanding\n", 453 | stats->max_async); 454 | fprintf(stdout, "Inserted %lu async requests into readahead " 455 | "stream\n", stats->async_instream); 456 | 457 | stats->async_instream = 0; 458 | stats->total_async = stats->max_async = 0; 459 | } 460 | 461 | if (verbosity) { 462 | fprintf(stdout, "Read %lu external EA blocks during iscan\n", 463 | ea_ext_block_read); 464 | fprintf(stdout, "Read %lu external EA values during iscan\n", 465 | ea_ext_value_read); 466 | 467 | ea_ext_block_read = ea_ext_value_read = 0; 468 | } 469 | 470 | if (scan_action->iscan_end && scan_action->iscan_end()) 471 | return 1; 472 | 473 | 
	/* Tail of the iscan entry point (its opening is outside this chunk):
	 * release the scratch inode buffer and report success.
	 */
	free(inode);
	return 0;
}

/* dscan.c -- Directory scan and naming phase of Lester
 *
 * Copyright (C) 2013 UT-Battelle.
 *
 * Some of this code is derived from Lustre e2fsprogs/e2scan; this file may
 * be redistributed under the terms of the GNU General Public License version
 * 2; see COPYING for details.
 */
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
/* NOTE(review): the angle-bracket header names were lost when this file was
 * extracted; reconstructed from usage below (fprintf/stderr, malloc/free/exit,
 * strncmp/memcpy, gettimeofday) -- confirm against the original source.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include "lester.h"

/* Lustre extended the file type in ext2_dir_entry_2; provide a helper so
 * we can be backwards compatible
 */
#ifndef EXT2_FT_MASK
#define EXT2_FT_MASK 0x0f
#endif

/* Counters reported at the end of the directory scan (see resolve_paths()) */
unsigned long dentries_freed = 0;
unsigned long dentries_created = 0;

/* Per-file state carried across the async IOs needed to satisfy a dscan
 * action that asked for attribute data.  The inode copy is stored in the
 * flexible trailing array (declared [0], pre-C99 style); the struct is
 * allocated with sizeof(*state) + EXT2_INODE_SIZE(fs->super).
 */
struct attr_state {
	struct dentry *dentry;		/* name/parent of the file in question */
	struct ea_info *eas;		/* parsed EA chain, if built yet */
	void *ext_attr_block;		/* copy of external xattr block, if any */
	unsigned int pending_ios;	/* outstanding async EA-value reads */
	struct ext2_inode inode[0];	/* copy of the on-disk inode */
};

/* A contiguous run of directory blocks, used to drive readahead */
struct chunk {
	blk64_t start;
	e2_blkcnt_t len;
};

static struct chunk *cur_chunk, *ra_chunk, *chunks;
static e2_blkcnt_t nr_chunks;

/* Set when an action returns ACTION_END_SCAN; aborts the dblist iteration */
static int request_end_dscan;

unsigned long dir_readahead = 0;

/* Drop everything attr_state owns: the EA chain, the copied external
 * attribute block, the dentry reference taken in path_resolved(), and
 * the state itself.
 */
static void release_state(struct attr_state *state)
{
	release_ea_info(state->eas);
	if (state->ext_attr_block)
		free(state->ext_attr_block);
	if (state->dentry)
		put_dentry(state->dentry);
	free(state);
}

static void dscan_ea_done(void *data)
{
	/* We completed reading in a requested EA; see if it is time
	 * to retry the dscan action
	 */
	struct attr_state *state = data;
	struct dentry *dentry = state->dentry;
	int action;

	/* Callers hold one extra virtual IO while submitting requests, so
	 * the action only runs after the final completion (see the
	 * pending_ios = ea_reqs + 1 sites).
	 */
	state->pending_ios--;
	if (state->pending_ios)
		return;

	if (is_file_interesting(dentry->ino)) {
		action = scan_action->dscan(dentry->ino, state->inode,
					    dentry->d_parent, dentry->name,
					    dentry->namelen, state->eas);

		if (action & ACTION_IGNORE_FILE)
			ignore_file(dentry->ino);

		if (action & ACTION_END_SCAN) {
			request_end_dscan = 1;
		} else if (action & ~ACTION_IGNORE_FILE) {
			/* All requested data was supplied; the action has no
			 * grounds to ask for more at this point.
			 */
			fprintf(stderr, "BUG: dscan action final attempt did "
				"not complete\n");
			exit(1);
		}
	}

	release_state(state);
}

/* Count the EAs the action flagged for reading, and sanity-check that each
 * one actually lives in an external inode.  Exits on an inconsistent
 * request; returns the (non-zero) number of reads to issue.
 */
static unsigned int validate_ea_reads(struct ea_info *ea)
{
	unsigned int ea_reqs = 0;

	for ( ; ea->name; ea++) {
		if (!ea->requested)
			continue;

		ea_reqs++;
		if (!ea->ext_ino) {
			fprintf(stderr, "BUG: dscan action requested "
				"read of non-external attr\n");
			exit(1);
		}
	}

	if (!ea_reqs) {
		fprintf(stderr, "BUG: dscan action requested extern "
			"attr read, but flagged none\n");
		exit(1);
	}

	return ea_reqs;
}

static int read_ext_attr_cb(ext2_loff_t offset, ssize_t size, void *priv1,
			    unsigned long priv2, void *xattr)
{
	/* We just read in the external xattr block for this inode; we need
	 * to parse the EA chain and hand it to our dscan action.  We may
	 * still have more IO to do if any of the EA values are stored in
	 * external files/inodes.
	 */
	struct attr_state *state = priv1;
	struct dentry *dentry = state->dentry;
	unsigned int ea_reqs = 0;
	struct ea_info *ea;
	int action;

	ea_ext_block_read++;

	/* The file may have been ignored while this IO was in flight */
	if (!is_file_interesting(dentry->ino))
		goto complete;

	state->eas = build_ea_info(state->inode, xattr);
	action = scan_action->dscan(dentry->ino, state->inode,
				    dentry->d_parent, dentry->name,
				    dentry->namelen, state->eas);

	if (action & ACTION_IGNORE_FILE)
		ignore_file(dentry->ino);

	if (action & ACTION_WANT_READ_ATTRS)
		ea_reqs = validate_ea_reads(state->eas);

	if (action & ACTION_END_SCAN) {
		request_end_dscan = 1;
		goto complete;
	}

	action &= ~(ACTION_IGNORE_FILE | ACTION_END_SCAN);
	if (action == ACTION_COMPLETE)
		goto complete;

	/* We need to keep the external attribute block around...
	 * (xattr is the IO manager's buffer, so take a private copy and
	 * repoint the EA chain at it)
	 */
	state->ext_attr_block = malloc(fs->blocksize);
	if (!state->ext_attr_block) {
		fprintf(stderr, "Unable to allocate attr block\n");
		exit(1);
	}
	memcpy(state->ext_attr_block, xattr, fs->blocksize);
	state->eas = ea_memory_change(state->eas, state->inode,
				      state->ext_attr_block);

	/* initiate file read (inode) for each EA requested; keep an extra
	 * pending virtual IO while submitting the async requests to avoid
	 * early completion.
	 */
	state->pending_ios = ea_reqs + 1;
	for (ea = state->eas; ea->name; ea++) {
		if (ea->requested)
			async_read_ea_value(ea, dscan_ea_done, state);
	}

	dscan_ea_done(state);	/* drop the extra virtual IO */
	return 0;

complete:
	release_state(state);
	return 0;
}

static int read_inode_attr_cb(ext2_filsys fs, ext2_ino_t ino,
			      struct ext2_inode *inode, void *priv)
{
	/* The action requested the inode and EAs; see if we need to
	 * read in an external block to satisfy the EA info.  If not,
	 * go ahead and parse the EAs and call the action again; we may
	 * still have to store state if any of the EA values are in separate
	 * inodes, though...
	 */
	struct dentry *dentry = priv;
	struct ea_info *ea, *eas = NULL;
	struct attr_state *state;
	unsigned int ea_reqs = 0;
	ssize_t inode_size;
	int action;

	if (!is_file_interesting(ino))
		goto complete;

	/* No external attribute block: all EAs are in-inode, so we can run
	 * the action right away.
	 */
	if (!inode->i_file_acl) {
		eas = build_ea_info(inode, NULL);
		action = scan_action->dscan(ino, inode, dentry->d_parent,
					    dentry->name, dentry->namelen,
					    eas);

		if (action & ACTION_IGNORE_FILE)
			ignore_file(ino);

		if (action & ACTION_WANT_READ_ATTRS)
			ea_reqs = validate_ea_reads(eas);

		if (action & ACTION_END_SCAN) {
			request_end_dscan = 1;
			goto complete;
		}

		action &= ~(ACTION_IGNORE_FILE | ACTION_END_SCAN);
		if (action == ACTION_COMPLETE)
			goto complete;
	}

	/* We have more IO to do for this dscan action; save our inode
	 * (and/or EA info) for later use.
	 */
	inode_size = EXT2_INODE_SIZE(fs->super);
	state = (struct attr_state *) calloc(1, sizeof(*state) + inode_size);
	if (!state) {
		fprintf(stderr, "unable to allocate attribute state buffer\n");
		exit(1);
	}

	/* state adopts the dentry reference; released via release_state() */
	state->dentry = dentry;
	memcpy(state->inode, inode, inode_size);
	if (eas)
		state->eas = ea_memory_change(eas, state->inode, NULL);

	/* External xattr block present: read it first, continue in
	 * read_ext_attr_cb().
	 */
	if (inode->i_file_acl) {
		return io_channel_async_read(fs->io, inode->i_file_acl, 1,
					     read_ext_attr_cb, state, 0);
	}

	/* initiate file read (inode) for each EA requested; keep an extra
	 * pending virtual IO while submitting the async requests to avoid
	 * early completion.
	 */
	state->pending_ios = ea_reqs + 1;
	for (ea = state->eas; ea->name; ea++) {
		if (ea->requested)
			async_read_ea_value(ea, dscan_ea_done, state);
	}
	dscan_ea_done(state);	/* drop the extra virtual IO */
	return 0;

complete:
	release_ea_info(eas);
	put_dentry(dentry);
	return 0;
}

static int read_inode_cb(ext2_filsys fs, ext2_ino_t ino,
			 struct ext2_inode *inode, void *priv)
{
	/* Called to report a file after async inode read completes */
	struct dentry *dentry = priv;
	int action;

	/* The previous action call did not request attribute info, so no
	 * need to parse them here.
	 */
	if (is_file_interesting(ino)) {
		action = scan_action->dscan(ino, inode, dentry->d_parent,
					    dentry->name, dentry->namelen,
					    NULL);

		if (action & ACTION_IGNORE_FILE)
			ignore_file(ino);

		if (action & ACTION_END_SCAN) {
			request_end_dscan = 1;
		} else if (action & ~ACTION_IGNORE_FILE) {
			fprintf(stderr, "BUG: action didn't complete "
				"(expanded info request)\n");
			exit(1);
		}
	}

	put_dentry(dentry);
	return 0;
}


/* Called by the dtree code once a file's full path is known.  Runs the
 * dscan action with name-only info; if the action wants the inode and/or
 * attributes, kicks off the async reads that end in read_inode_cb() /
 * read_inode_attr_cb().  Returns non-zero when the scan should stop.
 */
int path_resolved(ext2_ino_t ino, struct dentry *parent, const char *name,
		  int namelen, struct dentry *entry)
{
	errcode_t rc = 0;
	int action;

	/* We have a name, see if we care about the inode or attributes */
	action = scan_action->dscan(ino, NULL, parent, name, namelen, NULL);
	if (!action)
		return 0;

	if (action & ACTION_IGNORE_FILE) {
		if (action & ~(ACTION_IGNORE_FILE | ACTION_END_SCAN)) {
			fprintf(stderr, "BUG: action ignored file but wanted "
				"callback with more info\n");
			exit(1);
		}
		ignore_file(ino);
	}

	if (action & (ACTION_WANT_INODE | ACTION_WANT_ATTRS)) {
		if (!entry) {
			/* We're guaranteed to be a file here, as all dirs
			 * get
created during the inode scan.  As a dentry
			 * only needs to know its child dirs, we don't go
			 * on that list.
			 */
			entry = create_dentry(ino);
			dentry_attach_name(entry, namelen, name);
			entry->d_parent = parent;
			entry->is_file = 1;
			get_dentry(parent);
		} else
			get_dentry(entry);

		/* We need the inode for both cases, but if the action
		 * signals it wants attributes, then make sure we have them
		 * before calling back.
		 */
		if (action & ACTION_WANT_ATTRS) {
			rc = ext2fs_read_inode_async(fs, ino, NULL,
						     read_inode_attr_cb, entry);
		} else {
			rc = ext2fs_read_inode_async(fs, ino, NULL,
						     read_inode_cb, entry);
		}
		if (rc) {
			com_err("ext2fs_read_inode_async", rc,
				"initiating read");
			exit(1);
		}

		/* Throttle if too many async requests are outstanding */
		if (enforce_async_limit())
			exit(1);
	}

	if (action & ACTION_END_SCAN) {
		request_end_dscan = 1;
		return 1;
	}

	return 0;
}

/* Collapse the dblist into a list of contiguous sections; this is called
 * by ext2fs_dblist_iterate2().  Assumes the chunks array was sized by a
 * prior count_chunks() pass over the same (sorted) dblist.
 */
static int fill_chunks(ext2_filsys fs, struct ext2_db_entry2 *db_info,
		       void *priv_data)
{
	if (cur_chunk == NULL ||
	    db_info->blk != cur_chunk->start + cur_chunk->len) {
		/* new sweep starts */
		if (cur_chunk == NULL)
			cur_chunk = chunks;
		else
			cur_chunk++;
		cur_chunk->start = db_info->blk;
		cur_chunk->len = 1;
	} else
		cur_chunk->len++;

	return 0;
}

/* Count the number of contiguous segments in the dblist; this is called
 * by ext2fs_dblist_iterate2().
 *
 * NOTE(review): the run state lives in function-local statics, so this
 * callback is single-use per process -- fine for the one
 * start_dblist_readahead() call, but not reentrant.
 */
static int count_chunks(ext2_filsys fs, struct ext2_db_entry2 *db_info,
			void *priv_data)
{
	static blk64_t start = ~(blk64_t)0;
	static e2_blkcnt_t len;

	/* First block ever seen starts the first segment */
	if (start == ~(blk64_t)0) {
		nr_chunks++;
		start = db_info->blk;
		len = 1;
		return 0;
	}

	if (db_info->blk != start + len) {
		nr_chunks++;
		start = db_info->blk;
		len = 1;
	} else
		len++;

	return 0;
}

/* Build the chunk list of contiguous directory-block runs and issue
 * readahead for the first grpra of them; the rest are streamed in by
 * dblist_readahead() as the iteration consumes blocks.
 */
static void start_dblist_readahead(unsigned long grpra)
{
	/* First, we generate a list of the contiguous runs of directory
	 * blocks, then we'll start readahead for the first few.
	 */
	ext2fs_dblist_iterate2(fs->dblist, count_chunks, NULL);
	chunks = malloc(sizeof(struct chunk) * nr_chunks);
	if (chunks == NULL) {
		fprintf(stderr, "malloc failed\n");
		exit(1);
	}
	ext2fs_dblist_iterate2(fs->dblist, fill_chunks, NULL);

	/* start readahead for first chunks */
	ra_chunk = chunks;
	cur_chunk = NULL;	/* reset consumer cursor for dblist_readahead() */

	while (grpra-- && ra_chunk < chunks + nr_chunks) {
		io_channel_readahead(fs->io, ra_chunk->start, ra_chunk->len);
		ra_chunk++;
	}
}

/* Consume one directory block from the current chunk; when a chunk is
 * exhausted, kick off readahead for the next not-yet-prefetched chunk so
 * the window stays ahead of the iteration.
 */
static int dblist_readahead(void)
{
	if (cur_chunk == NULL)
		cur_chunk = chunks;
	if (--cur_chunk->len == 0) {
		cur_chunk++;
		if (ra_chunk < chunks + nr_chunks) {
			io_channel_readahead(fs->io, ra_chunk->start,
					     ra_chunk->len);
			ra_chunk++;
		}
	}
	return 0;
}

/* Directory-entry callback for ext2fs_dblist_dir_iterate(): resolves each
 * entry against the dentry tree, naming files and directories as they are
 * found.  Returns DIRENT_ABORT once an action has requested scan end.
 */
static int dblist_iterate_cb(ext2_ino_t dirino, int entry,
			     struct ext2_dir_entry *dirent, int offset,
			     int blocksize, char *buf, void *private)
{
	struct ext2_dir_entry_2 *dirent2;
	int namelen;

	/* Persist across calls: the dentry of the directory currently being
	 * iterated, and its inode number for change detection.
	 */
	static struct dentry *parent;
	static ext2_ino_t lastino;

	if (request_end_dscan)
		return DIRENT_ABORT;

	/* We ask for empty directory entries in order to detect when we
	 * transition to the next block, indicated by offset == 0.  As we
	 * move to a new directory block, release the reference held by
	 * iscan.c:block_async_iter_cb() on the previous directory's dentry.
	 *
	 * Note: this will leave a few dentries in the tree once we're
	 * finished with the iteration, as we'll not do the final
	 * put_dentry() on the last directory we iterate.
	 */
	if (offset == 0) {
		dblist_readahead();
		if (parent)
			put_dentry(parent);
		if (dirino != lastino) {
			parent = find_dentry(dirino);
			lastino = dirino;
		}
	}

	/* Skip deleted/empty entries and the "." / ".." self-references */
	if (dirent->inode == 0)
		return 0;

	namelen = (dirent->name_len & 0xFF);
	if (namelen == 2 && !strncmp(dirent->name, "..", 2))
		return 0;

	if (namelen == 1 && !strncmp(dirent->name, ".", 1))
		return 0;

	if (dirent->inode > fs->super->s_inodes_count) {
		fprintf(stderr, "BUG: too big ino %u (%.*s)\n",
			dirent->inode, namelen, dirent->name);
		exit(1);
	}

	/* TODO propagate error/stop actions */
	dirent2 = (struct ext2_dir_entry_2 *) dirent;
	if ((dirent2->file_type & EXT2_FT_MASK) == EXT2_FT_DIR)
		dtree_name_dir(parent, dirent->inode, dirent->name, namelen);
	else
		dtree_name_file(parent, dirent->inode, dirent->name, namelen);
	return 0;
}

/* Phase-two driver: walk the directory blocks gathered by the inode scan,
 * resolve full paths, drain remaining async IO, and report statistics.
 * Returns 0 on success, non-zero on error.
 */
int resolve_paths(void)
{
	struct timeval start, now, diff;
	aio_stats stats;
	errcode_t rc;

	gettimeofday(&start, NULL);

	if (scan_action->dscan_begin) {
		if (scan_action->dscan_begin() & ACTION_END_SCAN)
			return 0;
	}

	if (verbosity) {
		fprintf(stdout, "scanning %u directory blocks\n",
			ext2fs_dblist_count(fs->dblist));
	}

	start_dblist_readahead(dir_readahead);

	gettimeofday(&now, NULL);
	diff_timevals(&start, &now, &diff);
	start = now;

	if (verbosity) {
		fprintf(stdout, "started dblist readahead in %d.%06u seconds\n",
			(int) diff.tv_sec, (unsigned int) diff.tv_usec);
	}

	/* Iterate every collected directory block; includes empty entries so
	 * dblist_iterate_cb() can detect block transitions (offset == 0).
	 */
	rc = ext2fs_dblist_dir_iterate(fs->dblist, DIRENT_FLAG_INCLUDE_EMPTY,
				       NULL, dblist_iterate_cb, NULL);
	if (rc) {
		com_err("ext2fs_dblist_dir_iterate", rc,
			"dir iterating dblist\n");
		return 1;
	}

	gettimeofday(&now, NULL);
	diff_timevals(&start, &now, &diff);
	start = now;

	if (verbosity) {
		fprintf(stdout, "finished directory scan in %d.%06u\n",
			(int) diff.tv_sec, (unsigned int) diff.tv_usec);
	}

	/* Async backend: wait for all outstanding reads to complete */
	if (!use_unix) {
		rc = io_channel_finish_async(fs->io, 0);
		if (rc) {
			com_err("io_channel_finish_async", rc,
				"failed to complete IO");
			return 1;
		}

		gettimeofday(&now, NULL);
		diff_timevals(&start, &now, &diff);
		start = now;
		if (verbosity) {
			fprintf(stdout, "finished remaining dirscan async "
				"work in %d.%06u seconds\n",
				(int) diff.tv_sec,
				(unsigned int) diff.tv_usec);
		}
	}

	if (scan_action->dscan_end) {
		rc = scan_action->dscan_end();
		if (rc)
			return rc;
	}

	/* Report per-channel IO statistics (async backend only) */
	if (verbosity && !use_unix) {
		rc = io_channel_get_stats(fs->io, (io_stats *) &stats);
		if (rc) {
			com_err("io_channel_get_stats", rc,
				"failed to get stats");
			return 1;
		}

		fprintf(stdout, "Had total %lu async requests\n",
			stats->total_async);
		fprintf(stdout, "Had max %lu async requests outstanding\n",
			stats->max_async);
		fprintf(stdout, "Inserted %lu async into readahead stream\n",
			stats->async_instream);
		fprintf(stdout, "Issued %lu total requests\n",
			stats->issued_requests);
		fprintf(stdout, "Completed %lu total requests\n",
			stats->completed_requests);
		fprintf(stdout, "Issued %lu merged async requests\n",
			stats->merged_async_issued);
		fprintf(stdout, "Total of %lu async requests merged\n",
			stats->merged_async);
		fprintf(stdout, "Total of %llu gap bytes in merges\n",
			stats->merged_gap_bytes);
	}

	if (verbosity) {
		fprintf(stdout, "Freed %lu of %lu dentries during dscan\n",
			dentries_freed, dentries_created);
		fprintf(stdout, "Read %lu external EA blocks during dscan\n",
			ea_ext_block_read);
		fprintf(stdout, "Read %lu external EA values during dscan\n",
			ea_ext_value_read);
	}

	return 0;
}

/* List handling code from the Linux kernel, modified a bit for userspace use.
 *
 * This file may be redistributed under the terms of the GNU General
 * Public License version 2; see COPYING for details.
 */
#ifndef _LINUX_LIST_H
#define _LINUX_LIST_H

#include "container_of.h"

/* Poison values stored into unlinked nodes so a stale use faults loudly */
#define LIST_POISON1 ((void *) 0xdeadbeef)
#define LIST_POISON2 ((void *) 0xcafebebe)

/*
 * Simple doubly linked list implementation.
 *
 * Some of the internal functions ("__xxx") are useful when
 * manipulating whole lists rather than single entries, as
 * sometimes we already know the next/prev entries and we can
 * generate better code by using them directly rather than
 * using the generic single-entry routines.
 */

struct list_head {
	struct list_head *next, *prev;
};

#define LIST_HEAD_INIT(name) { &(name), &(name) }

#define LIST_HEAD(name) \
	struct list_head name = LIST_HEAD_INIT(name)

static inline void INIT_LIST_HEAD(struct list_head *list)
{
	list->next = list;
	list->prev = list;
}

/*
 * Insert a new entry between two known consecutive entries.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 */
#ifndef CONFIG_DEBUG_LIST
static inline void __list_add(struct list_head *new,
			      struct list_head *prev,
			      struct list_head *next)
{
	next->prev = new;
	new->next = next;
	new->prev = prev;
	prev->next = new;
}
#else
extern void __list_add(struct list_head *new,
		       struct list_head *prev,
		       struct list_head *next);
#endif

/**
 * list_add - add a new entry
 * @new: new entry to be added
 * @head: list head to add it after
 *
 * Insert a new entry after the specified head.
 * This is good for implementing stacks.
 */
static inline void list_add(struct list_head *new, struct list_head *head)
{
	__list_add(new, head, head->next);
}


/**
 * list_add_tail - add a new entry
 * @new: new entry to be added
 * @head: list head to add it before
 *
 * Insert a new entry before the specified head.
 * This is useful for implementing queues.
 */
static inline void list_add_tail(struct list_head *new, struct list_head *head)
{
	__list_add(new, head->prev, head);
}

/*
 * Delete a list entry by making the prev/next entries
 * point to each other.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 */
static inline void __list_del(struct list_head * prev, struct list_head * next)
{
	next->prev = prev;
	prev->next = next;
}

/**
 * list_del - deletes entry from list.
 * @entry: the element to delete from the list.
 * Note: list_empty() on entry does not return true after this, the entry is
 * in an undefined state.
 */
#ifndef CONFIG_DEBUG_LIST
static inline void list_del(struct list_head *entry)
{
	__list_del(entry->prev, entry->next);
	/* poison the unlinked node so accidental reuse faults */
	entry->next = LIST_POISON1;
	entry->prev = LIST_POISON2;
}
#else
extern void list_del(struct list_head *entry);
#endif

/**
 * list_replace - replace old entry by new one
 * @old : the element to be replaced
 * @new : the new element to insert
 *
 * If @old was empty, it will be overwritten.
 */
static inline void list_replace(struct list_head *old,
				struct list_head *new)
{
	new->next = old->next;
	new->next->prev = new;
	new->prev = old->prev;
	new->prev->next = new;
}

/* Like list_replace(), but leaves @old as a valid empty list */
static inline void list_replace_init(struct list_head *old,
				     struct list_head *new)
{
	list_replace(old, new);
	INIT_LIST_HEAD(old);
}

/**
 * list_del_init - deletes entry from list and reinitialize it.
 * @entry: the element to delete from the list.
 */
static inline void list_del_init(struct list_head *entry)
{
	__list_del(entry->prev, entry->next);
	INIT_LIST_HEAD(entry);
}

/**
 * list_move - delete from one list and add as another's head
 * @list: the entry to move
 * @head: the head that will precede our entry
 */
static inline void list_move(struct list_head *list, struct list_head *head)
{
	__list_del(list->prev, list->next);
	list_add(list, head);
}

/**
 * list_move_tail - delete from one list and add as another's tail
 * @list: the entry to move
 * @head: the head that will follow our entry
 */
static inline void list_move_tail(struct list_head *list,
				  struct list_head *head)
{
	__list_del(list->prev, list->next);
	list_add_tail(list, head);
}

/**
 * list_is_last - tests whether @list is the last entry in list @head
 * @list: the entry to test
 * @head: the head of the list
 */
static inline int list_is_last(const struct list_head *list,
			       const struct list_head *head)
{
	return list->next == head;
}

/**
 * list_empty - tests whether a list is empty
 * @head: the list to test.
 */
static inline int list_empty(const struct list_head *head)
{
	return head->next == head;
}

/**
 * list_empty_careful - tests whether a list is empty and not being modified
 * @head: the list to test
 *
 * Description:
 * tests whether a list is empty _and_ checks that no other CPU might be
 * in the process of modifying either member (next or prev)
 *
 * NOTE: using list_empty_careful() without synchronization
 * can only be safe if the only activity that can happen
 * to the list entry is list_del_init(). Eg. it cannot be used
 * if another CPU could re-list_add() it.
 */
static inline int list_empty_careful(const struct list_head *head)
{
	struct list_head *next = head->next;
	return (next == head) && (next == head->prev);
}

/**
 * list_is_singular - tests whether a list has just one entry.
 * @head: the list to test.
 */
static inline int list_is_singular(const struct list_head *head)
{
	return !list_empty(head) && (head->next == head->prev);
}

/* Internal helper for list_cut_position(): moves head..entry onto @list */
static inline void __list_cut_position(struct list_head *list,
		struct list_head *head, struct list_head *entry)
{
	struct list_head *new_first = entry->next;
	list->next = head->next;
	list->next->prev = list;
	list->prev = entry;
	entry->next = list;
	head->next = new_first;
	new_first->prev = head;
}

/**
 * list_cut_position - cut a list into two
 * @list: a new list to add all removed entries
 * @head: a list with entries
 * @entry: an entry within head, could be the head itself
 *	and if so we won't cut the list
 *
 * This helper moves the initial part of @head, up to and
 * including @entry, from @head to @list. You should
 * pass on @entry an element you know is on @head. @list
 * should be an empty list or a list you do not care about
 * losing its data.
 *
 */
static inline void list_cut_position(struct list_head *list,
		struct list_head *head, struct list_head *entry)
{
	if (list_empty(head))
		return;
	if (list_is_singular(head) &&
		(head->next != entry && head != entry))
		return;
	if (entry == head)
		INIT_LIST_HEAD(list);
	else
		__list_cut_position(list, head, entry);
}

/* Internal helper for the splice variants: inserts @list between
 * @prev and @next; @list itself is left untouched.
 */
static inline void __list_splice(const struct list_head *list,
				 struct list_head *prev,
				 struct list_head *next)
{
	struct list_head *first = list->next;
	struct list_head *last = list->prev;

	first->prev = prev;
	prev->next = first;

	last->next = next;
	next->prev = last;
}

/**
 * list_splice - join two lists, this is designed for stacks
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 */
static inline void list_splice(const struct list_head *list,
			       struct list_head *head)
{
	if (!list_empty(list))
		__list_splice(list, head, head->next);
}

/**
 * list_splice_tail - join two lists, each list being a queue
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 */
static inline void list_splice_tail(struct list_head *list,
				    struct list_head *head)
{
	if (!list_empty(list))
		__list_splice(list, head->prev, head);
}

/**
 * list_splice_init - join two lists and reinitialise the emptied list.
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 *
 * The list at @list is reinitialised
 */
static inline void list_splice_init(struct list_head *list,
				    struct list_head *head)
{
	if (!list_empty(list)) {
		__list_splice(list, head, head->next);
		INIT_LIST_HEAD(list);
	}
}

/**
 * list_splice_tail_init - join two lists and reinitialise the emptied list
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 *
 * Each of the lists is a queue.
 * The list at @list is reinitialised
 */
static inline void list_splice_tail_init(struct list_head *list,
					 struct list_head *head)
{
	if (!list_empty(list)) {
		__list_splice(list, head->prev, head);
		INIT_LIST_HEAD(list);
	}
}

/**
 * list_entry - get the struct for this entry
 * @ptr:	the &struct list_head pointer.
 * @type:	the type of the struct this is embedded in.
 * @member:	the name of the list_struct within the struct.
 */
#define list_entry(ptr, type, member) \
	container_of(ptr, type, member)

/**
 * list_first_entry - get the first element from a list
 * @ptr:	the list head to take the element from.
 * @type:	the type of the struct this is embedded in.
 * @member:	the name of the list_struct within the struct.
 *
 * Note, that list is expected to be not empty.
 */
#define list_first_entry(ptr, type, member) \
	list_entry((ptr)->next, type, member)

/**
 * list_last_entry - get the last element from a list
 * @ptr:	the list head to take the element from.
 * @type:	the type of the struct this is embedded in.
 * @member:	the name of the list_struct within the struct.
 *
 * Note, that list is expected to be not empty.
 */
#define list_last_entry(ptr, type, member) \
	list_entry((ptr)->prev, type, member)

/**
 * list_for_each - iterate over a list
 * @pos:	the &struct list_head to use as a loop cursor.
 * @head:	the head for your list.
 */
#define list_for_each(pos, head) \
	for (pos = (head)->next; pos != (head); \
		pos = pos->next)

/**
 * __list_for_each - iterate over a list
 * @pos:	the &struct list_head to use as a loop cursor.
 * @head:	the head for your list.
 *
 * This variant differs from list_for_each() in that it's the
 * simplest possible list iteration code, no prefetching is done.
 * Use this for code that knows the list to be very short (empty
 * or 1 entry) most of the time.
 */
#define __list_for_each(pos, head) \
	for (pos = (head)->next; pos != (head); pos = pos->next)

/**
 * list_for_each_prev - iterate over a list backwards
 * @pos:	the &struct list_head to use as a loop cursor.
 * @head:	the head for your list.
 */
#define list_for_each_prev(pos, head) \
	for (pos = (head)->prev; pos != (head); \
		pos = pos->prev)

/**
 * list_for_each_safe - iterate over a list safe against removal of list entry
 * @pos:	the &struct list_head to use as a loop cursor.
 * @n:		another &struct list_head to use as temporary storage
 * @head:	the head for your list.
 */
#define list_for_each_safe(pos, n, head) \
	for (pos = (head)->next, n = pos->next; pos != (head); \
		pos = n, n = pos->next)

/**
 * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
 * @pos:	the &struct list_head to use as a loop cursor.
 * @n:		another &struct list_head to use as temporary storage
 * @head:	the head for your list.
 */
#define list_for_each_prev_safe(pos, n, head) \
	for (pos = (head)->prev, n = pos->prev; \
	     pos != (head); \
	     pos = n, n = pos->prev)

/**
 * list_for_each_entry - iterate over list of given type
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the list_struct within the struct.
 */
#define list_for_each_entry(pos, head, member)				\
	for (pos = list_entry((head)->next, typeof(*pos), member);	\
	     &pos->member != (head); 	\
	     pos = list_entry(pos->member.next, typeof(*pos), member))

/**
 * list_for_each_entry_reverse - iterate backwards over list of given type.
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the list_struct within the struct.
 */
#define list_for_each_entry_reverse(pos, head, member)			\
	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
	     &pos->member != (head); 	\
	     pos = list_entry(pos->member.prev, typeof(*pos), member))

/**
 * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue()
 * @pos:	the type * to use as a start point
 * @head:	the head of the list
 * @member:	the name of the list_struct within the struct.
 *
 * Prepares a pos entry for use as a start point in list_for_each_entry_continue().
 */
#define list_prepare_entry(pos, head, member) \
	((pos) ? : list_entry(head, typeof(*pos), member))

/**
 * list_for_each_entry_continue - continue iteration over list of given type
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the list_struct within the struct.
 *
 * Continue to iterate over list of given type, continuing after
 * the current position.
 */
#define list_for_each_entry_continue(pos, head, member) 		\
	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
	     &pos->member != (head);	\
	     pos = list_entry(pos->member.next, typeof(*pos), member))

/**
 * list_for_each_entry_continue_reverse - iterate backwards from the given point
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the list_struct within the struct.
 *
 * Start to iterate over list of given type backwards, continuing after
 * the current position.
 */
#define list_for_each_entry_continue_reverse(pos, head, member)		\
	for (pos = list_entry(pos->member.prev, typeof(*pos), member);	\
	     &pos->member != (head);	\
	     pos = list_entry(pos->member.prev, typeof(*pos), member))

/**
 * list_for_each_entry_from - iterate over list of given type from the current point
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the list_struct within the struct.
 *
 * Iterate over list of given type, continuing from current position.
 */
#define list_for_each_entry_from(pos, head, member) 			\
	for (; &pos->member != (head);	\
	     pos = list_entry(pos->member.next, typeof(*pos), member))

/* Backwards variant of list_for_each_entry_from(): continue from @pos
 * toward the head.
 */
#define list_for_each_entry_from_reverse(pos, head, member)		\
	for (; &pos->member != (head);	\
	     pos = list_entry(pos->member.prev, typeof(*pos), member))

/**
 * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
 * @pos:	the type * to use as a loop cursor.
 * @n:		another type * to use as temporary storage
 * @head:	the head for your list.
 * @member:	the name of the list_struct within the struct.
 */
#define list_for_each_entry_safe(pos, n, head, member)			\
	for (pos = list_entry((head)->next, typeof(*pos), member),	\
		n = list_entry(pos->member.next, typeof(*pos), member);	\
	     &pos->member != (head); 					\
	     pos = n, n = list_entry(n->member.next, typeof(*n), member))

/**
 * list_for_each_entry_safe_continue
 * @pos:	the type * to use as a loop cursor.
 * @n:		another type * to use as temporary storage
 * @head:	the head for your list.
 * @member:	the name of the list_struct within the struct.
 *
 * Iterate over list of given type, continuing after current point,
 * safe against removal of list entry.
 */
#define list_for_each_entry_safe_continue(pos, n, head, member) 		\
	for (pos = list_entry(pos->member.next, typeof(*pos), member), 		\
		n = list_entry(pos->member.next, typeof(*pos), member);		\
	     &pos->member != (head);						\
	     pos = n, n = list_entry(n->member.next, typeof(*n), member))

/**
 * list_for_each_entry_safe_from
 * @pos:	the type * to use as a loop cursor.
 * @n:		another type * to use as temporary storage
 * @head:	the head for your list.
 * @member:	the name of the list_struct within the struct.
 *
 * Iterate over list of given type from current point, safe against
 * removal of list entry.
 */
#define list_for_each_entry_safe_from(pos, n, head, member) 			\
	for (n = list_entry(pos->member.next, typeof(*pos), member);		\
	     &pos->member != (head);						\
	     pos = n, n = list_entry(n->member.next, typeof(*n), member))

/**
 * list_for_each_entry_safe_reverse
 * @pos:	the type * to use as a loop cursor.
 * @n:		another type * to use as temporary storage
 * @head:	the head for your list.
 * @member:	the name of the list_struct within the struct.
 *
 * Iterate backwards over list of given type, safe against removal
 * of list entry.
546 | */ 547 | #define list_for_each_entry_safe_reverse(pos, n, head, member) \ 548 | for (pos = list_entry((head)->prev, typeof(*pos), member), \ 549 | n = list_entry(pos->member.prev, typeof(*pos), member); \ 550 | &pos->member != (head); \ 551 | pos = n, n = list_entry(n->member.prev, typeof(*n), member)) 552 | 553 | /* 554 | * Double linked lists with a single pointer list head. 555 | * Mostly useful for hash tables where the two pointer list head is 556 | * too wasteful. 557 | * You lose the ability to access the tail in O(1). 558 | */ 559 | 560 | struct hlist_head { 561 | struct hlist_node *first; 562 | }; 563 | 564 | struct hlist_node { 565 | struct hlist_node *next, **pprev; 566 | }; 567 | 568 | #define HLIST_HEAD_INIT { .first = NULL } 569 | #define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } 570 | #define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) 571 | static inline void INIT_HLIST_NODE(struct hlist_node *h) 572 | { 573 | h->next = NULL; 574 | h->pprev = NULL; 575 | } 576 | 577 | static inline int hlist_unhashed(const struct hlist_node *h) 578 | { 579 | return !h->pprev; 580 | } 581 | 582 | static inline int hlist_empty(const struct hlist_head *h) 583 | { 584 | return !h->first; 585 | } 586 | 587 | static inline void __hlist_del(struct hlist_node *n) 588 | { 589 | struct hlist_node *next = n->next; 590 | struct hlist_node **pprev = n->pprev; 591 | *pprev = next; 592 | if (next) 593 | next->pprev = pprev; 594 | } 595 | 596 | static inline void hlist_del(struct hlist_node *n) 597 | { 598 | __hlist_del(n); 599 | n->next = LIST_POISON1; 600 | n->pprev = LIST_POISON2; 601 | } 602 | 603 | static inline void hlist_del_init(struct hlist_node *n) 604 | { 605 | if (!hlist_unhashed(n)) { 606 | __hlist_del(n); 607 | INIT_HLIST_NODE(n); 608 | } 609 | } 610 | 611 | static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) 612 | { 613 | struct hlist_node *first = h->first; 614 | n->next = first; 615 | if (first) 616 | first->pprev = 
&n->next; 617 | h->first = n; 618 | n->pprev = &h->first; 619 | } 620 | 621 | /* next must be != NULL */ 622 | static inline void hlist_add_before(struct hlist_node *n, 623 | struct hlist_node *next) 624 | { 625 | n->pprev = next->pprev; 626 | n->next = next; 627 | next->pprev = &n->next; 628 | *(n->pprev) = n; 629 | } 630 | 631 | static inline void hlist_add_after(struct hlist_node *n, 632 | struct hlist_node *next) 633 | { 634 | next->next = n->next; 635 | n->next = next; 636 | next->pprev = &n->next; 637 | 638 | if(next->next) 639 | next->next->pprev = &next->next; 640 | } 641 | 642 | /* 643 | * Move a list from one list head to another. Fixup the pprev 644 | * reference of the first entry if it exists. 645 | */ 646 | static inline void hlist_move_list(struct hlist_head *old, 647 | struct hlist_head *new) 648 | { 649 | new->first = old->first; 650 | if (new->first) 651 | new->first->pprev = &new->first; 652 | old->first = NULL; 653 | } 654 | 655 | #define hlist_entry(ptr, type, member) container_of(ptr,type,member) 656 | 657 | #define hlist_for_each(pos, head) \ 658 | for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \ 659 | pos = pos->next) 660 | 661 | #define hlist_for_each_safe(pos, n, head) \ 662 | for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ 663 | pos = n) 664 | 665 | /** 666 | * hlist_for_each_entry - iterate over list of given type 667 | * @tpos: the type * to use as a loop cursor. 668 | * @pos: the &struct hlist_node to use as a loop cursor. 669 | * @head: the head for your list. 670 | * @member: the name of the hlist_node within the struct. 
671 | */ 672 | #define hlist_for_each_entry(tpos, pos, head, member) \ 673 | for (pos = (head)->first; \ 674 | pos && ({ prefetch(pos->next); 1;}) && \ 675 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ 676 | pos = pos->next) 677 | 678 | /** 679 | * hlist_for_each_entry_continue - iterate over a hlist continuing after current point 680 | * @tpos: the type * to use as a loop cursor. 681 | * @pos: the &struct hlist_node to use as a loop cursor. 682 | * @member: the name of the hlist_node within the struct. 683 | */ 684 | #define hlist_for_each_entry_continue(tpos, pos, member) \ 685 | for (pos = (pos)->next; \ 686 | pos && ({ prefetch(pos->next); 1;}) && \ 687 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ 688 | pos = pos->next) 689 | 690 | /** 691 | * hlist_for_each_entry_from - iterate over a hlist continuing from current point 692 | * @tpos: the type * to use as a loop cursor. 693 | * @pos: the &struct hlist_node to use as a loop cursor. 694 | * @member: the name of the hlist_node within the struct. 695 | */ 696 | #define hlist_for_each_entry_from(tpos, pos, member) \ 697 | for (; pos && ({ prefetch(pos->next); 1;}) && \ 698 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ 699 | pos = pos->next) 700 | 701 | /** 702 | * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry 703 | * @tpos: the type * to use as a loop cursor. 704 | * @pos: the &struct hlist_node to use as a loop cursor. 705 | * @n: another &struct hlist_node to use as temporary storage 706 | * @head: the head for your list. 707 | * @member: the name of the hlist_node within the struct. 
708 | */ 709 | #define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ 710 | for (pos = (head)->first; \ 711 | pos && ({ n = pos->next; 1; }) && \ 712 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ 713 | pos = n) 714 | 715 | #endif 716 | -------------------------------------------------------------------------------- /lib/aio_manager.c: -------------------------------------------------------------------------------- 1 | /* aio_manager.c -- read-only, asynchronous IO manager based on libaio 2 | * 3 | * Copyright (C) 2013 UT-Battelle. 4 | * 5 | * This file may be redistributed under the terms of the GNU Library General 6 | * Public License version 2; see COPYING for details. 7 | */ 8 | #define _GNU_SOURCE 9 | #define _FILE_OFFSET_BITS 64 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "list.h" 24 | #include "rbtree.h" 25 | #include "ext2fs-extra.h" 26 | 27 | #include 28 | 29 | #define DEBUG 0 30 | 31 | typedef int (*req_callback_t)(ext2_loff_t offset, ssize_t size, 32 | void *priv1, unsigned long priv2, void *data); 33 | 34 | struct span { 35 | struct list_head list; 36 | ext2_loff_t offset; 37 | ssize_t size; 38 | void *data; 39 | }; 40 | 41 | struct request { 42 | struct list_head list; 43 | struct list_head active; 44 | struct rb_node rb_node; 45 | ext2_loff_t offset; 46 | ssize_t size; 47 | unsigned long order; 48 | 49 | unsigned int *waiting; 50 | req_callback_t callback; 51 | void *priv1; 52 | unsigned long priv2; 53 | }; 54 | 55 | struct cacheblock { 56 | struct list_head list; 57 | struct list_head reqs; 58 | struct iocb iocb; 59 | void *buffer; 60 | unsigned long order; 61 | 62 | /* These fields are used for retrieving data out of the read ahead 63 | * requests. We could reuse the iocb internal fields, but this 64 | * is cleaner. 
	 */
	ext2_loff_t offset;
	ssize_t size;
	void *data;
	unsigned int age;
};

/* Per-channel private state for the aio IO manager */
struct aio_data {
	int magic;
	int fd;
	int in_runqueue;	/* re-entrancy guard for run_queue() */
	int ignore_async;	/* set during close: drop async callbacks */
	int async_only;
	ssize_t merge_gap;	/* max byte gap bridged when merging reads */
	ssize_t max_size;	/* size of each cache block buffer */
	unsigned int target_qd;	/* desired number of in-flight iocbs */
	unsigned int in_flight;
	unsigned int reserved_cacheblocks;
	ssize_t sector_size;
	unsigned long next_order;
	unsigned long used_order;

	unsigned long num_async;
	ext2_loff_t last_offset;

	io_context_t ioctx;
	struct iocb **iolist;
	struct io_event *events;

	/* Lists for cache blocks */
	unsigned int avail_cacheblocks;
	struct list_head cb_list;
	struct list_head cache;
	struct list_head waiting;

	/* span structs ready for use */
	struct list_head span_list;

	/* lists for requests */
	struct list_head req_list;
	struct list_head active;
	struct list_head rq;
	struct rb_root async_rq;

	time_t last_update;
	unsigned int num_bufs;
	unsigned int preallocate_reqs;
	struct cacheblock *cacheblock_base;	/* also the "initialized" flag */
	unsigned long arena_size;
	void *arena;

	struct struct_aio_stats stats;
};

/* Validate the channel magic and extract our private data; expects local
 * variables named "channel" and "aio" in the calling scope.
 */
#define AIO_GET_PRIVATE(d) \
	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); \
	aio = (struct aio_data *) channel->private_data; \
	EXT2_CHECK_MAGIC(aio, EXT2_ET_MAGIC_UNIX_IO_CHANNEL)

/* Reset a request's linkage so later list/rbtree operations are safe */
static void init_request(struct request *req)
{
	INIT_LIST_HEAD(&req->list);
	INIT_LIST_HEAD(&req->active);
	RB_CLEAR_NODE(&req->rb_node);
}

/* Link node at the tree position for offset. Returns the existing request
 * if one with the same offset is already present (node is then NOT linked);
 * the caller must finish with rb_insert_color() on a NULL return.
 */
static struct request *__rb_insert_req(struct rb_root *root,
				       ext2_loff_t offset,
				       struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct request *req;

	while (*p) {
		parent = *p;
		req = rb_entry(parent, struct request, rb_node);

		if (offset < req->offset)
			p = &(*p)->rb_left;
		else if (offset > req->offset)
			p = &(*p)->rb_right;
		else
			return req;
	}

	rb_link_node(node, parent, p);
	return NULL;
}

/* Queue an async request; duplicates (same offset) are chained on the list
 * head of the request already resident in the tree.
 */
static void insert_async_req(struct aio_data *aio, struct request *req)
{
	struct request *found;

	found = __rb_insert_req(&aio->async_rq, req->offset, &req->rb_node);
	if (found)
		list_add_tail(&req->list, &found->list);
	else
		rb_insert_color(&req->rb_node, &aio->async_rq);
}

static struct request *next_async_request(struct aio_data *aio,
					  ext2_loff_t offset)
{
	/* Search the async RB tree for the next offset greater than
	 * or equal to the one given
	 */
	struct rb_node *p, *n = aio->async_rq.rb_node;
	struct request *req;

	if (!n)
		return NULL;

	while (n) {
		p = n;
		req = rb_entry(n, struct request, rb_node);

		if (offset < req->offset)
			n = n->rb_left;
		else if (offset > req->offset)
			n = n->rb_right;
		else
			return req;
	}

	/* We didn't find an exact match for the offset, walk up the
	 * tree to find the next largest one.
192 | */ 193 | while (p) { 194 | req = rb_entry(p, struct request, rb_node); 195 | 196 | if (req->offset > offset) 197 | return req; 198 | p = rb_parent(p); 199 | } 200 | 201 | /* Everything in the tree is before the offset */ 202 | return NULL; 203 | } 204 | 205 | static errcode_t init_aio(struct aio_data *aio) 206 | { 207 | unsigned int num_bufs; 208 | unsigned char *arena; 209 | struct cacheblock *cb, *cblocks = NULL; 210 | errcode_t rc; 211 | int i; 212 | 213 | /* Set our defaults unless overridden */ 214 | if (!aio->max_size) 215 | aio->max_size = 1024 * 1024; 216 | if (!aio->target_qd) 217 | aio->target_qd = 8; 218 | if (!aio->reserved_cacheblocks) 219 | aio->reserved_cacheblocks = 4; 220 | 221 | if (aio->max_size < aio->merge_gap) 222 | aio->merge_gap = aio->max_size; 223 | 224 | num_bufs = aio->target_qd * 2 + aio->reserved_cacheblocks; 225 | if (!aio->num_bufs || aio->num_bufs < num_bufs) 226 | aio->num_bufs = num_bufs; 227 | 228 | aio->arena_size = aio->num_bufs * aio->max_size; 229 | 230 | rc = io_queue_init(aio->target_qd, &aio->ioctx); 231 | if (rc) 232 | return rc; 233 | 234 | arena = mmap(NULL, aio->arena_size, PROT_READ|PROT_WRITE, 235 | MAP_ANONYMOUS|MAP_POPULATE|MAP_PRIVATE, -1, 0); 236 | if (arena == MAP_FAILED) 237 | goto error_ioctx; 238 | aio->arena = arena; 239 | 240 | rc = ext2fs_get_array(aio->target_qd, sizeof(struct io_event), 241 | &aio->events); 242 | if (!rc) 243 | rc = ext2fs_get_array(aio->target_qd, sizeof(struct iocb *), 244 | &aio->iolist); 245 | if (!rc) 246 | rc = ext2fs_get_array(aio->num_bufs, sizeof(struct cacheblock), 247 | &cblocks); 248 | if (rc) 249 | goto error_mem; 250 | 251 | for (cb = cblocks, i = 0; i < aio->num_bufs; cb++, i++) { 252 | cb->buffer = arena; 253 | INIT_LIST_HEAD(&cb->list); 254 | INIT_LIST_HEAD(&cb->reqs); 255 | list_add_tail(&cb->list, &aio->cb_list); 256 | aio->avail_cacheblocks++; 257 | arena += aio->max_size; 258 | } 259 | 260 | for (i = 0; i < aio->preallocate_reqs; i++) { 261 | struct request 
*req; 262 | rc = ext2fs_get_mem(sizeof(struct request), &req); 263 | if (rc) 264 | goto error_mem; 265 | 266 | init_request(req); 267 | list_add(&req->list, &aio->req_list); 268 | } 269 | 270 | /* We use cacheblock_base as a flag that we've been initialized, 271 | * so do this last. 272 | */ 273 | aio->cacheblock_base = cblocks; 274 | return 0; 275 | 276 | error_mem: 277 | if (cblocks) 278 | ext2fs_free_mem(&cblocks); 279 | if (aio->iolist) 280 | ext2fs_free_mem(&aio->iolist); 281 | if (aio->events) 282 | ext2fs_free_mem(&aio->events); 283 | munmap(arena, aio->arena_size); 284 | 285 | error_ioctx: 286 | io_destroy(aio->ioctx); 287 | 288 | /* Flag that we're clean for aio_close() */ 289 | aio->cacheblock_base = NULL; 290 | return rc; 291 | } 292 | 293 | static errcode_t ensure_aio_init(struct aio_data *aio) 294 | { 295 | if (aio->cacheblock_base) 296 | return 0; 297 | return init_aio(aio); 298 | } 299 | 300 | static void reclaim_cache(struct aio_data *aio) 301 | { 302 | /* reclaim the smallest buffer that's been used or from before the 303 | * last read-ahead request matched by a specific read call. If there 304 | * is a tie for size, chose the oldest one. 305 | * 306 | * we use an unsigned long for the order fields, so wrapping of that 307 | * field can be ignored for 64 bit machines for now. 
	 */
	struct cacheblock *cb, *reap = NULL;
	unsigned long zero = 0, order = 0;

	list_for_each_entry(cb, &aio->cache, list) {
		if (!cb->order)
			zero++;
		else if (!order)
			order = cb->order;

		/* Only blocks older than the last-consumed order are
		 * candidates for reclaim.
		 */
		if (cb->order >= aio->used_order)
			continue;

		if (!reap)
			reap = cb;
		else if (cb->size < reap->size)
			reap = cb;
		else if (cb->size == reap->size && cb->age > reap->age)
			reap = cb;
	}

	if (DEBUG)
		fprintf(stderr, "checking for reclaimable buffers %u "
			"(used %lu, order %lu, zero %lu)\n",
			aio->avail_cacheblocks, aio->used_order,
			order, zero);

	if (reap) {
		if (DEBUG)
			fprintf(stderr, "reclaiming %llu:%ld age %u order %lu "
				"used %lu\n",
				reap->offset, reap->size, reap->age,
				reap->order, aio->used_order);
		list_move(&reap->list, &aio->cb_list);
		aio->avail_cacheblocks++;
	}
}

/* True when the free block pool has dipped below the configured reserve */
static int cache_below_reserve(struct aio_data *aio)
{
	return aio->avail_cacheblocks < aio->reserved_cacheblocks;
}

/* Take a free cache block for req, reclaiming from the cache if needed.
 * Plain readahead requests may not dig into the reserve; waiting and async
 * requests may. Returns NULL when no block can be granted.
 */
static struct cacheblock *get_cacheblock(struct aio_data *aio,
					 struct request *req)
{
	struct cacheblock *cb;

	if (cache_below_reserve(aio))
		reclaim_cache(aio);

	if (!aio->avail_cacheblocks)
		return NULL;

	/* Allow waiting and async requests to tap into the reserve */
	if (!(req->waiting || req->callback) && cache_below_reserve(aio))
		return NULL;

	cb = list_first_entry(&aio->cb_list, struct cacheblock, list);
	list_del_init(&cb->list);
	aio->avail_cacheblocks--;

	/* Prep the cache fields */
	cb->data = cb->buffer;
	cb->offset = req->offset;
	cb->size = req->size;
	cb->order = req->order;
	cb->age = 0;

	return cb;
}

/* Choose the next request to submit: prefer whichever of the readahead
 * queue head and the next async request (at or after the last submitted
 * offset) comes first on disk; wrap to the start of the async tree when
 * nothing else remains.
 */
static struct request *next_request(struct aio_data *aio)
{
	struct request *areq, *req = NULL;

	if (!aio->async_only && !list_empty(&aio->rq))
		req = list_first_entry(&aio->rq, struct request, list);

	/* If there's no async requests, then we have a simple answer */
	if (RB_EMPTY_ROOT(&aio->async_rq))
		return req;

	areq = next_async_request(aio, aio->last_offset);
	if (areq && req) {
		if (areq->offset <= req->offset)
			return areq;
		return req;
	}

	if (areq || req) {
		if (areq)
			return areq;
		return req;
	}

	/* We have neither a readahead request nor an async request after
	 * the last offset sent to the disk. Start over at the beginning
	 * of the async queue
	 */
	areq = rb_entry(rb_first(&aio->async_rq), struct request, rb_node);
	return areq;
}

/* Build one iocb covering req and as many following async requests as can
 * be merged (bounded by max_size and merge_gap). Consumed requests are
 * removed from the tree and attached to the cache block.
 */
static struct cacheblock *build_async_request(struct aio_data *aio,
					      struct request *req)
{
	struct cacheblock *cb = get_cacheblock(aio, req);
	struct rb_node *next;
	ssize_t size, gap;
	unsigned int req_count = 0;
	int instream = 1;

	if (!cb)
		return NULL;

	if (list_empty(&aio->rq) || aio->async_only)
		instream = 0;

	cb->offset = req->offset;
	gap = size = 0;

	for (;;) {
		req_count++;
		aio->stats.async_instream += instream;
		aio->stats.merged_gap_bytes += gap;
		size += req->size + gap;

		if (DEBUG) {
			fprintf(stderr, "build_async_request adding req %u: "
				"%llu:%ld to cb %llu:%ld\n",
				req_count, req->offset, req->size,
				cb->offset, size);
		}

		/* Add this request (and all duplicates) to the cacheblock
		 * If there are no duplicates, req->list will be empty and
		 * list_splice_tail_init() will be a noop. The list_add_tail()
		 * always adds this request.
		 */
		list_splice_tail_init(&req->list, &cb->reqs);
		list_add_tail(&req->list, &cb->reqs);

		/* Get the next merge candidate. If we're full, there's no
		 * reason to continue merging.
		 */
		next = (size == aio->max_size) ? NULL : rb_next(&req->rb_node);
		rb_erase(&req->rb_node, &aio->async_rq);

		if (!next)
			break;

		req = rb_entry(next, struct request, rb_node);

		gap = req->offset - (cb->offset + size);
		if (gap > aio->merge_gap)
			break;

		if (aio->max_size < size + gap + req->size)
			break;
	}

	io_prep_pread(&cb->iocb, aio->fd, cb->buffer, size, cb->offset);
	cb->size = size;

	if (req_count > 1) {
		aio->stats.merged_async_issued++;
		aio->stats.merged_async += req_count;
	}

	return cb;
}

/* Build an iocb for a single synchronous/readahead request (never merged) */
static struct cacheblock *build_regular_request(struct aio_data *aio,
						struct request *req)
{
	struct cacheblock *cb = get_cacheblock(aio, req);

	if (!cb)
		return NULL;

	io_prep_pread(&cb->iocb, aio->fd, cb->buffer, req->size, req->offset);

	/* Add this request (there can be no duplicates here)
	 */
	list_move_tail(&req->list, &cb->reqs);
	list_add_tail(&req->active, &aio->active);
	return cb;
}

/* Fill aio->iolist with up to (target_qd - in_flight) prepared iocbs.
 * Returns the number readied (not an error code, despite the errcode_t
 * return type) -- the caller passes it straight to io_submit().
 */
static errcode_t submit_requests(struct aio_data *aio)
{
	unsigned int needed = aio->target_qd - aio->in_flight;
	struct iocb **iolist = aio->iolist;
	struct request *req;
	struct cacheblock *cb;
	int ready = 0;

	if (DEBUG)
		fprintf(stderr, "submit_request %d %u\n", needed,
			aio->avail_cacheblocks);

	while (needed) {
		req = next_request(aio);
		if (!req)
			break;

		if (req->callback)
			cb = build_async_request(aio, req);
		else
			cb = build_regular_request(aio, req);

		/* No cache block available -- try again on a later pass */
		if (!cb)
			break;

		if (DEBUG) {
			fprintf(stderr, "aio submit %d %llu:%ld:%lu%s%s\n",
				needed, cb->offset, cb->size, req->order,
				req->waiting ? " waiting" : "",
				req->callback ? " async" : "");
			if (req->callback && !list_empty(&aio->rq)) {
				struct request *t;
				t = list_first_entry(&aio->rq, struct request,
						     list);
				fprintf(stderr, "aio submit last %lu next "
					"%lu\n",
					(unsigned long) aio->last_offset,
					(unsigned long) t->offset);
			}
		}

		aio->last_offset = cb->offset;
		*iolist++ = &cb->iocb;
		ready++;
		needed--;
	}

	if (DEBUG && needed && !list_empty(&aio->rq)) {
		int count = 0;
		list_for_each_entry(req, &aio->rq, list)
			if (++count == needed)
				break;
		fprintf(stderr, "postponing %d requests due to no buffers\n",
			count);
	}

	return ready;
}

static void insert_cacheblock(struct aio_data *aio, struct cacheblock *cb)
{
	/* Regular readahead, so put it on the cache list in order of
	 * submission. Start at the back in the hope that the requests
	 * complete in the same order they completed.
	 */
	struct list_head *pos = aio->cache.prev;
	struct cacheblock *pcb;

	while (pos != &aio->cache) {
		pcb = list_entry(pos, struct cacheblock, list);
		if (cb->order > pcb->order)
			break;
		pos = pos->prev;
	}
	list_add(&cb->list, pos);
}

/* Handle one completed iocb: account the data, notify/park each request
 * that was satisfied by this cache block, then decide whether the block
 * goes to the waiting list, back to the free pool, or into the cache.
 */
static errcode_t process_completion(struct aio_data *aio,
				    struct io_event *event)
{
	struct iocb *iocb = event->obj;
	struct cacheblock *cb = container_of(iocb, struct cacheblock, iocb);
	int waiting_for_cb = 0;
	int release_cb = 1;
	struct request *req, *pos;
	ssize_t offset;
	errcode_t rc;

	if (DEBUG) {
		/* We only merge async requests, so the first req on the
		 * list will tell us what we're dealing with.
		 */
		req = list_first_entry(&cb->reqs, struct request, list);
		fprintf(stderr, "process_completion %llu, %llu:%ld%s%s\n",
			iocb->u.c.offset, cb->offset, cb->size,
			req->waiting ? " waiting" : "",
			req->callback ? " async" : "");
	}

	if (event->res != cb->size) {
		// XXX check this, should we print here?
		fprintf(stderr, "Funky return for request: got %ld, "
			"expected %ld @ %lu\n",
			event->res, cb->size,
			(unsigned long) cb->offset);
		// XXX need proper errorcode
		return 1;
	}

	aio->stats.base.bytes_read += cb->size;

	list_for_each_entry_safe(req, pos, &cb->reqs, list) {
		aio->stats.completed_requests++;

		if (req->waiting) {
			/* synchronous request, put it on a separate list so we
			 * don't have to scan the entire cache for it.
			 */
			(*req->waiting)--;
			waiting_for_cb = 1;
		} else if (req->callback && !aio->ignore_async) {
			/* async request, go ahead and handle the callback */
			aio->num_async--;
			offset = req->offset - cb->offset;
			rc = req->callback(req->offset, req->size, req->priv1,
					   req->priv2, cb->data + offset);
			if (rc)
				return rc;
		} else
			release_cb = 0;

		list_del_init(&req->active);
		list_move(&req->list, &aio->req_list);
	}

	/* Now that we've processed the requests for this cache block,
	 * determine its disposition.
	 */
	if (waiting_for_cb)
		list_add_tail(&cb->list, &aio->waiting);
	else if (release_cb) {
		list_add(&cb->list, &aio->cb_list);
		aio->avail_cacheblocks++;
	} else
		insert_cacheblock(aio, cb);
	return 0;
}

/* Reap any finished IOs (blocking for at least one when wait is set) and
 * then top the queue back up to target_qd. Guarded against re-entry since
 * async callbacks may call back into the manager.
 */
static errcode_t run_queue(struct aio_data *aio, int wait)
{
	int io_ready, rc, i;
	int min = wait ?
 1 : 0;

	if (aio->in_runqueue)
		return 0;

	aio->in_runqueue = 1;

	if (DEBUG) {
		time_t now = time(NULL);

		if (aio->last_update != now) {
			struct tm *tm = localtime(&now);
			fprintf(stderr, "timestamp %02d:%02d:%02d\n", tm->tm_hour,
				tm->tm_min, tm->tm_sec);
			aio->last_update = now;
		}
		fprintf(stderr, "run_queue start %d\n", aio->in_flight);
	}

	if (aio->in_flight) {
		do {
			rc = io_getevents(aio->ioctx, min, aio->in_flight,
					  aio->events, NULL);
		} while (rc == -EINTR);
		if (rc < 0) {
			/* Unexpected failure, programming error? */
			fprintf(stderr, "failed io_getevents(%u, %u) = %d\n",
				min, aio->in_flight, rc);
			exit(1);
		}

		aio->in_flight -= rc;
		for (i = 0; i < rc; i++) {
			if (process_completion(aio, &aio->events[i])) {
				// XXX failure, how to recover?
				fprintf(stderr, "process_completion failed\n");
				exit(1);
			}
		}
	}

	io_ready = submit_requests(aio);
	if (io_ready) {
		do {
			rc = io_submit(aio->ioctx, io_ready, aio->iolist);
		} while (rc == -EINTR);
		if (rc < 0) {
			// XXX failure, how to recover
			fprintf(stderr, "failed io_submit(%u, %u) = %d\n",
				io_ready, aio->in_flight, rc);
			exit(1);
		}

		aio->in_flight += io_ready;
		aio->stats.issued_requests += io_ready;
	}

	if (DEBUG)
		fprintf(stderr, "run_queue end %d\n", aio->in_flight);

	aio->in_runqueue = 0;
	return 0;
}


/* Tear down the channel: discard queued requests, drain in-flight IO with
 * async callbacks suppressed, free all per-channel allocations, and close
 * the fd. Only the last reference performs the teardown.
 */
static errcode_t aio_close(io_channel channel)
{
	struct aio_data *aio;
	struct request *req, *rpos;
	struct span *s, *spos;
	struct rb_node *n;
	errcode_t rc = 0;

	AIO_GET_PRIVATE(channel);

	if (--channel->refcount > 0)
		return 0;

	if (aio->cacheblock_base) {
		/* Clean up the async_rq rb_tree without doing an explicit
		 * erase on each entry. Simple method is to move the requests
		 * to the readahead queue and clean them as part of that
		 * effort.
		 */
		n = rb_first(&aio->async_rq);
		while (n) {
			req = rb_entry(n, struct request, rb_node);
			list_splice_tail_init(&req->list, &aio->rq);
			list_add_tail(&req->list, &aio->rq);
			n = rb_next(n);
		}
		list_for_each_entry_safe(req, rpos, &aio->rq, list) {
			list_del(&req->list);
			ext2fs_free_mem(&req);
		}

		aio->ignore_async = 1;
		while (aio->in_flight) {
			if (run_queue(aio, 1))
				goto error;
		}

		list_for_each_entry_safe(req, rpos, &aio->req_list, list) {
			list_del(&req->list);
			ext2fs_free_mem(&req);
		}
		list_for_each_entry_safe(s, spos, &aio->span_list, list) {
			list_del(&s->list);
			ext2fs_free_mem(&s);
		}

		ext2fs_free_mem(&aio->cacheblock_base);
		ext2fs_free_mem(&aio->iolist);
		ext2fs_free_mem(&aio->events);
		munmap(aio->arena, aio->arena_size);
	}

error:
	if (close(aio->fd) < 0)
		rc = errno;

	ext2fs_free_mem(&aio);
	if (channel->name)
		ext2fs_free_mem(&channel->name);
	ext2fs_free_mem(&channel);
	return rc;
}

static errcode_t aio_set_blksize(io_channel channel, int blksize)
{
	struct aio_data *aio;

	AIO_GET_PRIVATE(channel);

	channel->block_size = blksize;
	return 0;
}

/* Parse "option=arg" tuning knobs; all values are numeric, sizes in KiB.
 * Must be called before the first IO, since init_aio() snapshots them.
 */
static errcode_t aio_set_option(io_channel channel, const char *option,
				const char *arg)
{
	struct aio_data *aio;
	unsigned long long tmp;
	char *end;

	AIO_GET_PRIVATE(channel);

	if (!arg)
		return EXT2_ET_INVALID_ARGUMENT;

	tmp = strtoull(arg, &end, 0);
	if (*end)
		return EXT2_ET_INVALID_ARGUMENT;

	if (!strcmp(option, "maxsize"))
		aio->max_size = tmp * 1024;
	else if (!strcmp(option, "qd") || !strcmp(option, "queuedepth"))
		aio->target_qd = tmp;
	else if (!strcmp(option, "req_preallocate"))
		aio->preallocate_reqs = tmp;
	else if (!strcmp(option, "cache_entries"))
		aio->num_bufs = tmp;
	else if (!strcmp(option, "reserved_entries"))
		aio->reserved_cacheblocks = tmp;
	else if (!strcmp(option, "merge_gap"))
		aio->merge_gap = tmp * 1024;
	else
		return EXT2_ET_INVALID_ARGUMENT;

	return 0;
}

static errcode_t aio_get_stats(io_channel channel, io_stats *stats)
{
	struct aio_data *aio;

	AIO_GET_PRIVATE(channel);

	if (stats)
		*stats = &aio->stats.base;
	return 0;
}

/* Translate a block/count pair into one or more sector-aligned requests
 * (each at most max_size bytes) appended to "requests". A negative count
 * means -count bytes rather than count blocks (unix_io convention).
 */
static errcode_t make_requests(io_channel channel, struct list_head *requests,
			       unsigned long block, int count,
			       unsigned int *waiting, req_callback_t callback,
			       void *req_priv1, unsigned long req_priv2)
{
	struct aio_data *aio = channel->private_data;
	struct request *req;
	ext2_loff_t offset;
	ssize_t size;
	errcode_t rc;

	offset = block * channel->block_size;
	size = (count < 0) ?
 -count : count * channel->block_size;
	/* Align the request down to a sector boundary and round the size up */
	if (offset % aio->sector_size) {
		size += offset % aio->sector_size;
		offset -= offset % aio->sector_size;
	}
	size = (size + aio->sector_size - 1) & ~(aio->sector_size - 1);

	if (DEBUG)
		fprintf(stderr, "making request for %llu:%ld\n", offset, size);

	while (size > 0) {
		/* Reuse a request from the free list when possible */
		if (list_empty(&aio->req_list)) {
			rc = ext2fs_get_mem(sizeof(struct request), &req);
			if (rc)
				return rc;

			init_request(req);
		} else {
			req = list_first_entry(&aio->req_list, struct request, list);
			list_del_init(&req->list);
		}

		req->waiting = waiting;
		if (waiting) {
			(*waiting)++;
			req->order = 0;
		} else
			req->order = aio->next_order++;
		req->callback = callback;
		req->priv1 = req_priv1;
		req->priv2 = req_priv2;
		req->offset = offset;
		if (size <= aio->max_size)
			req->size = size;
		else
			req->size = aio->max_size;

		offset += req->size;
		size -= req->size;
		list_add_tail(&req->list, requests);
	}

	if (DEBUG) {
		list_for_each_entry(req, requests, list) {
			fprintf(stderr, "\t subreq %llu:%ld:%lu\n",
				req->offset, req->size,
				req->order);
		}
	}

	return 0;
}

/* io_manager readahead hook: queue reads for the given blocks and kick the
 * queue without waiting for completion.
 */
static errcode_t aio_readahead(io_channel channel, unsigned long block,
			       int count)
{
	struct aio_data *aio;
	LIST_HEAD(requests);
	errcode_t rc;

	AIO_GET_PRIVATE(channel);

	rc = ensure_aio_init(aio);
	if (rc)
		return rc;

	if (DEBUG)
		fprintf(stderr, "aio_readahead %lu %d\n", block, count);

	rc = make_requests(channel, &requests, block, count,
			   NULL, NULL, NULL, 0);
	if (rc)
		return rc;

	/* Readahead requests are assumed to be submitted in the order
	 * they will be needed, so just put them on the tail of the list.
	 */
	list_splice_tail(&requests, &aio->rq);
	return run_queue(aio, 0);
}

/* Record a byte range still needed by a read, inserting it after
 * prior_entry to keep the span list sorted by offset. Span structs are
 * recycled through aio->span_list.
 */
static errcode_t add_span(struct aio_data *aio, struct list_head *prior_entry,
			  ext2_loff_t offset, ssize_t size, void *data)
{
	struct span *s;
	errcode_t rc;

	if (DEBUG)
		fprintf(stderr, "adding span %llu %ld\n", offset, size);

	if (list_empty(&aio->span_list)) {
		rc = ext2fs_get_mem(sizeof(struct span), &s);
		if (rc)
			return rc;

		INIT_LIST_HEAD(&s->list);
	} else {
		s = list_first_entry(&aio->span_list, struct span, list);
		list_del_init(&s->list);
	}

	s->offset = offset;
	s->size = size;
	s->data = data;

	/* Insert this span after the list head given */
	list_add(&s->list, prior_entry);
	return 0;
}

/* Duplicate an ordered span list into "clone" (must be empty on entry) */
static errcode_t clone_spans(struct aio_data *aio, struct list_head *orig,
			     struct list_head *clone)
{
	struct span *s;
	errcode_t rc;

	list_for_each_entry(s, orig, list) {
		rc = add_span(aio, clone, s->offset, s->size, s->data);
		if (rc)
			return rc;

		/* Keep appending after the last span added. clone is
		 * assumed to be an empty list on entry
		 */
		clone = clone->next;
	}

	return 0;
}

static errcode_t fill_span(struct aio_data *aio, struct list_head *spans,
			   struct cacheblock *cb)
{
	/* See if this IO buffer can fullfill any of the spans we need.
	 * Keep the spans ordered, so we can stop early if possible.
	 */
	ext2_loff_t start, end, i_end, s_end, noffset;
	ssize_t len, nsize;
	void *src, *dest, *ndata;
	struct span *s, *pos;
	errcode_t rc;

	list_for_each_entry_safe(s, pos, spans, list) {
		/* Inclusive end offsets for the buffer and the span */
		i_end = cb->offset + cb->size - 1;
		s_end = s->offset + s->size - 1;

		if (DEBUG) {
			fprintf(stderr, "checking cache block %llu:%ld:%llu:%lu "
				"for span %llu:%ld:%llu\n",
				cb->offset, cb->size, i_end, cb->order,
				s->offset, s->size, s_end);
		}

		/* is the span completely past the buffer? */
		if (s->offset > i_end)
			return 0;

		/* Is the buffer after this span? */
		if (cb->offset > s_end)
			continue;

		if (cb->order > aio->used_order)
			aio->used_order = cb->order;

		/* We have some degree of overlap */
		start = (s->offset > cb->offset) ? s->offset : cb->offset;
		end = (s_end < i_end) ? s_end : i_end;
		len = end - start + 1;

		if (cb->offset > s->offset && i_end < s_end) {
			/* This buffer splits the span */
			noffset = end + 1;
			nsize = s_end - noffset + 1;
			ndata = s->data + noffset - s->offset;

			dest = s->data + start - s->offset;
			src = cb->data;

			s->size -= nsize;
			rc = add_span(aio, &s->list, noffset, nsize, ndata);
			if (rc)
				return rc;
		} else if (s->offset > cb->offset && s_end < i_end) {
			/* This span splits the buffer, so first trim the
			 * front of the buffer to coincide with the span.
			 * If we start using spans in the buffer management,
			 * we could avoid discarding the data.
1031 | */ 1032 | cb->data += s->offset - cb->offset; 1033 | cb->size -= s->offset - cb->offset; 1034 | cb->offset = s->offset; 1035 | 1036 | dest = s->data; 1037 | src = cb->data; 1038 | 1039 | /* Span is consumed; only need to update cache block */ 1040 | cb->offset += len; 1041 | cb->data += len; 1042 | } else { 1043 | dest = s->data + (start - s->offset); 1044 | src = cb->data + (start - cb->offset); 1045 | 1046 | if (start == cb->offset) { 1047 | /* Covered tail of span, or span starts 1048 | * at the buffer's offset. 1049 | */ 1050 | cb->offset += len; 1051 | cb->data += len; 1052 | if (start == s->offset) { 1053 | s->offset += len; 1054 | s->data += len; 1055 | } 1056 | } else { 1057 | /* Covered head of span */ 1058 | s->offset += len; 1059 | s->data += len; 1060 | } 1061 | } 1062 | 1063 | memcpy(dest, src, len); 1064 | cb->size -= len; 1065 | s->size -= len; 1066 | 1067 | if (!s->size) 1068 | list_move(&s->list, &aio->span_list); 1069 | 1070 | if (!cb->size) { 1071 | list_move(&cb->list, &aio->cb_list); 1072 | aio->avail_cacheblocks++; 1073 | return 0; 1074 | } 1075 | } 1076 | 1077 | return 0; 1078 | } 1079 | 1080 | static errcode_t req_needed_for_span(struct aio_data *aio, struct span *s, 1081 | struct request *req, 1082 | unsigned int *waiting, 1083 | struct list_head *track) 1084 | { 1085 | ext2_loff_t start, end, s_end, r_end; 1086 | ssize_t len; 1087 | errcode_t rc; 1088 | 1089 | r_end = req->offset + req->size - 1; 1090 | s_end = s->offset + s->size - 1; 1091 | 1092 | if (DEBUG) { 1093 | fprintf(stderr, "checking %s req %llu:%ld:%llu " 1094 | "span %llu:%ld:%llu\n", 1095 | track ? "queued" : "active", 1096 | req->offset, req->size, r_end, 1097 | s->offset, s->size, s_end); 1098 | } 1099 | 1100 | if (s_end < req->offset || s->offset > r_end) 1101 | return 0; 1102 | 1103 | /* This request holds data we need. 1104 | * The caller must ensure that we do not try to track an active 1105 | * request, or we'll corrupt the lists. 
1106 | */ 1107 | if (track) 1108 | list_move_tail(&req->list, track); 1109 | if (req->order > aio->used_order) 1110 | aio->used_order = req->order; 1111 | req->waiting = waiting; 1112 | (*waiting)++; 1113 | 1114 | start = (s->offset > req->offset) ? s->offset : req->offset; 1115 | end = (s_end < r_end) ? s_end : r_end; 1116 | len = end - start + 1; 1117 | 1118 | if (s->offset > req->offset && s_end < r_end) { 1119 | /* This span splits the request, which means it is used up */ 1120 | s->size = 0; 1121 | } else if (req->offset > s->offset && r_end < s_end) { 1122 | /* This request splits the span */ 1123 | ext2_loff_t noffset = end + 1; 1124 | ssize_t nsize = s_end - noffset + 1; 1125 | rc = add_span(aio, &s->list, noffset, nsize, NULL); 1126 | if (rc) 1127 | return rc; 1128 | s->size = start - s->offset; 1129 | } else { 1130 | if (start == s->offset) 1131 | s->offset += len; 1132 | s->size -= len; 1133 | } 1134 | 1135 | if (DEBUG) { 1136 | fprintf(stderr, "checking %s, len %ld, new span %llu:%ld\n", 1137 | track ? 
"queued" : "active", 1138 | len, s->offset, s->size); 1139 | } 1140 | 1141 | if (!s->size) 1142 | list_move(&s->list, &aio->span_list); 1143 | 1144 | return 0; 1145 | } 1146 | 1147 | static errcode_t aio_read_blk64(io_channel channel, unsigned long long block, 1148 | int count, void *data) 1149 | { 1150 | struct aio_data *aio; 1151 | struct cacheblock *cb, *pos; 1152 | struct span *s, *spos; 1153 | struct request *req, *rpos; 1154 | ext2_loff_t offset; 1155 | ssize_t size; 1156 | errcode_t rc; 1157 | LIST_HEAD(spans); 1158 | LIST_HEAD(cloned_spans); 1159 | LIST_HEAD(requests); 1160 | LIST_HEAD(promote); 1161 | struct list_head *needed_spans; 1162 | unsigned int waiting = 0; 1163 | unsigned long orig_used; 1164 | 1165 | AIO_GET_PRIVATE(channel); 1166 | 1167 | if (DEBUG) 1168 | fprintf(stderr, "read %llu %d\n", block, count); 1169 | 1170 | rc = ensure_aio_init(aio); 1171 | if (rc) 1172 | return rc; 1173 | 1174 | if (aio->in_runqueue) 1175 | return EXT2_ET_OP_NOT_SUPPORTED; 1176 | 1177 | offset = block * channel->block_size; 1178 | size = (count < 0) ? -count : count * channel->block_size; 1179 | 1180 | if (DEBUG) 1181 | fprintf(stderr, "read init %llu %ld\n", offset, size); 1182 | 1183 | rc = add_span(aio, &spans, offset, size, data); 1184 | if (rc) 1185 | return rc; 1186 | 1187 | orig_used = aio->used_order; 1188 | list_for_each_entry_safe(cb, pos, &aio->cache, list) { 1189 | cb->age++; 1190 | rc = fill_span(aio, &spans, cb); 1191 | if (rc) 1192 | return rc; 1193 | 1194 | /* If there are no more spans to fill, then we're done. */ 1195 | if (list_empty(&spans)) 1196 | return run_queue(aio, 0); 1197 | } 1198 | 1199 | if (DEBUG && aio->used_order != orig_used) { 1200 | fprintf(stderr, "cache moved used from %lu to %lu\n", 1201 | orig_used, aio->used_order); 1202 | orig_used = aio->used_order; 1203 | } 1204 | 1205 | /* Walk active and queued requests looking for ones that will 1206 | * fulfill the spans. 
We need a copy of the spans to keep track of 1207 | * what's still required. 1208 | */ 1209 | if (!list_empty(&aio->active) || !list_empty(&aio->rq)) { 1210 | rc = clone_spans(aio, &spans, &cloned_spans); 1211 | if (rc) 1212 | return rc; 1213 | 1214 | /* First, search active requests */ 1215 | list_for_each_entry(req, &aio->active, active) { 1216 | list_for_each_entry_safe(s, spos, &cloned_spans, list) { 1217 | rc = req_needed_for_span(aio, s, req, &waiting, 1218 | NULL); 1219 | if (rc) 1220 | return rc; 1221 | } 1222 | 1223 | if (list_empty(&cloned_spans)) 1224 | break; 1225 | } 1226 | 1227 | /* then queued requests, tracking those that will satisfy 1228 | * one of the spans 1229 | */ 1230 | list_for_each_entry_safe(req, rpos, &aio->rq, list) { 1231 | list_for_each_entry_safe(s, spos, &cloned_spans, list) { 1232 | rc = req_needed_for_span(aio, s, req, &waiting, 1233 | &promote); 1234 | if (rc) 1235 | return rc; 1236 | } 1237 | 1238 | if (list_empty(&cloned_spans)) 1239 | break; 1240 | } 1241 | 1242 | /* If we found any requests to promote, then we can zap the 1243 | * intervening readahead requests in the assumption that 1244 | * they will be abandoned. 
1245 | */ 1246 | if (!list_empty(&promote)) { 1247 | unsigned count = 0; 1248 | 1249 | list_for_each_entry_safe(req, rpos, &aio->rq, list) { 1250 | if (req->order > aio->used_order) 1251 | break; 1252 | count++; 1253 | list_move(&req->list, &aio->req_list); 1254 | } 1255 | 1256 | if (DEBUG) { 1257 | fprintf(stderr, "found queued requests for " 1258 | "read, discarding %u " 1259 | "intervening requests\n", 1260 | count); 1261 | } 1262 | 1263 | list_splice(&promote, &aio->rq); 1264 | } 1265 | 1266 | needed_spans = &cloned_spans; 1267 | } else 1268 | needed_spans = &spans; 1269 | 1270 | if (DEBUG && aio->used_order != orig_used) { 1271 | fprintf(stderr, "requests moved used from %lu to %lu\n", 1272 | orig_used, aio->used_order); 1273 | orig_used = aio->used_order; 1274 | } 1275 | 1276 | /* convert remaining spans into requests and place at the front 1277 | * of the queue. Traverse the list in reverse as so that we push 1278 | * requests into the front of the queue such that they come out 1279 | * in sequential order. 1280 | */ 1281 | if (DEBUG && !list_empty(needed_spans)) 1282 | fprintf(stderr, "making requests for sync read\n"); 1283 | 1284 | list_for_each_entry_safe_reverse(s, spos, needed_spans, list) { 1285 | rc = make_requests(channel, &requests, 1286 | s->offset / channel->block_size, 1287 | -s->size, &waiting, NULL, NULL, 0); 1288 | if (rc) 1289 | return rc; 1290 | 1291 | list_splice_init(&requests, &aio->rq); 1292 | 1293 | if (needed_spans == &cloned_spans) 1294 | list_move(&s->list, &aio->span_list); 1295 | } 1296 | 1297 | if (DEBUG) 1298 | fprintf(stderr, "waiting for %u requests\n", waiting); 1299 | 1300 | /* wait for needed requests to complete */ 1301 | while (waiting) { 1302 | rc = run_queue(aio, 1); 1303 | if (rc) 1304 | return rc; 1305 | 1306 | /* walk completed requests to fill final spans. 
We do 1307 | * this early to free up buffers as we go, as they may be 1308 | * needed to fulfill our read request 1309 | */ 1310 | list_for_each_entry_safe(cb, pos, &aio->waiting, list) { 1311 | rc = fill_span(aio, &spans, cb); 1312 | if (rc) 1313 | return rc; 1314 | 1315 | if (cb->size) { 1316 | list_del_init(&cb->list); 1317 | insert_cacheblock(aio, cb); 1318 | } 1319 | 1320 | /* We're done if we've filled all of the spans. */ 1321 | if (list_empty(&spans)) 1322 | break; 1323 | } 1324 | } 1325 | 1326 | list_for_each_entry_safe(cb, pos, &aio->waiting, list) { 1327 | if (cb->size) { 1328 | list_del_init(&cb->list); 1329 | insert_cacheblock(aio, cb); 1330 | } 1331 | } 1332 | 1333 | if (DEBUG) { 1334 | list_for_each_entry_safe(cb, pos, &aio->cache, list) { 1335 | fprintf(stderr, "cache check %llu:%ld\n", 1336 | cb->offset, cb->size); 1337 | } 1338 | } 1339 | 1340 | /* The span list should be empty at this point. If not, report 1341 | * an error. I'd like a better one, but this will have to do for 1342 | * now. 1343 | */ 1344 | if (!list_empty(&spans)) { 1345 | fprintf(stderr, "Had spans left after read!\n"); 1346 | return EXT2_ET_INVALID_ARGUMENT; 1347 | } 1348 | 1349 | /* We may have freed up some buffers, so try to send off some 1350 | * more requests. 
1351 | */ 1352 | return run_queue(aio, 0); 1353 | } 1354 | 1355 | static errcode_t aio_read_blk(io_channel channel, unsigned long block, 1356 | int count, void *data) 1357 | { 1358 | return aio_read_blk64(channel, block, count, data); 1359 | } 1360 | 1361 | static errcode_t aio_flush(io_channel channel) 1362 | { 1363 | /* We don't write, so nothing to flush */ 1364 | return 0; 1365 | } 1366 | 1367 | static errcode_t aio_write_blk(io_channel channel, unsigned long block, 1368 | int count, const void *data) 1369 | { 1370 | return EXT2_ET_OP_NOT_SUPPORTED; 1371 | } 1372 | 1373 | static errcode_t aio_write_byte(io_channel channel, unsigned long offset, 1374 | int size, const void *data) 1375 | { 1376 | return EXT2_ET_OP_NOT_SUPPORTED; 1377 | } 1378 | 1379 | static errcode_t aio_write_blk64(io_channel channel, unsigned long long block, 1380 | int count, const void *data) 1381 | { 1382 | return EXT2_ET_OP_NOT_SUPPORTED; 1383 | } 1384 | 1385 | static errcode_t aio_async_read(io_channel channel, unsigned long block, 1386 | int count, req_callback_t cb, void *priv1, 1387 | unsigned long priv2) 1388 | { 1389 | struct aio_data *aio; 1390 | LIST_HEAD(requests); 1391 | struct request *req, *pos; 1392 | errcode_t rc; 1393 | 1394 | AIO_GET_PRIVATE(channel); 1395 | 1396 | rc = ensure_aio_init(aio); 1397 | if (rc) 1398 | return rc; 1399 | 1400 | if (DEBUG) 1401 | fprintf(stderr, "aio_async_read %lu %d\n", block, count); 1402 | 1403 | rc = make_requests(channel, &requests, block, count, NULL, 1404 | cb, priv1, priv2); 1405 | if (rc) 1406 | return rc; 1407 | 1408 | list_for_each_entry_safe(req, pos, &requests, list) { 1409 | list_del_init(&req->list); 1410 | insert_async_req(aio, req); 1411 | 1412 | aio->num_async++; 1413 | aio->stats.total_async++; 1414 | if (aio->num_async > aio->stats.max_async) 1415 | aio->stats.max_async = aio->num_async; 1416 | } 1417 | 1418 | return run_queue(aio, 0); 1419 | } 1420 | 1421 | static errcode_t aio_finish_async(io_channel channel, unsigned long 
max_async) 1422 | { 1423 | struct aio_data *aio; 1424 | errcode_t rc; 1425 | 1426 | AIO_GET_PRIVATE(channel); 1427 | 1428 | rc = ensure_aio_init(aio); 1429 | if (rc) 1430 | return rc; 1431 | 1432 | if (DEBUG) 1433 | fprintf(stderr, "aio_finish_async\n"); 1434 | 1435 | aio->async_only = 1; 1436 | while (!rc && aio->num_async > max_async) 1437 | rc = run_queue(aio, 1); 1438 | 1439 | aio->async_only = 0; 1440 | return rc; 1441 | } 1442 | 1443 | static errcode_t aio_async_count(io_channel channel, unsigned long *count) 1444 | { 1445 | struct aio_data *aio; 1446 | errcode_t rc; 1447 | 1448 | AIO_GET_PRIVATE(channel); 1449 | 1450 | rc = ensure_aio_init(aio); 1451 | if (rc) 1452 | return rc; 1453 | 1454 | *count = aio->num_async; 1455 | return 0; 1456 | } 1457 | 1458 | static errcode_t aio_open(const char *name, int flags, io_channel *channel); 1459 | 1460 | static struct struct_io_manager struct_aio_manager = { 1461 | .magic = EXT2_ET_MAGIC_IO_MANAGER, 1462 | .name = "Linux AIO Manager", 1463 | .open = aio_open, 1464 | .close = aio_close, 1465 | .set_blksize = aio_set_blksize, 1466 | .read_blk = aio_read_blk, 1467 | .write_blk = aio_write_blk, 1468 | .flush = aio_flush, 1469 | .write_byte = aio_write_byte, 1470 | .set_option = aio_set_option, 1471 | .get_stats = aio_get_stats, 1472 | .read_blk64 = aio_read_blk64, 1473 | .write_blk64 = aio_write_blk64, 1474 | #if HAVE_LUSTRE_EXTFS2 1475 | /* only available for Lustre e2fsprogs; make life easy during 1476 | * development by commenting it out here and providing an alternate 1477 | * interface 1478 | */ 1479 | .readahead = aio_readahead, 1480 | #endif 1481 | }; 1482 | 1483 | io_manager aio_io_manager = &struct_aio_manager; 1484 | 1485 | static errcode_t aio_open(const char *name, int flags, io_channel *channel) 1486 | { 1487 | io_channel io; 1488 | struct aio_data *aio; 1489 | errcode_t rc; 1490 | 1491 | if (!name) 1492 | return EXT2_ET_BAD_DEVICE_NAME; 1493 | 1494 | rc = ext2fs_get_mem(sizeof(struct struct_io_channel), 
&io); 1495 | if (rc) 1496 | return rc; 1497 | 1498 | memset(io, 0, sizeof(struct struct_io_channel)); 1499 | io->magic = EXT2_ET_MAGIC_IO_CHANNEL; 1500 | rc = ext2fs_get_mem(sizeof(struct aio_data), &aio); 1501 | if (rc) 1502 | goto error_io; 1503 | 1504 | io->manager = aio_io_manager; 1505 | rc = ext2fs_get_mem(strlen(name) + 1, &io->name); 1506 | if (rc) 1507 | goto error_aio; 1508 | 1509 | strcpy(io->name, name); 1510 | io->private_data = aio; 1511 | io->block_size = 1024; 1512 | io->read_error = 0; 1513 | io->write_error = 0; 1514 | io->refcount = 1; 1515 | 1516 | memset(aio, 0, sizeof(struct aio_data)); 1517 | aio->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL; 1518 | aio->next_order = 1; 1519 | aio->stats.base.num_fields = 2; 1520 | 1521 | INIT_LIST_HEAD(&aio->cb_list); 1522 | INIT_LIST_HEAD(&aio->req_list); 1523 | INIT_LIST_HEAD(&aio->span_list); 1524 | INIT_LIST_HEAD(&aio->cache); 1525 | INIT_LIST_HEAD(&aio->waiting); 1526 | INIT_LIST_HEAD(&aio->active); 1527 | INIT_LIST_HEAD(&aio->rq); 1528 | aio->async_rq = RB_ROOT; 1529 | 1530 | if (flags & IO_FLAG_RW) { 1531 | rc = EXT2_ET_OP_NOT_SUPPORTED; 1532 | goto error_name; 1533 | } 1534 | 1535 | aio->fd = open(io->name, O_RDONLY | O_DIRECT); 1536 | if (aio->fd < 0) { 1537 | rc = errno; 1538 | goto error_name; 1539 | } 1540 | 1541 | /* We use O_DIRECT, so we need to align our size to the actual 1542 | * sector size of the device. 
1543 | */ 1544 | if (ioctl(aio->fd, BLKSSZGET, &aio->sector_size) < 0) { 1545 | rc = errno; 1546 | goto error_name; 1547 | } 1548 | 1549 | *channel = io; 1550 | return 0; 1551 | 1552 | error_name: 1553 | ext2fs_free_mem(&io->name); 1554 | 1555 | error_aio: 1556 | ext2fs_free_mem(&aio); 1557 | 1558 | error_io: 1559 | ext2fs_free_mem(&io); 1560 | return rc; 1561 | } 1562 | 1563 | #if !HAVE_LUSTRE_EXTFS2 1564 | errcode_t io_channel_readahead(io_channel channel, unsigned long block, 1565 | int count) 1566 | { 1567 | /* cannot help the unix manager do readahead here */ 1568 | if (channel->manager != aio_io_manager) 1569 | return 0; 1570 | 1571 | return aio_readahead(channel, block, count); 1572 | } 1573 | #endif 1574 | 1575 | static unsigned char unix_async_buffer[1024 * 1024]; 1576 | 1577 | errcode_t io_channel_async_read(io_channel channel, unsigned long block, 1578 | int count, int (*cb)(ext2_loff_t offset, 1579 | ssize_t size, 1580 | void *priv1, 1581 | unsigned long priv2, 1582 | void *data), 1583 | void *priv1, unsigned long priv2) 1584 | { 1585 | errcode_t rc; 1586 | 1587 | /* Should be checking for existence of method, but that requires 1588 | * mods to libext2fs we want to defer until merge time. 1589 | */ 1590 | if (channel->manager == aio_io_manager) 1591 | return aio_async_read(channel, block, count, cb, priv1, priv2); 1592 | 1593 | if (count * channel->block_size > (1024 * 1024)) 1594 | return EXT2_ET_FILE_TOO_BIG; 1595 | 1596 | rc = io_channel_read_blk64(channel, block, count, unix_async_buffer); 1597 | if (!rc) { 1598 | block *= channel->block_size; 1599 | count *= channel->block_size; 1600 | rc = cb(block, count, priv1, priv2, unix_async_buffer); 1601 | } 1602 | return rc; 1603 | } 1604 | 1605 | errcode_t io_channel_finish_async(io_channel channel, unsigned long max_async) 1606 | { 1607 | /* Should be checking for existence of method, but that requires 1608 | * mods to libext2fs we want to defer until merge time. 
1609 | */ 1610 | if (channel->manager != aio_io_manager) 1611 | return 0; 1612 | return aio_finish_async(channel, max_async); 1613 | } 1614 | 1615 | errcode_t io_channel_async_count(io_channel channel, unsigned long *count) 1616 | { 1617 | /* Should be checking for existence of method, but that requires 1618 | * mods to libext2fs we want to defer until merge time. 1619 | */ 1620 | if (channel->manager != aio_io_manager) 1621 | return 0; 1622 | return aio_async_count(channel, count); 1623 | } 1624 | --------------------------------------------------------------------------------