├── scripts ├── stopkgpu ├── Makefile └── runkgpu ├── services ├── test │ ├── libsrv_test │ │ ├── Makefile │ │ └── srv_test.cu │ ├── Makefile │ ├── calg2 │ │ ├── Makefile │ │ └── calg2.c │ ├── sysbm │ │ ├── Makefile │ │ └── sysbm.c │ └── callgpu │ │ ├── Makefile │ │ └── callgpu.c ├── Makefile ├── gaes │ ├── libsrv_gaes │ │ ├── Makefile │ │ └── srv_gaes.cu │ ├── Makefile │ ├── callaes │ │ ├── Makefile │ │ └── callgaes.c │ ├── gaes_ctr │ │ ├── Makefile │ │ └── gaes_ctr.c │ ├── gaes_ecb │ │ └── Makefile │ ├── gaes_xts │ │ └── Makefile │ ├── dm-crypt │ │ └── Makefile │ ├── ecryptfs │ │ ├── Makefile │ │ ├── README │ │ ├── dentry.c │ │ ├── debug.c │ │ ├── kthread.c │ │ ├── super.c │ │ ├── file.c │ │ └── read_write.c │ ├── gaes_common.h │ ├── gaesk.h │ └── gaes_test.c ├── raid6 │ ├── Makefile │ ├── gpq │ │ ├── Makefile │ │ └── gpq.c │ ├── raid456 │ │ ├── 38 │ │ │ ├── Makefile │ │ │ ├── raid0.h │ │ │ └── bitmap.h │ │ ├── 39 │ │ │ ├── Makefile │ │ │ ├── raid0.h │ │ │ └── bitmap.h │ │ └── Makefile │ ├── grecov │ │ ├── Makefile │ │ └── raid6test.c │ ├── r62_recov.h │ ├── gpq.h │ └── libsrv_raid6 │ │ ├── Makefile │ │ ├── test.c │ │ ├── testlib.cu │ │ ├── dev.cu │ │ └── srv_raid6.cu └── SERVICE_DEV_GUIDE ├── Makefile ├── kgpu ├── Makefile ├── kkgpu.h ├── gputils.h ├── kgpu_log.h ├── kgpu_kutils.c ├── service.h ├── helper.h ├── kgpu_log.c ├── list.h ├── kgpu.h ├── service.c ├── gpuops.cu └── helper.c └── README /scripts/stopkgpu: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rmmod gaes_ecb && rmmod gaes_ctr && rmmod gaes_xts && rmmod kgpu -------------------------------------------------------------------------------- /scripts/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | $(if $(BUILD_DIR), cp ./* $(BUILD_DIR)/ ) 3 | 4 | clean: 5 | @echo "Scripts clean done" 6 | -------------------------------------------------------------------------------- 
/services/test/libsrv_test/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: 3 | nvcc --shared --compiler-options '-fPIC' -o libsrv_test.so srv_test.cu 4 | $(if $(BUILD_DIR), cp libsrv_test.so $(BUILD_DIR)/ ) 5 | 6 | clean: 7 | rm -f *.o *.so 8 | -------------------------------------------------------------------------------- /scripts/runkgpu: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | insmod ./kgpu.ko && 4 | modprobe aes && 5 | modprobe xts && 6 | insmod ./gaes_ecb.ko zero_copy=0 split_threshold=32 max_splits=8 && 7 | insmod ./gaes_ctr.ko && 8 | insmod ./gaes_xts.ko && 9 | ./helper -l `pwd` -------------------------------------------------------------------------------- /services/Makefile: -------------------------------------------------------------------------------- 1 | SUBDIRS = gaes test raid6 2 | 3 | all: $(SUBDIRS) 4 | 5 | .PHONY: $(SUBDIRS) 6 | 7 | $(SUBDIRS): 8 | $(MAKE) -C $@ $(TARGET) kv=$(kv) BUILD_DIR=$(BUILD_DIR) 9 | 10 | clean: 11 | $(MAKE) all TARGET=clean 12 | -------------------------------------------------------------------------------- /services/gaes/libsrv_gaes/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: srv_gaes.cu 3 | nvcc -arch=sm_20 -O2 --shared --compiler-options '-fPIC' -o libsrv_gaes.so srv_gaes.cu 4 | $(if $(BUILD_DIR), cp libsrv_gaes.so $(BUILD_DIR)/ ) 5 | 6 | clean: 7 | rm -f *.o *.so 8 | -------------------------------------------------------------------------------- /services/test/Makefile: -------------------------------------------------------------------------------- 1 | SUBDIRS = calg2 callgpu libsrv_test sysbm 2 | 3 | all: $(SUBDIRS) 4 | 5 | .PHONY: $(SUBDIRS) 6 | 7 | $(SUBDIRS): 8 | $(MAKE) -C $@ $(TARGET) BUILD_DIR=$(BUILD_DIR) 9 | 10 | clean: 11 | $(MAKE) all TARGET=clean 12 | 
-------------------------------------------------------------------------------- /services/raid6/Makefile: -------------------------------------------------------------------------------- 1 | SUBDIRS = libsrv_raid6 gpq grecov raid456 2 | 3 | all: $(SUBDIRS) 4 | 5 | .PHONY: $(SUBDIRS) 6 | 7 | $(SUBDIRS): 8 | $(MAKE) -C $@ $(TARGET) kv=$(kv) BUILD_DIR=$(BUILD_DIR) 9 | 10 | clean: 11 | $(MAKE) all TARGET=clean -------------------------------------------------------------------------------- /services/gaes/Makefile: -------------------------------------------------------------------------------- 1 | SUBDIRS = callaes ecryptfs gaes_ctr libsrv_gaes dm-crypt gaes_ecb gaes_xts 2 | 3 | all: $(SUBDIRS) 4 | 5 | .PHONY: $(SUBDIRS) 6 | 7 | $(SUBDIRS): 8 | $(MAKE) -C $@ $(TARGET) BUILD_DIR=$(BUILD_DIR) 9 | 10 | clean: 11 | $(MAKE) all TARGET=clean -------------------------------------------------------------------------------- /services/gaes/callaes/Makefile: -------------------------------------------------------------------------------- 1 | obj-m += callgaes.o 2 | 3 | all: 4 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules 5 | $(if $(BUILD_DIR), cp callgaes.ko $(BUILD_DIR)/ ) 6 | 7 | clean: 8 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean 9 | -------------------------------------------------------------------------------- /services/SERVICE_DEV_GUIDE: -------------------------------------------------------------------------------- 1 | 2 | KGPU Service Development Guide 3 | 4 | (Will be more formal later, now just for tips) 5 | 6 | - Service providers can use the log functions defined in kgpu_log.h and link 7 | with kgpu_log.a. To define your own shorthand, see kkgpu.h and helper.h 8 | for reference. 
9 | -------------------------------------------------------------------------------- /services/raid6/gpq/Makefile: -------------------------------------------------------------------------------- 1 | obj-m += gpq.o 2 | 3 | all: 4 | cp ../../../kgpu/Module.symvers ./ 5 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules 6 | $(if $(BUILD_DIR), cp gpq.ko $(BUILD_DIR)/ ) 7 | 8 | clean: 9 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean 10 | -------------------------------------------------------------------------------- /services/test/calg2/Makefile: -------------------------------------------------------------------------------- 1 | obj-m += calg2.o 2 | 3 | all: 4 | cp ../../../kgpu/Module.symvers ./ 5 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules 6 | $(if $(BUILD_DIR), cp calg2.ko $(BUILD_DIR)/ ) 7 | 8 | clean: 9 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean 10 | -------------------------------------------------------------------------------- /services/test/sysbm/Makefile: -------------------------------------------------------------------------------- 1 | obj-m += sysbm.o 2 | 3 | all: 4 | cp ../../../kgpu/Module.symvers ./ 5 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules 6 | $(if $(BUILD_DIR), cp sysbm.ko $(BUILD_DIR)/ ) 7 | 8 | clean: 9 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean 10 | -------------------------------------------------------------------------------- /services/test/callgpu/Makefile: -------------------------------------------------------------------------------- 1 | obj-m += callgpu.o 2 | 3 | all: 4 | cp ../../../kgpu/Module.symvers ./ 5 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules 6 | $(if $(BUILD_DIR), cp callgpu.ko $(BUILD_DIR)/ ) 7 | 8 | clean: 9 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean 10 | 
-------------------------------------------------------------------------------- /services/gaes/gaes_ctr/Makefile: -------------------------------------------------------------------------------- 1 | obj-m += gaes_ctr.o 2 | 3 | all: 4 | cp ../../../kgpu/Module.symvers ./ 5 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules 6 | $(if $(BUILD_DIR), cp gaes_ctr.ko $(BUILD_DIR)/ ) 7 | 8 | clean: 9 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean 10 | -------------------------------------------------------------------------------- /services/gaes/gaes_ecb/Makefile: -------------------------------------------------------------------------------- 1 | obj-m += gaes_ecb.o 2 | 3 | all: 4 | cp ../../../kgpu/Module.symvers ./ 5 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules 6 | $(if $(BUILD_DIR), cp gaes_ecb.ko $(BUILD_DIR)/ ) 7 | 8 | clean: 9 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean 10 | -------------------------------------------------------------------------------- /services/gaes/gaes_xts/Makefile: -------------------------------------------------------------------------------- 1 | obj-m += gaes_xts.o 2 | 3 | all: 4 | cp ../../../kgpu/Module.symvers ./ 5 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules 6 | $(if $(BUILD_DIR), cp gaes_xts.ko $(BUILD_DIR)/ ) 7 | 8 | clean: 9 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean 10 | -------------------------------------------------------------------------------- /services/gaes/dm-crypt/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Makefile for the Linux 2.6 dm-crypt 3 | # 4 | 5 | obj-m += dm-crypt.o 6 | 7 | all: 8 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules 9 | $(if $(BUILD_DIR), cp dm-crypt.ko $(BUILD_DIR)/ ) 10 | 11 | clean: 12 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean 13 | 
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SUBDIRS = kgpu services scripts 2 | 3 | all: $(SUBDIRS) 4 | 5 | 6 | .PHONY: $(SUBDIRS) 7 | 8 | $(SUBDIRS): mkbuilddir 9 | $(MAKE) -C $@ $(TARGET) kv=$(kv) BUILD_DIR=`pwd`/build 10 | 11 | mkbuilddir: 12 | mkdir -p build 13 | 14 | services: kgpu 15 | 16 | distclean: 17 | $(MAKE) all kv=$(kv) TARGET=clean 18 | 19 | clean: distclean 20 | rm -rf build 21 | -------------------------------------------------------------------------------- /services/raid6/raid456/38/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Makefile for the kernel software RAID 456 3 | # 4 | raid456-y += raid5.o 5 | obj-$(CONFIG_MD_RAID456) += raid456.o 6 | 7 | all: 8 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules 9 | $(if $(BUILD_DIR), cp raid456.ko $(BUILD_DIR)/ ) 10 | 11 | clean: 12 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean 13 | -------------------------------------------------------------------------------- /services/raid6/raid456/39/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Makefile for the kernel software RAID 456 3 | # 4 | raid456-y += raid5.o 5 | obj-$(CONFIG_MD_RAID456) += raid456.o 6 | 7 | all: 8 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules 9 | $(if $(BUILD_DIR), cp raid456.ko $(BUILD_DIR)/ ) 10 | 11 | clean: 12 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean 13 | -------------------------------------------------------------------------------- /services/raid6/raid456/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Makefile for the kernel software RAID 456 3 | # 4 | all: 5 | ifeq ($(kv), 38) 6 | $(MAKE) -C 38 all BUILD_DIR=$(BUILD_DIR) 7 | else 8 | $(MAKE) -C 
39 all BUILD_DIR=$(BUILD_DIR) 9 | endif 10 | 11 | clean: 12 | ifeq ($(kv), 38) 13 | $(MAKE) -C 38 clean BUILD_DIR=$(BUILD_DIR) 14 | else 15 | $(MAKE) -C 39 clean BUILD_DIR=$(BUILD_DIR) 16 | endif 17 | -------------------------------------------------------------------------------- /services/raid6/grecov/Makefile: -------------------------------------------------------------------------------- 1 | obj-m += async_raid6_recov.o 2 | obj-m += raid6test.o 3 | 4 | all: 5 | cp ../../../kgpu/Module.symvers ./ 6 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules 7 | $(if $(BUILD_DIR), cp async_raid6_recov.ko $(BUILD_DIR)/ ) 8 | $(if $(BUILD_DIR), cp raid6test.ko $(BUILD_DIR)/ ) 9 | 10 | clean: 11 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean 12 | -------------------------------------------------------------------------------- /services/gaes/ecryptfs/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Makefile for the Linux 2.6 eCryptfs 3 | # 4 | 5 | obj-m += ecryptfs.o 6 | 7 | ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o miscdev.o kthread.o debug.o 8 | 9 | all: 10 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules 11 | $(if $(BUILD_DIR), cp ecryptfs.ko $(BUILD_DIR)/ ) 12 | 13 | clean: 14 | make -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean 15 | -------------------------------------------------------------------------------- /services/raid6/r62_recov.h: -------------------------------------------------------------------------------- 1 | /* This work is licensed under the terms of the GNU GPL, version 2. See 2 | * the GPL-COPYING file in the top-level directory. 3 | * 4 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 5 | * All rights reserved. 
6 | */ 7 | 8 | #ifndef __R62_RECOV_H__ 9 | #define __R62_RECOV_H__ 10 | 11 | struct r62_tbl { 12 | int pbidx; 13 | int qidx; 14 | }; 15 | 16 | struct r62_recov_data { 17 | size_t bytes; 18 | int n; 19 | struct r62_tbl idx[0]; 20 | }; 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /services/raid6/raid456/38/raid0.h: -------------------------------------------------------------------------------- 1 | #ifndef _RAID0_H 2 | #define _RAID0_H 3 | 4 | struct strip_zone 5 | { 6 | sector_t zone_end; /* Start of the next zone (in sectors) */ 7 | sector_t dev_start; /* Zone offset in real dev (in sectors) */ 8 | int nb_dev; /* # of devices attached to the zone */ 9 | }; 10 | 11 | struct raid0_private_data 12 | { 13 | struct strip_zone *strip_zone; 14 | mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ 15 | int nr_strip_zones; 16 | }; 17 | 18 | typedef struct raid0_private_data raid0_conf_t; 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /services/raid6/raid456/39/raid0.h: -------------------------------------------------------------------------------- 1 | #ifndef _RAID0_H 2 | #define _RAID0_H 3 | 4 | struct strip_zone 5 | { 6 | sector_t zone_end; /* Start of the next zone (in sectors) */ 7 | sector_t dev_start; /* Zone offset in real dev (in sectors) */ 8 | int nb_dev; /* # of devices attached to the zone */ 9 | }; 10 | 11 | struct raid0_private_data 12 | { 13 | struct strip_zone *strip_zone; 14 | mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ 15 | int nr_strip_zones; 16 | }; 17 | 18 | typedef struct raid0_private_data raid0_conf_t; 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /services/raid6/gpq.h: -------------------------------------------------------------------------------- 1 | /* This work is licensed under the terms of the GNU GPL, version 2. 
See 2 | * the GPL-COPYING file in the top-level directory. 3 | * 4 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 5 | * All rights reserved. 6 | */ 7 | #ifndef __GPQ_H__ 8 | #define __GPQ_H__ 9 | 10 | struct raid6_pq_data { 11 | unsigned long dsize; 12 | unsigned int nr_d; 13 | }; 14 | 15 | /* 16 | * Not that efficient, but can save some time because 17 | * we can allocate disk pointers statically. 18 | */ 19 | #define MAX_DISKS 50 20 | 21 | long test_gpq(int disks, size_t dsize); 22 | long test_cpq(int disks, size_t dsize); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /services/gaes/ecryptfs/README: -------------------------------------------------------------------------------- 1 | 2 | Modified eCryptfs for Linux using GPU-AES 3 | 4 | 5 | This is a demo to show the performance gain by using GPU accelerated 6 | AES functions inside the kernel, with the help of KGPU. 7 | 8 | The performance optimization only aims at large trunk of data reading/ 9 | writing or multiple parallel reads/writes. Small sized(typically smaller 10 | than 8KB or 16KB) fs ops can not be sped up. Since the fs is called 11 | eCryptfs, where e means 'enterprise', large trunked reads/writes 12 | are expected to be very often and more frequent than small ones. 13 | 14 | The eCryptfs is an encrypted filesystem in Linux kernel. 
15 | 16 | Weibin Sun, Xing Lin 17 | {wbsun, xinglin}@cs.utah.edu 18 | -------------------------------------------------------------------------------- /services/raid6/libsrv_raid6/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: service 3 | 4 | service: srv_raid6.cu 5 | nvcc -O3 -arch=sm_35 --shared -D__LIB_SERVICE__ --compiler-options '-fPIC' -o libsrv_raid6.so srv_raid6.cu 6 | $(if $(BUILD_DIR), cp libsrv_raid6.so $(BUILD_DIR)/ ) 7 | 8 | testlib: testlib.cu 9 | nvcc -arch=sm_35 -O3 --shared --compiler-options '-fPIC' -o libcudapq.so testlib.cu 10 | $(if $(BUILD_DIR), cp libcudapq.so $(BUILD_DIR)/ ) 11 | 12 | test: test.c testlib.cu 13 | nvcc -arch=sm_35 -O3 -c testlib.cu -o testlib.o 14 | nvcc -O3 testlib.o test.c -o test 15 | 16 | test-old: test.c testlib.cu 17 | nvcc -arch=sm_20 -O3 -c testlib.cu -o testlib-old.o 18 | nvcc -O3 testlib-old.o test.c -o test-old 19 | 20 | clean: 21 | rm -f test 22 | rm -f *.o *.so 23 | -------------------------------------------------------------------------------- /kgpu/Makefile: -------------------------------------------------------------------------------- 1 | obj-m += kgpu.o 2 | 3 | all: kgpu helper 4 | 5 | kgpu-objs := main.o kgpu_kutils.o kgpu_log.o 6 | 7 | kgpu: 8 | make -C /lib/modules/$(shell uname -r)/build M=`pwd` modules 9 | $(if $(BUILD_DIR), cp kgpu.ko $(BUILD_DIR)/ ) 10 | 11 | kgpu_log: 12 | gcc -O2 -D__KGPU__ -c kgpu_log.c -o kgpu_log_user.o 13 | ar -rcs kgpu_log.a kgpu_log_user.o 14 | $(if $(BUILD_DIR), cp kgpu_log.a $(BUILD_DIR)/ ) 15 | 16 | helper: kgpu_log 17 | gcc -O2 -D__KGPU__ -c helper.c 18 | gcc -O2 -D__KGPU__ -c service.c 19 | nvcc -O2 -D__KGPU__ -c -arch=sm_20 gpuops.cu 20 | nvcc -link -D__KGPU__ -O2 -arch=sm_20 service.o helper.o kgpu_log_user.o gpuops.o -o helper -ldl 21 | $(if $(BUILD_DIR), cp helper $(BUILD_DIR)/ ) 22 | 23 | clean: 24 | make -C /lib/modules/$(shell uname -r)/build M=`pwd` clean 25 | rm -f helper 26 | rm -f kgpu_log.a 27 | rm -f 
*.o 28 | -------------------------------------------------------------------------------- /kgpu/kkgpu.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This work is licensed under the terms of the GNU GPL, version 2. See 3 | * the GPL-COPYING file in the top-level directory. 4 | * 5 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 6 | * All rights reserved. 7 | * 8 | * Internal header used by KGPU only 9 | * 10 | */ 11 | 12 | #ifndef ___KKGPU_H__ 13 | #define ___KKGPU_H__ 14 | 15 | #include "kgpu.h" 16 | #include 17 | 18 | #define kgpu_log(level, ...) kgpu_do_log(level, "kgpu", ##__VA_ARGS__) 19 | #define dbg(...) kgpu_log(KGPU_LOG_DEBUG, ##__VA_ARGS__) 20 | 21 | /* 22 | * Buffer management stuff, put them here in case we may 23 | * create a kgpu_buf.c for buffer related functions. 24 | */ 25 | #define KGPU_BUF_UNIT_SIZE (1024*1024) 26 | #define KGPU_BUF_NR_FRAMES_PER_UNIT (KGPU_BUF_UNIT_SIZE/PAGE_SIZE) 27 | 28 | /* memory ops */ 29 | extern struct page *kgpu_v2page(unsigned long vaddr); 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /services/raid6/libsrv_raid6/test.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This work is licensed under the terms of the GNU GPL, version 2. See 3 | * the GPL-COPYING file in the top-level directory. 4 | * 5 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 6 | * All rights reserved. 
7 | */ 8 | 9 | #include 10 | #include 11 | 12 | extern void cuda_gen_syndrome(int disks, unsigned long dsize, void **dps, int stride); 13 | 14 | #define NDISKS 12 15 | #define MAX_DSZ (1024*64) 16 | #define MIN_DSZ (1024*4) 17 | #define DSZ (1024*128) 18 | 19 | int main() 20 | { 21 | int i; 22 | size_t sz; 23 | void *dps[NDISKS]; 24 | char *data = (char*)malloc(NDISKS*MAX_DSZ); 25 | 26 | for (i=0; i 33 | #endif 34 | 35 | #define XTS_SECTOR_SIZE 512 36 | 37 | struct crypto_xts_info { 38 | u32 key_enc[AES_MAX_KEYLENGTH_U32]; 39 | u8 padding1[16]; 40 | u32 key_dec[AES_MAX_KEYLENGTH_U32]; 41 | u8 padding2[12]; 42 | u32 key_length; 43 | be128 ivs[XTS_SECTOR_SIZE/AES_BLOCK_SIZE]; 44 | }; 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /kgpu/gputils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This work is licensed under the terms of the GNU GPL, version 2. See 3 | * the GPL-COPYING file in the top-level directory. 4 | * 5 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 6 | * All rights reserved. 7 | * 8 | */ 9 | 10 | #ifndef __GPUTILS_H__ 11 | #define __GPUTILS_H__ 12 | 13 | #define csc(...) 
_cuda_safe_call(__VA_ARGS__, __FILE__, __LINE__) 14 | static cudaError_t _cuda_safe_call(cudaError_t e, const char *file, int line) { 15 | if (e!=cudaSuccess) { 16 | fprintf(stderr, "kgpu Error: %s %d %s\n", 17 | file, line, cudaGetErrorString(e)); 18 | cudaThreadExit(); 19 | abort(); 20 | } 21 | return e; 22 | } 23 | 24 | 25 | static void *alloc_dev_mem(unsigned long size) { 26 | void *h; 27 | csc( cudaMalloc(&h, size) ); 28 | return h; 29 | } 30 | 31 | static void free_dev_mem(void *p) { 32 | csc( cudaFree(p) ); 33 | } 34 | 35 | #define ah2dcpy(dst, src, sz, stream) \ 36 | cudaMemcpyAsync((void*)(dst), (void*)(src), (sz), cudaMemcpyHostToDevice, (stream)) 37 | 38 | #define ad2hcpy(dst, src, sz, stream) \ 39 | cudaMemcpyAsync((void*)(dst), (void*)(src), (sz), cudaMemcpyDeviceToHost, (stream)) 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /kgpu/kgpu_log.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This work is licensed under the terms of the GNU GPL, version 2. See 3 | * the GPL-COPYING file in the top-level directory. 4 | * 5 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 6 | * All rights reserved. 7 | * 8 | * Log functions 9 | * 10 | */ 11 | #ifndef __KGPU_LOG_H__ 12 | #define __KGPU_LOG_H__ 13 | 14 | #define KGPU_LOG_INFO 1 15 | #define KGPU_LOG_DEBUG 2 16 | #define KGPU_LOG_ALERT 3 17 | #define KGPU_LOG_ERROR 4 18 | #define KGPU_LOG_PRINT 5 19 | 20 | extern void kgpu_generic_log( 21 | int level, const char *module, const char *filename, 22 | int lineno, const char *func, const char *fmt, ...); 23 | extern int kgpu_log_level; 24 | 25 | #ifdef __KGPU_LOG_FULL_FILE_PATH__ 26 | #define __FILE_NAME__ __FILE__ 27 | #else 28 | #ifdef __KERNEL__ 29 | #include 30 | #else 31 | #include 32 | #endif 33 | #define __FILE_NAME__ \ 34 | (strrchr(__FILE__,'/') \ 35 | ? 
strrchr(__FILE__,'/')+1 \ 36 | : __FILE__ \ 37 | ) 38 | #endif 39 | 40 | #define kgpu_do_log(level, module, ...) \ 41 | kgpu_generic_log(level, module, \ 42 | __FILE_NAME__, __LINE__, __func__, ##__VA_ARGS__) 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /kgpu/kgpu_kutils.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This work is licensed under the terms of the GNU GPL, version 2. See 3 | * the GPL-COPYING file in the top-level directory. 4 | * 5 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 6 | * All rights reserved. 7 | * 8 | * KGPU kernel module utilities 9 | * 10 | */ 11 | #include "kkgpu.h" 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | struct page* kgpu_v2page(unsigned long v) 21 | { 22 | struct page *p = NULL; 23 | pgd_t *pgd = pgd_offset(current->mm, v); 24 | 25 | if (!pgd_none(*pgd)) { 26 | pud_t *pud = pud_offset(pgd, v); 27 | if (!pud_none(*pud)) { 28 | pmd_t *pmd = pmd_offset(pud, v); 29 | if (!pmd_none(*pmd)) { 30 | pte_t *pte; 31 | 32 | pte = pte_offset_map(pmd, v); 33 | if (pte_present(*pte)) 34 | p = pte_page(*pte); 35 | 36 | /* 37 | * although KGPU doesn't support x86_32, but in case 38 | * some day it does, the pte_unmap should not be called 39 | * because we want the pte stay in mem. 40 | */ 41 | pte_unmap(pte); 42 | } 43 | } 44 | } 45 | if (!p) 46 | kgpu_log(KGPU_LOG_ALERT, "bad address 0x%lX\n", v); 47 | return p; 48 | } 49 | 50 | -------------------------------------------------------------------------------- /kgpu/service.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This work is licensed under the terms of the GNU GPL, version 2. See 3 | * the GPL-COPYING file in the top-level directory. 4 | * 5 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 6 | * All rights reserved. 
7 | * 8 | */ 9 | 10 | #ifndef __SERVICE_H__ 11 | #define __SERVICE_H__ 12 | 13 | struct kgpu_service { 14 | char name[KGPU_SERVICE_NAME_SIZE]; 15 | int sid; 16 | int (*compute_size)(struct kgpu_service_request *sreq); 17 | int (*launch)(struct kgpu_service_request *sreq); 18 | int (*prepare)(struct kgpu_service_request *sreq); 19 | int (*post)(struct kgpu_service_request *sreq); 20 | }; 21 | 22 | #define SERVICE_INIT "init_service" 23 | #define SERVICE_FINIT "finit_service" 24 | #define SERVICE_LIB_PREFIX "libsrv_" 25 | 26 | typedef int (*fn_init_service)( 27 | void* libhandle, int (*reg_srv)(struct kgpu_service *, void*)); 28 | typedef int (*fn_finit_service)( 29 | void* libhandle, int (*unreg_srv)(const char*)); 30 | 31 | 32 | #ifdef __KGPU__ 33 | 34 | struct kgpu_service * kh_lookup_service(const char *name); 35 | int kh_register_service(struct kgpu_service *s, void *libhandle); 36 | int kh_unregister_service(const char *name); 37 | int kh_load_service(const char *libpath); 38 | int kh_load_all_services(const char *libdir); 39 | int kh_unload_service(const char *name); 40 | int kh_unload_all_services(); 41 | 42 | #endif /* __KGPU__ */ 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /services/gaes/gaesk.h: -------------------------------------------------------------------------------- 1 | /* This work is licensed under the terms of the GNU GPL, version 2. See 2 | * the GPL-COPYING file in the top-level directory. 3 | * 4 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 5 | * All rights reserved. 
6 | * 7 | * KGPU GAES header 8 | */ 9 | 10 | #ifndef __GAESK_H__ 11 | #define __GAESK_H__ 12 | 13 | #include "gaes_common.h" 14 | 15 | #define GAES_ECB_SIZE_THRESHOLD (PAGE_SIZE-1) 16 | #define GAES_CTR_SIZE_THRESHOLD (PAGE_SIZE-1) 17 | #define GAES_XTS_SIZE_THRESHOLD (PAGE_SIZE-1) 18 | 19 | long test_gaes_ecb(size_t sz, int enc); 20 | long test_gaes_ctr(size_t sz); 21 | long test_gaes_lctr(size_t sz); 22 | 23 | static void cvt_endian_u32(u32* buf, int n) 24 | { 25 | u8* b = (u8*)buf; 26 | int nb = n*4; 27 | 28 | u8 t; 29 | int i; 30 | 31 | for (i=0; i 15 | #include 16 | 17 | #define printk printf 18 | #define vprintk vprintf 19 | 20 | #else 21 | 22 | #include 23 | #include 24 | 25 | #endif /* __KERNEL__ */ 26 | 27 | #ifdef __KGPU_LOG_LEVEL__ 28 | int kgpu_log_level = __KGPU_LOG_LEVEL__; 29 | #else 30 | int kgpu_log_level = KGPU_LOG_ALERT; 31 | #endif 32 | 33 | void 34 | kgpu_generic_log(int level, const char *module, const char *filename, 35 | int lineno, const char *func, const char *fmt, ...) 
36 | { 37 | va_list args; 38 | 39 | if (level < kgpu_log_level) 40 | return; 41 | 42 | switch(level) { 43 | case KGPU_LOG_INFO: 44 | printk("[%s] %s::%d %s() INFO: ", module, filename, lineno, func); 45 | break; 46 | case KGPU_LOG_DEBUG: 47 | printk("[%s] %s::%d %s() DEBUG: ", module, filename, lineno, func); 48 | break; 49 | case KGPU_LOG_ALERT: 50 | printk("[%s] %s::%d %s() ALERT: ", module, filename, lineno, func); 51 | break; 52 | case KGPU_LOG_ERROR: 53 | printk("[%s] %s::%d %s() ERROR: ", module, filename, lineno, func); 54 | break; 55 | case KGPU_LOG_PRINT: 56 | printk("[%s] %s::%d %s(): ", module, filename, lineno, func); 57 | break; 58 | default: 59 | break; 60 | } 61 | 62 | va_start(args, fmt); 63 | vprintk(fmt, args); 64 | va_end(args); 65 | } 66 | 67 | #ifdef __KERNEL__ 68 | 69 | EXPORT_SYMBOL_GPL(kgpu_generic_log); 70 | EXPORT_SYMBOL_GPL(kgpu_log_level); 71 | 72 | #endif /* __KERNEL__ */ 73 | -------------------------------------------------------------------------------- /services/test/libsrv_test/srv_test.cu: -------------------------------------------------------------------------------- 1 | /* This work is licensed under the terms of the GNU GPL, version 2. See 2 | * the GPL-COPYING file in the top-level directory. 3 | * 4 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 5 | * All rights reserved. 
6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include "../../../kgpu/kgpu.h" 12 | #include "../../../kgpu/gputils.h" 13 | 14 | __global__ void empty_kernel(void) 15 | { 16 | } 17 | 18 | static int empty_cs(struct kgpu_service_request *sr) 19 | { 20 | sr->block_x = 1; 21 | sr->grid_x = 1; 22 | sr->block_y = 1; 23 | sr->grid_y = 1; 24 | return 0; 25 | } 26 | 27 | static int empty_launch(struct kgpu_service_request *sr) 28 | { 29 | empty_kernel<<grid_x, sr->grid_y), 30 | dim3(sr->block_x, sr->block_y), 0, (cudaStream_t)(sr->stream)>>>(); 31 | return 0; 32 | } 33 | 34 | static int empty_prepare(struct kgpu_service_request *sr) 35 | { 36 | cudaStream_t s = (cudaStream_t)(sr->stream); 37 | csc( ah2dcpy( sr->din, sr->hin, sr->insize, s) ); 38 | return 0; 39 | } 40 | 41 | static int empty_post(struct kgpu_service_request *sr) 42 | { 43 | cudaStream_t s = (cudaStream_t)(sr->stream); 44 | csc( ad2hcpy( sr->hout, sr->dout, sr->outsize, s) ); 45 | return 0; 46 | } 47 | 48 | static struct kgpu_service empty_srv; 49 | 50 | extern "C" int init_service(void *lh, int (*reg_srv)(struct kgpu_service*, void*)) 51 | { 52 | printf("[libsrv_test] Info: init test service\n"); 53 | 54 | sprintf(empty_srv.name, "empty_service"); 55 | empty_srv.sid = 1; 56 | empty_srv.compute_size = empty_cs; 57 | empty_srv.launch = empty_launch; 58 | empty_srv.prepare = empty_prepare; 59 | empty_srv.post = empty_post; 60 | 61 | return reg_srv(&empty_srv, lh); 62 | } 63 | 64 | extern "C" int finit_service(void *lh, int (*unreg_srv)(const char*)) 65 | { 66 | printf("[libsrv_test] Info: finit test service\n"); 67 | return unreg_srv(empty_srv.name); 68 | } 69 | -------------------------------------------------------------------------------- /services/test/callgpu/callgpu.c: -------------------------------------------------------------------------------- 1 | /* This work is licensed under the terms of the GNU GPL, version 2. See 2 | * the GPL-COPYING file in the top-level directory. 
3 | * 4 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 5 | * All rights reserved. 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "../../../kgpu/kgpu.h" 24 | 25 | /* customized log function */ 26 | #define g_log(level, ...) kgpu_do_log(level, "calg2", ##__VA_ARGS__) 27 | #define dbg(...) g_log(KGPU_LOG_DEBUG, ##__VA_ARGS__) 28 | 29 | int mycb(struct kgpu_request *req) 30 | { 31 | g_log(KGPU_LOG_PRINT, "REQ ID: %d, RESP CODE: %d\n", 32 | req->id, req->errcode); 33 | kgpu_vfree(req->in); 34 | kgpu_free_request(req); 35 | return 0; 36 | } 37 | 38 | static int __init minit(void) 39 | { 40 | struct kgpu_request* req; 41 | 42 | g_log(KGPU_LOG_PRINT, "loaded\n"); 43 | 44 | req = kgpu_alloc_request(); 45 | if (!req) { 46 | g_log(KGPU_LOG_ERROR, "request null\n"); 47 | return 0; 48 | } 49 | 50 | req->in = kgpu_vmalloc(1024*2); 51 | if (!req->in) { 52 | g_log(KGPU_LOG_ERROR, "callgpu out of memory\n"); 53 | kgpu_free_request(req); 54 | return 0; 55 | } 56 | req->insize = 1024; 57 | req->out = (void*)((unsigned long)(req->in)+1024); 58 | req->outsize = 1024; 59 | strcpy(req->service_name, "nonexist service"); 60 | req->callback = mycb; 61 | 62 | kgpu_call_async(req); 63 | 64 | return 0; 65 | } 66 | 67 | static void __exit mexit(void) 68 | { 69 | g_log(KGPU_LOG_PRINT, "unload\n"); 70 | } 71 | 72 | module_init(minit); 73 | module_exit(mexit); 74 | 75 | MODULE_LICENSE("GPL"); 76 | -------------------------------------------------------------------------------- /kgpu/list.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This work is licensed under the terms of the GNU GPL, version 2. See 3 | * the GPL-COPYING file in the top-level directory. 4 | * 5 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 
6 | * All rights reserved. 7 | * 8 | * List utility, mostly from Linux Kernel. /linux/list.h 9 | */ 10 | #ifndef __LIST_H__ 11 | #define __LIST_H__ 12 | 13 | struct list_head{ 14 | struct list_head *prev, *next; 15 | }; 16 | 17 | #define LIST_HEAD(h) \ 18 | struct list_head h = { &(h), &(h) } 19 | 20 | static inline void INIT_LIST_HEAD(struct list_head *list) 21 | { 22 | list->next = list; 23 | list->prev = list; 24 | } 25 | 26 | static inline void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) 27 | { 28 | next->prev = new; 29 | new->next = next; 30 | new->prev = prev; 31 | prev->next = new; 32 | } 33 | 34 | static inline void list_add(struct list_head *new, struct list_head *head) 35 | { 36 | __list_add(new, head, head->next); 37 | } 38 | 39 | static inline void list_add_tail(struct list_head *new, struct list_head *head) 40 | { 41 | __list_add(new, head->prev, head); 42 | } 43 | 44 | static inline void __list_del(struct list_head * prev, struct list_head * next) 45 | { 46 | next->prev = prev; 47 | prev->next = next; 48 | } 49 | 50 | static inline void list_del(struct list_head *entry) 51 | { 52 | __list_del(entry->prev, entry->next); 53 | entry->next = NULL; 54 | entry->prev = NULL; 55 | } 56 | 57 | static inline int list_empty(const struct list_head *head) 58 | { 59 | return head->next == head; 60 | } 61 | 62 | 63 | #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) 64 | 65 | #define container_of(ptr, type, member) ({ \ 66 | const typeof( ((type *)0)->member ) *__mptr = (ptr); \ 67 | (type *)( (char *)__mptr - offsetof(type,member) );}) 68 | 69 | #define list_entry(ptr, type, member) \ 70 | container_of(ptr, type, member) 71 | 72 | #define list_first_entry(ptr, type, member) \ 73 | list_entry((ptr)->next, type, member) 74 | 75 | #define list_for_each_safe(pos, n, head) \ 76 | for (pos = (head)->next, n = pos->next; pos != (head); \ 77 | pos = n, n = pos->next) 78 | 79 | #define list_for_each(pos, head) \ 80 | for 
(pos = (head)->next; pos != (head); pos = pos->next) 81 | 82 | static inline int list_size(const struct list_head *head) 83 | { 84 | int s=0; 85 | struct list_head *p; 86 | 87 | list_for_each(p, head) 88 | s++; 89 | return s; 90 | } 91 | 92 | #endif 93 | -------------------------------------------------------------------------------- /services/test/calg2/calg2.c: -------------------------------------------------------------------------------- 1 | /* This work is licensed under the terms of the GNU GPL, version 2. See 2 | * the GPL-COPYING file in the top-level directory. 3 | * 4 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 5 | * All rights reserved. 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "../../../kgpu/kgpu.h" 24 | 25 | /* customized log function */ 26 | #define g_log(level, ...) kgpu_do_log(level, "calg2", ##__VA_ARGS__) 27 | #define dbg(...) 
g_log(KGPU_LOG_DEBUG, ##__VA_ARGS__) 28 | 29 | 30 | int mycb(struct kgpu_request *req) 31 | { 32 | g_log(KGPU_LOG_PRINT, "REQ ID: %d, RESP CODE: %d, %d\n", 33 | req->id, req->errcode, 34 | ((int*)(req->kdata))[0]); 35 | 36 | kgpu_unmap_area(TO_UL(req->in)); 37 | free_page(TO_UL(req->kdata)); 38 | kgpu_free_request(req); 39 | return 0; 40 | } 41 | 42 | static int __init minit(void) 43 | { 44 | struct kgpu_request* req; 45 | char *buf; 46 | unsigned long pfn; 47 | void *ga; 48 | 49 | g_log(KGPU_LOG_PRINT, "loaded\n"); 50 | 51 | req = kgpu_alloc_request(); 52 | if (!req) { 53 | g_log(KGPU_LOG_ERROR, "request null\n"); 54 | return 0; 55 | } 56 | 57 | buf = (char*)__get_free_page(GFP_KERNEL); 58 | if (!buf) { 59 | g_log(KGPU_LOG_ERROR, "buffer null\n"); 60 | kgpu_free_request(req); 61 | return 0; 62 | } 63 | pfn = __pa(buf)>>PAGE_SHIFT; 64 | 65 | ga = kgpu_map_pfns(&pfn, 1); 66 | if (!ga) { 67 | g_log(KGPU_LOG_ERROR, "mmap error\n"); 68 | kgpu_free_request(req); 69 | free_page(TO_UL(buf)); 70 | return 0; 71 | } 72 | 73 | req->in = ga; 74 | req->insize = 1024; 75 | req->out = ga;/*+1024;*/ 76 | req->outsize = 1024; 77 | req->udata = ga; 78 | req->udatasize = 1024; 79 | strcpy(req->service_name, "test_service"); 80 | req->callback = mycb; 81 | 82 | req->kdata = buf; 83 | 84 | ((int*)(buf))[0] = 100; 85 | 86 | kgpu_call_async(req); 87 | 88 | return 0; 89 | } 90 | 91 | static void __exit mexit(void) 92 | { 93 | g_log(KGPU_LOG_PRINT, "unload\n"); 94 | } 95 | 96 | module_init(minit); 97 | module_exit(mexit); 98 | 99 | MODULE_LICENSE("GPL"); 100 | -------------------------------------------------------------------------------- /services/gaes/gaes_test.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This work is licensed under the terms of the GNU GPL, version 2. See 3 | * the GPL-COPYING file in the top-level directory. 4 | * 5 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 6 | * All rights reserved. 
For test purposes only; to be included by other source files.
tsz = dsize*disks; 26 | 27 | int ngrids = dsize/(stride*THREADS_PER_BLOCK*sizeof(u64)); 28 | int nthreads = THREADS_PER_BLOCK; 29 | if (!ngrids) { 30 | ngrids = 1; 31 | nthreads = dsize/(stride*sizeof(u64)); 32 | } 33 | 34 | struct timeval t1, t2, t3; 35 | float ta, tc; 36 | 37 | cudaStream_t s[NSTREAM]; 38 | cudaEvent_t e, st; 39 | cudaEventCreate(&e); 40 | cudaEventCreate(&st); 41 | 42 | for (i=0; i>>( 81 | (unsigned int)disks, dsize, (u8*)(dd+j*tsz), stride); 82 | 83 | csc(ad2hcpy((hd+(j+1)*tsz-2*dsize), (dd+(j+1)*tsz-2*dsize), 2*dsize, s[j])); 84 | } 85 | /*csc(cudaMemcpy( 86 | hd+tsz-2*dsize, 87 | dd+tsz-2*dsize, 88 | 2*dsize, 89 | cudaMemcpyDeviceToHost));*/ 90 | 91 | /* kgpu could avoid this with async execution */ 92 | //cudaThreadSynchronize(); 93 | cudaEventRecord(e,0); 94 | cudaEventSynchronize(e); 95 | 96 | gettimeofday(&t3, NULL); 97 | 98 | //cudaThreadSynchronize(); 99 | 100 | ta = (t3.tv_sec-t1.tv_sec)*1000 101 | + (t3.tv_usec-t1.tv_usec)/1000.0f; 102 | tc = (t3.tv_sec-t2.tv_sec)*1000 103 | + (t3.tv_usec-t2.tv_usec)/1000.0f; 104 | printf("GPU PQ: str: %3i, c&c: %8.3fms, data: %8lu*%i bw: %9.3fMB/s\n", 105 | stride, tc/NSTREAM, dsize, disks, dsize*(disks-2)*NSTREAM/(tc*1000)); 106 | 107 | cudaEventDestroy(e); 108 | cudaEventDestroy(st); 109 | 110 | for(i=0; i>7); }) 27 | 28 | __global__ void raid6_recov_2data_nc( 29 | u8 *p, u8 *q, u8 *dp, u8 *dq, 30 | const u8 *pbmul, const u8 *qmul) 31 | { 32 | int tid = threadIdx.x+blockDim.x*blockIdx.x; 33 | u8 px = p[tid]^dp[tid]; 34 | u8 qx = qmul[q[tid]^dq[tid]]; 35 | dq[tid] = pbmul[px]^qx; 36 | dp[tid] = dq[tid]^px; 37 | } 38 | 39 | __global__ void raid6_recov_2data(u8 *p, u8 *q, u8 *dp, u8 *dq, 40 | struct r62_recov_data* data) 41 | { 42 | int tid = threadIdx.x+blockDim.x*(blockIdx.x*gridDim.y+blockIdx.y); 43 | const u8 *pbmul = draid6_gfmul[data->idx[blockIdx.x].pbidx]; 44 | const u8 *qmul = draid6_gfmul[data->idx[blockIdx.x].qidx]; 45 | 46 | u8 px = p[tid] ^ dp[tid]; 47 | u8 qx = qmul[q[tid] ^ dq[tid]]; 48 
| dq[tid] = pbmul[px] ^ qx; 49 | dp[tid] = dq[tid] ^ px; 50 | } 51 | 52 | 53 | 54 | /* 55 | * @disks: number of disks, p and q included 56 | * @dsize: unit size, or a stripe? 57 | * @data: disk data 58 | */ 59 | __global__ void raid6_pq(unsigned int disks, unsigned long dsize, u8 *data) 60 | { 61 | u64 *d = (u64*)data; 62 | int z0, offset64, step64, tid; 63 | 64 | u64 wd0, wq0, wp0; 65 | 66 | tid = blockDim.x*blockIdx.x+threadIdx.x; 67 | step64 = dsize/sizeof(u64); 68 | z0 = disks-3; 69 | offset64 = step64*z0+tid; 70 | 71 | wq0 = wp0 = d[offset64]; 72 | #pragma unroll 16 73 | for (offset64 -= step64; offset64>=0; offset64 -=step64) { 74 | wd0 = d[offset64]; 75 | wp0 ^= wd0; 76 | wq0 = SHLBYTE(wq0) ^ (MASK(wq0)&(0x1d1d1d1d1d1d1d1d)) ^ wd0; 77 | } 78 | d[step64*(z0+1)+tid] = wp0; 79 | d[step64*(z0+2)+tid] = wq0; 80 | } 81 | 82 | /* 83 | * PQ with stride 84 | * @disks: number of disks, p and q included 85 | * @dsize: unit size, or a stripe? 86 | * @data: disk data 87 | */ 88 | __global__ void raid6_pq_str(unsigned int disks, unsigned long dsize, u8 *data, unsigned int stride) 89 | { 90 | u64 *d = (u64*)data; 91 | int z0, offset64, step64, tid, i; 92 | 93 | u64 wd0, wq0, wp0; 94 | 95 | tid = blockDim.x*blockIdx.x+threadIdx.x; 96 | step64 = dsize/(sizeof(u64)); 97 | z0 = disks-3; 98 | 99 | #pragma unroll 4 100 | for (i=0; i=0; offset64 -=step64) { 108 | wd0 = d[offset64]; 109 | wp0 ^= wd0; 110 | wq0 = SHLBYTE(wq0) ^ (MASK(wq0)&NBYTES(0x1d)) ^ wd0; 111 | } 112 | d[step64*(z0+1)+tid*stride+i] = wp0; 113 | d[step64*(z0+2)+tid*stride+i] = wq0; 114 | } 115 | 116 | } 117 | -------------------------------------------------------------------------------- /services/gaes/ecryptfs/dentry.c: -------------------------------------------------------------------------------- 1 | /** 2 | * eCryptfs: Linux filesystem encryption layer 3 | * 4 | * Copyright (C) 1997-2003 Erez Zadok 5 | * Copyright (C) 2001-2003 Stony Brook University 6 | * Copyright (C) 2004-2006 International Business 
Machines Corp. 7 | * Author(s): Michael A. Halcrow 8 | * 9 | * This program is free software; you can redistribute it and/or 10 | * modify it under the terms of the GNU General Public License as 11 | * published by the Free Software Foundation; either version 2 of the 12 | * License, or (at your option) any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, but 15 | * WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 | * General Public License for more details. 18 | * 19 | * You should have received a copy of the GNU General Public License 20 | * along with this program; if not, write to the Free Software 21 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 22 | * 02111-1307, USA. 23 | * 24 | * 25 | * See the GPL-COPYING file in the top-level directory. 26 | * 27 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 28 | * All rights reserved. 29 | */ 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include "ecryptfs_kernel.h" 37 | 38 | /** 39 | * ecryptfs_d_revalidate - revalidate an ecryptfs dentry 40 | * @dentry: The ecryptfs dentry 41 | * @nd: The associated nameidata 42 | * 43 | * Called when the VFS needs to revalidate a dentry. This 44 | * is called whenever a name lookup finds a dentry in the 45 | * dcache. Most filesystems leave this as NULL, because all their 46 | * dentries in the dcache are valid. 47 | * 48 | * Returns 1 if valid, 0 otherwise. 
49 | * 50 | */ 51 | static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) 52 | { 53 | struct dentry *lower_dentry; 54 | struct vfsmount *lower_mnt; 55 | struct dentry *dentry_save = NULL; 56 | struct vfsmount *vfsmount_save = NULL; 57 | int rc = 1; 58 | 59 | if (nd && nd->flags & LOOKUP_RCU) 60 | return -ECHILD; 61 | 62 | lower_dentry = ecryptfs_dentry_to_lower(dentry); 63 | lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 64 | if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) 65 | goto out; 66 | if (nd) { 67 | dentry_save = nd->path.dentry; 68 | vfsmount_save = nd->path.mnt; 69 | nd->path.dentry = lower_dentry; 70 | nd->path.mnt = lower_mnt; 71 | } 72 | rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd); 73 | if (nd) { 74 | nd->path.dentry = dentry_save; 75 | nd->path.mnt = vfsmount_save; 76 | } 77 | if (dentry->d_inode) { 78 | struct inode *lower_inode = 79 | ecryptfs_inode_to_lower(dentry->d_inode); 80 | 81 | fsstack_copy_attr_all(dentry->d_inode, lower_inode); 82 | } 83 | out: 84 | return rc; 85 | } 86 | 87 | struct kmem_cache *ecryptfs_dentry_info_cache; 88 | 89 | /** 90 | * ecryptfs_d_release 91 | * @dentry: The ecryptfs dentry 92 | * 93 | * Called when a dentry is really deallocated. 
so k32 branch has gaes and raid6 services disabled. It just leaves an example service from which hobbyists can borrow code to start their own service development.
but you should have an NVIDIA card that supports CUDA compute capability 2.0 or higher.
sudo insmod ./ecryptfs.ko && sudo insmod ./dm-crypt.ko 88 | 89 | 90 | NOTE: DO NOT USE THIS ECRYPTFS FOR IMPORTANT DATA!!!
7 | * 8 | * Common header for userspace helper, kernel mode KGPU and KGPU clients 9 | * 10 | */ 11 | 12 | #ifndef __KGPU_H__ 13 | #define __KGPU_H__ 14 | 15 | #define TO_UL(v) ((unsigned long)(v)) 16 | 17 | #define ADDR_WITHIN(pointer, base, size) \ 18 | (TO_UL(pointer) >= TO_UL(base) && \ 19 | (TO_UL(pointer) < TO_UL(base)+TO_UL(size))) 20 | 21 | #define ADDR_REBASE(dst_base, src_base, pointer) \ 22 | (TO_UL(dst_base) + ( \ 23 | TO_UL(pointer)-TO_UL(src_base))) 24 | 25 | struct kgpu_gpu_mem_info { 26 | void *uva; 27 | unsigned long size; 28 | }; 29 | 30 | #define KGPU_SERVICE_NAME_SIZE 32 31 | 32 | struct kgpu_ku_request { 33 | int id; 34 | char service_name[KGPU_SERVICE_NAME_SIZE]; 35 | void *in, *out, *data; 36 | unsigned long insize, outsize, datasize; 37 | }; 38 | 39 | /* kgpu's errno */ 40 | #define KGPU_OK 0 41 | #define KGPU_NO_RESPONSE 1 42 | #define KGPU_NO_SERVICE 2 43 | #define KGPU_TERMINATED 3 44 | 45 | struct kgpu_ku_response { 46 | int id; 47 | int errcode; 48 | }; 49 | 50 | /* 51 | * Only for kernel code or helper 52 | */ 53 | #if defined __KERNEL__ || defined __KGPU__ 54 | 55 | /* the NR will not be used */ 56 | #define KGPU_BUF_NR 1 57 | #define KGPU_BUF_SIZE (1024*1024*1024) 58 | 59 | #define KGPU_MMAP_SIZE KGPU_BUF_SIZE 60 | 61 | #define KGPU_DEV_NAME "kgpu" 62 | 63 | /* ioctl */ 64 | #include 65 | 66 | #define KGPU_IOC_MAGIC 'g' 67 | 68 | #define KGPU_IOC_SET_GPU_BUFS \ 69 | _IOW(KGPU_IOC_MAGIC, 1, struct kgpu_gpu_mem_info[KGPU_BUF_NR]) 70 | #define KGPU_IOC_GET_GPU_BUFS \ 71 | _IOR(KGPU_IOC_MAGIC, 2, struct kgpu_gpu_mem_info[KGPU_BUF_NR]) 72 | #define KGPU_IOC_SET_STOP _IO(KGPU_IOC_MAGIC, 3) 73 | #define KGPU_IOC_GET_REQS _IOR(KGPU_IOC_MAGIC, 4, 74 | 75 | #define KGPU_IOC_MAXNR 4 76 | 77 | #include "kgpu_log.h" 78 | 79 | #endif /* __KERNEL__ || __KGPU__ */ 80 | 81 | /* 82 | * For helper and service providers 83 | */ 84 | #ifndef __KERNEL__ 85 | 86 | struct kgpu_service; 87 | 88 | struct kgpu_service_request { 89 | int id; 90 | void *hin, 
*hout, *hdata; 91 | void *din, *dout, *ddata; 92 | unsigned long insize, outsize, datasize; 93 | int errcode; 94 | struct kgpu_service *s; 95 | int block_x, block_y; 96 | int grid_x, grid_y; 97 | int state; 98 | int stream_id; 99 | unsigned long stream; 100 | }; 101 | 102 | /* service request states: */ 103 | #define KGPU_REQ_INIT 1 104 | #define KGPU_REQ_MEM_DONE 2 105 | #define KGPU_REQ_PREPARED 3 106 | #define KGPU_REQ_RUNNING 4 107 | #define KGPU_REQ_POST_EXEC 5 108 | #define KGPU_REQ_DONE 6 109 | 110 | #include "service.h" 111 | 112 | #endif /* no __KERNEL__ */ 113 | 114 | /* 115 | * For kernel code only 116 | */ 117 | #ifdef __KERNEL__ 118 | 119 | #include 120 | 121 | struct kgpu_request; 122 | 123 | typedef int (*kgpu_callback)(struct kgpu_request *req); 124 | 125 | struct kgpu_request { 126 | int id; 127 | void *in, *out, *udata, *kdata; 128 | unsigned long insize, outsize, udatasize, kdatasize; 129 | char service_name[KGPU_SERVICE_NAME_SIZE]; 130 | kgpu_callback callback; 131 | int errcode; 132 | }; 133 | 134 | extern int kgpu_call_sync(struct kgpu_request*); 135 | extern int kgpu_call_async(struct kgpu_request*); 136 | 137 | extern int kgpu_next_request_id(void); 138 | extern struct kgpu_request* kgpu_alloc_request(void); 139 | extern void kgpu_free_request(struct kgpu_request*); 140 | 141 | extern void *kgpu_vmalloc(unsigned long nbytes); 142 | extern void kgpu_vfree(void* p); 143 | 144 | extern void *kgpu_map_pfns(unsigned long *pfns, int n); 145 | extern void *kgpu_map_pages(struct page **pages, int n); 146 | extern void kgpu_unmap_area(unsigned long addr); 147 | extern int kgpu_map_page(struct page*, unsigned long); 148 | extern void kgpu_free_mmap_area(unsigned long); 149 | extern unsigned long kgpu_alloc_mmap_area(unsigned long); 150 | 151 | #endif /* __KERNEL__ */ 152 | 153 | #endif 154 | -------------------------------------------------------------------------------- /services/test/sysbm/sysbm.c: 
-------------------------------------------------------------------------------- 1 | /* This work is licensed under the terms of the GNU GPL, version 2. See 2 | * the GPL-COPYING file in the top-level directory. 3 | * 4 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 5 | * All rights reserved. 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "../../../kgpu/kgpu.h" 26 | 27 | /* customized log function */ 28 | #define g_log(level, ...) kgpu_do_log(level, "sysbm", ##__VA_ARGS__) 29 | #define dbg(...) g_log(KGPU_LOG_DEBUG, ##__VA_ARGS__) 30 | #define prt(...) g_log(KGPU_LOG_PRINT, ##__VA_ARGS__) 31 | 32 | #define MAX_MEM_SZ (32*1024*1024) 33 | #define MIN_MEM_SZ (0) 34 | 35 | #define BATCH_NR 10 36 | 37 | int mycb(struct kgpu_request *req) 38 | { 39 | struct completion *c = (struct completion*)req->kdata; 40 | complete(c); 41 | return 0; 42 | } 43 | 44 | static int __init minit(void) 45 | { 46 | struct kgpu_request *rs[BATCH_NR]; 47 | void *bufs[BATCH_NR]; 48 | struct completion cs[BATCH_NR]; 49 | 50 | int i; 51 | struct timeval t0, t1; 52 | long tt; 53 | unsigned long sz; 54 | 55 | memset(rs, 0, sizeof(struct kgpu_request*)*BATCH_NR); 56 | memset(bufs, 0, sizeof(void*)*BATCH_NR); 57 | 58 | prt("prepare for testing\n"); 59 | 60 | for (i=0; iin = bufs[i]; 72 | rs[i]->out = bufs[i]; 73 | rs[i]->callback = mycb; 74 | init_completion(cs+i); 75 | rs[i]->kdata = (void*)(cs+i); 76 | rs[i]->kdatasize = sizeof(void*); 77 | strcpy(rs[i]->service_name, "empty_service"); 78 | rs[i]->insize = PAGE_SIZE; 79 | rs[i]->outsize = PAGE_SIZE; 80 | rs[i]->udata = NULL; 81 | rs[i]->udatasize = 0; 82 | } 83 | 84 | prt("done allocations, start first test\n"); 85 | 86 | kgpu_call_sync(rs[0]); 87 | 88 | prt("done first test for CUDA init\n"); 89 | 90 | rs[0]->id = 
kgpu_next_request_id(); 91 | 92 | for (sz=MIN_MEM_SZ; sz<=MAX_MEM_SZ; sz=(sz?sz<<1:PAGE_SIZE)) { 93 | for (i=0; iinsize = sz; 95 | rs[i]->outsize = sz; 96 | } 97 | 98 | do_gettimeofday(&t0); 99 | for (i=0; iid = kgpu_next_request_id(); 117 | } 118 | } 119 | 120 | prt("done async, start sync\n"); 121 | for (sz=MIN_MEM_SZ; sz<=MAX_MEM_SZ; sz=(sz?sz<<1:PAGE_SIZE)) { 122 | rs[0]->insize = sz; 123 | rs[0]->outsize = sz; 124 | 125 | do_gettimeofday(&t0); 126 | kgpu_call_sync(rs[0]); 127 | do_gettimeofday(&t1); 128 | 129 | tt = 1000000*(t1.tv_sec-t0.tv_sec) + 130 | ((long)(t1.tv_usec) - (long)(t0.tv_usec)); 131 | 132 | printk("SYNC SIZE: %10lu B, TIME: %10lu MS, OPS: %8lu, BW: %8lu MB/S\n", 133 | sz, tt, 1000000/tt, sz/tt); 134 | 135 | rs[0]->id = kgpu_next_request_id(); 136 | } 137 | 138 | prt("done sync\n"); 139 | 140 | cleanup: 141 | for (i=0; i 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "list.h" 18 | #include "helper.h" 19 | 20 | struct _kgpu_sitem { 21 | struct kgpu_service *s; 22 | void* libhandle; 23 | struct list_head list; 24 | }; 25 | 26 | LIST_HEAD(services); 27 | 28 | static struct _kgpu_sitem *lookup_kgpu_sitem(const char *name) 29 | { 30 | struct _kgpu_sitem *i; 31 | struct list_head *e; 32 | 33 | if (!name) 34 | return NULL; 35 | 36 | list_for_each(e, &services) { 37 | i = list_entry(e, struct _kgpu_sitem, list); 38 | if (!strncmp(name, i->s->name, KGPU_SERVICE_NAME_SIZE)) 39 | return i; 40 | } 41 | 42 | return NULL; 43 | } 44 | 45 | struct kgpu_service *kh_lookup_service(const char *name) 46 | { 47 | struct _kgpu_sitem *i = lookup_kgpu_sitem(name); 48 | if (!i) 49 | return NULL; 50 | return i->s; 51 | } 52 | 53 | int kh_register_service(struct kgpu_service *s, void *libhandle) 54 | { 55 | struct _kgpu_sitem *i; 56 | 57 | if (!s) 58 | return 1; 59 | i = (struct _kgpu_sitem *)malloc(sizeof(struct _kgpu_sitem)); 60 | if (!i) 61 | return 1; 62 | 63 | i->s = s; 64 | i->libhandle = libhandle; 65 | INIT_LIST_HEAD(&i->list); 66 | 67 | 
list_add_tail(&i->list, &services); 68 | 69 | return 0; 70 | } 71 | 72 | static int __unregister_service(struct _kgpu_sitem *i) 73 | { 74 | if (!i) 75 | return 1; 76 | 77 | list_del(&i->list); 78 | free(i); 79 | 80 | return 0; 81 | } 82 | 83 | int kh_unregister_service(const char *name) 84 | { 85 | return __unregister_service(lookup_kgpu_sitem(name)); 86 | } 87 | 88 | int kh_load_service(const char *libpath) 89 | { 90 | void *lh; 91 | fn_init_service init; 92 | char *err; 93 | int r=1; 94 | 95 | lh = dlopen(libpath, RTLD_LAZY); 96 | if (!lh) 97 | { 98 | fprintf(stderr, 99 | "Warning: open %s error, %s\n", 100 | libpath, dlerror()); 101 | } else { 102 | init = (fn_init_service)dlsym(lh, SERVICE_INIT); 103 | if (!init) 104 | { 105 | fprintf(stderr, 106 | "Warning: %s has no service %s\n", 107 | libpath, ((err=dlerror()) == NULL?"": err)); 108 | dlclose(lh); 109 | } else { 110 | if (init(lh, kh_register_service)) 111 | { 112 | fprintf(stderr, 113 | "Warning: %s failed to register service\n", 114 | libpath); 115 | dlclose(lh); 116 | } else 117 | r = 0; 118 | } 119 | } 120 | 121 | return r; 122 | } 123 | 124 | int kh_load_all_services(const char *dir) 125 | { 126 | char path[256]; 127 | int i; 128 | char *libpath; 129 | int e=0; 130 | 131 | glob_t glb = {0,NULL,0}; 132 | 133 | snprintf(path, 256, "%s/%s*", dir, SERVICE_LIB_PREFIX); 134 | glob(path, 0, NULL, &glb); 135 | 136 | for (i=0; ilibhandle; 155 | 156 | if (lh) { 157 | finit = (fn_finit_service)dlsym(lh, SERVICE_FINIT); 158 | if (finit) 159 | { 160 | if (finit(lh, kh_unregister_service)) 161 | { 162 | fprintf(stderr, 163 | "Warning: failed to unregister service %s\n", 164 | i->s->name); 165 | } else 166 | r = 0; 167 | } else { 168 | __unregister_service(i); 169 | r = 0; 170 | } 171 | 172 | dlclose(lh); 173 | } else { 174 | __unregister_service(i); 175 | r=0; 176 | } 177 | 178 | return r; 179 | } 180 | 181 | int kh_unload_service(const char *name) 182 | { 183 | return __unload_service(lookup_kgpu_sitem(name)); 184 
| } 185 | 186 | int kh_unload_all_services() 187 | { 188 | struct list_head *p, *n; 189 | int e=0; 190 | 191 | list_for_each_safe(p, n, &services) { 192 | e += __unload_service(list_entry(p, struct _kgpu_sitem, list)); 193 | } 194 | return e; 195 | } 196 | -------------------------------------------------------------------------------- /services/gaes/ecryptfs/debug.c: -------------------------------------------------------------------------------- 1 | /** 2 | * eCryptfs: Linux filesystem encryption layer 3 | * Functions only useful for debugging. 4 | * 5 | * Copyright (C) 2006 International Business Machines Corp. 6 | * Author(s): Michael A. Halcrow 7 | * 8 | * This program is free software; you can redistribute it and/or 9 | * modify it under the terms of the GNU General Public License as 10 | * published by the Free Software Foundation; either version 2 of the 11 | * License, or (at your option) any later version. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 | * General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program; if not, write to the Free Software 20 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 21 | * 02111-1307, USA. 22 | * 23 | * 24 | * See the GPL-COPYING file in the top-level directory. 25 | * 26 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 27 | * All rights reserved. 28 | * 29 | */ 30 | 31 | #include "ecryptfs_kernel.h" 32 | 33 | /** 34 | * ecryptfs_dump_auth_tok - debug function to print auth toks 35 | * 36 | * This function will print the contents of an ecryptfs authentication 37 | * token. 
38 | */ 39 | void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok) 40 | { 41 | char salt[ECRYPTFS_SALT_SIZE * 2 + 1]; 42 | char sig[ECRYPTFS_SIG_SIZE_HEX + 1]; 43 | 44 | ecryptfs_printk(KERN_DEBUG, "Auth tok at mem loc [%p]:\n", 45 | auth_tok); 46 | if (auth_tok->flags & ECRYPTFS_PRIVATE_KEY) { 47 | ecryptfs_printk(KERN_DEBUG, " * private key type\n"); 48 | } else { 49 | ecryptfs_printk(KERN_DEBUG, " * passphrase type\n"); 50 | ecryptfs_to_hex(salt, auth_tok->token.password.salt, 51 | ECRYPTFS_SALT_SIZE); 52 | salt[ECRYPTFS_SALT_SIZE * 2] = '\0'; 53 | ecryptfs_printk(KERN_DEBUG, " * salt = [%s]\n", salt); 54 | if (auth_tok->token.password.flags & 55 | ECRYPTFS_PERSISTENT_PASSWORD) { 56 | ecryptfs_printk(KERN_DEBUG, " * persistent\n"); 57 | } 58 | memcpy(sig, auth_tok->token.password.signature, 59 | ECRYPTFS_SIG_SIZE_HEX); 60 | sig[ECRYPTFS_SIG_SIZE_HEX] = '\0'; 61 | ecryptfs_printk(KERN_DEBUG, " * signature = [%s]\n", sig); 62 | } 63 | ecryptfs_printk(KERN_DEBUG, " * session_key.flags = [0x%x]\n", 64 | auth_tok->session_key.flags); 65 | if (auth_tok->session_key.flags 66 | & ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT) 67 | ecryptfs_printk(KERN_DEBUG, 68 | " * Userspace decrypt request set\n"); 69 | if (auth_tok->session_key.flags 70 | & ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT) 71 | ecryptfs_printk(KERN_DEBUG, 72 | " * Userspace encrypt request set\n"); 73 | if (auth_tok->session_key.flags & ECRYPTFS_CONTAINS_DECRYPTED_KEY) { 74 | ecryptfs_printk(KERN_DEBUG, " * Contains decrypted key\n"); 75 | ecryptfs_printk(KERN_DEBUG, 76 | " * session_key.decrypted_key_size = [0x%x]\n", 77 | auth_tok->session_key.decrypted_key_size); 78 | ecryptfs_printk(KERN_DEBUG, " * Decrypted session key " 79 | "dump:\n"); 80 | if (ecryptfs_verbosity > 0) 81 | ecryptfs_dump_hex(auth_tok->session_key.decrypted_key, 82 | ECRYPTFS_DEFAULT_KEY_BYTES); 83 | } 84 | if (auth_tok->session_key.flags & ECRYPTFS_CONTAINS_ENCRYPTED_KEY) { 85 | ecryptfs_printk(KERN_DEBUG, " * Contains encrypted 
key\n"); 86 | ecryptfs_printk(KERN_DEBUG, 87 | " * session_key.encrypted_key_size = [0x%x]\n", 88 | auth_tok->session_key.encrypted_key_size); 89 | ecryptfs_printk(KERN_DEBUG, " * Encrypted session key " 90 | "dump:\n"); 91 | if (ecryptfs_verbosity > 0) 92 | ecryptfs_dump_hex(auth_tok->session_key.encrypted_key, 93 | auth_tok->session_key. 94 | encrypted_key_size); 95 | } 96 | } 97 | 98 | /** 99 | * ecryptfs_dump_hex - debug hex printer 100 | * @data: string of bytes to be printed 101 | * @bytes: number of bytes to print 102 | * 103 | * Dump hexadecimal representation of char array 104 | */ 105 | void ecryptfs_dump_hex(char *data, int bytes) 106 | { 107 | int i = 0; 108 | int add_newline = 1; 109 | 110 | if (ecryptfs_verbosity < 1) 111 | return; 112 | if (bytes != 0) { 113 | printk(KERN_DEBUG "0x%.2x.", (unsigned char)data[i]); 114 | i++; 115 | } 116 | while (i < bytes) { 117 | printk("0x%.2x.", (unsigned char)data[i]); 118 | i++; 119 | if (i % 16 == 0) { 120 | printk("\n"); 121 | add_newline = 0; 122 | } else 123 | add_newline = 1; 124 | } 125 | if (add_newline) 126 | printk("\n"); 127 | } 128 | 129 | -------------------------------------------------------------------------------- /services/raid6/libsrv_raid6/srv_raid6.cu: -------------------------------------------------------------------------------- 1 | /* This work is licensed under the terms of the GNU GPL, version 2. See 2 | * the GPL-COPYING file in the top-level directory. 3 | * 4 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 5 | * All rights reserved. 
6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include "../../../kgpu/kgpu.h" 12 | #include "../../../kgpu/gputils.h" 13 | #include "../gpq.h" 14 | #include "../r62_recov.h" 15 | 16 | #define SECTOR_SIZE 512 17 | #ifndef PAGE_SIZE 18 | #define PAGE_SIZE 4096 19 | #endif 20 | #define BYTES_PER_THREAD 8 21 | #define BYTES_PER_BLOCK (SECTOR_SIZE*8) 22 | #define THREADS_PER_BLOCK (BYTES_PER_BLOCK/BYTES_PER_THREAD) 23 | 24 | struct kgpu_service raid6_pq_srv; 25 | struct kgpu_service r62_recov_srv; 26 | 27 | /* 28 | * Include device code 29 | */ 30 | #include "dev.cu" 31 | 32 | int r62_recov_compute_size(struct kgpu_service_request *sr) 33 | { 34 | struct r62_recov_data *data = (struct r62_recov_data*)sr->hdata; 35 | 36 | sr->block_x = SECTOR_SIZE; 37 | sr->block_y = 1; 38 | sr->grid_x = data->n; 39 | sr->grid_y = PAGE_SIZE/SECTOR_SIZE; 40 | 41 | return 0; 42 | } 43 | 44 | int r62_recov_prepare(struct kgpu_service_request *sr) 45 | { 46 | cudaStream_t s = (cudaStream_t)(sr->stream); 47 | 48 | csc( ah2dcpy( sr->din, sr->hin, sr->insize, s) ); 49 | 50 | return 0; 51 | } 52 | 53 | int r62_recov_launch(struct kgpu_service_request *sr) 54 | { 55 | struct r62_recov_data *data = (struct r62_recov_data*)sr->hdata; 56 | struct r62_recov_data *dd = (struct r62_recov_data*)sr->ddata; 57 | cudaStream_t s = (cudaStream_t)(sr->stream); 58 | 59 | raid6_recov_2data<<grid_x, sr->grid_y), 60 | dim3(sr->block_x, sr->block_y), 0, s>>>( 61 | (u8*)(sr->din), 62 | ((u8*)(sr->din))+data->bytes, 63 | (u8*)(sr->dout), 64 | ((u8*)(sr->dout))+data->bytes, 65 | dd); 66 | 67 | return 0; 68 | } 69 | 70 | int r62_recov_post(struct kgpu_service_request *sr) 71 | { 72 | cudaStream_t s = (cudaStream_t)(sr->stream); 73 | 74 | csc( ad2hcpy( sr->hout, sr->dout, sr->outsize, s ) ); 75 | 76 | return 0; 77 | } 78 | 79 | 80 | int raid6_pq_compute_size(struct kgpu_service_request *sr) 81 | { 82 | struct raid6_pq_data* data = (struct raid6_pq_data*)(sr->hdata); 83 | 84 | sr->block_x = THREADS_PER_BLOCK; 
85 | sr->block_y = 1; 86 | sr->grid_x = data->dsize/BYTES_PER_BLOCK; 87 | sr->grid_y = 1; 88 | 89 | return 0; 90 | } 91 | 92 | int raid6_pq_prepare(struct kgpu_service_request *sr) 93 | { 94 | struct raid6_pq_data* data = (struct raid6_pq_data*)(sr->hdata); 95 | cudaStream_t s = (cudaStream_t)(sr->stream); 96 | 97 | csc( ah2dcpy( sr->din, sr->hin, data->dsize*(data->nr_d-2), s) ); 98 | 99 | return 0; 100 | } 101 | 102 | int raid6_pq_launch(struct kgpu_service_request *sr) 103 | { 104 | struct raid6_pq_data* data = (struct raid6_pq_data*)(sr->hdata); 105 | cudaStream_t s = (cudaStream_t)(sr->stream); 106 | 107 | raid6_pq<<grid_x, sr->grid_y), 108 | dim3(sr->block_x, sr->block_y), 0, 109 | s>>>( 110 | (unsigned int)data->nr_d, 111 | (unsigned long)data->dsize, (u8*)sr->din); 112 | 113 | return 0; 114 | } 115 | 116 | int raid6_pq_post(struct kgpu_service_request *sr) 117 | { 118 | cudaStream_t s = (cudaStream_t)(sr->stream); 119 | 120 | csc( ad2hcpy( sr->hout, sr->dout, sr->outsize, s ) ); 121 | 122 | return 0; 123 | } 124 | 125 | extern "C" int init_service(void *lh, int (*reg_srv)(struct kgpu_service*, void*)) 126 | { 127 | int err; 128 | printf("[libsrv_raid6] Info: init raid6 services\n"); 129 | 130 | csc( cudaFuncSetCacheConfig(raid6_pq, cudaFuncCachePreferL1) ); 131 | csc( cudaFuncSetCacheConfig(raid6_recov_2data, cudaFuncCachePreferL1) ); 132 | 133 | sprintf(raid6_pq_srv.name, "raid6_pq"); 134 | raid6_pq_srv.sid = 0; 135 | raid6_pq_srv.compute_size = raid6_pq_compute_size; 136 | raid6_pq_srv.launch = raid6_pq_launch; 137 | raid6_pq_srv.prepare = raid6_pq_prepare; 138 | raid6_pq_srv.post = raid6_pq_post; 139 | 140 | err = reg_srv(&raid6_pq_srv, lh); 141 | if (err) { 142 | fprintf(stderr, "[libsrv_raid6] Error: failed" 143 | " to register raid6_pq service\n"); 144 | return err; 145 | } 146 | 147 | sprintf(r62_recov_srv.name, "r62_recov"); 148 | r62_recov_srv.sid = 1; 149 | r62_recov_srv.compute_size = r62_recov_compute_size; 150 | r62_recov_srv.launch = 
r62_recov_launch; 151 | r62_recov_srv.prepare = r62_recov_prepare; 152 | r62_recov_srv.post = r62_recov_post; 153 | 154 | err = reg_srv(&r62_recov_srv, lh); 155 | if (err) { 156 | fprintf(stderr, "[libsrv_raid6] Error: failed" 157 | " to register r62_recov service\n"); 158 | } 159 | return err; 160 | } 161 | 162 | extern "C" int finit_service(void *lh, int (*unreg_srv)(const char *)) 163 | { 164 | int err1, err2; 165 | printf("[libsrv_raid6] Info: finit raid6 services\n"); 166 | 167 | err1 = unreg_srv(raid6_pq_srv.name); 168 | if (err1) { 169 | fprintf(stderr, "[libsrv_raid6] Error: failed" 170 | " to unregister raid6_pq service\n"); 171 | } 172 | 173 | err2 = unreg_srv(r62_recov_srv.name); 174 | if (err2) { 175 | fprintf(stderr, "[libsrv_raid6] Error: failed" 176 | " to unregister r62_recov service\n"); 177 | } 178 | return err1|err2; 179 | } 180 | 181 | -------------------------------------------------------------------------------- /services/gaes/callaes/callgaes.c: -------------------------------------------------------------------------------- 1 | /* -*- linux-c -*- 2 | * This work is licensed under the terms of the GNU GPL, version 2. See 3 | * the GPL-COPYING file in the top-level directory. 4 | * 5 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 6 | * All rights reserved. 
7 | * 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | 22 | char* AES_GENERIC = "ecb(aes-generic)"; 23 | char* AES_ASM = "ecb(aes-asm)"; 24 | char* AES = "ecb(aes)"; 25 | 26 | char* AES_GPU_GENERIC = "gaes_ecb(aes-generic)"; 27 | char* AES_GPU_ASM = "gaes_ecb(aes-asm)"; 28 | char* AES_GPU = "gaes_ecb(aes)"; 29 | 30 | char* CIPHER; 31 | 32 | #define MAX_BLK_SIZE (32*1024*1024) 33 | #define MIN_BLK_SIZE (4*1024) 34 | 35 | #define TEST_TIMES 10 36 | 37 | int test_gpu = 0; 38 | 39 | static int skip_cpu=0; 40 | 41 | module_param(skip_cpu, int, 0444); 42 | MODULE_PARM_DESC(skip_cpu, "do not test CPU cipher, default 0 (No)"); 43 | 44 | 45 | #if 0 46 | 47 | static void dump_page_content(u8 *p) 48 | { 49 | int r,c; 50 | printk("dump page content:\n"); 51 | for (r=0; r<16; r++) { 52 | for (c=0; c<32; c++) 53 | printk("%02x ", p[r*32+c]); 54 | printk("\n"); 55 | } 56 | } 57 | 58 | static void dump_hex(u8 *p, int sz) 59 | { 60 | int i; 61 | printk("dump hex:\n"); 62 | for (i=0; i= MIN_BLK_SIZE; bs >>= 1) { 165 | struct timeval t0, t1; 166 | long int enc, dec; 167 | 168 | do_gettimeofday(&t0); 169 | for (j=0; j 6 | * 7 | * This program is free software; you can redistribute it and/or 8 | * modify it under the terms of the GNU General Public License as 9 | * published by the Free Software Foundation; either version 2 of the 10 | * License, or (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, but 13 | * WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | * General Public License for more details. 
16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with this program; if not, write to the Free Software 19 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 20 | * 02111-1307, USA. 21 | * 22 | * 23 | * See the GPL-COPYING file in the top-level directory. 24 | * 25 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 26 | * All rights reserved. 27 | * 28 | * 29 | */ 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include "ecryptfs_kernel.h" 37 | 38 | struct kmem_cache *ecryptfs_open_req_cache; 39 | 40 | static struct ecryptfs_kthread_ctl { 41 | #define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001 42 | u32 flags; 43 | struct mutex mux; 44 | struct list_head req_list; 45 | wait_queue_head_t wait; 46 | } ecryptfs_kthread_ctl; 47 | 48 | static struct task_struct *ecryptfs_kthread; 49 | 50 | /** 51 | * ecryptfs_threadfn 52 | * @ignored: ignored 53 | * 54 | * The eCryptfs kernel thread that has the responsibility of getting 55 | * the lower persistent file with RW permissions. 
56 | * 57 | * Returns zero on success; non-zero otherwise 58 | */ 59 | static int ecryptfs_threadfn(void *ignored) 60 | { 61 | set_freezable(); 62 | while (1) { 63 | struct ecryptfs_open_req *req; 64 | 65 | wait_event_freezable( 66 | ecryptfs_kthread_ctl.wait, 67 | (!list_empty(&ecryptfs_kthread_ctl.req_list) 68 | || kthread_should_stop())); 69 | mutex_lock(&ecryptfs_kthread_ctl.mux); 70 | if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) { 71 | mutex_unlock(&ecryptfs_kthread_ctl.mux); 72 | goto out; 73 | } 74 | while (!list_empty(&ecryptfs_kthread_ctl.req_list)) { 75 | req = list_first_entry(&ecryptfs_kthread_ctl.req_list, 76 | struct ecryptfs_open_req, 77 | kthread_ctl_list); 78 | mutex_lock(&req->mux); 79 | list_del(&req->kthread_ctl_list); 80 | if (!(req->flags & ECRYPTFS_REQ_ZOMBIE)) { 81 | dget(req->lower_dentry); 82 | mntget(req->lower_mnt); 83 | (*req->lower_file) = dentry_open( 84 | req->lower_dentry, req->lower_mnt, 85 | (O_RDWR | O_LARGEFILE), current_cred()); 86 | req->flags |= ECRYPTFS_REQ_PROCESSED; 87 | } 88 | wake_up(&req->wait); 89 | mutex_unlock(&req->mux); 90 | } 91 | mutex_unlock(&ecryptfs_kthread_ctl.mux); 92 | } 93 | out: 94 | return 0; 95 | } 96 | 97 | int __init ecryptfs_init_kthread(void) 98 | { 99 | int rc = 0; 100 | 101 | mutex_init(&ecryptfs_kthread_ctl.mux); 102 | init_waitqueue_head(&ecryptfs_kthread_ctl.wait); 103 | INIT_LIST_HEAD(&ecryptfs_kthread_ctl.req_list); 104 | ecryptfs_kthread = kthread_run(&ecryptfs_threadfn, NULL, 105 | "ecryptfs-kthread"); 106 | if (IS_ERR(ecryptfs_kthread)) { 107 | rc = PTR_ERR(ecryptfs_kthread); 108 | printk(KERN_ERR "%s: Failed to create kernel thread; rc = [%d]" 109 | "\n", __func__, rc); 110 | } 111 | return rc; 112 | } 113 | 114 | void ecryptfs_destroy_kthread(void) 115 | { 116 | struct ecryptfs_open_req *req; 117 | 118 | mutex_lock(&ecryptfs_kthread_ctl.mux); 119 | ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE; 120 | list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list, 121 | 
kthread_ctl_list) { 122 | mutex_lock(&req->mux); 123 | req->flags |= ECRYPTFS_REQ_ZOMBIE; 124 | wake_up(&req->wait); 125 | mutex_unlock(&req->mux); 126 | } 127 | mutex_unlock(&ecryptfs_kthread_ctl.mux); 128 | kthread_stop(ecryptfs_kthread); 129 | wake_up(&ecryptfs_kthread_ctl.wait); 130 | } 131 | 132 | /** 133 | * ecryptfs_privileged_open 134 | * @lower_file: Result of dentry_open by root on lower dentry 135 | * @lower_dentry: Lower dentry for file to open 136 | * @lower_mnt: Lower vfsmount for file to open 137 | * 138 | * This function gets a r/w file opened againt the lower dentry. 139 | * 140 | * Returns zero on success; non-zero otherwise 141 | */ 142 | int ecryptfs_privileged_open(struct file **lower_file, 143 | struct dentry *lower_dentry, 144 | struct vfsmount *lower_mnt, 145 | const struct cred *cred) 146 | { 147 | struct ecryptfs_open_req *req; 148 | int flags = O_LARGEFILE; 149 | int rc = 0; 150 | 151 | /* Corresponding dput() and mntput() are done when the 152 | * persistent file is fput() when the eCryptfs inode is 153 | * destroyed. */ 154 | dget(lower_dentry); 155 | mntget(lower_mnt); 156 | flags |= IS_RDONLY(lower_dentry->d_inode) ? 
O_RDONLY : O_RDWR; 157 | (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); 158 | if (!IS_ERR(*lower_file)) 159 | goto out; 160 | if (flags & O_RDONLY) { 161 | rc = PTR_ERR((*lower_file)); 162 | goto out; 163 | } 164 | req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL); 165 | if (!req) { 166 | rc = -ENOMEM; 167 | goto out; 168 | } 169 | mutex_init(&req->mux); 170 | req->lower_file = lower_file; 171 | req->lower_dentry = lower_dentry; 172 | req->lower_mnt = lower_mnt; 173 | init_waitqueue_head(&req->wait); 174 | req->flags = 0; 175 | mutex_lock(&ecryptfs_kthread_ctl.mux); 176 | if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) { 177 | rc = -EIO; 178 | mutex_unlock(&ecryptfs_kthread_ctl.mux); 179 | printk(KERN_ERR "%s: We are in the middle of shutting down; " 180 | "aborting privileged request to open lower file\n", 181 | __func__); 182 | goto out_free; 183 | } 184 | list_add_tail(&req->kthread_ctl_list, &ecryptfs_kthread_ctl.req_list); 185 | mutex_unlock(&ecryptfs_kthread_ctl.mux); 186 | wake_up(&ecryptfs_kthread_ctl.wait); 187 | wait_event(req->wait, (req->flags != 0)); 188 | mutex_lock(&req->mux); 189 | BUG_ON(req->flags == 0); 190 | if (req->flags & ECRYPTFS_REQ_DROPPED 191 | || req->flags & ECRYPTFS_REQ_ZOMBIE) { 192 | rc = -EIO; 193 | printk(KERN_WARNING "%s: Privileged open request dropped\n", 194 | __func__); 195 | goto out_unlock; 196 | } 197 | if (IS_ERR(*req->lower_file)) 198 | rc = PTR_ERR(*req->lower_file); 199 | out_unlock: 200 | mutex_unlock(&req->mux); 201 | out_free: 202 | kmem_cache_free(ecryptfs_open_req_cache, req); 203 | out: 204 | return rc; 205 | } 206 | -------------------------------------------------------------------------------- /services/raid6/grecov/raid6test.c: -------------------------------------------------------------------------------- 1 | /* 2 | * asynchronous raid6 recovery self test 3 | * Copyright (c) 2009, Intel Corporation. 
4 | * 5 | * based on drivers/md/raid6test/test.c: 6 | * Copyright 2002-2007 H. Peter Anvin 7 | * 8 | * This program is free software; you can redistribute it and/or modify it 9 | * under the terms and conditions of the GNU General Public License, 10 | * version 2, as published by the Free Software Foundation. 11 | * 12 | * This program is distributed in the hope it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 | * more details. 16 | * 17 | * You should have received a copy of the GNU General Public License along with 18 | * this program; if not, write to the Free Software Foundation, Inc., 19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 20 | * 21 | */ 22 | #include 23 | #include 24 | #include 25 | 26 | #undef pr 27 | #define pr(fmt, args...) pr_info("raid6test: " fmt, ##args) 28 | 29 | #define NDISKS 16 /* Including P and Q */ 30 | 31 | static struct page *dataptrs[NDISKS]; 32 | static addr_conv_t addr_conv[NDISKS]; 33 | static struct page *data[NDISKS+3]; 34 | static struct page *spare; 35 | static struct page *recovi; 36 | static struct page *recovj; 37 | 38 | static void callback(void *param) 39 | { 40 | struct completion *cmp = param; 41 | 42 | complete(cmp); 43 | } 44 | 45 | static void makedata(int disks) 46 | { 47 | int i, j; 48 | 49 | for (i = 0; i < disks; i++) { 50 | for (j = 0; j < PAGE_SIZE/sizeof(u32); j += sizeof(u32)) { 51 | u32 *p = page_address(data[i]) + j; 52 | 53 | *p = random32(); 54 | } 55 | 56 | dataptrs[i] = data[i]; 57 | } 58 | } 59 | 60 | static char disk_type(int d, int disks) 61 | { 62 | if (d == disks - 2) 63 | return 'P'; 64 | else if (d == disks - 1) 65 | return 'Q'; 66 | else 67 | return 'D'; 68 | } 69 | 70 | /* Recover two failed blocks. 
*/ 71 | static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs) 72 | { 73 | struct async_submit_ctl submit; 74 | struct completion cmp; 75 | struct dma_async_tx_descriptor *tx = NULL; 76 | enum sum_check_flags result = ~0; 77 | 78 | if (faila > failb) 79 | swap(faila, failb); 80 | 81 | if (failb == disks-1) { 82 | if (faila == disks-2) { 83 | /* P+Q failure. Just rebuild the syndrome. */ 84 | init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); 85 | tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); 86 | } else { 87 | struct page *blocks[disks]; 88 | struct page *dest; 89 | int count = 0; 90 | int i; 91 | 92 | /* data+Q failure. Reconstruct data from P, 93 | * then rebuild syndrome 94 | */ 95 | for (i = disks; i-- ; ) { 96 | if (i == faila || i == failb) 97 | continue; 98 | blocks[count++] = ptrs[i]; 99 | } 100 | dest = ptrs[faila]; 101 | init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, 102 | NULL, NULL, addr_conv); 103 | tx = async_xor(dest, blocks, 0, count, bytes, &submit); 104 | 105 | init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv); 106 | tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); 107 | } 108 | } else { 109 | if (failb == disks-2) { 110 | /* data+P failure. */ 111 | init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); 112 | tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit); 113 | } else { 114 | /* data+data failure. */ 115 | init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); 116 | tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit); 117 | } 118 | } 119 | init_completion(&cmp); 120 | init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv); 121 | tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit); 122 | async_tx_issue_pending(tx); 123 | 124 | if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) 125 | pr("%s: timeout! 
(faila: %d failb: %d disks: %d)\n", 126 | __func__, faila, failb, disks); 127 | 128 | if (result != 0) 129 | pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n", 130 | __func__, faila, failb, result); 131 | } 132 | 133 | static int test_disks(int i, int j, int disks) 134 | { 135 | int erra, errb; 136 | 137 | memset(page_address(recovi), 0xf0, PAGE_SIZE); 138 | memset(page_address(recovj), 0xba, PAGE_SIZE); 139 | 140 | dataptrs[i] = recovi; 141 | dataptrs[j] = recovj; 142 | 143 | raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs); 144 | 145 | erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE); 146 | errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE); 147 | 148 | pr("%s(%d, %d): faila=%3d(%c) failb=%3d(%c) %s\n", 149 | __func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks), 150 | (!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB"); 151 | 152 | dataptrs[i] = data[i]; 153 | dataptrs[j] = data[j]; 154 | 155 | return erra || errb; 156 | } 157 | 158 | static int test(int disks, int *tests) 159 | { 160 | struct dma_async_tx_descriptor *tx; 161 | struct async_submit_ctl submit; 162 | struct completion cmp; 163 | int err = 0; 164 | int i, j; 165 | 166 | recovi = data[disks]; 167 | recovj = data[disks+1]; 168 | spare = data[disks+2]; 169 | 170 | makedata(disks); 171 | 172 | /* Nuke syndromes */ 173 | memset(page_address(data[disks-2]), 0xee, PAGE_SIZE); 174 | memset(page_address(data[disks-1]), 0xee, PAGE_SIZE); 175 | 176 | /* Generate assumed good syndrome */ 177 | init_completion(&cmp); 178 | init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv); 179 | tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit); 180 | async_tx_issue_pending(tx); 181 | 182 | if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) { 183 | pr("error: initial gen_syndrome(%d) timed out\n", disks); 184 | return 1; 185 | } 186 | 187 | pr("testing the %d-disk case...\n", disks); 
188 | for (i = 0; i < disks-1; i++) 189 | for (j = i+1; j < disks; j++) { 190 | (*tests)++; 191 | err += test_disks(0, 1, disks); 192 | } 193 | 194 | return err; 195 | } 196 | 197 | 198 | static int raid6_test(void) 199 | { 200 | int err = 0; 201 | int tests = 0; 202 | int i; 203 | 204 | for (i = 0; i < NDISKS+3; i++) { 205 | data[i] = alloc_page(GFP_KERNEL); 206 | if (!data[i]) { 207 | while (i--) 208 | put_page(data[i]); 209 | return -ENOMEM; 210 | } 211 | } 212 | 213 | /* the 4-disk and 5-disk cases are special for the recovery code */ 214 | if (NDISKS > 4) 215 | err += test(4, &tests); 216 | if (NDISKS > 5) 217 | err += test(5, &tests); 218 | /* the 11 and 12 disk cases are special for ioatdma (p-disabled 219 | * q-continuation without extended descriptor) 220 | */ 221 | if (NDISKS > 12) { 222 | err += test(11, &tests); 223 | err += test(12, &tests); 224 | } 225 | err += test(NDISKS, &tests); 226 | 227 | pr("\n"); 228 | pr("complete (%d tests, %d failure%s)\n", 229 | tests, err, err == 1 ? "" : "s"); 230 | 231 | for (i = 0; i < NDISKS+3; i++) 232 | put_page(data[i]); 233 | 234 | return 0; 235 | } 236 | 237 | static void raid6_test_exit(void) 238 | { 239 | } 240 | 241 | /* when compiled-in wait for drivers to load first (assumes dma drivers 242 | * are also compliled-in) 243 | */ 244 | late_initcall(raid6_test); 245 | module_exit(raid6_test_exit); 246 | MODULE_AUTHOR("Dan Williams "); 247 | MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests"); 248 | MODULE_LICENSE("GPL"); 249 | -------------------------------------------------------------------------------- /services/gaes/ecryptfs/super.c: -------------------------------------------------------------------------------- 1 | /** 2 | * eCryptfs: Linux filesystem encryption layer 3 | * 4 | * Copyright (C) 1997-2003 Erez Zadok 5 | * Copyright (C) 2001-2003 Stony Brook University 6 | * Copyright (C) 2004-2006 International Business Machines Corp. 7 | * Author(s): Michael A. Halcrow 8 | * Michael C. 
Thompson 9 | * 10 | * This program is free software; you can redistribute it and/or 11 | * modify it under the terms of the GNU General Public License as 12 | * published by the Free Software Foundation; either version 2 of the 13 | * License, or (at your option) any later version. 14 | * 15 | * This program is distributed in the hope that it will be useful, but 16 | * WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU General Public License 21 | * along with this program; if not, write to the Free Software 22 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 23 | * 02111-1307, USA. 24 | * 25 | * 26 | * See the GPL-COPYING file in the top-level directory. 27 | * 28 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 29 | * All rights reserved. 30 | * 31 | */ 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include "ecryptfs_kernel.h" 41 | 42 | struct kmem_cache *ecryptfs_inode_info_cache; 43 | 44 | /** 45 | * ecryptfs_alloc_inode - allocate an ecryptfs inode 46 | * @sb: Pointer to the ecryptfs super block 47 | * 48 | * Called to bring an inode into existence. 49 | * 50 | * Only handle allocation, setting up structures should be done in 51 | * ecryptfs_read_inode. This is because the kernel, between now and 52 | * then, will 0 out the private data pointer. 
53 | * 54 | * Returns a pointer to a newly allocated inode, NULL otherwise 55 | */ 56 | static struct inode *ecryptfs_alloc_inode(struct super_block *sb) 57 | { 58 | struct ecryptfs_inode_info *inode_info; 59 | struct inode *inode = NULL; 60 | 61 | inode_info = kmem_cache_alloc(ecryptfs_inode_info_cache, GFP_KERNEL); 62 | if (unlikely(!inode_info)) 63 | goto out; 64 | ecryptfs_init_crypt_stat(&inode_info->crypt_stat); 65 | mutex_init(&inode_info->lower_file_mutex); 66 | inode_info->lower_file = NULL; 67 | inode = &inode_info->vfs_inode; 68 | out: 69 | return inode; 70 | } 71 | 72 | static void ecryptfs_i_callback(struct rcu_head *head) 73 | { 74 | struct inode *inode = container_of(head, struct inode, i_rcu); 75 | struct ecryptfs_inode_info *inode_info; 76 | inode_info = ecryptfs_inode_to_private(inode); 77 | 78 | INIT_LIST_HEAD(&inode->i_dentry); 79 | kmem_cache_free(ecryptfs_inode_info_cache, inode_info); 80 | } 81 | 82 | /** 83 | * ecryptfs_destroy_inode 84 | * @inode: The ecryptfs inode 85 | * 86 | * This is used during the final destruction of the inode. All 87 | * allocation of memory related to the inode, including allocated 88 | * memory in the crypt_stat struct, will be released here. This 89 | * function also fput()'s the persistent file for the lower inode. 90 | * There should be no chance that this deallocation will be missed. 
91 | */ 92 | static void ecryptfs_destroy_inode(struct inode *inode) 93 | { 94 | struct ecryptfs_inode_info *inode_info; 95 | 96 | inode_info = ecryptfs_inode_to_private(inode); 97 | if (inode_info->lower_file) { 98 | struct dentry *lower_dentry = 99 | inode_info->lower_file->f_dentry; 100 | 101 | BUG_ON(!lower_dentry); 102 | if (lower_dentry->d_inode) { 103 | fput(inode_info->lower_file); 104 | inode_info->lower_file = NULL; 105 | } 106 | } 107 | ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); 108 | call_rcu(&inode->i_rcu, ecryptfs_i_callback); 109 | } 110 | 111 | /** 112 | * ecryptfs_init_inode 113 | * @inode: The ecryptfs inode 114 | * 115 | * Set up the ecryptfs inode. 116 | */ 117 | void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode) 118 | { 119 | ecryptfs_set_inode_lower(inode, lower_inode); 120 | inode->i_ino = lower_inode->i_ino; 121 | inode->i_version++; 122 | inode->i_op = &ecryptfs_main_iops; 123 | inode->i_fop = &ecryptfs_main_fops; 124 | inode->i_mapping->a_ops = &ecryptfs_aops; 125 | } 126 | 127 | /** 128 | * ecryptfs_statfs 129 | * @sb: The ecryptfs super block 130 | * @buf: The struct kstatfs to fill in with stats 131 | * 132 | * Get the filesystem statistics. Currently, we let this pass right through 133 | * to the lower filesystem and take no action ourselves. 134 | */ 135 | static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) 136 | { 137 | struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); 138 | 139 | if (!lower_dentry->d_sb->s_op->statfs) 140 | return -ENOSYS; 141 | return lower_dentry->d_sb->s_op->statfs(lower_dentry, buf); 142 | } 143 | 144 | /** 145 | * ecryptfs_evict_inode 146 | * @inode - The ecryptfs inode 147 | * 148 | * Called by iput() when the inode reference count reached zero 149 | * and the inode is not hashed anywhere. Used to clear anything 150 | * that needs to be, before the inode is completely destroyed and put 151 | * on the inode free list. 
We use this to drop out reference to the 152 | * lower inode. 153 | */ 154 | static void ecryptfs_evict_inode(struct inode *inode) 155 | { 156 | truncate_inode_pages(&inode->i_data, 0); 157 | end_writeback(inode); 158 | iput(ecryptfs_inode_to_lower(inode)); 159 | } 160 | 161 | /** 162 | * ecryptfs_show_options 163 | * 164 | * Prints the mount options for a given superblock. 165 | * Returns zero; does not fail. 166 | */ 167 | static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt) 168 | { 169 | struct super_block *sb = mnt->mnt_sb; 170 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = 171 | &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; 172 | struct ecryptfs_global_auth_tok *walker; 173 | 174 | mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); 175 | list_for_each_entry(walker, 176 | &mount_crypt_stat->global_auth_tok_list, 177 | mount_crypt_stat_list) { 178 | if (walker->flags & ECRYPTFS_AUTH_TOK_FNEK) 179 | seq_printf(m, ",ecryptfs_fnek_sig=%s", walker->sig); 180 | else 181 | seq_printf(m, ",ecryptfs_sig=%s", walker->sig); 182 | } 183 | mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); 184 | 185 | seq_printf(m, ",ecryptfs_cipher=%s", 186 | mount_crypt_stat->global_default_cipher_name); 187 | 188 | if (mount_crypt_stat->global_default_cipher_key_size) 189 | seq_printf(m, ",ecryptfs_key_bytes=%zd", 190 | mount_crypt_stat->global_default_cipher_key_size); 191 | if (mount_crypt_stat->flags & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED) 192 | seq_printf(m, ",ecryptfs_passthrough"); 193 | if (mount_crypt_stat->flags & ECRYPTFS_XATTR_METADATA_ENABLED) 194 | seq_printf(m, ",ecryptfs_xattr_metadata"); 195 | if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) 196 | seq_printf(m, ",ecryptfs_encrypted_view"); 197 | if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS) 198 | seq_printf(m, ",ecryptfs_unlink_sigs"); 199 | if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY) 200 | seq_printf(m, 
",ecryptfs_mount_auth_tok_only"); 201 | 202 | return 0; 203 | } 204 | 205 | const struct super_operations ecryptfs_sops = { 206 | .alloc_inode = ecryptfs_alloc_inode, 207 | .destroy_inode = ecryptfs_destroy_inode, 208 | .drop_inode = generic_delete_inode, 209 | .statfs = ecryptfs_statfs, 210 | .remount_fs = NULL, 211 | .evict_inode = ecryptfs_evict_inode, 212 | .show_options = ecryptfs_show_options 213 | }; 214 | -------------------------------------------------------------------------------- /kgpu/gpuops.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * This work is licensed under the terms of the GNU GPL, version 2. See 3 | * the GPL-COPYING file in the top-level directory. 4 | * 5 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 6 | * All rights reserved. 7 | * 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include "helper.h" 14 | #include "gputils.h" 15 | 16 | extern "C" void gpu_init(); 17 | extern "C" void gpu_finit(); 18 | 19 | extern "C" void *gpu_alloc_pinned_mem(unsigned long size); 20 | extern "C" void gpu_free_pinned_mem(void *p); 21 | 22 | extern "C" void gpu_pin_mem(void *p, size_t sz); 23 | extern "C" void gpu_unpin_mem(void *p); 24 | 25 | extern "C" int gpu_alloc_device_mem(struct kgpu_service_request *sreq); 26 | extern "C" void gpu_free_device_mem(struct kgpu_service_request *sreq); 27 | extern "C" int gpu_alloc_stream(struct kgpu_service_request *sreq); 28 | extern "C" void gpu_free_stream(struct kgpu_service_request *sreq); 29 | 30 | extern "C" int gpu_execution_finished(struct kgpu_service_request *sreq); 31 | extern "C" int gpu_post_finished(struct kgpu_service_request *sreq); 32 | 33 | extern "C" unsigned long gpu_get_stream(int sid); 34 | 35 | #define MAX_STREAM_NR 8 36 | static cudaStream_t streams[MAX_STREAM_NR]; 37 | static int streamuses[MAX_STREAM_NR]; 38 | 39 | static const dim3 default_block_size(32,1); 40 | static const dim3 default_grid_size(512,1); 41 | 
42 | struct kgpu_gpu_mem_info devbuf; 43 | struct kgpu_gpu_mem_info devbuf4vma; 44 | 45 | void gpu_init() 46 | { 47 | int i; 48 | 49 | devbuf.uva = alloc_dev_mem(KGPU_BUF_SIZE); 50 | devbuf4vma.uva = alloc_dev_mem(KGPU_BUF_SIZE); 51 | 52 | for (i=0; i= MAX_STREAM_NR) 73 | return 0; 74 | else 75 | return (unsigned long)streams[stid]; 76 | } 77 | 78 | void *gpu_alloc_pinned_mem(unsigned long size) { 79 | void *h; 80 | csc( cudaHostAlloc(&h, size, 0));//cudaHostAllocWriteCombined) ); 81 | return h; 82 | } 83 | 84 | void gpu_free_pinned_mem(void* p) { 85 | csc( cudaFreeHost(p) ); 86 | } 87 | 88 | void gpu_pin_mem(void *p, size_t sz) 89 | { 90 | size_t rsz = round_up(sz, PAGE_SIZE); 91 | csc( cudaHostRegister(p, rsz, cudaHostRegisterPortable) ); 92 | } 93 | 94 | void gpu_unpin_mem(void *p) 95 | { 96 | csc( cudaHostUnregister(p) ); 97 | } 98 | 99 | static int __check_stream_done(cudaStream_t s) 100 | { 101 | cudaError_t e = cudaStreamQuery(s); 102 | if (e == cudaSuccess) { 103 | return 1; 104 | } else if (e != cudaErrorNotReady) 105 | csc(e); 106 | 107 | return 0; 108 | } 109 | 110 | int gpu_execution_finished(struct kgpu_service_request *sreq) 111 | { 112 | cudaStream_t s = (cudaStream_t)gpu_get_stream(sreq->stream_id); 113 | return __check_stream_done(s); 114 | } 115 | 116 | int gpu_post_finished(struct kgpu_service_request *sreq) 117 | { 118 | cudaStream_t s = (cudaStream_t)gpu_get_stream(sreq->stream_id); 119 | return __check_stream_done(s); 120 | } 121 | 122 | #define min(a,b) (((a)<(b))?(a):(b)) 123 | #define max(a,b) (((a)>(b))?(a):(b)) 124 | 125 | static int __merge_2ranges( 126 | unsigned long r1, unsigned long s1, unsigned long r2, unsigned long s2, 127 | unsigned long *e, unsigned long *s) 128 | { 129 | // r1 r2 130 | if (r1 < r2) { 131 | if (r1+s1 >= r2) { 132 | *e = r1; 133 | *s = max(r1+s1, r2+s2) - r1; 134 | return 1; 135 | } 136 | 137 | return 0; 138 | } else if (r1 == r2) { 139 | *e = r1; 140 | *s = max(s1, s2); 141 | return 1; 142 | } else { 143 | // r2 
r1 144 | if (r2+s2 >= r1) { 145 | *e = r2; 146 | *s = max(r1+s1, r2+s2) - r2; 147 | return 1; 148 | } 149 | 150 | return 0; 151 | } 152 | } 153 | 154 | 155 | static int __merge_ranges(unsigned long ad[], unsigned long sz[], int n) 156 | { 157 | int i; 158 | 159 | for (i=0; ihin, hostbuf.uva, hostbuf.size)) 224 | sreq->din = 225 | (void*)ADDR_REBASE(devbuf.uva, hostbuf.uva, sreq->hin); 226 | else { 227 | sreq->din = 228 | (void*)ADDR_REBASE(devbuf4vma.uva, hostvma.uva, sreq->hin); 229 | 230 | pin_addr[npins] = TO_UL(sreq->hin); 231 | pin_sz[npins] = sreq->insize; 232 | npins++; 233 | } 234 | 235 | if (ADDR_WITHIN(sreq->hout, hostbuf.uva, hostbuf.size)) 236 | sreq->dout = 237 | (void*)ADDR_REBASE(devbuf.uva, hostbuf.uva, sreq->hout); 238 | else { 239 | sreq->dout = 240 | (void*)ADDR_REBASE(devbuf4vma.uva, hostvma.uva, sreq->hout); 241 | 242 | pin_addr[npins] = TO_UL(sreq->hout); 243 | pin_sz[npins] = sreq->outsize; 244 | npins++; 245 | } 246 | 247 | if (ADDR_WITHIN(sreq->hdata, hostbuf.uva, hostbuf.size)) 248 | sreq->ddata = 249 | (void*)ADDR_REBASE(devbuf.uva, hostbuf.uva, sreq->hdata); 250 | else if (ADDR_WITHIN(sreq->hdata, hostvma.uva, hostvma.size)){ 251 | sreq->ddata = 252 | (void*)ADDR_REBASE(devbuf4vma.uva, hostvma.uva, sreq->hdata); 253 | 254 | pin_addr[npins] = TO_UL(sreq->hdata); 255 | pin_sz[npins] = sreq->datasize; 256 | npins++; 257 | } 258 | 259 | npins = __merge_ranges(pin_addr, pin_sz, npins); 260 | for (i=0; idin = NULL; 273 | sreq->dout = NULL; 274 | sreq->ddata = NULL; 275 | 276 | if (ADDR_WITHIN(sreq->hin, hostvma.uva, hostvma.size)) { 277 | pin_addr[npins] = TO_UL(sreq->hin); 278 | pin_sz[npins] = sreq->insize; 279 | npins++; 280 | } 281 | if (ADDR_WITHIN(sreq->hout, hostvma.uva, hostvma.size)) { 282 | pin_addr[npins] = TO_UL(sreq->hout); 283 | pin_sz[npins] = sreq->outsize; 284 | npins++; 285 | } 286 | if (ADDR_WITHIN(sreq->hdata, hostvma.uva, hostvma.size)) { 287 | pin_addr[npins] = TO_UL(sreq->hdata); 288 | pin_sz[npins] = sreq->datasize; 289 
| npins++; 290 | } 291 | 292 | npins = __merge_ranges(pin_addr, pin_sz, npins); 293 | for (i=0; istream_id = i; 306 | sreq->stream = (unsigned long)(streams[i]); 307 | return 0; 308 | } 309 | } 310 | return 1; 311 | } 312 | 313 | void gpu_free_stream(struct kgpu_service_request *sreq) 314 | { 315 | if (sreq->stream_id >= 0 && sreq->stream_id < MAX_STREAM_NR) { 316 | streamuses[sreq->stream_id] = 0; 317 | } 318 | } 319 | 320 | 321 | int default_compute_size(struct kgpu_service_request *sreq) 322 | { 323 | sreq->block_x = default_block_size.x; 324 | sreq->block_y = default_block_size.y; 325 | sreq->grid_x = default_grid_size.x; 326 | sreq->grid_y = default_grid_size.y; 327 | return 0; 328 | } 329 | 330 | int default_prepare(struct kgpu_service_request *sreq) 331 | { 332 | cudaStream_t s = (cudaStream_t)gpu_get_stream(sreq->stream_id); 333 | csc( ah2dcpy( sreq->din, sreq->hin, sreq->insize, s) ); 334 | return 0; 335 | } 336 | 337 | int default_post(struct kgpu_service_request *sreq) 338 | { 339 | cudaStream_t s = (cudaStream_t)gpu_get_stream(sreq->stream_id); 340 | csc( ad2hcpy( sreq->hout, sreq->dout, sreq->outsize, s) ); 341 | return 0; 342 | } 343 | -------------------------------------------------------------------------------- /kgpu/helper.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This work is licensed under the terms of the GNU GPL, version 2. See 3 | * the GPL-COPYING file in the top-level directory. 4 | * 5 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 6 | * All rights reserved. 7 | * 8 | * Userspace helper program. 
9 | * 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "list.h" 24 | #include "helper.h" 25 | 26 | struct _kgpu_sritem { 27 | struct kgpu_service_request sr; 28 | struct list_head glist; 29 | struct list_head list; 30 | }; 31 | 32 | static int devfd; 33 | 34 | struct kgpu_gpu_mem_info hostbuf; 35 | struct kgpu_gpu_mem_info hostvma; 36 | 37 | volatile int kh_loop_continue = 1; 38 | 39 | static char *service_lib_dir; 40 | static char *kgpudev; 41 | 42 | /* lists of requests of different states */ 43 | LIST_HEAD(all_reqs); 44 | LIST_HEAD(init_reqs); 45 | LIST_HEAD(memdone_reqs); 46 | LIST_HEAD(prepared_reqs); 47 | LIST_HEAD(running_reqs); 48 | LIST_HEAD(post_exec_reqs); 49 | LIST_HEAD(done_reqs); 50 | 51 | #define ssc(...) _safe_syscall(__VA_ARGS__, __FILE__, __LINE__) 52 | 53 | int _safe_syscall(int r, const char *file, int line) 54 | { 55 | if (r<0) { 56 | fprintf(stderr, "Error in %s:%d, ", file, line); 57 | perror(""); 58 | abort(); 59 | } 60 | return r; 61 | } 62 | 63 | typedef unsigned char u8; 64 | 65 | static void dump_hex(u8* p, int rs, int cs) 66 | { 67 | int r,c; 68 | printf("\n"); 69 | for (r=0; rsr.state = KGPU_REQ_DONE; 141 | sreq->sr.errcode = serr; 142 | list_del(&sreq->list); 143 | list_add_tail(&sreq->list, &done_reqs); 144 | } 145 | 146 | static struct _kgpu_sritem *kh_alloc_service_request() 147 | { 148 | struct _kgpu_sritem *s = (struct _kgpu_sritem *) 149 | malloc(sizeof(struct _kgpu_sritem)); 150 | if (s) { 151 | memset(s, 0, sizeof(struct _kgpu_sritem)); 152 | INIT_LIST_HEAD(&s->list); 153 | INIT_LIST_HEAD(&s->glist); 154 | } 155 | return s; 156 | } 157 | 158 | static void kh_free_service_request(struct _kgpu_sritem *s) 159 | { 160 | free(s); 161 | } 162 | 163 | static void kh_init_service_request(struct _kgpu_sritem *item, 164 | struct kgpu_ku_request *kureq) 165 | { 166 | list_add_tail(&item->glist, &all_reqs); 
167 | 168 | memset(&item->sr, 0, sizeof(struct kgpu_service_request)); 169 | item->sr.id = kureq->id; 170 | item->sr.hin = kureq->in; 171 | item->sr.hout = kureq->out; 172 | item->sr.hdata = kureq->data; 173 | item->sr.insize = kureq->insize; 174 | item->sr.outsize = kureq->outsize; 175 | item->sr.datasize = kureq->datasize; 176 | item->sr.stream_id = -1; 177 | item->sr.s = kh_lookup_service(kureq->service_name); 178 | if (!item->sr.s) { 179 | dbg("can't find service\n"); 180 | kh_fail_request(item, KGPU_NO_SERVICE); 181 | } else { 182 | item->sr.s->compute_size(&item->sr); 183 | item->sr.state = KGPU_REQ_INIT; 184 | item->sr.errcode = 0; 185 | list_add_tail(&item->list, &init_reqs); 186 | } 187 | } 188 | 189 | static int kh_get_next_service_request(void) 190 | { 191 | int err; 192 | struct pollfd pfd; 193 | 194 | struct _kgpu_sritem *sreq; 195 | struct kgpu_ku_request kureq; 196 | 197 | pfd.fd = devfd; 198 | pfd.events = POLLIN; 199 | pfd.revents = 0; 200 | 201 | err = poll(&pfd, 1, list_empty(&all_reqs)? 
-1:0); 202 | if (err == 0 || (err && !(pfd.revents & POLLIN)) ) { 203 | return -1; 204 | } else if (err == 1 && pfd.revents & POLLIN) 205 | { 206 | sreq = kh_alloc_service_request(); 207 | 208 | if (!sreq) 209 | return -1; 210 | 211 | err = read(devfd, (char*)(&kureq), sizeof(struct kgpu_ku_request)); 212 | if (err <= 0) { 213 | if (errno == EAGAIN || err == 0) { 214 | kh_free_service_request(sreq); 215 | return -1; 216 | } else { 217 | perror("Read request."); 218 | abort(); 219 | } 220 | } else { 221 | kh_init_service_request(sreq, &kureq); 222 | return 0; 223 | } 224 | } else { 225 | if (err < 0) { 226 | perror("Poll request"); 227 | abort(); 228 | } else { 229 | fprintf(stderr, "Poll returns multiple fd's results\n"); 230 | abort(); 231 | } 232 | } 233 | } 234 | 235 | static int kh_request_alloc_mem(struct _kgpu_sritem *sreq) 236 | { 237 | int r = gpu_alloc_device_mem(&sreq->sr); 238 | if (r) { 239 | return -1; 240 | } else { 241 | sreq->sr.state = KGPU_REQ_MEM_DONE; 242 | list_del(&sreq->list); 243 | list_add_tail(&sreq->list, &memdone_reqs); 244 | return 0; 245 | } 246 | } 247 | 248 | static int kh_prepare_exec(struct _kgpu_sritem *sreq) 249 | { 250 | int r; 251 | if (gpu_alloc_stream(&sreq->sr)) { 252 | r = -1; 253 | } else { 254 | r = sreq->sr.s->prepare(&sreq->sr); 255 | 256 | if (r) { 257 | dbg("%d fails prepare\n", sreq->sr.id); 258 | kh_fail_request(sreq, r); 259 | } else { 260 | sreq->sr.state = KGPU_REQ_PREPARED; 261 | list_del(&sreq->list); 262 | list_add_tail(&sreq->list, &prepared_reqs); 263 | } 264 | } 265 | 266 | return r; 267 | } 268 | 269 | static int kh_launch_exec(struct _kgpu_sritem *sreq) 270 | { 271 | int r = sreq->sr.s->launch(&sreq->sr); 272 | if (r) { 273 | dbg("%d fails launch\n", sreq->sr.id); 274 | kh_fail_request(sreq, r); 275 | } else { 276 | sreq->sr.state = KGPU_REQ_RUNNING; 277 | list_del(&sreq->list); 278 | list_add_tail(&sreq->list, &running_reqs); 279 | } 280 | return 0; 281 | } 282 | 283 | static int kh_post_exec(struct 
_kgpu_sritem *sreq) 284 | { 285 | int r = 1; 286 | if (gpu_execution_finished(&sreq->sr)) { 287 | if (!(r = sreq->sr.s->post(&sreq->sr))) { 288 | sreq->sr.state = KGPU_REQ_POST_EXEC; 289 | list_del(&sreq->list); 290 | list_add_tail(&sreq->list, &post_exec_reqs); 291 | } 292 | else { 293 | dbg("%d fails post\n", sreq->sr.id); 294 | kh_fail_request(sreq, r); 295 | } 296 | } 297 | 298 | return r; 299 | } 300 | 301 | static int kh_finish_post(struct _kgpu_sritem *sreq) 302 | { 303 | if (gpu_post_finished(&sreq->sr)) { 304 | sreq->sr.state = KGPU_REQ_DONE; 305 | list_del(&sreq->list); 306 | list_add_tail(&sreq->list, &done_reqs); 307 | 308 | return 0; 309 | } 310 | 311 | return 1; 312 | } 313 | 314 | static int kh_service_done(struct _kgpu_sritem *sreq) 315 | { 316 | struct kgpu_ku_response resp; 317 | 318 | resp.id = sreq->sr.id; 319 | resp.errcode = sreq->sr.errcode; 320 | 321 | kh_send_response(&resp); 322 | 323 | list_del(&sreq->list); 324 | list_del(&sreq->glist); 325 | gpu_free_device_mem(&sreq->sr); 326 | gpu_free_stream(&sreq->sr); 327 | kh_free_service_request(sreq); 328 | return 0; 329 | } 330 | 331 | static int __kh_process_request(int (*op)(struct _kgpu_sritem *), 332 | struct list_head *lst, int once) 333 | { 334 | struct list_head *pos, *n; 335 | int r = 0; 336 | 337 | list_for_each_safe(pos, n, lst) { 338 | r = op(list_entry(pos, struct _kgpu_sritem, list)); 339 | if (!r && once) 340 | break; 341 | } 342 | 343 | return r; 344 | } 345 | 346 | static int kh_main_loop() 347 | { 348 | while (kh_loop_continue) 349 | { 350 | __kh_process_request(kh_service_done, &done_reqs, 0); 351 | __kh_process_request(kh_finish_post, &post_exec_reqs, 0); 352 | __kh_process_request(kh_post_exec, &running_reqs, 1); 353 | __kh_process_request(kh_launch_exec, &prepared_reqs, 1); 354 | __kh_process_request(kh_prepare_exec, &memdone_reqs, 1); 355 | __kh_process_request(kh_request_alloc_mem, &init_reqs, 0); 356 | kh_get_next_service_request(); 357 | } 358 | 359 | return 0; 360 | } 
361 | 362 | int main(int argc, char *argv[]) 363 | { 364 | int c; 365 | kgpudev = "/dev/kgpu"; 366 | service_lib_dir = "./"; 367 | 368 | while ((c = getopt(argc, argv, "d:l:v:")) != -1) 369 | { 370 | switch (c) 371 | { 372 | case 'd': 373 | kgpudev = optarg; 374 | break; 375 | case 'l': 376 | service_lib_dir = optarg; 377 | break; 378 | case 'v': 379 | kgpu_log_level = atoi(optarg); 380 | break; 381 | default: 382 | fprintf(stderr, 383 | "Usage %s" 384 | " [-d device]" 385 | " [-l service_lib_dir]" 386 | " [-v log_level" 387 | "\n", 388 | argv[0]); 389 | return 0; 390 | } 391 | } 392 | 393 | kh_init(); 394 | kh_load_all_services(service_lib_dir); 395 | kh_main_loop(); 396 | kh_finit(); 397 | return 0; 398 | } 399 | -------------------------------------------------------------------------------- /services/gaes/libsrv_gaes/srv_gaes.cu: -------------------------------------------------------------------------------- 1 | /* This work is licensed under the terms of the GNU GPL, version 2. See 2 | * the GPL-COPYING file in the top-level directory. 3 | * 4 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 5 | * All rights reserved. 
6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include "../../../kgpu/kgpu.h" 12 | #include "../../../kgpu/gputils.h" 13 | #include "../gaesu.h" 14 | 15 | #define BYTES_PER_BLOCK 1024 16 | #define BYTES_PER_THREAD 4 17 | #define BYTES_PER_GROUP 16 18 | #define THREAD_PER_BLOCK (BYTES_PER_BLOCK/BYTES_PER_THREAD) 19 | #define WORDS_PER_BLOCK (BYTES_PER_BLOCK/4) 20 | 21 | #define BPT_BYTES_PER_BLOCK 4096 22 | 23 | struct kgpu_service gaes_ecb_enc_srv; 24 | struct kgpu_service gaes_ecb_dec_srv; 25 | 26 | struct kgpu_service gaes_ctr_srv; 27 | struct kgpu_service gaes_lctr_srv; 28 | 29 | struct kgpu_service bp4t_gaes_ecb_enc_srv; 30 | struct kgpu_service bp4t_gaes_ecb_dec_srv; 31 | 32 | struct kgpu_service gaes_xts_enc_srv; 33 | struct kgpu_service gaes_xts_dec_srv; 34 | 35 | struct gaes_ecb_data { 36 | u32 *d_key; 37 | u32 *h_key; 38 | int nrounds; 39 | int nr_dblks_per_tblk; 40 | }; 41 | 42 | struct gaes_ctr_data { 43 | u32 *d_key; 44 | u32 *h_key; 45 | u8 *d_ctr; 46 | u8 *h_ctr; 47 | int nrounds; 48 | int nr_dblks_per_tblk; 49 | }; 50 | 51 | #if 0 52 | static void dump_hex(u8* p, int rs, int cs) 53 | { 54 | int r,c; 55 | printf("\n"); 56 | for (r=0; rblock_x = 73 | sr->outsize>=BPT_BYTES_PER_BLOCK? 74 | BPT_BYTES_PER_BLOCK/16: sr->outsize/16; 75 | sr->grid_x = 76 | sr->outsize/BPT_BYTES_PER_BLOCK? 77 | sr->outsize/BPT_BYTES_PER_BLOCK:1; 78 | sr->block_y = 1; 79 | sr->grid_y = 1; 80 | 81 | return 0; 82 | } 83 | 84 | int gaes_ecb_compute_size_bp4t(struct kgpu_service_request *sr) 85 | { 86 | sr->block_y = 87 | sr->outsize>=BYTES_PER_BLOCK? 88 | BYTES_PER_BLOCK/BYTES_PER_GROUP: (sr->outsize/BYTES_PER_GROUP); 89 | sr->grid_x = 90 | sr->outsize/BYTES_PER_BLOCK? 
91 | sr->outsize/BYTES_PER_BLOCK:1; 92 | sr->block_x = BYTES_PER_GROUP/BYTES_PER_THREAD; 93 | sr->grid_y = 1; 94 | 95 | return 0; 96 | } 97 | 98 | int gaes_ecb_launch_bpt(struct kgpu_service_request *sr) 99 | { 100 | struct crypto_aes_ctx *hctx = (struct crypto_aes_ctx*)sr->hdata; 101 | struct crypto_aes_ctx *dctx = (struct crypto_aes_ctx*)sr->ddata; 102 | 103 | if (sr->s == &gaes_ecb_dec_srv) 104 | aes_decrypt_bpt 105 | <<grid_x, sr->grid_y), 106 | dim3(sr->block_x, sr->block_y), 107 | 0, (cudaStream_t)(sr->stream)>>> 108 | ( 109 | (u32*)dctx->key_dec, 110 | hctx->key_length/4+6, 111 | (u8*)sr->dout 112 | ); 113 | else 114 | aes_encrypt_bpt 115 | <<grid_x, sr->grid_y), 116 | dim3(sr->block_x, sr->block_y), 117 | 0, (cudaStream_t)(sr->stream)>>> 118 | ( 119 | (u32*)dctx->key_enc, 120 | hctx->key_length/4+6, 121 | (u8*)sr->dout 122 | ); 123 | return 0; 124 | } 125 | 126 | int gaes_ecb_launch_bp4t(struct kgpu_service_request *sr) 127 | { 128 | struct crypto_aes_ctx *hctx = (struct crypto_aes_ctx*)sr->hdata; 129 | struct crypto_aes_ctx *dctx = (struct crypto_aes_ctx*)sr->ddata; 130 | 131 | if (sr->s == &gaes_ecb_dec_srv) 132 | aes_decrypt_bp4t<<< 133 | dim3(sr->grid_x, sr->grid_y), 134 | dim3(sr->block_x, sr->block_y), 135 | 0, (cudaStream_t)(sr->stream)>>> 136 | ((u32*)dctx->key_dec, 137 | hctx->key_length/4+6, 138 | (u8*)sr->dout); 139 | else 140 | aes_encrypt_bp4t<<< 141 | dim3(sr->grid_x, sr->grid_y), 142 | dim3(sr->block_x, sr->block_y), 143 | 0, (cudaStream_t)(sr->stream)>>> 144 | ((u32*)dctx->key_enc, 145 | hctx->key_length/4+6, 146 | (u8*)sr->dout); 147 | 148 | return 0; 149 | } 150 | 151 | int gaes_ecb_prepare(struct kgpu_service_request *sr) 152 | { 153 | cudaStream_t s = (cudaStream_t)(sr->stream);//gpu_get_stream(sr->stream_id); 154 | 155 | csc( ah2dcpy( sr->din, sr->hin, sr->insize, s) ); 156 | 157 | return 0; 158 | } 159 | 160 | int gaes_ecb_post(struct kgpu_service_request *sr) 161 | { 162 | cudaStream_t s = 
(cudaStream_t)(sr->stream);//gpu_get_stream(sr->stream_id); 163 | 164 | csc( ad2hcpy( sr->hout, sr->dout, sr->outsize, s) ); 165 | 166 | return 0; 167 | } 168 | 169 | #define gaes_xts_post gaes_ecb_post 170 | #define gaes_xts_prepare gaes_ecb_prepare 171 | 172 | int gaes_xts_compute_size(struct kgpu_service_request *sr) 173 | { 174 | sr->block_x = XTS_SECTOR_SIZE/AES_BLOCK_SIZE; 175 | sr->grid_x = sr->outsize/XTS_SECTOR_SIZE; 176 | sr->block_y = 1; 177 | sr->grid_y = 1; 178 | 179 | return 0; 180 | } 181 | 182 | int gaes_xts_launch(struct kgpu_service_request *sr) 183 | { 184 | struct crypto_xts_info *hinfo = 185 | (struct crypto_xts_info*)(sr->hdata); 186 | struct crypto_xts_info *dinfo = 187 | (struct crypto_xts_info*)(sr->ddata); 188 | 189 | if (sr->s == &gaes_xts_dec_srv) 190 | xts_decrypt<<< 191 | dim3(sr->grid_x, sr->grid_y), 192 | dim3(sr->block_x, sr->block_y), 193 | 0, (cudaStream_t)(sr->stream)>>> 194 | ((u32*)dinfo->key_dec, 195 | hinfo->key_length/4+6, 196 | (u8*)sr->dout, 197 | dinfo->ivs); 198 | else 199 | xts_encrypt<<< 200 | dim3(sr->grid_x, sr->grid_y), 201 | dim3(sr->block_x, sr->block_y), 202 | 0, (cudaStream_t)(sr->stream)>>> 203 | ((u32*)dinfo->key_enc, 204 | hinfo->key_length/4+6, 205 | (u8*)sr->dout, 206 | dinfo->ivs); 207 | return 0; 208 | } 209 | 210 | #define gaes_ctr_compute_size gaes_ecb_compute_size_bpt 211 | #define gaes_ctr_post gaes_ecb_post 212 | #define gaes_ctr_prepare gaes_ecb_prepare 213 | 214 | int gaes_lctr_compute_size(struct kgpu_service_request *sr) 215 | { 216 | struct crypto_gaes_ctr_info *info 217 | = (struct crypto_gaes_ctr_info*)(sr->hdata); 218 | sr->block_x = info->ctr_range/16; 219 | sr->grid_x = sr->outsize/sr->block_x; 220 | sr->block_y = 1; 221 | sr->grid_y = 1; 222 | 223 | return 0; 224 | } 225 | 226 | int gaes_ctr_launch(struct kgpu_service_request *sr) 227 | { 228 | struct crypto_gaes_ctr_info *hinfo = 229 | (struct crypto_gaes_ctr_info*)(sr->hdata); 230 | struct crypto_gaes_ctr_info *dinfo = 231 | (struct 
crypto_gaes_ctr_info*)(sr->ddata); 232 | 233 | aes_ctr_crypt<<< 234 | dim3(sr->grid_x, sr->grid_y), 235 | dim3(sr->block_x, sr->block_y), 236 | 0, (cudaStream_t)(sr->stream)>>> 237 | ((u32*)dinfo->key_enc, 238 | hinfo->key_length/4+6, 239 | (u8*)sr->dout, 240 | dinfo->ctrblk); 241 | return 0; 242 | } 243 | 244 | int gaes_lctr_launch(struct kgpu_service_request *sr) 245 | { 246 | struct crypto_gaes_ctr_info *hinfo = 247 | (struct crypto_gaes_ctr_info*)(sr->hdata); 248 | struct crypto_gaes_ctr_info *dinfo = 249 | (struct crypto_gaes_ctr_info*)(sr->ddata); 250 | 251 | aes_lctr_crypt<<< 252 | dim3(sr->grid_x, sr->grid_y), 253 | dim3(sr->block_x, sr->block_y), 254 | 0, (cudaStream_t)(sr->stream)>>> 255 | ((u32*)dinfo->key_enc, 256 | hinfo->key_length/4+6, 257 | (u8*)sr->dout, 258 | dinfo->ctrblk); 259 | return 0; 260 | } 261 | 262 | /* 263 | * Naming convention of ciphers: 264 | * g{algorithm}_{mode}[-({enc}|{dev})] 265 | * 266 | * {} : var value 267 | * [] : optional 268 | * (|) : or 269 | */ 270 | extern "C" int init_service(void *lh, int (*reg_srv)(struct kgpu_service*, void*)) 271 | { 272 | int err; 273 | printf("[libsrv_gaes] Info: init gaes services\n"); 274 | 275 | cudaFuncSetCacheConfig(aes_decrypt_bpt, cudaFuncCachePreferL1); 276 | cudaFuncSetCacheConfig(aes_encrypt_bpt, cudaFuncCachePreferL1); 277 | cudaFuncSetCacheConfig(aes_decrypt_bp4t, cudaFuncCachePreferL1); 278 | cudaFuncSetCacheConfig(aes_encrypt_bp4t, cudaFuncCachePreferL1); 279 | cudaFuncSetCacheConfig(aes_ctr_crypt, cudaFuncCachePreferL1); 280 | cudaFuncSetCacheConfig(aes_lctr_crypt, cudaFuncCachePreferL1); 281 | cudaFuncSetCacheConfig(xts_decrypt, cudaFuncCachePreferL1); 282 | cudaFuncSetCacheConfig(xts_encrypt, cudaFuncCachePreferL1); 283 | 284 | sprintf(gaes_ecb_enc_srv.name, "gaes_ecb-enc"); 285 | gaes_ecb_enc_srv.sid = 0; 286 | gaes_ecb_enc_srv.compute_size = gaes_ecb_compute_size_bpt; 287 | gaes_ecb_enc_srv.launch = gaes_ecb_launch_bpt; 288 | gaes_ecb_enc_srv.prepare = gaes_ecb_prepare; 289 | 
gaes_ecb_enc_srv.post = gaes_ecb_post; 290 | 291 | sprintf(gaes_ecb_dec_srv.name, "gaes_ecb-dec"); 292 | gaes_ecb_dec_srv.sid = 0; 293 | gaes_ecb_dec_srv.compute_size = gaes_ecb_compute_size_bpt; 294 | gaes_ecb_dec_srv.launch = gaes_ecb_launch_bpt; 295 | gaes_ecb_dec_srv.prepare = gaes_ecb_prepare; 296 | gaes_ecb_dec_srv.post = gaes_ecb_post; 297 | 298 | sprintf(gaes_ctr_srv.name, "gaes_ctr"); 299 | gaes_ctr_srv.sid = 0; 300 | gaes_ctr_srv.compute_size = gaes_ctr_compute_size; 301 | gaes_ctr_srv.launch = gaes_ctr_launch; 302 | gaes_ctr_srv.prepare = gaes_ctr_prepare; 303 | gaes_ctr_srv.post = gaes_ctr_post; 304 | 305 | sprintf(gaes_lctr_srv.name, "gaes_lctr"); 306 | gaes_lctr_srv.sid = 0; 307 | gaes_lctr_srv.compute_size = gaes_lctr_compute_size; 308 | gaes_lctr_srv.launch = gaes_lctr_launch; 309 | gaes_lctr_srv.prepare = gaes_ctr_prepare; 310 | gaes_lctr_srv.post = gaes_ctr_post; 311 | 312 | sprintf(gaes_xts_enc_srv.name, "gaes_xts-enc"); 313 | gaes_xts_enc_srv.sid = 0; 314 | gaes_xts_enc_srv.compute_size = gaes_xts_compute_size; 315 | gaes_xts_enc_srv.launch = gaes_xts_launch; 316 | gaes_xts_enc_srv.prepare = gaes_xts_prepare; 317 | gaes_xts_enc_srv.post = gaes_xts_post; 318 | 319 | sprintf(gaes_xts_dec_srv.name, "gaes_xts-dec"); 320 | gaes_xts_dec_srv.sid = 0; 321 | gaes_xts_dec_srv.compute_size = gaes_xts_compute_size; 322 | gaes_xts_dec_srv.launch = gaes_xts_launch; 323 | gaes_xts_dec_srv.prepare = gaes_xts_prepare; 324 | gaes_xts_dec_srv.post = gaes_xts_post; 325 | 326 | err = reg_srv(&gaes_ecb_enc_srv, lh); 327 | err |= reg_srv(&gaes_ecb_dec_srv, lh); 328 | err |= reg_srv(&gaes_ctr_srv, lh); 329 | err |= reg_srv(&gaes_lctr_srv, lh); 330 | err |= reg_srv(&gaes_xts_enc_srv, lh); 331 | err |= reg_srv(&gaes_xts_dec_srv, lh); 332 | if (err) { 333 | fprintf(stderr, 334 | "[libsrv_gaes] Error: failed to register gaes services\n"); 335 | } 336 | 337 | return err; 338 | } 339 | 340 | extern "C" int finit_service(void *lh, int (*unreg_srv)(const char*)) 341 | { 342 | 
int err; 343 | printf("[libsrv_gaes] Info: finit gaes services\n"); 344 | 345 | err = unreg_srv(gaes_ecb_enc_srv.name); 346 | err |= unreg_srv(gaes_ecb_dec_srv.name); 347 | err |= unreg_srv(gaes_ctr_srv.name); 348 | err |= unreg_srv(gaes_lctr_srv.name); 349 | err |= unreg_srv(gaes_xts_enc_srv.name); 350 | err |= unreg_srv(gaes_xts_dec_srv.name); 351 | if (err) { 352 | fprintf(stderr, 353 | "[libsrv_gaes] Error: failed to unregister gaes services\n"); 354 | } 355 | 356 | return err; 357 | } 358 | 359 | 360 | -------------------------------------------------------------------------------- /services/raid6/raid456/38/bitmap.h: -------------------------------------------------------------------------------- 1 | /* 2 | * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 3 | * 4 | * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc. 5 | */ 6 | #ifndef BITMAP_H 7 | #define BITMAP_H 1 8 | 9 | #define BITMAP_MAJOR_LO 3 10 | /* version 4 insists the bitmap is in little-endian order 11 | * with version 3, it is host-endian which is non-portable 12 | */ 13 | #define BITMAP_MAJOR_HI 4 14 | #define BITMAP_MAJOR_HOSTENDIAN 3 15 | 16 | #define BITMAP_MINOR 39 17 | 18 | /* 19 | * in-memory bitmap: 20 | * 21 | * Use 16 bit block counters to track pending writes to each "chunk". 22 | * The 2 high order bits are special-purpose, the first is a flag indicating 23 | * whether a resync is needed. The second is a flag indicating whether a 24 | * resync is active. 25 | * This means that the counter is actually 14 bits: 26 | * 27 | * +--------+--------+------------------------------------------------+ 28 | * | resync | resync | counter | 29 | * | needed | active | | 30 | * | (0-1) | (0-1) | (0-16383) | 31 | * +--------+--------+------------------------------------------------+ 32 | * 33 | * The "resync needed" bit is set when: 34 | * a '1' bit is read from storage at startup. 
35 | * a write request fails on some drives 36 | * a resync is aborted on a chunk with 'resync active' set 37 | * It is cleared (and resync-active set) when a resync starts across all drives 38 | * of the chunk. 39 | * 40 | * 41 | * The "resync active" bit is set when: 42 | * a resync is started on all drives, and resync_needed is set. 43 | * resync_needed will be cleared (as long as resync_active wasn't already set). 44 | * It is cleared when a resync completes. 45 | * 46 | * The counter counts pending write requests, plus the on-disk bit. 47 | * When the counter is '1' and the resync bits are clear, the on-disk 48 | * bit can be cleared aswell, thus setting the counter to 0. 49 | * When we set a bit, or in the counter (to start a write), if the fields is 50 | * 0, we first set the disk bit and set the counter to 1. 51 | * 52 | * If the counter is 0, the on-disk bit is clear and the stipe is clean 53 | * Anything that dirties the stipe pushes the counter to 2 (at least) 54 | * and sets the on-disk bit (lazily). 55 | * If a periodic sweep find the counter at 2, it is decremented to 1. 56 | * If the sweep find the counter at 1, the on-disk bit is cleared and the 57 | * counter goes to zero. 
58 | * 59 | * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block 60 | * counters as a fallback when "page" memory cannot be allocated: 61 | * 62 | * Normal case (page memory allocated): 63 | * 64 | * page pointer (32-bit) 65 | * 66 | * [ ] ------+ 67 | * | 68 | * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) 69 | * c1 c2 c2048 70 | * 71 | * Hijacked case (page memory allocation failed): 72 | * 73 | * hijacked page pointer (32-bit) 74 | * 75 | * [ ][ ] (no page memory allocated) 76 | * counter #1 (16-bit) counter #2 (16-bit) 77 | * 78 | */ 79 | 80 | #ifdef __KERNEL__ 81 | 82 | #define PAGE_BITS (PAGE_SIZE << 3) 83 | #define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) 84 | 85 | typedef __u16 bitmap_counter_t; 86 | #define COUNTER_BITS 16 87 | #define COUNTER_BIT_SHIFT 4 88 | #define COUNTER_BYTE_RATIO (COUNTER_BITS / 8) 89 | #define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3) 90 | 91 | #define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1))) 92 | #define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2))) 93 | #define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1) 94 | #define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) 95 | #define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) 96 | #define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) 97 | 98 | /* how many counters per page? */ 99 | #define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) 100 | /* same, except a shift value for more efficient bitops */ 101 | #define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) 102 | /* same, except a mask value for more efficient bitops */ 103 | #define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) 104 | 105 | #define BITMAP_BLOCK_SIZE 512 106 | #define BITMAP_BLOCK_SHIFT 9 107 | 108 | /* how many blocks per chunk? 
(this is variable) */ 109 | #define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->mddev->bitmap_info.chunksize >> BITMAP_BLOCK_SHIFT) 110 | #define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT) 111 | #define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1) 112 | 113 | /* when hijacked, the counters and bits represent even larger "chunks" */ 114 | /* there will be 1024 chunks represented by each counter in the page pointers */ 115 | #define PAGEPTR_BLOCK_RATIO(bitmap) \ 116 | (CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1) 117 | #define PAGEPTR_BLOCK_SHIFT(bitmap) \ 118 | (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1) 119 | #define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1) 120 | 121 | #endif 122 | 123 | /* 124 | * bitmap structures: 125 | */ 126 | 127 | #define BITMAP_MAGIC 0x6d746962 128 | 129 | /* use these for bitmap->flags and bitmap->sb->state bit-fields */ 130 | enum bitmap_state { 131 | BITMAP_STALE = 0x002, /* the bitmap file is out of date or had -EIO */ 132 | BITMAP_WRITE_ERROR = 0x004, /* A write error has occurred */ 133 | BITMAP_HOSTENDIAN = 0x8000, 134 | }; 135 | 136 | /* the superblock at the front of the bitmap file -- little endian */ 137 | typedef struct bitmap_super_s { 138 | __le32 magic; /* 0 BITMAP_MAGIC */ 139 | __le32 version; /* 4 the bitmap major for now, could change... 
*/ 140 | __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */ 141 | __le64 events; /* 24 event counter for the bitmap (1)*/ 142 | __le64 events_cleared;/*32 event counter when last bit cleared (2) */ 143 | __le64 sync_size; /* 40 the size of the md device's sync range(3) */ 144 | __le32 state; /* 48 bitmap state information */ 145 | __le32 chunksize; /* 52 the bitmap chunk size in bytes */ 146 | __le32 daemon_sleep; /* 56 seconds between disk flushes */ 147 | __le32 write_behind; /* 60 number of outstanding write-behind writes */ 148 | 149 | __u8 pad[256 - 64]; /* set to zero */ 150 | } bitmap_super_t; 151 | 152 | /* notes: 153 | * (1) This event counter is updated before the eventcounter in the md superblock 154 | * When a bitmap is loaded, it is only accepted if this event counter is equal 155 | * to, or one greater than, the event counter in the superblock. 156 | * (2) This event counter is updated when the other one is *if*and*only*if* the 157 | * array is not degraded. As bits are not cleared when the array is degraded, 158 | * this represents the last time that any bits were cleared. 159 | * If a device is being added that has an event count with this value or 160 | * higher, it is accepted as conforming to the bitmap. 161 | * (3)This is the number of sectors represented by the bitmap, and is the range that 162 | * resync happens across. For raid1 and raid5/6 it is the size of individual 163 | * devices. For raid10 it is the size of the array. 
164 | */ 165 | 166 | #ifdef __KERNEL__ 167 | 168 | /* the in-memory bitmap is represented by bitmap_pages */ 169 | struct bitmap_page { 170 | /* 171 | * map points to the actual memory page 172 | */ 173 | char *map; 174 | /* 175 | * in emergencies (when map cannot be alloced), hijack the map 176 | * pointer and use it as two counters itself 177 | */ 178 | unsigned int hijacked:1; 179 | /* 180 | * count of dirty bits on the page 181 | */ 182 | unsigned int count:31; 183 | }; 184 | 185 | /* keep track of bitmap file pages that have pending writes on them */ 186 | struct page_list { 187 | struct list_head list; 188 | struct page *page; 189 | }; 190 | 191 | /* the main bitmap structure - one per mddev */ 192 | struct bitmap { 193 | struct bitmap_page *bp; 194 | unsigned long pages; /* total number of pages in the bitmap */ 195 | unsigned long missing_pages; /* number of pages not yet allocated */ 196 | 197 | mddev_t *mddev; /* the md device that the bitmap is for */ 198 | 199 | int counter_bits; /* how many bits per block counter */ 200 | 201 | /* bitmap chunksize -- how much data does each bit represent? */ 202 | unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */ 203 | unsigned long chunks; /* total number of data chunks for the array */ 204 | 205 | /* We hold a count on the chunk currently being synced, and drop 206 | * it when the last block is started. If the resync is aborted 207 | * midway, we need to be able to drop that count, so we remember 208 | * the counted chunk.. 
209 | */ 210 | unsigned long syncchunk; 211 | 212 | __u64 events_cleared; 213 | int need_sync; 214 | 215 | /* bitmap spinlock */ 216 | spinlock_t lock; 217 | 218 | struct file *file; /* backing disk file */ 219 | struct page *sb_page; /* cached copy of the bitmap file superblock */ 220 | struct page **filemap; /* list of cache pages for the file */ 221 | unsigned long *filemap_attr; /* attributes associated w/ filemap pages */ 222 | unsigned long file_pages; /* number of pages in the file */ 223 | int last_page_size; /* bytes in the last page */ 224 | 225 | unsigned long logattrs; /* used when filemap_attr doesn't exist 226 | * because we are working with a dirty_log 227 | */ 228 | 229 | unsigned long flags; 230 | 231 | int allclean; 232 | 233 | atomic_t behind_writes; 234 | unsigned long behind_writes_used; /* highest actual value at runtime */ 235 | 236 | /* 237 | * the bitmap daemon - periodically wakes up and sweeps the bitmap 238 | * file, cleaning up bits and flushing out pages to disk as necessary 239 | */ 240 | unsigned long daemon_lastrun; /* jiffies of last run */ 241 | unsigned long last_end_sync; /* when we lasted called end_sync to 242 | * update bitmap with resync progress */ 243 | 244 | atomic_t pending_writes; /* pending writes to the bitmap file */ 245 | wait_queue_head_t write_wait; 246 | wait_queue_head_t overflow_wait; 247 | wait_queue_head_t behind_wait; 248 | 249 | struct sysfs_dirent *sysfs_can_clear; 250 | 251 | }; 252 | 253 | /* the bitmap API */ 254 | 255 | /* these are used only by md/bitmap */ 256 | int bitmap_create(mddev_t *mddev); 257 | int bitmap_load(mddev_t *mddev); 258 | void bitmap_flush(mddev_t *mddev); 259 | void bitmap_destroy(mddev_t *mddev); 260 | 261 | void bitmap_print_sb(struct bitmap *bitmap); 262 | void bitmap_update_sb(struct bitmap *bitmap); 263 | 264 | int bitmap_setallbits(struct bitmap *bitmap); 265 | void bitmap_write_all(struct bitmap *bitmap); 266 | 267 | void bitmap_dirty_bits(struct bitmap *bitmap, unsigned 
long s, unsigned long e); 268 | 269 | /* these are exported */ 270 | int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, 271 | unsigned long sectors, int behind); 272 | void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, 273 | unsigned long sectors, int success, int behind); 274 | int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded); 275 | void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted); 276 | void bitmap_close_sync(struct bitmap *bitmap); 277 | void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector); 278 | 279 | void bitmap_unplug(struct bitmap *bitmap); 280 | void bitmap_daemon_work(mddev_t *mddev); 281 | #endif 282 | 283 | #endif 284 | -------------------------------------------------------------------------------- /services/raid6/raid456/39/bitmap.h: -------------------------------------------------------------------------------- 1 | /* 2 | * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 3 | * 4 | * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc. 5 | */ 6 | #ifndef BITMAP_H 7 | #define BITMAP_H 1 8 | 9 | #define BITMAP_MAJOR_LO 3 10 | /* version 4 insists the bitmap is in little-endian order 11 | * with version 3, it is host-endian which is non-portable 12 | */ 13 | #define BITMAP_MAJOR_HI 4 14 | #define BITMAP_MAJOR_HOSTENDIAN 3 15 | 16 | #define BITMAP_MINOR 39 17 | 18 | /* 19 | * in-memory bitmap: 20 | * 21 | * Use 16 bit block counters to track pending writes to each "chunk". 22 | * The 2 high order bits are special-purpose, the first is a flag indicating 23 | * whether a resync is needed. The second is a flag indicating whether a 24 | * resync is active. 
25 | * This means that the counter is actually 14 bits: 26 | * 27 | * +--------+--------+------------------------------------------------+ 28 | * | resync | resync | counter | 29 | * | needed | active | | 30 | * | (0-1) | (0-1) | (0-16383) | 31 | * +--------+--------+------------------------------------------------+ 32 | * 33 | * The "resync needed" bit is set when: 34 | * a '1' bit is read from storage at startup. 35 | * a write request fails on some drives 36 | * a resync is aborted on a chunk with 'resync active' set 37 | * It is cleared (and resync-active set) when a resync starts across all drives 38 | * of the chunk. 39 | * 40 | * 41 | * The "resync active" bit is set when: 42 | * a resync is started on all drives, and resync_needed is set. 43 | * resync_needed will be cleared (as long as resync_active wasn't already set). 44 | * It is cleared when a resync completes. 45 | * 46 | * The counter counts pending write requests, plus the on-disk bit. 47 | * When the counter is '1' and the resync bits are clear, the on-disk 48 | * bit can be cleared as well, thus setting the counter to 0. 49 | * When we set a bit, or in the counter (to start a write), if the fields is 50 | * 0, we first set the disk bit and set the counter to 1. 51 | * 52 | * If the counter is 0, the on-disk bit is clear and the stipe is clean 53 | * Anything that dirties the stipe pushes the counter to 2 (at least) 54 | * and sets the on-disk bit (lazily). 55 | * If a periodic sweep find the counter at 2, it is decremented to 1. 56 | * If the sweep find the counter at 1, the on-disk bit is cleared and the 57 | * counter goes to zero. 
58 | * 59 | * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block 60 | * counters as a fallback when "page" memory cannot be allocated: 61 | * 62 | * Normal case (page memory allocated): 63 | * 64 | * page pointer (32-bit) 65 | * 66 | * [ ] ------+ 67 | * | 68 | * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) 69 | * c1 c2 c2048 70 | * 71 | * Hijacked case (page memory allocation failed): 72 | * 73 | * hijacked page pointer (32-bit) 74 | * 75 | * [ ][ ] (no page memory allocated) 76 | * counter #1 (16-bit) counter #2 (16-bit) 77 | * 78 | */ 79 | 80 | #ifdef __KERNEL__ 81 | 82 | #define PAGE_BITS (PAGE_SIZE << 3) 83 | #define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) 84 | 85 | typedef __u16 bitmap_counter_t; 86 | #define COUNTER_BITS 16 87 | #define COUNTER_BIT_SHIFT 4 88 | #define COUNTER_BYTE_RATIO (COUNTER_BITS / 8) 89 | #define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3) 90 | 91 | #define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1))) 92 | #define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2))) 93 | #define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1) 94 | #define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) 95 | #define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) 96 | #define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) 97 | 98 | /* how many counters per page? */ 99 | #define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) 100 | /* same, except a shift value for more efficient bitops */ 101 | #define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) 102 | /* same, except a mask value for more efficient bitops */ 103 | #define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) 104 | 105 | #define BITMAP_BLOCK_SIZE 512 106 | #define BITMAP_BLOCK_SHIFT 9 107 | 108 | /* how many blocks per chunk? 
(this is variable) */ 109 | #define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->mddev->bitmap_info.chunksize >> BITMAP_BLOCK_SHIFT) 110 | #define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT) 111 | #define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1) 112 | 113 | /* when hijacked, the counters and bits represent even larger "chunks" */ 114 | /* there will be 1024 chunks represented by each counter in the page pointers */ 115 | #define PAGEPTR_BLOCK_RATIO(bitmap) \ 116 | (CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1) 117 | #define PAGEPTR_BLOCK_SHIFT(bitmap) \ 118 | (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1) 119 | #define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1) 120 | 121 | #endif 122 | 123 | /* 124 | * bitmap structures: 125 | */ 126 | 127 | #define BITMAP_MAGIC 0x6d746962 128 | 129 | /* use these for bitmap->flags and bitmap->sb->state bit-fields */ 130 | enum bitmap_state { 131 | BITMAP_STALE = 0x002, /* the bitmap file is out of date or had -EIO */ 132 | BITMAP_WRITE_ERROR = 0x004, /* A write error has occurred */ 133 | BITMAP_HOSTENDIAN = 0x8000, 134 | }; 135 | 136 | /* the superblock at the front of the bitmap file -- little endian */ 137 | typedef struct bitmap_super_s { 138 | __le32 magic; /* 0 BITMAP_MAGIC */ 139 | __le32 version; /* 4 the bitmap major for now, could change... 
*/ 140 | __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */ 141 | __le64 events; /* 24 event counter for the bitmap (1)*/ 142 | __le64 events_cleared;/*32 event counter when last bit cleared (2) */ 143 | __le64 sync_size; /* 40 the size of the md device's sync range(3) */ 144 | __le32 state; /* 48 bitmap state information */ 145 | __le32 chunksize; /* 52 the bitmap chunk size in bytes */ 146 | __le32 daemon_sleep; /* 56 seconds between disk flushes */ 147 | __le32 write_behind; /* 60 number of outstanding write-behind writes */ 148 | 149 | __u8 pad[256 - 64]; /* set to zero */ 150 | } bitmap_super_t; 151 | 152 | /* notes: 153 | * (1) This event counter is updated before the eventcounter in the md superblock 154 | * When a bitmap is loaded, it is only accepted if this event counter is equal 155 | * to, or one greater than, the event counter in the superblock. 156 | * (2) This event counter is updated when the other one is *if*and*only*if* the 157 | * array is not degraded. As bits are not cleared when the array is degraded, 158 | * this represents the last time that any bits were cleared. 159 | * If a device is being added that has an event count with this value or 160 | * higher, it is accepted as conforming to the bitmap. 161 | * (3)This is the number of sectors represented by the bitmap, and is the range that 162 | * resync happens across. For raid1 and raid5/6 it is the size of individual 163 | * devices. For raid10 it is the size of the array. 
164 | */ 165 | 166 | #ifdef __KERNEL__ 167 | 168 | /* the in-memory bitmap is represented by bitmap_pages */ 169 | struct bitmap_page { 170 | /* 171 | * map points to the actual memory page 172 | */ 173 | char *map; 174 | /* 175 | * in emergencies (when map cannot be alloced), hijack the map 176 | * pointer and use it as two counters itself 177 | */ 178 | unsigned int hijacked:1; 179 | /* 180 | * count of dirty bits on the page 181 | */ 182 | unsigned int count:31; 183 | }; 184 | 185 | /* keep track of bitmap file pages that have pending writes on them */ 186 | struct page_list { 187 | struct list_head list; 188 | struct page *page; 189 | }; 190 | 191 | /* the main bitmap structure - one per mddev */ 192 | struct bitmap { 193 | struct bitmap_page *bp; 194 | unsigned long pages; /* total number of pages in the bitmap */ 195 | unsigned long missing_pages; /* number of pages not yet allocated */ 196 | 197 | mddev_t *mddev; /* the md device that the bitmap is for */ 198 | 199 | int counter_bits; /* how many bits per block counter */ 200 | 201 | /* bitmap chunksize -- how much data does each bit represent? */ 202 | unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */ 203 | unsigned long chunks; /* total number of data chunks for the array */ 204 | 205 | /* We hold a count on the chunk currently being synced, and drop 206 | * it when the last block is started. If the resync is aborted 207 | * midway, we need to be able to drop that count, so we remember 208 | * the counted chunk.. 
209 | */ 210 | unsigned long syncchunk; 211 | 212 | __u64 events_cleared; 213 | int need_sync; 214 | 215 | /* bitmap spinlock */ 216 | spinlock_t lock; 217 | 218 | struct file *file; /* backing disk file */ 219 | struct page *sb_page; /* cached copy of the bitmap file superblock */ 220 | struct page **filemap; /* list of cache pages for the file */ 221 | unsigned long *filemap_attr; /* attributes associated w/ filemap pages */ 222 | unsigned long file_pages; /* number of pages in the file */ 223 | int last_page_size; /* bytes in the last page */ 224 | 225 | unsigned long logattrs; /* used when filemap_attr doesn't exist 226 | * because we are working with a dirty_log 227 | */ 228 | 229 | unsigned long flags; 230 | 231 | int allclean; 232 | 233 | atomic_t behind_writes; 234 | unsigned long behind_writes_used; /* highest actual value at runtime */ 235 | 236 | /* 237 | * the bitmap daemon - periodically wakes up and sweeps the bitmap 238 | * file, cleaning up bits and flushing out pages to disk as necessary 239 | */ 240 | unsigned long daemon_lastrun; /* jiffies of last run */ 241 | unsigned long last_end_sync; /* when we lasted called end_sync to 242 | * update bitmap with resync progress */ 243 | 244 | atomic_t pending_writes; /* pending writes to the bitmap file */ 245 | wait_queue_head_t write_wait; 246 | wait_queue_head_t overflow_wait; 247 | wait_queue_head_t behind_wait; 248 | 249 | struct sysfs_dirent *sysfs_can_clear; 250 | 251 | }; 252 | 253 | /* the bitmap API */ 254 | 255 | /* these are used only by md/bitmap */ 256 | int bitmap_create(mddev_t *mddev); 257 | int bitmap_load(mddev_t *mddev); 258 | void bitmap_flush(mddev_t *mddev); 259 | void bitmap_destroy(mddev_t *mddev); 260 | 261 | void bitmap_print_sb(struct bitmap *bitmap); 262 | void bitmap_update_sb(struct bitmap *bitmap); 263 | 264 | int bitmap_setallbits(struct bitmap *bitmap); 265 | void bitmap_write_all(struct bitmap *bitmap); 266 | 267 | void bitmap_dirty_bits(struct bitmap *bitmap, unsigned 
long s, unsigned long e); 268 | 269 | /* these are exported */ 270 | int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, 271 | unsigned long sectors, int behind); 272 | void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, 273 | unsigned long sectors, int success, int behind); 274 | int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded); 275 | void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted); 276 | void bitmap_close_sync(struct bitmap *bitmap); 277 | void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector); 278 | 279 | void bitmap_unplug(struct bitmap *bitmap); 280 | void bitmap_daemon_work(mddev_t *mddev); 281 | #endif 282 | 283 | #endif 284 | -------------------------------------------------------------------------------- /services/raid6/gpq/gpq.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This work is licensed under the terms of the GNU GPL, version 2. See 3 | * the GPL-COPYING file in the top-level directory. 4 | * 5 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 6 | * All rights reserved. 7 | */ 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "../../../kgpu/kgpu.h" 16 | #include "../gpq.h" 17 | 18 | 19 | struct gpq_async_data { 20 | struct completion *c; 21 | int disks; 22 | size_t dsize; 23 | void **dps; 24 | }; 25 | 26 | /* customized log function */ 27 | #define gpq_log(level, ...) kgpu_do_log(level, "gpq", ##__VA_ARGS__) 28 | #define dbg(...) 
gpq_log(KGPU_LOG_DEBUG, ##__VA_ARGS__) 29 | 30 | static int replace_global = 0; 31 | static int use_hybrid = 0; 32 | 33 | module_param(replace_global, int, 0444); 34 | MODULE_PARM_DESC(replace_global, "replace global pq algorithm with gpq"); 35 | 36 | module_param(use_hybrid, int, 0444); 37 | MODULE_PARM_DESC(use_hybrid, 38 | "use hybrid pq computing, which uses both CPU and GPU"); 39 | 40 | static struct raid6_calls oldcall; 41 | 42 | static void make_load_policy(void) 43 | { 44 | ; 45 | } 46 | 47 | static size_t decide_gpu_load(int disks, size_t dsize) 48 | { 49 | if (dsize > (64*1024)) { 50 | return roundup(dsize/16, PAGE_SIZE); 51 | } 52 | return 0; 53 | } 54 | 55 | static void cpu_gen_syndrome(int disks, size_t dsize, void **dps) 56 | { 57 | if (replace_global) 58 | oldcall.gen_syndrome(disks, dsize, dps); 59 | else 60 | raid6_call.gen_syndrome(disks, dsize, dps); 61 | } 62 | 63 | static void end_syndrome_gen( 64 | int disks, size_t dsize, void **dps, struct kgpu_request *req) 65 | { 66 | int b; 67 | size_t rdsize = roundup(dsize, PAGE_SIZE); 68 | 69 | for (b=disks-2; bout))+(b-disks+2)*rdsize, dsize); 71 | } 72 | } 73 | 74 | static int async_gpu_callback( 75 | struct kgpu_request *req) 76 | { 77 | struct gpq_async_data *adata = (struct gpq_async_data*)req->kdata; 78 | 79 | end_syndrome_gen( 80 | adata->disks, adata->dsize, adata->dps, req); 81 | 82 | complete(adata->c); 83 | 84 | kgpu_vfree(req->in); 85 | kgpu_free_request(req); 86 | kfree(adata); 87 | 88 | return 0; 89 | } 90 | 91 | /* 92 | * A NULL completion c means synchronized call 93 | */ 94 | static void gpu_gen_syndrome( 95 | int disks, size_t dsize, void **dps, struct completion *c) 96 | { 97 | size_t rsz, rdsize; 98 | 99 | struct raid6_pq_data *data; 100 | struct kgpu_request *req; 101 | char *buf; 102 | 103 | int b; 104 | 105 | rdsize = roundup(dsize, PAGE_SIZE); 106 | rsz = roundup(rdsize*disks, PAGE_SIZE) + sizeof(struct raid6_pq_data); 107 | 108 | buf = (char*)kgpu_vmalloc(rsz); 109 | if 
(unlikely(!buf)) { 110 | gpq_log(KGPU_LOG_ERROR, "GPU buffer allocation failed\n"); 111 | return; 112 | } 113 | 114 | req = kgpu_alloc_request(); 115 | if (unlikely(!req)) { 116 | gpq_log(KGPU_LOG_ERROR, 117 | "GPU request allocation failed\n"); 118 | return; 119 | } 120 | 121 | strcpy(req->service_name, "raid6_pq"); 122 | req->in = buf; 123 | req->out = buf+rdsize*(disks-2); 124 | req->insize = rsz; 125 | req->outsize = rdsize*2; 126 | req->udata = buf+roundup(rdsize*disks, PAGE_SIZE); 127 | req->udatasize = sizeof(struct raid6_pq_data); 128 | 129 | data = (struct raid6_pq_data*)req->udata; 130 | data->dsize = (unsigned long)dsize; 131 | data->nr_d = (unsigned int)disks; 132 | 133 | for (b=0; bcallback = async_gpu_callback; 147 | req->kdata = adata; 148 | 149 | adata->c = c; 150 | adata->disks = disks; 151 | adata->dsize = dsize; 152 | adata->dps = dps; 153 | 154 | kgpu_call_async(req); 155 | } 156 | } else { 157 | if (kgpu_call_sync(req)) { 158 | gpq_log(KGPU_LOG_ERROR, "callgpu failed\n"); 159 | } else { 160 | end_syndrome_gen(disks, dsize, dps, req); 161 | } 162 | 163 | kgpu_vfree(buf); 164 | kgpu_free_request(req); 165 | } 166 | } 167 | 168 | #define SPLIT_NR 4 169 | 170 | static void* __multi_gpu_gen_syndrome( 171 | int disks, size_t dsize, void **dps, struct completion cs[], int async) 172 | { 173 | void *ret = NULL; 174 | 175 | if ((dsize%(SPLIT_NR*PAGE_SIZE)) != 0) { 176 | if (async) { 177 | single_thread_async: 178 | init_completion(&cs[0]); 179 | gpu_gen_syndrome(disks, dsize, dps, &cs[0]); 180 | } else { 181 | gpu_gen_syndrome(disks, dsize, dps, NULL); 182 | } 183 | } else { 184 | int i, j; 185 | void **ps; 186 | size_t tsksz = dsize/SPLIT_NR; 187 | 188 | ps = kmalloc(sizeof(void*)*SPLIT_NR*disks, GFP_KERNEL); 189 | if (!ps) { 190 | gpq_log(KGPU_LOG_ERROR, "out of memory for dps\n"); 191 | if (async) { 192 | goto single_thread_async; 193 | } else { 194 | gpu_gen_syndrome(disks, dsize, dps, NULL); 195 | } 196 | } else { 197 | for (i=0; i 0) { 248 | for 
(i=0; i= PAGE_SIZE) 251 | cpu_gen_syndrome(disks, PAGE_SIZE, cdps); 252 | else 253 | cpu_gen_syndrome(disks, csize, cdps); 254 | csize -= PAGE_SIZE; 255 | } 256 | 257 | if (p) { 258 | for (i=0; i 277 | 278 | static long test_pq(int disks, size_t dsize, const struct raid6_calls *rc) 279 | { 280 | struct timeval t0, t1; 281 | long t; 282 | int i; 283 | void **dps = vmalloc(sizeof(void*)*disks); 284 | char *data = vmalloc(disks*dsize); 285 | 286 | if (!data || !dps) { 287 | gpq_log(KGPU_LOG_ERROR, 288 | "out of memory for %s test\n", 289 | rc->name); 290 | if (dps) vfree(dps); 291 | if (data) vfree(data); 292 | return 0; 293 | } 294 | 295 | for (i=0; igen_syndrome(disks, dsize, dps); 301 | do_gettimeofday(&t1); 302 | 303 | t = 1000000*(t1.tv_sec-t0.tv_sec) + 304 | ((int)(t1.tv_usec) - (int)(t0.tv_usec)); 305 | 306 | vfree(dps); 307 | vfree(data); 308 | 309 | return t; 310 | } 311 | 312 | long test_gpq(int disks, size_t dsize) 313 | { 314 | return test_pq(disks, dsize, &raid6_gpq); 315 | } 316 | EXPORT_SYMBOL_GPL(test_gpq); 317 | 318 | long test_cpq(int disks, size_t dsize) 319 | { 320 | return test_pq(disks, dsize, replace_global? 
&oldcall:&raid6_call); 321 | } 322 | EXPORT_SYMBOL_GPL(test_cpq); 323 | 324 | static long test_recov_2data(int disks, size_t dsize) 325 | { 326 | struct timeval t0, t1; 327 | long t; 328 | int i; 329 | /* void **dps = vmalloc(sizeof(void*)*disks); */ 330 | void **dps = kmalloc(sizeof(void*)*disks, GFP_KERNEL); 331 | /* char *data = vmalloc(disks*dsize); */ 332 | char *data = kmalloc(disks*dsize, GFP_KERNEL); 333 | 334 | if (!data || !dps) { 335 | gpq_log(KGPU_LOG_ERROR, 336 | "out of memory for RAID6 recov test\n"); 337 | if (dps) kfree(dps); 338 | if (data) kfree(data); 339 | return 0; 340 | } 341 | 342 | for (i=0; i>= TEST_TIMES_SHIFT; 388 | gpq_log(KGPU_LOG_PRINT, 389 | "md recovery PAGE_SIZE*%d disks %8luMB/s %8luMB/s %8luus\n", 390 | TEST_NDISKS, (PAGE_SIZE*(TEST_NDISKS-2))/t, (PAGE_SIZE*2)/t, 391 | t); 392 | */ 393 | for (sz = MIN_DSZ; sz <= MAX_DSZ; sz += MIN_DSZ) 394 | { 395 | size_t tsz = sz*(TEST_NDISKS-2); 396 | 397 | t=0; 398 | for (i=0; i>= TEST_TIMES_SHIFT; 402 | 403 | gpq_log(KGPU_LOG_PRINT, 404 | "PQ Size: %10luKB, %10luKB, %10s: %8luMB/s\n", 405 | sz>>10, 406 | tsz>>10, 407 | gcall->name, 408 | tsz/t 409 | ); 410 | 411 | //for (rc = raid6_algos; *rc; rc++) { 412 | // if (!(*rc)->valid || (*rc)->valid()) { 413 | /* t=0; 414 | for (i=0; i>= TEST_TIMES_SHIFT; 418 | 419 | gpq_log(KGPU_LOG_PRINT, 420 | "PQ Size: %10luKB, %10luKB, %10s: %8luMB/s\n", 421 | sz>>10, 422 | tsz>>10, 423 | (*rc)->name, 424 | tsz/t 425 | );*/ 426 | // } 427 | //} 428 | } 429 | } 430 | 431 | static int __init raid6_gpq_init(void) 432 | { 433 | if (replace_global) { 434 | oldcall = raid6_call; 435 | raid6_call = raid6_gpq; 436 | gpq_log(KGPU_LOG_PRINT, 437 | "global pq algorithm replaced with gpq\n"); 438 | } 439 | if (use_hybrid) { 440 | make_load_policy(); 441 | } 442 | 443 | do_benchmark(); 444 | 445 | return 0; 446 | } 447 | 448 | static void __exit raid6_gpq_exit(void) 449 | { 450 | if (replace_global) { 451 | raid6_call = oldcall; 452 | } 453 | } 454 | 455 | 
module_init(raid6_gpq_init); 456 | module_exit(raid6_gpq_exit); 457 | 458 | MODULE_LICENSE("GPL"); 459 | MODULE_DESCRIPTION("gpq - GPU RAID6 PQ computing module"); 460 | MODULE_AUTHOR("Weibin Sun"); 461 | -------------------------------------------------------------------------------- /services/gaes/ecryptfs/file.c: -------------------------------------------------------------------------------- 1 | /** 2 | * eCryptfs: Linux filesystem encryption layer 3 | * 4 | * Copyright (C) 1997-2004 Erez Zadok 5 | * Copyright (C) 2001-2004 Stony Brook University 6 | * Copyright (C) 2004-2007 International Business Machines Corp. 7 | * Author(s): Michael A. Halcrow 8 | * Michael C. Thompson 9 | * 10 | * This program is free software; you can redistribute it and/or 11 | * modify it under the terms of the GNU General Public License as 12 | * published by the Free Software Foundation; either version 2 of the 13 | * License, or (at your option) any later version. 14 | * 15 | * This program is distributed in the hope that it will be useful, but 16 | * WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU General Public License 21 | * along with this program; if not, write to the Free Software 22 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 23 | * 02111-1307, USA. 24 | * 25 | * See the GPL-COPYING file in the top-level directory. 26 | * 27 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 28 | * All rights reserved. 29 | * 30 | */ 31 | 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include "ecryptfs_kernel.h" 41 | 42 | /** 43 | * ecryptfs_read_update_atime 44 | * 45 | * generic_file_read updates the atime of upper layer inode. 
But, it 46 | * doesn't give us a chance to update the atime of the lower layer 47 | * inode. This function is a wrapper to generic_file_read. It 48 | * updates the atime of the lower level inode if generic_file_read 49 | * returns without any errors. This is to be used only for file reads. 50 | * The function to be used for directory reads is ecryptfs_read. 51 | */ 52 | static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, 53 | const struct iovec *iov, 54 | unsigned long nr_segs, loff_t pos) 55 | { 56 | ssize_t rc; 57 | struct dentry *lower_dentry; 58 | struct vfsmount *lower_vfsmount; 59 | struct file *file = iocb->ki_filp; 60 | 61 | rc = generic_file_aio_read(iocb, iov, nr_segs, pos); 62 | /* 63 | * Even though this is a async interface, we need to wait 64 | * for IO to finish to update atime 65 | */ 66 | if (-EIOCBQUEUED == rc) 67 | rc = wait_on_sync_kiocb(iocb); 68 | if (rc >= 0) { 69 | lower_dentry = ecryptfs_dentry_to_lower(file->f_path.dentry); 70 | lower_vfsmount = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry); 71 | touch_atime(lower_vfsmount, lower_dentry); 72 | } 73 | return rc; 74 | } 75 | 76 | struct ecryptfs_getdents_callback { 77 | void *dirent; 78 | struct dentry *dentry; 79 | filldir_t filldir; 80 | int filldir_called; 81 | int entries_written; 82 | }; 83 | 84 | /* Inspired by generic filldir in fs/readdir.c */ 85 | static int 86 | ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen, 87 | loff_t offset, u64 ino, unsigned int d_type) 88 | { 89 | struct ecryptfs_getdents_callback *buf = 90 | (struct ecryptfs_getdents_callback *)dirent; 91 | size_t name_size; 92 | char *name; 93 | int rc; 94 | 95 | buf->filldir_called++; 96 | rc = ecryptfs_decode_and_decrypt_filename(&name, &name_size, 97 | buf->dentry, lower_name, 98 | lower_namelen); 99 | if (rc) { 100 | printk(KERN_ERR "%s: Error attempting to decode and decrypt " 101 | "filename [%s]; rc = [%d]\n", __func__, lower_name, 102 | rc); 103 | goto out; 104 | } 105 | rc = 
buf->filldir(buf->dirent, name, name_size, offset, ino, d_type); 106 | kfree(name); 107 | if (rc >= 0) 108 | buf->entries_written++; 109 | out: 110 | return rc; 111 | } 112 | 113 | /** 114 | * ecryptfs_readdir 115 | * @file: The eCryptfs directory file 116 | * @dirent: Directory entry handle 117 | * @filldir: The filldir callback function 118 | */ 119 | static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) 120 | { 121 | int rc; 122 | struct file *lower_file; 123 | struct inode *inode; 124 | struct ecryptfs_getdents_callback buf; 125 | 126 | lower_file = ecryptfs_file_to_lower(file); 127 | lower_file->f_pos = file->f_pos; 128 | inode = file->f_path.dentry->d_inode; 129 | memset(&buf, 0, sizeof(buf)); 130 | buf.dirent = dirent; 131 | buf.dentry = file->f_path.dentry; 132 | buf.filldir = filldir; 133 | buf.filldir_called = 0; 134 | buf.entries_written = 0; 135 | rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf); 136 | file->f_pos = lower_file->f_pos; 137 | if (rc < 0) 138 | goto out; 139 | if (buf.filldir_called && !buf.entries_written) 140 | goto out; 141 | if (rc >= 0) 142 | fsstack_copy_attr_atime(inode, 143 | lower_file->f_path.dentry->d_inode); 144 | out: 145 | return rc; 146 | } 147 | 148 | struct kmem_cache *ecryptfs_file_info_cache; 149 | 150 | /** 151 | * ecryptfs_open 152 | * @inode: inode speciying file to open 153 | * @file: Structure to return filled in 154 | * 155 | * Opens the file specified by inode. 
156 | * 157 | * Returns zero on success; non-zero otherwise 158 | */ 159 | static int ecryptfs_open(struct inode *inode, struct file *file) 160 | { 161 | int rc = 0; 162 | struct ecryptfs_crypt_stat *crypt_stat = NULL; 163 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat; 164 | struct dentry *ecryptfs_dentry = file->f_path.dentry; 165 | /* Private value of ecryptfs_dentry allocated in 166 | * ecryptfs_lookup() */ 167 | struct dentry *lower_dentry; 168 | struct ecryptfs_file_info *file_info; 169 | 170 | mount_crypt_stat = &ecryptfs_superblock_to_private( 171 | ecryptfs_dentry->d_sb)->mount_crypt_stat; 172 | if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) 173 | && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR) 174 | || (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC) 175 | || (file->f_flags & O_APPEND))) { 176 | printk(KERN_WARNING "Mount has encrypted view enabled; " 177 | "files may only be read\n"); 178 | rc = -EPERM; 179 | goto out; 180 | } 181 | /* Released in ecryptfs_release or end of function if failure */ 182 | file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL); 183 | ecryptfs_set_file_private(file, file_info); 184 | if (!file_info) { 185 | ecryptfs_printk(KERN_ERR, 186 | "Error attempting to allocate memory\n"); 187 | rc = -ENOMEM; 188 | goto out; 189 | } 190 | lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 191 | crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; 192 | mutex_lock(&crypt_stat->cs_mutex); 193 | if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) { 194 | ecryptfs_printk(KERN_DEBUG, "Setting flags for stat...\n"); 195 | /* Policy code enabled in future release */ 196 | crypt_stat->flags |= (ECRYPTFS_POLICY_APPLIED 197 | | ECRYPTFS_ENCRYPTED); 198 | } 199 | mutex_unlock(&crypt_stat->cs_mutex); 200 | rc = ecryptfs_init_persistent_file(ecryptfs_dentry); 201 | if (rc) { 202 | printk(KERN_ERR "%s: Error attempting to initialize " 203 | "the persistent file for the dentry with 
name " 204 | "[%s]; rc = [%d]\n", __func__, 205 | ecryptfs_dentry->d_name.name, rc); 206 | goto out_free; 207 | } 208 | if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE) 209 | == O_RDONLY && (file->f_flags & O_ACCMODE) != O_RDONLY) { 210 | rc = -EPERM; 211 | printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs " 212 | "file must hence be opened RO\n", __func__); 213 | goto out_free; 214 | } 215 | ecryptfs_set_file_lower( 216 | file, ecryptfs_inode_to_private(inode)->lower_file); 217 | if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 218 | ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); 219 | mutex_lock(&crypt_stat->cs_mutex); 220 | crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 221 | mutex_unlock(&crypt_stat->cs_mutex); 222 | rc = 0; 223 | goto out; 224 | } 225 | mutex_lock(&crypt_stat->cs_mutex); 226 | if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED) 227 | || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) { 228 | rc = ecryptfs_read_metadata(ecryptfs_dentry); 229 | if (rc) { 230 | ecryptfs_printk(KERN_DEBUG, 231 | "Valid headers not found\n"); 232 | if (!(mount_crypt_stat->flags 233 | & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { 234 | rc = -EIO; 235 | printk(KERN_WARNING "Either the lower file " 236 | "is not in a valid eCryptfs format, " 237 | "or the key could not be retrieved. 
" 238 | "Plaintext passthrough mode is not " 239 | "enabled; returning -EIO\n"); 240 | mutex_unlock(&crypt_stat->cs_mutex); 241 | goto out_free; 242 | } 243 | rc = 0; 244 | crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 245 | mutex_unlock(&crypt_stat->cs_mutex); 246 | goto out; 247 | } 248 | } 249 | mutex_unlock(&crypt_stat->cs_mutex); 250 | ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = " 251 | "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino, 252 | (unsigned long long)i_size_read(inode)); 253 | goto out; 254 | out_free: 255 | kmem_cache_free(ecryptfs_file_info_cache, 256 | ecryptfs_file_to_private(file)); 257 | out: 258 | return rc; 259 | } 260 | 261 | static int ecryptfs_flush(struct file *file, fl_owner_t td) 262 | { 263 | int rc = 0; 264 | struct file *lower_file = NULL; 265 | 266 | if (file) { 267 | lower_file = ecryptfs_file_to_lower(file); 268 | if (lower_file && lower_file->f_op && lower_file->f_op->flush) 269 | rc = lower_file->f_op->flush(lower_file, td); 270 | } 271 | return rc; 272 | } 273 | 274 | static int ecryptfs_release(struct inode *inode, struct file *file) 275 | { 276 | /*if (file && ecryptfs_file_to_private(file)) 277 | kmem_cache_free(ecryptfs_file_info_cache, 278 | ecryptfs_file_to_private(file));*/ 279 | return 0; 280 | } 281 | 282 | static int 283 | ecryptfs_fsync(struct file *file, int datasync) 284 | { 285 | if (ecryptfs_file_to_lower(file)) 286 | return vfs_fsync(ecryptfs_file_to_lower(file), datasync); 287 | return 0; 288 | } 289 | 290 | static int ecryptfs_fasync(int fd, struct file *file, int flag) 291 | { 292 | int rc = 0; 293 | struct file *lower_file = NULL; 294 | 295 | lower_file = ecryptfs_file_to_lower(file); 296 | if (lower_file && lower_file->f_op && lower_file->f_op->fasync) 297 | rc = lower_file->f_op->fasync(fd, lower_file, flag); 298 | return rc; 299 | } 300 | 301 | static long 302 | ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 303 | { 304 | struct file *lower_file = 
NULL; 305 | long rc = -ENOTTY; 306 | 307 | if (ecryptfs_file_to_private(file)) 308 | lower_file = ecryptfs_file_to_lower(file); 309 | if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl) 310 | rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); 311 | return rc; 312 | } 313 | 314 | #ifdef CONFIG_COMPAT 315 | static long 316 | ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 317 | { 318 | struct file *lower_file = NULL; 319 | long rc = -ENOIOCTLCMD; 320 | 321 | if (ecryptfs_file_to_private(file)) 322 | lower_file = ecryptfs_file_to_lower(file); 323 | if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl) 324 | rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); 325 | return rc; 326 | } 327 | #endif 328 | 329 | const struct file_operations ecryptfs_dir_fops = { 330 | .readdir = ecryptfs_readdir, 331 | .read = generic_read_dir, 332 | .unlocked_ioctl = ecryptfs_unlocked_ioctl, 333 | #ifdef CONFIG_COMPAT 334 | .compat_ioctl = ecryptfs_compat_ioctl, 335 | #endif 336 | .open = ecryptfs_open, 337 | .flush = ecryptfs_flush, 338 | .release = ecryptfs_release, 339 | .fsync = ecryptfs_fsync, 340 | .fasync = ecryptfs_fasync, 341 | .splice_read = generic_file_splice_read, 342 | .llseek = default_llseek, 343 | }; 344 | 345 | static ssize_t ecryptfs_file_write(struct file *f, const char __user *buf, 346 | size_t sz, loff_t *poffset) 347 | { 348 | /* printk("[g-ecryptfs] Info: write %u at %u\n", sz, *poffset); */ 349 | ecryptfs_write2(f->f_path.dentry->d_inode, (char*)buf, *poffset, sz); 350 | *poffset += sz; 351 | return sz; 352 | } 353 | 354 | /*static ssize_t ecryptfs_file_write(struct file *f, const char __user *buf, 355 | size_t sz, loff_t *poffset) 356 | { 357 | struct page **pgs; 358 | char *virt; 359 | int i,j; 360 | ssize_t rt; 361 | int nrpages = sz/PAGE_SIZE; 362 | if (sz&(PAGE_SIZE-1)) 363 | nrpages += 1; 364 | 365 | pgs = kmalloc(nrpages*sizeof(struct page*). 
GFP_KERNEL); 366 | for (i=0; i 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include "../../../kgpu/kgpu.h" 28 | #include "../gaesk.h" 29 | 30 | /* customized log function */ 31 | #define g_log(level, ...) kgpu_do_log(level, "gaes_ctr", ##__VA_ARGS__) 32 | #define dbg(...) g_log(KGPU_LOG_DEBUG, ##__VA_ARGS__) 33 | 34 | struct crypto_ctr_ctx { 35 | struct crypto_cipher *child; 36 | struct crypto_gaes_ctr_info info; 37 | u8 key[AES_MAX_KEY_SIZE]; 38 | }; 39 | 40 | /* only take the low-64bit for adding */ 41 | static void big_u128_add(u8 *ctr, u64 offset, u8 *res) 42 | { 43 | u64 c = be64_to_cpu(*(u64*)(ctr+8)); 44 | 45 | c += offset; 46 | *(u64*)(res) = *(u64*)(ctr); 47 | *(u64*)(res+8) = cpu_to_be64(c); 48 | } 49 | 50 | static int _crypto_ctr_setkey(struct crypto_tfm *parent, const u8 *key, 51 | unsigned int keylen, int use_lctr) 52 | { 53 | struct crypto_ctr_ctx *ctx = crypto_tfm_ctx(parent); 54 | struct crypto_cipher *child = ctx->child; 55 | struct crypto_gaes_ctr_config *cfg = NULL; 56 | int err; 57 | 58 | crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); 59 | crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) & 60 | CRYPTO_TFM_REQ_MASK); 61 | 62 | /* local couter cipher, with explict range size */ 63 | if (use_lctr) { 64 | if (keylen > AES_MAX_KEY_SIZE) { 65 | cfg = (struct crypto_gaes_ctr_config*)(key+AES_MAX_KEY_SIZE); 66 | keylen = cfg->key_length; 67 | ctx->info.ctr_range = cfg->ctr_range; 68 | if (cfg->ctr_range > PAGE_SIZE) { 69 | g_log(KGPU_LOG_ERROR, "local counter range " 70 | "%u is larger than PAGE_SIZE!", 71 | cfg->ctr_range); 72 | return -EINVAL; 73 | } 74 | } else { 75 | ctx->info.ctr_range = PAGE_SIZE; 76 | } 77 | } else { 78 | ctx->info.ctr_range = 0; 79 | } 80 | 81 | err = crypto_cipher_setkey(child, key, keylen); 82 | err = crypto_aes_expand_key(/* yes, the next line is dangerous */ 83 | (struct crypto_aes_ctx*)(&ctx->info), 84 | key, keylen); 
85 | 86 | cvt_endian_u32(ctx->info.key_enc, AES_MAX_KEYLENGTH_U32); 87 | cvt_endian_u32(ctx->info.key_dec, AES_MAX_KEYLENGTH_U32); 88 | 89 | memcpy(ctx->key, key, keylen); 90 | 91 | crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) & 92 | CRYPTO_TFM_RES_MASK); 93 | 94 | return err; 95 | } 96 | 97 | static int crypto_ctr_setkey(struct crypto_tfm *parent, const u8 *key, 98 | unsigned int keylen) 99 | { 100 | return _crypto_ctr_setkey(parent, key, keylen, 0); 101 | } 102 | 103 | static int crypto_lctr_setkey(struct crypto_tfm *parent, const u8 *key, 104 | unsigned int keylen) 105 | { 106 | return _crypto_ctr_setkey(parent, key, keylen, 1); 107 | } 108 | 109 | static void crypto_ctr_crypt_final(struct blkcipher_walk *walk, 110 | struct crypto_cipher *tfm, 111 | unsigned int donebytes, 112 | unsigned int ctr_range) 113 | { 114 | unsigned int bsize = crypto_cipher_blocksize(tfm); 115 | unsigned long alignmask = crypto_cipher_alignmask(tfm); 116 | u8 *ctrblk = walk->iv; 117 | u8 tmp[bsize + alignmask]; 118 | u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1); 119 | u8 *src = walk->src.virt.addr; 120 | u8 *dst = walk->dst.virt.addr; 121 | unsigned int nbytes = walk->nbytes; 122 | 123 | /* for local counter mode */ 124 | if (ctr_range && donebytes%ctr_range==0) { 125 | memset(ctrblk, 0, bsize); 126 | } 127 | 128 | crypto_cipher_encrypt_one(tfm, keystream, ctrblk); 129 | crypto_xor(keystream, src, nbytes); 130 | memcpy(dst, keystream, nbytes); 131 | 132 | crypto_inc(ctrblk, bsize); 133 | } 134 | 135 | static int crypto_ctr_crypt_segment(struct blkcipher_walk *walk, 136 | struct crypto_cipher *tfm, 137 | unsigned int donebytes, 138 | unsigned int ctr_range) 139 | { 140 | void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = 141 | crypto_cipher_alg(tfm)->cia_encrypt; 142 | unsigned int bsize = crypto_cipher_blocksize(tfm); 143 | u8 *ctrblk = walk->iv; 144 | u8 *src = walk->src.virt.addr; 145 | u8 *dst = walk->dst.virt.addr; 146 | unsigned int nbytes = walk->nbytes; 147 
| 148 | do { 149 | /* for local counter mode */ 150 | if (ctr_range && donebytes%ctr_range==0) { 151 | memset(ctrblk, 0, bsize); 152 | } 153 | 154 | /* create keystream */ 155 | fn(crypto_cipher_tfm(tfm), dst, ctrblk); 156 | crypto_xor(dst, src, bsize); 157 | 158 | /* increment counter in counterblock */ 159 | crypto_inc(ctrblk, bsize); 160 | 161 | src += bsize; 162 | dst += bsize; 163 | donebytes += bsize; 164 | } while ((nbytes -= bsize) >= bsize); 165 | 166 | return nbytes; 167 | } 168 | 169 | static int crypto_ctr_crypt_inplace(struct blkcipher_walk *walk, 170 | struct crypto_cipher *tfm, 171 | unsigned int donebytes, 172 | unsigned int ctr_range) 173 | { 174 | void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = 175 | crypto_cipher_alg(tfm)->cia_encrypt; 176 | unsigned int bsize = crypto_cipher_blocksize(tfm); 177 | unsigned long alignmask = crypto_cipher_alignmask(tfm); 178 | unsigned int nbytes = walk->nbytes; 179 | u8 *ctrblk = walk->iv; 180 | u8 *src = walk->src.virt.addr; 181 | u8 tmp[bsize + alignmask]; 182 | u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1); 183 | 184 | do { 185 | /* for local counter mode */ 186 | if (ctr_range && donebytes%ctr_range==0) { 187 | memset(ctrblk, 0, bsize); 188 | } 189 | 190 | /* create keystream */ 191 | fn(crypto_cipher_tfm(tfm), keystream, ctrblk); 192 | crypto_xor(src, keystream, bsize); 193 | 194 | /* increment counter in counterblock */ 195 | crypto_inc(ctrblk, bsize); 196 | 197 | src += bsize; 198 | donebytes += bsize; 199 | } while ((nbytes -= bsize) >= bsize); 200 | 201 | return nbytes; 202 | } 203 | 204 | static int 205 | crypto_ctr_crypt(struct blkcipher_desc *desc, 206 | struct scatterlist *dst, struct scatterlist *src, 207 | unsigned int nbytes) 208 | { 209 | struct blkcipher_walk walk; 210 | struct crypto_blkcipher *tfm = desc->tfm; 211 | struct crypto_ctr_ctx *ctx = crypto_blkcipher_ctx(tfm); 212 | struct crypto_cipher *child = ctx->child; 213 | unsigned int bsize = crypto_cipher_blocksize(child); 214 | int 
err; 215 | unsigned int donebytes = 0; 216 | 217 | blkcipher_walk_init(&walk, dst, src, nbytes); 218 | err = blkcipher_walk_virt_block(desc, &walk, bsize); 219 | 220 | while (walk.nbytes >= bsize) { 221 | if (walk.src.virt.addr == walk.dst.virt.addr) 222 | nbytes = crypto_ctr_crypt_inplace( 223 | &walk, child, donebytes, ctx->info.ctr_range); 224 | else 225 | nbytes = crypto_ctr_crypt_segment( 226 | &walk, child, donebytes, ctx->info.ctr_range); 227 | 228 | donebytes += walk.nbytes-nbytes; 229 | err = blkcipher_walk_done(desc, &walk, nbytes); 230 | } 231 | 232 | if (walk.nbytes) { 233 | if (ctx->info.ctr_range) { 234 | g_log(KGPU_LOG_ALERT, "We got a problem: " 235 | "the size of data, %u, is not a multiple of " 236 | "block size ...", nbytes); 237 | } 238 | crypto_ctr_crypt_final(&walk, child, 239 | donebytes, ctx->info.ctr_range); 240 | err = blkcipher_walk_done(desc, &walk, 0); 241 | } 242 | 243 | return err; 244 | } 245 | 246 | static int 247 | _crypto_gaes_ctr_crypt( 248 | struct blkcipher_desc *desc, 249 | struct scatterlist *dst, struct scatterlist *src, 250 | unsigned int sz) 251 | { 252 | int err=0; 253 | unsigned int rsz = roundup(sz, PAGE_SIZE); 254 | unsigned int nbytes; 255 | u8 *ctrblk; 256 | 257 | struct kgpu_request *req; 258 | char *buf; 259 | 260 | struct crypto_blkcipher *tfm = desc->tfm; 261 | struct crypto_ctr_ctx *ctx = crypto_blkcipher_ctx(tfm); 262 | struct blkcipher_walk walk; 263 | 264 | blkcipher_walk_init(&walk, dst, src, sz); 265 | 266 | buf = kgpu_vmalloc(rsz+sizeof(struct crypto_gaes_ctr_info)); 267 | if (!buf) { 268 | g_log(KGPU_LOG_ERROR, "GPU buffer is null.\n"); 269 | return -EFAULT; 270 | } 271 | 272 | req = kgpu_alloc_request(); 273 | if (!req) { 274 | kgpu_vfree(buf); 275 | g_log(KGPU_LOG_ERROR, "can't allocate request\n"); 276 | return -EFAULT; 277 | } 278 | 279 | req->in = buf; 280 | req->out = buf; 281 | req->insize = rsz+sizeof(struct crypto_gaes_ctr_info); 282 | req->outsize = sz; 283 | req->udatasize = sizeof(struct 
crypto_gaes_ctr_info); 284 | req->udata = buf+rsz; 285 | 286 | err = blkcipher_walk_virt(desc, &walk); 287 | ctrblk = walk.iv; 288 | 289 | while ((nbytes = walk.nbytes)) { 290 | u8 *wsrc = walk.src.virt.addr; 291 | 292 | memcpy(buf, wsrc, nbytes); 293 | buf += nbytes; 294 | 295 | err = blkcipher_walk_done(desc, &walk, 0); 296 | } 297 | 298 | memcpy(req->udata, &(ctx->info), sizeof(struct crypto_gaes_ctr_info)); 299 | if (ctrblk) 300 | memcpy(((struct crypto_gaes_ctr_info*)req->udata)->ctrblk, ctrblk, 301 | crypto_cipher_blocksize(ctx->child)); 302 | 303 | if (ctx->info.ctr_range) { 304 | strcpy(req->service_name, "gaes_lctr"); 305 | memset(((struct crypto_gaes_ctr_info*)req->udata)->ctrblk, 0, 306 | crypto_cipher_blocksize(ctx->child)); 307 | } 308 | else 309 | strcpy(req->service_name, "gaes_ctr"); 310 | 311 | if (kgpu_call_sync(req)) { 312 | err = -EFAULT; 313 | g_log(KGPU_LOG_ERROR, "callgpu error\n"); 314 | } else { 315 | blkcipher_walk_init(&walk, dst, src, sz); 316 | err = blkcipher_walk_virt(desc, &walk); 317 | buf = (char*)req->out; 318 | 319 | while ((nbytes = walk.nbytes)) { 320 | u8 *wdst = walk.dst.virt.addr; 321 | 322 | memcpy(wdst, buf, nbytes); 323 | buf += nbytes; 324 | 325 | err = blkcipher_walk_done(desc, &walk, 0); 326 | } 327 | 328 | /* change counter value */ 329 | if (!ctx->info.ctr_range) 330 | big_u128_add(ctrblk, sz/crypto_cipher_blocksize(ctx->child), 331 | ctrblk); 332 | } 333 | 334 | kgpu_vfree(req->in); 335 | kgpu_free_request(req); 336 | 337 | return err; 338 | } 339 | 340 | static int 341 | crypto_gaes_ctr_crypt( 342 | struct blkcipher_desc *desc, 343 | struct scatterlist *dst, struct scatterlist *src, 344 | unsigned int nbytes) 345 | { 346 | if (/*(nbytes % PAGE_SIZE) ||*/ nbytes <= GAES_CTR_SIZE_THRESHOLD) 347 | return crypto_ctr_crypt(desc, dst, src, nbytes); 348 | return _crypto_gaes_ctr_crypt(desc, dst, src, nbytes); 349 | } 350 | 351 | static int 352 | crypto_gaes_lctr_crypt( 353 | struct blkcipher_desc *desc, 354 | struct 
scatterlist *dst, struct scatterlist *src, 355 | unsigned int nbytes) 356 | { 357 | struct crypto_blkcipher *tfm = desc->tfm; 358 | struct crypto_ctr_ctx *ctx = crypto_blkcipher_ctx(tfm); 359 | 360 | if (nbytes % ctx->info.ctr_range) { 361 | g_log(KGPU_LOG_ALERT, "using local " 362 | "counter mode, but data size is not " 363 | "multiple of %u\n", ctx->info.ctr_range); 364 | return crypto_ctr_crypt(desc, dst, src, nbytes); 365 | } 366 | return _crypto_gaes_ctr_crypt(desc, dst, src, nbytes); 367 | } 368 | 369 | static int crypto_ctr_init_tfm(struct crypto_tfm *tfm) 370 | { 371 | struct crypto_instance *inst = (void *)tfm->__crt_alg; 372 | struct crypto_spawn *spawn = crypto_instance_ctx(inst); 373 | struct crypto_ctr_ctx *ctx = crypto_tfm_ctx(tfm); 374 | struct crypto_cipher *cipher; 375 | 376 | cipher = crypto_spawn_cipher(spawn); 377 | if (IS_ERR(cipher)) 378 | return PTR_ERR(cipher); 379 | 380 | ctx->child = cipher; 381 | 382 | return 0; 383 | } 384 | 385 | static void crypto_ctr_exit_tfm(struct crypto_tfm *tfm) 386 | { 387 | struct crypto_ctr_ctx *ctx = crypto_tfm_ctx(tfm); 388 | 389 | crypto_free_cipher(ctx->child); 390 | } 391 | 392 | static struct crypto_instance* 393 | _crypto_ctr_alloc(struct rtattr **tb, int use_lctr) 394 | { 395 | struct crypto_instance *inst; 396 | struct crypto_alg *alg; 397 | int err; 398 | 399 | err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); 400 | if (err) 401 | return ERR_PTR(err); 402 | 403 | alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_CIPHER, 404 | CRYPTO_ALG_TYPE_MASK); 405 | if (IS_ERR(alg)) 406 | return ERR_CAST(alg); 407 | 408 | /* Block size must be >= 4 bytes. */ 409 | err = -EINVAL; 410 | if (alg->cra_blocksize < 4) 411 | goto out_put_alg; 412 | 413 | /* If this is false we'd fail the alignment of crypto_inc. 
*/ 414 | if (alg->cra_blocksize % 4) 415 | goto out_put_alg; 416 | 417 | if (use_lctr) 418 | inst = crypto_alloc_instance("gaes_lctr", alg); 419 | else 420 | inst = crypto_alloc_instance("gaes_ctr", alg); 421 | if (IS_ERR(inst)) 422 | goto out; 423 | 424 | inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; 425 | inst->alg.cra_priority = alg->cra_priority; 426 | inst->alg.cra_blocksize = 1; 427 | inst->alg.cra_alignmask = alg->cra_alignmask | (__alignof__(u32) - 1); 428 | inst->alg.cra_type = &crypto_blkcipher_type; 429 | 430 | inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; 431 | inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize; 432 | 433 | /* not quite sure whether this is OK */ 434 | inst->alg.cra_blkcipher.max_keysize = 435 | use_lctr? 436 | alg->cra_cipher.cia_max_keysize+sizeof(struct crypto_gaes_ctr_config) 437 | :alg->cra_cipher.cia_max_keysize; 438 | 439 | inst->alg.cra_ctxsize = sizeof(struct crypto_ctr_ctx); 440 | 441 | inst->alg.cra_init = crypto_ctr_init_tfm; 442 | inst->alg.cra_exit = crypto_ctr_exit_tfm; 443 | 444 | inst->alg.cra_blkcipher.setkey = 445 | use_lctr?crypto_lctr_setkey:crypto_ctr_setkey; 446 | inst->alg.cra_blkcipher.encrypt = 447 | use_lctr?crypto_gaes_lctr_crypt:crypto_gaes_ctr_crypt; 448 | inst->alg.cra_blkcipher.decrypt = 449 | use_lctr?crypto_gaes_lctr_crypt:crypto_gaes_ctr_crypt; 450 | 451 | inst->alg.cra_blkcipher.geniv = "chainiv"; 452 | 453 | out: 454 | crypto_mod_put(alg); 455 | return inst; 456 | 457 | out_put_alg: 458 | inst = ERR_PTR(err); 459 | goto out; 460 | } 461 | 462 | static struct crypto_instance *crypto_ctr_alloc(struct rtattr **tb) 463 | { 464 | return _crypto_ctr_alloc(tb, 0); 465 | } 466 | 467 | static struct crypto_instance *crypto_lctr_alloc(struct rtattr **tb) 468 | { 469 | return _crypto_ctr_alloc(tb, 1); 470 | } 471 | 472 | static void crypto_ctr_free(struct crypto_instance *inst) 473 | { 474 | crypto_drop_spawn(crypto_instance_ctx(inst)); 475 | kfree(inst); 476 | } 477 | 478 | static 
struct crypto_template crypto_ctr_tmpl = { 479 | .name = "gaes_ctr", 480 | .alloc = crypto_ctr_alloc, 481 | .free = crypto_ctr_free, 482 | .module = THIS_MODULE, 483 | }; 484 | 485 | static struct crypto_template crypto_lctr_tmpl = { 486 | .name = "gaes_lctr", 487 | .alloc = crypto_lctr_alloc, 488 | .free = crypto_ctr_free, 489 | .module = THIS_MODULE, 490 | }; 491 | 492 | #include "../gaes_test.c" 493 | 494 | long test_gaes_ctr(size_t sz) 495 | { 496 | return test_gaes(sz, 1, "gaes_ctr(aes)"); 497 | } 498 | EXPORT_SYMBOL_GPL(test_gaes_ctr); 499 | 500 | long test_gaes_lctr(size_t sz) 501 | { 502 | return test_gaes(sz, 1, "gaes_lctr(aes)"); 503 | } 504 | EXPORT_SYMBOL_GPL(test_gaes_lctr); 505 | 506 | static int __init crypto_ctr_module_init(void) 507 | { 508 | int err; 509 | 510 | err = crypto_register_template(&crypto_ctr_tmpl); 511 | err |= crypto_register_template(&crypto_lctr_tmpl); 512 | g_log(KGPU_LOG_PRINT, "module load\n"); 513 | return err; 514 | } 515 | 516 | static void __exit crypto_ctr_module_exit(void) 517 | { 518 | g_log(KGPU_LOG_PRINT, "module unload\n"); 519 | crypto_unregister_template(&crypto_lctr_tmpl); 520 | crypto_unregister_template(&crypto_ctr_tmpl); 521 | } 522 | 523 | module_init(crypto_ctr_module_init); 524 | module_exit(crypto_ctr_module_exit); 525 | 526 | MODULE_LICENSE("GPL"); 527 | MODULE_DESCRIPTION("GPU AES-CTR Counter block mode"); 528 | -------------------------------------------------------------------------------- /services/gaes/ecryptfs/read_write.c: -------------------------------------------------------------------------------- 1 | /** 2 | * eCryptfs: Linux filesystem encryption layer 3 | * 4 | * Copyright (C) 2007 International Business Machines Corp. 5 | * Author(s): Michael A. 
Halcrow 6 | * 7 | * This program is free software; you can redistribute it and/or 8 | * modify it under the terms of the GNU General Public License as 9 | * published by the Free Software Foundation; either version 2 of the 10 | * License, or (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, but 13 | * WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | * General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with this program; if not, write to the Free Software 19 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 20 | * 02111-1307, USA. 21 | * 22 | * See the GPL-COPYING file in the top-level directory. 23 | * 24 | * Copyright (c) 2010-2011 University of Utah and the Flux Group. 25 | * All rights reserved. 26 | * 27 | */ 28 | 29 | #include 30 | #include 31 | #include 32 | #include "ecryptfs_kernel.h" 33 | 34 | /** 35 | * ecryptfs_write_lower 36 | * @ecryptfs_inode: The eCryptfs inode 37 | * @data: Data to write 38 | * @offset: Byte offset in the lower file to which to write the data 39 | * @size: Number of bytes from @data to write at @offset in the lower 40 | * file 41 | * 42 | * Write data to the lower file. 
43 | * 44 | * Returns bytes written on success; less than zero on error 45 | */ 46 | int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, 47 | loff_t offset, size_t size) 48 | { 49 | struct ecryptfs_inode_info *inode_info; 50 | mm_segment_t fs_save; 51 | ssize_t rc; 52 | 53 | inode_info = ecryptfs_inode_to_private(ecryptfs_inode); 54 | mutex_lock(&inode_info->lower_file_mutex); 55 | BUG_ON(!inode_info->lower_file); 56 | inode_info->lower_file->f_pos = offset; 57 | fs_save = get_fs(); 58 | set_fs(get_ds()); 59 | rc = vfs_write(inode_info->lower_file, data, size, 60 | &inode_info->lower_file->f_pos); 61 | set_fs(fs_save); 62 | mutex_unlock(&inode_info->lower_file_mutex); 63 | mark_inode_dirty_sync(ecryptfs_inode); 64 | return rc; 65 | } 66 | 67 | /** 68 | * ecryptfs_write_lower_page_segment 69 | * @ecryptfs_inode: The eCryptfs inode 70 | * @page_for_lower: The page containing the data to be written to the 71 | * lower file 72 | * @offset_in_page: The offset in the @page_for_lower from which to 73 | * start writing the data 74 | * @size: The amount of data from @page_for_lower to write to the 75 | * lower file 76 | * 77 | * Determines the byte offset in the file for the given page and 78 | * offset within the page, maps the page, and makes the call to write 79 | * the contents of @page_for_lower to the lower inode. 
80 | * 81 | * Returns zero on success; non-zero otherwise 82 | */ 83 | int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, 84 | struct page *page_for_lower, 85 | size_t offset_in_page, size_t size) 86 | { 87 | char *virt; 88 | loff_t offset; 89 | int rc; 90 | 91 | offset = ((((loff_t)page_for_lower->index) << PAGE_CACHE_SHIFT) 92 | + offset_in_page); 93 | virt = kmap(page_for_lower); 94 | rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size); 95 | if (rc > 0) 96 | rc = 0; 97 | kunmap(page_for_lower); 98 | return rc; 99 | } 100 | 101 | /** 102 | * ecryptfs_write 103 | * @ecryptfs_inode: The eCryptfs file into which to write 104 | * @data: Virtual address where data to write is located 105 | * @offset: Offset in the eCryptfs file at which to begin writing the 106 | * data from @data 107 | * @size: The number of bytes to write from @data 108 | * 109 | * Write an arbitrary amount of data to an arbitrary location in the 110 | * eCryptfs inode page cache. This is done on a page-by-page, and then 111 | * by an extent-by-extent, basis; individual extents are encrypted and 112 | * written to the lower page cache (via VFS writes). This function 113 | * takes care of all the address translation to locations in the lower 114 | * filesystem; it also handles truncate events, writing out zeros 115 | * where necessary. 116 | * 117 | * Returns zero on success; non-zero otherwise 118 | */ 119 | int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, 120 | size_t size) 121 | { 122 | struct page *ecryptfs_page; 123 | struct ecryptfs_crypt_stat *crypt_stat; 124 | char *ecryptfs_page_virt; 125 | loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode); 126 | loff_t data_offset = 0; 127 | loff_t pos; 128 | int rc = 0; 129 | 130 | crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; 131 | /* 132 | * if we are writing beyond current size, then start pos 133 | * at the current size - we'll fill in zeros from there. 
134 | */ 135 | if (offset > ecryptfs_file_size) 136 | pos = ecryptfs_file_size; 137 | else 138 | pos = offset; 139 | while (pos < (offset + size)) { 140 | pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT); 141 | size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK); 142 | size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page); 143 | size_t total_remaining_bytes = ((offset + size) - pos); 144 | 145 | if (num_bytes > total_remaining_bytes) 146 | num_bytes = total_remaining_bytes; 147 | if (pos < offset) { 148 | /* remaining zeros to write, up to destination offset */ 149 | size_t total_remaining_zeros = (offset - pos); 150 | 151 | if (num_bytes > total_remaining_zeros) 152 | num_bytes = total_remaining_zeros; 153 | } 154 | ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_inode, 155 | ecryptfs_page_idx); 156 | if (IS_ERR(ecryptfs_page)) { 157 | rc = PTR_ERR(ecryptfs_page); 158 | printk(KERN_ERR "%s: Error getting page at " 159 | "index [%ld] from eCryptfs inode " 160 | "mapping; rc = [%d]\n", __func__, 161 | ecryptfs_page_idx, rc); 162 | goto out; 163 | } 164 | ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); 165 | 166 | /* 167 | * pos: where we're now writing, offset: where the request was 168 | * If current pos is before request, we are filling zeros 169 | * If we are at or beyond request, we are writing the *data* 170 | * If we're in a fresh page beyond eof, zero it in either case 171 | */ 172 | if (pos < offset || !start_offset_in_page) { 173 | /* We are extending past the previous end of the file. 
174 | * Fill in zero values to the end of the page */ 175 | memset(((char *)ecryptfs_page_virt 176 | + start_offset_in_page), 0, 177 | PAGE_CACHE_SIZE - start_offset_in_page); 178 | } 179 | 180 | /* pos >= offset, we are now writing the data request */ 181 | if (pos >= offset) { 182 | memcpy(((char *)ecryptfs_page_virt 183 | + start_offset_in_page), 184 | (data + data_offset), num_bytes); 185 | data_offset += num_bytes; 186 | } 187 | kunmap_atomic(ecryptfs_page_virt, KM_USER0); 188 | flush_dcache_page(ecryptfs_page); 189 | SetPageUptodate(ecryptfs_page); 190 | unlock_page(ecryptfs_page); 191 | if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) 192 | rc = ecryptfs_encrypt_page(ecryptfs_page); 193 | else 194 | rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, 195 | ecryptfs_page, 196 | start_offset_in_page, 197 | data_offset); 198 | page_cache_release(ecryptfs_page); 199 | if (rc) { 200 | printk(KERN_ERR "%s: Error encrypting " 201 | "page; rc = [%d]\n", __func__, rc); 202 | goto out; 203 | } 204 | pos += num_bytes; 205 | } 206 | if ((offset + size) > ecryptfs_file_size) { 207 | i_size_write(ecryptfs_inode, (offset + size)); 208 | if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) { 209 | rc = ecryptfs_write_inode_size_to_metadata( 210 | ecryptfs_inode); 211 | if (rc) { 212 | printk(KERN_ERR "Problem with " 213 | "ecryptfs_write_inode_size_to_metadata; " 214 | "rc = [%d]\n", rc); 215 | goto out; 216 | } 217 | } 218 | } 219 | out: 220 | return rc; 221 | } 222 | 223 | int ecryptfs_write2(struct inode *ecryptfs_inode, char *data, loff_t offset, 224 | size_t size) 225 | { 226 | struct page *ecryptfs_page; 227 | struct page **pgs; 228 | struct ecryptfs_crypt_stat *crypt_stat; 229 | char *ecryptfs_page_virt; 230 | loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode); 231 | loff_t data_offset = 0; 232 | loff_t pos; 233 | int nrpgs = size/PAGE_SIZE; 234 | int rc = 0; 235 | int i=0; 236 | 237 | if (size&(PAGE_SIZE-1)) 238 | nrpgs++; 239 | 240 | pgs = kmalloc(nrpgs*sizeof(struct 
page*), GFP_KERNEL); 241 | if (!pgs) { 242 | printk("[g-ecryptfs] Error: allocate pages failed\n"); 243 | rc = -ENOMEM; 244 | goto out; 245 | } 246 | 247 | crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; 248 | /* 249 | * if we are writing beyond current size, then start pos 250 | * at the current size - we'll fill in zeros from there. 251 | */ 252 | if (offset > ecryptfs_file_size) 253 | pos = ecryptfs_file_size; 254 | else 255 | pos = offset; 256 | while (pos < (offset + size)) { 257 | pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT); 258 | size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK); 259 | size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page); 260 | size_t total_remaining_bytes = ((offset + size) - pos); 261 | 262 | if (num_bytes > total_remaining_bytes) 263 | num_bytes = total_remaining_bytes; 264 | if (pos < offset) { 265 | /* remaining zeros to write, up to destination offset */ 266 | size_t total_remaining_zeros = (offset - pos); 267 | 268 | if (num_bytes > total_remaining_zeros) 269 | num_bytes = total_remaining_zeros; 270 | } 271 | ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_inode, 272 | ecryptfs_page_idx); 273 | if (IS_ERR(ecryptfs_page)) { 274 | rc = PTR_ERR(ecryptfs_page); 275 | printk(KERN_ERR "%s: Error getting page at " 276 | "index [%ld] from eCryptfs inode " 277 | "mapping; rc = [%d]\n", __func__, 278 | ecryptfs_page_idx, rc); 279 | goto out; 280 | } 281 | ecryptfs_page_virt = kmap(ecryptfs_page); 282 | 283 | /* 284 | * pos: where we're now writing, offset: where the request was 285 | * If current pos is before request, we are filling zeros 286 | * If we are at or beyond request, we are writing the *data* 287 | * If we're in a fresh page beyond eof, zero it in either case 288 | */ 289 | if (pos < offset || !start_offset_in_page) { 290 | /* We are extending past the previous end of the file. 
291 | * Fill in zero values to the end of the page */ 292 | memset(((char *)ecryptfs_page_virt 293 | + start_offset_in_page), 0, 294 | PAGE_CACHE_SIZE - start_offset_in_page); 295 | } 296 | 297 | /* pos >= offset, we are now writing the data request */ 298 | if (pos >= offset) { 299 | memcpy(((char *)ecryptfs_page_virt 300 | + start_offset_in_page), 301 | (data + data_offset), num_bytes); 302 | data_offset += num_bytes; 303 | } 304 | kunmap(ecryptfs_page); 305 | flush_dcache_page(ecryptfs_page); 306 | SetPageUptodate(ecryptfs_page); 307 | unlock_page(ecryptfs_page); 308 | if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) { 309 | pgs[i++] = ecryptfs_page; 310 | /* rc = ecryptfs_encrypt_page(ecryptfs_page); */ 311 | } 312 | else { 313 | rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, 314 | ecryptfs_page, 315 | start_offset_in_page, 316 | data_offset); 317 | page_cache_release(ecryptfs_page); 318 | if (rc) { 319 | printk(KERN_ERR "%s: Error encrypting " 320 | "page; rc = [%d]\n", __func__, rc); 321 | goto out; 322 | } 323 | } 324 | pos += num_bytes; 325 | } 326 | 327 | if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) { 328 | rc = ecryptfs_encrypt_pages2(pgs, nrpgs); 329 | for (i=0; i ecryptfs_file_size) { 335 | i_size_write(ecryptfs_inode, (offset + size)); 336 | if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) { 337 | rc = ecryptfs_write_inode_size_to_metadata( 338 | ecryptfs_inode); 339 | if (rc) { 340 | printk(KERN_ERR "Problem with " 341 | "ecryptfs_write_inode_size_to_metadata; " 342 | "rc = [%d]\n", rc); 343 | goto out; 344 | } 345 | } 346 | } 347 | out: 348 | return rc; 349 | } 350 | 351 | /** 352 | * ecryptfs_read_lower 353 | * @data: The read data is stored here by this function 354 | * @offset: Byte offset in the lower file from which to read the data 355 | * @size: Number of bytes to read from @offset of the lower file and 356 | * store into @data 357 | * @ecryptfs_inode: The eCryptfs inode 358 | * 359 | * Read @size bytes of data at byte offset @offset from the 
lower 360 | * inode into memory location @data. 361 | * 362 | * Returns bytes read on success; 0 on EOF; less than zero on error 363 | */ 364 | int ecryptfs_read_lower(char *data, loff_t offset, size_t size, 365 | struct inode *ecryptfs_inode) 366 | { 367 | struct ecryptfs_inode_info *inode_info = 368 | ecryptfs_inode_to_private(ecryptfs_inode); 369 | mm_segment_t fs_save; 370 | ssize_t rc; 371 | 372 | mutex_lock(&inode_info->lower_file_mutex); 373 | BUG_ON(!inode_info->lower_file); 374 | inode_info->lower_file->f_pos = offset; 375 | fs_save = get_fs(); 376 | set_fs(get_ds()); 377 | rc = vfs_read(inode_info->lower_file, data, size, 378 | &inode_info->lower_file->f_pos); 379 | set_fs(fs_save); 380 | mutex_unlock(&inode_info->lower_file_mutex); 381 | return rc; 382 | } 383 | 384 | /** 385 | * ecryptfs_read_lower_page_segment 386 | * @page_for_ecryptfs: The page into which data for eCryptfs will be 387 | * written 388 | * @offset_in_page: Offset in @page_for_ecryptfs from which to start 389 | * writing 390 | * @size: The number of bytes to write into @page_for_ecryptfs 391 | * @ecryptfs_inode: The eCryptfs inode 392 | * 393 | * Determines the byte offset in the file for the given page and 394 | * offset within the page, maps the page, and makes the call to read 395 | * the contents of @page_for_ecryptfs from the lower inode. 
396 | * 397 | * Returns zero on success; non-zero otherwise 398 | */ 399 | int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, 400 | pgoff_t page_index, 401 | size_t offset_in_page, size_t size, 402 | struct inode *ecryptfs_inode) 403 | { 404 | char *virt; 405 | loff_t offset; 406 | int rc; 407 | 408 | offset = ((((loff_t)page_index) << PAGE_CACHE_SHIFT) + offset_in_page); 409 | virt = kmap(page_for_ecryptfs); 410 | rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode); 411 | if (rc > 0) 412 | rc = 0; 413 | kunmap(page_for_ecryptfs); 414 | flush_dcache_page(page_for_ecryptfs); 415 | return rc; 416 | } 417 | 418 | #if 0 419 | /** 420 | * ecryptfs_read 421 | * @data: The virtual address into which to write the data read (and 422 | * possibly decrypted) from the lower file 423 | * @offset: The offset in the decrypted view of the file from which to 424 | * read into @data 425 | * @size: The number of bytes to read into @data 426 | * @ecryptfs_file: The eCryptfs file from which to read 427 | * 428 | * Read an arbitrary amount of data from an arbitrary location in the 429 | * eCryptfs page cache. This is done on an extent-by-extent basis; 430 | * individual extents are decrypted and read from the lower page 431 | * cache (via VFS reads). This function takes care of all the 432 | * address translation to locations in the lower filesystem. 
433 | * 434 | * Returns zero on success; non-zero otherwise 435 | */ 436 | int ecryptfs_read(char *data, loff_t offset, size_t size, 437 | struct file *ecryptfs_file) 438 | { 439 | struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode; 440 | struct page *ecryptfs_page; 441 | char *ecryptfs_page_virt; 442 | loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode); 443 | loff_t data_offset = 0; 444 | loff_t pos; 445 | int rc = 0; 446 | 447 | if ((offset + size) > ecryptfs_file_size) { 448 | rc = -EINVAL; 449 | printk(KERN_ERR "%s: Attempt to read data past the end of the " 450 | "file; offset = [%lld]; size = [%td]; " 451 | "ecryptfs_file_size = [%lld]\n", 452 | __func__, offset, size, ecryptfs_file_size); 453 | goto out; 454 | } 455 | pos = offset; 456 | while (pos < (offset + size)) { 457 | pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT); 458 | size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK); 459 | size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page); 460 | size_t total_remaining_bytes = ((offset + size) - pos); 461 | 462 | if (num_bytes > total_remaining_bytes) 463 | num_bytes = total_remaining_bytes; 464 | ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_inode, 465 | ecryptfs_page_idx); 466 | if (IS_ERR(ecryptfs_page)) { 467 | rc = PTR_ERR(ecryptfs_page); 468 | printk(KERN_ERR "%s: Error getting page at " 469 | "index [%ld] from eCryptfs inode " 470 | "mapping; rc = [%d]\n", __func__, 471 | ecryptfs_page_idx, rc); 472 | goto out; 473 | } 474 | ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); 475 | memcpy((data + data_offset), 476 | ((char *)ecryptfs_page_virt + start_offset_in_page), 477 | num_bytes); 478 | kunmap_atomic(ecryptfs_page_virt, KM_USER0); 479 | flush_dcache_page(ecryptfs_page); 480 | SetPageUptodate(ecryptfs_page); 481 | unlock_page(ecryptfs_page); 482 | page_cache_release(ecryptfs_page); 483 | pos += num_bytes; 484 | data_offset += num_bytes; 485 | } 486 | out: 487 | return rc; 488 | } 489 | #endif /* 0 */ 490 
| --------------------------------------------------------------------------------