├── src_python
│   ├── ctf
│   │   ├── __init__.py
│   │   └── random.pyx
│   ├── Makefile
│   └── ctf_ext.h
├── .gitignore
├── studies
│   ├── Makefile
│   ├── fast_3mm.cxx
│   └── fast_diagram.cxx
├── scalapack_tests
│   ├── Makefile
│   ├── conj.h
│   └── qr.cxx
├── src
│   ├── scripts
│   │   ├── license.sh
│   │   ├── expand_includes.sh
│   │   ├── manual_readlink.sh
│   │   └── recursive_expand_includes.sh
│   ├── Makefile
│   ├── sparse_formats
│   │   ├── Makefile
│   │   └── coo.h
│   ├── mapping
│   │   ├── Makefile
│   │   └── distribution.h
│   ├── shared
│   │   ├── memcontrol.h
│   │   ├── fompi_wrapper.h
│   │   ├── Makefile
│   │   ├── int_timer.h
│   │   ├── init_models.h
│   │   ├── init_models.cxx
│   │   ├── blas_symbs.cxx
│   │   └── offload.h
│   ├── symmetry
│   │   ├── Makefile
│   │   └── sym_indices.h
│   ├── redistribution
│   │   ├── Makefile
│   │   ├── dgtog_redist.h
│   │   ├── dgtog_redist.cxx
│   │   ├── nosym_transp.h
│   │   └── glb_cyclic_reshuffle.h
│   ├── scaling
│   │   ├── Makefile
│   │   ├── sym_seq_scl.h
│   │   ├── scale_tsr.h
│   │   └── scaling.h
│   ├── interface
│   │   ├── flop_counter.cxx
│   │   ├── ring.cxx
│   │   ├── Makefile
│   │   ├── partition.h
│   │   ├── scalar.h
│   │   ├── decomposition.h
│   │   ├── scalar.cxx
│   │   ├── group.h
│   │   ├── fun_term.h
│   │   ├── partition.cxx
│   │   ├── monoid.cxx
│   │   ├── vector.cxx
│   │   ├── back_comp.h
│   │   ├── ring.h
│   │   ├── timer.h
│   │   ├── vector.h
│   │   ├── sparse_tensor.cxx
│   │   └── sparse_tensor.h
│   ├── summation
│   │   ├── Makefile
│   │   └── sym_seq_sum.h
│   ├── tensor
│   │   └── Makefile
│   └── contraction
│       ├── Makefile
│       ├── sp_seq_ctr.h
│       ├── spctr_comm.h
│       ├── ctr_offload.h
│       └── spctr_offload.h
├── include
│   └── ctf.hpp
├── test
│   ├── Makefile
│   ├── ccsdt_map_test.cxx
│   ├── reduce_bcast.cxx
│   ├── python
│   │   └── test_dot.py
│   ├── diag_ctr.cxx
│   ├── endomorphism.cxx
│   ├── univar_function.cxx
│   ├── multi_tsr_sym.cxx
│   ├── diag_sym.cxx
│   ├── repack.cxx
│   ├── readall_test.cxx
│   ├── speye.cxx
│   ├── bivar_function.cxx
│   ├── sy_times_ns.cxx
│   ├── endomorphism_cust_sp.cxx
│   ├── sptensor_sum.cxx
│   ├── dft.cxx
│   ├── bivar_transform.cxx
│   └── endomorphism_cust.cxx
├── examples
│   ├── Makefile
│   ├── btwn_central.h
│   ├── moldynamics.h
│   ├── spmv.cxx
│   ├── checkpoint.cxx
│   ├── particle_interaction.cxx
│   ├── spectral_element.cxx
│   ├── dft_3D.cxx
│   ├── trace.cxx
│   ├── jacobi.cxx
│   ├── sssp.cxx
│   └── scan.cxx
├── bench
│   ├── Makefile
│   ├── model_trainer_kernels.cxx
│   └── bench_contraction.cxx
├── license.txt
└── .travis.yml

/src_python/ctf/__init__.py:
--------------------------------------------------------------------------------
1 | from ctf.core import *
2 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Build directories
2 | lib
3 | lib_shared
4 | lib_python
5 | obj
6 | obj_ext
7 | obj_shared
8 | bin
9 | 
10 | # Autogenerated in first invocation of configure
11 | config.mk
12 | setup.py
13 | how-did-i-configure
14 | .*.swp
15 | .*.swo
--------------------------------------------------------------------------------
/studies/Makefile:
--------------------------------------------------------------------------------
1 | include $(BDIR)/config.mk
2 | 
3 | 
4 | .PHONY: $(STUDIES)
5 | $(STUDIES): %: $(BDIR)/bin/%
6 | 
7 | $(BDIR)/bin/%: %.cxx $(BDIR)/lib/libctf.a *.cxx ../studies/*.cxx Makefile ../Makefile
8 | $(FCXX) $< -o $@ -I../include/ -L$(BDIR)/lib -lctf $(LIBS)
9 | 
10 | 
--------------------------------------------------------------------------------
/scalapack_tests/Makefile:
--------------------------------------------------------------------------------
1 | include $(BDIR)/config.mk
2 | 
3 | .PHONY:
4 | $(SCALAPACK_TESTS): %: $(BDIR)/bin/%
5 | 
6 | $(BDIR)/bin/%: %.cxx $(BDIR)/lib/libctf.a *.cxx Makefile ../Makefile ../src/interface
7 | $(FCXX) $< -o $@ -I../include/ 
-L$(BDIR)/lib -lctf $(LIB_SCLPCK) $(LIBS) 8 | 9 | -------------------------------------------------------------------------------- /src/scripts/license.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | for file in ../*/*.hxx ../*/*.cxx ../*/*.h 4 | do 5 | if grep MERCH $file 6 | then 7 | vim $file -c ":d22" -c ":%s/\ \*\ SUCH\ DAMAGE\.\ \*\//\/*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*\//g" -c ":wq" 8 | fi 9 | done 10 | -------------------------------------------------------------------------------- /src/scripts/expand_includes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | REL_SCRIPT_DIR=$(dirname $0) 3 | SCRIPT=$(${REL_SCRIPT_DIR}/manual_readlink.sh $0) 4 | SCRIPT_DIR=$(dirname $SCRIPT) 5 | touch ${SCRIPT_DIR}/visited_list.txt 6 | $SCRIPT_DIR/recursive_expand_includes.sh $SCRIPT_DIR/../../include/ctf.hpp &> $SCRIPT_DIR/../../include/ctf_all.hpp 7 | rm -f ${SCRIPT_DIR}/visited_list.txt 8 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | ctf: subdirs 2 | 3 | SUBDIRS = interface shared tensor symmetry mapping redistribution scaling summation contraction sparse_formats 4 | 5 | .PHONY: subdirs $(SUBDIRS) 6 | 7 | subdirs: $(SUBDIRS) 8 | 9 | $(SUBDIRS): 10 | $(MAKE) -C $@ 11 | 12 | clean: 13 | for dir in $(SUBDIRS) ; do \ 14 | $(MAKE) $@ -C $$dir ; \ 15 | done 16 | 17 | -------------------------------------------------------------------------------- /src/sparse_formats/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = coo.o csr.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | #%d | r ! grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 5 | HDRS = ../../Makefile $(BDIR)/config.mk ../tensor/algstrct.h ../shared/util.h 6 | 7 | ctf: $(OBJS) 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | 13 | clean: 14 | rm -f *.o 15 | -------------------------------------------------------------------------------- /src/mapping/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = mapping.o distribution.o topology.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | ctf: $(OBJS) 5 | 6 | #%d | r ! 
grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 7 | HDRS = ../../Makefile $(BDIR)/config.mk ../interface/common.h ../mapping/mapping.h ../shared/util.h ../summation/sum_tsr.h ../tensor/untyped_tensor.h 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | -------------------------------------------------------------------------------- /src/shared/memcontrol.h: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #ifndef __MEMCONTROL_H__ 4 | #define __MEMCONTROL_H__ 5 | 6 | namespace CTF_int { 7 | void inc_tot_mem_used(int64_t a); 8 | int64_t proc_bytes_used(); 9 | int64_t proc_bytes_total(); 10 | int64_t proc_bytes_available(); 11 | void set_memcap(double cap); 12 | void set_mem_size(int64_t size); 13 | int get_num_instances(); 14 | } 15 | 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/symmetry/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = sym_indices.o symmetrization.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | #%d | r ! grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 5 | HDRS = ../../Makefile $(BDIR)/config.mk ../contraction/contraction.h ../interface/common.h ../interface/timer.h ../scaling/scaling.h ../shared/util.h ../summation/summation.h ../tensor/untyped_tensor.h 6 | 7 | ctf: $(OBJS) 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | -------------------------------------------------------------------------------- /src/shared/fompi_wrapper.h: -------------------------------------------------------------------------------- 1 | #ifndef __FOMPI_WRAPPER__ 2 | #define __FOMPI_WRAPPER__ 3 | 4 | #ifdef USE_FOMPI 5 | #include 6 | 7 | typedef foMPI_Win CTF_Win; 8 | #define MPI_Init(...) foMPI_Init(__VA_ARGS__) 9 | #define MPI_Win_create(...) foMPI_Win_create(__VA_ARGS__) 10 | #define MPI_Win_fence(...) foMPI_Win_fence(__VA_ARGS__) 11 | #define MPI_Win_free(...) foMPI_Win_free(__VA_ARGS__) 12 | #define MPI_Put(...) foMPI_Put(__VA_ARGS__) 13 | #else 14 | #include 15 | typedef MPI_Win CTF_Win; 16 | #endif 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/redistribution/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = redist.o sparse_rw.o pad.o nosym_transp.o cyclic_reshuffle.o glb_cyclic_reshuffle.o dgtog_redist.o dgtog_calc_cnt.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | ctf: $(OBJS) 5 | 6 | #%d | r ! grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 7 | HDRS = ../../Makefile $(BDIR)/config.mk ../interface/common.h ../mapping/distribution.h ../shared/util.h ../tensor/algstrct.h ../shared/model.h ../shared/init_models.h 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | 13 | clean: 14 | rm -f *.o 15 | -------------------------------------------------------------------------------- /src/shared/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = util.o memcontrol.o int_timer.o model.o init_models.o blas_symbs.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | #%d | r ! 
grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 5 | HDRS = ../../Makefile $(BDIR)/config.mk ../interface/common.h ../interface/timer.h 6 | 7 | LNVCC_OBJS = offload.o 8 | NVCC_OBJS = $(addprefix $(ODIR)/, $(LNVCC_OBJS)) 9 | 10 | ctf: $(OBJS) $(NVCC_OBJS) 11 | 12 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 13 | $(FCXX) -c $< -o $@ 14 | 15 | $(NVCC_OBJS): $(ODIR)/%.o: %.cu *.h $(HDRS) 16 | $(OFFLOAD_CXX) -c $< -o $@ 17 | 18 | -------------------------------------------------------------------------------- /include/ctf.hpp: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | #ifndef __CTF_HPP__ 3 | #define __CTF_HPP__ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #define CTF_VERSION 150 16 | 17 | #include "../src/interface/tensor.h" 18 | #include "../src/interface/idx_tensor.h" 19 | #include "../src/interface/timer.h" 20 | #include "../src/interface/back_comp.h" 21 | #include "../src/interface/kernel.h" 22 | 23 | #endif 24 | 25 | -------------------------------------------------------------------------------- /src_python/ctf/random.pyx: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.insert(0, os.path.abspath(".")) 3 | 4 | cdef extern from "ctf.hpp" namespace "CTF_int": 5 | void init_rng(int seed) 6 | 7 | cdef extern from "ctf.hpp" namespace "CTF": 8 | cdef cppclass World: 9 | int rank, np; 10 | World() 11 | World(int) 12 | World & get_universe() 13 | 14 | def seed(seed): 15 | init_rng(seed+get_universe().rank) 16 | 17 | def all_seed(seed): 18 | init_rng(seed) 19 | 20 | def random(shape): 21 | import ctf 22 | A = ctf.tensor(shape) 23 | A.fill_random() 24 | return A 25 | 26 | -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | include $(BDIR)/config.mk 2 | 3 | 4 | .PHONY: 5 | $(TESTS): %: $(BDIR)/bin/% 6 | 7 | ifneq (,$(findstring DUSE_SCALAPACK,$(DEFS))) 8 | SCALA_TESTS = pgemm_test nonsq_pgemm_test 9 | $(SCALA_TESTS): %: $(ODIR)/bin/% 10 | endif 11 | 12 | $(BDIR)/bin/%: %.cxx $(BDIR)/lib/libctf.a *.cxx ../examples/*.cxx Makefile ../Makefile $(ODIR)/btwn_central_kernels.o 13 | $(FCXX) $< $(ODIR)/btwn_central_kernels.o -o $@ -I../include/ -L$(BDIR)/lib -lctf $(LIBS) 14 | 15 | $(ODIR)/btwn_central_kernels.o: ../examples/btwn_central_kernels.cxx ../examples/btwn_central.h 16 | $(OFFLOAD_CXX) -c $< -o $@ -I../include/ 17 | -------------------------------------------------------------------------------- /src/scaling/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = scaling.o sym_seq_scl.o scale_tsr.o strp_tsr.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | ctf: $(OBJS) 5 | 6 | #%d | r ! 
grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 7 | HDRS = ../../Makefile $(BDIR)/config.mk ../contraction/ctr_comm.h ../interface/common.h ../interface/idx_tensor.h ../interface/term.h ../mapping/distribution.h ../mapping/mapping.h ../scaling/scale_tsr.h ../shared/iter_tsr.h ../shared/memcontrol.h ../shared/util.h ../summation/sum_tsr.h ../tensor/algstrct.h ../tensor/untyped_tensor.h 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | 2 | .PHONY: 3 | $(EXAMPLES): %: $(BDIR)/bin/% 4 | $(BDIR)/bin/btwn_central: btwn_central.cxx btwn_central_kernels.cxx $(ODIR)/btwn_central_kernels.o $(BDIR)/lib/libctf.a Makefile ../Makefile 5 | $(FCXX) $< $(ODIR)/btwn_central_kernels.o -o $@ -I../include/ -L$(BDIR)/lib -lctf $(LIBS) 6 | 7 | $(ODIR)/btwn_central_kernels.o: btwn_central_kernels.cxx btwn_central.h ../src/interface 8 | $(OFFLOAD_CXX) -c $< -o $@ -I../include/ 9 | 10 | 11 | $(BDIR)/bin/%: %.cxx $(BDIR)/lib/libctf.a Makefile ../Makefile ../src/interface 12 | $(FCXX) $< -o $@ -I../include/ -L$(BDIR)/lib -lctf $(LIBS) 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/interface/flop_counter.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #include "timer.h" 4 | #include "common.h" 5 | 6 | namespace CTF { 7 | Flop_counter::Flop_counter(){ 8 | start_count = CTF_int::get_flops(); 9 | } 10 | 11 | Flop_counter::~Flop_counter(){ 12 | } 13 | 14 | void Flop_counter::zero(){ 15 | start_count = CTF_int::get_flops(); 16 | } 17 | 18 | int64_t Flop_counter::count(MPI_Comm comm){ 19 | int64_t allf; 20 | int64_t myf = (CTF_int::get_flops() - start_count); 21 | MPI_Allreduce(&myf,&allf,1,MPI_INT64_T,MPI_SUM,comm); 22 | return allf; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/interface/ring.cxx: -------------------------------------------------------------------------------- 1 | #include "../../include/ctf.hpp" 2 | 3 | 4 | namespace CTF_int { 5 | CTF::Ring float_ring = CTF::Ring(); 6 | CTF_int::algstrct const * get_float_ring(){ 7 | return &float_ring; 8 | } 9 | CTF::Ring double_ring = CTF::Ring(); 10 | CTF_int::algstrct const * get_double_ring(){ 11 | return &double_ring; 12 | } 13 | CTF::Ring int_ring = CTF::Ring(); 14 | CTF_int::algstrct const * get_int_ring(){ 15 | return &int_ring; 16 | } 17 | CTF::Ring int64_t_ring = CTF::Ring(); 18 | CTF_int::algstrct const * get_int64_t_ring(){ 19 | return &int64_t_ring; 20 | } 21 | } 22 | 23 | 24 | -------------------------------------------------------------------------------- /src/scripts/manual_readlink.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # taken from https://stackoverflow.com/questions/1055671/how-can-i-get-the-behavior-of-gnus-readlink-f-on-a-mac 3 | 4 | TARGET_FILE=$1 5 | 6 | cd `dirname $TARGET_FILE` 7 | TARGET_FILE=`basename $TARGET_FILE` 8 | 9 | # Iterate down a (possible) chain of symlinks 10 | while [ -L "$TARGET_FILE" ] 11 | do 12 | TARGET_FILE=`readlink $TARGET_FILE` 13 | cd `dirname $TARGET_FILE` 14 | TARGET_FILE=`basename $TARGET_FILE` 15 | done 16 | 17 | # Compute the canonicalized name by finding the physical path 18 | # for the directory we're in and 
appending the target file. 19 | PHYS_DIR=`pwd -P` 20 | RESULT=$PHYS_DIR/$TARGET_FILE 21 | echo $RESULT 22 | -------------------------------------------------------------------------------- /src/scripts/recursive_expand_includes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | REL_SCRIPT_DIR=$(dirname $0) 3 | SCRIPT=$(${REL_SCRIPT_DIR}/manual_readlink.sh $0) 4 | SCRIPT_DIR=$(dirname $SCRIPT) 5 | DIR=$(pwd) 6 | cd $(dirname $1) 7 | FNAME=$(basename $1) 8 | FDIR=$(pwd) 9 | FULLFNAME="${FDIR}/${FNAME}" 10 | if grep -Fxq "$FULLFNAME" $SCRIPT_DIR/visited_list.txt 11 | then 12 | exit 0 13 | else 14 | echo $FULLFNAME >> $SCRIPT_DIR/visited_list.txt 15 | TMP_FILE="${FNAME}.tmp.concat" 16 | cp $FNAME $TMP_FILE 17 | sed -i -e 's/#include "\(.*\)"/include \1/g' $TMP_FILE 18 | awk ' 19 | $1=="include" && NF>=2 { 20 | system("'$SCRIPT' " $2) 21 | next 22 | } 23 | {print}' "$TMP_FILE" 24 | rm $TMP_FILE 25 | fi 26 | -------------------------------------------------------------------------------- /src/summation/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = summation.o sym_seq_sum.o sum_tsr.o spr_seq_sum.o spsum_tsr.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | #%d | r ! grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 5 | HDRS = ../../Makefile $(BDIR)/config.mk ../mapping/distribution.h ../mapping/mapping.h ../redistribution/nosym_transp.h ../redistribution/redist.h ../scaling/scaling.h ../scaling/strp_tsr.h ../shared/iter_tsr.h ../shared/memcontrol.h ../shared/util.h ../symmetry/sym_indices.h ../symmetry/symmetrization.h ../tensor/algstrct.h ../tensor/untyped_tensor.h ../shared/model.h ../shared/init_models.h 6 | 7 | ctf: $(OBJS) 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | 13 | clean: 14 | rm -f *.o 15 | -------------------------------------------------------------------------------- /scalapack_tests/conj.h: -------------------------------------------------------------------------------- 1 | #ifndef __CONJ_H__ 2 | #define __CONJ_H__ 3 | 4 | template 5 | CTF::Matrix conj(CTF::Matrix & A){ 6 | return A; 7 | } 8 | template <> 9 | CTF::Matrix< std::complex > conj(CTF::Matrix< std::complex > & A){ 10 | CTF::Matrix< std::complex > B(A); 11 | B["ij"] = CTF::Function< std::complex>([](std::complex a){ return std::conj(a); })(A["ij"]); 12 | return B; 13 | } 14 | template <> 15 | CTF::Matrix> conj(CTF::Matrix> & A){ 16 | CTF::Matrix> B(A); 17 | B["ij"] = CTF::Function>([](std::complex a){ return std::conj(a); })(A["ij"]); 18 | return B; 19 | } 20 | #endif 21 | -------------------------------------------------------------------------------- /src/interface/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = common.o flop_counter.o world.o idx_tensor.o term.o schedule.o semiring.o partition.o fun_term.o monoid.o set.o ring.o 2 | 3 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 4 | 5 | 6 | #%d | r ! 
grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 7 | HDRS = ../../Makefile $(BDIR)/config.mk ../contraction/contraction.h ../../include/ctf.hpp ../interface/common.h ../mapping/topology.h ../scaling/scaling.h ../shared/blas_symbs.h ../shared/memcontrol.h ../shared/util.h ../summation/summation.h ../tensor/algstrct.h ../tensor/untyped_tensor.h ../tensor/untyped_tensor_tmpl.h ../sparse_formats/csr.h ../shared/lapack_symbs.h 8 | 9 | ctf: $(OBJS) 10 | 11 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 12 | $(FCXX) -c $< -o $@ 13 | 14 | 15 | -------------------------------------------------------------------------------- /bench/Makefile: -------------------------------------------------------------------------------- 1 | include $(BDIR)/config.mk 2 | 3 | .PHONY: 4 | $(BENCHMARKS): %: $(BDIR)/bin/% 5 | 6 | 7 | ifneq (,$(findstring DUSE_SCALAPACK,$(DEFS))) 8 | SCALA_BENCHMARKS = nonsq_pgemm_bench 9 | $(SCALA_BENCHMARKS): %: $(BDIR)/bin/% 10 | endif 11 | 12 | 13 | $(ODIR)/model_trainer_kernels.o: model_trainer_kernels.cxx ../src/interface 14 | $(OFFLOAD_CXX) -c $< -o $@ -I../include/ 15 | 16 | $(BDIR)/bin/model_trainer: model_trainer.cxx $(ODIR)/model_trainer_kernels.o $(BDIR)/lib/libctf.a *.cxx Makefile ../Makefile ../examples/ccsd.cxx 17 | $(FCXX) $< $(ODIR)/model_trainer_kernels.o -o $@ -I../include/ -L$(BDIR)/lib -lctf $(LIBS) 18 | 19 | $(BDIR)/bin/%: %.cxx $(BDIR)/lib/libctf.a *.cxx Makefile ../Makefile 20 | $(FCXX) $< -o $@ -I../include/ -L$(BDIR)/lib -lctf $(LIBS) 21 | 22 | -------------------------------------------------------------------------------- /src/tensor/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = untyped_tensor.o algstrct.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | #%d | r ! grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 5 | HDRS = ../../Makefile $(BDIR)/config.mk ../contraction/contraction.h ../interface/common.h ../interface/idx_tensor.h ../interface/partition.h ../interface/timer.h ../interface/world.h ../mapping/distribution.h ../mapping/mapping.h ../redistribution/cyclic_reshuffle.h ../redistribution/dgtog_redist.h ../redistribution/glb_cyclic_reshuffle.h ../redistribution/nosym_transp.h ../redistribution/pad.h ../redistribution/redist.h ../redistribution/sparse_rw.h ../shared/blas_symbs.h ../shared/memcontrol.h ../shared/util.h ../summation/summation.h 6 | 7 | ctf: $(OBJS) 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | -------------------------------------------------------------------------------- /src/contraction/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = contraction.o sym_seq_ctr.o ctr_offload.o ctr_comm.o ctr_tsr.o ctr_2d_general.o sp_seq_ctr.o spctr_tsr.o spctr_comm.o spctr_2d_general.o spctr_offload.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | #%d | r ! 
grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 5 | HDRS = ../../Makefile $(BDIR)/config.mk ../interface/functions.h ../mapping/distribution.h ../mapping/mapping.h ../redistribution/nosym_transp.h ../redistribution/redist.h ../scaling/strp_tsr.h ../shared/iter_tsr.h ../shared/memcontrol.h ../shared/offload.h ../shared/util.h ../symmetry/sym_indices.h ../symmetry/symmetrization.h ../tensor/algstrct.h ../tensor/untyped_tensor.h ../shared/model.h ../shared/init_models.h ../sparse_formats/coo.h ../sparse_formats/csr.h 6 | 7 | ctf: $(OBJS) 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | 13 | -------------------------------------------------------------------------------- /src_python/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = ctf_ext.o 2 | OBJS = $(addprefix $(OEDIR)/, $(LOBJS)) 3 | 4 | #%d | r ! grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 5 | HDRS = ctf_ext.h ../Makefile $(BDIR)/config.mk ../src/contraction/contraction.h ../src/interface/common.h ../src/interface/idx_tensor.h ../src/interface/partition.h ../src/interface/timer.h ../src/interface/world.h ../src/mapping/distribution.h ../src/mapping/mapping.h ../src/redistribution/cyclic_reshuffle.h ../src/redistribution/dgtog_redist.h ../src/redistribution/glb_cyclic_reshuffle.h ../src/redistribution/nosym_transp.h ../src/redistribution/pad.h ../src/redistribution/redist.h ../src/redistribution/sparse_rw.h ../src/shared/blas_symbs.h ../src/shared/memcontrol.h ../src/shared/util.h ../src/summation/summation.h 6 | 7 | ctf_ext_objs: $(OBJS) 8 | 9 | $(OBJS): $(OEDIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | -------------------------------------------------------------------------------- /src/interface/partition.h: -------------------------------------------------------------------------------- 1 | #ifndef __PARTITION_H__ 2 | #define __PARTITION_H__ 3 | 4 | 5 | namespace CTF { 6 | 7 | /** 8 | * \defgroup CTF_part Partition/Decomposition interface 9 | * \addtogroup CTF_part 10 | * @{ 11 | */ 12 | class Idx_Partition; 13 | 14 | class Partition { 15 | public: 16 | int order; 17 | int * lens; 18 | 19 | Partition(int order, int const * lens); 20 | ~Partition(); 21 | Partition(Partition const & other); 22 | Partition(); 23 | 24 | Idx_Partition operator[](char const * idx); 25 | void operator=(Partition const & other); 26 | }; 27 | 28 | class Idx_Partition { 29 | public: 30 | Partition part; 31 | char * idx; 32 | Idx_Partition(); 33 | ~Idx_Partition(); 34 | Idx_Partition(Partition const & part, char const * idx); 35 | 36 | /** 37 | * \brief extracts non-trivial part of partition by ommitting unit dimensions 38 | * \return new partition with all dimensions non-unit 39 | */ 40 | Idx_Partition reduce_order() const; 41 | }; 42 | 43 | /** 44 | * @} 45 | */ 46 | } 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /examples/btwn_central.h: -------------------------------------------------------------------------------- 1 | #ifndef __BTWN_CENTRAL_H__ 2 | #define __BTWN_CENTRAL_H__ 3 | 4 | #include 5 | 6 | #ifdef __CUDACC__ 7 | #define DEVICE __device__ 8 | #define HOST __host__ 9 | #else 10 | #define DEVICE 11 | #define HOST 12 | #endif 13 | 14 | 15 | //structure for regular path that keeps track of the multiplicity of paths 16 | class mpath { 17 | public: 18 | int w; // weighted distance 19 | int m; // multiplictiy 20 | DEVICE HOST 21 | mpath(int w_, int m_){ w=w_; m=m_; } 22 | DEVICE HOST 23 | 
mpath(mpath const & p){ w=p.w; m=p.m; } 24 | DEVICE HOST 25 | mpath(){ w=0; m=0;}; 26 | }; 27 | 28 | //path with a centrality score 29 | class cpath { 30 | public: 31 | double c; // centrality score 32 | float m; 33 | int w; 34 | DEVICE HOST 35 | cpath(int w_, float m_, double c_){ w=w_; m=m_; c=c_;} 36 | DEVICE HOST 37 | cpath(cpath const & p){ w=p.w; m=p.m; c=p.c; } 38 | cpath(){ c=0.0; m=0.0; w=0;}; 39 | }; 40 | 41 | 42 | // min Monoid for cpath structure 43 | CTF::Monoid get_cpath_monoid(); 44 | 45 | //(min, +) tropical semiring for mpath structure 46 | CTF::Semiring get_mpath_semiring(); 47 | 48 | CTF::Bivar_Function * get_Bellman_kernel(); 49 | 50 | CTF::Bivar_Function * get_Brandes_kernel(); 51 | #endif 52 | -------------------------------------------------------------------------------- /src/shared/int_timer.h: -------------------------------------------------------------------------------- 1 | #ifndef __INT_TIMER_H__ 2 | #define __INT_TIMER_H__ 3 | 4 | namespace CTF { 5 | /** 6 | * \defgroup timer Timing and cost measurement 7 | * \addtogroup timer 8 | * @{ 9 | */ 10 | void set_main_args(int argc, const char * const * argv); 11 | 12 | /** 13 | * @} 14 | */ 15 | 16 | } 17 | #ifdef PROFILE 18 | #define TAU 19 | #endif 20 | 21 | #ifdef TAU 22 | #define TAU_FSTART(ARG) \ 23 | do { CTF::Timer t(#ARG); t.start(); } while (0); 24 | 25 | #define TAU_FSTOP(ARG) \ 26 | do { CTF::Timer t(#ARG); t.stop(); } while (0); 27 | 28 | #define TAU_PROFILE_TIMER(ARG1, ARG2, ARG3, ARG4) 29 | 30 | #define TAU_PROFILE_INIT(argc, argv) \ 31 | CTF::set_main_args(argc, argv); 32 | 33 | #define TAU_PROFILE_SET_NODE(ARG) 34 | 35 | #define TAU_PROFILE_START(ARG) \ 36 | CTF::Timer __CTF::Timer##ARG(#ARG); 37 | 38 | #define TAU_PROFILE_STOP(ARG) \ 39 | __CTF::Timer##ARG.stop(); 40 | 41 | #define TAU_PROFILE_SET_CONTEXT(ARG) \ 42 | if (ARG==0) CTF::set_context(MPI_COMM_WORLD); \ 43 | else CTF::set_context((MPI_Comm)ARG); 44 | #endif 45 | 46 | 47 | #endif 48 | 49 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011, Edgar Solomonik> 2 | * All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following 6 | * conditions are met: 7 | * * Redistributions of source code must retain the above copyright 8 | * notice, this list of conditions and the following disclaimer. 9 | * * Redistributions in binary form must reproduce the above copyright 10 | * notice, this list of conditions and the following disclaimer in the 11 | * documentation and/or other materials provided with the distribution. 12 | * 13 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 14 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 | * ARE DISCLAIMED. 
IN NO EVENT SHALL EDGAR SOLOMONIK BE LIABLE FOR ANY 17 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 19 | * SERVICES LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 20 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 | * LIABILITY, OR TORT(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 | * SUCH DAMAGE. */ 24 | -------------------------------------------------------------------------------- /src/contraction/sp_seq_ctr.h: -------------------------------------------------------------------------------- 1 | #ifndef __SP_SEQ_CTR_H__ 2 | #define __SP_SEQ_CTR_H__ 3 | 4 | #include "contraction.h" 5 | namespace CTF_int{ 6 | void spA_dnB_dnC_seq_ctr(char const * alpha, 7 | char const * A, 8 | int64_t size_A, 9 | algstrct const * sr_A, 10 | int order_A, 11 | int const * edge_len_A, 12 | int const * sym_A, 13 | int const * idx_map_A, 14 | char const * B, 15 | algstrct const * sr_B, 16 | int order_B, 17 | int const * edge_len_B, 18 | int const * sym_B, 19 | int const * idx_map_B, 20 | char const * beta, 21 | char * C, 22 | algstrct const * sr_C, 23 | int order_C, 24 | int const * edge_len_C, 25 | int const * sym_C, 26 | int const * idx_map_C, 27 | bivar_function const * func); 28 | } 29 | #endif 30 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | os: linux 3 | language: python 4 | python: 5 | - "2.7" 6 | - "3.5" 7 | - "3.6" 8 | env: 9 | - CTF_CXX=clang++ 10 | - CTF_CXX=g++ 11 | 12 | addons: 13 | apt: 14 | sources: 15 | - ubuntu-toolchain-r-test 16 | packages: 17 | - gcc-5 18 | - g++-5 19 | - gfortran-5 20 | - libgfortran-5-dev 21 | - libblas-dev 22 | - liblapack-dev 23 | - mpich2 24 | - libmpich2-dev 25 | - cmake 26 | before_install: 27 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]] || [[ "$TRAVIS_PYTHON_VERSION" == "2.6" ]] ; then 28 | export PYTHONMAJORV=2; 29 | else 30 | export PYTHONMAJORV=3; 31 | fi 32 | - wget https://repo.continuum.io/miniconda/Miniconda${PYTHONMAJORV}-latest-Linux-x86_64.sh 33 | - bash Miniconda${PYTHONMAJORV}-latest-Linux-x86_64.sh -b 34 | - export PATH=$HOME/miniconda${PYTHONMAJORV}/bin:$PATH 35 | - conda create -y -n mypy python=$TRAVIS_PYTHON_VERSION 36 | - source activate mypy 37 | - conda install -y cython 38 | - conda install -y numpy nomkl blas=*=openblas 39 | install: 40 | - $CTF_CXX --version 41 | - FC=gfortran-5 ./configure CXX="mpicxx -cxx=$CTF_CXX" --build-hptt --build-scalapack 42 | - make -j2 43 | - make python -j2 44 | script: 45 | - make test 46 | - make test2 47 | - make python_test 48 | - make python_test2 49 | after_failure: 50 | notifications: 51 | email: 52 | recipients: 53 | - solomon2@illinois.edu 54 | on_success: change 55 | on_failure: always 56 | 57 | 58 | -------------------------------------------------------------------------------- /src/interface/scalar.h: -------------------------------------------------------------------------------- 1 | #ifndef __SCALAR_H__ 2 | #define __SCALAR_H__ 3 | namespace CTF { 4 | 5 | /** 6 | * \addtogroup CTF 7 | * @{ 8 | **/ 9 | /** 10 | * \brief Scalar class which encapsulates a 0D tensor 11 | */ 12 | template 13 | class Scalar : public Tensor { 14 | public: 15 | /** 16 | * \brief constructor for a scalar 17 | * \param[in] wrld CTF world 
where the tensor will live 18 | * \param[in] sr defines the tensor arithmetic for this tensor 19 | */ 20 | Scalar(World & wrld=get_universe(), 21 | CTF_int::algstrct const & sr=Ring()); 22 | 23 | /** 24 | * \brief constructor for a scalar with predefined value 25 | * \param[in] val scalar value 26 | * \param[in] wrld CTF world where the tensor will live 27 | * \param[in] sr defines the tensor arithmetic for this tensor 28 | */ 29 | Scalar(dtype val, 30 | World & wrld=get_universe(), 31 | CTF_int::algstrct const & sr=Ring()); 32 | 33 | /** 34 | * \brief returns scalar value 35 | */ 36 | dtype get_val(); 37 | 38 | /** 39 | * \brief sets scalar value 40 | */ 41 | void set_val(dtype val); 42 | 43 | /** 44 | * \brief casts into a dtype value 45 | */ 46 | operator dtype() { return get_val(); } 47 | 48 | Scalar & operator=(const Scalar & A); 49 | 50 | }; 51 | 52 | /** 53 | * @} 54 | */ 55 | } 56 | #include "scalar.cxx" 57 | #endif 58 | -------------------------------------------------------------------------------- /src/interface/decomposition.h: -------------------------------------------------------------------------------- 1 | #ifndef __DECOMPOSITION_H__ 2 | #define __DECOMPOSITION_H__ 3 | #include "tensor.h" 4 | #include "matrix.h" 5 | #include "vector.h" 6 | namespace CTF { 7 | 8 | void fold_unfold(Tensor& X, Tensor& Y); 9 | 10 | template 11 | class Decomposition { 12 | public: 13 | /** 14 | * \brief associated an index map with the tensor decomposition for algebra 15 | * \param[in] idx_map index assignment for this tensor 16 | */ 17 | virtual Contract_Term operator[](char const * idx_map) = 0; 18 | }; 19 | 20 | template 21 | class HoSVD : public Decomposition { 22 | public: 23 | Tensor core_tensor; 24 | std::vector< Matrix > factor_matrices; 25 | 26 | /** 27 | * \calculate higher order singular value decomposition of a tensor 28 | * \param[in] ranks ranks(dimensions) of the core tensor and factor matrices 29 | */ 30 | HoSVD(Tensor T, int * ranks); 31 | 32 | /** 33 | * \calculate initialize a higher order singular value decomposition of a tensor to zero 34 | * \param[in] lens ranks(dimensions) of the factored tensor 35 | * \param[in] ranks ranks(dimensions) of the core tensor and factor matrices 36 | */ 37 | HoSVD(int * lens, int * ranks); 38 | 39 | /** 40 | * \brief associated an index map with the tensor decomposition for algebra 41 | * \param[in] idx_map index assignment for this tensor 42 | */ 43 | Contract_Term operator[](char const * idx_map); 44 | 45 | }; 46 | 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /src/interface/scalar.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | #include "common.h" 3 | 4 | namespace CTF { 5 | 6 | template 7 | Scalar::Scalar(World & world_, CTF_int::algstrct const & sr_) : 8 | Tensor(0, 0, NULL, NULL, world_, sr_) { 9 | 10 | } 11 | 12 | template 13 | Scalar::Scalar(dtype val, 14 | World & world, 15 | CTF_int::algstrct const & sr_) 16 | : Tensor(0, 0, NULL, NULL, world, sr_) { 17 | int64_t s; 18 | dtype * arr; 19 | 20 | if (world.cdt.rank == 0){ 21 | arr = this->get_raw_data(&s); 22 | arr[0] = val; 23 | } 24 | } 25 | 26 | 27 | template 28 | dtype Scalar::get_val(){ 29 | int64_t s; 30 | dtype * datap; 31 | dtype val; 32 | datap = this->get_raw_data(&s); 33 | memcpy(&val, datap, sizeof(dtype)); 34 | MPI_Bcast((char *)&val, sizeof(dtype), MPI_CHAR, 0, this->wrld->comm); 35 | return val; 36 | 
} 37 | 38 | template 39 | void Scalar::set_val(dtype const val){ 40 | int64_t s; 41 | dtype * arr; 42 | if (this->world->ctf->get_rank() == 0){ 43 | arr = this->world->ctf->get_raw_data(&s); 44 | arr[0] = val; 45 | } 46 | } 47 | 48 | template 49 | Scalar & Scalar::operator=(const Scalar & A){ 50 | CTF_int::tensor::free_self(); 51 | CTF_int::tensor::init(A.sr, A.order, A.lens, A.sym, A.wrld, 1, A.name, A.profile, A.is_sparse); 52 | return *this; 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/redistribution/dgtog_redist.h: -------------------------------------------------------------------------------- 1 | 2 | #include "dgtog_calc_cnt.h" 3 | 4 | namespace CTF_int { 5 | /** 6 | * \brief estimates execution time, given this processor sends a receives tot_sz across np procs 7 | * \param[in] tot_sz amount of data sent/recved 8 | * \param[in] np number of procs involved 9 | */ 10 | double dgtog_est_time(int64_t tot_sz, int np); 11 | 12 | void dgtog_reshuffle(int const * sym, 13 | int const * edge_len, 14 | distribution const & old_dist, 15 | distribution const & new_dist, 16 | char ** ptr_tsr_data, 17 | char ** ptr_tsr_new_data, 18 | algstrct const * sr, 19 | CommData ord_glb_comm); 20 | 21 | void redist_bucket_r0(int * const * bucket_offset, 22 | int64_t * const * data_offset, 23 | int * const * ivmax_pre, 24 | int rep_phase0, 25 | int rep_idx0, 26 | int virt_dim0, 27 | bool data_to_buckets, 28 | char * __restrict__ data, 29 | char ** __restrict__ buckets, 30 | int64_t * counts, 31 | algstrct const * sr, 32 | int64_t data_off, 33 | int bucket_off, 34 | int prev_idx); 35 | 36 | } 37 | -------------------------------------------------------------------------------- /examples/moldynamics.h: -------------------------------------------------------------------------------- 1 | #ifndef __MOLDYNAMICS_H__ 2 | #define __MOLDYNAMICS_H__ 3 | 4 | class force { 5 | public: 6 | double fx; 7 | double fy; 8 | 9 | force operator-() const { 10 | force fnew; 11 | fnew.fx = -fx; 12 | fnew.fy = -fy; 13 | return fnew; 14 | } 15 | 16 | force operator+(force const & fother) const { 17 | force fnew; 18 | fnew.fx = fx+fother.fx; 19 | fnew.fy = fy+fother.fy; 20 | return fnew; 21 | } 22 | 23 | force(){ 24 | fx = 0.0; 25 | fy = 0.0; 26 | } 27 | 28 | // additive identity 29 | force(int){ 30 | fx = 0.0; 31 | fy = 0.0; 32 | } 33 | }; 34 | 35 | class particle { 36 | public: 37 | double dx; 38 | double dy; 39 | double coeff; 40 | int id; 41 | 42 | particle(){ 43 | dx = 0.0; 44 | dy = 0.0; 45 | coeff = 0.0; 46 | id = 0; 47 | } 48 | }; 49 | 50 | void acc_force(force f, particle & p){ 51 | p.dx += f.fx*p.coeff; 52 | p.dy += f.fy*p.coeff; 53 | } 54 | 55 | #ifdef __CUDACC__ 56 | __device__ __host__ 57 | #endif 58 | double get_distance(particle const & p, particle const & q){ 59 | return sqrt((p.dx-q.dx)*(p.dx-q.dx)+(p.dy-q.dy)*(p.dy-q.dy)); 60 | } 61 | 62 | #ifdef __CUDACC__ 63 | __device__ __host__ 64 | #endif 65 | force get_force(particle const p, particle const q){ 66 | force f; 67 | f.fx = (p.dx-q.dx)/std::pow(get_distance(p,q)+.01,3); 68 | f.fy = (p.dy-q.dy)/std::pow(get_distance(p,q)+.01,3); 69 | return f; 70 | } 71 | namespace CTF { 72 | template <> 73 | inline void Set::print(char const * a, FILE * fp) const { 74 | fprintf(fp,"(dx=%lf dy=%lf coeff=%lf id=%d)",((particle*)a)[0].dx,((particle*)a)[0].dy,((particle*)a)[0].coeff,((particle*)a)[0].id); 75 | } 76 | template <> 77 | inline void Set::print(char const * a, FILE * fp) const { 78 | fprintf(fp,"(fx=%lf 
fy=%lf)",((force*)a)[0].fx,((force*)a)[0].fy); 79 | } 80 | 81 | } 82 | 83 | 84 | #endif 85 | 86 | -------------------------------------------------------------------------------- /src/shared/init_models.h: -------------------------------------------------------------------------------- 1 | #ifndef __INIT_MODELS_H__ 2 | #define __INIT_MODELS_H__ 3 | namespace CTF_int{ 4 | extern double long_contig_transp_mdl_init[]; 5 | extern double shrt_contig_transp_mdl_init[]; 6 | extern double non_contig_transp_mdl_init[]; 7 | extern double alltoall_mdl_init[]; 8 | extern double alltoallv_mdl_init[]; 9 | extern double red_mdl_init[]; 10 | extern double red_mdl_cst_init[]; 11 | extern double csrred_mdl_init[]; 12 | extern double csrred_mdl_cst_init[]; 13 | extern double allred_mdl_init[]; 14 | extern double allred_mdl_cst_init[]; 15 | extern double bcast_mdl_init[]; 16 | extern double dgtog_res_mdl_init[]; 17 | extern double spredist_mdl_init[]; 18 | extern double blres_mdl_init[]; 19 | extern double pin_keys_mdl_init[]; 20 | extern double seq_tsr_ctr_mdl_cst_init[]; 21 | extern double seq_tsr_ctr_mdl_ref_init[]; 22 | extern double seq_tsr_ctr_mdl_inr_init[]; 23 | extern double seq_tsr_ctr_mdl_off_init[]; 24 | extern double seq_tsr_ctr_mdl_cst_inr_init[]; 25 | extern double seq_tsr_ctr_mdl_cst_off_init[]; 26 | extern double upload_mdl_init[]; 27 | extern double download_mdl_init[]; 28 | extern double seq_tsr_spctr_cst_off_k0_init[]; 29 | extern double seq_tsr_spctr_cst_off_k1_init[]; 30 | extern double seq_tsr_spctr_cst_off_k2_init[]; 31 | extern double seq_tsr_spctr_cst_k0_init[]; 32 | extern double seq_tsr_spctr_cst_k1_init[]; 33 | extern double seq_tsr_spctr_cst_k2_init[]; 34 | extern double seq_tsr_spctr_cst_k3_init[]; 35 | extern double seq_tsr_spctr_cst_k4_init[]; 36 | extern double seq_tsr_spctr_off_k0_init[]; 37 | extern double seq_tsr_spctr_off_k1_init[]; 38 | extern double seq_tsr_spctr_off_k2_init[]; 39 | extern double seq_tsr_spctr_k0_init[]; 40 | extern double seq_tsr_spctr_k1_init[]; 41 | extern double seq_tsr_spctr_k2_init[]; 42 | extern double seq_tsr_spctr_k3_init[]; 43 | extern double seq_tsr_spctr_k4_init[]; 44 | } 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /bench/model_trainer_kernels.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace CTF; 3 | 4 | struct grp{ 5 | #ifdef __CUDACC__ 6 | __device__ __host__ 7 | #endif 8 | static double op1(double a, double b){ return b-b/a; }; 9 | #ifdef __CUDACC__ 10 | __device__ __host__ 11 | #endif 12 | static void op2(double a, double & b){ b+=a; }; 13 | static double op2_t2(double a, double b){ return a+b; }; 14 | static void op2_red(double const * a, double * b, int n){ 15 | #pragma omp parallel for 16 | for (int i=0; i mon(0, grp::op2_t2, madd); 29 | for (double sp = .005; sp<.32; sp*=2.){ 30 | Matrix<> A(m, n, dw, mon); 31 | Matrix<> B(m, n, dw, mon); 32 | Matrix<> G(n, n, dw, mon); 33 | Vector<> b(n, dw, mon); 34 | Vector<> c(m, dw, mon); 35 | 36 | srand48(dw.rank); 37 | b.fill_random(-.5, .5); 38 | c.fill_random(-.5, .5); 39 | A.fill_random(-.5, .5); 40 | B.fill_random(-.5, .5); 41 | G.fill_random(-.5, .5); 42 | 43 | Bivar_Kernel k1; 44 | 45 | if (sp > .009){ 46 | if (sp_A) 47 | A.sparsify([=](double a){ return fabs(a)<=.5*sp; }); 48 | if (sp_B){ 49 | G.sparsify([=](double a){ return fabs(a)<=.5*sp; }); 50 | b.sparsify([=](double a){ return fabs(a)<=.5*sp; }); 51 | } 52 | if (sp_C){ 53 | B.sparsify([=](double a){ 
return fabs(a)<=.5*sp; }); 54 | c.sparsify([=](double a){ return fabs(a)<=.5*sp; }); 55 | } 56 | } 57 | 58 | k1(A["ik"],G["kj"],B["ij"]); 59 | k1(A["ij"],b["j"],c["i"]); 60 | 61 | } 62 | } 63 | 64 | -------------------------------------------------------------------------------- /src/symmetry/sym_indices.h: -------------------------------------------------------------------------------- 1 | /** Written by Devin Matthews */ 2 | 3 | #ifndef __INT_SYM_INDICES_H__ 4 | #define __INT_SYM_INDICES_H__ 5 | 6 | #include 7 | 8 | template 9 | int relativeSign(RAIterator s1b, RAIterator s1e, RAIterator s2b, RAIterator s2e) 10 | { 11 | int sz = s1e-s1b; 12 | assert(sz == (int)(s2e-s2b)); 13 | int i, k; 14 | int sign = 1; 15 | std::vector seen(sz); 16 | 17 | for (i = 0;i < sz;i++) seen[i] = false; 18 | 19 | for (i = 0;i < sz;i++) 20 | { 21 | if (seen[i]) continue; 22 | int j = i; 23 | while (true) 24 | { 25 | for (k = 0;k < sz && (!(*(s1b+k) == *(s2b+j)) || seen[k]);k++); 26 | assert(k < sz); 27 | j = k; 28 | seen[j] = true; 29 | if (j == i) break; 30 | sign = -sign; 31 | } 32 | } 33 | 34 | return sign; 35 | } 36 | 37 | template 38 | int relativeSign(const T& s1, const T& s2) 39 | { 40 | return relativeSign(s1.begin(), s1.end(), s2.begin(), s2.end()); 41 | } 42 | 43 | template 44 | int align_symmetric_indices(int order_A, T& idx_A, const int* sym_A, 45 | int order_B, T& idx_B, const int* sym_B); 46 | 47 | template 48 | int align_symmetric_indices(int order_A, T& idx_A, const int* sym_A, 49 | int order_B, T& idx_B, const int* sym_B, 50 | int order_C, T& idx_C, const int* sym_C); 51 | 52 | template 53 | int overcounting_factor(int order_A, const T& idx_A, const int* sym_A, 54 | int order_B, const T& idx_B, const int* sym_B, 55 | int order_C, const T& idx_C, const int* sym_C); 56 | 57 | template 58 | int overcounting_factor(int order_A, const T& idx_A, const int* sym_A, 59 | int order_B, const T& idx_B, const int* sym_B); 60 | 61 | #endif 62 | 63 | -------------------------------------------------------------------------------- /src/interface/group.h: -------------------------------------------------------------------------------- 1 | #ifndef __GROUP_H__ 2 | #define __GROUP_H__ 3 | 4 | #include "../tensor/algstrct.h" 5 | 6 | namespace CTF { 7 | /** 8 | * \addtogroup algstrct 9 | * @{ 10 | **/ 11 | /** 12 | * \brief Group is a Monoid with operator '-' defined 13 | * special case (parent) of a ring 14 | */ 15 | template ()> 16 | class Group : public Monoid { 17 | public: 18 | Group(Group const & other) : Monoid(other) { } 19 | 20 | virtual CTF_int::algstrct * clone() const { 21 | return new Group(*this); 22 | } 23 | 24 | Group() : Monoid() { 25 | this->abs = &CTF_int::char_abs< dtype, CTF_int::default_abs >; 26 | } 27 | 28 | Group(dtype taddid_, 29 | dtype (*fadd_)(dtype a, dtype b), 30 | MPI_Op addmop_) 31 | : Monoid(taddid_, fadd_, addmop_) { 32 | this->abs = &CTF_int::char_abs< dtype, CTF_int::default_abs >; 33 | } 34 | 35 | //treat NULL as mulid 36 | void safeaddinv(char const * a, char *& b) const { 37 | if (a == NULL){ 38 | printf("CTF ERROR: unfortunately additive inverse functionality for groups is currently limited, as it is done for rings via scaling by the inverse of the multiplicative identity, which groups don't have. 
Use the tensor addinv function rather than an indexed expression.\n"); 39 | double * ptr = NULL; 40 | ptr[0]=3.; 41 | assert(0); 42 | } else { 43 | if (b==NULL) b = (char*)malloc(this->el_size); 44 | ((dtype*)b)[0] = -((dtype*)a)[0]; 45 | } 46 | } 47 | 48 | void addinv(char const * a, char * b) const { 49 | ((dtype*)b)[0] = -((dtype*)a)[0]; 50 | } 51 | }; 52 | 53 | /** 54 | * @} 55 | */ 56 | } 57 | #include "semiring.h" 58 | #endif 59 | -------------------------------------------------------------------------------- /src/interface/fun_term.h: -------------------------------------------------------------------------------- 1 | #ifndef __CUST_TERM_H__ 2 | #define __CUST_TERM_H__ 3 | 4 | #include "term.h" 5 | 6 | namespace CTF_int { 7 | class univar_function; 8 | class bivar_function; 9 | } 10 | 11 | namespace CTF_int { 12 | class Unifun_Term : public Term{ 13 | public: 14 | Term * A; 15 | univar_function const * func; 16 | 17 | Unifun_Term(Term * A, 18 | univar_function const * func); 19 | 20 | Unifun_Term(Unifun_Term const & other, 21 | std::map* remap=NULL); 22 | 23 | ~Unifun_Term(); 24 | 25 | Term * clone(std::map* remap = NULL) const; 26 | 27 | void execute(CTF::Idx_Tensor output) const; 28 | 29 | CTF::Idx_Tensor execute() const; 30 | 31 | CTF::Idx_Tensor estimate_time(double & cost) const; 32 | 33 | double estimate_time(CTF::Idx_Tensor output) const; 34 | 35 | void get_inputs(std::set* inputs_set) const; 36 | 37 | CTF::World * where_am_i() const; 38 | }; 39 | 40 | class Bifun_Term : public Term { 41 | public: 42 | Term * A; 43 | Term * B; 44 | bivar_function const * func; 45 | 46 | Bifun_Term(Term * A, 47 | Term * B, 48 | bivar_function const * func); 49 | 50 | Bifun_Term(Bifun_Term const & other, 51 | std::map* remap=NULL); 52 | 53 | ~Bifun_Term(); 54 | 55 | Term * clone(std::map* remap = NULL) const; 56 | 57 | void execute(CTF::Idx_Tensor output) const; 58 | 59 | CTF::Idx_Tensor execute() const; 60 | 61 | CTF::Idx_Tensor estimate_time(double & cost) const; 62 | 63 | double estimate_time(CTF::Idx_Tensor output) const; 64 | 65 | void get_inputs(std::set* inputs_set) const; 66 | 67 | CTF::World * where_am_i() const; 68 | }; 69 | 70 | } 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /src/interface/partition.cxx: -------------------------------------------------------------------------------- 1 | #include "partition.h" 2 | #include "../shared/util.h" 3 | 4 | namespace CTF { 5 | Partition::Partition(int order_, int const * lens_){ 6 | order = order_; 7 | lens = (int*)CTF_int::alloc(order*sizeof(int)); 8 | memcpy(lens, lens_, order*sizeof(int)); 9 | } 10 | 11 | Partition::Partition(){ 12 | order = 0; 13 | lens = NULL; 14 | } 15 | 16 | Partition::~Partition(){ 17 | CTF_int::cdealloc(lens); 18 | } 19 | 20 | Partition::Partition(Partition const & other){ 21 | order = other.order; 22 | lens = (int*)CTF_int::alloc(order*sizeof(int)); 23 | memcpy(lens, other.lens, order*sizeof(int)); 24 | } 25 | 26 | void Partition::operator=(Partition const & other){ 27 | order = other.order; 28 | lens = (int*)CTF_int::alloc(order*sizeof(int)); 29 | memcpy(lens, other.lens, order*sizeof(int)); 30 | } 31 | 32 | 33 | Idx_Partition Partition::operator[](char const * idx){ 34 | return Idx_Partition(*this, idx); 35 | } 36 | 37 | Idx_Partition::Idx_Partition(){ 38 | part = Partition(0, NULL); 39 | idx = NULL; 40 | } 41 | 42 | Idx_Partition::Idx_Partition(Partition const & part_, char const * idx_){ 43 | part = part_; 44 | idx = 
(char*)malloc(part.order*sizeof(char)); 45 | memcpy(idx, idx_, part.order*sizeof(char)); 46 | } 47 | 48 | Idx_Partition::~Idx_Partition(){ 49 | if (idx != NULL){ 50 | free(idx); 51 | idx = NULL; 52 | } 53 | } 54 | 55 | Idx_Partition Idx_Partition::reduce_order() const { 56 | int * new_lens = (int*)malloc(part.order*sizeof(int)); 57 | int new_order = 0; 58 | char * new_idx = (char*)malloc(part.order); 59 | for (int i=0; i 9 | 10 | using namespace CTF; 11 | 12 | int ccsdt_map_test(int n, 13 | World &dw){ 14 | 15 | int rank, num_pes; 16 | 17 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 18 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 19 | 20 | //int shapeAS6[] = {AS,AS,NS,AS,AS,NS}; 21 | int shapeNS6[] = {NS,NS,NS,NS,NS,NS}; 22 | int nnnnnn[] = {n,n,n,n,n,n}; 23 | int shapeNS4[] = {NS,NS,NS,NS}; 24 | int nnnn[] = {n,n,n,n}; 25 | 26 | //* Creates distributed tensors initialized with zeros 27 | Tensor<> W(4, nnnn, shapeNS4, dw, "W", 1); 28 | Tensor<> T(4, nnnn, shapeNS4, dw, "T", 1); 29 | Tensor<> Z(6, nnnnnn, shapeNS6, dw, "Z", 1); 30 | 31 | Z["hijmno"] += W["hijk"]*T["kmno"]; 32 | 33 | return 1; 34 | } 35 | 36 | char* getCmdOption(char ** begin, 37 | char ** end, 38 | const std::string & option){ 39 | char ** itr = std::find(begin, end, option); 40 | if (itr != end && ++itr != end){ 41 | return *itr; 42 | } 43 | return 0; 44 | } 45 | 46 | 47 | int main(int argc, char ** argv){ 48 | int rank, np, niter, n; 49 | int const in_num = argc; 50 | char ** input_str = argv; 51 | 52 | MPI_Init(&argc, &argv); 53 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 54 | MPI_Comm_size(MPI_COMM_WORLD, &np); 55 | 56 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 57 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 58 | if (n < 0) n = 4; 59 | } else n = 4; 60 | 61 | if (getCmdOption(input_str, input_str+in_num, "-niter")){ 62 | niter = atoi(getCmdOption(input_str, input_str+in_num, "-niter")); 63 | if (niter < 0) niter = 3; 64 | } else niter = 3; 65 | 66 | 67 | 68 | { 69 | World dw(argc, argv); 70 | int pass = ccsdt_map_test(n, dw); 71 | assert(pass); 72 | } 73 | 74 | 75 | MPI_Finalize(); 76 | return 0; 77 | } 78 | /** 79 | * @} 80 | * @} 81 | */ 82 | 83 | 84 | -------------------------------------------------------------------------------- /src/interface/monoid.cxx: -------------------------------------------------------------------------------- 1 | #include "../sparse_formats/csr.h" 2 | #include "set.h" 3 | #include "../shared/blas_symbs.h" 4 | #include "../shared/mkl_symbs.h" 5 | #include "../shared/util.h" 6 | using namespace CTF_int; 7 | namespace CTF { 8 | /* template <> 9 | void Monoid::csr_add(int64_t m, int64_t n, char const * a, int const * ja, int const * ia, char const * b, int const * jb, int const * ib, char *& c, int *& jc, int *& ic){ 10 | if (fadd != default_add){ 11 | printf("CTF error: support for CSR addition for this type unavailable\n"); 12 | assert(0); 13 | } 14 | alloc(sizeof(int)*(m+1), (void**)&ic); 15 | bool tA = 'N'; 16 | bool tB = 'N'; 17 | int job = 1; 18 | int sort = 1; 19 | float mlid = 1.0; 20 | int info; 21 | MKL_SCSRADD(tA, tB, &job, &sort, m, n, (float*)a, ja, ia, &mlid, (float*)b, jb, ib, NULL, NULL, ic, NULL, &info); 22 | alloc(sizeof(int)*ic[m], (void**)&jc); 23 | alloc(sizeof(float)*ic[m], (void**)&c); 24 | int job = 2; 25 | MKL_SCSRADD(tA, tB, &job, &sort, m, n, (float*)a, ja, ia, &mlid, (float*)b, jb, ib, (float*)c, jc, ic, NULL, &info); 26 | }*/ 27 | 28 | template <> 29 | char * CTF::Monoid::csr_add(char * cA, char * cB) const { 30 | #if USE_MKL 31 | 
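// MKL path: mkl_dcsradd is called twice, first (job=1) to compute the output row pointers and size the result, then (job=2) to fill its column indices and values; without MKL the #else branch falls back to the generic CTF_int::algstrct::csr_add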
TAU_FSTART(mkl_csr_add) 32 | if (fadd != &default_add){ 33 | return CTF_int::algstrct::csr_add(cA, cB); 34 | } 35 | CSR_Matrix A(cA); 36 | CSR_Matrix B(cB); 37 | int * ic; 38 | int m = A.nrow(); 39 | int n = A.ncol(); 40 | alloc_ptr(sizeof(int)*(m+1), (void**)&ic); 41 | char tA = 'N'; 42 | int job = 1; 43 | int sort = 1; 44 | double mlid = 1.0; 45 | int info; 46 | CTF_BLAS::MKL_DCSRADD(&tA, &job, &sort, &m, &n, (double*)A.vals(), A.JA(), A.IA(), &mlid, (double*)B.vals(), B.JA(), B.IA(), NULL, NULL, ic, NULL, &info); 47 | CSR_Matrix C(ic[m]-1, m, n, this); 48 | memcpy(C.IA(), ic, sizeof(int)*(m+1)); 49 | cdealloc(ic); 50 | job = 2; 51 | CTF_BLAS::MKL_DCSRADD(&tA, &job, &sort, &m, &n, (double*)A.vals(), A.JA(), A.IA(), &mlid, (double*)B.vals(), B.JA(), B.IA(), (double*)C.vals(), C.JA(), C.IA(), NULL, &info); 52 | TAU_FSTOP(mkl_csr_add) 53 | return C.all_data; 54 | #else 55 | return CTF_int::algstrct::csr_add(cA, cB); 56 | #endif 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/scaling/sym_seq_scl.h: -------------------------------------------------------------------------------- 1 | #ifndef __SYM_SEQ_SCL_H__ 2 | #define __SYM_SEQ_SCL_H__ 3 | 4 | #include "../tensor/algstrct.h" 5 | #include "../interface/term.h" 6 | 7 | namespace CTF_int { 8 | 9 | /** 10 | * \brief untyped internal class for singly-typed single variable function (Endomorphism) 11 | */ 12 | class endomorphism { 13 | public: 14 | /** 15 | * \brief apply function f to value stored at a 16 | * \param[in,out] a pointer to operand that will be cast to type by extending class 17 | * return result of applying f on value at a 18 | */ 19 | virtual void apply_f(char * a) const { assert(0); } 20 | 21 | /** 22 | * \brief apply f to A 23 | * \param[in] A operand tensor with pre-defined indices 24 | */ 25 | void operator()(Term const & A) const; 26 | 27 | virtual ~endomorphism(){} 28 | }; 29 | 30 | /** 31 | * \brief performs symmetric scaling using custom func 32 | */ 33 | int sym_seq_scl_cust(char const * alpha, 34 | char * A, 35 | algstrct const * sr_A, 36 | int const order_A, 37 | int const * edge_len_A, 38 | int const * sym_A, 39 | int const * idx_map_A, 40 | endomorphism const * func); 41 | /** 42 | * \brief performs symmetric scaling using algstrct const * sr_A 43 | */ 44 | int sym_seq_scl_ref(char const * alpha, 45 | char * A, 46 | algstrct const * sr_A, 47 | int order_A, 48 | int const * edge_len_A, 49 | int const * sym_A, 50 | int const * idx_map_A); 51 | /** 52 | * \brief invert index map 53 | * \param[in] order_A number of dimensions of A 54 | * \param[in] idx_A index map of A 55 | * \param[out] order_tot number of total dimensions 56 | * \param[out] idx_arr 2*ndim_tot index array 57 | */ 58 | void inv_idx(int const order_A, 59 | int const * idx_A, 60 | int * order_tot, 61 | int ** idx_arr); 62 | } 63 | #endif 64 | -------------------------------------------------------------------------------- /src/scaling/scale_tsr.h: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #ifndef __SCL_TSR_H__ 4 | #define __SCL_TSR_H__ 5 | 6 | #include "../tensor/algstrct.h" 7 | #include "sym_seq_scl.h" 8 | 9 | namespace CTF_int { 10 | 11 | 12 | class scl { 13 | public: 14 | char * A; 15 | algstrct const * sr_A; 16 | char const * alpha; 17 | void * buffer; 18 | 19 | virtual void run() {}; 20 | virtual int64_t mem_fp() { return 0; }; 21 | virtual scl * clone() { return NULL; }; 22 | 23 | 
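// base-class destructor releases the scratch buffer if one was allocated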
virtual ~scl(){ if (buffer != NULL) CTF_int::cdealloc(buffer); } 24 | scl(scl * other); 25 | scl(){ buffer = NULL; } 26 | }; 27 | 28 | class scl_virt : public scl { 29 | public: 30 | /* Class to be called on sub-blocks */ 31 | scl * rec_scl; 32 | 33 | int num_dim; 34 | int * virt_dim; 35 | int order_A; 36 | int64_t blk_sz_A; 37 | int const * idx_map_A; 38 | 39 | void run(); 40 | int64_t mem_fp(); 41 | scl * clone(); 42 | 43 | scl_virt(scl * other); 44 | ~scl_virt(); 45 | scl_virt(){} 46 | }; 47 | 48 | class seq_tsr_scl : public scl { 49 | public: 50 | int order; 51 | int * edge_len; 52 | int const * idx_map; 53 | int const * sym; 54 | //fseq_tsr_scl func_ptr; 55 | 56 | int is_custom; 57 | endomorphism const * func; //fseq_elm_scl custom_params; 58 | 59 | void run(); 60 | void print(); 61 | int64_t mem_fp(); 62 | scl * clone(); 63 | 64 | /** 65 | * \brief copies scl object 66 | * \param[in] other object to copy 67 | */ 68 | seq_tsr_scl(scl * other); 69 | ~seq_tsr_scl(){ CTF_int::cdealloc(edge_len); }; 70 | seq_tsr_scl(){} 71 | }; 72 | 73 | /** 74 | * \brief invert index map 75 | * \param[in] order_A number of dimensions of A 76 | * \param[in] idx_A index map of A 77 | * \param[in] order_B number of dimensions of B 78 | * \param[in] idx_B index map of B 79 | * \param[out] order_tot number of total dimensions 80 | * \param[out] idx_arr 2*order_tot index array 81 | */ 82 | void inv_idx(int order_A, 83 | int const * idx_A, 84 | int * order_tot, 85 | int ** idx_arr); 86 | 87 | } 88 | #endif // __SCL_TSR_H__ 89 | -------------------------------------------------------------------------------- /examples/spmv.cxx: -------------------------------------------------------------------------------- 1 | /** \addtogroup examples 2 | * @{ 3 | * \defgroup spmv spmv 4 | * @{ 5 | * \brief Multiplication of a random square sparse matrix by a vector 6 | */ 7 | 8 | #include 9 | using namespace CTF; 10 | 11 | int spmv(int n, 12 | World & dw){ 13 | 14 | Matrix<> spA(n, n, SP, dw); 15 | Matrix<> dnA(n, n, dw); 16 | Vector<> b(n, dw); 17 | Vector<> c1(n, dw); 18 | Vector<> c2(n, dw); 19 | 20 | srand48(dw.rank); 21 | b.fill_random(0.0,1.0); 22 | c1.fill_random(0.0,1.0); 23 | dnA.fill_random(0.0,1.0); 24 | 25 | spA["ij"] += dnA["ij"]; 26 | spA.sparsify(.5); 27 | dnA["ij"] = 0.0; 28 | dnA["ij"] += spA["ij"]; 29 | 30 | c2["i"] = c1["i"]; 31 | 32 | c1["i"] += dnA["ij"]*b["j"]; 33 | 34 | c2["i"] += .5*spA["ij"]*b["j"]; 35 | c2["i"] += .5*b["j"]*spA["ij"]; 36 | 37 | 38 | bool pass = c2.norm2() >= 1E-6; 39 | 40 | c2["i"] -= c1["i"]; 41 | 42 | if (pass) pass = c2.norm2() <= 1.E-6; 43 | 44 | if (dw.rank == 0){ 45 | if (pass) 46 | printf("{ c[\"i\"] += A[\"ij\"]*b[\"j\"] with sparse, A } passed \n"); 47 | else 48 | printf("{ c[\"i\"] += A[\"ij\"]*b[\"j\"] with sparse, A } failed \n"); 49 | } 50 | return pass; 51 | } 52 | 53 | 54 | #ifndef TEST_SUITE 55 | char* getCmdOption(char ** begin, 56 | char ** end, 57 | const std::string & option){ 58 | char ** itr = std::find(begin, end, option); 59 | if (itr != end && ++itr != end){ 60 | return *itr; 61 | } 62 | return 0; 63 | } 64 | 65 | 66 | int main(int argc, char ** argv){ 67 | int rank, np, n, pass; 68 | int const in_num = argc; 69 | char ** input_str = argv; 70 | 71 | MPI_Init(&argc, &argv); 72 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 73 | MPI_Comm_size(MPI_COMM_WORLD, &np); 74 | 75 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 76 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 77 | if (n < 0) n = 7; 78 | } else n = 7; 79 | 80 | 81 | { 82 | World dw(argc, 
argv); 83 | 84 | if (rank == 0){ 85 | printf("Multiplying %d-by-%d sparse matrix by vector\n",n,n); 86 | } 87 | pass = spmv(n, dw); 88 | assert(pass); 89 | } 90 | 91 | MPI_Finalize(); 92 | return 0; 93 | } 94 | /** 95 | * @} 96 | * @} 97 | */ 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /src/mapping/distribution.h: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #ifndef __INT_DISTRIBUTION_H__ 4 | #define __INT_DISTRIBUTION_H__ 5 | 6 | #include "mapping.h" 7 | 8 | namespace CTF_int { 9 | 10 | class tensor; 11 | 12 | inline 13 | int get_distribution_size(int order){ 14 | return sizeof(int)*2 + sizeof(int64_t) + order*sizeof(int)*7; 15 | } 16 | 17 | // \brief data distribution object used for redistribution 18 | class distribution { 19 | public: 20 | int order; 21 | int * phase; 22 | int * virt_phase; 23 | int * phys_phase; 24 | int * pe_lda; 25 | int * pad_edge_len; 26 | int * padding; 27 | int * perank; 28 | int is_cyclic; 29 | int64_t size; 30 | 31 | distribution(); 32 | ~distribution(); 33 | 34 | /** 35 | * \brief create distribution object which defines a tensor's data decomposition 36 | * \param[in] tsr tensor whose distribution to record 37 | */ 38 | distribution(tensor const * tsr); 39 | 40 | /** 41 | * \brief create distribution object by deserializing buffer 42 | * \param[in] buffer serialized distribution data 43 | */ 44 | distribution(char const * buffer); 45 | 46 | /** 47 | * \brief serialize object into contiguous data buffer 48 | \param[out] buffer unallocated array into which to serialize 49 | * \param[out] size length of serialized array 50 | */ 51 | void serialize(char ** buffer, int * size); 52 | private: 53 | void free_data(); 54 | }; 55 | 56 | /** 57 | * \brief calculate the block-sizes of a tensor 58 | * \param[in] order number of dimensions of this tensor 59 | * \param[in] size is the size of the local tensor stored 60 | * \param[in] edge_len edge lengths of global tensor 61 | * \param[in] edge_map mapping of each dimension 62 | * \param[out] vrt_sz size of virtual block 63 | * \param[out] vrt_edge_len edge lengths of virtual block 64 | * \param[out] blk_edge_len edge lengths of local block 65 | */ 66 | void calc_dim(int order, 67 | int64_t size, 68 | int const * edge_len, 69 | mapping const * edge_map, 70 | int64_t * vrt_sz, 71 | int * vrt_edge_len, 72 | int * blk_edge_len); 73 | } 74 | 75 | #endif 76 | -------------------------------------------------------------------------------- /test/reduce_bcast.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup tests 4 | * @{ 5 | * \defgroup reduce_bcast reduce_bcast 6 | * @{ 7 | * \brief Summation along tensor diagonals 8 | */ 9 | 10 | #include 11 | 12 | using namespace CTF; 13 | 14 | int reduce_bcast(int n, 15 | World & dw){ 16 | int pass; 17 | 18 | Matrix<> A(n,n,dw); 19 | Matrix<> B(n,1,dw); 20 | Matrix<> C(n,n,dw); 21 | Matrix<> C2(n,n,dw); 22 | Vector<> d(n,dw); 23 | 24 | srand48(13*dw.rank); 25 | 26 | A.fill_random(0.,1.); 27 | B.fill_random(0.,1.); 28 | C.fill_random(0.,1.); 29 | C2["ij"] = C["ij"]; 30 | d.fill_random(0.,1.); 31 | 32 | C["ij"] += B["ik"]; 33 | 34 | d["i"] = B["ij"]; 35 | 36 | C2["ij"] += d["i"]; 37 | 38 | C["ij"] -= C2["ij"]; 39 | 40 | pass = true; 41 | if (C.norm2() > 1.E-6){ 42 | pass = false; 43 | if (dw.rank == 0) 
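
The `get_distribution_size(order)` helper in distribution.h above hard-codes the byte count of a serialized `distribution`; the count is easier to audit with each term named. The field-to-term mapping below is an inference from the member list, not taken from the serializer itself:

```cpp
#include <cstdint>
#include <cstddef>

// Bytes of a serialized CTF_int::distribution, matching
//   get_distribution_size(order) = sizeof(int)*2 + sizeof(int64_t) + order*sizeof(int)*7.
inline size_t distribution_bytes(int order){
  size_t scalar_ints    = 2 * sizeof(int);                  // order, is_cyclic
  size_t scalar_int64   = sizeof(int64_t);                  // size (local element count)
  size_t per_dim_arrays = 7 * (size_t)order * sizeof(int);  // phase, virt_phase, phys_phase,
                                                            // pe_lda, pad_edge_len, padding, perank
  return scalar_ints + scalar_int64 + per_dim_arrays;
}
```
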
44 | printf("{ (A[\"ij\"]+=B[\"ik\"] with square B } failed \n"); 45 | return pass; 46 | } 47 | 48 | C["ij"] = C2["ij"]; 49 | 50 | C["ij"] += B["ik"]; 51 | 52 | d["i"] = B["ik"]; 53 | 54 | C2["ij"] += d["i"]; 55 | 56 | C["ij"] -= C2["ij"]; 57 | 58 | if (C.norm2() > 1.E-6) 59 | pass = false; 60 | 61 | if (pass){ 62 | if (dw.rank == 0) 63 | printf("{ (A[\"ij\"]+=B[\"ik\"] } passed \n"); 64 | } else { 65 | if (dw.rank == 0) 66 | printf("{ (A[\"ij\"]+=B[\"ik\"] with column vector B } failed \n"); 67 | } 68 | return pass; 69 | } 70 | 71 | 72 | #ifndef TEST_SUITE 73 | char* getCmdOption(char ** begin, 74 | char ** end, 75 | const std::string & option){ 76 | char ** itr = std::find(begin, end, option); 77 | if (itr != end && ++itr != end){ 78 | return *itr; 79 | } 80 | return 0; 81 | } 82 | 83 | 84 | int main(int argc, char ** argv){ 85 | int rank, np, n; 86 | int in_num = argc; 87 | char ** input_str = argv; 88 | 89 | MPI_Init(&argc, &argv); 90 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 91 | MPI_Comm_size(MPI_COMM_WORLD, &np); 92 | 93 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 94 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 95 | if (n < 0) n = 7; 96 | } else n = 7; 97 | 98 | 99 | { 100 | World dw(argc, argv); 101 | reduce_bcast(n, dw); 102 | } 103 | 104 | MPI_Finalize(); 105 | return 0; 106 | } 107 | /** 108 | * @} 109 | * @} 110 | */ 111 | 112 | #endif 113 | -------------------------------------------------------------------------------- /src/scaling/scaling.h: -------------------------------------------------------------------------------- 1 | #ifndef __INT_SCALING_H__ 2 | #define __INT_SCALING_H__ 3 | 4 | #include "../interface/common.h" 5 | #include "sym_seq_scl.h" 6 | 7 | namespace CTF_int { 8 | class tensor; 9 | class endomorphism; 10 | 11 | /** 12 | * \brief class for execution distributed scaling of a tensor 13 | */ 14 | class scaling { 15 | public: 16 | /** \brief operand/output */ 17 | tensor * A; 18 | 19 | /** \brief scaling of A */ 20 | char const * alpha; 21 | 22 | /** \brief indices of A */ 23 | int * idx_map; 24 | 25 | /** \brief whether there is a elementwise custom function */ 26 | bool is_custom; 27 | 28 | /** \brief function to execute on elementwise elements */ 29 | endomorphism const * func; 30 | 31 | /** 32 | * \brief constructor definining contraction with C's mul and add ops 33 | * \param[in] A left operand tensor 34 | * \param[in] idx_map indices of left operand 35 | * \param[in] alpha scaling factor alpha * A[idx_map]; 36 | A[idx_map] = alpha * A[idx_map] 37 | */ 38 | scaling(tensor * A, 39 | int const * idx_map, 40 | char const * alpha); 41 | scaling(tensor * A, 42 | char const * idx_map, 43 | char const * alpha); 44 | 45 | /** 46 | * \brief constructor definining scaling with custom function 47 | * \param[in] A left operand tensor 48 | * \param[in] idx_map indices of left operand 49 | func(&A[idx_map]) 50 | * \param[in] alpha scaling factor alpha * A[idx_map]; 51 | A[idx_map] = alpha * func(A[idx_map]) 52 | * \param[in] func elementwise function 53 | */ 54 | scaling(tensor * A, 55 | int const * idx_map, 56 | char const * alpha, 57 | endomorphism const * func); 58 | scaling(tensor * A, 59 | char const * idx_map, 60 | char const * alpha, 61 | endomorphism const * func); 62 | 63 | /** \brief destructor */ 64 | ~scaling(); 65 | 66 | /** \brief run scaling \return whether success or error */ 67 | int execute(); 68 | 69 | /** \brief predicts execution time in seconds using performance models */ 70 | double estimate_time(); 71 | 72 | /** 73 | * 
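
The reduce_bcast test above leans on the summation convention of CTF index notation: an index that appears only on the right-hand side is summed over, and an index that appears only on the left-hand side is broadcast. A condensed sketch of the two idioms the test compares (illustrative, not a file from the repository):

```cpp
#include <ctf.hpp>
using namespace CTF;

// C_ij += B_ik sums over k and broadcasts the row sums along j, which is the
// same as first reducing into d_i = sum_j B_ij and then broadcasting d_i.
void reduce_then_broadcast(int n, World & dw){
  Matrix<> B(n, n, dw);
  Matrix<> C(n, n, dw);   // starts out zero
  Vector<> d(n, dw);
  B.fill_random(0., 1.);

  C["ij"] += B["ik"];     // k appears only on the RHS: summed away
  d["i"]   = B["ij"];     // j appears only on the RHS: row sums of B
  C["ij"] -= d["i"];      // d_i broadcast along j; C returns to ~0
}
```
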
\brief scales a sparse tensor 74 | */ 75 | void sp_scl(); 76 | }; 77 | 78 | } 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /src/contraction/spctr_comm.h: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #ifndef __SPCTR_COMM_H__ 4 | #define __SPCTR_COMM_H__ 5 | 6 | #include "spctr_tsr.h" 7 | 8 | namespace CTF_int{ 9 | class contraction; 10 | 11 | class spctr_replicate : public spctr { 12 | public: 13 | int ncdt_A; /* number of processor dimensions to replicate A along */ 14 | int ncdt_B; /* number of processor dimensions to replicate B along */ 15 | int ncdt_C; /* number of processor dimensions to replicate C along */ 16 | int64_t size_A; /* size of A blocks */ 17 | int64_t size_B; /* size of B blocks */ 18 | int64_t size_C; /* size of C blocks */ 19 | 20 | CommData ** cdt_A; 21 | CommData ** cdt_B; 22 | CommData ** cdt_C; 23 | /* Class to be called on sub-blocks */ 24 | spctr * rec_ctr; 25 | /* void set_size_blk_A(int new_nblk_A, int64_t const * nnbA){ 26 | spctr::set_size_blk_A(new_nblk_A, nnbA); 27 | rec_ctr->set_size_blk_A(new_nblk_A, nnbA); 28 | }*/ 29 | 30 | void run(char * A, int nblk_A, int64_t const * size_blk_A, 31 | char * B, int nblk_B, int64_t const * size_blk_B, 32 | char * C, int nblk_C, int64_t * size_blk_C, 33 | char *& new_C); 34 | /** 35 | * \brief returns the number of bytes of buffer space 36 | * we need 37 | * \return bytes needed 38 | */ 39 | /** 40 | * \brief returns the number of bytes need by each processor in this kernel 41 | * \return bytes needed for contraction 42 | */ 43 | int64_t spmem_fp(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C); 44 | /** 45 | * \brief returns the number of bytes need by each processor in this kernel and its recursive calls 46 | * \return bytes needed for recursive contraction 47 | */ 48 | int64_t spmem_rec(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C); 49 | double est_time_fp(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C); 50 | double est_time_rec(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C); 51 | void print(); 52 | spctr * clone(); 53 | 54 | spctr_replicate(spctr * other); 55 | ~spctr_replicate(); 56 | spctr_replicate(contraction const * c, 57 | int const * phys_mapped, 58 | int64_t blk_sz_A, 59 | int64_t blk_sz_B, 60 | int64_t blk_sz_C); 61 | }; 62 | 63 | } 64 | #endif // __CTR_COMM_H__ 65 | -------------------------------------------------------------------------------- /test/python/test_dot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | import numpy 5 | import ctf 6 | import os 7 | import sys 8 | 9 | 10 | def allclose(a, b): 11 | return abs(ctf.to_nparray(a) - ctf.to_nparray(b)).sum() < 1e-14 12 | 13 | class KnowValues(unittest.TestCase): 14 | def test_dot_1d(self): 15 | a1 = numpy.ones(4) 16 | self.assertTrue(allclose(ctf.dot(ctf.astensor(a1), a1), numpy.dot(a1, a1))) 17 | self.assertTrue(allclose(ctf.dot(a1+1j, ctf.astensor(a1)), numpy.dot(a1+1j, a1))) 18 | a2 = ctf.astensor(a1).dot(a1+0j) 19 | self.assertTrue(a2.dtype == numpy.complex128) 20 | #self.assertTrue(ctf.astensor(a1).dot(a1+0j).dtype == numpy.complex) 21 | 22 | def test_dot_2d(self): 23 | a1 = numpy.random.random(4) 24 | a2 = numpy.random.random((4,3)) 25 | self.assertTrue(ctf.dot(ctf.astensor(a1), ctf.astensor(a2)).shape == (3,)) 26 | 
self.assertTrue(allclose(ctf.dot(a1, ctf.astensor(a2)), numpy.dot(a1, a2))) 27 | self.assertTrue(ctf.dot(ctf.astensor(a2).T(), a1).shape == (3,)) 28 | self.assertTrue(allclose(ctf.dot(ctf.astensor(a2).T(), a1), numpy.dot(a2.T, a1))) 29 | 30 | with self.assertRaises(ValueError): 31 | ctf.dot(a2, a2) 32 | self.assertTrue(allclose(ctf.dot(ctf.astensor(a2).T(), a2), numpy.dot(a2.T, a2))) 33 | self.assertTrue(allclose(ctf.astensor(a2).dot(a2.T), a2.dot(a2.T))) 34 | 35 | def test_tensordot(self): 36 | a0 = numpy.random.random((2,2,2)) 37 | self.assertTrue(allclose(ctf.tensordot(a0, a0), numpy.tensordot(a0, a0))) 38 | self.assertTrue(allclose(ctf.tensordot(a0, a0, 1), numpy.tensordot(a0, a0, 1))) 39 | self.assertTrue(allclose(ctf.tensordot(a0, a0, [[1,0],[1,0]]), numpy.tensordot(a0, a0, [[1,0],[1,0]]))) 40 | self.assertTrue(allclose(ctf.tensordot(a0, a0, [[0,1],[1,0]]), numpy.tensordot(a0, a0, [[0,1],[1,0]]))) 41 | self.assertTrue(allclose(ctf.tensordot(a0, a0, [[2,1,0],[1,0,2]]), numpy.tensordot(a0, a0, [[2,1,0],[1,0,2]]))) 42 | with self.assertRaises(IndexError): 43 | ctf.tensordot(a0, a0, [[2,1,0,3],[0,1,2,3]]) 44 | 45 | 46 | if __name__ == "__main__": 47 | numpy.random.seed(5330); 48 | if ctf.comm().rank() != 0: 49 | result = unittest.TextTestRunner(stream = open(os.devnull, 'w')).run(unittest.TestSuite(unittest.TestLoader().loadTestsFromTestCase(KnowValues))) 50 | else: 51 | print("Tests for dot") 52 | result = unittest.TextTestRunner().run(unittest.TestSuite(unittest.TestLoader().loadTestsFromTestCase(KnowValues))) 53 | ctf.MPI_Stop() 54 | sys.exit(not result) 55 | -------------------------------------------------------------------------------- /src/interface/vector.cxx: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | namespace CTF_int { 4 | 5 | struct int1 6 | { 7 | int i[1]; 8 | int1(int a) 9 | { 10 | i[0] = a; 11 | } 12 | operator const int*() const 13 | { 14 | return i; 15 | } 16 | }; 17 | } 18 | namespace CTF { 19 | template 20 | Vector::Vector() : Tensor() { } 21 | 22 | template 23 | Vector::Vector(Vector const & A) 24 | : Tensor(A) { 25 | len = A.len; 26 | } 27 | 28 | template 29 | Vector::Vector(Tensor const & A) 30 | : Tensor(A) { 31 | IASSERT(A.order == 1); 32 | len = A.lens[0]; 33 | } 34 | 35 | template 36 | Vector::Vector(int len_, 37 | World & world_, 38 | CTF_int::algstrct const & sr_) 39 | : Tensor(1, CTF_int::int1(len_), CTF_int::int1(NS), world_, sr_, NULL, 0) { 40 | len = len_; 41 | } 42 | 43 | template 44 | Vector::Vector(int len_, 45 | World & world_, 46 | char const * name_, 47 | int profile_, 48 | CTF_int::algstrct const & sr_) 49 | : Tensor(1, CTF_int::int1(len_), CTF_int::int1(NS), world_, sr_, name_, profile_) { 50 | len = len_; 51 | } 52 | 53 | template 54 | Vector::Vector(int len_, 55 | int atr_, 56 | World & world_, 57 | char const * name_, 58 | int profile_, 59 | CTF_int::algstrct const & sr_) 60 | : Tensor(1, atr_>0, CTF_int::int1(len_), CTF_int::int1(NS), world_, sr_, name_, profile_) { 61 | len = len_; 62 | } 63 | 64 | template 65 | Vector::Vector(int len_, 66 | int atr_, 67 | World & world_, 68 | CTF_int::algstrct const & sr_) 69 | : Tensor(1, atr_>0, CTF_int::int1(len_), CTF_int::int1(NS), world_, sr_) { 70 | len = len_; 71 | } 72 | 73 | //template 74 | //Vector & Vector::operator=(const Vector & A){ 75 | // CTF_int::tensor::free_self(); 76 | // CTF_int::tensor::init(A.sr, A.order, A.lens, A.sym, A.wrld, 1, A.name, A.profile, A.is_sparse); 77 | // return *this; 78 | //} 79 | 80 | 81 | 82 | } 
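
Every `Vector` constructor in vector.cxx above forwards to the order-1 `Tensor` constructor, with the small `int1` wrapper turning a scalar length into the `int const *` array the base class expects. A usage sketch of that equivalence (the explicit `Tensor` form restates the forwarding shown above; illustrative only):

```cpp
#include <ctf.hpp>
using namespace CTF;

void make_vectors(int n, World & dw){
  // The convenience form...
  Vector<> v(n, dw);

  // ...builds the same object as an order-1 nonsymmetric Tensor:
  // CTF_int::int1(n) decays to an int[1] holding n, int1(NS) to an int[1] holding NS.
  int lens[1] = {n};
  int sym[1]  = {NS};
  Tensor<> t(1, lens, sym, dw);

  // Both support the usual index notation.
  v["i"]  = 1.0;
  t["i"] += v["i"];
}
```
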
83 | -------------------------------------------------------------------------------- /test/diag_ctr.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup tests 4 | * @{ 5 | * \defgroup diag_ctr diag_ctr 6 | * @{ 7 | * \brief Summation along tensor diagonals 8 | */ 9 | #include 10 | 11 | using namespace CTF; 12 | 13 | int diag_ctr(int n, 14 | int m, 15 | World & dw){ 16 | int rank, i, num_pes, pass; 17 | int64_t np; 18 | double * pairs; 19 | int64_t * indices; 20 | 21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 22 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 23 | 24 | 25 | int shapeN4[] = {NS,NS,NS,NS}; 26 | int sizeN4[] = {n,m,n,m}; 27 | 28 | //* Creates distributed tensors initialized with zeros 29 | Tensor<> A(4, sizeN4, shapeN4, dw); 30 | 31 | srand48(13*rank); 32 | 33 | Matrix<> mA(n,m,NS,dw); 34 | Matrix<> mB(n,m,NS,dw); 35 | A.get_local_data(&np, &indices, &pairs); 36 | for (i=0; i 1.E-10) 50 | pass = 0; 51 | if (pass){ 52 | if (rank == 0) 53 | printf("{ sum(ai)A[\"aiai\"]=sum(ai)mA[\"ai\"] } passed \n"); 54 | } else { 55 | if (rank == 0) 56 | printf("{ sum(ai)A[\"aiai\"]=sum(ai)mA[\"ai\"] } failed \n"); 57 | } 58 | 59 | 60 | return pass; 61 | } 62 | 63 | 64 | #ifndef TEST_SUITE 65 | char* getCmdOption(char ** begin, 66 | char ** end, 67 | const std::string & option){ 68 | char ** itr = std::find(begin, end, option); 69 | if (itr != end && ++itr != end){ 70 | return *itr; 71 | } 72 | return 0; 73 | } 74 | 75 | 76 | int main(int argc, char ** argv){ 77 | int rank, np, n, m; 78 | int in_num = argc; 79 | char ** input_str = argv; 80 | 81 | MPI_Init(&argc, &argv); 82 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 83 | MPI_Comm_size(MPI_COMM_WORLD, &np); 84 | 85 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 86 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 87 | if (n < 0) n = 7; 88 | } else n = 7; 89 | 90 | if (getCmdOption(input_str, input_str+in_num, "-m")){ 91 | m = atoi(getCmdOption(input_str, input_str+in_num, "-m")); 92 | if (m < 0) m = 7; 93 | } else m = 7; 94 | 95 | { 96 | World dw(argc, argv); 97 | diag_ctr(n, m, dw); 98 | } 99 | 100 | MPI_Finalize(); 101 | return 0; 102 | } 103 | /** 104 | * @} 105 | * @} 106 | */ 107 | 108 | #endif 109 | -------------------------------------------------------------------------------- /src/summation/sym_seq_sum.h: -------------------------------------------------------------------------------- 1 | #ifndef __INT_SYM_SEQ_SUM_H__ 2 | #define __INT_SYM_SEQ_SUM_H__ 3 | 4 | #include "summation.h" 5 | 6 | namespace CTF_int { 7 | /** 8 | * \brief performs symmetric contraction with unblocked reference kernel 9 | */ 10 | int sym_seq_sum_ref( char const * alpha, 11 | char const * A, 12 | algstrct const * sr_A, 13 | int order_A, 14 | int const * edge_len_A, 15 | int const * sym_A, 16 | int const * idx_map_A, 17 | char const * beta, 18 | char * B, 19 | algstrct const * sr_B, 20 | int order_B, 21 | int const * edge_len_B, 22 | int const * sym_B, 23 | int const * idx_map_B); 24 | 25 | /** 26 | * \brief performs symmetric summation with custom elementwise function 27 | */ 28 | int sym_seq_sum_cust(char const * alpha, 29 | char const * A, 30 | algstrct const * sr_A, 31 | int order_A, 32 | int const * edge_len_A, 33 | int const * sym_A, 34 | int const * idx_map_A, 35 | char const * beta, 36 | char * B, 37 | algstrct const * sr_B, 38 | int order_B, 39 | int const * edge_len_B, 40 | int const * sym_B, 41 | int const * idx_map_B, 42 
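
diag_ctr.cxx above exercises repeated index characters, which address a tensor diagonal: for the order-4 tensor `A` the string "aiai" selects entries whose first/third and second/fourth indices coincide. A sketch of that idiom as a standalone check (a condensed illustration of the behavior the test's pass message describes, not the test's exact code):

```cpp
#include <ctf.hpp>
using namespace CTF;

// Pull the A[a,i,a,i] diagonal out into an n-by-m matrix and compare it
// entrywise with mA; diag_ctr.cxx performs the summed variant of this check.
bool diagonal_matches(Tensor<> & A, Matrix<> & mA, int n, int m, World & dw){
  Matrix<> tr(n, m, NS, dw);
  tr["ai"]  = A["aiai"];   // repeated indices pick out the diagonal entries
  tr["ai"] -= mA["ai"];
  return tr.norm2() < 1.E-10;
}
```
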
| univar_function const * func); 43 | 44 | /** 45 | * \brief performs symmetric summation with blocked daxpy 46 | */ 47 | int sym_seq_sum_inr( char const * alpha, 48 | char const * A, 49 | algstrct const * sr_A, 50 | int order_A, 51 | int const * edge_len_A, 52 | int const * sym_A, 53 | int const * idx_map_A, 54 | char const * beta, 55 | char * B, 56 | algstrct const * sr_B, 57 | int order_B, 58 | int const * edge_len_B, 59 | int const * sym_B, 60 | int const * idx_map_B, 61 | int inr_stride); 62 | } 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /test/endomorphism.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup tests 4 | * @{ 5 | * \defgroup endomorphism endomorphism 6 | * @{ 7 | * \brief tests custom element-wise functions by implementing division elementwise on 4D tensors 8 | */ 9 | 10 | #include 11 | using namespace CTF; 12 | 13 | void fdbl(double & a){ 14 | a=a*a*a; 15 | } 16 | 17 | int endomorphism(int n, 18 | World & dw){ 19 | 20 | int shapeN4[] = {NS,NS,NS,NS}; 21 | int sizeN4[] = {n+1,n,n+2,n+3}; 22 | 23 | Tensor<> A(4, sizeN4, shapeN4, dw); 24 | 25 | A.fill_random(-.5, .5); 26 | 27 | 28 | double * all_start_data; 29 | int64_t nall; 30 | A.read_all(&nall, &all_start_data); 31 | 32 | double scale = 1.0; 33 | 34 | CTF::Transform endo([=](double & d){ d=scale*d*d*d; }); 35 | // below is equivalent to A.scale(1.0, "ijkl", endo); 36 | endo(A["ijkl"]); 37 | 38 | double * all_end_data; 39 | int64_t nall2; 40 | A.read_all(&nall2, &all_end_data); 41 | 42 | int pass = (nall == nall2); 43 | if (pass){ 44 | for (int64_t i=0; i=1.E-6) pass =0; 47 | } 48 | } 49 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 50 | 51 | if (dw.rank == 0){ 52 | if (pass){ 53 | printf("{ A[\"ijkl\"] = A[\"ijkl\"]^3 } passed\n"); 54 | } else { 55 | printf("{ A[\"ijkl\"] = A[\"ijkl\"]^3 } failed\n"); 56 | } 57 | } 58 | 59 | delete [] all_start_data; 60 | delete [] all_end_data; 61 | 62 | return pass; 63 | } 64 | 65 | 66 | #ifndef TEST_SUITE 67 | 68 | char* getCmdOption(char ** begin, 69 | char ** end, 70 | const std::string & option){ 71 | char ** itr = std::find(begin, end, option); 72 | if (itr != end && ++itr != end){ 73 | return *itr; 74 | } 75 | return 0; 76 | } 77 | 78 | 79 | int main(int argc, char ** argv){ 80 | int rank, np, n; 81 | int const in_num = argc; 82 | char ** input_str = argv; 83 | 84 | MPI_Init(&argc, &argv); 85 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 86 | MPI_Comm_size(MPI_COMM_WORLD, &np); 87 | 88 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 89 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 90 | if (n < 0) n = 5; 91 | } else n = 5; 92 | 93 | 94 | { 95 | World dw(MPI_COMM_WORLD, argc, argv); 96 | 97 | if (rank == 0){ 98 | printf("Computing endomorphism A_ijkl = f(A_ijkl)\n"); 99 | } 100 | endomorphism(n, dw); 101 | } 102 | 103 | 104 | MPI_Finalize(); 105 | return 0; 106 | } 107 | 108 | /** 109 | * @} 110 | * @} 111 | */ 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /src/shared/init_models.cxx: -------------------------------------------------------------------------------- 1 | namespace CTF_int{ 2 | double seq_tsr_spctr_cst_off_k0_init[] = {5.3745E-06, 3.6464E-08, 2.2334E-10}; 3 | double seq_tsr_spctr_cst_off_k1_init[] = {5.3745E-06, 3.6464E-08, 2.2334E-10}; 4 | double seq_tsr_spctr_cst_off_k2_init[] = 
{-2.1996E-04, 3.1883E-09, 3.8743E-11}; 5 | double seq_tsr_spctr_off_k0_init[] = {8.6970E-06, 4.5598E-11, 1.1544E-09}; 6 | double seq_tsr_spctr_off_k1_init[] = {5.3745E-06, 3.6464E-08, 2.2334E-10}; 7 | double seq_tsr_spctr_off_k2_init[] = {5.3745E-06, 3.6464E-08, 2.2334E-10}; 8 | double seq_tsr_spctr_cst_k0_init[] = {5.3745E-06, 3.6464E-08, 2.2334E-10}; 9 | double seq_tsr_spctr_cst_k1_init[] = {5.3745E-06, 3.6464E-08, 2.2334E-10}; 10 | double seq_tsr_spctr_cst_k2_init[] = {-8.8459E-08, 8.1207E-10, -2.8486E-12}; 11 | double seq_tsr_spctr_cst_k3_init[] = {1.8504E-08, 2.9154E-11, 2.1973E-11}; 12 | double seq_tsr_spctr_cst_k4_init[] = {2.0948E-05, 1.2294E-09, 8.0037E-10}; 13 | double seq_tsr_spctr_k0_init[] = {2.2620E-08, -5.7494E-10, 2.2146E-09}; 14 | double seq_tsr_spctr_k1_init[] = {5.3745E-06, 3.6464E-08, 2.2334E-10}; 15 | double seq_tsr_spctr_k2_init[] = {3.0917E-08, 5.2181E-11, 4.1634E-12}; 16 | double seq_tsr_spctr_k3_init[] = {7.2456E-08, 1.5128E-10, -1.5528E-12}; 17 | double seq_tsr_spctr_k4_init[] = {1.6880E-07, 4.9411E-10, 9.2847E-13}; 18 | double pin_keys_mdl_init[] = {3.1189E-09, 6.6717E-08}; 19 | double seq_tsr_ctr_mdl_cst_init[] = {5.1626E-06, -6.3215E-11, 3.9638E-09}; 20 | double seq_tsr_ctr_mdl_ref_init[] = {4.9138E-08, 5.8290E-10, 4.8575E-11}; 21 | double seq_tsr_ctr_mdl_inr_init[] = {2.0647E-08, 1.9721E-10, 2.9948E-11}; 22 | double seq_tsr_ctr_mdl_off_init[] = {6.2925E-05, 1.7449E-11, 1.7211E-12}; 23 | double seq_tsr_ctr_mdl_cst_inr_init[] = {1.3863E-04, 2.0119E-10, 9.8820E-09}; 24 | double seq_tsr_ctr_mdl_cst_off_init[] = {8.4844E-04, -5.9246E-11, 3.5247E-10}; 25 | double long_contig_transp_mdl_init[] = {2.9158E-10, 3.0501E-09}; 26 | double shrt_contig_transp_mdl_init[] = {1.3427E-08, 4.3168E-09}; 27 | double non_contig_transp_mdl_init[] = {4.0475E-08, 4.0463E-09}; 28 | double dgtog_res_mdl_init[] = {2.9786E-05, 2.4335E-04, 1.0845E-08}; 29 | double blres_mdl_init[] = {1.0598E-05, 7.2741E-08}; 30 | double alltoall_mdl_init[] = {1.0000E-06, 1.0000E-06, 5.0000E-10}; 31 | double alltoallv_mdl_init[] = {2.7437E-06, 2.2416E-05, 1.0469E-08}; 32 | double red_mdl_init[] = {6.2935E-07, 4.6276E-06, 9.2245E-10}; 33 | double red_mdl_cst_init[] = {5.7302E-07, 4.7347E-06, 6.0191E-10}; 34 | double allred_mdl_init[] = {8.4416E-07, 6.8651E-06, 3.5845E-08}; 35 | double allred_mdl_cst_init[] = {-3.3754E-04, 2.1343E-04, 3.0801E-09}; 36 | double bcast_mdl_init[] = {1.5045E-06, 1.4485E-05, 3.2876E-09}; 37 | double spredist_mdl_init[] = {1.2744E-04, 1.0278E-03, 7.6837E-08}; 38 | double csrred_mdl_init[] = {3.7005E-05, 1.1854E-04, 5.5165E-09}; 39 | double csrred_mdl_cst_init[] = {-1.8323E-04, 1.3076E-04, 2.8732E-09}; 40 | } 41 | -------------------------------------------------------------------------------- /src/interface/back_comp.h: -------------------------------------------------------------------------------- 1 | #ifndef __BACK_COMP_H__ 2 | #define __BACK_COMP_H__ 3 | 4 | /* pure double version of templated namespace CTF, 5 | 'using namespace CTF_double' cannot be used in combination in conjunction with 'using namespace CTF' */ 6 | namespace CTF_double { 7 | typedef CTF::World World; 8 | 9 | typedef CTF::Tensor<> Tensor; 10 | typedef CTF::Matrix<> Matrix; 11 | typedef CTF::Vector<> Vector; 12 | typedef CTF::Scalar<> Scalar; 13 | 14 | typedef CTF::Timer Timer; 15 | typedef CTF::Timer_epoch Timer_epoch; 16 | typedef CTF::Function_timer Function_timer; 17 | typedef CTF::Flop_counter Flop_counter; 18 | } 19 | 20 | //typdefs for backwards compatibility to CTF_VERSION 10x 21 | typedef CTF::World 
CTF_World; 22 | typedef CTF::World cCTF_World; 23 | template 24 | class tCTF_World : public CTF::World { 25 | public: 26 | tCTF_World(int argc, char * const * argv) : CTF::World(argc, argv){} 27 | tCTF_World(MPI_Comm comm = MPI_COMM_WORLD, 28 | int argc = 0, 29 | char * const * argv = NULL) : CTF::World(comm, argc, argv){} 30 | tCTF_World(int order, 31 | int const * lens, 32 | MPI_Comm comm = MPI_COMM_WORLD, 33 | int argc = 0, 34 | char * const * argv = NULL) : CTF::World(order, lens, comm, argc, argv){} 35 | 36 | }; 37 | 38 | typedef CTF::Tensor<> CTF_Tensor; 39 | typedef CTF::Matrix<> CTF_Matrix; 40 | typedef CTF::Vector<> CTF_Vector; 41 | typedef CTF::Scalar<> CTF_Scalar; 42 | typedef CTF::Idx_Tensor CTF_Idx_Tensor; 43 | typedef CTF::Tensor< std::complex > cCTF_Tensor; 44 | typedef CTF::Matrix< std::complex > cCTF_Matrix; 45 | typedef CTF::Vector< std::complex > cCTF_Vector; 46 | typedef CTF::Scalar< std::complex > cCTF_Scalar; 47 | typedef CTF::Idx_Tensor cCTF_Idx_Tensor; 48 | 49 | //this needs C++11, possible to do C++03 using struct 50 | template 51 | using tCTF_Tensor = CTF::Tensor; 52 | template 53 | using tCTF_Matrix = CTF::Matrix; 54 | template 55 | using tCTF_Vector = CTF::Vector; 56 | template 57 | using tCTF_Scalar = CTF::Scalar; 58 | template 59 | class tCTF_Idx_Tensor : CTF::Idx_Tensor { }; 60 | 61 | typedef CTF::Timer CTF_Timer; 62 | typedef CTF::Flop_counter CTF_Flop_Counter; 63 | typedef CTF::Timer_epoch CTF_Timer_epoch; 64 | 65 | typedef int64_t long_int; 66 | typedef int64_t key; 67 | 68 | template 69 | using tkv_pair = CTF::Pair; 70 | 71 | typedef tkv_pair kv_pair; 72 | typedef tkv_pair< std::complex > ckv_pair; 73 | 74 | 75 | //deprecated 76 | //enum CTF_OP { CTF_OP_SUM, CTF_OP_SUMABS, CTF_OP_SUMSQ, CTF_OP_MAX, CTF_OP_MIN, CTF_OP_MAXABS, CTF_OP_MINABS}; 77 | 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /test/univar_function.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup tests 4 | * @{ 5 | * \defgroup univar_function univar_function 6 | * @{ 7 | * \brief tests custom element-wise functions by implementing division elementwise on 4D tensors 8 | */ 9 | 10 | #include 11 | using namespace CTF; 12 | 13 | double fquad(double a){ 14 | return a*a*a*a; 15 | } 16 | 17 | int univar_function(int n, 18 | World & dw){ 19 | 20 | int shapeN4[] = {NS,NS,NS,NS}; 21 | int sizeN4[] = {n+1,n,n+2,n+3}; 22 | 23 | Tensor<> A(4, sizeN4, shapeN4, dw); 24 | 25 | srand48(dw.rank); 26 | A.fill_random(-.5, .5); 27 | 28 | 29 | double * all_start_data; 30 | int64_t nall; 31 | A.read_all(&nall, &all_start_data); 32 | 33 | 34 | //CTF::Function<> ufun(&fquad); 35 | CTF::Function<> ufun([](double a){ return a*a*a*a; }); 36 | // below is equivalent to A.scale(1.0, "ijkl", ufun); 37 | .5*A["ijkl"]+=ufun(.5*A["ijkl"]); 38 | 39 | double * all_end_data; 40 | int64_t nall2; 41 | A.read_all(&nall2, &all_end_data); 42 | 43 | int pass = (nall == nall2); 44 | if (pass){ 45 | for (int64_t i=0; i=1.E-6) pass =0; 47 | } 48 | } 49 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 50 | 51 | if (dw.rank == 0){ 52 | if (pass){ 53 | printf("{ A[\"ijkl\"] = A[\"ijkl\"]^3 } passed\n"); 54 | } else { 55 | printf("{ A[\"ijkl\"] = A[\"ijkl\"]^3 } failed\n"); 56 | } 57 | } 58 | 59 | delete [] all_start_data; 60 | delete [] all_end_data; 61 | 62 | return pass; 63 | } 64 | 65 | 66 | #ifndef TEST_SUITE 67 | 68 | char* 
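
The aliases in back_comp.h above are plain typedefs and alias templates over the current templated classes, so the CTF 1.0-era spellings and the templated ones denote the same types and can be mixed. A small sketch (illustrative only):

```cpp
#include <ctf.hpp>
using namespace CTF;

// Old-style names resolve to the templated classes, so a CTF_Matrix can be
// bound to a reference of the aliased base type and used interchangeably.
void legacy_names(int n, World & dw){
  CTF_Matrix M(n, n, NS, dw);     // same type as CTF::Matrix<>
  CTF_Vector v(n, dw);            // same type as CTF::Vector<>
  tCTF_Tensor<double> & T = M;    // alias template for CTF::Tensor<double>

  M["ij"] = 1.0;                  // fill all addressed entries with 1
  v["i"]  = T["ij"];              // j is summed away: row sums of M
}
```
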
getCmdOption(char ** begin, 69 | char ** end, 70 | const std::string & option){ 71 | char ** itr = std::find(begin, end, option); 72 | if (itr != end && ++itr != end){ 73 | return *itr; 74 | } 75 | return 0; 76 | } 77 | 78 | 79 | int main(int argc, char ** argv){ 80 | int rank, np, n; 81 | int const in_num = argc; 82 | char ** input_str = argv; 83 | 84 | MPI_Init(&argc, &argv); 85 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 86 | MPI_Comm_size(MPI_COMM_WORLD, &np); 87 | 88 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 89 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 90 | if (n < 0) n = 5; 91 | } else n = 5; 92 | 93 | 94 | { 95 | World dw(MPI_COMM_WORLD, argc, argv); 96 | 97 | if (rank == 0){ 98 | printf("Computing univar_function A_ijkl = f(A_ijkl)\n"); 99 | } 100 | univar_function(n, dw); 101 | } 102 | 103 | 104 | MPI_Finalize(); 105 | return 0; 106 | } 107 | 108 | /** 109 | * @} 110 | * @} 111 | */ 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /test/multi_tsr_sym.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | /** \addtogroup tests 3 | * @{ 4 | * \defgroupmulti_tsr_sym 5 | * @{ 6 | * \brief Matrix multiplication 7 | */ 8 | 9 | #include 10 | 11 | using namespace CTF; 12 | 13 | int multi_tsr_sym(int m, 14 | int n, 15 | World & dw){ 16 | int rank, i, num_pes; 17 | int64_t np; 18 | double * pairs; 19 | int64_t * indices; 20 | 21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 22 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 23 | 24 | #ifndef TEST_SUITE 25 | if (rank == 0) 26 | printf("m = %d, n = %d, p = %d\n", 27 | m,n,num_pes); 28 | #endif 29 | 30 | //* Creates distributed tensors initialized with zeros 31 | Matrix<> A(n, m, NS, dw); 32 | Matrix<> C_NS(n, n, NS, dw); 33 | Matrix<> C_SY(n, n, SY, dw); 34 | Matrix<> diff(n, n, NS, dw); 35 | 36 | srand48(13*rank); 37 | //* Writes noise to local data based on global index 38 | A.get_local_data(&np, &indices, &pairs); 39 | for (i=0; i 11 | 12 | using namespace CTF; 13 | 14 | int diag_sym(int n, 15 | World & dw){ 16 | int rank, i, num_pes, pass; 17 | int64_t np; 18 | double * pairs; 19 | int64_t * indices; 20 | 21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 22 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 23 | 24 | 25 | int shapeN4[] = {SY,NS,SY,NS}; 26 | int sizeN4[] = {n,n,n,n}; 27 | 28 | //* Creates distributed tensors initialized with zeros 29 | Tensor<> A(4, sizeN4, shapeN4, dw); 30 | Tensor<> B(4, sizeN4, shapeN4, dw); 31 | Tensor<> C(4, sizeN4, shapeN4, dw); 32 | 33 | srand48(13*rank); 34 | 35 | Matrix<> mA(n,n,NS,dw); 36 | Matrix<> mB(n,n,NS,dw); 37 | mA.get_local_data(&np, &indices, &pairs); 38 | for (i=0; i 11 | 12 | using namespace CTF; 13 | 14 | int repack(int n, 15 | World & dw){ 16 | int rank, i, num_pes, pass; 17 | int64_t np; 18 | double * pairs; 19 | int64_t * indices; 20 | 21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 22 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 23 | 24 | 25 | int shapeN4[] = {NS,NS,NS,NS}; 26 | int shapeS4[] = {NS,NS,SY,NS}; 27 | int sizeN4[] = {n,n,n,n}; 28 | 29 | //* Creates distributed tensors initialized with zeros 30 | Tensor<> An(4, sizeN4, shapeN4, dw); 31 | Tensor<> As(4, sizeN4, shapeS4, dw); 32 | 33 | As.get_local_data(&np, &indices, &pairs); 34 | for (i=0; i Anr(An, shapeS4); 39 | 40 | Anr["ijkl"] -= As["ijkl"]; 41 | 42 | double norm = Anr.norm2(); 43 | 44 | if (norm < 1.E-6) 45 | pass = 1; 46 | else 47 | pass = 0; 48 | 49 | if 
(!pass) 50 | printf("{ NS -> SY repack } failed \n"); 51 | else { 52 | Tensor<> Anur(As, shapeN4); 53 | Tensor<> Asur(As, shapeN4); 54 | Asur["ijkl"] = 0.0; 55 | Asur.write(np, indices, pairs); 56 | Anur["ijkl"] -= Asur["ijkl"]; 57 | 58 | norm = Anur.norm2(); 59 | 60 | if (norm < 1.E-6){ 61 | pass = 1; 62 | if (rank == 0) 63 | printf("{ NS -> SY -> NS repack } passed \n"); 64 | } else { 65 | pass = 0; 66 | if (rank == 0) 67 | printf("{ SY -> NS repack } failed \n"); 68 | } 69 | 70 | } 71 | delete [] pairs; 72 | free(indices); 73 | return pass; 74 | } 75 | 76 | 77 | #ifndef TEST_SUITE 78 | char* getCmdOption(char ** begin, 79 | char ** end, 80 | const std::string & option){ 81 | char ** itr = std::find(begin, end, option); 82 | if (itr != end && ++itr != end){ 83 | return *itr; 84 | } 85 | return 0; 86 | } 87 | 88 | 89 | int main(int argc, char ** argv){ 90 | int rank, np, n; 91 | int in_num = argc; 92 | char ** input_str = argv; 93 | 94 | MPI_Init(&argc, &argv); 95 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 96 | MPI_Comm_size(MPI_COMM_WORLD, &np); 97 | 98 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 99 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 100 | if (n < 0) n = 7; 101 | } else n = 7; 102 | 103 | 104 | { 105 | World dw(argc, argv); 106 | repack(n, dw); 107 | } 108 | 109 | MPI_Finalize(); 110 | return 0; 111 | } 112 | /** 113 | * @} 114 | * @} 115 | */ 116 | 117 | #endif 118 | -------------------------------------------------------------------------------- /studies/fast_3mm.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #include 4 | 5 | using namespace CTF; 6 | 7 | int fast_diagram(int const n, 8 | World &ctf){ 9 | int rank, i, num_pes; 10 | 11 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 13 | 14 | 15 | Matrix<> T(n,n,NS,ctf); 16 | Matrix<> V(n,n,NS,ctf); 17 | Matrix<> Z_SY(n,n,SY,ctf); 18 | Matrix<> Z_AS(n,n,AS,ctf); 19 | Matrix<> Z_NS(n,n,NS,ctf); 20 | Vector<> Z_D(n,ctf); 21 | Matrix<> W(n,n,SH,ctf); 22 | Matrix<> W_ans(n,n,SH,ctf); 23 | 24 | int64_t * indices; 25 | double * values; 26 | int64_t size; 27 | srand48(173*rank); 28 | 29 | T.read_local(&size, &indices, &values); 30 | for (i=0; i 11 | 12 | using namespace CTF; 13 | 14 | int readall_test(int n, 15 | int m, 16 | World &dw){ 17 | int rank, i, num_pes, pass; 18 | 19 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 20 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 21 | 22 | 23 | int shapeN4[] = {NS,NS,NS,NS}; 24 | int sizeN4[] = {n,m,n,m}; 25 | 26 | //* Creates distributed tensors initialized with zeros 27 | Tensor<> A(4, sizeN4, shapeN4, dw); 28 | 29 | std::vector vals; 30 | std::vector inds; 31 | if (rank == 0){ 32 | World sw(MPI_COMM_SELF); 33 | 34 | Tensor<> sA(4, sizeN4, shapeN4, sw); 35 | 36 | 37 | if (rank == 0){ 38 | srand48(13*rank); 39 | for (i=0; i1.E-10) 63 | pass = 0; 64 | } 65 | } 66 | delete [] vs; 67 | 68 | if (pass){ 69 | if (rank == 0) 70 | printf("{ sum(ai)A[\"aiai\"]=sum(ai)mA[\"ai\"] } passed \n"); 71 | } else { 72 | if (rank == 0) 73 | printf("{ sum(ai)A[\"aiai\"]=sum(ai)mA[\"ai\"] } failed \n"); 74 | } 75 | 76 | 77 | return pass; 78 | } 79 | 80 | 81 | #ifndef TEST_SUITE 82 | char* getCmdOption(char ** begin, 83 | char ** end, 84 | const std::string & option){ 85 | char ** itr = std::find(begin, end, option); 86 | if (itr != end && ++itr != end){ 87 | return *itr; 88 | } 89 | return 0; 90 | } 91 | 92 | 93 | int main(int argc, char ** argv){ 94 
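
repack.cxx above relies on the `Tensor` copy constructor that takes a replacement symmetry array, which repacks the same data into the new packed layout. A condensed sketch of the SY-to-NS direction, which can be verified without knowing how NS-to-SY folding treats unsymmetric data (illustrative, not a file from the repository):

```cpp
#include <ctf.hpp>
using namespace CTF;

// Unpack an order-4 tensor that is SY-packed in its last index pair into a
// plain NS tensor, then confirm the unpacked data really is symmetric in (k,l).
bool unpack_is_symmetric(int n, World & dw){
  int lens[4]     = {n,n,n,n};
  int shape_sy[4] = {NS,NS,SY,NS};   // SY in slot 2 pairs index k with index l
  int shape_ns[4] = {NS,NS,NS,NS};

  Tensor<> As(4, lens, shape_sy, dw);
  As.fill_random(0., 1.);

  Tensor<> An(As, shape_ns);         // repack SY -> NS, as in repack.cxx

  Tensor<> D(4, lens, shape_ns, dw);
  D["ijkl"]  = An["ijkl"];
  D["ijkl"] -= An["ijlk"];           // symmetric data => the difference vanishes
  return D.norm2() < 1.E-6;
}
```
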
| int rank, np, n, m; 95 | int in_num = argc; 96 | char ** input_str = argv; 97 | 98 | MPI_Init(&argc, &argv); 99 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 100 | MPI_Comm_size(MPI_COMM_WORLD, &np); 101 | 102 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 103 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 104 | if (n < 0) n = 7; 105 | } else n = 7; 106 | 107 | if (getCmdOption(input_str, input_str+in_num, "-m")){ 108 | m = atoi(getCmdOption(input_str, input_str+in_num, "-m")); 109 | if (m < 0) m = 9; 110 | } else m = 9; 111 | 112 | 113 | 114 | { 115 | World dw(argc, argv); 116 | readall_test(n, m, dw); 117 | } 118 | 119 | MPI_Finalize(); 120 | return 0; 121 | } 122 | #endif 123 | /** 124 | * @} 125 | * @} 126 | */ 127 | 128 | -------------------------------------------------------------------------------- /src/interface/ring.h: -------------------------------------------------------------------------------- 1 | #ifndef __RING_H__ 2 | #define __RING_H__ 3 | 4 | #include "../tensor/algstrct.h" 5 | 6 | namespace CTF { 7 | 8 | /** 9 | * \addtogroup algstrct 10 | * @{ 11 | */ 12 | /** 13 | * \brief Ring class defined by a datatype and addition and multiplicaton functions 14 | * addition must have an identity, inverse, and be associative, does not need to be commutative 15 | * multiplications must have an identity and be distributive 16 | */ 17 | template ()> 18 | class Ring : public Semiring { 19 | public: 20 | Ring(Ring const & other) : Semiring(other) { 21 | this->abs = &CTF_int::char_abs< dtype, CTF_int::default_abs >; 22 | } 23 | /** 24 | * \brief default constructor valid for only certain types: 25 | * bool, int, unsigned int, int64_t, uint64_t, 26 | * float, double, std::complex, std::complex 27 | */ 28 | Ring() : Semiring() { 29 | this->abs = &CTF_int::char_abs< dtype, CTF_int::default_abs >; 30 | } 31 | 32 | virtual CTF_int::algstrct * clone() const { 33 | return new Ring(*this); 34 | } 35 | 36 | /** 37 | * \brief constructor for algstrct equipped with * and + 38 | * \param[in] addid_ additive identity 39 | * \param[in] fadd_ binary addition function 40 | * \param[in] addmop_ MPI_Op operation for addition 41 | * \param[in] mulid_ multiplicative identity 42 | * \param[in] fmul_ binary multiplication function 43 | * \param[in] gemm_ block matrix multiplication function 44 | * \param[in] axpy_ vector sum function 45 | * \param[in] scal_ vector scale function 46 | */ 47 | Ring(dtype addid_, 48 | dtype (*fadd_)(dtype a, dtype b), 49 | MPI_Op addmop_, 50 | dtype mulid_, 51 | dtype (*fmul_)(dtype a, dtype b), 52 | void (*gemm_)(char,char,int,int,int,dtype,dtype const*,dtype const*,dtype,dtype*)=NULL, 53 | void (*axpy_)(int,dtype,dtype const*,int,dtype*,int)=NULL, 54 | void (*scal_)(int,dtype,dtype*,int)=NULL) 55 | : Semiring(addid_, fadd_, mulid_, addmop_, fmul_, gemm_, axpy_, scal_) { 56 | this->abs = &CTF_int::char_abs< dtype, CTF_int::default_abs >; 57 | } 58 | 59 | //treat NULL as mulid 60 | void safeaddinv(char const * a, char *& b) const { 61 | if (b==NULL) b = (char*)malloc(this->el_size); 62 | if (a == NULL){ 63 | 64 | ((dtype*)b)[0] = -this->tmulid; 65 | } else { 66 | ((dtype*)b)[0] = -((dtype*)a)[0]; 67 | } 68 | } 69 | 70 | void addinv(char const * a, char * b) const { 71 | ((dtype*)b)[0] = -((dtype*)a)[0]; 72 | } 73 | 74 | }; 75 | /** 76 | * @} 77 | */ 78 | 79 | } 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /test/speye.cxx: -------------------------------------------------------------------------------- 1 
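
ring.h above exposes an explicit constructor taking the additive identity, addition function, MPI reduction op, multiplicative identity and multiplication function. A sketch of defining a ring over `int` and using it for tensors, following that constructor signature and the algstrct-taking Matrix/Vector constructors used elsewhere in this dump (illustrative; assumes n >= 2 for the example indices):

```cpp
#include <ctf.hpp>
using namespace CTF;

int iadd(int a, int b){ return a + b; }
int imul(int a, int b){ return a * b; }

// Integer arithmetic spelled out through the explicit Ring constructor; tensors
// built over this structure use it for all of their summations and contractions.
void custom_ring_demo(int n, World & dw){
  Ring<int> Z(0, &iadd, MPI_SUM, 1, &imul);

  Matrix<int> M(n, n, NS, dw, Z);
  Vector<int> v(n, dw, Z);
  Vector<int> w(n, dw, Z);

  if (dw.rank == 0){
    int64_t iM[2] = {0, (int64_t)n + 1};  // two global element indices
    int     vM[2] = {3, 4};
    M.write(2, iM, vM);
    int64_t iv[1] = {0};
    int     vv[1] = {5};
    v.write(1, iv, vv);
  } else {
    M.write(0, NULL, NULL);               // write is collective: all ranks call it
    v.write(0, NULL, NULL);
  }

  w["i"] += M["ij"] * v["j"];             // integer matvec over the custom ring
}
```
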
| /** \addtogroup tests 2 | * @{ 3 | * \defgroup speye speye 4 | * @{ 5 | * \brief Sparse identity matrix test 6 | */ 7 | 8 | #include 9 | using namespace CTF; 10 | 11 | int speye(int n, 12 | int order, 13 | World & dw){ 14 | 15 | int shape[order]; 16 | int size[order]; 17 | char idx_rep[order+1]; 18 | idx_rep[order]='\0'; 19 | char idx_chg[order+1]; 20 | idx_chg[order]='\0'; 21 | for (int i=0; i A(order, true, size, shape, dw); 33 | 34 | A[idx_rep] = 1.0; 35 | 36 | /* if (order == 3){ 37 | int ns[] = {n,n,n}; 38 | int sy[] = {SY,SY,NS}; 39 | Tensor<> AA(3, ns, sy, dw); 40 | AA.fill_random(0.0,1.0); 41 | A["ijk"] += AA["ijk"]; 42 | AA["ijk"] += A["ijk"]; 43 | AA["ijk"] += A["ijk"]; 44 | }*/ 45 | 46 | /*if (dw.rank == 0) 47 | printf("PRINTING\n"); 48 | A.print();*/ 49 | 50 | double sum1 = A[idx_chg]; 51 | double sum2 = A[idx_rep]; 52 | 53 | int pass = (fabs(sum1-n)<1.E-9) & (fabs(sum2-n)<1.E-9); 54 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 55 | if (dw.rank == 0){ 56 | if (pass) 57 | printf("{ A is sparse; A[\"iii...\"]=1; sum(A) = range of i } passed \n"); 58 | else 59 | printf("{ A is sparse; A[\"iii...\"]=1; sum(A) = range of i } failed \n"); 60 | } 61 | return pass; 62 | } 63 | 64 | 65 | #ifndef TEST_SUITE 66 | char* getCmdOption(char ** begin, 67 | char ** end, 68 | const std::string & option){ 69 | char ** itr = std::find(begin, end, option); 70 | if (itr != end && ++itr != end){ 71 | return *itr; 72 | } 73 | return 0; 74 | } 75 | 76 | 77 | int main(int argc, char ** argv){ 78 | int rank, np, n, pass, order; 79 | int const in_num = argc; 80 | char ** input_str = argv; 81 | 82 | MPI_Init(&argc, &argv); 83 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 84 | MPI_Comm_size(MPI_COMM_WORLD, &np); 85 | 86 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 87 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 88 | if (n < 0) n = 7; 89 | } else n = 7; 90 | 91 | if (getCmdOption(input_str, input_str+in_num, "-order")){ 92 | order = atoi(getCmdOption(input_str, input_str+in_num, "-order")); 93 | if (order < 0) order = 3; 94 | } else order = 3; 95 | 96 | { 97 | World dw(argc, argv); 98 | 99 | if (rank == 0){ 100 | printf("Computing sum of I where I is an identity tensor of order %d and dimension %d stored sparse\n", order, n); 101 | } 102 | pass = speye(n, order, dw); 103 | assert(pass); 104 | } 105 | 106 | MPI_Finalize(); 107 | return 0; 108 | } 109 | /** 110 | * @} 111 | * @} 112 | */ 113 | 114 | #endif 115 | -------------------------------------------------------------------------------- /test/bivar_function.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup tests 4 | * @{ 5 | * \defgroup bivar_function bivar_function 6 | * @{ 7 | * \brief tests custom element-wise functions by implementing division elementwise on 4D tensors 8 | */ 9 | 10 | #include 11 | using namespace CTF; 12 | 13 | double f2(double a, double b){ 14 | return a*b+b*a; 15 | } 16 | 17 | int bivar_function(int n, 18 | World & dw){ 19 | 20 | int shapeN4[] = {NS,NS,NS,NS}; 21 | int sizeN4[] = {n+1,n,n+2,n+3}; 22 | 23 | Tensor<> A(4, sizeN4, shapeN4, dw); 24 | Tensor<> B(4, sizeN4, shapeN4, dw); 25 | 26 | srand48(dw.rank); 27 | A.fill_random(-.5, .5); 28 | B.fill_random(-.5, .5); 29 | 30 | 31 | double * all_start_data_A; 32 | int64_t nall_A; 33 | A.read_all(&nall_A, &all_start_data_A); 34 | double * all_start_data_B; 35 | int64_t nall_B; 36 | B.read_all(&nall_B, 
&all_start_data_B); 37 | 38 | CTF::Function<> bfun([](double a, double b){ return a*b + b*a; }); 39 | .5*A["ijkl"]+=bfun(A["ijkl"],B["ijkl"]); 40 | 41 | double * all_end_data_A; 42 | int64_t nall2_A; 43 | A.read_all(&nall2_A, &all_end_data_A); 44 | 45 | int pass = (nall_A == nall2_A); 46 | if (pass){ 47 | for (int64_t i=0; i=1.E-6) pass =0; 49 | } 50 | } 51 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 52 | 53 | if (dw.rank == 0){ 54 | if (pass){ 55 | printf("{ A[\"ijkl\"] = f2(A[\"ijkl\"], B[\"ijkl\"]) } passed\n"); 56 | } else { 57 | printf("{ A[\"ijkl\"] = f2(A[\"ijkl\"], B[\"ijkl\"]) } failed\n"); 58 | } 59 | } 60 | 61 | delete [] all_start_data_A; 62 | delete [] all_end_data_A; 63 | delete [] all_start_data_B; 64 | 65 | return pass; 66 | } 67 | 68 | 69 | #ifndef TEST_SUITE 70 | 71 | char* getCmdOption(char ** begin, 72 | char ** end, 73 | const std::string & option){ 74 | char ** itr = std::find(begin, end, option); 75 | if (itr != end && ++itr != end){ 76 | return *itr; 77 | } 78 | return 0; 79 | } 80 | 81 | 82 | int main(int argc, char ** argv){ 83 | int rank, np, n; 84 | int const in_num = argc; 85 | char ** input_str = argv; 86 | 87 | MPI_Init(&argc, &argv); 88 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 89 | MPI_Comm_size(MPI_COMM_WORLD, &np); 90 | 91 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 92 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 93 | if (n < 0) n = 5; 94 | } else n = 5; 95 | 96 | 97 | { 98 | World dw(MPI_COMM_WORLD, argc, argv); 99 | 100 | if (rank == 0){ 101 | printf("Computing bivar_function A_ijkl = f(B_ijkl, A_ijkl)\n"); 102 | } 103 | bivar_function(n, dw); 104 | } 105 | 106 | 107 | MPI_Finalize(); 108 | return 0; 109 | } 110 | 111 | /** 112 | * @} 113 | * @} 114 | */ 115 | 116 | #endif 117 | -------------------------------------------------------------------------------- /src/interface/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_H__ 2 | #define __TIMER_H__ 3 | 4 | #include "common.h" 5 | 6 | 7 | namespace CTF { 8 | /** 9 | * \defgroup timer Timing and cost measurement 10 | * \addtogroup timer 11 | * @{ 12 | */ 13 | 14 | #define MAX_NAME_LENGTH 53 15 | 16 | /** 17 | * \brief times a specific symbol 18 | */ 19 | class Function_timer{ 20 | public: 21 | char name[MAX_NAME_LENGTH]; 22 | double start_time; 23 | double start_excl_time; 24 | double acc_time; 25 | double acc_excl_time; 26 | int calls; 27 | 28 | double total_time; 29 | double total_excl_time; 30 | int total_calls; 31 | 32 | public: 33 | Function_timer(char const * name_, 34 | double const start_time_, 35 | double const start_excl_time_); 36 | //Function_timer(Function_timer const & other); 37 | //~Function_timer(); 38 | void compute_totals(MPI_Comm comm); 39 | bool operator<(Function_timer const & w) const ; 40 | void print(FILE * output, 41 | MPI_Comm const comm, 42 | int const rank, 43 | int const np); 44 | }; 45 | 46 | 47 | /** 48 | * \brief local process walltime measurement 49 | */ 50 | class Timer{ 51 | public: 52 | char const * timer_name; 53 | int index; 54 | int exited; 55 | int original; 56 | 57 | public: 58 | Timer(char const * name); 59 | ~Timer(); 60 | void stop(); 61 | void start(); 62 | void exit(); 63 | 64 | }; 65 | 66 | /** 67 | * \brief epoch during which to measure timers 68 | */ 69 | class Timer_epoch{ 70 | private: 71 | Timer * tmr_inner; 72 | Timer * tmr_outer; 73 | double save_excl_time; 74 | std::vector saved_function_timers; 75 | public: 76 | char const * name; 77 
| //create epoch called name 78 | Timer_epoch(char const * name_); 79 | 80 | ~Timer_epoch(){ 81 | saved_function_timers.clear(); 82 | } 83 | 84 | //clears timers and begins epoch 85 | void begin(); 86 | 87 | //prints timers and clears them 88 | void end(); 89 | }; 90 | 91 | 92 | /** 93 | * \brief a term is an abstract object representing some expression of tensors 94 | */ 95 | 96 | /** 97 | * \brief measures flops done in a code region 98 | */ 99 | class Flop_counter{ 100 | public: 101 | int64_t start_count; 102 | 103 | public: 104 | /** 105 | * \brief constructor, starts counter 106 | */ 107 | Flop_counter(); 108 | ~Flop_counter(); 109 | 110 | /** 111 | * \brief restarts counter 112 | */ 113 | void zero(); 114 | 115 | /** 116 | * \brief get total flop count over all counters in comm 117 | */ 118 | int64_t count(MPI_Comm comm = MPI_COMM_SELF); 119 | 120 | }; 121 | 122 | /** 123 | * @} 124 | */ 125 | } 126 | 127 | 128 | #endif 129 | 130 | -------------------------------------------------------------------------------- /examples/checkpoint.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2016, Edgar Solomonik, all rights reserved.*/ 2 | /** \addtogroup examples 3 | * @{ 4 | * \defgroup checkpoint checkpoint 5 | * @{ 6 | * \brief tests read and write dense data to file functionality 7 | */ 8 | 9 | #include 10 | using namespace CTF; 11 | 12 | int checkpoint(int n, 13 | World & dw, 14 | int qtf=NS){ 15 | 16 | Matrix<> A(n, n, qtf, dw); 17 | Matrix<> A2(n, n, qtf, dw); 18 | Matrix<> A3(n, n, qtf, dw); 19 | Matrix<> A4(n, n, qtf, dw); 20 | Matrix<> A5(n, n, qtf, dw); 21 | 22 | srand48(13*dw.rank); 23 | A.fill_random(0.0,1.0); 24 | A.print(); 25 | A["ii"] = 0.0; 26 | A2["ij"] = A["ij"]; 27 | A3["ij"] = 2.*A["ij"]; 28 | 29 | MPI_File file; 30 | MPI_File_open(dw.comm, "CTF_checkpoint_test_file.bin", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &file); 31 | A2.write_dense_to_file(file); 32 | A3.write_dense_to_file(file,n*n*sizeof(double)); 33 | MPI_File_close(&file); 34 | 35 | MPI_File_open(dw.comm, "CTF_checkpoint_test_file.bin", MPI_MODE_RDONLY | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, &file); 36 | A4.read_dense_from_file(file); 37 | 38 | A4.print(); 39 | A["ij"] -= A4["ij"]; 40 | int pass = A.norm2() <= 1.e-9*n; 41 | 42 | A5.read_dense_from_file(file,n*n*sizeof(double)); 43 | MPI_File_close(&file); 44 | A5["ij"] -= 2.*A4["ij"]; 45 | pass = pass & (A5.norm2() <= 1.e-9*n); 46 | 47 | if (dw.rank == 0){ 48 | if (!pass){ 49 | printf("{ checkpointing using dense data representation with qtf=%d } failed\n",qtf); 50 | } else { 51 | printf("{ checkpointing using dense data representation with qtf=%d } passed\n",qtf); 52 | } 53 | } 54 | return pass; 55 | 56 | } 57 | 58 | 59 | #ifndef TEST_SUITE 60 | char* getCmdOption(char ** begin, 61 | char ** end, 62 | const std::string & option){ 63 | char ** itr = std::find(begin, end, option); 64 | if (itr != end && ++itr != end){ 65 | return *itr; 66 | } 67 | return 0; 68 | } 69 | 70 | 71 | int main(int argc, char ** argv){ 72 | int rank, np, n, qtf; 73 | int const in_num = argc; 74 | char ** input_str = argv; 75 | 76 | MPI_Init(&argc, &argv); 77 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 78 | MPI_Comm_size(MPI_COMM_WORLD, &np); 79 | 80 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 81 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 82 | if (n < 0) n = 7; 83 | } else n = 7; 84 | 85 | if (getCmdOption(input_str, input_str+in_num, "-qtf")){ 86 | qtf = atoi(getCmdOption(input_str, 
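
timer.h above declares the profiling interface: a named `Timer` with explicit `start()`/`stop()`, a `Timer_epoch` that collects and prints every timer between `begin()` and `end()`, and a `Flop_counter` whose `count()` reduces the flop total over a communicator. A usage sketch combining the three around a single contraction (illustrative, not a file from the repository):

```cpp
#include <ctf.hpp>
#include <cstdio>
using namespace CTF;

void timed_contraction(int n, World & dw){
  Matrix<> A(n, n, dw), B(n, n, dw), C(n, n, dw);
  A.fill_random(0., 1.);
  B.fill_random(0., 1.);

  Timer_epoch te("demo_epoch");
  te.begin();                          // clears timers and starts the epoch

  Flop_counter fc;                     // starts counting at construction
  Timer t("demo_gemm");
  t.start();
  C["ij"] += A["ik"] * B["kj"];
  t.stop();

  int64_t flops = fc.count(dw.comm);   // flop total over all ranks of dw
  if (dw.rank == 0) printf("contraction flops: %lld\n", (long long)flops);

  te.end();                            // prints the timers gathered in this epoch
}
```
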
input_str+in_num, "-qtf")); 87 | if (qtf < 0) qtf = NS; 88 | } else qtf = NS; 89 | 90 | 91 | 92 | { 93 | World dw(MPI_COMM_WORLD, argc, argv); 94 | if (rank == 0){ 95 | printf("Checking checkpoint calculation n = %d, p = %d, qtf = %d:\n",n,np,qtf); 96 | } 97 | int pass = checkpoint(n,dw,qtf); 98 | assert(pass); 99 | } 100 | 101 | MPI_Finalize(); 102 | return 0; 103 | } 104 | /** 105 | * @} 106 | * @} 107 | */ 108 | 109 | #endif 110 | -------------------------------------------------------------------------------- /src/contraction/ctr_offload.h: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #ifndef __CTR_OFFLOAD_H__ 4 | #define __CTR_OFFLOAD_H__ 5 | 6 | #include "../shared/offload.h" 7 | #include "ctr_comm.h" 8 | 9 | namespace CTF_int { 10 | #ifdef OFFLOAD 11 | class ctr_offload : public ctr { 12 | public: 13 | /* Class to be called on sub-blocks */ 14 | ctr * rec_ctr; 15 | int64_t size_A; 16 | int64_t size_B; 17 | int64_t size_C; 18 | int iter_counter; 19 | int total_iter; 20 | int upload_phase_A; 21 | int upload_phase_B; 22 | int download_phase_C; 23 | offload_tsr * ptr_A; 24 | offload_tsr * ptr_B; 25 | offload_tsr * ptr_C; 26 | 27 | /** 28 | * \brief print ctr object 29 | */ 30 | void print(); 31 | 32 | /** 33 | * \brief offloads and downloads local blocks of dense tensors 34 | */ 35 | void run(char * A, char * B, char * C); 36 | 37 | /** 38 | * \brief returns the number of bytes of buffer space 39 | we need 40 | * \return bytes needed 41 | */ 42 | int64_t mem_fp(); 43 | 44 | /** 45 | * \brief returns the number of bytes of buffer space we need recursively 46 | * \return bytes needed for recursive contraction 47 | */ 48 | int64_t mem_rec(); 49 | 50 | /** 51 | * \brief returns the time this kernel will take excluding calls to rec_ctr 52 | * \return seconds needed 53 | */ 54 | double est_time_fp(int nlyr); 55 | 56 | 57 | /** 58 | * \brief returns the time this kernel will take including calls to rec_ctr 59 | * \return seconds needed for recursive contraction 60 | */ 61 | double est_time_rec(int nlyr); 62 | 63 | /** 64 | * \brief copies ctr object 65 | */ 66 | ctr * clone(); 67 | 68 | /** 69 | * \brief copies ctr object 70 | */ 71 | ctr_offload(ctr * other); 72 | 73 | /** 74 | * \brief deallocates ctr_offload object 75 | */ 76 | ~ctr_offload(); 77 | 78 | /** 79 | * \brief allocates ctr_offload object 80 | * \param[in] c contraction object 81 | * \param[in] size_A size of the A tensor 82 | * \param[in] size_B size of the B tensor 83 | * \param[in] size_C size of the C tensor 84 | * \param[in] total_iter number of gemms to be done 85 | * \param[in] upload_phase_A period in iterations with which to upload A 86 | * \param[in] upload_phase_B period in iterations with which to upload B 87 | * \param[in] download_phase_C period in iterations with which to download C 88 | */ 89 | ctr_offload(contraction const * c, 90 | int64_t size_A, 91 | int64_t size_B, 92 | int64_t size_C, 93 | int total_iter, 94 | int upload_phase_A, 95 | int upload_phase_B, 96 | int download_phase_C); 97 | 98 | }; 99 | #endif 100 | 101 | } 102 | 103 | #endif 104 | -------------------------------------------------------------------------------- /test/sy_times_ns.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup tests 4 | * @{ 5 | * \defgroup sy_times_ns sy_times_ns 6 | * @{ 7 | * \brief Tests 
contraction of a symmetric index group with a nonsymmetric one 8 | */ 9 | 10 | #include 11 | 12 | using namespace CTF; 13 | 14 | int sy_times_ns(int n, 15 | World & dw){ 16 | int rank, i, num_pes, pass; 17 | int64_t np; 18 | double * pairs; 19 | int64_t * indices; 20 | 21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 22 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 23 | 24 | 25 | int shapeN4[] = {NS,NS,NS,NS}; 26 | int sizeN4[] = {n,n,n,n}; 27 | 28 | //* Creates distributed tensors initialized with zeros 29 | Tensor<> B(4, sizeN4, shapeN4, dw); 30 | 31 | Matrix<> A(n, n, SY, dw); 32 | Matrix<> An(n, n, NS, dw); 33 | Matrix<> C(n, n, SY, dw, "C"); 34 | Matrix<> Cn(n, n, NS, dw, "Cn"); 35 | 36 | srand48(13*rank); 37 | 38 | 39 | A.get_local_data(&np, &indices, &pairs); 40 | for (i=0; i 11 | using namespace CTF; 12 | 13 | struct cust_sp_type { 14 | char name[256]; 15 | int len_name; 16 | }; 17 | 18 | void comp_len(cust_sp_type & a){ 19 | a.len_name = strlen(a.name); 20 | } 21 | 22 | int endomorphism_cust_sp(int n, 23 | World & dw){ 24 | 25 | int shapeN4[] = {NS,NS,NS,NS}; 26 | int sizeN4[] = {n+1,n,n+2,n+3}; 27 | 28 | Set s = Set(); 29 | 30 | Tensor A(4, true, sizeN4, shapeN4, dw, s); 31 | 32 | if (dw.rank < n*n*n*n){ 33 | srand48(dw.rank); 34 | int str_len = drand48()*255; 35 | 36 | cust_sp_type my_obj; 37 | std::fill(my_obj.name, my_obj.name+str_len, 'a'); 38 | my_obj.name[str_len]='\0'; 39 | 40 | int64_t idx = dw.rank; 41 | A.write(1, &idx, &my_obj); 42 | } else 43 | A.write(0, NULL, NULL); 44 | 45 | CTF::Transform endo(comp_len); 46 | // below is equivalent to A.scale(NULL, "ijkl", endo); 47 | endo(A["ijkl"]); 48 | 49 | int64_t * indices; 50 | cust_sp_type * loc_data; 51 | int64_t nloc; 52 | A.get_local_data(&nloc, &indices, &loc_data, true); 53 | 54 | int pass = 1; 55 | if (pass){ 56 | for (int64_t i=0; i 14 | class Vector : public Tensor { 15 | public: 16 | int len; 17 | /** 18 | * \brief default constructor for a vector 19 | */ 20 | Vector(); 21 | 22 | /** 23 | * \brief copy constructor for a matrix 24 | * \param[in] A matrix to copy along with its data 25 | */ 26 | Vector(Vector const & A); 27 | 28 | /** 29 | * \brief casts a tensor to a matrix 30 | * \param[in] A tensor object of order 1 31 | */ 32 | Vector(Tensor const & A); 33 | 34 | /** 35 | * \brief constructor for a vector 36 | * \param[in] len dimension of vector 37 | * \param[in] world CTF world where the tensor will live 38 | * \param[in] sr defines the tensor arithmetic for this tensor 39 | */ 40 | Vector(int len, 41 | World & world, 42 | CTF_int::algstrct const & sr); 43 | 44 | /** 45 | * \brief constructor for a vector 46 | * \param[in] len dimension of vector 47 | * \param[in] world CTF world where the tensor will live 48 | * \param[in] name an optionary name for the tensor 49 | * \param[in] profile set to 1 to profile contractions involving this tensor 50 | * \param[in] sr defines the tensor arithmetic for this tensor 51 | */ 52 | Vector(int len, 53 | World & world=get_universe(), 54 | char const * name=NULL, 55 | int profile=0, 56 | CTF_int::algstrct const & sr=Ring()); 57 | 58 | /** 59 | * \brief constructor for a vector 60 | * \param[in] len dimension of vector 61 | * \param[in] atr quantifier for sparsity and symmetry of matrix (0 -> dense, >0 -> sparse) 62 | * \param[in] world CTF world where the tensor will live 63 | * \param[in] sr defines the tensor arithmetic for this tensor 64 | */ 65 | Vector(int len, 66 | int atr, 67 | World & world=get_universe(), 68 | CTF_int::algstrct const & sr=Ring()); 69 | 70 | 71 | /** 72 | * 
\brief constructor for a vector 73 | * \param[in] len dimension of vector 74 | * \param[in] atr quantifier for sparsity and symmetry of matrix (0 -> dense, >0 -> sparse) 75 | * \param[in] world CTF world where the tensor will live 76 | */ 77 | Vector(int len, 78 | int atr, 79 | World & world, 80 | char const * name, 81 | int profile=0, 82 | CTF_int::algstrct const & sr=Ring()); 83 | 84 | 85 | 86 | //Vector & operator=(const Vector & A); 87 | /** 88 | * @} 89 | */ 90 | }; 91 | } 92 | #include "vector.cxx" 93 | #endif 94 | -------------------------------------------------------------------------------- /src/interface/sparse_tensor.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #include "common.h" 4 | 5 | namespace CTF { 6 | 7 | template 8 | Sparse_Tensor::Sparse_Tensor(){ 9 | parent = NULL; 10 | } 11 | 12 | template 13 | Sparse_Tensor::Sparse_Tensor(std::vector indices_, 14 | Tensor * parent_){ 15 | parent = parent_; 16 | indices = indices_; 17 | scale = *(dtype*)parent_->sr->mulid(); 18 | } 19 | 20 | template 21 | Sparse_Tensor::Sparse_Tensor(int64_t n, 22 | int64_t * indices_, 23 | Tensor * parent_){ 24 | parent = parent_; 25 | indices = std::vector(indices_,indices_+n); 26 | scale = *(dtype*)parent_->sr->mulid(); 27 | } 28 | 29 | template 30 | void Sparse_Tensor::write(dtype alpha, 31 | dtype * values, 32 | dtype beta){ 33 | parent->write(indices.size(),alpha,beta,&indices[0],&values[0]); 34 | } 35 | 36 | // C++ overload special-cases of above method 37 | template 38 | void Sparse_Tensor::operator=(std::vector values){ 39 | write(*(dtype const*)parent->sr->mulid(), &values[0], *(dtype const*)parent->sr->addid()); 40 | } 41 | template 42 | void Sparse_Tensor::operator=(dtype* values){ 43 | write(*(dtype const*)parent->sr->mulid(), values, *(dtype const*)parent->sr->addid()); 44 | } 45 | 46 | template 47 | void Sparse_Tensor::operator+=(std::vector values){ 48 | write(*(dtype const*)parent->sr->mulid(), &values[0], *(dtype const*)parent->sr->mulid()); 49 | } 50 | 51 | template 52 | void Sparse_Tensor::operator+=(dtype* values){ 53 | write(*(dtype const*)parent->sr->mulid(), values, *(dtype const*)parent->sr->mulid()); 54 | } 55 | 56 | template 57 | void Sparse_Tensor::operator-=(std::vector values){ 58 | write(-*(dtype const*)parent->sr->mulid(), &values[0], *(dtype const*)parent->sr->mulid()); 59 | } 60 | 61 | template 62 | void Sparse_Tensor::operator-=(dtype* values){ 63 | write(-*(dtype const*)parent->sr->mulid(), values, *(dtype const*)parent->sr->mulid()); 64 | } 65 | 66 | template 67 | void Sparse_Tensor::read(dtype alpha, 68 | dtype * values, 69 | dtype beta){ 70 | parent->read(indices.size(),alpha,beta,&indices[0],values); 71 | } 72 | template 73 | Sparse_Tensor::operator std::vector(){ 74 | std::vector values(indices.size()); 75 | read(parent->sr->mulid(), &values[0], parent->sr->addid()); 76 | return values; 77 | } 78 | 79 | template 80 | Sparse_Tensor::operator dtype*(){ 81 | dtype * values = (dtype*)malloc(sizeof(dtype)*indices.size()); 82 | read(parent->sr->mulid(), values, parent->sr->addid()); 83 | return values; 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /examples/particle_interaction.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup examples 4 | * @{ 5 | * \defgroup 
particle_interaction particle_interaction 6 | * @{ 7 | * \brief tests custom element-wise functions by computing interactions between particles and integrating 8 | */ 9 | 10 | #include <ctf.hpp> 11 | #include "moldynamics.h" 12 | using namespace CTF; 13 | int particle_interaction(int n, 14 | World & dw){ 15 | 16 | Set<particle> sP = Set<particle>(); 17 | Group<force> gF = Group<force>(); 18 | 19 | Vector<particle> P(n, dw, sP); 20 | 21 | particle * loc_parts; 22 | int64_t nloc; 23 | int64_t * inds; 24 | P.get_local_data(&nloc, &inds, &loc_parts); 25 | 26 | srand48(dw.rank); 27 | 28 | for (int64_t i=0; i F(n, dw, gF); 39 | 40 | // CTF::Bivar_Function<particle,particle,force> fGF(&get_force); 41 | CTF::Bivar_Kernel<particle,particle,force,get_force> fGF; 42 | 43 | F["i"] += fGF(P["i"],P["j"]); 44 | 45 | Matrix<force> F_all(n, n, NS, dw, gF); 46 | 47 | F_all["ij"] = fGF(P["i"],P["j"]); 48 | 49 | 50 | Vector<> f_mgn(n, dw); 51 | 52 | CTF::Function<force,double> get_mgn([](force f){ return f.fx+f.fy; } ); 53 | 54 | f_mgn["i"] += get_mgn(F_all["ij"]); 55 | -1.0*f_mgn["i"] += get_mgn(F["i"]); 56 | 57 | int pass = (f_mgn.norm2() < 1.E-6); 58 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 59 | 60 | if (dw.rank == 0){ 61 | if (pass){ 62 | printf("{ F[\"i\"] = get_force(P[\"i\"],P[\"j\"]) } passed\n"); 63 | } else { 64 | printf("{ F[\"i\"] = get_force(P[\"i\"],P[\"j\"]) } failed\n"); 65 | } 66 | } 67 | 68 | Transform<force,particle>([] (force f, particle & p){ p.dx += f.fx*p.coeff; p.dy += f.fy*p.coeff; })(F["i"], P["i"]); 69 | 70 | return pass; 71 | } 72 | 73 | 74 | #ifndef TEST_SUITE 75 | 76 | char* getCmdOption(char ** begin, 77 | char ** end, 78 | const std::string & option){ 79 | char ** itr = std::find(begin, end, option); 80 | if (itr != end && ++itr != end){ 81 | return *itr; 82 | } 83 | return 0; 84 | } 85 | 86 | 87 | int main(int argc, char ** argv){ 88 | int rank, np, n; 89 | int const in_num = argc; 90 | char ** input_str = argv; 91 | 92 | MPI_Init(&argc, &argv); 93 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 94 | MPI_Comm_size(MPI_COMM_WORLD, &np); 95 | 96 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 97 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 98 | if (n < 0) n = 5; 99 | } else n = 5; 100 | 101 | 102 | { 103 | World dw(MPI_COMM_WORLD, argc, argv); 104 | 105 | if (rank == 0){ 106 | printf("Computing particle_interaction A_ijkl = f(B_ijkl, A_ijkl)\n"); 107 | } 108 | particle_interaction(n, dw); 109 | } 110 | 111 | 112 | MPI_Finalize(); 113 | return 0; 114 | } 115 | 116 | /** 117 | * @} 118 | * @} 119 | */ 120 | 121 | #endif 122 | -------------------------------------------------------------------------------- /src/interface/sparse_tensor.h: -------------------------------------------------------------------------------- 1 | #ifndef __SPARSE_TENSOR_H__ 2 | #define __SPARSE_TENSOR_H__ 3 | 4 | namespace CTF { 5 | /** 6 | * \defgroup CTF CTF Tensor 7 | * \addtogroup CTF 8 | * @{ 9 | */ 10 | /** 11 | * \brief a sparse subset of a tensor 12 | */ 13 | template<typename dtype> 14 | class Sparse_Tensor { 15 | public: 16 | /** \brief dense tensor whose subset this sparse tensor is of */ 17 | Tensor<dtype> * parent; 18 | /** \brief indices of the sparse elements of this tensor */ 19 | std::vector<int64_t> indices; 20 | /** \brief scaling factor by which to scale the tensor elements */ 21 | dtype scale; 22 | 23 | /** 24 | * \brief base constructor 25 | */ 26 | Sparse_Tensor(); 27 | 28 | /** 29 | * \brief initialize a tensor which corresponds to a set of indices 30 | * \param[in] indices a vector of global indices to tensor values 31 | * \param[in] parent dense distributed tensor to which this sparse tensor belongs to 32 | */ 
33 | Sparse_Tensor(std::vector indices, 34 | Tensor * parent); 35 | 36 | /** 37 | * \brief initialize a tensor which corresponds to a set of indices 38 | * \param[in] n number of values this sparse tensor will have locally 39 | * \param[in] indices an array of global indices to tensor values 40 | * \param[in] parent dense distributed tensor to which this sparse tensor belongs to 41 | */ 42 | Sparse_Tensor(int64_t n, 43 | int64_t * indices, 44 | Tensor * parent); 45 | 46 | /** 47 | * \brief set the sparse set of indices on the parent tensor to values 48 | * forall(j) i = indices[j]; parent[i] = beta*parent[i] + alpha*values[j]; 49 | * \param[in] alpha scaling factor on values array 50 | * \param[in] values data, should be of same size as the number of indices (n) 51 | * \param[in] beta scaling factor to apply to previously existing data 52 | */ 53 | void write(dtype alpha, 54 | dtype * values, 55 | dtype beta); 56 | 57 | // C++ overload special-cases of above method 58 | void operator=(std::vector values); 59 | void operator+=(std::vector values); 60 | void operator-=(std::vector values); 61 | void operator=(dtype * values); 62 | void operator+=(dtype * values); 63 | void operator-=(dtype * values); 64 | 65 | /** 66 | * \brief read the sparse set of indices on the parent tensor to values 67 | * forall(j) i = indices[j]; values[j] = alpha*parent[i] + beta*values[j]; 68 | * \param[in] alpha scaling factor on parent array 69 | * \param[in] values data, should be preallocated to the same size as the number of indices (n) 70 | * \param[in] beta scaling factor to apply to previously existing data in values 71 | */ 72 | void read(dtype alpha, 73 | dtype * values, 74 | dtype beta); 75 | 76 | // C++ overload special-cases of above method 77 | operator std::vector(); 78 | operator dtype*(); 79 | }; 80 | /** 81 | * @} 82 | */ 83 | } 84 | 85 | #include "sparse_tensor.cxx" 86 | #endif 87 | -------------------------------------------------------------------------------- /test/sptensor_sum.cxx: -------------------------------------------------------------------------------- 1 | /** \addtogroup tests 2 | * @{ 3 | * \defgroup sptensor_sum sptensor_sum 4 | * @{ 5 | * \brief Summation of sparse tensors 6 | */ 7 | 8 | #include 9 | using namespace CTF; 10 | 11 | int sptensor_sum(int n, 12 | World & dw){ 13 | 14 | int shapeN4[] = {NS,NS,NS,NS}; 15 | int sizeN4[] = {n,n,n,n}; 16 | 17 | // Creates distributed sparse tensors initialized with zeros 18 | Tensor<> A(4, true, sizeN4, shapeN4, dw); 19 | Tensor<> B(4, true, sizeN4, shapeN4, dw); 20 | 21 | if (dw.rank == dw.np/2){ 22 | int64_t keys_A[4] = {1,2,4,8}; 23 | double vals_A[4] = {3.2,42.,1.4,-.8}; 24 | 25 | A.write(4, keys_A, vals_A); 26 | 27 | int64_t keys_B[4] = {2,3}; 28 | double vals_B[4] = {24.,7.2}; 29 | 30 | B.write(2, keys_B, vals_B); 31 | } else { 32 | A.write(0, NULL, NULL); 33 | B.write(0, NULL, NULL); 34 | } 35 | 36 | //A.print(); 37 | //B.print(); 38 | 39 | B["abij"] += A["abij"]; 40 | 41 | //B.print(); 42 | 43 | int64_t * new_keys_B; 44 | double * new_vals_B; 45 | int64_t nloc; 46 | B.get_local_data(&nloc, &new_keys_B, &new_vals_B, true); 47 | int pass = 1; 48 | for (int i=0; i 1.E-9) pass = 0; 52 | break; 53 | case 2: 54 | if (fabs(66.-new_vals_B[i]) > 1.E-9) pass = 0; 55 | break; 56 | case 3: 57 | if (fabs(7.2-new_vals_B[i]) > 1.E-9) pass = 0; 58 | break; 59 | case 4: 60 | if (fabs(1.4-new_vals_B[i]) > 1.E-9) pass = 0; 61 | break; 62 | case 8: 63 | if (fabs(-.8-new_vals_B[i]) > 1.E-9) pass = 0; 64 | break; 65 | default: 66 | pass = 0; 67 | 
break; 68 | } 69 | } 70 | free(new_keys_B); 71 | delete [] new_vals_B; 72 | 73 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 74 | if (dw.rank == 0){ 75 | if (pass) 76 | printf("{ B[\"abij\"] += A[\"abij\"] with sparse, A, B } passed \n"); 77 | else 78 | printf("{ B[\"abij\"] += A[\"abij\"] with sparse, A, B } failed\n"); 79 | } 80 | return pass; 81 | } 82 | 83 | 84 | #ifndef TEST_SUITE 85 | char* getCmdOption(char ** begin, 86 | char ** end, 87 | const std::string & option){ 88 | char ** itr = std::find(begin, end, option); 89 | if (itr != end && ++itr != end){ 90 | return *itr; 91 | } 92 | return 0; 93 | } 94 | 95 | 96 | int main(int argc, char ** argv){ 97 | int rank, np, n, pass; 98 | int const in_num = argc; 99 | char ** input_str = argv; 100 | 101 | MPI_Init(&argc, &argv); 102 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 103 | MPI_Comm_size(MPI_COMM_WORLD, &np); 104 | 105 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 106 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 107 | if (n < 0) n = 7; 108 | } else n = 7; 109 | 110 | 111 | { 112 | World dw(argc, argv); 113 | 114 | if (rank == 0){ 115 | printf("Computing B+=A with B, A sparse\n"); 116 | } 117 | pass = sptensor_sum(n, dw); 118 | assert(pass); 119 | } 120 | 121 | MPI_Finalize(); 122 | return 0; 123 | } 124 | /** 125 | * @} 126 | * @} 127 | */ 128 | 129 | #endif 130 | -------------------------------------------------------------------------------- /src/contraction/spctr_offload.h: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #ifndef __SPCTR_OFFLOAD_H__ 4 | #define __SPCTR_OFFLOAD_H__ 5 | 6 | #include "../shared/offload.h" 7 | #include "spctr_tsr.h" 8 | 9 | namespace CTF_int { 10 | #ifdef OFFLOAD 11 | class spctr_offload : public spctr { 12 | public: 13 | /* Class to be called on sub-blocks */ 14 | spctr * rec_ctr; 15 | int iter_counter; 16 | int total_iter; 17 | int upload_phase_A; 18 | int upload_phase_B; 19 | int download_phase_C; 20 | int64_t size_A; /* size of A blocks */ 21 | int64_t size_B; /* size of B blocks */ 22 | int64_t size_C; /* size of C blocks */ 23 | offload_arr * spr_A; 24 | offload_arr * spr_B; 25 | offload_arr * spr_C; 26 | 27 | /** 28 | * \brief print ctr object 29 | */ 30 | void print(); 31 | 32 | /** 33 | * \brief offloads and downloads local blocks of dense or CSR tensors 34 | */ 35 | void run(char * A, int nblk_A, int64_t const * size_blk_A, 36 | char * B, int nblk_B, int64_t const * size_blk_B, 37 | char * C, int nblk_C, int64_t * size_blk_C, 38 | char *& new_C); 39 | 40 | /** 41 | * \brief returns the number of bytes of buffer space 42 | we need 43 | * \return bytes needed 44 | */ 45 | int64_t spmem_fp(); 46 | 47 | /** 48 | * \brief returns the number of bytes of buffer space we need recursively 49 | * \return bytes needed for recursive contraction 50 | */ 51 | int64_t mem_rec(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C); 52 | 53 | /** 54 | * \brief returns the time this kernel will take excluding calls to rec_ctr 55 | * \return seconds needed 56 | */ 57 | double est_time_fp(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C); 58 | 59 | /** 60 | * \brief returns the time this kernel will take including calls to rec_ctr 61 | * \return seconds needed for recursive contraction 62 | */ 63 | double est_time_rec(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C); 64 | 65 | spctr * clone(); 66 | 67 | 
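/**
 * \brief constructor that copies the parameters of another spctr_offload object (presumably the duplication performed by clone())
 * \param[in] other spctr_offload object whose fields are copied
 */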
spctr_offload(spctr * other); 68 | 69 | /** 70 | * \brief deallocates spctr_offload object 71 | */ 72 | ~spctr_offload(); 73 | 74 | /** 75 | * \brief allocates spctr_offload object 76 | * \param[in] c contraction object 77 | * \param[in] size_A size of the A tensor 78 | * \param[in] size_B size of the B tensor 79 | * \param[in] size_C size of the C tensor 80 | * \param[in] total_iter number of gemms to be done 81 | * \param[in] upload_phase_A period in iterations with which to upload A 82 | * \param[in] upload_phase_B period in iterations with which to upload B 83 | * \param[in] download_phase_C period in iterations with which to download C 84 | */ 85 | spctr_offload(contraction const * c, 86 | int64_t size_A, 87 | int64_t size_B, 88 | int64_t size_C, 89 | int total_iter, 90 | int upload_phase_A, 91 | int upload_phase_B, 92 | int download_phase_C); 93 | 94 | }; 95 | #endif 96 | 97 | } 98 | #endif 99 | -------------------------------------------------------------------------------- /src_python/ctf_ext.h: -------------------------------------------------------------------------------- 1 | 2 | #include "../include/ctf.hpp" 3 | 4 | namespace CTF_int{ 5 | 6 | 7 | /** 8 | * \python absolute value function 9 | * \param[in] A tensor, param[in,out] B tensor (becomes absolute value of A) 10 | * \return None 11 | */ 12 | template <typename dtype> 13 | void abs_helper(tensor * A, tensor * B); 14 | 15 | /** 16 | * \python pow function 17 | * \param[in] A tensor, param[in] B tensor, param[in,out] C tensor, param[in] index of A, param[in] index of B, param[in] index of C 18 | * \return None 19 | */ 20 | template <typename dtype> 21 | void pow_helper(tensor * A, tensor * B, tensor * C, char const * idx_A, char const * idx_B, char const * idx_C); 22 | 23 | /** 24 | * \python all function 25 | * \param[in] A tensor, param[in] B tensor with bool values created, param[in] index of A, param[in] index of B 26 | * \return None 27 | */ 28 | template <typename dtype> 29 | void all_helper(tensor * A, tensor * B_bool, char const * idx_A, char const * idx_B); 30 | 31 | template <typename dtype> 32 | void conj_helper(tensor * A, tensor * B); 33 | 34 | /** 35 | * \python function that gets the real part from complex numbers 36 | * \param[in] A tensor, param[in] B tensor stores the real part from tensor A 37 | * \return None 38 | */ 39 | template <typename dtype> 40 | void get_real(tensor * A, tensor * B); 41 | 42 | /** 43 | * \python function that gets the imaginary part from complex numbers 44 | * \param[in] A tensor, param[in] B tensor stores the imaginary part from tensor A 45 | * \return None 46 | */ 47 | template <typename dtype> 48 | void get_imag(tensor * A, tensor * B); 49 | 50 | /** 51 | * \python function that sets the real part from complex numbers 52 | * \param[in] A tensor, param[in] B tensor stores the real part from tensor A 53 | * \return None 54 | */ 55 | template <typename dtype> 56 | void set_real(tensor * A, tensor * B); 57 | 58 | /** 59 | * \python function that sets the imaginary part from complex numbers 60 | * \param[in] A tensor, param[in] B tensor stores the imaginary part from tensor A 61 | * \return None 62 | */ 63 | template <typename dtype> 64 | void set_imag(tensor * A, tensor * B); 65 | 66 | /** 67 | * \python any function 68 | * \param[in] A tensor, param[in] B tensor with bool values created, param[in] index of A, param[in] index of B 69 | * \return None 70 | */ 71 | template <typename dtype> 72 | void any_helper(tensor * A, tensor * B_bool, char const * idx_A, char const * idx_B); 73 | /** 74 | * \brief sum all 1 values in boolean tensor 75 | * \param[in] A tensor of boolean values 76 | * \return number of 1s in A 77 | */ 78 | int64_t 
sum_bool_tsr(tensor * A); 79 | 80 | void matrix_svd(tensor * A, tensor * U, tensor * S, tensor * VT, int rank); 81 | void matrix_svd_cmplx(tensor * A, tensor * U, tensor * S, tensor * VT, int rank); 82 | 83 | void matrix_qr(tensor * A, tensor * Q, tensor * R); 84 | void matrix_qr_cmplx(tensor * A, tensor * Q, tensor * R); 85 | 86 | /** 87 | * \brief convert tensor from one type to another 88 | * \param[in] type_idx1 index of first ype 89 | * \param[in] type_idx2 index of second ype 90 | * \param[in] A tensor to convert 91 | * \param[in] B tensor to convert to 92 | */ 93 | void conv_type(int type_idx1, int type_idx2, tensor * A, tensor * B); 94 | 95 | } 96 | -------------------------------------------------------------------------------- /test/dft.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup tests 4 | * @{ 5 | * \defgroup DFT DFT 6 | * @{ 7 | * \brief Discrete Fourier Transform by matrix multiplication 8 | */ 9 | 10 | #include 11 | using namespace CTF; 12 | 13 | int test_dft(int64_t n, 14 | World &wrld){ 15 | int numPes, myRank; 16 | int64_t np, i; 17 | int64_t * idx; 18 | std::complex * data; 19 | std::complex imag(0,1); 20 | MPI_Comm_size(MPI_COMM_WORLD, &numPes); 21 | MPI_Comm_rank(MPI_COMM_WORLD, &myRank); 22 | Matrix < std::complex >DFT(n, n, SY, wrld, "DFT", 1); 23 | Matrix < std::complex >IDFT(n, n, SY, wrld, "IDFT", 0); 24 | 25 | DFT.get_local_data(&np, &idx, &data); 26 | 27 | for (i=0; i (1.0, 0.0), DFT, "ij", IDFT, "jk", 47 | std::complex (0.0, 0.0), "ik");*/ 48 | DFT["ik"] = .5*DFT["ij"]*IDFT["jk"]; 49 | 50 | Scalar< std::complex > ss(wrld); 51 | ss[""] = Function< std::complex, std::complex, std::complex >([](std::complex a, std::complex b){ return a+b; })(DFT["ij"],DFT["ij"]); 52 | 53 | DFT.get_local_data(&np, &idx, &data); 54 | int pass = 1; 55 | //DFT.print(stdout); 56 | for (i=0; i=1.E-9) 60 | pass = 0; 61 | } else { 62 | if (fabs(data[i].real())>=1.E-9) 63 | pass = 0; 64 | } 65 | } 66 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 67 | 68 | if (myRank == 0) { 69 | MPI_Reduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); 70 | if (pass) 71 | printf("{ DFT[\"ik\"] = DFT[\"ij\"]*IDFT[\"jk\"] } passed\n"); 72 | else 73 | printf("{ DFT[\"ik\"] = DFT[\"ij\"]*IDFT[\"jk\"] } failed\n"); 74 | } else 75 | MPI_Reduce(&pass, MPI_IN_PLACE, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); 76 | 77 | MPI_Barrier(MPI_COMM_WORLD); 78 | 79 | free(idx); 80 | delete [] data; 81 | return pass; 82 | } 83 | 84 | #ifndef TEST_SUITE 85 | /** 86 | * \brief Forms N-by-N DFT matrix A and inverse-dft iA and checks A*iA=I 87 | */ 88 | int main(int argc, char ** argv){ 89 | int logn; 90 | int64_t n; 91 | 92 | MPI_Init(&argc, &argv); 93 | 94 | if (argc > 1){ 95 | logn = atoi(argv[1]); 96 | if (logn<0) logn = 5; 97 | } else { 98 | logn = 5; 99 | } 100 | n = 1< 11 | using namespace CTF; 12 | 13 | void f3(double a, double b, double & c){ 14 | c = a*c*a+b*c*b; 15 | } 16 | 17 | int bivar_transform(int n, 18 | World & dw){ 19 | 20 | int shapeN4[] = {NS,NS,NS,NS}; 21 | int sizeN4[] = {n+1,n,n+2,n+3}; 22 | 23 | Tensor<> A(4, sizeN4, shapeN4, dw); 24 | Tensor<> B(4, sizeN4, shapeN4, dw); 25 | Tensor<> C(4, sizeN4, shapeN4, dw); 26 | 27 | srand48(dw.rank); 28 | A.fill_random(-.5, .5); 29 | B.fill_random(-.5, .5); 30 | C.fill_random(-.5, .5); 31 | 32 | 33 | double * all_start_data_A; 34 | int64_t nall_A; 35 | A.read_all(&nall_A, &all_start_data_A); 36 | 
double * all_start_data_B; 37 | int64_t nall_B; 38 | B.read_all(&nall_B, &all_start_data_B); 39 | double * all_start_data_C; 40 | int64_t nall_C; 41 | C.read_all(&nall_C, &all_start_data_C); 42 | 43 | CTF::Transform<> bfun([](double a, double b, double & c){ c = a*c*a + b*c*b; }); 44 | bfun(A["ijkl"],B["ijkl"],C["ijkl"]); 45 | 46 | double * all_end_data_C; 47 | int64_t nall2_C; 48 | C.read_all(&nall2_C, &all_end_data_C); 49 | 50 | int pass = (nall_C == nall2_C); 51 | if (pass){ 52 | for (int64_t i=0; i=1.E-6){ 56 | pass =0; 57 | printf(" %lf %lf %lf %lf %lf\n",all_start_data_A[i],all_start_data_B[i],all_start_data_C[i],k,all_end_data_C[i]); 58 | } 59 | } 60 | } 61 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 62 | 63 | if (dw.rank == 0){ 64 | if (pass){ 65 | printf("{ f3(A[\"ijkl\"], B[\"ijkl\"], C[\"ijkl\"]) } passed\n"); 66 | } else { 67 | printf("{ f3(A[\"ijkl\"], B[\"ijkl\"], C[\"ijkl\"]) } failed\n"); 68 | } 69 | } 70 | 71 | delete [] all_start_data_A; 72 | delete [] all_start_data_B; 73 | delete [] all_start_data_C; 74 | delete [] all_end_data_C; 75 | 76 | return pass; 77 | } 78 | 79 | 80 | #ifndef TEST_SUITE 81 | 82 | char* getCmdOption(char ** begin, 83 | char ** end, 84 | const std::string & option){ 85 | char ** itr = std::find(begin, end, option); 86 | if (itr != end && ++itr != end){ 87 | return *itr; 88 | } 89 | return 0; 90 | } 91 | 92 | 93 | int main(int argc, char ** argv){ 94 | int rank, np, n; 95 | int const in_num = argc; 96 | char ** input_str = argv; 97 | 98 | MPI_Init(&argc, &argv); 99 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 100 | MPI_Comm_size(MPI_COMM_WORLD, &np); 101 | 102 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 103 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 104 | if (n < 0) n = 5; 105 | } else n = 5; 106 | 107 | 108 | { 109 | World dw(MPI_COMM_WORLD, argc, argv); 110 | 111 | if (rank == 0){ 112 | printf("Computing bivar_transform A_ijkl = f(A_ijkl)\n"); 113 | } 114 | bivar_transform(n, dw); 115 | } 116 | 117 | 118 | MPI_Finalize(); 119 | return 0; 120 | } 121 | 122 | /** 123 | * @} 124 | * @} 125 | */ 126 | 127 | #endif 128 | -------------------------------------------------------------------------------- /src/shared/blas_symbs.cxx: -------------------------------------------------------------------------------- 1 | #include "blas_symbs.h" 2 | #include "util.h" 3 | namespace CTF_BLAS { 4 | template 5 | void gemm(const char *, 6 | const char *, 7 | const int *, 8 | const int *, 9 | const int *, 10 | const dtype *, 11 | const dtype *, 12 | const int *, 13 | const dtype *, 14 | const int *, 15 | const dtype *, 16 | dtype *, 17 | const int *){ 18 | printf("CTF ERROR GEMM not available for this type.\n"); 19 | ASSERT(0); 20 | assert(0); 21 | } 22 | #define INST_GEMM(dtype,s) \ 23 | template <> \ 24 | void gemm(const char * a, \ 25 | const char * b, \ 26 | const int * c, \ 27 | const int * d, \ 28 | const int * e, \ 29 | const dtype * f, \ 30 | const dtype * g, \ 31 | const int * h, \ 32 | const dtype * i, \ 33 | const int * j, \ 34 | const dtype * k, \ 35 | dtype * l, \ 36 | const int * m){ \ 37 | s ## GEMM(a,b,c,d,e,f,g,h,i,j,k,l,m); \ 38 | } 39 | INST_GEMM(float,S) 40 | INST_GEMM(double,D) 41 | INST_GEMM(std::complex,C) 42 | INST_GEMM(std::complex,Z) 43 | #undef INST_GEMM 44 | 45 | 46 | #ifdef USE_BATCH_GEMM 47 | template 48 | void gemm_batch(const char *, 49 | const char *, 50 | const int *, 51 | const int *, 52 | const int *, 53 | const dtype *, 54 | dtype **, 55 | const int *, 56 | dtype **, 57 | const 
int *, 58 | const dtype *, 59 | dtype **, 60 | const int *, 61 | const int *, 62 | const int *){ 63 | printf("CTF ERROR gemm_batch not available for this type.\n"); 64 | ASSERT(0); 65 | assert(0); 66 | } 67 | 68 | #define INST_GEMM_BATCH(dtype,s) \ 69 | template <> \ 70 | void gemm_batch(const char * a, \ 71 | const char * b, \ 72 | const int * c, \ 73 | const int * d, \ 74 | const int * e, \ 75 | const dtype * f, \ 76 | dtype ** g, \ 77 | const int * h, \ 78 | dtype ** i, \ 79 | const int * j, \ 80 | const dtype * k, \ 81 | dtype ** l, \ 82 | const int * m, \ 83 | const int * n, \ 84 | const int * o){ \ 85 | s ## GEMM_BATCH(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o); \ 86 | } 87 | INST_GEMM_BATCH(float,S) 88 | INST_GEMM_BATCH(double,D) 89 | INST_GEMM_BATCH(std::complex,C) 90 | INST_GEMM_BATCH(std::complex,Z) 91 | #endif 92 | } 93 | #undef INST_GEMM_BATCH 94 | -------------------------------------------------------------------------------- /src/redistribution/dgtog_redist.cxx: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "dgtog_calc_cnt.h" 4 | #include "dgtog_redist.h" 5 | #include "../shared/util.h" 6 | #include "dgtog_bucket.h" 7 | namespace CTF_int { 8 | //static double init_mdl[] = {COST_LATENCY, COST_LATENCY, COST_NETWBW}; 9 | LinModel<3> dgtog_res_mdl(dgtog_res_mdl_init,"dgtog_res_mdl"); 10 | 11 | double dgtog_est_time(int64_t tot_sz, int np){ 12 | double ps[] = {1.0, (double)log2(np), (double)tot_sz*log2(np)}; 13 | return dgtog_res_mdl.est_time(ps); 14 | } 15 | } 16 | 17 | #define MTAG 777 18 | namespace CTF_redist_noror { 19 | #include "dgtog_redist_ror.h" 20 | } 21 | 22 | namespace CTF_redist_ror { 23 | #define ROR 24 | #include "dgtog_redist_ror.h" 25 | #undef ROR 26 | } 27 | 28 | namespace CTF_redist_ror_isr { 29 | #define ROR 30 | #define IREDIST 31 | #include "dgtog_redist_ror.h" 32 | #undef IREDIST 33 | #undef ROR 34 | } 35 | 36 | namespace CTF_redist_ror_put { 37 | #define ROR 38 | #define PUTREDIST 39 | #include "dgtog_redist_ror.h" 40 | #undef PUTREDIST 41 | #undef ROR 42 | } 43 | 44 | namespace CTF_redist_ror_isr_any { 45 | #define ROR 46 | #define IREDIST 47 | #define WAITANY 48 | #include "dgtog_redist_ror.h" 49 | #undef WAITANY 50 | #undef IREDIST 51 | #undef ROR 52 | } 53 | 54 | #ifdef USE_FOMPI 55 | namespace CTF_redist_ror_put_any { 56 | #define ROR 57 | #define IREDIST 58 | #define PUTREDIST 59 | #define WAITANY 60 | #define PUT_NOTIFY 61 | #include "dgtog_redist_ror.h" 62 | #undef PUT_NOTIFY 63 | #undef WAITANY 64 | #undef PUTREDIST 65 | #undef IREDIST 66 | #undef ROR 67 | } 68 | #endif 69 | 70 | 71 | namespace CTF_int { 72 | 73 | 74 | void dgtog_reshuffle(int const * sym, 75 | int const * edge_len, 76 | distribution const & old_dist, 77 | distribution const & new_dist, 78 | char ** ptr_tsr_data, 79 | char ** ptr_tsr_new_data, 80 | algstrct const * sr, 81 | CommData ord_glb_comm){ 82 | switch (CTF::DGTOG_SWITCH){ 83 | case 0: 84 | CTF_redist_noror::dgtog_reshuffle(sym, edge_len, old_dist, new_dist, ptr_tsr_data, ptr_tsr_new_data, sr, ord_glb_comm); 85 | break; 86 | case 1: 87 | CTF_redist_ror::dgtog_reshuffle(sym, edge_len, old_dist, new_dist, ptr_tsr_data, ptr_tsr_new_data, sr, ord_glb_comm); 88 | break; 89 | case 2: 90 | CTF_redist_ror_isr::dgtog_reshuffle(sym, edge_len, old_dist, new_dist, ptr_tsr_data, ptr_tsr_new_data, sr, ord_glb_comm); 91 | break; 92 | case 3: 93 | CTF_redist_ror_put::dgtog_reshuffle(sym, edge_len, old_dist, new_dist, ptr_tsr_data, ptr_tsr_new_data, sr, ord_glb_comm); 94 | break; 95 | case 4: 96 
| CTF_redist_ror_isr_any::dgtog_reshuffle(sym, edge_len, old_dist, new_dist, ptr_tsr_data, ptr_tsr_new_data, sr, ord_glb_comm); 97 | break; 98 | #ifdef USE_FOMPI 99 | case 5: 100 | CTF_redist_ror_put_any::dgtog_reshuffle(sym, edge_len, old_dist, new_dist, ptr_tsr_data, ptr_tsr_new_data, sr, ord_glb_comm); 101 | break; 102 | #else 103 | case 5: 104 | if (ord_glb_comm.rank == 0) printf("FOMPI needed for this redistribution, ABORTING\n"); 105 | assert(0); 106 | break; 107 | #endif 108 | default: 109 | assert(0); 110 | break; 111 | } 112 | 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /examples/spectral_element.cxx: -------------------------------------------------------------------------------- 1 | /** \addtogroup examples 2 | * @{ 3 | * \defgroup spectral spectral 4 | * @{ 5 | * \brief Spectral element methods test/benchmark 6 | */ 7 | #include 8 | using namespace CTF; 9 | 10 | /** 11 | * \brief computes the following kernel of the spectral element method 12 | * Given u, D, and diagonal matrices G_{xy} for x,y in [1,3], 13 | * let E_1 = I x I x D, E_2 = I x D x I, E_3 = D x I x I 14 | * [E_1^T, E_2^t, E_3^T] * [G_{11}, G_{12}, G_{13}] * [E_1] * u 15 | * [G_{21}, G_{22}, G_{23}] [E_2] 16 | * [G_{31}, G_{32}, G_{33}] [E_3] 17 | */ 18 | int spectral(int n, 19 | World & dw){ 20 | int lens_u[] = {n, n, n}; 21 | 22 | Tensor<> u(3, lens_u); 23 | Matrix<> D(n, n); 24 | u.fill_random(0.0,1.0); 25 | D.fill_random(0.0,1.0); 26 | 27 | Tensor<> ** G; 28 | G = (Tensor<>**)malloc(sizeof(Tensor<>*)*3); 29 | for (int a=0; a<3; a++){ 30 | G[a] = new Tensor<>[3]; 31 | for (int b=0; b<3; b++){ 32 | G[a][b] = Tensor<>(3, lens_u); 33 | G[a][b].fill_random(0.0,1.0); 34 | } 35 | } 36 | 37 | Tensor<> * w = new Tensor<>[3]; 38 | Tensor<> * z = new Tensor<>[3]; 39 | for (int a=0; a<3; a++){ 40 | w[a] = Tensor<>(3, lens_u); 41 | z[a] = Tensor<>(3, lens_u); 42 | } 43 | 44 | double st_time = MPI_Wtime(); 45 | 46 | w[0]["ijk"] = D["kl"]*u["ijl"]; 47 | w[1]["ijk"] = D["jl"]*u["ilk"]; 48 | w[2]["ijk"] = D["il"]*u["ljk"]; 49 | 50 | for (int a=0; a<3; a++){ 51 | for (int b=0; b<3; b++){ 52 | z[a]["ijk"] += G[a][b]["ijk"]*w[b]["ijk"]; 53 | } 54 | } 55 | 56 | u["ijk"] = D["lk"]*z[0]["ijl"]; 57 | u["ijk"] += D["lj"]*z[1]["ilk"]; 58 | u["ijk"] += D["li"]*z[2]["ljk"]; 59 | 60 | double exe_time = MPI_Wtime() - st_time; 61 | 62 | bool pass = u.norm2() >= 1.E-6; 63 | 64 | for (int a=0; a<3; a++){ 65 | delete [] G[a]; 66 | } 67 | free(G); 68 | delete [] w; 69 | delete [] z; 70 | 71 | if (dw.rank == 0){ 72 | if (pass) 73 | printf("{ Spectral element method } passed \n"); 74 | else 75 | printf("{ spectral element method } failed \n"); 76 | #ifndef TEST_SUITE 77 | printf("Spectral element method on %d*%d*%d grid with %d processors took %lf seconds\n", n,n,n,dw.np,exe_time); 78 | #endif 79 | } 80 | return pass; 81 | } 82 | 83 | 84 | #ifndef TEST_SUITE 85 | char* getCmdOption(char ** begin, 86 | char ** end, 87 | const std::string & option){ 88 | char ** itr = std::find(begin, end, option); 89 | if (itr != end && ++itr != end){ 90 | return *itr; 91 | } 92 | return 0; 93 | } 94 | 95 | 96 | int main(int argc, char ** argv){ 97 | int rank, np, n, pass; 98 | int const in_num = argc; 99 | char ** input_str = argv; 100 | 101 | MPI_Init(&argc, &argv); 102 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 103 | MPI_Comm_size(MPI_COMM_WORLD, &np); 104 | 105 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 106 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 107 | if (n < 0) n = 16; 
108 | } else n = 16; 109 | 110 | { 111 | World dw(argc, argv); 112 | 113 | if (rank == 0){ 114 | printf("Running 3D spectral element method with %d*%d*%d grid\n",n,n,n); 115 | } 116 | pass = spectral(n, dw); 117 | assert(pass); 118 | } 119 | 120 | MPI_Finalize(); 121 | return 0; 122 | } 123 | /** 124 | * @} 125 | * @} 126 | */ 127 | 128 | #endif 129 | -------------------------------------------------------------------------------- /scalapack_tests/qr.cxx: -------------------------------------------------------------------------------- 1 | /** \addtogroup tests 2 | * @{ 3 | * \defgroup qr qr 4 | * @{ 5 | * \brief QR factorization of CTF matrices 6 | */ 7 | 8 | #include <ctf.hpp> 9 | #include "conj.h" 10 | using namespace CTF; 11 | 12 | 13 | template <typename dtype> 14 | bool qr(Matrix<dtype> A, 15 | int m, 16 | int n, 17 | World & dw){ 18 | 19 | // Perform QR 20 | Matrix<dtype> Q,R; 21 | A.qr(Q,R); 22 | 23 | // Test orthogonality 24 | Matrix<dtype> E(n,n,dw); 25 | 26 | E["ii"] = 1.; 27 | 28 | E["ij"] -= Q["ki"]*conj(Q)["kj"]; 29 | 30 | bool pass_orthogonality = true; 31 | 32 | double nrm; 33 | E.norm2(nrm); 34 | if (nrm > m*n*1.E-6){ 35 | pass_orthogonality = false; 36 | } 37 | 38 | A["ij"] -= Q["ik"]*R["kj"]; 39 | 40 | bool pass_residual = true; 41 | A.norm2(nrm); 42 | if (nrm > m*n*n*1.E-6){ 43 | pass_residual = false; 44 | } 45 | 46 | #ifndef TEST_SUITE 47 | if (dw.rank == 0){ 48 | printf("QR orthogonality check returned %d, residual check %d\n", pass_orthogonality, pass_residual); 49 | } 50 | #endif 51 | return pass_residual & pass_orthogonality; 52 | } 53 | 54 | bool test_qr(int m, int n, World dw){ 55 | bool pass = true; 56 | Matrix<float> A(m,n,dw); 57 | Matrix<float> AA(m,n,dw); 58 | A.fill_random(0.,1.); 59 | AA.fill_random(0.,1.); 60 | pass = pass & qr(A,m,n,dw); 61 | 62 | Matrix<double> B(m,n,dw); 63 | Matrix<double> BB(m,n,dw); 64 | B.fill_random(0.,1.); 65 | BB.fill_random(0.,1.); 66 | pass = pass & qr(B,m,n,dw); 67 | 68 | Matrix<std::complex<float>> cA(m,n,dw); 69 | cA["ij"] = Function<float,float,std::complex<float>>([](float a, float b){ return std::complex<float>(a,b); })(A["ij"],AA["ij"]); 70 | pass = pass & qr<std::complex<float>>(cA,m,n,dw); 71 | 72 | 73 | Matrix<std::complex<double>> cB(m,n,dw); 74 | cB["ij"] = Function<double,double,std::complex<double>>([](double a, double b){ return std::complex<double>(a,b); })(B["ij"],BB["ij"]); 75 | pass = pass & qr<std::complex<double>>(cB,m,n,dw); 76 | if (dw.rank == 0){ 77 | if (pass){ 78 | printf("{ A = QR and Q^TQ = I } passed\n"); 79 | } else { 80 | printf("{ A = QR and Q^TQ = I } failed\n"); 81 | } 82 | } 83 | 84 | 85 | return pass; 86 | } 87 | 88 | #ifndef TEST_SUITE 89 | char* getCmdOption(char ** begin, 90 | char ** end, 91 | const std::string & option){ 92 | char ** itr = std::find(begin, end, option); 93 | if (itr != end && ++itr != end){ 94 | return *itr; 95 | } 96 | return 0; 97 | } 98 | 99 | 100 | int main(int argc, char ** argv){ 101 | int rank, np, m, n, pass; 102 | int const in_num = argc; 103 | char ** input_str = argv; 104 | 105 | MPI_Init(&argc, &argv); 106 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 107 | MPI_Comm_size(MPI_COMM_WORLD, &np); 108 | 109 | if (getCmdOption(input_str, input_str+in_num, "-m")){ 110 | m = atoi(getCmdOption(input_str, input_str+in_num, "-m")); 111 | if (m < 0) m = 13; 112 | } else m = 13; 113 | 114 | 115 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 116 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 117 | if (n < 0) n = 7; 118 | } else n = 7; 119 | 120 | 121 | { 122 | World dw(argc, argv); 123 | 124 | if (rank == 0){ 125 | printf("Testing %d-by-%d QR factorization\n", m, n); 126 | } 127 | pass = test_qr(m, n, dw); 128 | assert(pass); 129 | } 130 | 131 | MPI_Finalize(); 132 | return 0; 133 | } 134 | 
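// Illustrative usage (assuming an MPI launcher and that this test was built into a binary named qr;
// -m and -n default to 13 and 7 as parsed above):
//   mpirun -np 4 ./qr -m 13 -n 7
// This factors a random 13-by-7 matrix in each of the four scalar types exercised by test_qr and
// checks Q^T Q = I (conjugate-transposed for the complex cases) as well as the residual A - QR.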
/** 135 | * @} 136 | * @} 137 | */ 138 | 139 | #endif 140 | -------------------------------------------------------------------------------- /examples/dft_3D.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup examples 4 | * @{ 5 | * \defgroup DFT_3D DFT_3D 6 | * @{ 7 | * \brief 3D Discrete Fourier Transform by tensor contractions 8 | */ 9 | 10 | 11 | #include 12 | using namespace CTF; 13 | 14 | 15 | int test_dft_3D(int n, 16 | World & wrld){ 17 | int myRank, numPes; 18 | int i, j; 19 | int64_t np; 20 | int64_t * idx; 21 | std::complex * data; 22 | std::complex imag(0,1); 23 | 24 | int len[] = {n,n,n}; 25 | int sym[] = {NS,NS,NS}; 26 | 27 | MPI_Comm_size(MPI_COMM_WORLD, &numPes); 28 | MPI_Comm_rank(MPI_COMM_WORLD, &myRank); 29 | 30 | CTF::Ring< std::complex > ldr; 31 | 32 | Matrix < std::complex >DFT(n, n, SY, wrld, ldr); 33 | Matrix < std::complex >IDFT(n, n, SY, wrld, ldr); 34 | Tensor < std::complex >MESH(3, len, sym, wrld, ldr); 35 | 36 | DFT.get_local_data(&np, &idx, &data); 37 | 38 | for (i=0; i=1.E-9) pass = 0; 76 | } else { 77 | if (fabs((double)data[i].real())>=1.E-9) pass = 0; 78 | } 79 | } 80 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 81 | 82 | if (myRank == 0){ 83 | MPI_Reduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); 84 | if (pass) 85 | printf("{ MESH[\"ijk\"] = MESH[\"pqr\"]*DFT[\"ip\"]*DFT[\"jq\"]*DFT[\"kr\"] } passed\n"); 86 | else 87 | printf("{ MESH[\"ijk\"] = MESH[\"pqr\"]*DFT[\"ip\"]*DFT[\"jq\"]*DFT[\"kr\"] } failed\n"); 88 | } else 89 | MPI_Reduce(&pass, MPI_IN_PLACE, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); 90 | 91 | MPI_Barrier(MPI_COMM_WORLD); 92 | 93 | 94 | free(idx); 95 | delete [] data; 96 | return pass; 97 | } 98 | 99 | #ifndef TEST_SUITE 100 | /** 101 | * \brief Forms N-by-N DFT matrix A and inverse-dft iA and checks A*iA=I 102 | */ 103 | int main(int argc, char ** argv){ 104 | int logn; 105 | int64_t n; 106 | MPI_Init(&argc, &argv); 107 | 108 | if (argc > 1){ 109 | logn = atoi(argv[1]); 110 | if (logn<0) logn = 3; 111 | } else { 112 | logn = 3; 113 | } 114 | n = 1< 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "../src/shared/util.h" 18 | 19 | int bench_contraction(int n, 20 | int niter, 21 | char const * iA, 22 | char const * iB, 23 | char const * iC, 24 | CTF_World &dw){ 25 | 26 | int rank, i, num_pes; 27 | 28 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 29 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 30 | 31 | int order_A, order_B, order_C; 32 | order_A = strlen(iA); 33 | order_B = strlen(iB); 34 | order_C = strlen(iC); 35 | 36 | int NS_A[order_A]; 37 | int NS_B[order_B]; 38 | int NS_C[order_C]; 39 | int n_A[order_A]; 40 | int n_B[order_B]; 41 | int n_C[order_C]; 42 | 43 | for (i=0; i 11 | using namespace CTF; 12 | 13 | struct cust_type { 14 | char name[256]; 15 | int len_name; 16 | }; 17 | 18 | cust_type cadd(cust_type a, cust_type b){ 19 | if (strlen(a.name) >= strlen(b.name)) return a; 20 | else return b; 21 | } 22 | 23 | void mpi_cadd(void * a, void * b, int * len, MPI_Datatype * d){ 24 | for (int i=0; i<*len; i++){ 25 | ((cust_type*)b)[i] = cadd(((cust_type*)a)[i], ((cust_type*)b)[i]); 26 | 27 | } 28 | } 29 | 30 | int endomorphism_cust(int n, 31 | World & dw){ 32 | 33 | int shapeN4[] = {NS,NS,NS,NS}; 34 | int sizeN4[] = {n+1,n,n+2,n+3}; 35 | 36 | cust_type addid; 37 | addid.name[0] = '\0'; 38 | addid.len_name = 0; 39 | 40 
| MPI_Op mop; 41 | MPI_Op_create(&mpi_cadd, 1, &mop); 42 | 43 | Monoid m = Monoid(addid, &cadd, mop); 44 | 45 | Tensor A(4, sizeN4, shapeN4, dw, m); 46 | 47 | int64_t * inds; 48 | cust_type * vals; 49 | int64_t nvals; 50 | 51 | A.get_local_data(&nvals, &inds, &vals); 52 | 53 | srand48(dw.rank); 54 | for (int64_t i=0; i endo( 63 | [](cust_type & a){ 64 | a.len_name = strlen(a.name); 65 | }); 66 | // below is equivalent to A.scale(NULL, "ijkl", endo); 67 | endo(A["ijkl"]); 68 | 69 | 70 | int64_t * indices; 71 | cust_type * loc_data; 72 | int64_t nloc; 73 | A.get_local_data(&nloc, &indices, &loc_data); 74 | 75 | int pass = 1; 76 | if (pass){ 77 | for (int64_t i=0; i 90 | void offload_gemm(char tA, 91 | char tB, 92 | int m, 93 | int n, 94 | int k, 95 | dtype alpha, 96 | offload_tsr & A, 97 | int lda_A, 98 | offload_tsr & B, 99 | int lda_B, 100 | dtype beta, 101 | offload_tsr & C, 102 | int lda_C); 103 | 104 | template 105 | void offload_gemm(char tA, 106 | char tB, 107 | int m, 108 | int n, 109 | int k, 110 | dtype alpha, 111 | dtype const * dev_A, 112 | int lda_A, 113 | dtype const * dev_B, 114 | int lda_B, 115 | dtype beta, 116 | dtype * dev_C, 117 | int lda_C); 118 | } 119 | #endif 120 | 121 | -------------------------------------------------------------------------------- /examples/trace.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | /** \addtogroup examples 3 | * @{ 4 | * \defgroup trace trace 5 | * @{ 6 | * \brief tests trace over diagonal of Matrices 7 | */ 8 | 9 | #include 10 | using namespace CTF; 11 | 12 | int trace(int const n, 13 | World &dw){ 14 | int rank, i, num_pes; 15 | int64_t np; 16 | double * pairs; 17 | double tr1, tr2, tr3, tr4; 18 | int64_t * indices; 19 | 20 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 21 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 22 | 23 | Matrix<> A(n, n, NS, dw); 24 | Matrix<> B(n, n, NS, dw); 25 | Matrix<> C(n, n, NS, dw); 26 | Matrix<> D(n, n, NS, dw); 27 | Matrix<> C1(n, n, NS, dw); 28 | Matrix<> C2(n, n, NS, dw); 29 | Matrix<> C3(n, n, NS, dw); 30 | Matrix<> C4(n, n, NS, dw); 31 | Vector<> DIAG(n, dw); 32 | 33 | srand48(13*rank); 34 | 35 | A.get_local_data(&np, &indices, &pairs); 36 | for (i=0; i1.E-10 || fabs(tr2-tr3)/tr2>1.E-10 || fabs(tr3-tr4)/tr3>1.E-10){ 79 | pass = 0; 80 | } 81 | if (!pass){ 82 | printf("{ tr(ABCD) = tr(DABC) = tr(CDAB) = tr(BCDA) } failed\n"); 83 | } else { 84 | printf("{ tr(ABCD) = tr(DABC) = tr(CDAB) = tr(BCDA) } passed\n"); 85 | } 86 | } else 87 | MPI_Reduce(&pass, MPI_IN_PLACE, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); 88 | return pass; 89 | 90 | } 91 | 92 | 93 | #ifndef TEST_SUITE 94 | char* getCmdOption(char ** begin, 95 | char ** end, 96 | const std::string & option){ 97 | char ** itr = std::find(begin, end, option); 98 | if (itr != end && ++itr != end){ 99 | return *itr; 100 | } 101 | return 0; 102 | } 103 | 104 | 105 | int main(int argc, char ** argv){ 106 | int rank, np, n; 107 | int const in_num = argc; 108 | char ** input_str = argv; 109 | 110 | MPI_Init(&argc, &argv); 111 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 112 | MPI_Comm_size(MPI_COMM_WORLD, &np); 113 | 114 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 115 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 116 | if (n < 0) n = 7; 117 | } else n = 7; 118 | 119 | 120 | { 121 | World dw(MPI_COMM_WORLD, argc, argv); 122 | if (rank == 0){ 123 | printf("Checking trace calculation n = %d, p = %d:\n",n,np); 124 | } 125 | int pass = 
trace(n,dw); 126 | assert(pass); 127 | } 128 | 129 | MPI_Finalize(); 130 | return 0; 131 | } 132 | /** 133 | * @} 134 | * @} 135 | */ 136 | 137 | #endif 138 | -------------------------------------------------------------------------------- /examples/jacobi.cxx: -------------------------------------------------------------------------------- 1 | /** \addtogroup examples 2 | * @{ 3 | * \defgroup jacobi jacobi 4 | * @{ 5 | * \brief Jacobi iterative method using gemv and spmv 6 | */ 7 | 8 | #include 9 | using namespace CTF; 10 | 11 | // compute a single Jacobi iteration to get new x, elementwise: x_i <== d_i*(b_i-sum_j R_ij*x_j) 12 | // solves Ax=b where R_ij=A_ij for i!=j, while R_ii=0, and d_i=1/A_ii 13 | void jacobi_iter(Matrix<> & R, Vector<> & b, Vector<> & d, Vector<> &x){ 14 | x["i"] = -R["ij"]*x["j"]; 15 | x["i"] += b["i"]; 16 | x["i"] *= d["i"]; 17 | } 18 | 19 | int jacobi(int n, 20 | World & dw){ 21 | 22 | Matrix<> spA(n, n, SP, dw, "spA"); 23 | Matrix<> dnA(n, n, dw, "dnA"); 24 | Vector<> b(n, dw); 25 | Vector<> c1(n, dw); 26 | Vector<> c2(n, dw); 27 | Vector<> res(n, dw); 28 | 29 | srand48(dw.rank); 30 | b.fill_random(0.0,1.0); 31 | c1.fill_random(0.0,1.0); 32 | c2["i"] = c1["i"]; 33 | 34 | //make diagonally dominant matrix 35 | dnA.fill_random(0.0,1.0); 36 | spA["ij"] += dnA["ij"]; 37 | //sparsify 38 | spA.sparsify(.5); 39 | spA["ii"] += 2.*n; 40 | dnA["ij"] = spA["ij"]; 41 | 42 | Vector<> d(n, dw); 43 | d["i"] = spA["ii"]; 44 | Transform<> inv([](double & d){ d=1./d; }); 45 | inv(d["i"]); 46 | 47 | Matrix<> spR(n, n, SP, dw, "spR"); 48 | Matrix<> dnR(n, n, dw, "dnR"); 49 | spR["ij"] = spA["ij"]; 50 | dnR["ij"] = dnA["ij"]; 51 | spR["ii"] = 0; 52 | dnR["ii"] = 0; 53 | 54 | /* spR.print(); 55 | dnR.print(); */ 56 | 57 | //do up to 100 iterations 58 | double res_norm; 59 | int iter; 60 | for (iter=0; iter<100; iter++){ 61 | jacobi_iter(dnR, b, d, c1); 62 | 63 | res["i"] = b["i"]; 64 | res["i"] -= dnA["ij"]*c1["j"]; 65 | 66 | res_norm = res.norm2(); 67 | if (res_norm < 1.E-4) break; 68 | } 69 | #ifndef TEST_SUITE 70 | if (dw.rank == 0) 71 | printf("Completed %d iterations of Jacobi with dense matrix, residual F-norm is %E\n", iter, res_norm); 72 | #endif 73 | 74 | for (iter=0; iter<100; iter++){ 75 | jacobi_iter(spR, b, d, c2); 76 | 77 | res["i"] = b["i"]; 78 | res["i"] -= spA["ij"]*c2["j"]; 79 | 80 | res_norm = res.norm2(); 81 | if (res_norm < 1.E-4) break; 82 | } 83 | #ifndef TEST_SUITE 84 | if (dw.rank == 0) 85 | printf("Completed %d iterations of Jacobi with sparse matrix, residual F-norm is %E\n", iter, res_norm); 86 | #endif 87 | 88 | c2["i"] -= c1["i"]; 89 | 90 | bool pass = c2.norm2() <= 1.E-6; 91 | 92 | if (dw.rank == 0){ 93 | if (pass) 94 | printf("{ Jacobi x[\"i\"] = (1./A[\"ii\"])*(b[\"j\"] - (A[\"ij\"]-A[\"ii\"])*x[\"j\"]) with sparse A } passed \n"); 95 | else 96 | printf("{ Jacobi x[\"i\"] = (1./A[\"ii\"])*(b[\"j\"] - (A[\"ij\"]-A[\"ii\"])*x[\"j\"]) with sparse A } failed \n"); 97 | } 98 | return pass; 99 | } 100 | 101 | 102 | #ifndef TEST_SUITE 103 | char* getCmdOption(char ** begin, 104 | char ** end, 105 | const std::string & option){ 106 | char ** itr = std::find(begin, end, option); 107 | if (itr != end && ++itr != end){ 108 | return *itr; 109 | } 110 | return 0; 111 | } 112 | 113 | 114 | int main(int argc, char ** argv){ 115 | int rank, np, n, pass; 116 | int const in_num = argc; 117 | char ** input_str = argv; 118 | 119 | MPI_Init(&argc, &argv); 120 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 121 | MPI_Comm_size(MPI_COMM_WORLD, &np); 122 | 123 | if 
(getCmdOption(input_str, input_str+in_num, "-n")){ 124 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 125 | if (n < 0) n = 7; 126 | } else n = 7; 127 | 128 | 129 | { 130 | World dw(argc, argv); 131 | 132 | if (rank == 0){ 133 | printf("Running Jacobi method on random %d-by-%d sparse matrix\n",n,n); 134 | } 135 | pass = jacobi(n, dw); 136 | assert(pass); 137 | } 138 | 139 | MPI_Finalize(); 140 | return 0; 141 | } 142 | /** 143 | * @} 144 | * @} 145 | */ 146 | 147 | #endif 148 | -------------------------------------------------------------------------------- /examples/sssp.cxx: -------------------------------------------------------------------------------- 1 | /** \addtogroup examples 2 | * @{ 3 | * \defgroup sssp sssp 4 | * @{ 5 | * \brief single-source shortest-paths via the Bellman-Ford algorithm 6 | */ 7 | 8 | #include 9 | #include 10 | using namespace CTF; 11 | 12 | 13 | // return false if there are negative cycles, true otherwise 14 | template 15 | bool Bellman_Ford(Matrix A, Vector P, int n){ 16 | Vector Q(P); 17 | int r = 0; 18 | int new_tot_wht = P["ij"]; 19 | int tot_wht; 20 | do { 21 | if (r == n+1) return false; // exit if we did not converge in n iterations 22 | else r++; 23 | Q["i"] = P["i"]; // save old distances 24 | P["i"] += A["ij"]*P["j"]; // update distances 25 | tot_wht = new_tot_wht; 26 | new_tot_wht = P["ij"]; 27 | assert(new_tot_wht <= tot_wht); 28 | } while (new_tot_wht < tot_wht); // continue so long as some distance got shorter 29 | return true; 30 | } 31 | 32 | // calculate SSSP on a graph of n nodes distributed on World (communicator) dw 33 | int sssp(int n, 34 | World & dw){ 35 | 36 | //tropical semiring, define additive identity to be n*n (max weight) to prevent integer overflow 37 | Semiring s(n*n, 38 | [](int a, int b){ return std::min(a,b); }, 39 | MPI_MIN, 40 | 0, 41 | [](int a, int b){ return a+b; }); 42 | 43 | //random adjacency matrix 44 | Matrix A(n, n, dw, s); 45 | srand(dw.rank); 46 | A.fill_random(0, n*n); 47 | 48 | A["ii"] = n*n; 49 | 50 | A.sparsify([=](int a){ return a<5*n; }); 51 | 52 | Vector v(n, dw, s); 53 | if (dw.rank == 0){ 54 | int64_t idx = 0; 55 | int val = 0; 56 | v.write(1, &idx, &val); 57 | } else v.write(0, NULL, NULL); 58 | 59 | //make sure we converged 60 | int pass = Bellman_Ford(A, v, n); 61 | if (n>=3){ 62 | v["i"] = n*n; 63 | if (dw.rank == 0){ 64 | int64_t idx = 0; 65 | int val = 0; 66 | v.write(1, &idx, &val); 67 | } else v.write(0, NULL, NULL); 68 | 69 | 70 | // add a negative cycle to A 71 | if (dw.rank == 0){ 72 | int64_t idx[] = {1,n+2,2*n+0}; 73 | int val[] = {1, -1, -1}; 74 | A.write(3, idx, val); 75 | } else A.write(0, NULL, NULL); 76 | //make sure we did not converge 77 | int pass2 = Bellman_Ford(A, v, n); 78 | pass = pass & !pass2; 79 | } 80 | 81 | if (dw.rank == 0){ 82 | MPI_Reduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); 83 | if (pass) 84 | printf("{ negative cycle check via Bellman-Ford } passed \n"); 85 | else 86 | printf("{ negative cycle check via Bellman-Ford } failed \n"); 87 | } else 88 | MPI_Reduce(&pass, MPI_IN_PLACE, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); 89 | return pass; 90 | } 91 | 92 | 93 | #ifndef TEST_SUITE 94 | char* getCmdOption(char ** begin, 95 | char ** end, 96 | const std::string & option){ 97 | char ** itr = std::find(begin, end, option); 98 | if (itr != end && ++itr != end){ 99 | return *itr; 100 | } 101 | return 0; 102 | } 103 | 104 | 105 | int main(int argc, char ** argv){ 106 | int rank, np, n, pass; 107 | int const in_num = argc; 108 | char ** 
input_str = argv; 109 | 110 | MPI_Init(&argc, &argv); 111 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 112 | MPI_Comm_size(MPI_COMM_WORLD, &np); 113 | 114 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 115 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 116 | if (n < 0) n = 7; 117 | } else n = 7; 118 | 119 | { 120 | World dw(argc, argv); 121 | 122 | if (rank == 0){ 123 | printf("Computing SSSP on sparse graph with %d nodes using the Bellman-Ford algorithm\n",n); 124 | } 125 | pass = sssp(n, dw); 126 | assert(pass); 127 | } 128 | 129 | MPI_Finalize(); 130 | return 0; 131 | } 132 | /** 133 | * @} 134 | * @} 135 | */ 136 | 137 | #endif 138 | -------------------------------------------------------------------------------- /examples/scan.cxx: -------------------------------------------------------------------------------- 1 | /** \addtogroup examples 2 | * @{ 3 | * \defgroup scan scan 4 | * @{ 5 | * \brief scan iterative method using gemv and spmv 6 | */ 7 | 8 | #include 9 | #include 10 | using namespace CTF; 11 | 12 | template 13 | void rec_scan(Tensor & V){ 14 | 15 | if (V.order == 1){ 16 | Matrix W(2, V.lens[0], V.lens[0], *V.wrld, *V.sr); 17 | dtype mulid = ((dtype*)V.sr->mulid())[0]; 18 | W["ij"], [=](dtype & a){ a=mulid; }; 19 | int ssym[] = {SH, NS}; 20 | int nsym[] = {NS, NS}; 21 | Tensor W1(W, ssym); 22 | Tensor W2(W1, nsym); 23 | V["i"] = W2["ji"]*V["j"]; 24 | } else { 25 | Tensor V2(V.order-1, V.lens, *V.wrld, *V.sr); 26 | char str[V.order]; 27 | for (int i=0; i W(2, V.lens[V.order-1], V.lens[V.order-1], *V.wrld, *V.sr); 32 | dtype mulid = ((dtype*)V.sr->mulid())[0]; 33 | W["ij"], [=](dtype & a){ a=mulid; }; 34 | int hsym[] = {SH, NS}; 35 | int nsym[] = {NS, NS}; 36 | Tensor W1(W, hsym); 37 | Tensor W2(W1, nsym); 38 | char str2[V.order]; 39 | memcpy(str2+1, str+1, V.order-1); 40 | str2[0] = 'a'+V.order; 41 | char strW[2] = {str2[0],'a'}; 42 | V[str] = W2[strW]*V[str2]; 43 | V[str] += V2[str+1]; 44 | } 45 | } 46 | 47 | template 48 | void scan(Vector & v, int logn){ 49 | int64_t np; 50 | int64_t * inds; 51 | double * data; 52 | 53 | int lens[logn]; 54 | std::fill(lens, lens+logn, 2); 55 | 56 | // represent vector to scan as 2-by-...-by-2 tensor 57 | Tensor V(logn, lens, *v.wrld, *v.sr); 58 | 59 | v.get_local_data(&np, &inds, &data); 60 | V.write(np, inds, data); 61 | 62 | free(inds); 63 | delete [] data; 64 | 65 | rec_scan(V); 66 | 67 | // put the data from the tensor back into the vector 68 | V.get_local_data(&np, &inds, &data); 69 | v.write(np, inds, data); 70 | 71 | free(inds); 72 | delete [] data; 73 | } 74 | 75 | int scan_test(int logn, 76 | World & dw){ 77 | 78 | Vector<> v(1<= 1.E-9*(1< 4 | 5 | using namespace CTF; 6 | 7 | int fast_diagram(int const n, 8 | World &ctf){ 9 | int rank, i, num_pes; 10 | 11 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 13 | 14 | int len3[] = {n,n,n}; 15 | int len4[] = {n,n,n,n}; 16 | //int len5[] = {n,n,n,n,n}; 17 | int NNN[] = {NS,NS,NS}; 18 | int NNNN[] = {NS,NS,NS,NS}; 19 | int ANNN[] = {AS,NS,NS,NS}; 20 | int SNNN[] = {SH,NS,NS,NS}; 21 | //int AANNN[] = {AS,AS,NS,NS,NS}; 22 | 23 | Tensor<> T(4, len4, SNNN, ctf); 24 | Tensor<> V(4, len4, SNNN, ctf); 25 | 26 | Tensor<> W(4, len4, SNNN, ctf); 27 | Tensor<> W_ans(4, len4, SNNN, ctf); 28 | 29 | Tensor<> Z_AS(4, len4, ANNN, ctf); 30 | Tensor<> Z_SH(4, len4, SNNN, ctf); 31 | Tensor<> Z_NS(4, len4, NNNN, ctf); 32 | Tensor<> Z_D(3, len3, NNN, ctf); 33 | 34 | 35 | Tensor<> Ts(3, len3, NNN, ctf); 36 | Tensor<> Zs(3, len3, NNN, ctf); 37 | 38 | { 39 | 
int64_t * indices; 40 | double * values; 41 | int64_t size; 42 | srand48(173*rank); 43 | 44 | T.read_local(&size, &indices, &values); 45 | for (i=0; i